Replace more utilities w/ foxts

This commit is contained in:
SukkaW 2024-12-12 23:19:03 +08:00
parent 80ac403944
commit 72d953b230
21 changed files with 30 additions and 384 deletions

View File

@ -14,7 +14,7 @@ import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { getPhishingDomains } from './lib/get-phishing-domains';
import { setAddFromArray } from './lib/set-add-from-array';
import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
import { DomainsetOutput } from './lib/create-file';
@ -77,7 +77,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
shouldStop = true;
// we should not break here, as we want to see full matches from all data source
}
setAddFromArray(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectOutput(black);
})
),
@ -89,13 +89,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
shouldStop = true;
// we should not break here, as we want to see full matches from all data source
}
setAddFromArray(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectExtraOutput(black);
})
),
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
setAddFromArray(filterRuleWhitelistDomainSets, white);
setAddFromArray(filterRuleWhitelistDomainSets, black);
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
})),
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf')).then(appendArrayToRejectOutput),

View File

@ -3,7 +3,7 @@ import path from 'node:path';
import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
import { fsFetchCache, getFileContentHash } from './lib/cache-filesystem';
import { processLine } from './lib/process-line';
import { RulesetOutput } from './lib/create-file';

View File

@ -1,6 +1,6 @@
// @ts-check
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { createMemoizedPromise } from './lib/memo-promise';

View File

@ -1,74 +0,0 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import createKeywordFilter from './aho-corasick';
// eslint-disable import-x/no-unresolved -- benchmark
import ModernAhoCorasick from 'modern-ahocorasick';
import { AhoCorasick as MonyoneAhoCorasick } from '@monyone/aho-corasick';
// @ts-expect-error -- no types
import FastScanner from 'fastscan';
import { AhoCorasick as RustAhoCorasick } from '@blackglory/aho-corasick';
// eslint-enable import-x/no-unresolved
/**
 * Feeds every entry of `data` to `testFn`, in order, discarding the results.
 * Used as the measured body of each benchmark case.
 *
 * @param data Lines to test.
 * @param testFn Matcher under benchmark; its return value is ignored.
 */
function runKeywordFilter(data: string[], testFn: (line: string) => boolean) {
  // The length is captured once up front, before any matcher call.
  const total = data.length;
  let cursor = 0;
  while (cursor < total) {
    testFn(data[cursor]);
    cursor += 1;
  }
}
/**
 * Builds one matcher per Aho-Corasick implementation under comparison, all
 * from the same keyword list.
 *
 * Each implementation receives its own copy of the keywords (`slice()`).
 *
 * @param keywordsSet Keywords every matcher should detect.
 * @returns A readonly list of `[label, matcher]` pairs, where each matcher
 * reports whether a line contains at least one keyword.
 */
export function getFns(keywordsSet: string[] | readonly string[]) {
  const modernMatcher = new ModernAhoCorasick(keywordsSet.slice());
  const monyoneMatcher = new MonyoneAhoCorasick(keywordsSet.slice());
  const fastScanner = new FastScanner(keywordsSet.slice());
  const rustMatcher = new RustAhoCorasick(keywordsSet.slice(), { caseSensitive: true });
  const inHouseMatcher = createKeywordFilter(keywordsSet.slice());

  return [
    ['createKeywordFilter', inHouseMatcher],
    // Search-style APIs return the list of hits; any hit counts as a match.
    ['modern-ahocorasick', (line: string) => modernMatcher.search(line).length > 0],
    ['@monyone/aho-corasick', (line: string) => monyoneMatcher.hasKeywordInText(line)],
    ['fastscan', (line: string) => fastScanner.search(line).length > 0],
    ['@blackglory/aho-corasick', (line: string) => rustMatcher.isMatch(line)]
  ] as const;
}
// Benchmark entry point: only runs when this file is executed directly.
if (require.main === module) {
  (async () => {
    // mitata is loaded lazily so importing this module (e.g. from the test
    // file) does not pull in the benchmark harness.
    const { bench, group, run } = await import('mitata');

    const lines = await Array.fromAsync(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt', true));
    console.log({ dataLen: lines.length });

    const punctuationKeywords = ['!', '?', '*', '[', '(', ']', ')', ',', '#', '%', '&', '=', '~'];
    // special modifier
    const modifierKeywords = ['$popup', '$removeparam', '$popunder', '$cname', '$frame'];
    // some bad syntax
    const badSyntaxKeywords = ['^popup'];
    const keywordsSet = [...punctuationKeywords, ...modifierKeywords, ...badSyntaxKeywords];

    const candidates = getFns(keywordsSet);

    group(() => {
      for (const [label, matcher] of candidates) {
        bench(label, () => runKeywordFilter(lines, matcher));
      }
    });

    run();
  })();
}

View File

@ -1,33 +0,0 @@
import { describe, it } from 'mocha';
import { expect } from 'expect';
import { getFns } from './aho-corasick.bench';
// Runs every matcher returned by getFns against the same fixtures, so all
// implementations are held to identical expectations.
describe('AhoCorasick', () => {
  // Each case: [keywords, input lines, expected match result per input line]
  const cases = [
    [
      ['ap', 'an'],
      ['bananan', 'apple', 'melon'],
      [true, true, false]
    ],
    [
      ['cdn', 'sukka'],
      ['bananan', 'apple', 'melon'],
      [false, false, false]
    ]
  ] as const;

  for (const [keywords, fixtures, expected] of cases) {
    for (const [fnName, fn] of getFns(keywords)) {
      it(fnName, () => {
        fixtures.forEach((fixture, index) => {
          expect(fn(fixture)).toBe(expected[index]);
        });
      });
    }
  }
});

View File

@ -1,79 +0,0 @@
/**
 * Trie node of the Aho-Corasick automaton. The node itself is the map from
 * the next character to the child node.
 */
class Node extends Map<string, Node> {
  constructor(
    /**
     * True when a keyword ends at this node, or (after the automaton is
     * built) when a keyword ends at any node on this node's failure chain.
     */
    public wordEnd: boolean,
    /** Node for the longest proper suffix of this path that is also a trie path. */
    public fail: Node | undefined
  ) {
    super();
  }
}

/**
 * Builds an Aho-Corasick matcher for `keys`.
 *
 * @param keys Keywords to look for. Empty strings are ignored.
 * @returns A predicate that is `true` when the given text contains at least
 * one keyword as a substring.
 */
function createKeywordFilter(keys: string[] | Set<string>) {
  const root = new Node(false, undefined);

  // Insert one keyword into the trie, creating nodes on demand.
  const put = (key: string) => {
    let node = root;

    for (let idx = 0, len = key.length; idx < len; idx++) {
      const char = key[idx];

      let next = node.get(char);
      if (next === undefined) {
        next = new Node(false, undefined);
        node.set(char, next);
      }
      node = next;
    }

    // An empty key never leaves the root; the root must not become a word end.
    if (node !== root) {
      node.wordEnd = true;
    }
  };

  keys.forEach(put);

  // Build failure links breadth-first. A node's failure chain only visits
  // shallower nodes, so level order guarantees every link (and every
  // propagated wordEnd flag) on that chain is final before it is read.
  // A depth-first order can read a still-unset `fail` and wrongly fall back
  // to the root, which silently drops matches.
  const queue: Node[] = [root];
  for (let head = 0; head < queue.length; head++) {
    const beginNode = queue[head];

    beginNode.forEach((node, char) => {
      let failNode = beginNode.fail;

      while (failNode && !failNode.has(char)) {
        failNode = failNode.fail;
      }

      const target = failNode ? failNode.get(char)! : root;
      node.fail = target;

      // If a keyword ends at the failure target, that keyword is a suffix of
      // the path to this node, so reaching this node is already a match.
      if (target.wordEnd) {
        node.wordEnd = true;
      }

      queue.push(node);
    });
  }

  return (text: string) => {
    let node: Node | undefined = root;

    for (let i = 0, textLen = text.length; i < textLen; i++) {
      const char = text[i];

      // Fall back along failure links until some node accepts this character.
      while (node && !node.has(char)) {
        node = node.fail;
      }

      node = node ? node.get(char)! : root;

      if (node.wordEnd) {
        return true;
      }
    }

    return false;
  };
}
export default createKeywordFilter;

View File

@ -10,7 +10,7 @@ import { identity } from 'foxts/identity';
import { fastStringArrayJoin } from 'foxts/fast-string-array-join';
import { performance } from 'node:perf_hooks';
import fs from 'node:fs';
import { stringHash } from './string-hash';
import { simpleStringHash } from 'foxts/simple-string-hash';
import { defaultRequestInit, requestWithLog, ResponseError } from './fetch-retry';
import type { UndiciResponseData } from './fetch-retry';
// import type { UndiciResponseData } from './fetch-retry';
@ -447,7 +447,7 @@ export const deserializeSet = (str: string) => new Set(str.split(separator));
export const serializeArray = (arr: string[]) => fastStringArrayJoin(arr, separator);
export const deserializeArray = (str: string) => str.split(separator);
export const getFileContentHash = (filename: string) => stringHash(fs.readFileSync(filename, 'utf-8'));
export const getFileContentHash = (filename: string) => simpleStringHash(fs.readFileSync(filename, 'utf-8'));
export function createCacheKey(filename: string) {
const fileHash = getFileContentHash(filename);
return (key: string) => key + '$' + fileHash + '$';

View File

@ -1,6 +1,6 @@
import picocolors from 'picocolors';
import { defaultRequestInit, requestWithLog, ResponseError } from './fetch-retry';
import { setTimeout } from 'node:timers/promises';
import { wait } from 'foxts/wait';
// eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
export class CustomAbortError extends Error {
@ -35,8 +35,7 @@ export function sleepWithAbort(ms: number, signal: AbortSignal) {
signal.addEventListener('abort', stop, { once: true });
// eslint-disable-next-line sukka/prefer-timer-id -- node:timers/promises
setTimeout(ms, undefined, { ref: false }).then(resolve).catch(reject).finally(() => signal.removeEventListener('abort', stop));
wait(ms).then(resolve).catch(reject).finally(() => signal.removeEventListener('abort', stop));
function stop(this: AbortSignal) { reject(this.reason as Error); }
});

View File

@ -7,7 +7,7 @@ import { appendArrayInPlaceCurried } from './append-array-in-place';
import { DEBUG_DOMAIN_TO_FIND, PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors';
import createKeywordFilter from './aho-corasick';
import { createAhoCorasick as createKeywordFilter } from 'foxts/ahocorasick';
import { createCacheKey, deserializeArray, serializeArray } from './cache-filesystem';
import { cache } from './fs-memo';
import { isCI } from 'ci-info';

View File

@ -1,72 +0,0 @@
/**
 * Cheap check for whether a hostname is an IPv4 address. This only works
 * because `hostname` is expected to already be a valid hostname: the check
 * looks at length, character set and dot placement, not at octet values.
 */
export function isProbablyIpv4(hostname: string): boolean {
  const DOT = 46; // '.'
  const DIGIT_ZERO = 48; // '0'
  const DIGIT_NINE = 57; // '9'

  const length = hostname.length;

  // Cannot be shorter than 1.1.1.1 or longer than 255.255.255.255
  if (length < 7 || length > 15) {
    return false;
  }

  let dotCount = 0;
  for (let pos = 0; pos < length; pos++) {
    const code = hostname.charCodeAt(pos);

    if (code === DOT) {
      dotCount++;
      continue;
    }

    if (code < DIGIT_ZERO || code > DIGIT_NINE) {
      return false;
    }
  }

  if (dotCount !== 3) {
    return false;
  }

  // A dot may not open or close the address.
  return hostname.charCodeAt(0) !== DOT && hostname.charCodeAt(length - 1) !== DOT;
}
/**
 * Cheap check for whether a hostname looks like an IPv6 address, optionally
 * wrapped in square brackets.
 *
 * The check only requires that the (unbracketed) text is at most 39
 * characters, consists solely of hexadecimal digits and colons, and contains
 * at least one colon. It does not validate group counts or `::` placement.
 */
export function isProbablyIpv6(hostname: string): boolean {
  if (hostname.length < 3) {
    return false;
  }

  // Skip a leading '[' and a trailing ']' if present.
  let start = hostname[0] === '[' ? 1 : 0;
  let end = hostname.length;

  if (hostname[end - 1] === ']') {
    end -= 1;
  }

  // We only consider the maximum size of a normal IPv6 address. Note that
  // so-called "IPv4-mapped IPv6 addresses" are not recognized (a '.' is not
  // an accepted character below); this is a corner case and a proper
  // validation library should be used for these.
  if (end - start > 39) {
    return false;
  }

  /* eslint-disable sukka/no-single-return -- here it goes */
  let hasColon = false;

  for (; start < end; start += 1) {
    const code = hostname.charCodeAt(start);

    if (code === 58 /* ':' */) {
      hasColon = true;
    } else if (
      !(
        (
          (code >= 48 && code <= 57) // 0-9
          || (code >= 97 && code <= 102) // a-f
          || (code >= 65 && code <= 70) // A-F (hex digits only, not the whole A-Z range)
        )
      )
    ) {
      return false;
    }
  }

  return hasColon;
  /* eslint-enable sukka/no-single-return -- here it goes */
}

View File

@ -6,7 +6,7 @@ import picocolors from 'picocolors';
import { normalizeDomain } from './normalize-domain';
import { deserializeArray, fsFetchCache, serializeArray, getFileContentHash } from './cache-filesystem';
import type { Span } from '../trace';
import createKeywordFilter from './aho-corasick';
import { createAhoCorasick as createKeywordFilter } from 'foxts/ahocorasick';
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
import { identity } from 'foxts/identity';
import { DEBUG_DOMAIN_TO_FIND } from '../constants/reject-data-source';

View File

@ -1,5 +1,5 @@
import { invariant } from 'foxts/guard';
import createKeywordFilter from '../aho-corasick';
import { createAhoCorasick as createKeywordFilter } from 'foxts/ahocorasick';
import { RuleOutput } from './base';
import type { SingboxSourceFormat } from '../singbox';

View File

@ -1,13 +1,13 @@
import { merge } from 'fast-cidr-tools';
import type { Span } from '../../trace';
import createKeywordFilter from '../aho-corasick';
import { createAhoCorasick as createKeywordFilter } from 'foxts/ahocorasick';
import { appendArrayInPlace } from '../append-array-in-place';
import { appendSetElementsToArray } from 'foxts/append-set-elements-to-array';
import type { SingboxSourceFormat } from '../singbox';
import { RuleOutput } from './base';
import picocolors from 'picocolors';
import { normalizeDomain } from '../normalize-domain';
import { isProbablyIpv4, isProbablyIpv6 } from '../is-fast-ip';
import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];

View File

@ -1,25 +0,0 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { bench, group, run } from 'mitata';
// Compares two ways of adding every element of an array to a Set, using a
// remote domain list as the workload.
(async () => {
  const domains = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));

  // Variant 1: explicit index loop calling set.add per element.
  const addWithIndexLoop = () => {
    const set = new Set(['1', '2', '1', '3', 'skk.moe']);
    for (let i = 0, len = domains.length; i < len; i++) {
      set.add(domains[i]);
    }
  };

  // Variant 2: Array#forEach with set.add as the callback and the set as thisArg.
  const addWithForEach = () => {
    const set = new Set(['1', '2', '1', '3', 'skk.moe']);
    // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
    domains.forEach(set.add, set);
  };

  group(() => {
    bench('setAddFromArray', addWithIndexLoop);
  });

  group(() => {
    // This case is registered with an empty label.
    bench('', addWithForEach);
  });

  run();
})();

View File

@ -1,13 +0,0 @@
/**
 * Adds every element of `arr` to `set`, mutating `set` in place.
 *
 * @param set Destination set; modified by this call.
 * @param arr Elements to add. The array itself is not modified.
 */
export function setAddFromArray<T>(set: Set<T>, arr: T[]): void {
  arr.forEach((item) => {
    set.add(item);
  });
}
/**
 * Curried form of the in-place "add array elements to set" helper: binds the
 * destination `set` first and returns a function that adds each element of
 * the array it is given to that set.
 */
export const setAddFromArrayCurried = <T>(set: Set<T>) => {
  return (arr: T[]): void => {
    arr.forEach((item) => {
      set.add(item);
    });
  };
};

View File

@ -1,11 +1,11 @@
const unsupported = Symbol('unsupported');
// const unsupported = Symbol('unsupported');
// https://sing-box.sagernet.org/configuration/rule-set/source-format/
export const PROCESSOR: Record<string, ((raw: string, type: string, value: string) => [key: keyof SingboxHeadlessRule, value: Required<SingboxHeadlessRule>[keyof SingboxHeadlessRule][number]] | null) | typeof unsupported> = {
'IP-ASN': unsupported,
'URL-REGEX': unsupported,
'USER-AGENT': unsupported
};
// export const PROCESSOR: Record<string, ((raw: string, type: string, value: string) => [key: keyof SingboxHeadlessRule, value: Required<SingboxHeadlessRule>[keyof SingboxHeadlessRule][number]] | null) | typeof unsupported> = {
// 'IP-ASN': unsupported,
// 'URL-REGEX': unsupported,
// 'USER-AGENT': unsupported
// };
interface SingboxHeadlessRule {
domain?: string[],

View File

@ -1,57 +0,0 @@
/**
 * FNV-1a hash implementation (52-bit variant).
 * @author Travis Webb (tjwebb) <me@traviswebb.com>
 *
 * Ported from https://github.com/tjwebb/fnv-plus/blob/master/index.js
 *
 * Simplified, optimized and modified to produce a 52-bit result, which
 * provides a larger hash space than 32 bits while still fitting in
 * JavaScript's 53-bit safe integer range.
 *
 * The hash state is held as four 16-bit lanes (lane0 is the least
 * significant). Each input code unit is XORed into lane0, then the whole
 * state is multiplied by the constant `2^40 + 435`, carrying between lanes.
 */
export function fnv1a52(str: string) {
  let lane0 = 0x2325;
  let lane1 = 0x8422;
  let lane2 = 0x9CE4;
  let lane3 = 0xCBF2;

  for (let pos = 0, total = str.length; pos < total; pos++) {
    const mixed = lane0 ^ str.charCodeAt(pos);

    // Multiply by 435 lane by lane, propagating the overflow of each lane
    // into the next one. The `<< 8` terms are the 2^40 part of the
    // multiplier: 2^40 = 2^8 * 2^32, i.e. two lanes up, shifted by 8 bits.
    const product0 = mixed * 435;
    const product1 = lane1 * 435 + (product0 >>> 16);
    const product2 = lane2 * 435 + (mixed << 8) + (product1 >>> 16);
    const product3 = lane3 * 435 + (lane1 << 8) + (product2 >>> 16);

    lane0 = product0 & 65535;
    lane1 = product1 & 65535;
    lane2 = product2 & 65535;
    lane3 = product3 & 65535;
  }

  // Keep the low 4 bits of the top lane as bits 48..51 and fold its
  // remaining 12 bits into the lowest lane.
  const foldedLow = lane0 ^ (lane3 >> 4);
  return (
    (lane3 & 15) * 281_474_976_710_656
    + lane2 * 4_294_967_296
    + lane1 * 65536
    + foldedLow
  );
}
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `s`.
 *
 * @returns The hash as an unsigned 32-bit integer.
 */
export function fnv1a(s: string) {
  const total = s.length;
  let hash = 0x811C9DC5;
  let pos = 0;

  while (pos < total) {
    hash ^= s.charCodeAt(pos);
    // Multiplication by the FNV prime 16777619 (2^24 + 2^8 + 0x93), spelled
    // out as shifts and adds; the result is wrapped to 32 bits by the next
    // bitwise operation.
    hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
    pos += 1;
  }

  return hash >>> 0;
}
/**
 * Short textual hash of `payload`: the 52-bit FNV-1a hash in base 36,
 * immediately followed by the payload length (in UTF-16 code units) in base 36.
 */
export const stringHash = (payload: string) => {
  const digest = fnv1a52(payload).toString(36);
  const lengthSuffix = payload.length.toString(36);
  return digest + lengthSuffix;
};

View File

@ -10,7 +10,7 @@ import { newQueue } from '@henrygd/queue';
import asyncRetry from 'async-retry';
import * as whoiser from 'whoiser';
import picocolors from 'picocolors';
import createKeywordFilter from './lib/aho-corasick';
import { createAhoCorasick as createKeywordFilter } from 'foxts/ahocorasick';
import './lib/fetch-retry';
const dohServers: Array<[string, DNS2.DnsResolver]> = ([

View File

@ -7,7 +7,7 @@ import { readFileByLine } from './lib/fetch-text-by-line';
import path from 'node:path';
import { OUTPUT_SURGE_DIR } from './constants/dir';
import { $fetch } from './lib/make-fetch-happen';
import createKeywordFilter from './lib/aho-corasick';
import { createAhoCorasick as createKeywordFilter } from 'foxts/ahocorasick';
export async function parseGfwList() {
const whiteSet = new Set<string>();

View File

@ -33,7 +33,7 @@
"escape-string-regexp-node": "^1.0.2",
"fast-cidr-tools": "^0.3.1",
"fdir": "^6.4.2",
"foxts": "1.0.6",
"foxts": "1.0.7",
"hash-wasm": "^4.12.0",
"json-stringify-pretty-compact": "^3.0.0",
"make-fetch-happen": "^14.0.3",

10
pnpm-lock.yaml generated
View File

@ -51,8 +51,8 @@ importers:
specifier: ^6.4.2
version: 6.4.2(picomatch@4.0.2)
foxts:
specifier: 1.0.6
version: 1.0.6
specifier: 1.0.7
version: 1.0.7
hash-wasm:
specifier: ^4.12.0
version: 4.12.0
@ -1139,8 +1139,8 @@ packages:
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
engines: {node: '>= 6'}
foxts@1.0.6:
resolution: {integrity: sha512-hVrqkUX5kH1BIQLXgD4a7VZcwWW6xJEFiiVBwA46i9Kq1TzGLhZHY2nDwxDOOp2DSgKaL70zmwGFR845hes3lQ==}
foxts@1.0.7:
resolution: {integrity: sha512-JcLSIXujoAG6GoxKPgpwB6ME0cK23/txZZCPhANo9GdYaMkRwBZa9pN3ghf67juM46A1Trh2nxJLX01Oak+UvQ==}
fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
@ -2997,7 +2997,7 @@ snapshots:
combined-stream: 1.0.8
mime-types: 2.1.35
foxts@1.0.6: {}
foxts@1.0.7: {}
fs-constants@1.0.0: {}