mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Chore: minor changes
This commit is contained in:
parent
eb0623c1a9
commit
efa34399b0
@ -9,7 +9,7 @@ import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
|||||||
import { domainDeduper } from './lib/domain-deduper';
|
import { domainDeduper } from './lib/domain-deduper';
|
||||||
import createKeywordFilter from './lib/aho-corasick';
|
import createKeywordFilter from './lib/aho-corasick';
|
||||||
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
||||||
import { sortDomains } from './lib/stable-sort-domain';
|
import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
|
||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
// tldts-experimental is way faster than tldts, but very little bit inaccurate
|
// tldts-experimental is way faster than tldts, but very little bit inaccurate
|
||||||
// (since it is hashes based). But the result is still deterministic, which is
|
// (since it is hashes based). But the result is still deterministic, which is
|
||||||
@ -21,6 +21,10 @@ import { getPhishingDomains } from './lib/get-phishing-domains';
|
|||||||
import { subtract as SetSubstract } from 'mnemonist/set';
|
import { subtract as SetSubstract } from 'mnemonist/set';
|
||||||
import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
|
import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
|
||||||
import { sort } from './lib/timsort';
|
import { sort } from './lib/timsort';
|
||||||
|
import { looseTldtsOpt } from './constants/loose-tldts-opt';
|
||||||
|
import { build } from 'bun';
|
||||||
|
|
||||||
|
const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'));
|
||||||
|
|
||||||
export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||||
/** Whitelists */
|
/** Whitelists */
|
||||||
@ -37,11 +41,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
let shouldStop = false;
|
let shouldStop = false;
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
// Parse from remote hosts & domain lists
|
// Parse from remote hosts & domain lists
|
||||||
...HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSets)),
|
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSets)),
|
||||||
|
DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSets)),
|
||||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSets)),
|
ADGUARD_FILTERS.map(
|
||||||
|
|
||||||
...ADGUARD_FILTERS.map(
|
|
||||||
input => processFilterRules(childSpan, ...input)
|
input => processFilterRules(childSpan, ...input)
|
||||||
.then(({ white, black, foundDebugDomain }) => {
|
.then(({ white, black, foundDebugDomain }) => {
|
||||||
if (foundDebugDomain) {
|
if (foundDebugDomain) {
|
||||||
@ -53,7 +55,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
setAddFromArray(domainSets, black);
|
setAddFromArray(domainSets, black);
|
||||||
})
|
})
|
||||||
),
|
),
|
||||||
...([
|
([
|
||||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
|
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
|
||||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
|
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
|
||||||
].map(
|
].map(
|
||||||
@ -64,9 +66,8 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
})
|
})
|
||||||
)),
|
)),
|
||||||
getPhishingDomains(childSpan).then(appendArrayToDomainSets),
|
getPhishingDomains(childSpan).then(appendArrayToDomainSets),
|
||||||
childSpan.traceChildAsync('process reject_sukka.conf', () => readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))
|
getRejectSukkaConfPromise.then(appendArrayToDomainSets)
|
||||||
.then(appendArrayToDomainSets))
|
].flat());
|
||||||
]);
|
|
||||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||||
return shouldStop;
|
return shouldStop;
|
||||||
});
|
});
|
||||||
@ -107,30 +108,31 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
const trie = span.traceChildSync('dedupe from white suffixes', () => {
|
const trie = span.traceChildSync('create smol trie', () => createTrie(domainSets, true, true));
|
||||||
const trie = createTrie(domainSets, true, true);
|
|
||||||
filterRuleWhitelistDomainSets.forEach(trie.whitelist);
|
span.traceChildSync('dedupe from white suffixes', () => filterRuleWhitelistDomainSets.forEach(trie.whitelist));
|
||||||
return trie;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Dedupe domainSets
|
// Dedupe domainSets
|
||||||
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie));
|
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie));
|
||||||
|
|
||||||
console.log(`Final size ${dudupedDominArray.length}`);
|
console.log(`Final size ${dudupedDominArray.length}`);
|
||||||
|
|
||||||
|
const {
|
||||||
|
domainMap: domainArrayMainDomainMap,
|
||||||
|
subdomainMap: domainArraySubdomainMap
|
||||||
|
} = span.traceChildSync(
|
||||||
|
'build map for stat and sort',
|
||||||
|
() => buildParseDomainMap(dudupedDominArray)
|
||||||
|
);
|
||||||
|
|
||||||
// Create reject stats
|
// Create reject stats
|
||||||
const rejectDomainsStats: Array<[string, number]> = span
|
const rejectDomainsStats: Array<[string, number]> = span
|
||||||
.traceChild('create reject stats')
|
.traceChild('create reject stats')
|
||||||
.traceSyncFn(() => {
|
.traceSyncFn(() => {
|
||||||
const tldtsOpt = { allowPrivateDomains: false, detectIp: false, validateHostname: false };
|
|
||||||
const statMap = dudupedDominArray.reduce<Map<string, number>>((acc, cur) => {
|
const statMap = dudupedDominArray.reduce<Map<string, number>>((acc, cur) => {
|
||||||
const suffix = tldts.getDomain(cur, tldtsOpt);
|
const suffix = domainArrayMainDomainMap.get(cur);
|
||||||
if (!suffix) return acc;
|
if (suffix) {
|
||||||
|
acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
|
||||||
if (acc.has(suffix)) {
|
|
||||||
acc.set(suffix, acc.get(suffix)! + 1);
|
|
||||||
} else {
|
|
||||||
acc.set(suffix, 1);
|
|
||||||
}
|
}
|
||||||
return acc;
|
return acc;
|
||||||
}, new Map());
|
}, new Map());
|
||||||
@ -157,7 +159,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
'Sukka\'s Ruleset - Reject Base',
|
'Sukka\'s Ruleset - Reject Base',
|
||||||
description,
|
description,
|
||||||
new Date(),
|
new Date(),
|
||||||
span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray)),
|
span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray, domainArrayMainDomainMap, domainArraySubdomainMap)),
|
||||||
'domainset',
|
'domainset',
|
||||||
path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
|
path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
|
||||||
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
|
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
|
||||||
|
|||||||
9
Build/constants/loose-tldts-opt.ts
Normal file
9
Build/constants/loose-tldts-opt.ts
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import type * as tldts from 'tldts';
|
||||||
|
|
||||||
|
export const looseTldtsOpt: Parameters<typeof tldts.getSubdomain>[1] = {
|
||||||
|
allowPrivateDomains: false,
|
||||||
|
extractHostname: false,
|
||||||
|
validateHostname: false,
|
||||||
|
detectIp: false,
|
||||||
|
mixedInputs: false
|
||||||
|
};
|
||||||
@ -1,8 +1,8 @@
|
|||||||
interface Node {
|
interface Node {
|
||||||
/** @default false */
|
/** @default false */
|
||||||
wordEnd?: boolean,
|
wordEnd: boolean,
|
||||||
children: Map<string, Node | undefined>,
|
children: Map<string, Node | undefined>,
|
||||||
fail?: Node
|
fail: Node | undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
const createNode = (): Node => ({
|
const createNode = (): Node => ({
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import { getSubdomain, getPublicSuffix } from 'tldts-experimental';
|
|||||||
import type { Span } from '../trace';
|
import type { Span } from '../trace';
|
||||||
import { appendArrayInPlaceCurried } from './append-array-in-place';
|
import { appendArrayInPlaceCurried } from './append-array-in-place';
|
||||||
import { PHISHING_DOMAIN_LISTS } from './reject-data-source';
|
import { PHISHING_DOMAIN_LISTS } from './reject-data-source';
|
||||||
|
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||||
|
|
||||||
const BLACK_TLD = new Set([
|
const BLACK_TLD = new Set([
|
||||||
'accountant',
|
'accountant',
|
||||||
@ -99,14 +100,6 @@ export const WHITELIST_MAIN_DOMAINS = new Set([
|
|||||||
'notion.site'
|
'notion.site'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const tldtsOpt: Parameters<typeof getSubdomain>[1] = {
|
|
||||||
allowPrivateDomains: false,
|
|
||||||
extractHostname: false,
|
|
||||||
validateHostname: false,
|
|
||||||
detectIp: false,
|
|
||||||
mixedInputs: false
|
|
||||||
};
|
|
||||||
|
|
||||||
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||||
const gorhill = await getGorhillPublicSuffixPromise();
|
const gorhill = await getGorhillPublicSuffixPromise();
|
||||||
|
|
||||||
@ -132,7 +125,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const tld = getPublicSuffix(safeGorhillLine, tldtsOpt);
|
const tld = getPublicSuffix(safeGorhillLine, looseTldtsOpt);
|
||||||
if (!tld || !BLACK_TLD.has(tld)) continue;
|
if (!tld || !BLACK_TLD.has(tld)) continue;
|
||||||
|
|
||||||
domainCountMap[apexDomain] ||= 0;
|
domainCountMap[apexDomain] ||= 0;
|
||||||
@ -187,7 +180,7 @@ export function calcDomainAbuseScore(line: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const subdomain = getSubdomain(line, tldtsOpt);
|
const subdomain = getSubdomain(line, looseTldtsOpt);
|
||||||
|
|
||||||
if (subdomain) {
|
if (subdomain) {
|
||||||
if (subdomain.slice(1).includes('.')) {
|
if (subdomain.slice(1).includes('.')) {
|
||||||
|
|||||||
@ -7,6 +7,7 @@ export const normalizeDomain = (domain: string) => {
|
|||||||
const parsed = tldtsParse(domain, { allowPrivateDomains: true, detectIp: false });
|
const parsed = tldtsParse(domain, { allowPrivateDomains: true, detectIp: false });
|
||||||
// if (parsed.isIp) return null;
|
// if (parsed.isIp) return null;
|
||||||
if (!parsed.hostname) return null;
|
if (!parsed.hostname) return null;
|
||||||
|
// Private invalid domain (things like .tor, .dn42, etc)
|
||||||
if (!parsed.isIcann && !parsed.isPrivate) return null;
|
if (!parsed.isIcann && !parsed.isPrivate) return null;
|
||||||
|
|
||||||
let h = parsed.hostname;
|
let h = parsed.hostname;
|
||||||
|
|||||||
@ -3,32 +3,51 @@
|
|||||||
// enough when sorting.
|
// enough when sorting.
|
||||||
import { getDomain, getSubdomain } from 'tldts-experimental';
|
import { getDomain, getSubdomain } from 'tldts-experimental';
|
||||||
import { sort } from './timsort';
|
import { sort } from './timsort';
|
||||||
|
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||||
|
|
||||||
export const compare = (a: string, b: string) => {
|
export const compare = (a: string, b: string) => {
|
||||||
if (a === b) return 0;
|
if (a === b) return 0;
|
||||||
return (a.length - b.length) || a.localeCompare(b);
|
return (a.length - b.length) || a.localeCompare(b);
|
||||||
};
|
};
|
||||||
|
|
||||||
const tldtsOpt: Parameters<typeof getDomain>[1] = {
|
export const buildParseDomainMap = (inputs: string[]) => {
|
||||||
allowPrivateDomains: false,
|
|
||||||
extractHostname: false,
|
|
||||||
validateHostname: false,
|
|
||||||
detectIp: false,
|
|
||||||
mixedInputs: false
|
|
||||||
};
|
|
||||||
|
|
||||||
export const sortDomains = (inputs: string[]) => {
|
|
||||||
const domainMap = new Map<string, string>();
|
const domainMap = new Map<string, string>();
|
||||||
const subdomainMap = new Map<string, string>();
|
const subdomainMap = new Map<string, string>();
|
||||||
|
|
||||||
for (let i = 0, len = inputs.length; i < len; i++) {
|
for (let i = 0, len = inputs.length; i < len; i++) {
|
||||||
const cur = inputs[i];
|
const cur = inputs[i];
|
||||||
if (!domainMap.has(cur)) {
|
if (!domainMap.has(cur)) {
|
||||||
const topD = getDomain(cur, tldtsOpt);
|
const topD = getDomain(cur, looseTldtsOpt);
|
||||||
domainMap.set(cur, topD ?? cur);
|
domainMap.set(cur, topD ?? cur);
|
||||||
}
|
}
|
||||||
if (!subdomainMap.has(cur)) {
|
if (!subdomainMap.has(cur)) {
|
||||||
const subD = getSubdomain(cur, tldtsOpt);
|
const subD = getSubdomain(cur, looseTldtsOpt);
|
||||||
|
subdomainMap.set(cur, subD ?? cur);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { domainMap, subdomainMap };
|
||||||
|
};
|
||||||
|
|
||||||
|
export const sortDomains = (
|
||||||
|
inputs: string[],
|
||||||
|
domainMap?: Map<string, string>,
|
||||||
|
subdomainMap?: Map<string, string>
|
||||||
|
) => {
|
||||||
|
if (!domainMap || !subdomainMap) {
|
||||||
|
const { domainMap: dm, subdomainMap: sm } = buildParseDomainMap(inputs);
|
||||||
|
domainMap = dm;
|
||||||
|
subdomainMap = sm;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let i = 0, len = inputs.length; i < len; i++) {
|
||||||
|
const cur = inputs[i];
|
||||||
|
if (!domainMap.has(cur)) {
|
||||||
|
const topD = getDomain(cur, looseTldtsOpt);
|
||||||
|
domainMap.set(cur, topD ?? cur);
|
||||||
|
}
|
||||||
|
if (!subdomainMap.has(cur)) {
|
||||||
|
const subD = getSubdomain(cur, looseTldtsOpt);
|
||||||
subdomainMap.set(cur, subD ?? cur);
|
subdomainMap.set(cur, subD ?? cur);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -36,16 +36,11 @@ const createNode = (parent: TrieNode | null = null): TrieNode => {
|
|||||||
return node;
|
return node;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
|
const hostnameToTokens = (hostname: string): string[] => {
|
||||||
let size = 0;
|
|
||||||
const root: TrieNode = createNode();
|
|
||||||
|
|
||||||
const suffixToTokens = hostnameMode
|
|
||||||
? (suffix: string) => {
|
|
||||||
let buf = '';
|
let buf = '';
|
||||||
const tokens: string[] = [];
|
const tokens: string[] = [];
|
||||||
for (let i = 0, l = suffix.length; i < l; i++) {
|
for (let i = 0, l = hostname.length; i < l; i++) {
|
||||||
const c = suffix[i];
|
const c = hostname[i];
|
||||||
if (c === '.') {
|
if (c === '.') {
|
||||||
if (buf) {
|
if (buf) {
|
||||||
tokens.push(buf, /* . */ c);
|
tokens.push(buf, /* . */ c);
|
||||||
@ -61,7 +56,14 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
tokens.push(buf);
|
tokens.push(buf);
|
||||||
}
|
}
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
};
|
||||||
|
|
||||||
|
export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
|
||||||
|
let size = 0;
|
||||||
|
const root: TrieNode = createNode();
|
||||||
|
|
||||||
|
const suffixToTokens = hostnameMode
|
||||||
|
? hostnameToTokens
|
||||||
: (suffix: string) => suffix;
|
: (suffix: string) => suffix;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user