mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: further speed up infra
This commit is contained in:
parent
adb8b43357
commit
78afa595a9
@ -1,11 +1,21 @@
|
||||
{
|
||||
"root": true,
|
||||
"extends": ["sukka/node"],
|
||||
"rules": {
|
||||
"no-console": "off"
|
||||
},
|
||||
"parserOptions": {
|
||||
"ecmaVersion": "latest",
|
||||
"sourceType": "module"
|
||||
}
|
||||
"ignorePatterns": [
|
||||
"node_modules/",
|
||||
// disable for now
|
||||
"**/*.d.ts"
|
||||
],
|
||||
"overrides": [
|
||||
{
|
||||
"files": ["**/*.js"],
|
||||
"rules": {
|
||||
"no-console": "off"
|
||||
},
|
||||
"parserOptions": {
|
||||
"ecmaVersion": "latest",
|
||||
"sourceType": "module"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@ -5,11 +5,22 @@ const { minifyRules } = require('./lib/minify-rules');
|
||||
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
|
||||
const Trie = require('./lib/trie');
|
||||
const { runner } = require('./lib/trace-runner');
|
||||
const fs = require('fs');
|
||||
|
||||
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix-list_dat.txt');
|
||||
|
||||
runner(__filename, async () => {
|
||||
const trie = new Trie();
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
|
||||
trie.add(line);
|
||||
|
||||
if (fs.existsSync(publicSuffixPath)) {
|
||||
for await (const line of readFileByLine(publicSuffixPath)) {
|
||||
trie.add(line);
|
||||
}
|
||||
} else {
|
||||
console.log('public_suffix_list.dat not found, fetch directly from remote.');
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
|
||||
trie.add(line);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -18,13 +29,16 @@ runner(__filename, async () => {
|
||||
*/
|
||||
const S3OSSDomains = new Set();
|
||||
|
||||
trie.find('.amazonaws.com')
|
||||
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-'))
|
||||
.forEach(line => S3OSSDomains.add(line));
|
||||
|
||||
trie.find('.scw.cloud')
|
||||
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-'))
|
||||
.forEach(line => S3OSSDomains.add(line));
|
||||
trie.find('.amazonaws.com').forEach(line => {
|
||||
if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
|
||||
S3OSSDomains.add(line);
|
||||
}
|
||||
});
|
||||
trie.find('.scw.cloud').forEach(line => {
|
||||
if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
|
||||
S3OSSDomains.add(line);
|
||||
}
|
||||
});
|
||||
|
||||
/** @type {string[]} */
|
||||
const cdnDomainsList = [];
|
||||
@ -45,7 +59,7 @@ runner(__filename, async () => {
|
||||
];
|
||||
const ruleset = minifyRules(cdnDomainsList);
|
||||
|
||||
await Promise.all(createRuleset(
|
||||
return Promise.all(createRuleset(
|
||||
'Sukka\'s Ruleset - CDN Domains',
|
||||
description,
|
||||
new Date(),
|
||||
|
||||
@ -19,8 +19,15 @@ runner(__filename, async () => {
|
||||
|
||||
results.push(
|
||||
...Object.entries(DOMESTICS)
|
||||
.filter(([key]) => key !== 'SYSTEM')
|
||||
.flatMap(([, { domains }]) => domains)
|
||||
.reduce(
|
||||
(acc, [key, { domains }]) => {
|
||||
if (key === 'SYSTEM') {
|
||||
return acc;
|
||||
}
|
||||
return [...acc, ...domains];
|
||||
},
|
||||
/** @type {string[]} */([])
|
||||
)
|
||||
.sort(domainSorter)
|
||||
.map((domain) => `DOMAIN-SUFFIX,${domain}`)
|
||||
);
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
// @ts-check
|
||||
const fse = require('fs-extra');
|
||||
const path = require('path');
|
||||
const { isDomainLoose } = require('./lib/is-domain-loose');
|
||||
const tldts = require('tldts');
|
||||
const { processLine } = require('./lib/process-line');
|
||||
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
|
||||
@ -35,11 +34,15 @@ runner(__filename, async () => {
|
||||
*/
|
||||
const processLocalDomainSet = async (domainSetPath) => {
|
||||
for await (const line of readFileByLine(domainSetPath)) {
|
||||
if (line[0] === '.') {
|
||||
addApexDomain(line.slice(1));
|
||||
} else if (isDomainLoose(line)) {
|
||||
addApexDomain(line);
|
||||
} else if (processLine(line)) {
|
||||
const parsed = tldts.parse(line, { allowPrivateDomains: true });
|
||||
if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) {
|
||||
if (parsed.domain) {
|
||||
set.add(parsed.domain);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (processLine(line)) {
|
||||
console.warn('[drop line from domainset]', line);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
// @ts-check
|
||||
const path = require('path');
|
||||
const fse = require('fs-extra');
|
||||
const fs = require('fs');
|
||||
const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq');
|
||||
const { runner } = require('./lib/trace-runner');
|
||||
const { compareAndWriteFile } = require('./lib/create-file');
|
||||
|
||||
runner(__filename, async () => {
|
||||
const [result] = await Promise.all([
|
||||
@ -11,8 +11,8 @@ runner(__filename, async () => {
|
||||
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
|
||||
]);
|
||||
|
||||
await fs.promises.writeFile(
|
||||
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt'),
|
||||
`${result.map(line => `SUFFIX,${line}`).join('\n')}\n`
|
||||
await compareAndWriteFile(
|
||||
result.map(line => `SUFFIX,${line}`),
|
||||
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
|
||||
);
|
||||
});
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
const { parse } = require('tldts');
|
||||
const tldts = require('tldts');
|
||||
const { processFilterRules } = require('./lib/parse-filter.js');
|
||||
const path = require('path');
|
||||
const { createRuleset } = require('./lib/create-file');
|
||||
const { processLine } = require('./lib/process-line.js');
|
||||
const domainSorter = require('./lib/stable-sort-domain');
|
||||
const { runner } = require('./lib/trace-runner.js');
|
||||
const { runner, traceSync } = require('./lib/trace-runner.js');
|
||||
|
||||
const WHITELIST_DOMAIN = new Set([
|
||||
'w3s.link',
|
||||
@ -61,19 +61,14 @@ const BLACK_TLD = new Set([
|
||||
]);
|
||||
|
||||
runner(__filename, async () => {
|
||||
const domainSet = Array.from(
|
||||
(await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black
|
||||
);
|
||||
const domainSet = Array.from((await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black);
|
||||
const domainCountMap = {};
|
||||
|
||||
for (let i = 0, len = domainSet.length; i < len; i++) {
|
||||
const line = processLine(domainSet[i]);
|
||||
if (!line) continue;
|
||||
|
||||
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
|
||||
|
||||
const parsed = parse(domain, { allowPrivateDomains: true });
|
||||
|
||||
const parsed = tldts.parse(line, { allowPrivateDomains: true });
|
||||
const apexDomain = parsed.domain;
|
||||
|
||||
if (apexDomain) {
|
||||
@ -84,19 +79,18 @@ runner(__filename, async () => {
|
||||
domainCountMap[apexDomain] ||= 0;
|
||||
|
||||
let isPhishingDomainMockingAmazon = false;
|
||||
|
||||
if (domain.startsWith('amaz')) {
|
||||
if (line.startsWith('.amaz')) {
|
||||
domainCountMap[apexDomain] += 0.5;
|
||||
|
||||
isPhishingDomainMockingAmazon = true;
|
||||
|
||||
if (domain.startsWith('amazon-')) {
|
||||
if (line.startsWith('.amazon-')) {
|
||||
domainCountMap[apexDomain] += 4.5;
|
||||
}
|
||||
} else if (domain.startsWith('customer')) {
|
||||
} else if (line.startsWith('.customer')) {
|
||||
domainCountMap[apexDomain] += 0.25;
|
||||
}
|
||||
if (domain.includes('-co-jp')) {
|
||||
if (line.includes('-co-jp')) {
|
||||
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
|
||||
}
|
||||
|
||||
@ -105,17 +99,17 @@ runner(__filename, async () => {
|
||||
|
||||
domainCountMap[apexDomain] += 1;
|
||||
|
||||
if (domain.length > 19) {
|
||||
if (line.length > 19) {
|
||||
// Add more weight if the domain is long enough
|
||||
if (domain.length > 44) {
|
||||
if (line.length > 44) {
|
||||
domainCountMap[apexDomain] += 3.5;
|
||||
} else if (domain.length > 34) {
|
||||
} else if (line.length > 34) {
|
||||
domainCountMap[apexDomain] += 2.5;
|
||||
} else if (domain.length > 29) {
|
||||
} else if (line.length > 29) {
|
||||
domainCountMap[apexDomain] += 1.5;
|
||||
} else if (domain.length > 24) {
|
||||
} else if (line.length > 24) {
|
||||
domainCountMap[apexDomain] += 0.75;
|
||||
} else if (domain.length > 19) {
|
||||
} else if (line.length > 19) {
|
||||
domainCountMap[apexDomain] += 0.25;
|
||||
}
|
||||
|
||||
@ -129,15 +123,14 @@ runner(__filename, async () => {
|
||||
}
|
||||
}
|
||||
|
||||
const results = [];
|
||||
|
||||
Object.entries(domainCountMap).forEach(([domain, count]) => {
|
||||
if (count >= 5) {
|
||||
results.push(`.${domain}`);
|
||||
}
|
||||
});
|
||||
|
||||
results.sort(domainSorter);
|
||||
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
|
||||
.reduce((acc, [apexDomain, count]) => {
|
||||
if (count >= 5) {
|
||||
acc.push(`.${apexDomain}`);
|
||||
}
|
||||
return acc;
|
||||
}, /** @type {string[]} */([]))
|
||||
.sort(domainSorter));
|
||||
|
||||
const description = [
|
||||
'License: AGPL 3.0',
|
||||
|
||||
@ -1,20 +1,20 @@
|
||||
// @ts-check
|
||||
const fs = require('fs');
|
||||
const fse = require('fs-extra');
|
||||
const { resolve: pathResolve } = require('path');
|
||||
|
||||
const tldts = require('tldts');
|
||||
|
||||
const { processHosts, processFilterRules } = require('./lib/parse-filter');
|
||||
const Trie = require('./lib/trie');
|
||||
|
||||
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
|
||||
const { createRuleset } = require('./lib/create-file');
|
||||
const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
|
||||
const { processLine } = require('./lib/process-line');
|
||||
const { domainDeduper } = require('./lib/domain-deduper');
|
||||
const createKeywordFilter = require('./lib/aho-corasick');
|
||||
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
|
||||
const domainSorter = require('./lib/stable-sort-domain');
|
||||
const { createDomainSorter } = require('./lib/stable-sort-domain');
|
||||
const { traceSync, runner } = require('./lib/trace-runner');
|
||||
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
|
||||
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
|
||||
|
||||
/** Whitelists */
|
||||
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||
@ -22,7 +22,8 @@ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||
const domainKeywordsSet = new Set();
|
||||
/** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */
|
||||
const domainSuffixSet = new Set();
|
||||
(async () => {
|
||||
|
||||
runner(__filename, async () => {
|
||||
/** @type Set<string> */
|
||||
const domainSets = new Set();
|
||||
|
||||
@ -31,7 +32,8 @@ const domainSuffixSet = new Set();
|
||||
|
||||
let shouldStop = false;
|
||||
|
||||
await Promise.all([
|
||||
const [gorhill] = await Promise.all([
|
||||
getGorhillPublicSuffixPromise,
|
||||
// Parse from remote hosts & domain lists
|
||||
...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
|
||||
hosts.forEach(host => {
|
||||
@ -129,7 +131,7 @@ const domainSuffixSet = new Set();
|
||||
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
|
||||
console.time('* Dedupe from black keywords/suffixes');
|
||||
|
||||
const kwfilter = createKeywordFilter(Array.from(domainKeywordsSet));
|
||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||
|
||||
const trie1 = Trie.from(domainSets);
|
||||
domainSuffixSet.forEach(suffix => {
|
||||
@ -167,19 +169,35 @@ const domainSuffixSet = new Set();
|
||||
|
||||
const START_TIME = Date.now();
|
||||
|
||||
const dudupedDominArray = domainDeduper(Array.from(domainSets));
|
||||
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
|
||||
|
||||
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
|
||||
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
|
||||
|
||||
/** @type {Record<string, number>} */
|
||||
const rejectDomainsStats = dudupedDominArray.reduce((acc, cur) => {
|
||||
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false });
|
||||
if (suffix) {
|
||||
acc[suffix] = (acc[suffix] ?? 0) + 1;
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
// Create reject stats
|
||||
const getDomain = createCachedGorhillGetDomain(gorhill);
|
||||
/** @type {[string, number][]} */
|
||||
const rejectDomainsStats = traceSync(
|
||||
'* Collect reject domain stats',
|
||||
() => Object.entries(
|
||||
dudupedDominArray.reduce((acc, cur) => {
|
||||
const suffix = getDomain(cur);
|
||||
if (suffix) {
|
||||
acc[suffix] = (acc[suffix] ?? 0) + 1;
|
||||
}
|
||||
return acc;
|
||||
}, {})
|
||||
).filter(a => a[1] > 2).sort((a, b) => {
|
||||
const t = b[1] - a[1];
|
||||
if (t === 0) {
|
||||
return a[0].localeCompare(b[0]);
|
||||
}
|
||||
return t;
|
||||
})
|
||||
);
|
||||
|
||||
const domainSorter = createDomainSorter(gorhill);
|
||||
const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter));
|
||||
|
||||
const description = [
|
||||
'License: AGPL 3.0',
|
||||
@ -192,7 +210,6 @@ const domainSuffixSet = new Set();
|
||||
...HOSTS.map(host => ` - ${host[0]}`),
|
||||
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
|
||||
];
|
||||
const domainset = dudupedDominArray.sort(domainSorter);
|
||||
|
||||
await Promise.all([
|
||||
...createRuleset(
|
||||
@ -204,21 +221,11 @@ const domainSuffixSet = new Set();
|
||||
pathResolve(__dirname, '../List/domainset/reject.conf'),
|
||||
pathResolve(__dirname, '../Clash/domainset/reject.txt')
|
||||
),
|
||||
fs.promises.writeFile(
|
||||
pathResolve(__dirname, '../List/internal/reject-stats.txt'),
|
||||
Object.entries(rejectDomainsStats)
|
||||
.filter(a => a[1] > 1)
|
||||
.sort((a, b) => {
|
||||
const t = b[1] - a[1];
|
||||
if (t === 0) {
|
||||
return a[0].localeCompare(b[0]);
|
||||
}
|
||||
return t;
|
||||
})
|
||||
.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`)
|
||||
.join('\n')
|
||||
compareAndWriteFile(
|
||||
rejectDomainsStats.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`),
|
||||
pathResolve(__dirname, '../List/internal/reject-stats.txt')
|
||||
),
|
||||
// Copy reject_sukka.conf for backward compatibility
|
||||
fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf'))
|
||||
]);
|
||||
})();
|
||||
});
|
||||
|
||||
@ -42,7 +42,7 @@ runner(__filename, async () => {
|
||||
}
|
||||
}
|
||||
|
||||
await Promise.all(promises);
|
||||
return Promise.all(promises);
|
||||
});
|
||||
|
||||
/**
|
||||
|
||||
@ -16,7 +16,7 @@ const fileExists = (path) => {
|
||||
.catch(() => false);
|
||||
};
|
||||
|
||||
runner(__filename, async () => {
|
||||
const downloadPreviousBuild = async () => {
|
||||
const filesList = ['Clash', 'List'];
|
||||
|
||||
let allFileExists = true;
|
||||
@ -79,4 +79,28 @@ runner(__filename, async () => {
|
||||
}));
|
||||
|
||||
await fs.promises.unlink(extractedPath).catch(() => { });
|
||||
};
|
||||
|
||||
const downloadPublicSuffixList = async () => {
|
||||
const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
|
||||
const publicSuffixPath = join(publicSuffixDir, 'public_suffix-list_dat.txt');
|
||||
|
||||
console.log('Download public suffix list.');
|
||||
|
||||
const [resp] = await Promise.all([
|
||||
fetch('https://publicsuffix.org/list/public_suffix_list.dat'),
|
||||
fse.ensureDir(publicSuffixDir)
|
||||
]);
|
||||
|
||||
await pipeline(
|
||||
Readable.fromWeb(resp.body),
|
||||
fs.createWriteStream(publicSuffixPath)
|
||||
);
|
||||
};
|
||||
|
||||
runner(__filename, () => {
|
||||
return Promise.all([
|
||||
downloadPreviousBuild(),
|
||||
downloadPublicSuffixList()
|
||||
]);
|
||||
});
|
||||
|
||||
@ -23,7 +23,7 @@ const createNode = (key, depth = 0) => ({
|
||||
});
|
||||
|
||||
/**
|
||||
* @param {string[]} keys
|
||||
* @param {string[] | Set<string>} keys
|
||||
*/
|
||||
const createKeywordFilter = (keys) => {
|
||||
const root = createNode('root');
|
||||
@ -39,16 +39,18 @@ const createKeywordFilter = (keys) => {
|
||||
const map = beginNode.children;
|
||||
// eslint-disable-next-line guard-for-in -- plain object
|
||||
for (const key in beginNode.children) {
|
||||
const node = map[key];
|
||||
const node = map?.[key];
|
||||
let failNode = beginNode.fail;
|
||||
|
||||
while (failNode && !failNode.children[key]) {
|
||||
while (failNode && !failNode.children?.[key]) {
|
||||
failNode = failNode.fail;
|
||||
}
|
||||
|
||||
node.fail = failNode?.children[key] || root;
|
||||
if (node) {
|
||||
node.fail = failNode?.children?.[key] || root;
|
||||
|
||||
queue.push(node);
|
||||
queue.push(node);
|
||||
}
|
||||
}
|
||||
|
||||
idx++;
|
||||
@ -83,10 +85,9 @@ const createKeywordFilter = (keys) => {
|
||||
}
|
||||
};
|
||||
|
||||
for (let idx = 0, len = keys.length; idx < len; idx++) {
|
||||
const key = keys[idx];
|
||||
put(key, key.length);
|
||||
}
|
||||
keys.forEach(k => {
|
||||
put(k, k.length);
|
||||
});
|
||||
|
||||
build();
|
||||
|
||||
|
||||
47
Build/lib/cache-apply.js
Normal file
47
Build/lib/cache-apply.js
Normal file
@ -0,0 +1,47 @@
|
||||
/**
 * Create an in-memory memoization cache keyed by string.
 *
 * Resolved values live in a single Map shared by `sync()` and `async()`, so a value
 * computed through either method is visible to the other.
 *
 * @param {string} [namespace] label used in the optional statistics line
 * @param {boolean} [printStats] when true (and a namespace is given), log the hit count
 * and cache size once, on process exit
 */
const createCache = (namespace, printStats = false) => {
  const cache = new Map();
  /**
   * Async computations that have started but not settled yet. Kept apart from `cache`
   * so that `sync()` never observes a Promise where it expects a value.
   * @type {Map<string, Promise<any>>}
   */
  const pending = new Map();

  let hit = 0;
  if (namespace && printStats) {
    process.on('exit', () => {
      console.log(`🔋 [cache] ${namespace} hit: ${hit}, size: ${cache.size}`);
    });
  }

  return {
    /**
     * Return the cached value for `key`, computing and storing it with `fn` on a miss.
     *
     * @template T
     * @param {string} key
     * @param {() => T} fn
     * @returns {T}
     */
    sync(key, fn) {
      // `has` (not a truthiness check) so cached `undefined`/`null`/`0` still count as hits
      if (cache.has(key)) {
        hit++;
        return cache.get(key);
      }
      const value = fn();
      cache.set(key, value);
      return value;
    },
    /**
     * Async counterpart of `sync()`.
     *
     * Concurrent calls for the same key share one in-flight computation instead of each
     * invoking `fn`. A rejected computation is not cached: the next call retries.
     *
     * @template T
     * @param {string} key
     * @param {() => Promise<T>} fn
     * @returns {Promise<T>}
     */
    async async(key, fn) {
      if (cache.has(key)) {
        hit++;
        return cache.get(key);
      }

      const inflight = pending.get(key);
      if (inflight) {
        hit++;
        return inflight;
      }

      // The async wrapper still calls `fn` right away, but turns a synchronous throw
      // into a rejection so the bookkeeping below always runs.
      const promise = (async () => fn())().then((value) => {
        cache.set(key, value);
        return value;
      });
      pending.set(key, promise);

      // Drop the in-flight entry once settled (either way); failures are never cached.
      const forget = () => {
        pending.delete(key);
      };
      promise.then(forget, forget);

      return promise;
    }
  };
};
|
||||
module.exports.createCache = createCache;
|
||||
25
Build/lib/cached-tld-parse.js
Normal file
25
Build/lib/cached-tld-parse.js
Normal file
@ -0,0 +1,25 @@
|
||||
const tldts = require('tldts');
|
||||
const { createCache } = require('./cache-apply');
|
||||
|
||||
const cache = createCache('cached-tld-parse', true);
|
||||
|
||||
const sharedConfig = { allowPrivateDomains: true };
|
||||
|
||||
/**
|
||||
* @param {string} domain
|
||||
* @returns {ReturnType<import('tldts').parse>}
|
||||
*/
|
||||
module.exports.parse = (domain) => {
|
||||
return cache.sync(domain, () => tldts.parse(domain, sharedConfig));
|
||||
};
|
||||
|
||||
const gothillGetDomainCache = createCache('cached-gorhill-get-domain', true);
|
||||
/**
|
||||
* @param {import('gorhill-publicsuffixlist').default | null} gorhill
|
||||
*/
|
||||
module.exports.createCachedGorhillGetDomain = (gorhill) => {
|
||||
/**
|
||||
* @param {string} domain
|
||||
*/
|
||||
return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
|
||||
};
|
||||
@ -32,10 +32,11 @@ async function compareAndWriteFile(linesA, filePath) {
|
||||
const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' });
|
||||
|
||||
for (let i = 0, len = linesA.length; i < len; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- backpressure
|
||||
await writeToStream(stream, linesA[i]);
|
||||
// eslint-disable-next-line no-await-in-loop -- backpressure
|
||||
await writeToStream(stream, '\n');
|
||||
const p = writeToStream(stream, `${linesA[i]}\n`);
|
||||
if (p) {
|
||||
// eslint-disable-next-line no-await-in-loop -- backpressure, besides we only wait for drain
|
||||
await p;
|
||||
}
|
||||
}
|
||||
stream.end();
|
||||
} else {
|
||||
@ -48,13 +49,13 @@ module.exports.compareAndWriteFile = compareAndWriteFile;
|
||||
* @param {import('fs').WriteStream} stream
|
||||
* @param {string} data
|
||||
*/
|
||||
async function writeToStream(stream, data) {
|
||||
function writeToStream(stream, data) {
|
||||
if (!stream.write(data)) {
|
||||
return /** @type {Promise<void>} */(new Promise((resolve) => {
|
||||
stream.once('drain', () => { resolve(); });
|
||||
stream.once('drain', resolve);
|
||||
}));
|
||||
}
|
||||
return Promise.resolve();
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
// @ts-check
|
||||
const Trie = require('./trie');
|
||||
|
||||
/**
|
||||
@ -13,8 +14,10 @@ const domainDeduper = (inputDomains) => {
|
||||
continue;
|
||||
}
|
||||
|
||||
// delete all included subdomains (ends with `.example.com`)
|
||||
trie.find(d, false).forEach(f => sets.delete(f));
|
||||
|
||||
// if `.example.com` exists, then `example.com` should also be removed
|
||||
const a = d.slice(1);
|
||||
if (trie.has(a)) {
|
||||
sets.delete(a);
|
||||
|
||||
@ -34,7 +34,7 @@ module.exports.createReadlineInterfaceFromResponse = createReadlineInterfaceFrom
|
||||
|
||||
/**
|
||||
* @param {import('undici').RequestInfo} url
|
||||
* @param {import('undici').RequestInit | undefined} [opt]
|
||||
* @param {import('undici').RequestInit} [opt]
|
||||
*/
|
||||
module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => {
|
||||
const resp = await fetchWithRetry(url, opt);
|
||||
|
||||
@ -1,4 +1,11 @@
|
||||
// @ts-check
|
||||
const { fetch } = require('undici');
|
||||
const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch));
|
||||
const undici = require('undici');
|
||||
|
||||
// Enable HTTP/2 support
|
||||
undici.setGlobalDispatcher(new undici.Agent({
|
||||
allowH2: true,
|
||||
pipelining: 10
|
||||
}));
|
||||
|
||||
const fetchWithRetry = /** @type {import('undici').fetch} */(require('@vercel/fetch-retry')(undici.fetch));
|
||||
module.exports.fetchWithRetry = fetchWithRetry;
|
||||
|
||||
34
Build/lib/get-gorhill-publicsuffix.js
Normal file
34
Build/lib/get-gorhill-publicsuffix.js
Normal file
@ -0,0 +1,34 @@
|
||||
const { toASCII } = require('punycode/');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix-list_dat.txt');
|
||||
/**
 * Load the raw text of the public suffix list.
 *
 * Reads the copy at `publicSuffixPath` when that file exists; otherwise downloads the
 * list from publicsuffix.org.
 *
 * @returns {Promise<string>} the contents of public_suffix_list.dat
 * @throws {Error} when the remote fallback answers with a non-2xx status
 */
const getPublicSuffixListDat = async () => {
  if (fs.existsSync(publicSuffixPath)) {
    return fs.promises.readFile(publicSuffixPath, 'utf-8');
  }

  console.log('public_suffix_list.dat not found, fetch directly from remote.');

  const resp = await fetch('https://publicsuffix.org/list/public_suffix_list.dat');
  // Without this check an HTTP error page would be handed to the parser as if it were the list.
  if (!resp.ok) {
    throw new Error(`Failed to fetch public_suffix_list.dat: ${resp.status} ${resp.statusText}`);
  }
  return resp.text();
};
|
||||
|
||||
/**
 * Load `gorhill-publicsuffixlist`, feed it the public suffix list and switch it to its
 * WASM implementation.
 *
 * @returns the initialised `gorhill-publicsuffixlist` default export
 */
const getGorhillPublicSuffix = async () => {
  /**
   * Minimal fetch stand-in passed to `enableWASM`: reads `url` from disk and exposes
   * only `arrayBuffer()`.
   *
   * @param {import('fs').PathLike} url
   */
  const customFetch = async (url) => {
    const buf = await fs.promises.readFile(url);
    return {
      arrayBuffer() {
        // `buf.buffer` is the backing ArrayBuffer, which may be larger than (or offset
        // from) the Buffer's own bytes, so hand out exactly the Buffer's byte range.
        return Promise.resolve(buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength));
      }
    };
  };

  // The list text and the ESM-only module load are independent, so do both in parallel.
  const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
    getPublicSuffixListDat(),
    import('gorhill-publicsuffixlist')
  ]);

  gorhill.parse(publicSuffixListDat, toASCII);
  await gorhill.enableWASM({ customFetch });

  return gorhill;
};
|
||||
|
||||
const getGorhillPublicSuffixPromise = getGorhillPublicSuffix();
|
||||
module.exports.getGorhillPublicSuffixPromise = getGorhillPublicSuffixPromise;
|
||||
@ -1,13 +1,14 @@
|
||||
// @ts-check
|
||||
const { parse } = require('tldts');
|
||||
const tldts = require('./cached-tld-parse');
|
||||
|
||||
/**
|
||||
* @param {string} domain
|
||||
*/
|
||||
module.exports.isDomainLoose = (domain) => {
|
||||
const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true });
|
||||
const { isIcann, isPrivate, isIp } = tldts.parse(domain);
|
||||
return !!(!isIp && (isIcann || isPrivate));
|
||||
};
|
||||
|
||||
/**
|
||||
* @param {string} domain
|
||||
*/
|
||||
@ -16,12 +17,15 @@ module.exports.normalizeDomain = (domain) => {
|
||||
return null;
|
||||
}
|
||||
|
||||
const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true });
|
||||
const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain);
|
||||
if (isIp) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (isIcann || isPrivate) {
|
||||
if (hostname?.[0] === '.') {
|
||||
return hostname.slice(1);
|
||||
}
|
||||
return hostname;
|
||||
}
|
||||
|
||||
|
||||
@ -179,19 +179,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
|
||||
const downloadStart = performance.now();
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const signal = controller.signal;
|
||||
|
||||
/** @type string[] */
|
||||
filterRules = (
|
||||
await Promise.any(
|
||||
[filterRulesUrl, ...(fallbackUrls || [])].map(
|
||||
url => fetchWithRetry(url, { signal })
|
||||
.then(r => r.text())
|
||||
.then(text => {
|
||||
controller.abort();
|
||||
return text;
|
||||
})
|
||||
)
|
||||
[filterRulesUrl, ...(fallbackUrls || [])].map(async url => {
|
||||
const text = await fetchWithRetry(url, { signal: controller.signal }).then(r => r.text());
|
||||
controller.abort();
|
||||
return text;
|
||||
})
|
||||
)
|
||||
).split('\n').map(line => line.trim());
|
||||
} catch (e) {
|
||||
@ -317,10 +313,7 @@ function parse($line, includeThirdParties) {
|
||||
|
||||
if (
|
||||
// (line.startsWith('@@|') || line.startsWith('@@.'))
|
||||
(
|
||||
line[2] === '|'
|
||||
|| line[2] === '.'
|
||||
)
|
||||
(line[2] === '|' || line[2] === '.')
|
||||
&& (
|
||||
lineEndsWithCaret
|
||||
|| lineEndsWithCaretVerticalBar
|
||||
@ -374,7 +367,7 @@ function parse($line, includeThirdParties) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const lineStartsWithSingleDot = line.startsWith('.');
|
||||
const lineStartsWithSingleDot = line[0] === '.';
|
||||
if (
|
||||
lineStartsWithSingleDot
|
||||
&& (
|
||||
@ -437,7 +430,7 @@ function parse($line, includeThirdParties) {
|
||||
|
||||
return null;
|
||||
}
|
||||
const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line);
|
||||
const tryNormalizeDomain = normalizeDomain(line);
|
||||
if (
|
||||
tryNormalizeDomain
|
||||
&& (
|
||||
|
||||
@ -1,14 +1,4 @@
|
||||
// @ts-check
|
||||
const tldts = require('tldts');
|
||||
|
||||
const cache1 = Object.create(null);
|
||||
/**
|
||||
* @param {string} url
|
||||
* @returns {ReturnType<typeof tldts.parse>}
|
||||
*/
|
||||
// eslint-disable-next-line no-return-assign -- cache
|
||||
const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true }));
|
||||
|
||||
/**
|
||||
* @param {string | null} a
|
||||
* @param {string | null} b
|
||||
@ -49,32 +39,49 @@ const compare = (a, b) => {
|
||||
};
|
||||
|
||||
/**
|
||||
* @param {string} a
|
||||
* @param {string} b
|
||||
* @returns {0 | 1 | -1}
|
||||
* @param {import('gorhill-publicsuffixlist').default | null} [gorhill]
|
||||
*/
|
||||
const domainSorter = (a, b) => {
|
||||
if (a === b) return 0;
|
||||
const createDomainSorter = (gorhill = null) => {
|
||||
const cached = require('./cached-tld-parse');
|
||||
|
||||
const aParsed = parse(a[0] === '.' ? a.slice(1) : a);
|
||||
const bParsed = parse(b[0] === '.' ? b.slice(1) : b);
|
||||
if (gorhill) {
|
||||
/**
|
||||
* @param {string} input
|
||||
*/
|
||||
const getDomain = cached.createCachedGorhillGetDomain(gorhill);
|
||||
|
||||
const resultDomainWithoutSuffix = compare(aParsed.domainWithoutSuffix, bParsed.domainWithoutSuffix);
|
||||
if (resultDomainWithoutSuffix !== 0) {
|
||||
return resultDomainWithoutSuffix;
|
||||
/**
|
||||
* @param {string} a
|
||||
* @param {string} b
|
||||
* @returns {0 | 1 | -1}
|
||||
*/
|
||||
return (a, b) => {
|
||||
if (a === b) return 0;
|
||||
|
||||
const aDomain = getDomain(a);
|
||||
const bDomain = getDomain(b);
|
||||
|
||||
const resultDomain = compare(aDomain, bDomain);
|
||||
return resultDomain !== 0 ? resultDomain : compare(a, b);
|
||||
};
|
||||
}
|
||||
|
||||
const resultSuffix = compare(aParsed.publicSuffix, bParsed.publicSuffix);
|
||||
if (resultSuffix !== 0) {
|
||||
return resultSuffix;
|
||||
}
|
||||
const tldts = cached;
|
||||
/**
|
||||
* @param {string} a
|
||||
* @param {string} b
|
||||
* @returns {0 | 1 | -1}
|
||||
*/
|
||||
return (a, b) => {
|
||||
if (a === b) return 0;
|
||||
|
||||
const resultSubdomain = compare(aParsed.subdomain, bParsed.subdomain);
|
||||
if (resultSubdomain !== 0) {
|
||||
return resultSubdomain;
|
||||
}
|
||||
const aDomain = tldts.parse(a).domain;
|
||||
const bDomain = tldts.parse(b).domain;
|
||||
|
||||
return 0;
|
||||
const resultDomain = compare(aDomain, bDomain);
|
||||
return resultDomain !== 0 ? resultDomain : compare(a, b);
|
||||
};
|
||||
};
|
||||
|
||||
module.exports = domainSorter;
|
||||
module.exports = createDomainSorter();
|
||||
module.exports.createDomainSorter = createDomainSorter;
|
||||
|
||||
@ -1,15 +1,42 @@
|
||||
const path = require('path');
|
||||
const { performance } = require('perf_hooks');
|
||||
|
||||
/**
|
||||
* @param {Function} fn
|
||||
* @param {string} __filename
|
||||
* @template T
|
||||
* @param {string} prefix
|
||||
* @param {() => T} fn
|
||||
* @returns {T}
|
||||
*/
|
||||
module.exports.runner = async (__filename, fn) => {
|
||||
const runnerName = path.basename(__filename, path.extname(__filename));
|
||||
|
||||
const start = Date.now();
|
||||
const result = await fn();
|
||||
const end = Date.now();
|
||||
console.log(`⌛ [${runnerName}]: ${end - start}ms`);
|
||||
const traceSync = (prefix, fn) => {
|
||||
const start = performance.now();
|
||||
const result = fn();
|
||||
const end = performance.now();
|
||||
console.log(`${prefix}: ${(end - start).toFixed(3)}ms`);
|
||||
return result;
|
||||
};
|
||||
module.exports.traceSync = traceSync;
|
||||
|
||||
/**
|
||||
* @template T
|
||||
* @param {string} prefix
|
||||
* @param {() => Promise<T>} fn
|
||||
* @returns {Promise<T>}
|
||||
*/
|
||||
const traceAsync = async (prefix, fn) => {
|
||||
const start = performance.now();
|
||||
const result = await fn();
|
||||
const end = performance.now();
|
||||
console.log(`${prefix}: ${(end - start).toFixed(3)}ms`);
|
||||
return result;
|
||||
};
|
||||
module.exports.traceAsync = traceAsync;
|
||||
|
||||
/**
|
||||
* @template T
|
||||
* @param {string} __filename
|
||||
* @param {() => Promise<T>} fn
|
||||
* @returns {Promise<T>}
|
||||
*/
|
||||
module.exports.runner = async (__filename, fn) => {
|
||||
return traceAsync(`⌛ [${path.basename(__filename, path.extname(__filename))}]`, fn);
|
||||
};
|
||||
|
||||
36
Build/mod.d.ts
vendored
Normal file
36
Build/mod.d.ts
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
// Ambient type declarations for the 'gorhill-publicsuffixlist' module.
// The default export is a single PublicSuffixList instance.
declare module 'gorhill-publicsuffixlist' {
|
||||
// Value returned by toSelfie() and accepted by fromSelfie(): either a string
// or an object carrying `magic` and `buf32`.
// NOTE(review): presumably a serialized snapshot of the parsed list — confirm against upstream.
type Selfie =
|
||||
| string
|
||||
| {
|
||||
magic: number;
|
||||
buf32: number[];
|
||||
};
|
||||
// Optional helper passed to fromSelfie().
interface Decoder {
|
||||
decode: (bufferStr: string, buffer: ArrayBuffer) => void;
|
||||
decodeSize: (bufferStr: string) => number;
|
||||
}
|
||||
// Optional helper passed to toSelfie().
interface Encoder {
|
||||
encode: (buffer: ArrayBuffer, length: number) => string;
|
||||
}
|
||||
// Shape of the module's default export.
export interface PublicSuffixList {
|
||||
version: string;
|
||||
|
||||
// Loads list text; toAscii is a caller-supplied string-to-string converter
// (presumably punycode's toASCII — confirm at the call site).
parse(text: string, toAscii: (input: string) => string): void;
|
||||
|
||||
getPublicSuffix(hostname: string): string;
|
||||
getDomain(hostname: string): string;
|
||||
|
||||
suffixInPSL(hostname: string): boolean;
|
||||
|
||||
toSelfie(encoder?: null | Encoder): Selfie;
|
||||
fromSelfie(selfie: Selfie, decoder?: null | Decoder): boolean;
|
||||
|
||||
enableWASM(options?: {
|
||||
customFetch?: null | ((url: URL) => Promise<Blob>);
|
||||
}): Promise<boolean>;
|
||||
disableWASM(): Promise<boolean>;
|
||||
}
|
||||
|
||||
const psl: PublicSuffixList;
|
||||
export default psl;
|
||||
}
|
||||
@ -1,6 +1,6 @@
|
||||
// Surge Domain Set cannot include root domains from the public suffix list.
|
||||
|
||||
const tldts = require('tldts');
|
||||
const tldts = require('tldts'); // hit ratio way too low, dont cache
|
||||
const picocolors = require('picocolors');
|
||||
const path = require('path');
|
||||
const listDir = require('@sukka/listdir');
|
||||
@ -21,7 +21,7 @@ const validateDomainSet = async (filePath) => {
|
||||
if (!line) {
|
||||
continue;
|
||||
}
|
||||
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
|
||||
const domain = line[0] === '.' ? line.slice(1) : line;
|
||||
const parsed = tldts.parse(domain, { allowPrivateDomains: true, detectIp: false });
|
||||
|
||||
if (
|
||||
|
||||
@ -158,9 +158,11 @@
|
||||
"ci-info": "^3.8.0",
|
||||
"cidr-tools-wasm": "^0.0.11",
|
||||
"fs-extra": "^11.1.1",
|
||||
"gorhill-publicsuffixlist": "github:gorhill/publicsuffixlist.js",
|
||||
"mnemonist": "^0.39.5",
|
||||
"path-scurry": "^1.10.1",
|
||||
"picocolors": "^1.0.0",
|
||||
"punycode": "^2.3.0",
|
||||
"table": "^6.8.1",
|
||||
"tar": "^6.2.0",
|
||||
"tldts": "^6.0.14",
|
||||
|
||||
19
pnpm-lock.yaml
generated
19
pnpm-lock.yaml
generated
@ -34,6 +34,9 @@ dependencies:
|
||||
fs-extra:
|
||||
specifier: ^11.1.1
|
||||
version: 11.1.1
|
||||
gorhill-publicsuffixlist:
|
||||
specifier: github:gorhill/publicsuffixlist.js
|
||||
version: github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978
|
||||
mnemonist:
|
||||
specifier: ^0.39.5
|
||||
version: 0.39.5
|
||||
@ -43,6 +46,9 @@ dependencies:
|
||||
picocolors:
|
||||
specifier: ^1.0.0
|
||||
version: 1.0.0
|
||||
punycode:
|
||||
specifier: ^2.3.0
|
||||
version: 2.3.0
|
||||
table:
|
||||
specifier: ^6.8.1
|
||||
version: 6.8.1
|
||||
@ -1318,8 +1324,8 @@ packages:
|
||||
signal-exit: 3.0.7
|
||||
dev: true
|
||||
|
||||
/punycode@2.1.1:
|
||||
resolution: {integrity: sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==}
|
||||
/punycode@2.3.0:
|
||||
resolution: {integrity: sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==}
|
||||
engines: {node: '>=6'}
|
||||
|
||||
/queue-microtask@1.2.3:
|
||||
@ -1571,7 +1577,7 @@ packages:
|
||||
/uri-js@4.4.1:
|
||||
resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==}
|
||||
dependencies:
|
||||
punycode: 2.1.1
|
||||
punycode: 2.3.0
|
||||
|
||||
/webidl-conversions@3.0.1:
|
||||
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
|
||||
@ -1661,3 +1667,10 @@ packages:
|
||||
resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
|
||||
engines: {node: '>=10'}
|
||||
dev: true
|
||||
|
||||
github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978:
|
||||
resolution: {tarball: https://codeload.github.com/gorhill/publicsuffixlist.js/tar.gz/3a1bc623073079184ff76933b88b7bf4f5d48978}
|
||||
name: '@gorhill/publicsuffixlist'
|
||||
version: 3.0.1
|
||||
engines: {node: '>=14.0.0', npm: '>=6.14.4'}
|
||||
dev: false
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user