Refactor: simplify build infra

This commit is contained in:
SukkaW 2023-09-14 22:34:25 +08:00
parent 2448cbe39a
commit 573c0f5274
22 changed files with 127 additions and 132 deletions

View File

@ -4,7 +4,7 @@ const { isIPv4, isIPv6 } = require('net');
const { createRuleset } = require('./lib/create-file');
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const buildAntiBogusDomain = task(__filename, async () => {
/** @type {string[]} */
@ -61,5 +61,5 @@ const buildAntiBogusDomain = task(__filename, async () => {
module.exports.buildAntiBogusDomain = buildAntiBogusDomain;
if (require.main === module) {
runner(__filename, buildAntiBogusDomain);
buildAntiBogusDomain();
}

View File

@ -1,7 +1,7 @@
const path = require('path');
const { createRuleset } = require('./lib/create-file');
const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const buildAppleCdn = task(__filename, async () => {
const res = await parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf');
@ -45,5 +45,5 @@ const buildAppleCdn = task(__filename, async () => {
module.exports.buildAppleCdn = buildAppleCdn;
if (require.main === module) {
runner(__filename, buildAppleCdn);
buildAppleCdn();
}

View File

@ -3,7 +3,7 @@ const path = require('path');
const { createRuleset } = require('./lib/create-file');
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
const Trie = require('./lib/trie');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const fs = require('fs');
const { processLine } = require('./lib/process-line');
@ -50,7 +50,6 @@ const buildCdnConf = task(__filename, async () => {
const cdnDomainsList = [];
for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) {
if (l === '# --- [AWS S3 Replace Me] ---') {
console.log(S3OSSDomains);
S3OSSDomains.forEach(domain => {
cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`);
});
@ -83,5 +82,5 @@ const buildCdnConf = task(__filename, async () => {
module.exports.buildCdnConf = buildCdnConf;
if (require.main === module) {
runner(__filename, buildCdnConf);
buildCdnConf();
}

View File

@ -4,7 +4,7 @@ const { resolve: pathResolve } = require('path');
// This should not use `createRuleset` API since we are going to generate ipcidr for Clash
const { compareAndWriteFile, withBannerArray } = require('./lib/create-file');
const { processLine } = require('./lib/process-line');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
// https://github.com/misakaio/chnroutes2/issues/25
const EXCLUDE_CIDRS = [
@ -61,5 +61,5 @@ const buildChnCidr = task(__filename, async () => {
module.exports.buildChnCidr = buildChnCidr;
if (require.main === module) {
runner(__filename, buildChnCidr);
buildChnCidr();
}

View File

@ -6,7 +6,7 @@ const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line');
const { createRuleset } = require('./lib/create-file');
const { domainDeduper } = require('./lib/domain-deduper');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const MAGIC_COMMAND_SKIP = '# $ custom_build_script';
const MAGIC_COMMAND_TITLE = '# $ meta_title ';
@ -48,7 +48,7 @@ const buildCommon = task(__filename, async () => {
module.exports.buildCommon = buildCommon;
if (require.main === module) {
runner(__filename, buildCommon);
buildCommon();
}
/**

View File

@ -5,7 +5,7 @@ const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line');
const { compareAndWriteFile, createRuleset } = require('./lib/create-file');
const domainSorter = require('./lib/stable-sort-domain');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const buildDomesticRuleset = task(__filename, async () => {
const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
@ -72,5 +72,5 @@ const buildDomesticRuleset = task(__filename, async () => {
module.exports.buildDomesticRuleset = buildDomesticRuleset;
if (require.main === module) {
runner(__filename, buildDomesticRuleset);
buildDomesticRuleset();
}

View File

@ -5,7 +5,7 @@ const tldts = require('tldts');
const { processLine } = require('./lib/process-line');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const domainSorter = require('./lib/stable-sort-domain');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const { compareAndWriteFile } = require('./lib/create-file');
/**
@ -92,5 +92,5 @@ const buildInternalCDNDomains = task(__filename, async () => {
module.exports.buildInternalCDNDomains = buildInternalCDNDomains;
if (require.main === module) {
runner(__filename, buildInternalCDNDomains);
buildInternalCDNDomains();
}

View File

@ -2,7 +2,7 @@
const path = require('path');
const fse = require('fs-extra');
const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const { compareAndWriteFile } = require('./lib/create-file');
const buildInternalChnDomains = task(__filename, async () => {
@ -20,5 +20,5 @@ const buildInternalChnDomains = task(__filename, async () => {
module.exports.buildInternalChnDomains = buildInternalChnDomains;
if (require.main === module) {
runner(__filename, buildInternalChnDomains);
buildInternalChnDomains();
}

View File

@ -4,7 +4,7 @@ const { processLine } = require('./lib/process-line');
const path = require('path');
const fse = require('fs-extra');
const fs = require('fs');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const RESERVED_IPV4_CIDR = [
'0.0.0.0/8',
@ -52,5 +52,5 @@ const buildInternalReverseChnCIDR = task(__filename, async () => {
module.exports.buildInternalReverseChnCIDR = buildInternalReverseChnCIDR;
if (require.main === module) {
runner(__filename, buildInternalReverseChnCIDR);
buildInternalReverseChnCIDR();
}

View File

@ -4,7 +4,7 @@ const path = require('path');
const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js');
const domainSorter = require('./lib/stable-sort-domain');
const { runner, traceSync, task } = require('./lib/trace-runner.js');
const { traceSync, task } = require('./lib/trace-runner.js');
const WHITELIST_DOMAIN = new Set([
'w3s.link',
@ -61,67 +61,76 @@ const BLACK_TLD = new Set([
]);
const buildPhishingDomainSet = task(__filename, async () => {
const domainSet = Array.from((await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black);
const domainSet = Array.from((await processFilterRules(
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// [
// 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
// 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// ]
)).black);
const domainCountMap = {};
for (let i = 0, len = domainSet.length; i < len; i++) {
const line = processLine(domainSet[i]);
if (!line) continue;
traceSync('* process domain set', () => {
for (let i = 0, len = domainSet.length; i < len; i++) {
const line = processLine(domainSet[i]);
if (!line) continue;
const parsed = tldts.parse(line, { allowPrivateDomains: true });
const apexDomain = parsed.domain;
const parsed = tldts.parse(line, { allowPrivateDomains: true });
const apexDomain = parsed.domain;
if (apexDomain) {
if (WHITELIST_DOMAIN.has(apexDomain)) {
continue;
}
domainCountMap[apexDomain] ||= 0;
let isPhishingDomainMockingAmazon = false;
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
isPhishingDomainMockingAmazon = true;
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
if (apexDomain) {
if (WHITELIST_DOMAIN.has(apexDomain)) {
continue;
}
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
}
if (line.includes('-co-jp')) {
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
}
const tld = parsed.publicSuffix;
if (!tld || !BLACK_TLD.has(tld)) continue;
domainCountMap[apexDomain] ||= 0;
domainCountMap[apexDomain] += 1;
let isPhishingDomainMockingAmazon = false;
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
if (line.length > 19) {
// Add more weight if the domain is long enough
if (line.length > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (line.length > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (line.length > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (line.length > 24) {
domainCountMap[apexDomain] += 0.75;
} else if (line.length > 19) {
isPhishingDomainMockingAmazon = true;
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
}
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
}
if (line.includes('-co-jp')) {
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
}
if (domainCountMap[apexDomain] < 5) {
const subdomain = parsed.subdomain;
if (subdomain && subdomain.includes('.')) {
const tld = parsed.publicSuffix;
if (!tld || !BLACK_TLD.has(tld)) continue;
domainCountMap[apexDomain] += 1;
if (line.length > 19) {
// Add more weight if the domain is long enough
if (line.length > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (line.length > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (line.length > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (line.length > 24) {
domainCountMap[apexDomain] += 0.75;
} else if (line.length > 19) {
domainCountMap[apexDomain] += 0.25;
}
if (domainCountMap[apexDomain] < 5) {
const subdomain = parsed.subdomain;
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5;
}
}
}
}
}
}
});
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
.reduce((acc, [apexDomain, count]) => {
@ -156,5 +165,5 @@ const buildPhishingDomainSet = task(__filename, async () => {
module.exports.buildPhishingDomainSet = buildPhishingDomainSet;
if (require.main === module) {
runner(__filename, buildPhishingDomainSet);
buildPhishingDomainSet();
}

View File

@ -2,7 +2,7 @@ const listDir = require('@sukka/listdir');
const path = require('path');
const fs = require('fs');
const fse = require('fs-extra');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const rootPath = path.resolve(__dirname, '../');
const publicPath = path.resolve(__dirname, '../public');
@ -34,7 +34,7 @@ const buildPublicHtml = task(__filename, async () => {
module.exports.buildPublicHtml = buildPublicHtml;
if (require.main === module) {
runner(__filename, buildPublicHtml);
buildPublicHtml();
}
/**

View File

@ -12,7 +12,7 @@ const { domainDeduper } = require('./lib/domain-deduper');
const createKeywordFilter = require('./lib/aho-corasick');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, runner, task } = require('./lib/trace-runner');
const { traceSync, task } = require('./lib/trace-runner');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
@ -23,7 +23,7 @@ const domainKeywordsSet = new Set();
/** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */
const domainSuffixSet = new Set();
const buildRejectDomainSet = task(__dirname, async () => {
const buildRejectDomainSet = task(__filename, async () => {
/** @type Set<string> */
const domainSets = new Set();
@ -167,11 +167,7 @@ const buildRejectDomainSet = task(__dirname, async () => {
// Dedupe domainSets
console.log(`Start deduping! (${previousSize})`);
const START_TIME = Date.now();
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
// Create reject stats
@ -189,10 +185,11 @@ const buildRejectDomainSet = task(__dirname, async () => {
}, {})
).filter(a => a[1] > 2).sort((a, b) => {
const t = b[1] - a[1];
if (t === 0) {
return a[0].localeCompare(b[0]);
if (t !== 0) {
return t;
}
return t;
return a[0].localeCompare(b[0]);
})
);
@ -233,5 +230,5 @@ const buildRejectDomainSet = task(__dirname, async () => {
module.exports.buildRejectDomainSet = buildRejectDomainSet;
if (require.main === module) {
runner(__filename, buildRejectDomainSet);
buildRejectDomainSet();
}

View File

@ -5,7 +5,7 @@ const { createRuleset } = require('./lib/create-file');
const domainSorter = require('./lib/stable-sort-domain');
const { Sema } = require('async-sema');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const s = new Sema(2);
/**
@ -128,5 +128,5 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
module.exports.buildSpeedtestDomainSet = buildSpeedtestDomainSet;
if (require.main === module) {
runner(__filename, buildSpeedtestDomainSet);
buildSpeedtestDomainSet();
}

View File

@ -4,7 +4,7 @@ const path = require('path');
const { isIPv4, isIPv6 } = require('net');
const { processLine } = require('./lib/process-line');
const { createRuleset } = require('./lib/create-file');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const buildTelegramCIDR = task(__filename, async () => {
/** @type {Response} */
@ -54,5 +54,5 @@ const buildTelegramCIDR = task(__filename, async () => {
module.exports.buildTelegramCIDR = buildTelegramCIDR;
if (require.main === module) {
runner(__filename, buildTelegramCIDR);
buildTelegramCIDR();
}

View File

@ -8,7 +8,7 @@ const { Readable } = require('stream');
const { pipeline } = require('stream/promises');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { isCI } = require('ci-info');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const fileExists = (path) => {
return fs.promises.access(path, fs.constants.F_OK)
@ -81,7 +81,7 @@ const downloadPreviousBuild = task(__filename, async () => {
await fs.promises.unlink(extractedPath).catch(() => { });
});
const downloadPublicSuffixList = async () => {
const downloadPublicSuffixList = task(__filename, async () => {
const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
const publicSuffixPath = join(publicSuffixDir, 'public_suffix_list_dat.txt');
@ -96,14 +96,14 @@ const downloadPublicSuffixList = async () => {
Readable.fromWeb(resp.body),
fs.createWriteStream(publicSuffixPath)
);
};
}, 'download-publicsuffixlist');
module.exports.downloadPreviousBuild = downloadPreviousBuild;
module.exports.downloadPublicSuffixList = downloadPublicSuffixList;
if (require.main === module) {
runner(__filename, () => Promise.all([
Promise.all([
downloadPreviousBuild(),
downloadPublicSuffixList()
]));
]);
}

View File

@ -1,6 +1,5 @@
// @ts-check
const fs = require('fs');
const fse = require('fs-extra');
const { readFileByLine } = require('./fetch-remote-text-by-line');
const { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } = require('./clash');
@ -9,26 +8,33 @@ const { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset
* @param {string} filePath
*/
async function compareAndWriteFile(linesA, filePath) {
await fse.ensureFile(filePath);
let isEqual = true;
let index = 0;
if (!fs.existsSync(filePath)) {
console.log(`${filePath} does not exists, writing...`);
isEqual = false;
} else {
let index = 0;
for await (const lineB of readFileByLine(filePath)) {
const lineA = linesA[index];
index++;
for await (const lineB of readFileByLine(filePath)) {
const lineA = linesA[index];
index++;
if (lineA[0] === '#' && lineB[0] === '#') {
continue;
if (lineA[0] === '#' && lineB[0] === '#') {
continue;
}
if (lineA !== lineB) {
isEqual = false;
break;
}
}
if (lineA !== lineB) {
if (index !== linesA.length) {
isEqual = false;
break;
}
}
if (!isEqual || index !== linesA.length) {
if (!isEqual) {
const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' });
for (let i = 0, len = linesA.length; i < len; i++) {

View File

@ -1,14 +1,5 @@
// @ts-check
const tldts = require('./cached-tld-parse');
/**
 * Loose domain check based on the flags returned by `tldts.parse`.
 *
 * Returns `false` as soon as the parsed result is flagged as an IP address;
 * otherwise returns `true` only when the result carries the `isIcann` or the
 * `isPrivate` flag.
 *
 * @param {string} domain
 * @returns {boolean}
 */
module.exports.isDomainLoose = (domain) => {
  const parsed = tldts.parse(domain);

  // IP literals are never treated as domains.
  if (parsed.isIp) {
    return false;
  }

  return Boolean(parsed.isIcann || parsed.isPrivate);
};
/**
* @param {string | null | undefined} domain
*/

View File

@ -1,5 +1,10 @@
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
const { isDomainLoose } = require('./is-domain-loose');
const tldts = require('tldts');
/**
 * Loose domain check based on the flags returned by `tldts.parse`.
 *
 * @param {string} domain
 * @returns {boolean} `false` when the input parses as an IP address; otherwise
 * whether the parsed result has the `isIcann` or `isPrivate` flag set.
 */
const isDomainLoose = (domain) => {
  const parsed = tldts.parse(domain);

  // IP literals are never treated as domains.
  if (parsed.isIp) {
    return false;
  }

  return Boolean(parsed.isIcann || parsed.isPrivate);
};
/**
* @param {string | URL} url

View File

@ -93,9 +93,6 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
return domainSets;
}
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN = /[#&%~=]/;
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)/;
/**
* @param {string | URL} filterRulesUrl
* @param {readonly (string | URL)[] | undefined} [fallbackUrls]
@ -197,8 +194,7 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
downloadTime = performance.now() - downloadStart;
for (let i = 0, len = filterRules.length; i < len; i++) {
const line = filterRules[i].trim();
lineCb(line);
lineCb(filterRules[i].trim());
}
}
@ -212,6 +208,9 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
};
}
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN = /[#&%~=]/;
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)/;
/**
* @param {string} $line
* @param {boolean} includeThirdParties

View File

@ -42,13 +42,11 @@ const compare = (a, b) => {
* @param {import('gorhill-publicsuffixlist').default | null} [gorhill]
*/
const createDomainSorter = (gorhill = null) => {
const cached = require('./cached-tld-parse');
if (gorhill) {
/**
* @param {string} input
*/
const getDomain = cached.createCachedGorhillGetDomain(gorhill);
const getDomain = require('./cached-tld-parse').createCachedGorhillGetDomain(gorhill);
/**
* @param {string} a
@ -66,7 +64,7 @@ const createDomainSorter = (gorhill = null) => {
};
}
const tldts = cached;
const tldts = require('./cached-tld-parse');
/**
* @param {string} a
* @param {string} b

View File

@ -36,19 +36,10 @@ module.exports.traceAsync = traceAsync;
* @template T
* @param {string} __filename
* @param {() => Promise<T>} fn
* @returns {Promise<T>}
* @param {string | null} [customname]
*/
module.exports.runner = async (__filename, fn) => {
  // Label the trace with the script's base name, extension stripped.
  const scriptName = path.basename(__filename, path.extname(__filename));
  const label = `⌛ [${scriptName}]`;

  // Hand the callback to traceAsync and pass its result straight through.
  return traceAsync(label, fn);
};
/**
* @template T
* @param {string} __filename
* @param {() => Promise<T>} fn
*/
module.exports.task = (__filename, fn) => {
const taskName = path.basename(__filename, path.extname(__filename));
module.exports.task = (__filename, fn, customname = null) => {
const taskName = customname ?? path.basename(__filename, path.extname(__filename));
return () => {
console.log(`🏃 [${taskName}] Start executing`);
return traceAsync(`✅ [${taskName}] Executed successfully`, fn);

View File

@ -6,7 +6,7 @@ const path = require('path');
const listDir = require('@sukka/listdir');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line');
const { runner, task } = require('./lib/trace-runner');
const { task } = require('./lib/trace-runner');
const SPECIAL_SUFFIXES = new Set([
'linodeobjects.com', // only *.linodeobjects.com are public suffix
@ -71,5 +71,5 @@ const validate = task(__filename, async () => {
module.exports.validate = validate;
if (require.main === module) {
runner(__filename, validate);
validate();
}