Chore: simplify build infra

This commit is contained in:
SukkaW 2023-12-10 23:55:05 +08:00
parent dc8ba51257
commit a0a772d2e1
12 changed files with 137 additions and 114 deletions

View File

@ -1,21 +1,20 @@
// @ts-check // @ts-check
import path from 'path'; import path from 'path';
import { isIPv4, isIPv6 } from 'net';
import { createRuleset } from './lib/create-file'; import { createRuleset } from './lib/create-file';
import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line'; import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { task } from './lib/trace-runner'; import { task } from './lib/trace-runner';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
const getBogusNxDomainIPs = async () => { const getBogusNxDomainIPs = async () => {
/** @type {string[]} */ const result: string[] = [];
const result = [];
for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) { for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
if (line.startsWith('bogus-nxdomain=')) { if (line && line.startsWith('bogus-nxdomain=')) {
const ip = line.slice(15).trim(); const ip = line.slice(15).trim();
if (isIPv4(ip)) { if (isProbablyIpv4(ip)) {
result.push(`IP-CIDR,${ip}/32,no-resolve`); result.push(`IP-CIDR,${ip}/32,no-resolve`);
} else if (isIPv6(ip)) { } else if (isProbablyIpv6(ip)) {
result.push(`IP-CIDR6,${ip}/128,no-resolve`); result.push(`IP-CIDR6,${ip}/128,no-resolve`);
} }
} }
@ -26,21 +25,16 @@ const getBogusNxDomainIPs = async () => {
export const buildAntiBogusDomain = task(import.meta.path, async () => { export const buildAntiBogusDomain = task(import.meta.path, async () => {
const bogusIpPromise = getBogusNxDomainIPs(); const bogusIpPromise = getBogusNxDomainIPs();
/** @type {string[]} */ const result: string[] = [];
const result = [];
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/ip/reject.conf'))) { for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/ip/reject.conf'))) {
if (line === '# --- [Anti Bogus Domain Replace Me] ---') { const l = processLine(line);
// bogus ip is less than 200, no need to worry about "Maximum call stack size exceeded" if (l) {
result.push(...(await bogusIpPromise)); result.push(l);
continue;
} else {
const l = processLine(line);
if (l) {
result.push(l);
}
} }
} }
result.push(...(await bogusIpPromise));
const description = [ const description = [
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',

View File

@ -63,16 +63,14 @@ const buildCdnConf = task(import.meta.path, async () => {
const getS3OSSDomainsPromise: Promise<Set<string>> = getS3OSSDomains(); const getS3OSSDomainsPromise: Promise<Set<string>> = getS3OSSDomains();
for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'))) { for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'))) {
if (l === '# --- [AWS S3 Replace Me] ---') {
(await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
continue;
}
const line = processLine(l); const line = processLine(l);
if (line) { if (line) {
cdnDomainsList.push(line); cdnDomainsList.push(line);
} }
} }
(await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
const description: string[] = [ const description: string[] = [
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',

View File

@ -22,23 +22,25 @@ export const buildCommon = task(import.meta.path, async () => {
const pw = new PathScurry(sourceDir); const pw = new PathScurry(sourceDir);
for await (const entry of pw) { for await (const entry of pw) {
if (entry.isFile()) { if (!entry.isFile()) {
if (path.extname(entry.name) === '.js') { continue;
continue; }
}
const relativePath = entry.relative(); if (path.extname(entry.name) === '.js') {
if (relativePath.startsWith('domainset/')) { continue;
promises.push(transformDomainset(entry.fullpath(), relativePath)); }
continue;
} const relativePath = entry.relative();
if ( if (relativePath.startsWith('domainset/')) {
relativePath.startsWith('ip/') promises.push(transformDomainset(entry.fullpath(), relativePath));
continue;
}
if (
relativePath.startsWith('ip/')
|| relativePath.startsWith('non_ip/') || relativePath.startsWith('non_ip/')
) { ) {
promises.push(transformRuleset(entry.fullpath(), relativePath)); promises.push(transformRuleset(entry.fullpath(), relativePath));
continue; continue;
}
} }
} }

View File

@ -15,9 +15,6 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
const set = new Set<string>(); const set = new Set<string>();
const keywords = new Set<string>(); const keywords = new Set<string>();
const gorhill = await getGorhillPublicSuffixPromise();
const domainSorter = createDomainSorter(gorhill);
const addApexDomain = (input: string) => { const addApexDomain = (input: string) => {
// We are including the private domains themselves // We are including the private domains themselves
const d = tldts.getDomain(input, { allowPrivateDomains: false }); const d = tldts.getDomain(input, { allowPrivateDomains: false });
@ -61,7 +58,8 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
} }
}; };
await Promise.all([ const [domainSorter] = await Promise.all([
getGorhillPublicSuffixPromise().then(createDomainSorter),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),

View File

@ -18,15 +18,12 @@ import * as tldts from 'tldts';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { getPhishingDomains } from './lib/get-phishing-domains'; import { getPhishingDomains } from './lib/get-phishing-domains';
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
/** @type {Set<string>} Dedupe domains inclued by DOMAIN-KEYWORD */
const domainKeywordsSet = new Set<string>();
/** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */
const domainSuffixSet = new Set<string>();
export const buildRejectDomainSet = task(import.meta.path, async () => { export const buildRejectDomainSet = task(import.meta.path, async () => {
/** @type Set<string> */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainKeywordsSet = new Set<string>();
const domainSuffixSet = new Set<string>();
const domainSets = new Set<string>(); const domainSets = new Set<string>();
// Parse from AdGuard Filters // Parse from AdGuard Filters
@ -38,9 +35,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
// Parse from remote hosts & domain lists // Parse from remote hosts & domain lists
...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => { ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
hosts.forEach(host => { hosts.forEach(host => {
if (host) { domainSets.add(host);
domainSets.add(host);
}
}); });
})), })),
...ADGUARD_FILTERS.map(input => { ...ADGUARD_FILTERS.map(input => {
@ -61,12 +56,8 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt', 'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt' 'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
].map(input => processFilterRules(input).then(({ white, black }) => { ].map(input => processFilterRules(input).then(({ white, black }) => {
white.forEach(i => { white.forEach(i => filterRuleWhitelistDomainSets.add(i));
filterRuleWhitelistDomainSets.add(i); black.forEach(i => filterRuleWhitelistDomainSets.add(i));
});
black.forEach(i => {
filterRuleWhitelistDomainSets.add(i);
});
}))), }))),
getPhishingDomains().then(([purePhishingDomains, fullDomainSet]) => { getPhishingDomains().then(([purePhishingDomains, fullDomainSet]) => {
fullDomainSet.forEach(host => { fullDomainSet.forEach(host => {
@ -74,10 +65,16 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
domainSets.add(host); domainSets.add(host);
} }
}); });
purePhishingDomains.forEach(suffix => { purePhishingDomains.forEach(suffix => domainSets.add(`.${suffix}`));
domainSets.add(`.${suffix}`); }),
}); (async () => {
}) for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) {
const l = processLine(line);
if (l) {
domainSets.add(l);
}
}
})()
]); ]);
// remove pre-defined enforced blacklist from whitelist // remove pre-defined enforced blacklist from whitelist
@ -94,17 +91,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
} }
let previousSize = domainSets.size; let previousSize = domainSets.size;
console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules!`); console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) {
const l = processLine(line);
if (l) {
domainSets.add(l);
}
}
previousSize = domainSets.size - previousSize;
console.log(`Import ${previousSize} rules from reject_sukka.conf!`);
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) { for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
const [type, keyword] = line.split(','); const [type, keyword] = line.split(',');
@ -150,11 +137,11 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
domainSets.delete(domain); domainSets.delete(domain);
} }
} }
console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
}); });
console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
previousSize = domainSets.size; previousSize = domainSets.size;
// Dedupe domainSets // Dedupe domainSets
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets))); const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`); console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
@ -180,9 +167,6 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
}) })
); );
const domainSorter = createDomainSorter(gorhill);
const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter));
const description = [ const description = [
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
@ -198,7 +182,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
'Sukka\'s Ruleset - Reject Base', 'Sukka\'s Ruleset - Reject Base',
description, description,
new Date(), new Date(),
domainset, traceSync('* Sort reject domainset', () => dudupedDominArray.sort(createDomainSorter(gorhill))),
'domainset', 'domainset',
path.resolve(import.meta.dir, '../List/domainset/reject.conf'), path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt') path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')

View File

@ -2,14 +2,14 @@
import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry'; import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line'; import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import path from 'path'; import path from 'path';
import { isIPv4, isIPv6 } from 'net'; import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { createRuleset } from './lib/create-file'; import { createRuleset } from './lib/create-file';
import { task } from './lib/trace-runner'; import { task } from './lib/trace-runner';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
export const buildTelegramCIDR = task(import.meta.path, async () => { export const buildTelegramCIDR = task(import.meta.path, async () => {
const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit); const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit) as Response;
const lastModified = resp.headers.get('last-modified'); const lastModified = resp.headers.get('last-modified');
const date = lastModified ? new Date(lastModified) : new Date(); const date = lastModified ? new Date(lastModified) : new Date();
@ -20,10 +20,10 @@ export const buildTelegramCIDR = task(import.meta.path, async () => {
if (!cidr) continue; if (!cidr) continue;
const [subnet] = cidr.split('/'); const [subnet] = cidr.split('/');
if (isIPv4(subnet)) { if (isProbablyIpv4(subnet)) {
results.push(`IP-CIDR,${cidr},no-resolve`); results.push(`IP-CIDR,${cidr},no-resolve`);
} }
if (isIPv6(subnet)) { if (isProbablyIpv6(subnet)) {
results.push(`IP-CIDR6,${cidr},no-resolve`); results.push(`IP-CIDR6,${cidr},no-resolve`);
} }
} }

View File

@ -2,6 +2,7 @@
import { readFileByLine } from './fetch-text-by-line'; import { readFileByLine } from './fetch-text-by-line';
import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash'; import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
import { traceAsync } from './trace-runner'; import { traceAsync } from './trace-runner';
import picocolors from 'picocolors';
export async function compareAndWriteFile(linesA: string[], filePath: string) { export async function compareAndWriteFile(linesA: string[], filePath: string) {
let isEqual = true; let isEqual = true;
@ -45,11 +46,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
} }
if (isEqual) { if (isEqual) {
console.log(`Same Content, bail out writing: ${filePath}`); console.log(picocolors.gray(`Same Content, bail out writing: ${filePath}`));
return; return;
} }
await traceAsync(`Writing ${filePath}`, async () => { await traceAsync(picocolors.gray(`Writing ${filePath}`), async () => {
if (linesALen < 10000) { if (linesALen < 10000) {
return Bun.write(file, `${linesA.join('\n')}\n`); return Bun.write(file, `${linesA.join('\n')}\n`);
} }
@ -63,7 +64,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
await writer.flush(); await writer.flush();
return writer.end(); return writer.end();
}); }, picocolors.gray);
} }
export const withBannerArray = (title: string, description: string[], date: Date, content: string[]) => { export const withBannerArray = (title: string, description: string[], date: Date, content: string[]) => {

View File

@ -1,4 +1,5 @@
import retry from 'async-retry'; import retry from 'async-retry';
import picocolors from 'picocolors';
// retry settings // retry settings
const MIN_TIMEOUT = 10; const MIN_TIMEOUT = 10;
@ -86,7 +87,7 @@ function createFetchRetry($fetch: typeof fetch): typeof fetch {
err.name === 'AbortError' err.name === 'AbortError'
|| ('digest' in err && err.digest === 'AbortError') || ('digest' in err && err.digest === 'AbortError')
) { ) {
console.log('[fetch abort]', url.toString()); console.log(picocolors.gray('[fetch abort]'), picocolors.gray(url.toString()));
return bail(err); return bail(err);
} }
} }

View File

@ -31,3 +31,47 @@ export function isProbablyIpv4(hostname: string): boolean {
&& /* '.' */ hostname.charCodeAt(hostname.length - 1) !== 46 /* '.' */ && /* '.' */ hostname.charCodeAt(hostname.length - 1) !== 46 /* '.' */
); );
} }
/**
 * Cheap heuristic that tells whether `hostname` looks like an IPv6 literal.
 *
 * This is NOT a validator: it only checks that the candidate (with one
 * optional leading `[` and one optional trailing `]` ignored) is at most
 * 39 characters long, consists solely of hexadecimal digits and colons,
 * and contains at least one colon.
 *
 * @param hostname - The string to inspect, optionally wrapped in brackets.
 * @returns `true` when the string is shaped like an IPv6 address, `false`
 * otherwise (including for any input shorter than 3 characters).
 */
export function isProbablyIpv6(hostname: string): boolean {
  if (hostname.length < 3) {
    return false;
  }

  // Ignore the surrounding brackets of the "[::1]" notation, if present.
  let start = hostname[0] === '[' ? 1 : 0;
  let end = hostname.length;

  if (hostname[end - 1] === ']') {
    end -= 1;
  }

  // We only consider the maximum size of a normal IPV6. Note that this will
  // fail on so-called "IPv4 mapped IPv6 addresses" but this is a corner-case
  // and a proper validation library should be used for these.
  if (end - start > 39) {
    return false;
  }

  /* eslint-disable sukka/no-single-return -- here it goes */
  let hasColon = false;

  for (; start < end; start += 1) {
    const code = hostname.charCodeAt(start);

    if (code === 58 /* ':' */) {
      hasColon = true;
    } else if (
      !(
        (code >= 48 && code <= 57) // 0-9
        || (code >= 97 && code <= 102) // a-f
        // Upper bound is 70 ('F'); using 90 ('Z') would accept the non-hex
        // letters G-Z and misclassify strings such as "GHIJ::1" as IPv6.
        || (code >= 65 && code <= 70) // A-F
      )
    ) {
      return false;
    }
  }

  return hasColon;
  /* eslint-enable sukka/no-single-return -- here it goes */
}

View File

@ -51,11 +51,7 @@ export async function processDomainLists(domainListsUrl: string, includeAllSubDo
foundDebugDomain = true; foundDebugDomain = true;
} }
if (includeAllSubDomain) { domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
domainSets.add(`.${domainToAdd}`);
} else {
domainSets.add(domainToAdd);
}
} }
return domainSets; return domainSets;
@ -90,6 +86,8 @@ export async function processHosts(hostsUrl: string, includeAllSubDomain = false
} }
} }
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
return domainSets; return domainSets;
}); });
} }
@ -159,7 +157,7 @@ export async function processFilterRules(
warningMessages.push(hostname); warningMessages.push(hostname);
break; break;
default: default:
throw new Error(`Unknown flag: ${flag as any}`); break;
} }
}; };
@ -187,6 +185,13 @@ export async function processFilterRules(
); );
}); });
console.log(
picocolors.gray('[process filter]'),
picocolors.gray(filterRulesUrl),
picocolors.gray(`white: ${whitelistDomainSets.size}`),
picocolors.gray(`black: ${blacklistDomainSets.size}`)
);
return { return {
white: whitelistDomainSets, white: whitelistDomainSets,
black: blacklistDomainSets, black: blacklistDomainSets,
@ -569,25 +574,23 @@ class CustomAbortError extends Error {
public readonly digest = 'AbortError'; public readonly digest = 'AbortError';
} }
function sleepWithAbort(ms: number, signal: AbortSignal) { const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise<void>((resolve, reject) => {
return new Promise<void>((resolve, reject) => { signal.throwIfAborted();
signal.throwIfAborted(); signal.addEventListener('abort', stop);
signal.addEventListener('abort', stop); Bun.sleep(ms).then(done).catch(doReject);
Bun.sleep(ms).then(done).catch(doReject);
function done() { function done() {
signal.removeEventListener('abort', stop); signal.removeEventListener('abort', stop);
resolve(); resolve();
} }
function stop(this: AbortSignal) { function stop(this: AbortSignal) {
reject(this.reason); reject(this.reason);
} }
function doReject(reason: unknown) { function doReject(reason: unknown) {
signal.removeEventListener('abort', stop); signal.removeEventListener('abort', stop);
reject(reason); reject(reason);
} }
}); });
}
async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) { async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) {
const controller = new AbortController(); const controller = new AbortController();
@ -602,7 +605,7 @@ async function fetchAssets(url: string, fallbackUrls: string[] | readonly string
const createFetchFallbackPromise = async (url: string, index: number) => { const createFetchFallbackPromise = async (url: string, index: number) => {
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 350ms before downloading from the fallback URL. // Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 350ms before downloading from the fallback URL.
try { try {
await sleepWithAbort(200 + (index + 1) * 10, controller.signal); await sleepWithAbort(300 + (index + 1) * 20, controller.signal);
} catch { } catch {
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url)); console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
throw new CustomAbortError(); throw new CustomAbortError();

View File

@ -58,4 +58,3 @@ IP-CIDR,222.73.156.235/32,no-resolve
# --- Anti-Bogus Domain --- # --- Anti-Bogus Domain ---
# https://github.com/felixonmars/dnsmasq-china-list/blob/master/bogus-nxdomain.china.conf # https://github.com/felixonmars/dnsmasq-china-list/blob/master/bogus-nxdomain.china.conf
# --- [Anti Bogus Domain Replace Me] ---

View File

@ -108,4 +108,3 @@ DOMAIN-SUFFIX,cos.eu-frankfurt.myqcloud.com
DOMAIN-SUFFIX,ks3-cn-hk-1.ksyuncs.com DOMAIN-SUFFIX,ks3-cn-hk-1.ksyuncs.com
DOMAIN-SUFFIX,ks3-rus.ksyuncs.com DOMAIN-SUFFIX,ks3-rus.ksyuncs.com
DOMAIN-SUFFIX,ks3-sgp.ksyuncs.com DOMAIN-SUFFIX,ks3-sgp.ksyuncs.com
# --- [AWS S3 Replace Me] ---