mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Chore: new util run against source file
This commit is contained in:
parent
505f7544ed
commit
2d706f4775
41
Build/lib/run-against-source-file.ts
Normal file
41
Build/lib/run-against-source-file.ts
Normal file
@ -0,0 +1,41 @@
|
||||
import { never } from 'foxts/guard';
|
||||
import { readFileByLine } from './fetch-text-by-line';
|
||||
import { processLine } from './process-line';
|
||||
|
||||
/**
 * Read a rule source file line by line and report every plain domain entry
 * to `callback`.
 *
 * Each raw line goes through `processLine` first; lines for which it returns
 * a falsy value are skipped. The remaining lines are interpreted as one of:
 *
 * - `ruleset`: comma separated `TYPE,value[,...]` rules. Only `DOMAIN`
 *   (reported with `includeAllSubDomain = false`) and `DOMAIN-SUFFIX`
 *   (reported with `includeAllSubDomain = true`) are reported; every other
 *   rule type, and any rule with a missing or empty value, is ignored.
 * - `domainset`: one entry per line. A leading `.` is stripped and reported
 *   with `includeAllSubDomain = true`; anything else is reported as is with
 *   `includeAllSubDomain = false`.
 *
 * When `type` is omitted the format is detected once, from the first line
 * that survives `processLine` (contains a comma => `ruleset`, otherwise
 * `domainset`), and then reused for the rest of the file.
 *
 * @param filePath path passed to `readFileByLine`
 * @param callback invoked as `(domain, includeAllSubDomain)` for every
 *   reported entry. It may return a promise (or any thenable); those are
 *   collected and awaited before this function resolves.
 * @param type force the file format instead of auto-detecting it
 * @returns a promise that resolves once the whole file has been read and all
 *   promises returned by `callback` have fulfilled; it rejects if reading
 *   fails or any of those promises rejects
 */
export default async function runAgainstSourceFile(
  filePath: string,
  callback: (domain: string, includeAllSubDomain: boolean) => void | PromiseLike<unknown>,
  type?: 'ruleset' | 'domainset'
): Promise<void> {
  // Async callbacks are collected rather than awaited one at a time, so a
  // caller that schedules work on its own queue keeps its concurrency while
  // still being able to `await` this function to know everything finished.
  const pending: Array<PromiseLike<unknown>> = [];
  const report = (domain: string, includeAllSubDomain: boolean) => {
    const result = callback(domain, includeAllSubDomain);
    if (result != null) {
      pending.push(result);
    }
  };

  // Work on a local copy so the caller-supplied argument is never reassigned.
  let format = type;

  for await (const line of readFileByLine(filePath)) {
    const l = processLine(line);
    if (!l) {
      continue;
    }

    // Auto-detection happens only once; later lines reuse the decision.
    format ??= l.includes(',') ? 'ruleset' : 'domainset';

    switch (format) {
      case 'ruleset': {
        // Only the rule type and its value are needed; trailing fields are dropped.
        const [ruleType, domain] = l.split(',', 2);
        // A rule without a value (e.g. a bare `DOMAIN`) would otherwise hand
        // `undefined` to a callback that is typed to receive a string.
        if (domain) {
          if (ruleType === 'DOMAIN') {
            report(domain, false);
          } else if (ruleType === 'DOMAIN-SUFFIX') {
            report(domain, true);
          }
        }
        break;
      }
      case 'domainset': {
        if (l[0] === '.') {
          report(l.slice(1), true);
        } else {
          report(l, false);
        }
        break;
      }
      default: {
        // Exhaustiveness check: `format` is narrowed to `never` here.
        never(format);
      }
    }
  }

  await Promise.all(pending);
}
|
||||
@ -1,9 +1,9 @@
|
||||
import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
||||
import tldts from 'tldts';
|
||||
import { HostnameSmolTrie } from './lib/trie';
|
||||
import path from 'node:path';
|
||||
import { SOURCE_DIR } from './constants/dir';
|
||||
import { processLine } from './lib/process-line';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
(async () => {
|
||||
const lines1 = await Array.fromAsync(await fetchRemoteTextByLine('https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true));
|
||||
@ -31,23 +31,12 @@ import { processLine } from './lib/process-line';
|
||||
});
|
||||
}
|
||||
|
||||
for await (const line of readFileByLine(path.join(SOURCE_DIR, 'domainset', 'reject.conf'))) {
|
||||
const l = processLine(line);
|
||||
if (l) {
|
||||
trie.whitelist(l);
|
||||
}
|
||||
}
|
||||
for await (const line of readFileByLine(path.join(SOURCE_DIR, 'non_ip', 'reject.conf'))) {
|
||||
const l = processLine(line);
|
||||
if (l) {
|
||||
const [type, domain] = l.split(',', 3);
|
||||
if (type === 'DOMAIN') {
|
||||
trie.whitelist(domain, false);
|
||||
} else if (type === 'DOMAIN-SUFFIX') {
|
||||
trie.whitelist(domain, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
await runAgainstSourceFile(path.join(SOURCE_DIR, 'domainset', 'reject.conf'), (domain, includeAllSubDomain) => {
|
||||
trie.whitelist(domain, includeAllSubDomain);
|
||||
}, 'domainset');
|
||||
await runAgainstSourceFile(path.join(SOURCE_DIR, 'non_ip', 'reject.conf'), (domain, includeAllSubDomain) => {
|
||||
trie.whitelist(domain, includeAllSubDomain);
|
||||
}, 'ruleset');
|
||||
|
||||
console.log(trie.dump().map(i => '.' + i).join('\n'));
|
||||
})();
|
||||
|
||||
@ -1,20 +1,23 @@
|
||||
import path from 'node:path';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processFilterRulesWithPreload } from './lib/parse-filter/filters';
|
||||
import { processHosts } from './lib/parse-filter/hosts';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { HostnameSmolTrie } from './lib/trie';
|
||||
import { dummySpan } from './trace';
|
||||
import { SOURCE_DIR } from './constants/dir';
|
||||
import { PREDEFINED_WHITELIST } from './constants/reject-data-source';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
(async () => {
|
||||
const trie = new HostnameSmolTrie();
|
||||
|
||||
await writeHostsToTrie(trie, 'https://cdn.jsdelivr.net/gh/jerryn70/GoodbyeAds@master/Extension/GoodbyeAds-Xiaomi-Extension.txt', true);
|
||||
|
||||
await runWhiteOnSource(path.join(SOURCE_DIR, 'domainset', 'reject.conf'), trie);
|
||||
await runWhiteOnSource(path.join(SOURCE_DIR, 'non_ip', 'reject.conf'), trie);
|
||||
const callback = (domain: string, includeAllSubDomain: boolean) => {
|
||||
trie.whitelist(domain, includeAllSubDomain);
|
||||
};
|
||||
|
||||
await runAgainstSourceFile(path.join(SOURCE_DIR, 'domainset', 'reject.conf'), callback, 'domainset');
|
||||
await runAgainstSourceFile(path.join(SOURCE_DIR, 'non_ip', 'reject.conf'), callback, 'ruleset');
|
||||
|
||||
for (let i = 0, len = PREDEFINED_WHITELIST.length; i < len; i++) {
|
||||
trie.whitelist(PREDEFINED_WHITELIST[i]);
|
||||
@ -25,24 +28,6 @@ import { PREDEFINED_WHITELIST } from './constants/reject-data-source';
|
||||
console.log('---------------------------');
|
||||
})();
|
||||
|
||||
async function runWhiteOnSource(sourceFile: string, trie: HostnameSmolTrie) {
|
||||
for await (const line of readFileByLine(sourceFile)) {
|
||||
const l = processLine(line);
|
||||
if (l) {
|
||||
if (l.includes(',')) {
|
||||
const [type, domain] = l.split(',', 3);
|
||||
if (type === 'DOMAIN') {
|
||||
trie.whitelist(domain, false);
|
||||
} else if (type === 'DOMAIN-SUFFIX') {
|
||||
trie.whitelist(domain, true);
|
||||
}
|
||||
} else {
|
||||
trie.whitelist(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function writeHostsToTrie(trie: HostnameSmolTrie, hostsUrl: string, includeAllSubDomain = false) {
|
||||
const hosts = await processHosts(dummySpan, hostsUrl, [], includeAllSubDomain);
|
||||
|
||||
|
||||
@ -1,11 +1,9 @@
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
|
||||
import { SOURCE_DIR } from './constants/dir';
|
||||
import path from 'node:path';
|
||||
import { newQueue } from '@henrygd/queue';
|
||||
import { isDomainAlive, keyedAsyncMutexWithQueue } from './lib/is-domain-alive';
|
||||
import { fdir as Fdir } from 'fdir';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
const queue = newQueue(24);
|
||||
|
||||
@ -19,10 +17,20 @@ function onDomain(args: [string, boolean]) {
|
||||
(async () => {
|
||||
const domainSets = await new Fdir()
|
||||
.withFullPaths()
|
||||
.filter((filePath, isDirectory) => {
|
||||
if (isDirectory) return false;
|
||||
const extname = path.extname(filePath);
|
||||
return extname === '.txt' || extname === '.conf';
|
||||
})
|
||||
.crawl(SOURCE_DIR + path.sep + 'domainset')
|
||||
.withPromise();
|
||||
const domainRules = await new Fdir()
|
||||
.withFullPaths()
|
||||
.filter((filePath, isDirectory) => {
|
||||
if (isDirectory) return false;
|
||||
const extname = path.extname(filePath);
|
||||
return extname === '.txt' || extname === '.conf';
|
||||
})
|
||||
.crawl(SOURCE_DIR + path.sep + 'non_ip')
|
||||
.withPromise();
|
||||
|
||||
@ -37,53 +45,29 @@ function onDomain(args: [string, boolean]) {
|
||||
})();
|
||||
|
||||
export async function runAgainstRuleset(filepath: string) {
|
||||
const extname = path.extname(filepath);
|
||||
if (extname !== '.conf') {
|
||||
console.log('[skip]', filepath);
|
||||
return;
|
||||
}
|
||||
|
||||
const promises: Array<Promise<void>> = [];
|
||||
|
||||
for await (const l of readFileByLine(filepath)) {
|
||||
const line = processLine(l);
|
||||
if (!line) continue;
|
||||
const [type, domain] = line.split(',');
|
||||
switch (type) {
|
||||
case 'DOMAIN-SUFFIX':
|
||||
case 'DOMAIN': {
|
||||
promises.push(
|
||||
queue.add(() => keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX')))
|
||||
.then(onDomain)
|
||||
);
|
||||
break;
|
||||
}
|
||||
// no default
|
||||
}
|
||||
}
|
||||
await runAgainstSourceFile(
|
||||
filepath,
|
||||
(domain: string, includeAllSubdomain: boolean) => queue.add(() => keyedAsyncMutexWithQueue(
|
||||
domain,
|
||||
() => isDomainAlive(domain, includeAllSubdomain)
|
||||
).then(onDomain))
|
||||
);
|
||||
|
||||
await Promise.all(promises);
|
||||
console.log('[done]', filepath);
|
||||
}
|
||||
|
||||
export async function runAgainstDomainset(filepath: string) {
|
||||
const extname = path.extname(filepath);
|
||||
if (extname !== '.conf') {
|
||||
console.log('[skip]', filepath);
|
||||
return;
|
||||
}
|
||||
|
||||
const promises: Array<Promise<void>> = [];
|
||||
|
||||
for await (const l of readFileByLine(filepath)) {
|
||||
const line = processLine(l);
|
||||
if (!line) continue;
|
||||
promises.push(
|
||||
queue.add(() => keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.')))
|
||||
.then(onDomain)
|
||||
);
|
||||
}
|
||||
|
||||
await runAgainstSourceFile(
|
||||
filepath,
|
||||
(domain: string, includeAllSubdomain: boolean) => queue.add(() => keyedAsyncMutexWithQueue(
|
||||
domain,
|
||||
() => isDomainAlive(domain, includeAllSubdomain)
|
||||
).then(onDomain))
|
||||
);
|
||||
await Promise.all(promises);
|
||||
console.log('[done]', filepath);
|
||||
}
|
||||
|
||||
@ -1,11 +1,10 @@
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { parse } from 'csv-parse/sync';
|
||||
import { HostnameSmolTrie } from './lib/trie';
|
||||
import path from 'node:path';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { SOURCE_DIR } from './constants/dir';
|
||||
import { parseFelixDnsmasqFromResp } from './lib/parse-dnsmasq';
|
||||
import { $$fetch } from './lib/fetch-retry';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
export async function parseDomesticList() {
|
||||
const trie = new HostnameSmolTrie(await parseFelixDnsmasqFromResp(await $$fetch('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')));
|
||||
@ -36,27 +35,24 @@ export async function parseDomesticList() {
|
||||
|
||||
const notIncludedDomestic = new Set<string>(top5000);
|
||||
|
||||
const runAgainstRuleset = async (ruleset: string) => {
|
||||
for await (const l of readFileByLine(ruleset)) {
|
||||
const line = processLine(l);
|
||||
if (!line) continue;
|
||||
const [type, domain] = line.split(',');
|
||||
if (type === 'DOMAIN-SUFFIX') {
|
||||
// await Promise.all([
|
||||
await runAgainstSourceFile(
|
||||
path.resolve(SOURCE_DIR, 'non_ip/domestic.conf'),
|
||||
(domain, includeAllSubdomain) => {
|
||||
if (includeAllSubdomain) {
|
||||
if (top5000.has(domain)) {
|
||||
notIncludedDomestic.delete(domain);
|
||||
}
|
||||
} else if (type === 'DOMAIN-KEYWORD') {
|
||||
for (const d of top5000) {
|
||||
if (d.includes(domain)) {
|
||||
notIncludedDomestic.delete(d);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// noop, DOMAIN-KEYWORD handling
|
||||
// for (const d of top5000) {
|
||||
// if (d.includes(domain)) {
|
||||
// notIncludedDomestic.delete(d);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// await Promise.all([
|
||||
await runAgainstRuleset(path.resolve(SOURCE_DIR, 'non_ip/domestic.conf'));
|
||||
);
|
||||
// ]);
|
||||
|
||||
console.log(notIncludedDomestic.size, notIncludedDomestic);
|
||||
|
||||
@ -3,11 +3,12 @@ import { fastNormalizeDomain } from './lib/normalize-domain';
|
||||
import { HostnameSmolTrie } from './lib/trie';
|
||||
// import { Readable } from 'stream';
|
||||
import { parse } from 'csv-parse/sync';
|
||||
import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
||||
import path from 'node:path';
|
||||
import { OUTPUT_SURGE_DIR } from './constants/dir';
|
||||
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
|
||||
import { $$fetch } from './lib/fetch-retry';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
export async function parseGfwList() {
|
||||
const whiteSet = new Set<string>();
|
||||
@ -77,46 +78,20 @@ export async function parseGfwList() {
|
||||
|
||||
const keywordSet = new Set<string>();
|
||||
|
||||
const runAgainstRuleset = async (ruleset: string) => {
|
||||
for await (const l of readFileByLine(ruleset)) {
|
||||
const line = processLine(l);
|
||||
if (!line) continue;
|
||||
const [type, domain] = line.split(',');
|
||||
switch (type) {
|
||||
case 'DOMAIN-SUFFIX': {
|
||||
trie.whitelist('.' + domain);
|
||||
break;
|
||||
}
|
||||
case 'DOMAIN': {
|
||||
trie.whitelist(domain);
|
||||
break;
|
||||
}
|
||||
case 'DOMAIN-KEYWORD': {
|
||||
keywordSet.add(domain);
|
||||
break;
|
||||
}
|
||||
// no default
|
||||
}
|
||||
}
|
||||
const callback = (domain: string, includeAllSubdomain: boolean) => {
|
||||
trie.whitelist(domain, includeAllSubdomain);
|
||||
};
|
||||
|
||||
const runAgainstDomainset = async (ruleset: string) => {
|
||||
for await (const l of readFileByLine(ruleset)) {
|
||||
const line = processLine(l);
|
||||
if (!line) continue;
|
||||
trie.whitelist(line);
|
||||
}
|
||||
};
|
||||
await Promise.all([
|
||||
runAgainstRuleset(path.join(OUTPUT_SURGE_DIR, 'non_ip/global.conf')),
|
||||
runAgainstRuleset(path.join(OUTPUT_SURGE_DIR, 'non_ip/reject.conf')),
|
||||
runAgainstRuleset(path.join(OUTPUT_SURGE_DIR, 'non_ip/telegram.conf')),
|
||||
runAgainstRuleset(path.resolve(OUTPUT_SURGE_DIR, 'non_ip/stream.conf')),
|
||||
runAgainstRuleset(path.resolve(OUTPUT_SURGE_DIR, 'non_ip/ai.conf')),
|
||||
runAgainstRuleset(path.resolve(OUTPUT_SURGE_DIR, 'non_ip/microsoft.conf')),
|
||||
runAgainstDomainset(path.resolve(OUTPUT_SURGE_DIR, 'domainset/reject.conf')),
|
||||
runAgainstDomainset(path.resolve(OUTPUT_SURGE_DIR, 'domainset/reject_extra.conf')),
|
||||
runAgainstDomainset(path.resolve(OUTPUT_SURGE_DIR, 'domainset/cdn.conf'))
|
||||
runAgainstSourceFile(path.join(OUTPUT_SURGE_DIR, 'non_ip/global.conf'), callback, 'ruleset'),
|
||||
runAgainstSourceFile(path.join(OUTPUT_SURGE_DIR, 'non_ip/reject.conf'), callback, 'ruleset'),
|
||||
runAgainstSourceFile(path.join(OUTPUT_SURGE_DIR, 'non_ip/telegram.conf'), callback, 'ruleset'),
|
||||
runAgainstSourceFile(path.resolve(OUTPUT_SURGE_DIR, 'non_ip/stream.conf'), callback, 'ruleset'),
|
||||
runAgainstSourceFile(path.resolve(OUTPUT_SURGE_DIR, 'non_ip/ai.conf'), callback, 'ruleset'),
|
||||
runAgainstSourceFile(path.resolve(OUTPUT_SURGE_DIR, 'non_ip/microsoft.conf'), callback, 'ruleset'),
|
||||
runAgainstSourceFile(path.resolve(OUTPUT_SURGE_DIR, 'domainset/reject.conf'), callback, 'domainset'),
|
||||
runAgainstSourceFile(path.resolve(OUTPUT_SURGE_DIR, 'domainset/reject_extra.conf'), callback, 'domainset'),
|
||||
runAgainstSourceFile(path.resolve(OUTPUT_SURGE_DIR, 'domainset/cdn.conf'), callback, 'domainset')
|
||||
]);
|
||||
|
||||
whiteSet.forEach(domain => trie.whitelist(domain));
|
||||
|
||||
@ -1,42 +1,28 @@
|
||||
import path from 'node:path';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { HostnameSmolTrie } from './lib/trie';
|
||||
import { OUTPUT_SURGE_DIR, SOURCE_DIR } from './constants/dir';
|
||||
import { OUTPUT_SURGE_DIR } from './constants/dir';
|
||||
import { ICP_TLD } from './constants/domains';
|
||||
import tldts from 'tldts-experimental';
|
||||
import { looseTldtsOpt } from './constants/loose-tldts-opt';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
(async () => {
|
||||
const trie = new HostnameSmolTrie();
|
||||
const extraWhiteTLDs = new Set<string>();
|
||||
|
||||
for await (const line of readFileByLine(path.join(OUTPUT_SURGE_DIR, 'non_ip', 'domestic.conf'))) {
|
||||
const [type, domain] = line.split(',');
|
||||
if (type !== 'DOMAIN' && type !== 'DOMAIN-SUFFIX') {
|
||||
continue;
|
||||
}
|
||||
await runAgainstSourceFile(path.join(OUTPUT_SURGE_DIR, 'non_ip', 'domestic.conf'), (domain) => {
|
||||
if (domain === 'this_ruleset_is_made_by_sukkaw.ruleset.skk.moe') {
|
||||
continue;
|
||||
return;
|
||||
}
|
||||
const tld = tldts.getPublicSuffix(domain, looseTldtsOpt);
|
||||
if (tld) {
|
||||
extraWhiteTLDs.add(tld);
|
||||
}
|
||||
}
|
||||
}, 'ruleset');
|
||||
|
||||
for await (const line of readFileByLine(path.join(SOURCE_DIR, 'non_ip', 'global.conf'))) {
|
||||
const [type, domain] = line.split(',');
|
||||
switch (type) {
|
||||
case 'DOMAIN':
|
||||
trie.add(domain);
|
||||
break;
|
||||
case 'DOMAIN-SUFFIX':
|
||||
trie.add(domain, true);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
await runAgainstSourceFile(path.join(OUTPUT_SURGE_DIR, 'non_ip', 'global.conf'), (domain, includeAllSubDomain) => {
|
||||
trie.add(domain, includeAllSubDomain);
|
||||
}, 'ruleset');
|
||||
|
||||
ICP_TLD.forEach(tld => trie.whitelist(tld, true));
|
||||
extraWhiteTLDs.forEach(tld => trie.whitelist(tld, true));
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
import path from 'node:path';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { OUTPUT_SURGE_DIR } from './constants/dir';
|
||||
import { processLine } from './lib/process-line';
|
||||
import tldts from 'tldts';
|
||||
import { loosTldOptWithPrivateDomains } from './constants/loose-tldts-opt';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
(async () => {
|
||||
const rejectDomainCountMap = await runAgainstDomainset(new Map<string, number>(), path.join(OUTPUT_SURGE_DIR, 'domainset', 'reject.conf'));
|
||||
@ -17,22 +16,22 @@ import { loosTldOptWithPrivateDomains } from './constants/loose-tldts-opt';
|
||||
})();
|
||||
|
||||
async function runAgainstDomainset(rejectDomainCountMap: Map<string, number>, file: string) {
|
||||
for await (const line of readFileByLine(file)) {
|
||||
if (!processLine(line)) {
|
||||
continue;
|
||||
}
|
||||
const apexDomain = tldts.getDomain(line, loosTldOptWithPrivateDomains);
|
||||
if (!apexDomain) {
|
||||
continue;
|
||||
}
|
||||
await runAgainstSourceFile(
|
||||
file,
|
||||
(domain: string) => {
|
||||
const apexDomain = tldts.getDomain(domain, loosTldOptWithPrivateDomains);
|
||||
if (!apexDomain) {
|
||||
return;
|
||||
}
|
||||
|
||||
rejectDomainCountMap.set(
|
||||
apexDomain,
|
||||
rejectDomainCountMap.has(apexDomain)
|
||||
? rejectDomainCountMap.get(apexDomain)! + 1
|
||||
: 1
|
||||
);
|
||||
}
|
||||
rejectDomainCountMap.set(
|
||||
apexDomain,
|
||||
rejectDomainCountMap.has(apexDomain)
|
||||
? rejectDomainCountMap.get(apexDomain)! + 1
|
||||
: 1
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
return rejectDomainCountMap;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user