Add build step to CDN domainset

This commit is contained in:
SukkaW 2023-07-13 22:18:53 +08:00
parent 9fde4a3866
commit b43c1628d6
7 changed files with 128 additions and 48 deletions

1
.gitignore vendored
View File

@ -5,6 +5,7 @@ node_modules
public
List/domainset/reject.conf
List/domainset/cdn.conf
List/domainset/reject_phishing.conf
List/domainset/reject_sukka.conf
List/domainset/apple_cdn.conf

View File

@ -1,18 +1,28 @@
const { fetchWithRetry } = require('./lib/fetch-retry');
// @ts-check
const fs = require('fs');
const path = require('path');
const { compareAndWriteFile } = require('./lib/string-array-compare');
const { withBannerArray } = require('./lib/with-banner');
const { minifyRules } = require('./lib/minify-rules');
const { domainDeduper } = require('./lib/domain-deduper');
const { shouldIgnoreLine } = require('./lib/should-ignore-line');
const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
const readline = require('readline');
(async () => {
console.time('Total Time - build-cdn-conf');
const domains = (await (await fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat')).text()).split('\n');
/**
* Extract OSS domain from publicsuffix list
* @type {Set<string>}
*/
const S3OSSDomains = new Set();
const S3OSSDomains = domains.filter(line => {
if (line) {
return (
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
if (
line
&& (
line.startsWith('s3-')
|| line.startsWith('s3.')
)
@ -20,18 +30,16 @@ const { minifyRules } = require('./lib/minify-rules');
line.endsWith('.amazonaws.com')
|| line.endsWith('.scw.cloud')
)
&& !line.includes('cn-');
&& !line.includes('cn-')
) {
S3OSSDomains.add(line);
}
}
return false;
});
const filePath = path.resolve(__dirname, '../Source/non_ip/cdn.conf');
const resultPath = path.resolve(__dirname, '../List/non_ip/cdn.conf');
const content = (await fs.promises.readFile(filePath, 'utf-8'))
const content = (await fs.promises.readFile(path.resolve(__dirname, '../Source/non_ip/cdn.conf'), 'utf-8'))
.replace(
'# --- [AWS S3 Replace Me] ---',
S3OSSDomains.map(domain => `DOMAIN-SUFFIX,${domain}`).join('\n')
Array.from(S3OSSDomains).map(domain => `DOMAIN-SUFFIX,${domain}`).join('\n')
);
await compareAndWriteFile(
@ -47,7 +55,39 @@ const { minifyRules } = require('./lib/minify-rules');
new Date(),
minifyRules(content.split('\n'))
),
resultPath
path.resolve(__dirname, '../List/non_ip/cdn.conf')
);
/**
* Dedupe cdn.conf
*/
/** @type {Set<string>} */
const cdnDomains = new Set();
for await (const line of readline.createInterface({
input: fs.createReadStream(path.resolve(__dirname, '../Source/domainset/cdn.conf'), 'utf-8'),
crlfDelay: Infinity
})) {
const l = shouldIgnoreLine(line);
if (l) {
cdnDomains.add(l);
}
}
await compareAndWriteFile(
withBannerArray(
'Sukka\'s Surge Rules - CDN Domains',
[
'License: AGPL 3.0',
'Homepage: https://ruleset.skk.moe',
'GitHub: https://github.com/SukkaW/Surge',
'',
'This file contains object storage and static assets CDN domains.'
],
new Date(),
minifyRules(domainDeduper(Array.from(cdnDomains)))
),
path.resolve(__dirname, '../List/domainset/cdn.conf')
);
console.timeEnd('Total Time - build-cdn-conf');

View File

@ -11,6 +11,7 @@ const Trie = require('./lib/trie');
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
const { withBannerArray } = require('./lib/with-banner');
const { compareAndWriteFile } = require('./lib/string-array-compare');
const { shouldIgnoreLine } = require('./lib/should-ignore-line');
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@ -109,19 +110,10 @@ const domainSuffixSet = new Set();
});
for await (const line of rl1) {
if (
line.startsWith('#')
|| line.startsWith(' ')
|| line.startsWith('\r')
|| line.startsWith('\n')
) {
continue;
const l = shouldIgnoreLine(line);
if (l) {
domainSets.add(l);
}
const trimmed = line.trim();
if (trimmed === '') continue;
domainSets.add(trimmed);
}
previousSize = domainSets.size - previousSize;
@ -146,19 +138,10 @@ const domainSuffixSet = new Set();
crlfDelay: Infinity
});
// Collect the black-listed domain suffixes read from `rl3`.
// `shouldIgnoreLine` yields the trimmed line, or null for blank lines and
// lines that start with a comment marker or whitespace.
for await (const line of rl3) {
  const l = shouldIgnoreLine(line);
  if (l) {
    // Must go into `domainSuffixSet` (not `domainSets`): this is the set the
    // summary log right after this loop reports as "black suffixes".
    domainSuffixSet.add(l);
  }
}
console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);

View File

@ -0,0 +1,27 @@
const Trie = require('./trie');
/**
 * Remove entries that are made redundant by a dot-prefixed entry of the
 * same list.
 *
 * For every input that starts with `.`, two kinds of entries are dropped
 * from the result:
 *  - everything `trie.find(entry, false)` returns (presumably every other
 *    stored domain ending with that suffix -- confirm against ./trie), and
 *  - the same entry without its leading dot, when it is present as well.
 *
 * Entries that are not removed keep their first-seen order; exact
 * duplicates collapse into one.
 *
 * @param {string[]} inputDomains
 * @returns {string[]}
 */
const domainDeduper = (inputDomains) => {
  const trie = Trie.from(inputDomains);
  const remaining = new Set(inputDomains);

  for (const domain of inputDomains) {
    // Only dot-prefixed entries can make other entries redundant.
    if (domain[0] === '.') {
      for (const covered of trie.find(domain, false)) {
        remaining.delete(covered);
      }

      const bareDomain = domain.slice(1);
      if (trie.has(bareDomain)) {
        remaining.delete(bareDomain);
      }
    }
  }

  return [...remaining];
};
module.exports.domainDeduper = domainDeduper;

View File

@ -0,0 +1,29 @@
/**
 * Normalise one raw line of a rule / domain list.
 *
 * Despite its name this does not return a boolean: it returns the trimmed
 * line when the line carries content, and `null` when the line should be
 * skipped. A line is skipped when it is empty, when its very first character
 * is `#`, `!`, a space, `\r` or `\n`, or when nothing is left after trimming.
 *
 * @param {string} line
 * @returns {string | null}
 */
module.exports.shouldIgnoreLine = (line) => {
  if (line === '') {
    return null;
  }

  // Only the first character decides whether the line is a comment or an
  // indented / blank line.
  switch (line[0]) {
    case '#':
    case ' ':
    case '\r':
    case '\n':
    case '!':
      return null;
    default:
      break;
  }

  const content = line.trim();
  return content === '' ? null : content;
};

View File

@ -81,6 +81,7 @@ class Trie {
$suffix = suffixStack.pop();
node = nodeStack.pop();
// eslint-disable-next-line guard-for-in -- plain object
for (k in node) {
if (k === SENTINEL) {
if (includeEqualWithSuffix) {
@ -89,7 +90,6 @@ class Trie {
matches.push($suffix);
}
continue;
}
@ -161,8 +161,9 @@ class Trie {
node = node[token];
// Prefix does not exist
if (typeof node === 'undefined')
if (typeof node === 'undefined') {
return false;
}
// Keeping track of a potential branch to prune
if (toPrune !== null) {
@ -170,12 +171,9 @@ class Trie {
toPrune = null;
tokenToPrune = null;
}
}
else {
if (Object.keys(node).length < 2) {
toPrune = parent;
tokenToPrune = token;
}
} else if (Object.keys(node).length < 2) {
toPrune = parent;
tokenToPrune = token;
}
}
@ -206,8 +204,9 @@ class Trie {
token = suffix[i];
node = node[token];
if (typeof node === 'undefined')
if (typeof node === 'undefined') {
return false;
}
}
return SENTINEL in node;
@ -217,7 +216,7 @@ class Trie {
* @return {string[]}
*/
dump() {
let node = this.root;
const node = this.root;
const nodeStack = [];
const prefixStack = [];
// Resolving initial prefix
@ -238,6 +237,7 @@ class Trie {
currentNode = nodeStack.pop();
currentPrefix = prefixStack.pop();
// eslint-disable-next-line guard-for-in -- plain object
for (k in currentNode) {
if (k === SENTINEL) {
hasValue = true;