From bf4c92cc5dd5d0cd13114a1e2fea10a7b114731d Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 14 Jun 2023 23:15:21 +0800 Subject: [PATCH] Chore: read request stream line by line --- Build/build-anti-bogus-domain.js | 23 ++++++++++++----------- Build/build-apple-cdn.js | 24 +++++++++++++----------- Build/lib/fetch-remote-text-by-line.js | 20 ++++++++++++++++++++ Build/lib/fetch-retry.js | 2 +- Build/lib/parse-filter.js | 17 +++++++++-------- Build/lib/reject-data-source.js | 4 ++-- 6 files changed, 57 insertions(+), 33 deletions(-) create mode 100644 Build/lib/fetch-remote-text-by-line.js diff --git a/Build/build-anti-bogus-domain.js b/Build/build-anti-bogus-domain.js index 93ac7746..ad2ee078 100644 --- a/Build/build-anti-bogus-domain.js +++ b/Build/build-anti-bogus-domain.js @@ -1,23 +1,24 @@ -const { fetchWithRetry } = require('./lib/fetch-retry'); +// @ts-check const fs = require('fs'); const path = require('path'); const { isIPv4, isIPv6 } = require('net'); const { compareAndWriteFile } = require('./lib/string-array-compare'); const { withBannerArray } = require('./lib/with-banner'); +const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); (async () => { console.time('Total Time - build-anti-bogus-domain'); - console.time('* Download bogus-nxdomain-list') - const res = (await (await fetchWithRetry('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')).text()) - .split('\n') - .map(line => { - if (line.startsWith('bogus-nxdomain=')) { - return line.replace('bogus-nxdomain=', ''); - } + console.time('* Download bogus-nxdomain-list'); - return null - }) - .filter(ip => typeof ip === 'string'); + const rl = await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf'); + + /** @type {string[]} */ + const res = []; + for await (const line of rl) { + if (line.startsWith('bogus-nxdomain=')) { + res.push(line.replace('bogus-nxdomain=', '')); + } + } console.timeEnd('* Download bogus-nxdomain-list') diff --git a/Build/build-apple-cdn.js b/Build/build-apple-cdn.js index 23a84154..fe6b6dda 100644 --- a/Build/build-apple-cdn.js +++ b/Build/build-apple-cdn.js @@ -1,24 +1,26 @@ -const { fetchWithRetry } = require('./lib/fetch-retry'); -const fs = require('fs'); const path = require('path'); const { isDomainLoose } = require('./lib/is-domain-loose'); const { compareAndWriteFile } = require('./lib/string-array-compare'); const { withBannerArray } = require('./lib/with-banner'); +const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); + (async () => { console.time('Total Time - build-apple-cdn-conf'); - const res = (await (await fetchWithRetry('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf')).text()) - .split('\n') - .map(line => { - if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) { - return line.replace('server=/', '').replace('/114.114.114.114', ''); - } + const rl = await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf'); - return null - }) - .filter(domain => typeof domain === 'string' && isDomainLoose(domain)); + /** @type {string[]} */ + const res = []; + for await (const line of rl) { + if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) { + const domain = line.replace('server=/', '').replace('/114.114.114.114', ''); + if (isDomainLoose(domain)) { + res.push(domain); + } + } + } await Promise.all([ compareAndWriteFile( diff --git a/Build/lib/fetch-remote-text-by-line.js b/Build/lib/fetch-remote-text-by-line.js new file mode 100644 index 00000000..33c21982 --- /dev/null +++ b/Build/lib/fetch-remote-text-by-line.js @@ -0,0 +1,20 @@ +// @ts-check +const { fetchWithRetry } = require('./fetch-retry'); +const readline = require('readline'); +const { Readable } = require('stream'); + +/** + * @param {import('undici').RequestInfo} url + * @param {import('undici').RequestInit | undefined} [opt] + */ +module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => { + const resp = await fetchWithRetry(url, opt); + if (!resp.body) { + throw new Error('Failed to fetch remote text'); + } + + return readline.createInterface({ + input: Readable.fromWeb(resp.body), + crlfDelay: Infinity + }); +} diff --git a/Build/lib/fetch-retry.js b/Build/lib/fetch-retry.js index 47591b7b..b7c10333 100644 --- a/Build/lib/fetch-retry.js +++ b/Build/lib/fetch-retry.js @@ -1,4 +1,4 @@ // @ts-check const { fetch } = require('undici'); -const fetchWithRetry = require('@vercel/fetch-retry')(fetch); +const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch)); module.exports.fetchWithRetry = fetchWithRetry; diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index 6f6b0f6e..4218737a 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -1,5 +1,6 @@ // @ts-check const { fetchWithRetry } = require('./fetch-retry'); +const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line'); const { NetworkFilter } = require('@cliqz/adblocker'); const { normalizeDomain } = require('./is-domain-loose'); @@ -26,9 +27,10 @@ async function processDomainLists(domainListsUrl) { /** @type Set */ const domainSets = new Set(); - /** @type string[] */ - const domains = (await (await fetchWithRetry(domainListsUrl)).text()).split('\n'); - domains.forEach(line => { + + const rl = await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl); + + for await (const line of rl) { if ( line.startsWith('#') || line.startsWith('!') @@ -48,7 +50,7 @@ async function processDomainLists(domainListsUrl) { } domainSets.add(domainToAdd); - }); + } return [...domainSets]; } @@ -66,9 +68,8 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) { /** @type Set */ const domainSets = new Set(); - /** @type string[] */ - const hosts = (await (await fetchWithRetry(hostsUrl)).text()).split('\n'); - hosts.forEach(line => { + const rl = await fetchRemoteTextAndCreateReadlineInterface(hostsUrl); + for await (const line of rl) { if (line.includes('#')) { return; } @@ -91,7 +92,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) { domainSets.add(domain); } } - }); + } console.timeEnd(` - processHosts: ${hostsUrl}`); diff --git a/Build/lib/reject-data-source.js b/Build/lib/reject-data-source.js index 7c4865b0..1ad7b109 100644 --- a/Build/lib/reject-data-source.js +++ b/Build/lib/reject-data-source.js @@ -8,7 +8,7 @@ const HOSTS = [ ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false] ] -const ADGUARD_FILTERS = [ +const ADGUARD_FILTERS = /** @type {const} */([ // Easy List [ 'https://easylist.to/easylist/easylist.txt', @@ -177,7 +177,7 @@ const ADGUARD_FILTERS = [ 'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt', 'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty-cname.txt', 'https://raw.githubusercontent.com/brave/adblock-lists/master/coin-miners.txt' -]; +]); const PREDEFINED_WHITELIST = [ 'localhost',