Chore: read request stream line by line

This commit is contained in:
SukkaW
2023-06-14 23:15:21 +08:00
parent 8a77541ce7
commit bf4c92cc5d
6 changed files with 57 additions and 33 deletions

View File

@@ -0,0 +1,20 @@
// @ts-check
const { fetchWithRetry } = require('./fetch-retry');
const readline = require('readline');
const { Readable } = require('stream');
/**
 * Fetch a remote resource through `fetchWithRetry` and expose its response body
 * as a `readline` interface, so callers can consume the text one line at a time
 * (e.g. with `for await (const line of rl)`).
 *
 * @param {import('undici').RequestInfo} url - resource to request
 * @param {import('undici').RequestInit | undefined} [opt] - optional request options, forwarded as-is
 * @returns {Promise<import('readline').Interface>} line reader over the response body
 * @throws {Error} when the response carries no body
 */
module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => {
  const { body } = await fetchWithRetry(url, opt);
  if (!body) {
    throw new Error('Failed to fetch remote text');
  }

  // The response body is a web stream; readline needs a Node.js Readable as its input.
  const input = Readable.fromWeb(body);

  // crlfDelay: Infinity makes readline always treat "\r\n" as a single line break.
  return readline.createInterface({ input, crlfDelay: Infinity });
};

View File

@@ -1,4 +1,4 @@
// @ts-check
const { fetch } = require('undici');
const fetchWithRetry = require('@vercel/fetch-retry')(fetch);
// undici's `fetch` wrapped by @vercel/fetch-retry. The inline JSDoc cast annotates the
// wrapper with `fetch` as its type, presumably so `// @ts-check` sees undici's signature
// on the exported function.
const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch));
module.exports.fetchWithRetry = fetchWithRetry;

View File

@@ -1,5 +1,6 @@
// @ts-check
const { fetchWithRetry } = require('./fetch-retry');
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
const { NetworkFilter } = require('@cliqz/adblocker');
const { normalizeDomain } = require('./is-domain-loose');
@@ -26,9 +27,10 @@ async function processDomainLists(domainListsUrl) {
/** @type Set<string> */
const domainSets = new Set();
/** @type string[] */
const domains = (await (await fetchWithRetry(domainListsUrl)).text()).split('\n');
domains.forEach(line => {
const rl = await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl);
for await (const line of rl) {
if (
line.startsWith('#')
|| line.startsWith('!')
@@ -48,7 +50,7 @@ async function processDomainLists(domainListsUrl) {
}
domainSets.add(domainToAdd);
});
}
return [...domainSets];
}
@@ -66,9 +68,8 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
/** @type Set<string> */
const domainSets = new Set();
/** @type string[] */
const hosts = (await (await fetchWithRetry(hostsUrl)).text()).split('\n');
hosts.forEach(line => {
const rl = await fetchRemoteTextAndCreateReadlineInterface(hostsUrl);
for await (const line of rl) {
if (line.includes('#')) {
return;
}
@@ -91,7 +92,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
domainSets.add(domain);
}
}
});
}
console.timeEnd(` - processHosts: ${hostsUrl}`);

View File

@@ -8,7 +8,7 @@ const HOSTS = [
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false]
]
const ADGUARD_FILTERS = [
const ADGUARD_FILTERS = /** @type {const} */([
// Easy List
[
'https://easylist.to/easylist/easylist.txt',
@@ -177,7 +177,7 @@ const ADGUARD_FILTERS = [
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt',
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty-cname.txt',
'https://raw.githubusercontent.com/brave/adblock-lists/master/coin-miners.txt'
];
]);
const PREDEFINED_WHITELIST = [
'localhost',