From 57b5d2933f8d017bc1e0fb16c8988363b60d2d08 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sun, 5 Jan 2025 18:06:31 +0800 Subject: [PATCH] Fix: improve whois matching w/ domain checking --- Build/lib/is-domain-alive.test.ts | 77 +++++++++ Build/lib/is-domain-alive.ts | 272 ++++++++++++++++++++++++++++++ Build/validate-domain-alive.ts | 232 +------------------------ 3 files changed, 354 insertions(+), 227 deletions(-) create mode 100644 Build/lib/is-domain-alive.test.ts create mode 100644 Build/lib/is-domain-alive.ts diff --git a/Build/lib/is-domain-alive.test.ts b/Build/lib/is-domain-alive.test.ts new file mode 100644 index 00000000..afcee131 --- /dev/null +++ b/Build/lib/is-domain-alive.test.ts @@ -0,0 +1,77 @@ +import { describe, it } from 'mocha'; + +import { isDomainAlive, whoisExists } from './is-domain-alive'; +import { expect } from 'expect'; + +describe('whoisExists', () => { + it('.cryptocrawler.io', () => { + expect(whoisExists({ + 'whois.nic.io': { + 'Domain Status': [], + 'Name Server': [], + '>>> Last update of WHOIS database': '2025-01-05T11:06:38Z <<<', + text: [ + 'Domain not found.', + '', + 'Terms of Use: Access to WHOIS' + ] + } + })).toBe(false); + }); + + it('.tunevideo.ru', () => { + expect(whoisExists({ + 'whois.tcinet.ru': { + 'Domain Status': [], + 'Name Server': [], + text: [ + '% TCI Whois Service. Terms of use:', + '% https://tcinet.ru/documents/whois_ru_rf.pdf (in Russian)', + '% https://tcinet.ru/documents/whois_su.pdf (in Russian)', + '', + 'No entries found for the selected source(s).', + '', + 'Last updated on 2025-01-05T11:03:01Z' + ] + } + })).toBe(false); + }); + + it('.myqloud.com', () => { + expect(whoisExists({ + 'whois.tcinet.ru': { + 'Domain Status': [], + 'Name Server': [], + text: [ + '% TCI Whois Service. Terms of use:', + '% https://tcinet.ru/documents/whois_ru_rf.pdf (in Russian)', + '% https://tcinet.ru/documents/whois_su.pdf (in Russian)', + '', + 'No entries found for the selected source(s).', + '', + 'Last updated on 2025-01-05T11:03:01Z' + ] + } + })).toBe(false); + }); +}); + +describe('isDomainAlive', function () { + this.timeout(10000); + + it('.cryptocrawler.io', async () => { + expect((await isDomainAlive('.cryptocrawler.io', true))[1]).toEqual(false); + }); + + it('.tunevideo.ru', async () => { + expect((await isDomainAlive('.tunevideo.ru', true))[1]).toEqual(false); + }); + + it('.myqloud.com', async () => { + expect((await isDomainAlive('.myqloud.com', true))[1]).toEqual(true); + }); + + it('discount-deal.org', async () => { + expect((await isDomainAlive('discount-deal.org', false))[1]).toEqual(false); + }); +}); diff --git a/Build/lib/is-domain-alive.ts b/Build/lib/is-domain-alive.ts new file mode 100644 index 00000000..4175f7de --- /dev/null +++ b/Build/lib/is-domain-alive.ts @@ -0,0 +1,272 @@ +import tldts from 'tldts-experimental'; +import { looseTldtsOpt } from '../constants/loose-tldts-opt'; +import picocolors from 'picocolors'; + +import DNS2 from 'dns2'; +import asyncRetry from 'async-retry'; +import * as whoiser from 'whoiser'; + +import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie'; + +const mutex = new Map>(); +export function keyedAsyncMutexWithQueue(key: string, fn: () => Promise) { + if (mutex.has(key)) { + return mutex.get(key) as Promise; + } + const promise = fn(); + mutex.set(key, promise); + return promise; +} + +class DnsError extends Error { + name = 'DnsError'; + constructor(readonly message: string, public readonly server: string) { + super(message); + } +} + +interface DnsResponse extends DNS2.$DnsResponse { + dns: string +} + +const dohServers: Array<[string, DNS2.DnsResolver]> = ([ + '8.8.8.8', + '8.8.4.4', + '1.0.0.1', + '1.1.1.1', + '162.159.36.1', + '162.159.46.1', + '101.101.101.101', // TWNIC + '185.222.222.222', // DNS.SB + '45.11.45.11', // DNS.SB + 'dns10.quad9.net', // Quad9 unfiltered + 'doh.sandbox.opendns.com', // OpenDNS sandbox (unfiltered) + 'unfiltered.adguard-dns.com', + // '0ms.dev', // Proxy Cloudflare + // '76.76.2.0', // ControlD unfiltered, path not /dns-query + // '76.76.10.0', // ControlD unfiltered, path not /dns-query + // 'dns.bebasid.com', // BebasID, path not /dns-query but /unfiltered + // '193.110.81.0', // dns0.eu + // '185.253.5.0', // dns0.eu + // 'zero.dns0.eu', + 'dns.nextdns.io', + 'anycast.dns.nextdns.io', + 'wikimedia-dns.org', + // 'ordns.he.net', + // 'dns.mullvad.net', + 'basic.rethinkdns.com' + // 'ada.openbld.net', + // 'dns.rabbitdns.org' +] as const).map(dns => [ + dns, + DNS2.DOHClient({ + dns, + http: false + // get: (url: string) => undici.request(url).then(r => r.body) + }) +] as const); + +const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([ + '223.5.5.5', + '223.6.6.6', + '120.53.53.53', + '1.12.12.12' +] as const).map(dns => [ + dns, + DNS2.DOHClient({ + dns, + http: false + // get: (url: string) => undici.request(url).then(r => r.body) + }) +] as const); + +function createResolve(server: Array<[string, DNS2.DnsResolver]>): DNS2.DnsResolver { + return async (...args) => { + try { + return await asyncRetry(async () => { + const [dohServer, dohClient] = server[Math.floor(Math.random() * server.length)]; + + try { + return { + ...await dohClient(...args), + dns: dohServer + } satisfies DnsResponse; + } catch (e) { + // console.error(e); + throw new DnsError((e as Error).message, dohServer); + } + }, { retries: 5 }); + } catch (e) { + console.log('[doh error]', ...args, e); + throw e; + } + }; +} + +const resolve = createResolve(dohServers); +const domesticResolve = createResolve(domesticDohServers); + +async function getWhois(domain: string) { + return asyncRetry(() => whoiser.domain(domain), { retries: 5 }); +} + +const domainAliveMap = new Map(); +function onDomainAlive(domain: string): [string, boolean] { + domainAliveMap.set(domain, true); + return [domain, true]; +} +function onDomainDead(domain: string): [string, boolean] { + domainAliveMap.set(domain, false); + return [domain, false]; +} + +export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> { + if (domainAliveMap.has(domain)) { + return [domain, domainAliveMap.get(domain)!]; + } + + const apexDomain = tldts.getDomain(domain, looseTldtsOpt); + if (!apexDomain) { + console.log(picocolors.gray('[domain invalid]'), picocolors.gray('no apex domain'), { domain }); + return onDomainAlive(domain); + } + + const apexDomainAlive = await keyedAsyncMutexWithQueue(apexDomain, () => isApexDomainAlive(apexDomain)); + if (isSuffix) { + return apexDomainAlive; + } + if (!apexDomainAlive[1]) { + return apexDomainAlive; + } + + const $domain = domain[0] === '.' ? domain.slice(1) : domain; + + const aDns: string[] = []; + const aaaaDns: string[] = []; + + // test 2 times before make sure record is empty + for (let i = 0; i < 2; i++) { + // eslint-disable-next-line no-await-in-loop -- sequential + const aRecords = (await resolve($domain, 'A')); + if (aRecords.answers.length > 0) { + return onDomainAlive(domain); + } + + aDns.push(aRecords.dns); + } + for (let i = 0; i < 2; i++) { + // eslint-disable-next-line no-await-in-loop -- sequential + const aaaaRecords = (await resolve($domain, 'AAAA')); + if (aaaaRecords.answers.length > 0) { + return onDomainAlive(domain); + } + + aaaaDns.push(aaaaRecords.dns); + } + + // only then, let's test once with domesticDohServers + const aRecords = (await domesticResolve($domain, 'A')); + if (aRecords.answers.length > 0) { + return onDomainAlive(domain); + } + aDns.push(aRecords.dns); + + const aaaaRecords = (await domesticResolve($domain, 'AAAA')); + if (aaaaRecords.answers.length > 0) { + return onDomainAlive(domain); + } + aaaaDns.push(aaaaRecords.dns); + + console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns }); + return onDomainDead($domain); +} + +const apexDomainNsResolvePromiseMap = new Map>(); + +async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> { + if (domainAliveMap.has(apexDomain)) { + return [apexDomain, domainAliveMap.get(apexDomain)!]; + } + + let resp: DnsResponse; + if (apexDomainNsResolvePromiseMap.has(apexDomain)) { + resp = await apexDomainNsResolvePromiseMap.get(apexDomain)!; + } else { + const promise = resolve(apexDomain, 'NS'); + apexDomainNsResolvePromiseMap.set(apexDomain, promise); + resp = await promise; + } + + if (resp.answers.length > 0) { + return onDomainAlive(apexDomain); + } + + let whois; + + try { + whois = await getWhois(apexDomain); + } catch (e) { + console.log(picocolors.red('[domain dead]'), 'whois error', { domain: apexDomain }, e); + return onDomainDead(apexDomain); + } + + // console.log(JSON.stringify(whois, null, 2)); + + if (whoisExists(whois)) { + console.log(picocolors.gray('[domain alive]'), 'whois found', { domain: apexDomain }); + return onDomainAlive(apexDomain); + } + + console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain }); + return onDomainDead(apexDomain); +} + +// TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117 +const whoisNotFoundKeywordTest = createKeywordFilter([ + 'no match for', + 'does not exist', + 'not found', + 'no entries', + 'no data found', + 'is available for registration', + 'currently available for application' +]); + +export function whoisExists(whois: whoiser.WhoisSearchResult) { + let empty = true; + + for (const key in whois) { + if (Object.hasOwn(whois, key)) { + empty = false; + + if (key === 'error') { + if ( + (typeof whois.error === 'string' && whois.error) + || (Array.isArray(whois.error) && whois.error.length > 0) + ) { + console.error(whois); + return true; + } + continue; + } + + if (key === 'text') { + if (Array.isArray(whois.text) && whois.text.some(value => whoisNotFoundKeywordTest(value.toLowerCase()))) { + return false; + } + continue; + } + if (key === 'Name Server') { + if (Array.isArray(whois[key]) && whois[key].length === 0) { + return false; + } + continue; + } + + if (typeof whois[key] === 'object' && !Array.isArray(whois[key]) && !whoisExists(whois[key])) { + return false; + } + } + } + return !empty; +} diff --git a/Build/validate-domain-alive.ts b/Build/validate-domain-alive.ts index ace30e9d..ddac080b 100644 --- a/Build/validate-domain-alive.ts +++ b/Build/validate-domain-alive.ts @@ -1,119 +1,13 @@ -import DNS2 from 'dns2'; import { readFileByLine } from './lib/fetch-text-by-line'; import { processLine } from './lib/process-line'; -import tldts from 'tldts-experimental'; -import { looseTldtsOpt } from './constants/loose-tldts-opt'; -import { fdir as Fdir } from 'fdir'; + import { SOURCE_DIR } from './constants/dir'; import path from 'node:path'; import { newQueue } from '@henrygd/queue'; -import asyncRetry from 'async-retry'; -import * as whoiser from 'whoiser'; -import picocolors from 'picocolors'; -import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie'; - -const dohServers: Array<[string, DNS2.DnsResolver]> = ([ - '8.8.8.8', - '8.8.4.4', - '1.0.0.1', - '1.1.1.1', - '162.159.36.1', - '162.159.46.1', - '101.101.101.101', // TWNIC - '185.222.222.222', // DNS.SB - '45.11.45.11', // DNS.SB - 'dns10.quad9.net', // Quad9 unfiltered - 'doh.sandbox.opendns.com', // OpenDNS sandbox (unfiltered) - 'unfiltered.adguard-dns.com', - // '0ms.dev', // Proxy Cloudflare - // '76.76.2.0', // ControlD unfiltered, path not /dns-query - // '76.76.10.0', // ControlD unfiltered, path not /dns-query - // 'dns.bebasid.com', // BebasID, path not /dns-query but /unfiltered - // '193.110.81.0', // dns0.eu - // '185.253.5.0', // dns0.eu - // 'zero.dns0.eu', - 'dns.nextdns.io', - 'anycast.dns.nextdns.io', - 'wikimedia-dns.org', - // 'ordns.he.net', - // 'dns.mullvad.net', - 'basic.rethinkdns.com' - // 'ada.openbld.net', - // 'dns.rabbitdns.org' -] as const).map(dns => [ - dns, - DNS2.DOHClient({ - dns, - http: false - // get: (url: string) => undici.request(url).then(r => r.body) - }) -] as const); - -const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([ - '223.5.5.5', - '223.6.6.6', - '120.53.53.53', - '1.12.12.12' -] as const).map(dns => [ - dns, - DNS2.DOHClient({ - dns, - http: false - // get: (url: string) => undici.request(url).then(r => r.body) - }) -] as const); +import { isDomainAlive, keyedAsyncMutexWithQueue } from './lib/is-domain-alive'; +import { fdir as Fdir } from 'fdir'; const queue = newQueue(32); -const mutex = new Map>(); -function keyedAsyncMutexWithQueue(key: string, fn: () => Promise) { - if (mutex.has(key)) { - return mutex.get(key) as Promise; - } - const promise = queue.add(() => fn()); - mutex.set(key, promise); - return promise; -} - -class DnsError extends Error { - name = 'DnsError'; - constructor(readonly message: string, public readonly server: string) { - super(message); - } -} - -interface DnsResponse extends DNS2.$DnsResponse { - dns: string -} - -function createResolve(server: Array<[string, DNS2.DnsResolver]>): DNS2.DnsResolver { - return async (...args) => { - try { - return await asyncRetry(async () => { - const [dohServer, dohClient] = server[Math.floor(Math.random() * server.length)]; - - try { - return { - ...await dohClient(...args), - dns: dohServer - } satisfies DnsResponse; - } catch (e) { - // console.error(e); - throw new DnsError((e as Error).message, dohServer); - } - }, { retries: 5 }); - } catch (e) { - console.log('[doh error]', ...args, e); - throw e; - } - }; -} - -const resolve = createResolve(dohServers); -const domesticResolve = createResolve(domesticDohServers); - -async function getWhois(domain: string) { - return asyncRetry(() => whoiser.domain(domain), { retries: 5 }); -} (async () => { const domainSets = await new Fdir() @@ -133,122 +27,6 @@ async function getWhois(domain: string) { console.log('done'); })(); -const whoisNotFoundKeywordTest = createKeywordFilter([ - 'no match for', - 'does not exist', - 'not found' -]); - -const domainAliveMap = new Map(); -async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> { - if (domainAliveMap.has(apexDomain)) { - return [apexDomain, domainAliveMap.get(apexDomain)!]; - } - - const resp = await resolve(apexDomain, 'NS'); - - if (resp.answers.length > 0) { - return [apexDomain, true]; - } - - let whois; - - try { - whois = await getWhois(apexDomain); - } catch (e) { - console.log('[whois fail]', 'whois error', { domain: apexDomain }, e); - return [apexDomain, true]; - } - - if (Object.keys(whois).length > 0) { - // TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117 - if ('text' in whois && Array.isArray(whois.text) && whois.text.some(value => whoisNotFoundKeywordTest(value.toLowerCase()))) { - console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain }); - domainAliveMap.set(apexDomain, false); - return [apexDomain, false]; - } - - return [apexDomain, true]; - } - - if (!('dns' in whois)) { - console.log({ whois }); - } - - console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain }); - domainAliveMap.set(apexDomain, false); - return [apexDomain, false]; -} - -export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> { - if (domainAliveMap.has(domain)) { - return [domain, domainAliveMap.get(domain)!]; - } - - const apexDomain = tldts.getDomain(domain, looseTldtsOpt); - if (!apexDomain) { - console.log('[domain invalid]', 'no apex domain', { domain }); - domainAliveMap.set(domain, true); - return [domain, true] as const; - } - - const apexDomainAlive = await isApexDomainAlive(apexDomain); - - if (!apexDomainAlive[1]) { - domainAliveMap.set(domain, false); - return [domain, false] as const; - } - - const $domain = domain[0] === '.' ? domain.slice(1) : domain; - - if (!isSuffix) { - const aDns: string[] = []; - const aaaaDns: string[] = []; - - // test 2 times before make sure record is empty - for (let i = 0; i < 2; i++) { - // eslint-disable-next-line no-await-in-loop -- sequential - const aRecords = (await resolve($domain, 'A')); - if (aRecords.answers.length !== 0) { - domainAliveMap.set(domain, true); - return [domain, true] as const; - } - - aDns.push(aRecords.dns); - } - for (let i = 0; i < 2; i++) { - // eslint-disable-next-line no-await-in-loop -- sequential - const aaaaRecords = (await resolve($domain, 'AAAA')); - if (aaaaRecords.answers.length !== 0) { - domainAliveMap.set(domain, true); - return [domain, true] as const; - } - - aaaaDns.push(aaaaRecords.dns); - } - - // only then, let's test once with domesticDohServers - const aRecords = (await domesticResolve($domain, 'A')); - if (aRecords.answers.length !== 0) { - domainAliveMap.set(domain, true); - return [domain, true] as const; - } - aDns.push(aRecords.dns); - - const aaaaRecords = (await domesticResolve($domain, 'AAAA')); - if (aaaaRecords.answers.length !== 0) { - domainAliveMap.set(domain, true); - return [domain, true] as const; - } - aaaaDns.push(aaaaRecords.dns); - - console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns }); - } - - domainAliveMap.set($domain, false); - return [domain, false] as const; -} - export async function runAgainstRuleset(filepath: string) { const extname = path.extname(filepath); if (extname !== '.conf') { @@ -265,7 +43,7 @@ export async function runAgainstRuleset(filepath: string) { switch (type) { case 'DOMAIN-SUFFIX': case 'DOMAIN': { - promises.push(keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX'))); + promises.push(queue.add(() => keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX')))); break; } // no default @@ -288,7 +66,7 @@ export async function runAgainstDomainset(filepath: string) { for await (const l of readFileByLine(filepath)) { const line = processLine(l); if (!line) continue; - promises.push(keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.'))); + promises.push(queue.add(() => keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.')))); } await Promise.all(promises);