Fix: improve whois matching w/ domain checking

This commit is contained in:
SukkaW 2025-01-05 18:06:31 +08:00
parent c7d1ebcddf
commit 57b5d2933f
3 changed files with 354 additions and 227 deletions

View File

@ -0,0 +1,77 @@
import { describe, it } from 'mocha';
import { isDomainAlive, whoisExists } from './is-domain-alive';
import { expect } from 'expect';
// Offline checks for whoisExists: every payload below has empty `Name Server`
// lists and a not-found style message in its `text` lines, and the expected
// answer for each of them is `false`.
describe('whoisExists', () => {
  type WhoisPayload = Parameters<typeof whoisExists>[0];

  // Builds a fresh reply in the whois.tcinet.ru format saying no entries were found.
  const tcinetNoEntries = (): WhoisPayload => ({
    'whois.tcinet.ru': {
      'Domain Status': [],
      'Name Server': [],
      text: [
        '% TCI Whois Service. Terms of use:',
        '% https://tcinet.ru/documents/whois_ru_rf.pdf (in Russian)',
        '% https://tcinet.ru/documents/whois_su.pdf (in Russian)',
        '',
        'No entries found for the selected source(s).',
        '',
        'Last updated on 2025-01-05T11:03:01Z'
      ]
    }
  });

  // [test title, WHOIS payload handed to whoisExists]
  const notFoundCases: Array<[title: string, payload: WhoisPayload]> = [
    ['.cryptocrawler.io', {
      'whois.nic.io': {
        'Domain Status': [],
        'Name Server': [],
        '>>> Last update of WHOIS database': '2025-01-05T11:06:38Z <<<',
        text: [
          'Domain not found.',
          '',
          'Terms of Use: Access to WHOIS'
        ]
      }
    }],
    ['.tunevideo.ru', tcinetNoEntries()],
    // NOTE(review): this case feeds the very same whois.tcinet.ru payload as
    // '.tunevideo.ru' — confirm that is the intended fixture for a .com name.
    ['.myqloud.com', tcinetNoEntries()]
  ];

  for (const [title, payload] of notFoundCases) {
    it(title, () => {
      expect(whoisExists(payload)).toBe(false);
    });
  }
});
// End-to-end checks for isDomainAlive. Only the boolean half of the returned
// tuple is asserted; the name half is ignored.
describe('isDomainAlive', function () {
  // A regular function (not an arrow) is required so that `this` is the mocha suite context.
  this.timeout(10000);

  // [domain passed in, isSuffix flag passed in, expected alive flag]
  const expectations: Array<[domain: string, isSuffix: boolean, alive: boolean]> = [
    ['.cryptocrawler.io', true, false],
    ['.tunevideo.ru', true, false],
    ['.myqloud.com', true, true],
    ['discount-deal.org', false, false]
  ];

  for (const [domain, isSuffix, alive] of expectations) {
    it(domain, async () => {
      const [, verdict] = await isDomainAlive(domain, isSuffix);
      expect(verdict).toEqual(alive);
    });
  }
});

View File

@ -0,0 +1,272 @@
import tldts from 'tldts-experimental';
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors';
import DNS2 from 'dns2';
import asyncRetry from 'async-retry';
import * as whoiser from 'whoiser';
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
// Promise registry keyed by a caller-chosen string. Entries are never removed.
const mutex = new Map<string, Promise<unknown>>();

/**
 * Runs `fn` at most once per `key` and shares its promise with every caller.
 *
 * The first call for a key invokes `fn` and stores the returned promise; every
 * later call with the same key gets that same promise back (whether it is still
 * pending or already settled) and its own `fn` is not invoked.
 *
 * @param key identifies the piece of work
 * @param fn  starts the work; only called when `key` has not been seen before
 * @returns the promise registered for `key`
 */
export function keyedAsyncMutexWithQueue<T>(key: string, fn: () => Promise<T>) {
  const existing = mutex.get(key) as Promise<T> | undefined;
  if (existing !== undefined) {
    return existing;
  }

  const started = fn();
  mutex.set(key, started);
  return started;
}
/**
 * Error type used for a failed DNS lookup attempt; besides the message it
 * remembers which server the failure is attributed to.
 */
class DnsError extends Error {
// Fixed error name so log output shows 'DnsError' instead of the default.
name = 'DnsError';
/**
 * @param message failure description, forwarded to the Error constructor
 * @param server  server the failure is attributed to, exposed as `server`
 */
constructor(readonly message: string, public readonly server: string) {
super(message);
}
}
/** A DNS2 response extended with the server that produced it. */
interface DnsResponse extends DNS2.$DnsResponse {
// Address or hostname of the server that answered this query.
dns: string
}
// General-purpose pool of DoH upstreams, stored as [server, resolver] pairs:
// the address/hostname exactly as listed, plus the DNS2.DOHClient created for it.
// Commented-out entries are kept together with the reason they are disabled.
const dohServers: Array<[string, DNS2.DnsResolver]> = ([
'8.8.8.8',
'8.8.4.4',
'1.0.0.1',
'1.1.1.1',
'162.159.36.1',
'162.159.46.1',
'101.101.101.101', // TWNIC
'185.222.222.222', // DNS.SB
'45.11.45.11', // DNS.SB
'dns10.quad9.net', // Quad9 unfiltered
'doh.sandbox.opendns.com', // OpenDNS sandbox (unfiltered)
'unfiltered.adguard-dns.com',
// '0ms.dev', // Proxy Cloudflare
// '76.76.2.0', // ControlD unfiltered, path not /dns-query
// '76.76.10.0', // ControlD unfiltered, path not /dns-query
// 'dns.bebasid.com', // BebasID, path not /dns-query but /unfiltered
// '193.110.81.0', // dns0.eu
// '185.253.5.0', // dns0.eu
// 'zero.dns0.eu',
'dns.nextdns.io',
'anycast.dns.nextdns.io',
'wikimedia-dns.org',
// 'ordns.he.net',
// 'dns.mullvad.net',
'basic.rethinkdns.com'
// 'ada.openbld.net',
// 'dns.rabbitdns.org'
] as const).map(dns => [
dns,
DNS2.DOHClient({
dns,
// NOTE(review): presumably selects HTTPS rather than plain HTTP — confirm against the dns2 DOHClient options.
http: false
// get: (url: string) => undici.request(url).then(r => r.body)
})
] as const);
// Second, smaller pool of DoH upstreams in the same [server, resolver] shape.
// NOTE(review): "domestic" presumably means mainland-China public resolvers — confirm.
const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([
'223.5.5.5',
'223.6.6.6',
'120.53.53.53',
'1.12.12.12'
] as const).map(dns => [
dns,
DNS2.DOHClient({
dns,
// NOTE(review): presumably selects HTTPS rather than plain HTTP — confirm against the dns2 DOHClient options.
http: false
// get: (url: string) => undici.request(url).then(r => r.body)
})
] as const);
/**
 * Builds a resolver backed by a pool of DoH clients.
 *
 * Every attempt picks one `[server, client]` pair at random from `server`,
 * forwards the query arguments to that client and tags the response with the
 * server that answered (`dns`). A failing attempt is rethrown as `DnsError`
 * carrying the server name, and attempts are driven by `asyncRetry` with
 * `retries: 5`. If the retry helper finally gives up, the error is logged with
 * the query arguments and rethrown.
 *
 * @param server pool of `[server, resolver]` pairs to choose from
 * @returns a resolver with the same call signature as the pooled clients
 */
function createResolve(server: Array<[string, DNS2.DnsResolver]>): DNS2.DnsResolver<DnsResponse> {
  return async (...args) => {
    // One lookup attempt against a randomly chosen upstream.
    const attempt = async () => {
      const pick = Math.floor(Math.random() * server.length);
      const [dohServer, dohClient] = server[pick];
      try {
        const response = await dohClient(...args);
        return {
          ...response,
          dns: dohServer
        } satisfies DnsResponse;
      } catch (e) {
        // console.error(e);
        throw new DnsError((e as Error).message, dohServer);
      }
    };

    try {
      return await asyncRetry(attempt, { retries: 5 });
    } catch (e) {
      console.log('[doh error]', ...args, e);
      throw e;
    }
  };
}
// Resolver that picks from the general `dohServers` pool.
const resolve = createResolve(dohServers);
// Resolver that picks from the `domesticDohServers` pool.
const domesticResolve = createResolve(domesticDohServers);
/**
 * Looks up the WHOIS data for `domain` through `whoiser.domain`, driven by
 * `asyncRetry` with `retries: 5`.
 *
 * @param domain domain name to look up
 * @returns whatever `whoiser.domain` resolves with
 */
async function getWhois(domain: string) {
  const lookup = () => whoiser.domain(domain);
  return asyncRetry(lookup, { retries: 5 });
}
// Verdict cache: domain name -> alive flag.
const domainAliveMap = new Map<string, boolean>();

/** Stores `alive` for `domain` in the cache and returns it as a `[domain, alive]` tuple. */
function recordDomainVerdict(domain: string, alive: boolean): [string, boolean] {
  domainAliveMap.set(domain, alive);
  return [domain, alive];
}

/** Caches `domain` as alive and returns `[domain, true]`. */
function onDomainAlive(domain: string): [string, boolean] {
  return recordDomainVerdict(domain, true);
}

/** Caches `domain` as dead and returns `[domain, false]`. */
function onDomainDead(domain: string): [string, boolean] {
  return recordDomainVerdict(domain, false);
}
/**
 * Decides whether `domain` should be treated as alive.
 *
 * Checks, in order:
 * 1. a verdict already cached in `domainAliveMap` under `domain` is returned as-is;
 * 2. when tldts cannot derive an apex domain, the input is cached and reported as alive;
 * 3. the apex domain is checked through `keyedAsyncMutexWithQueue`, so concurrent
 *    callers sharing an apex share one check. For suffix entries, or when the apex
 *    is dead, that apex verdict is returned directly — its first element is the
 *    apex domain, not `domain`;
 * 4. otherwise the name (with one leading `.` removed) is queried for A records
 *    twice and AAAA records twice via `resolve`, then once each via
 *    `domesticResolve`. The first non-empty answer makes the domain alive; if
 *    every answer is empty the domain is dead.
 *
 * @param domain   domain to check, optionally written with a leading `.`
 * @param isSuffix when true, only the apex-domain verdict is used
 * @returns `[name, alive]`; `name` is `domain`, or the apex domain in case 3
 */
export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
  const cached = domainAliveMap.get(domain);
  if (cached !== undefined) {
    return [domain, cached];
  }

  const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
  if (!apexDomain) {
    console.log(picocolors.gray('[domain invalid]'), picocolors.gray('no apex domain'), { domain });
    return onDomainAlive(domain);
  }

  const apexDomainAlive = await keyedAsyncMutexWithQueue(apexDomain, () => isApexDomainAlive(apexDomain));
  // Suffix entries only need the apex verdict; a dead apex settles everything below it.
  if (isSuffix || !apexDomainAlive[1]) {
    return apexDomainAlive;
  }

  const $domain = domain[0] === '.' ? domain.slice(1) : domain;

  // Servers that returned an empty answer, kept only for the final log line.
  const aDns: string[] = [];
  const aaaaDns: string[] = [];

  // test 2 times before make sure record is empty
  for (let i = 0; i < 2; i++) {
    // eslint-disable-next-line no-await-in-loop -- sequential
    const aRecords = (await resolve($domain, 'A'));
    if (aRecords.answers.length > 0) {
      return onDomainAlive(domain);
    }
    aDns.push(aRecords.dns);
  }
  for (let i = 0; i < 2; i++) {
    // eslint-disable-next-line no-await-in-loop -- sequential
    const aaaaRecords = (await resolve($domain, 'AAAA'));
    if (aaaaRecords.answers.length > 0) {
      return onDomainAlive(domain);
    }
    aaaaDns.push(aaaaRecords.dns);
  }

  // only then, let's test once with domesticDohServers
  const aRecords = (await domesticResolve($domain, 'A'));
  if (aRecords.answers.length > 0) {
    return onDomainAlive(domain);
  }
  aDns.push(aRecords.dns);

  const aaaaRecords = (await domesticResolve($domain, 'AAAA'));
  if (aaaaRecords.answers.length > 0) {
    return onDomainAlive(domain);
  }
  aaaaDns.push(aaaaRecords.dns);

  console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });

  // The cache lookup above and every "alive" exit use `domain` as the key, so the
  // dead verdict must be stored and reported under `domain` too. The dot-stripped
  // entry is still written so existing lookups by that spelling keep working.
  if ($domain !== domain) {
    domainAliveMap.set($domain, false);
  }
  return onDomainDead(domain);
}
// NS lookups already started, keyed by apex domain, so each apex is queried once.
const apexDomainNsResolvePromiseMap = new Map<string, Promise<DnsResponse>>();

/**
 * Decides whether an apex domain is alive.
 *
 * A cached verdict wins. Otherwise an NS query is issued (or an already started
 * one is reused); any NS answer means alive. With no NS answer the WHOIS data
 * is fetched: a WHOIS failure marks the domain dead, and otherwise
 * `whoisExists` decides. Every verdict is stored via `onDomainAlive` /
 * `onDomainDead`.
 *
 * @param apexDomain apex domain to check
 * @returns `[apexDomain, alive]`
 */
async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
  const cachedVerdict = domainAliveMap.get(apexDomain);
  if (cachedVerdict !== undefined) {
    return [apexDomain, cachedVerdict];
  }

  let nsLookup = apexDomainNsResolvePromiseMap.get(apexDomain);
  if (nsLookup === undefined) {
    nsLookup = resolve(apexDomain, 'NS');
    apexDomainNsResolvePromiseMap.set(apexDomain, nsLookup);
  }
  const nsResponse = await nsLookup;
  if (nsResponse.answers.length > 0) {
    return onDomainAlive(apexDomain);
  }

  // No NS answer: fall back to WHOIS.
  let whois;
  try {
    whois = await getWhois(apexDomain);
  } catch (e) {
    console.log(picocolors.red('[domain dead]'), 'whois error', { domain: apexDomain }, e);
    return onDomainDead(apexDomain);
  }
  // console.log(JSON.stringify(whois, null, 2));

  const registered = whoisExists(whois);
  if (!registered) {
    console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain });
    return onDomainDead(apexDomain);
  }

  console.log(picocolors.gray('[domain alive]'), 'whois found', { domain: apexDomain });
  return onDomainAlive(apexDomain);
}
// TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
// Keyword matcher built from lowercase phrases. whoisExists runs it against each
// lowercased line of a response's `text` array and treats a hit as "not registered".
const whoisNotFoundKeywordTest = createKeywordFilter([
'no match for',
'does not exist',
'not found',
'no entries',
'no data found',
'is available for registration',
'currently available for application'
]);
/**
 * Tells whether a WHOIS result describes an existing registration.
 *
 * Own keys are visited in enumeration order and the first decisive one wins:
 * - `error`: a non-empty string or non-empty array logs the whole result and
 *   returns `true`;
 * - `text`: an array with a line matching `whoisNotFoundKeywordTest`
 *   (after lowercasing) returns `false`;
 * - `Name Server`: an empty array returns `false`;
 * - any other key holding a non-array object is checked recursively, and a
 *   `false` from the nested result returns `false`.
 *
 * If no key is decisive the result is `true` when the object has at least one
 * own key and `false` when it has none.
 *
 * @param whois result object to inspect (possibly a nested per-server section)
 * @returns `true` when the data is taken to describe an existing registration
 */
export function whoisExists(whois: whoiser.WhoisSearchResult) {
  let sawOwnKey = false;

  // for...in is kept deliberately: unlike Object.keys it tolerates a null value
  // reaching this function through the recursive call.
  for (const key in whois) {
    if (!Object.hasOwn(whois, key)) {
      continue;
    }
    sawOwnKey = true;

    switch (key) {
      case 'error': {
        const err = whois.error;
        const hasError = (typeof err === 'string' && err)
          || (Array.isArray(err) && err.length > 0);
        if (hasError) {
          console.error(whois);
          return true;
        }
        break;
      }
      case 'text': {
        const lines = whois.text;
        if (Array.isArray(lines) && lines.some(line => whoisNotFoundKeywordTest(line.toLowerCase()))) {
          return false;
        }
        break;
      }
      case 'Name Server': {
        const nameServers = whois[key];
        if (Array.isArray(nameServers) && nameServers.length === 0) {
          return false;
        }
        break;
      }
      default: {
        const section = whois[key];
        if (typeof section === 'object' && !Array.isArray(section) && !whoisExists(section)) {
          return false;
        }
      }
    }
  }

  return sawOwnKey;
}

View File

@ -1,119 +1,13 @@
import DNS2 from 'dns2';
import { readFileByLine } from './lib/fetch-text-by-line';
import { processLine } from './lib/process-line';
import tldts from 'tldts-experimental';
import { looseTldtsOpt } from './constants/loose-tldts-opt';
import { fdir as Fdir } from 'fdir';
import { SOURCE_DIR } from './constants/dir';
import path from 'node:path';
import { newQueue } from '@henrygd/queue';
import asyncRetry from 'async-retry';
import * as whoiser from 'whoiser';
import picocolors from 'picocolors';
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
// General-purpose pool of DoH upstreams, stored as [server, resolver] pairs:
// the address/hostname exactly as listed, plus the DNS2.DOHClient created for it.
// Commented-out entries are kept together with the reason they are disabled.
const dohServers: Array<[string, DNS2.DnsResolver]> = ([
'8.8.8.8',
'8.8.4.4',
'1.0.0.1',
'1.1.1.1',
'162.159.36.1',
'162.159.46.1',
'101.101.101.101', // TWNIC
'185.222.222.222', // DNS.SB
'45.11.45.11', // DNS.SB
'dns10.quad9.net', // Quad9 unfiltered
'doh.sandbox.opendns.com', // OpenDNS sandbox (unfiltered)
'unfiltered.adguard-dns.com',
// '0ms.dev', // Proxy Cloudflare
// '76.76.2.0', // ControlD unfiltered, path not /dns-query
// '76.76.10.0', // ControlD unfiltered, path not /dns-query
// 'dns.bebasid.com', // BebasID, path not /dns-query but /unfiltered
// '193.110.81.0', // dns0.eu
// '185.253.5.0', // dns0.eu
// 'zero.dns0.eu',
'dns.nextdns.io',
'anycast.dns.nextdns.io',
'wikimedia-dns.org',
// 'ordns.he.net',
// 'dns.mullvad.net',
'basic.rethinkdns.com'
// 'ada.openbld.net',
// 'dns.rabbitdns.org'
] as const).map(dns => [
dns,
DNS2.DOHClient({
dns,
// NOTE(review): presumably selects HTTPS rather than plain HTTP — confirm against the dns2 DOHClient options.
http: false
// get: (url: string) => undici.request(url).then(r => r.body)
})
] as const);
// Second, smaller pool of DoH upstreams in the same [server, resolver] shape.
// NOTE(review): "domestic" presumably means mainland-China public resolvers — confirm.
const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([
'223.5.5.5',
'223.6.6.6',
'120.53.53.53',
'1.12.12.12'
] as const).map(dns => [
dns,
DNS2.DOHClient({
dns,
// NOTE(review): presumably selects HTTPS rather than plain HTTP — confirm against the dns2 DOHClient options.
http: false
// get: (url: string) => undici.request(url).then(r => r.body)
})
] as const);
import { isDomainAlive, keyedAsyncMutexWithQueue } from './lib/is-domain-alive';
import { fdir as Fdir } from 'fdir';
const queue = newQueue(32);
// Promise registry keyed by a caller-chosen string. Entries are never removed.
const mutex = new Map<string, Promise<unknown>>();
/**
 * Runs `fn` at most once per `key`: the first call hands `fn` to `queue.add`
 * and stores the resulting promise; later calls with the same key get that
 * stored promise back and their own `fn` is not used.
 */
function keyedAsyncMutexWithQueue<T>(key: string, fn: () => Promise<T>) {
if (mutex.has(key)) {
return mutex.get(key) as Promise<T>;
}
// NOTE(review): `queue` presumably caps how many tasks run at once — confirm against @henrygd/queue.
const promise = queue.add(() => fn());
mutex.set(key, promise);
return promise;
}
/**
 * Error type used for a failed DNS lookup attempt; besides the message it
 * remembers which server the failure is attributed to.
 */
class DnsError extends Error {
// Fixed error name so log output shows 'DnsError' instead of the default.
name = 'DnsError';
/**
 * @param message failure description, forwarded to the Error constructor
 * @param server  server the failure is attributed to, exposed as `server`
 */
constructor(readonly message: string, public readonly server: string) {
super(message);
}
}
/** A DNS2 response extended with the server that produced it. */
interface DnsResponse extends DNS2.$DnsResponse {
// Address or hostname of the server that answered this query.
dns: string
}
/**
 * Builds a resolver backed by a pool of DoH clients.
 *
 * @param server pool of `[server, resolver]` pairs to choose from
 * @returns a resolver that forwards its arguments to a randomly chosen client
 *          and adds the chosen server to the response as `dns`
 */
function createResolve(server: Array<[string, DNS2.DnsResolver]>): DNS2.DnsResolver<DnsResponse> {
return async (...args) => {
try {
// Attempts are driven by asyncRetry with `retries: 5`.
return await asyncRetry(async () => {
// A new random upstream is picked on every attempt.
const [dohServer, dohClient] = server[Math.floor(Math.random() * server.length)];
try {
return {
...await dohClient(...args),
dns: dohServer
} satisfies DnsResponse;
} catch (e) {
// console.error(e);
// Rethrow with the failing server attached.
throw new DnsError((e as Error).message, dohServer);
}
}, { retries: 5 });
} catch (e) {
// The retry helper gave up: log the query arguments and the error, then propagate.
console.log('[doh error]', ...args, e);
throw e;
}
};
}
// Resolver that picks from the general `dohServers` pool.
const resolve = createResolve(dohServers);
// Resolver that picks from the `domesticDohServers` pool.
const domesticResolve = createResolve(domesticDohServers);
/**
 * Looks up the WHOIS data for `domain` through `whoiser.domain`, driven by
 * `asyncRetry` with `retries: 5`.
 */
async function getWhois(domain: string) {
return asyncRetry(() => whoiser.domain(domain), { retries: 5 });
}
(async () => {
const domainSets = await new Fdir()
@ -133,122 +27,6 @@ async function getWhois(domain: string) {
console.log('done');
})();
// Keyword matcher built from lowercase phrases; it is applied to lowercased
// WHOIS `text` lines, and a hit is treated as "not registered".
const whoisNotFoundKeywordTest = createKeywordFilter([
'no match for',
'does not exist',
'not found'
]);
// Verdict cache: domain name -> alive flag.
const domainAliveMap = new Map<string, boolean>();
/**
 * Decides whether an apex domain is alive: a cached verdict wins, then an NS
 * query, then WHOIS data.
 *
 * @param apexDomain apex domain to check
 * @returns `[apexDomain, alive]`
 */
async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
if (domainAliveMap.has(apexDomain)) {
return [apexDomain, domainAliveMap.get(apexDomain)!];
}
const resp = await resolve(apexDomain, 'NS');
if (resp.answers.length > 0) {
// NOTE(review): this alive verdict is returned without being written to domainAliveMap.
return [apexDomain, true];
}
let whois;
try {
whois = await getWhois(apexDomain);
} catch (e) {
// A failed WHOIS lookup is treated as alive (and not cached).
console.log('[whois fail]', 'whois error', { domain: apexDomain }, e);
return [apexDomain, true];
}
if (Object.keys(whois).length > 0) {
// TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
// Only a top-level `text` array is inspected for not-found phrases.
if ('text' in whois && Array.isArray(whois.text) && whois.text.some(value => whoisNotFoundKeywordTest(value.toLowerCase()))) {
console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain });
domainAliveMap.set(apexDomain, false);
return [apexDomain, false];
}
// Any other non-empty WHOIS result counts as alive (not cached).
return [apexDomain, true];
}
// NOTE(review): this point is only reached when `whois` has no own keys, so the check below is always true.
if (!('dns' in whois)) {
console.log({ whois });
}
console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain });
domainAliveMap.set(apexDomain, false);
return [apexDomain, false];
}
/**
 * Decides whether `domain` should be treated as alive: cached verdict first,
 * then the apex-domain check, then (for non-suffix entries) A/AAAA queries.
 *
 * @param domain   domain to check, optionally written with a leading `.`
 * @param isSuffix when true the A/AAAA queries are skipped
 * @returns `[domain, alive]`
 */
export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
if (domainAliveMap.has(domain)) {
return [domain, domainAliveMap.get(domain)!];
}
const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
if (!apexDomain) {
// No apex domain could be derived: treat the input as alive.
console.log('[domain invalid]', 'no apex domain', { domain });
domainAliveMap.set(domain, true);
return [domain, true] as const;
}
const apexDomainAlive = await isApexDomainAlive(apexDomain);
if (!apexDomainAlive[1]) {
// A dead apex makes the domain dead as well.
domainAliveMap.set(domain, false);
return [domain, false] as const;
}
// Name used for DNS queries: the input without one leading dot.
const $domain = domain[0] === '.' ? domain.slice(1) : domain;
if (!isSuffix) {
// Servers that returned an empty answer, kept only for the log line below.
const aDns: string[] = [];
const aaaaDns: string[] = [];
// test 2 times before make sure record is empty
for (let i = 0; i < 2; i++) {
// eslint-disable-next-line no-await-in-loop -- sequential
const aRecords = (await resolve($domain, 'A'));
if (aRecords.answers.length !== 0) {
domainAliveMap.set(domain, true);
return [domain, true] as const;
}
aDns.push(aRecords.dns);
}
for (let i = 0; i < 2; i++) {
// eslint-disable-next-line no-await-in-loop -- sequential
const aaaaRecords = (await resolve($domain, 'AAAA'));
if (aaaaRecords.answers.length !== 0) {
domainAliveMap.set(domain, true);
return [domain, true] as const;
}
aaaaDns.push(aaaaRecords.dns);
}
// only then, let's test once with domesticDohServers
const aRecords = (await domesticResolve($domain, 'A'));
if (aRecords.answers.length !== 0) {
domainAliveMap.set(domain, true);
return [domain, true] as const;
}
aDns.push(aRecords.dns);
const aaaaRecords = (await domesticResolve($domain, 'AAAA'));
if (aaaaRecords.answers.length !== 0) {
domainAliveMap.set(domain, true);
return [domain, true] as const;
}
aaaaDns.push(aaaaRecords.dns);
console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
}
// NOTE(review): when isSuffix is true and the apex is alive, control skips the block above
// and still lands here, so the domain is reported dead. The verdict is also cached under
// `$domain` while the lookup at the top of the function uses `domain`.
domainAliveMap.set($domain, false);
return [domain, false] as const;
}
export async function runAgainstRuleset(filepath: string) {
const extname = path.extname(filepath);
if (extname !== '.conf') {
@ -265,7 +43,7 @@ export async function runAgainstRuleset(filepath: string) {
switch (type) {
case 'DOMAIN-SUFFIX':
case 'DOMAIN': {
promises.push(keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX')));
promises.push(queue.add(() => keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX'))));
break;
}
// no default
@ -288,7 +66,7 @@ export async function runAgainstDomainset(filepath: string) {
for await (const l of readFileByLine(filepath)) {
const line = processLine(l);
if (!line) continue;
promises.push(keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.')));
promises.push(queue.add(() => keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.'))));
}
await Promise.all(promises);