mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Chore: update domain alive check
This commit is contained in:
parent
bcf6244e1e
commit
803e503a1e
@ -3,45 +3,14 @@ import { describe, it } from 'mocha';
|
||||
import { isDomainAlive } from './is-domain-alive';
|
||||
import { expect } from 'expect';
|
||||
|
||||
import process from 'node:process';
|
||||
|
||||
describe('isDomainAlive', function () {
|
||||
this.timeout(10000);
|
||||
|
||||
// it('.cryptocrawler.io', async () => {
|
||||
// expect((await isDomainAlive('.cryptocrawler.io', true))[1]).toEqual(false);
|
||||
// });
|
||||
|
||||
// it('.tunevideo.ru', async () => {
|
||||
// expect((await isDomainAlive('.tunevideo.ru', true))[1]).toEqual(false);
|
||||
// });
|
||||
|
||||
// it('.myqloud.com', async () => {
|
||||
// expect((await isDomainAlive('.myqloud.com', true))[1]).toEqual(true);
|
||||
// });
|
||||
|
||||
// it('discount-deal.org', async () => {
|
||||
// expect((await isDomainAlive('discount-deal.org', false))[1]).toEqual(false);
|
||||
// });
|
||||
|
||||
// it('ithome.com.tw', async () => {
|
||||
// expect((await isDomainAlive('ithome.com.tw', false))[1]).toEqual(true);
|
||||
// });
|
||||
|
||||
// it('flipkart.com', async () => {
|
||||
// expect((await isDomainAlive('flipkart.com', false))[1]).toEqual(true);
|
||||
// });
|
||||
|
||||
// it('lzzyimg.com', async () => {
|
||||
// expect((await isDomainAlive('.lzzyimg.com', true))[1]).toEqual(true);
|
||||
// });
|
||||
|
||||
// it('tayfundogdas.me', async () => {
|
||||
// expect((await isDomainAlive('.tayfundogdas.me', true))[1]).toEqual(true);
|
||||
// });
|
||||
it('samsungcloudsolution.net', async () => {
|
||||
expect((await isDomainAlive('samsungcloudsolution.net', true))).toEqual(false);
|
||||
});
|
||||
|
||||
it('ecdasoin.it', async () => {
|
||||
process.env.DEBUG = 'true';
|
||||
expect((await isDomainAlive('.ecdasoin.it', true))[1]).toEqual(false);
|
||||
expect((await isDomainAlive('.ecdasoin.it', true))).toEqual(false);
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,25 +1,15 @@
|
||||
import tldts from 'tldts-experimental';
|
||||
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||
import picocolors from 'picocolors';
|
||||
import { pickRandom, pickOne } from 'foxts/pick-random';
|
||||
|
||||
import DNS2 from 'dns2';
|
||||
import asyncRetry from 'async-retry';
|
||||
import picocolors from 'picocolors';
|
||||
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||
import { createKeyedAsyncMutex } from './keyed-async-mutex';
|
||||
import { pickRandom, pickOne } from 'foxts/pick-random';
|
||||
import tldts from 'tldts-experimental';
|
||||
import * as whoiser from 'whoiser';
|
||||
|
||||
import process from 'node:process';
|
||||
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
|
||||
|
||||
import process from 'node:process';
|
||||
|
||||
const mutex = new Map<string, Promise<unknown>>();
|
||||
export function keyedAsyncMutexWithQueue<T>(key: string, fn: () => Promise<T>) {
|
||||
if (mutex.has(key)) {
|
||||
return mutex.get(key) as Promise<T>;
|
||||
}
|
||||
const promise = fn();
|
||||
mutex.set(key, promise);
|
||||
return promise;
|
||||
}
|
||||
const domainAliveMap = new Map<string, boolean>();
|
||||
|
||||
class DnsError extends Error {
|
||||
name = 'DnsError';
|
||||
@ -88,6 +78,127 @@ const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([
|
||||
})
|
||||
] as const);
|
||||
|
||||
const domainAliveMutex = createKeyedAsyncMutex('isDomainAlive');
|
||||
|
||||
export async function isDomainAlive(domain: string, isIncludeAllSubdomain: boolean = domain[0] === '.'): Promise<boolean> {
|
||||
if (domainAliveMap.has(domain)) {
|
||||
return domainAliveMap.get(domain)!;
|
||||
}
|
||||
const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
|
||||
if (!apexDomain) {
|
||||
console.log(picocolors.gray('[domain invalid]'), picocolors.gray('no apex domain'), { domain });
|
||||
domainAliveMap.set('.' + domain, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
const apexDomainAlive = await isApexDomainAlive(apexDomain);
|
||||
if (isIncludeAllSubdomain || domain.length > apexDomain.length) {
|
||||
return apexDomainAlive;
|
||||
}
|
||||
if (!apexDomainAlive) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return domainAliveMutex.acquire(domain, async () => {
|
||||
domain = domain[0] === '.' ? domain.slice(1) : domain;
|
||||
|
||||
const $domain = isIncludeAllSubdomain ? '.' + domain : domain;
|
||||
|
||||
const aDns: string[] = [];
|
||||
const aaaaDns: string[] = [];
|
||||
|
||||
// test 2 times before make sure record is empty
|
||||
const servers = pickRandom(dohServers, 2);
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aRecords = (await $resolve(domain, 'A', servers[i]));
|
||||
if (aRecords.answers.length > 0) {
|
||||
domainAliveMap.set($domain, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
aDns.push(aRecords.dns);
|
||||
}
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aaaaRecords = (await $resolve(domain, 'AAAA', servers[i]));
|
||||
if (aaaaRecords.answers.length > 0) {
|
||||
domainAliveMap.set($domain, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
aaaaDns.push(aaaaRecords.dns);
|
||||
}
|
||||
|
||||
// only then, let's test twice with domesticDohServers
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aRecords = (await $resolve(domain, 'A', pickOne(domesticDohServers)));
|
||||
if (aRecords.answers.length > 0) {
|
||||
domainAliveMap.set($domain, true);
|
||||
return true;
|
||||
}
|
||||
aDns.push(aRecords.dns);
|
||||
}
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aaaaRecords = (await $resolve(domain, 'AAAA', pickOne(domesticDohServers)));
|
||||
if (aaaaRecords.answers.length > 0) {
|
||||
domainAliveMap.set($domain, true);
|
||||
return true;
|
||||
}
|
||||
aaaaDns.push(aaaaRecords.dns);
|
||||
}
|
||||
|
||||
console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
|
||||
|
||||
domainAliveMap.set($domain, false);
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
const apexDomainMap = createKeyedAsyncMutex('isApexDomainAlive');
|
||||
|
||||
function isApexDomainAlive(apexDomain: string) {
|
||||
if (domainAliveMap.has(apexDomain)) {
|
||||
return domainAliveMap.get(apexDomain)!;
|
||||
}
|
||||
|
||||
return apexDomainMap.acquire(apexDomain, async () => {
|
||||
const servers = pickRandom(dohServers, 2);
|
||||
for (let i = 0, len = servers.length; i < len; i++) {
|
||||
const server = servers[i];
|
||||
// eslint-disable-next-line no-await-in-loop -- one by one
|
||||
const resp = await $resolve(apexDomain, 'NS', server);
|
||||
if (resp.answers.length > 0) {
|
||||
domainAliveMap.set(apexDomain, true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
let whois;
|
||||
try {
|
||||
whois = await getWhois(apexDomain);
|
||||
} catch (e) {
|
||||
console.log(picocolors.red('[whois error]'), { domain: apexDomain }, e);
|
||||
domainAliveMap.set(apexDomain, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
const whoisError = noWhois(whois);
|
||||
if (!whoisError) {
|
||||
console.log(picocolors.gray('[domain alive]'), picocolors.gray('whois found'), { domain: apexDomain });
|
||||
domainAliveMap.set(apexDomain, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain, err: whoisError });
|
||||
|
||||
domainAliveMap.set(apexDomain, false);
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
async function $resolve(name: string, type: DNS2.PacketQuestion, server: [string, DNS2.DnsResolver]) {
|
||||
try {
|
||||
return await asyncRetry(async () => {
|
||||
@ -113,140 +224,6 @@ async function getWhois(domain: string) {
|
||||
return asyncRetry(() => whoiser.domain(domain, { raw: true }), { retries: 5 });
|
||||
}
|
||||
|
||||
const domainAliveMap = new Map<string, boolean>();
|
||||
function onDomainAlive(domain: string): [string, boolean] {
|
||||
domainAliveMap.set(domain, true);
|
||||
return [domain, true];
|
||||
}
|
||||
function onDomainDead(domain: string): [string, boolean] {
|
||||
domainAliveMap.set(domain, false);
|
||||
return [domain, false];
|
||||
}
|
||||
|
||||
export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
|
||||
if (domainAliveMap.has(domain)) {
|
||||
return [domain, domainAliveMap.get(domain)!];
|
||||
}
|
||||
|
||||
const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
|
||||
if (!apexDomain) {
|
||||
console.log(picocolors.gray('[domain invalid]'), picocolors.gray('no apex domain'), { domain });
|
||||
return onDomainAlive(domain);
|
||||
}
|
||||
|
||||
const apexDomainAlive = await keyedAsyncMutexWithQueue(apexDomain, () => isApexDomainAlive(apexDomain));
|
||||
if (isSuffix) {
|
||||
return apexDomainAlive;
|
||||
}
|
||||
if (!apexDomainAlive[1]) {
|
||||
return apexDomainAlive;
|
||||
}
|
||||
|
||||
const $domain = domain[0] === '.' ? domain.slice(1) : domain;
|
||||
|
||||
const aDns: string[] = [];
|
||||
const aaaaDns: string[] = [];
|
||||
|
||||
// test 2 times before make sure record is empty
|
||||
const servers = pickRandom(dohServers, 2);
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aRecords = (await $resolve($domain, 'A', servers[i]));
|
||||
if (aRecords.answers.length > 0) {
|
||||
return onDomainAlive(domain);
|
||||
}
|
||||
|
||||
aDns.push(aRecords.dns);
|
||||
}
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aaaaRecords = (await $resolve($domain, 'AAAA', servers[i]));
|
||||
if (aaaaRecords.answers.length > 0) {
|
||||
return onDomainAlive(domain);
|
||||
}
|
||||
|
||||
aaaaDns.push(aaaaRecords.dns);
|
||||
}
|
||||
|
||||
// only then, let's test twice with domesticDohServers
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aRecords = (await $resolve($domain, 'A', pickOne(domesticDohServers)));
|
||||
if (aRecords.answers.length > 0) {
|
||||
return onDomainAlive(domain);
|
||||
}
|
||||
aDns.push(aRecords.dns);
|
||||
}
|
||||
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- sequential
|
||||
const aaaaRecords = (await $resolve($domain, 'AAAA', pickOne(domesticDohServers)));
|
||||
if (aaaaRecords.answers.length > 0) {
|
||||
return onDomainAlive(domain);
|
||||
}
|
||||
aaaaDns.push(aaaaRecords.dns);
|
||||
}
|
||||
|
||||
console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
|
||||
return onDomainDead($domain);
|
||||
}
|
||||
|
||||
const apexDomainNsResolvePromiseMap = new Map<string, Promise<boolean>>();
|
||||
|
||||
async function getNS(domain: string) {
|
||||
const servers = pickRandom(dohServers, 2);
|
||||
for (let i = 0, len = servers.length; i < len; i++) {
|
||||
const server = servers[i];
|
||||
// eslint-disable-next-line no-await-in-loop -- one by one
|
||||
const resp = await $resolve(domain, 'NS', server);
|
||||
if (resp.answers.length > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
|
||||
if (domainAliveMap.has(apexDomain)) {
|
||||
return [apexDomain, domainAliveMap.get(apexDomain)!];
|
||||
}
|
||||
|
||||
let hasNS: boolean;
|
||||
if (apexDomainNsResolvePromiseMap.has(apexDomain)) {
|
||||
hasNS = await apexDomainNsResolvePromiseMap.get(apexDomain)!;
|
||||
} else {
|
||||
const promise = getNS(apexDomain);
|
||||
apexDomainNsResolvePromiseMap.set(apexDomain, promise);
|
||||
hasNS = await promise;
|
||||
}
|
||||
|
||||
if (hasNS) {
|
||||
return onDomainAlive(apexDomain);
|
||||
}
|
||||
|
||||
let whois;
|
||||
|
||||
try {
|
||||
whois = await getWhois(apexDomain);
|
||||
} catch (e) {
|
||||
console.log(picocolors.red('[whois error]'), { domain: apexDomain }, e);
|
||||
return onDomainAlive(apexDomain);
|
||||
}
|
||||
|
||||
if (process.env.DEBUG) {
|
||||
console.log(JSON.stringify(whois, null, 2));
|
||||
}
|
||||
|
||||
const whoisError = noWhois(whois);
|
||||
if (!whoisError) {
|
||||
console.log(picocolors.gray('[domain alive]'), picocolors.gray('whois found'), { domain: apexDomain });
|
||||
return onDomainAlive(apexDomain);
|
||||
}
|
||||
|
||||
console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain, err: whoisError });
|
||||
return onDomainDead('.' + apexDomain);
|
||||
}
|
||||
|
||||
// TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
|
||||
const whoisNotFoundKeywordTest = createKeywordFilter([
|
||||
'no match for',
|
||||
@ -269,11 +246,10 @@ const whoisNotFoundKeywordTest = createKeywordFilter([
|
||||
// 'pendingdelete',
|
||||
' has been blocked by '
|
||||
]);
|
||||
|
||||
// whois server can redirect, so whoiser might/will get info from multiple whois servers
|
||||
// some servers (like TLD whois servers) might have cached/outdated results
|
||||
// we can only make sure a domain is alive once all response from all whois servers demonstrate so
|
||||
export function noWhois(whois: whoiser.WhoisSearchResult): null | string {
|
||||
function noWhois(whois: whoiser.WhoisSearchResult): null | string {
|
||||
let empty = true;
|
||||
|
||||
for (const key in whois) {
|
||||
|
||||
23
Build/lib/keyed-async-mutex.ts
Normal file
23
Build/lib/keyed-async-mutex.ts
Normal file
@ -0,0 +1,23 @@
|
||||
const globalMap = new Map<string, Map<string, Promise<unknown>>>();
|
||||
|
||||
export function createKeyedAsyncMutex(globalNamespaceKey: string) {
|
||||
let map;
|
||||
if (globalMap.has(globalNamespaceKey)) {
|
||||
map = globalMap.get(globalNamespaceKey)!;
|
||||
} else {
|
||||
map = new Map();
|
||||
globalMap.set(globalNamespaceKey, map);
|
||||
}
|
||||
|
||||
return {
|
||||
async acquire<T = unknown>(key: string, fn: () => Promise<T>) {
|
||||
if (map.has(key)) {
|
||||
return map.get(key);
|
||||
}
|
||||
|
||||
const promise = fn();
|
||||
map.set(key, promise);
|
||||
return promise;
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -1,18 +1,10 @@
|
||||
import { SOURCE_DIR } from './constants/dir';
|
||||
import path from 'node:path';
|
||||
import { newQueue } from '@henrygd/queue';
|
||||
import { isDomainAlive, keyedAsyncMutexWithQueue } from './lib/is-domain-alive';
|
||||
import { isDomainAlive } from './lib/is-domain-alive';
|
||||
import { fdir as Fdir } from 'fdir';
|
||||
import runAgainstSourceFile from './lib/run-against-source-file';
|
||||
|
||||
const queue = newQueue(24);
|
||||
|
||||
const deadDomains: string[] = [];
|
||||
function onDomain(args: [string, boolean]) {
|
||||
if (!args[1]) {
|
||||
deadDomains.push(args[0]);
|
||||
}
|
||||
}
|
||||
|
||||
(async () => {
|
||||
const domainSets = await new Fdir()
|
||||
@ -42,12 +34,14 @@ function onDomain(args: [string, boolean]) {
|
||||
].map(
|
||||
filepath => runAgainstSourceFile(
|
||||
filepath,
|
||||
(domain: string, includeAllSubdomain: boolean) => promises.push(queue.add(
|
||||
() => keyedAsyncMutexWithQueue(
|
||||
domain,
|
||||
() => isDomainAlive(domain, includeAllSubdomain)
|
||||
).then(onDomain)
|
||||
))
|
||||
(domain: string, includeAllSubdomain: boolean) => promises.push(
|
||||
isDomainAlive(domain, includeAllSubdomain).then((alive) => {
|
||||
if (alive) {
|
||||
return;
|
||||
}
|
||||
deadDomains.push(includeAllSubdomain ? '.' + domain : domain);
|
||||
})
|
||||
)
|
||||
).then(() => console.log('[crawl]', filepath))
|
||||
));
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user