// Surge_by_SukkaW/Build/build-mitm-hostname.ts

import { readFileByLine } from './lib/fetch-text-by-line';
import fsPromises from 'fs/promises';
import pathFn from 'path';
import table from 'table';
import listDir from '@sukka/listdir';
import { green, yellow } from 'picocolors';
import { processLineFromReadline } from './lib/process-line';
import { getHostname } from 'tldts';

/**
 * Hostnames that seed the MITM hostname set before any rule is parsed, so they
 * are part of the printed `hostname = %APPEND% …` line regardless of what the
 * rule files yield. Entries may use `*` as a wildcard.
 */
const PRESET_MITM_HOSTNAMES: string[] = [
  // Disabled presets, kept for reference:
  // '*baidu.com',
  '*.ydstatic.com',
  // '*snssdk.com',
  // '*musical.com',
  // '*musical.ly',
  // '*snssdk.ly',

  // zhihu
  'api.zhihu.com',
  'www.zhihu.com',

  // chelaile
  'api.chelaile.net.cn',
  'atrace.chelaile.net.cn',

  // meituan
  '*.meituan.net',

  // playcvn / zmzapi
  'ctrl.playcvn.com',
  'ctrl.playcvn.net',
  'ctrl.zmzapi.com',
  'ctrl.zmzapi.net',

  // zhuishushenqi
  'api.zhuishushenqi.com',
  'b.zhuishushenqi.com',

  // cmvideo
  'ggic.cmvideo.cn',
  'ggic2.cmvideo.cn',

  // pcauto / pconline
  'mrobot.pcauto.com.cn',
  'mrobot.pconline.com.cn',

  // umetrip
  'home.umetrip.com',
  'discardrp.umetrip.com',
  'startup.umetrip.com',

  // jd
  'dsp-x.jd.com',
  'bdsp-x.jd.com'
];

/**
 * Script entry point.
 *
 * 1. Reads `../Modules/sukka_url_rewrite.sgmodule` and every file listed under
 *    `../List/`, and collects the URL regex patterns they contain.
 * 2. Reduces each regex to a hostname-like pattern (`processed`) via plain
 *    string substitutions, keeping the source regex (`origin`) for reporting.
 * 3. Parses each pattern with `getHostnameSafe`; successes join the MITM
 *    hostname set (seeded with `PRESET_MITM_HOSTNAMES`), failures are listed.
 * 4. Prints the `hostname = %APPEND% …` line, a table of parsed hostnames
 *    (green when matched by the filtered hostname regexps, yellow otherwise),
 *    and the list of patterns that failed to parse.
 */
(async () => {
  type UrlRegexPath = { origin: string, processed: string };

  // Placeholder standing in for the optional `(www.)?` group while the regex is
  // being flattened; it is expanded into an apex and a `www.` entry afterwards.
  const WWW_OR_NOT = '{www or not}';

  /** Reduces a rewrite-module URL regex to a hostname-like pattern. */
  const toRewriteHostPattern = (pattern: string): string => {
    const [host = ''] = pattern
      .replaceAll('(www.)?', WWW_OR_NOT)
      .replaceAll('^https?://', '')
      .replaceAll('^https://', '')
      .replaceAll('^http://', '')
      .split('/');

    return host
      .replaceAll('\\.', '.')
      .replaceAll('.+', '*')
      .replaceAll('(.*)', '*');
  };

  /** Reduces a ruleset `URL-REGEX` pattern to a hostname-like pattern. */
  const toRulesetHostPattern = (pattern: string): string => {
    const [host = ''] = pattern
      .replaceAll('^https?://', '')
      .replaceAll('^https://', '')
      .replaceAll('^http://', '')
      .split('/');

    return host
      .replaceAll('\\.', '.')
      .replaceAll('.+', '*')
      .replaceAll('\\d', '*')
      .replaceAll('([a-z])', '*')
      .replaceAll('[a-z]', '*')
      .replaceAll('([0-9])', '*')
      .replaceAll('[0-9]', '*')
      .replaceAll(/{.+?}/g, '') // drop quantifiers such as `{1,3}`
      .replaceAll(/\*+/g, '*'); // collapse runs of wildcards
  };

  const folderListPath = pathFn.resolve(__dirname, '../List/');
  const rulesets = await listDir(folderListPath);

  // --- Patterns from the URL rewrite module --------------------------------
  const moduleContent = await fsPromises.readFile(
    pathFn.join(__dirname, '../Modules/sukka_url_rewrite.sgmodule'),
    { encoding: 'utf-8' }
  );

  const rewritePaths: UrlRegexPath[] = moduleContent
    .split('\n')
    // Blank lines carry no pattern; keeping them would only add an empty entry
    // to the "Parsed Failed" report.
    .filter(line => line.trim() !== '' && !line.startsWith('#') && !line.startsWith('['))
    .map(line => {
      // The first space-separated token of the line is the URL regex.
      const [origin = ''] = line.split(' ');
      return { origin, processed: toRewriteHostPattern(origin) };
    });

  // Expand `(www.)?` into two entries: the apex domain stays in place, the
  // `www.` variant is appended after all rewrite-module entries.
  const urlRegexPaths: UrlRegexPath[] = [];
  const bothWwwApexDomains: UrlRegexPath[] = [];
  for (const entry of rewritePaths) {
    if (!entry.processed.includes(WWW_OR_NOT)) {
      urlRegexPaths.push(entry);
      continue;
    }

    const apex = entry.processed.replace(WWW_OR_NOT, '');
    urlRegexPaths.push({ origin: entry.origin, processed: apex });
    bothWwwApexDomains.push({ origin: entry.origin, processed: `www.${apex}` });
  }
  urlRegexPaths.push(...bothWwwApexDomains);

  // --- Patterns from the rulesets -------------------------------------------
  // Files are read concurrently, but each file's result is returned rather than
  // pushed into a shared array so the final order follows the directory listing
  // instead of I/O completion order.
  const rulesetPaths = await Promise.all(rulesets.map(async (file: string) => {
    const content = await processLineFromReadline(readFileByLine(pathFn.join(folderListPath, file)));

    return content
      .filter((line: string) => line.startsWith('URL-REGEX') && !line.includes('http://'))
      .map((line: string) => line.split(',')[1])
      // A `URL-REGEX` line without a comma has no pattern field; skip it
      // instead of crashing on `undefined`.
      .filter((pattern: string | undefined): pattern is string => typeof pattern === 'string')
      .map((pattern: string): UrlRegexPath => ({
        origin: pattern,
        processed: toRulesetHostPattern(pattern)
      }));
  }));
  for (const paths of rulesetPaths) {
    urlRegexPaths.push(...paths);
  }

  // --- Deduplicate -----------------------------------------------------------
  // Entries are objects, so a plain `new Set(entries)` compares identities and
  // never removes anything. Deduplicate on the (origin, processed) pair.
  const seenKeys = new Set<string>();
  const dedupedUrlRegexPaths = urlRegexPaths.filter(({ origin, processed }) => {
    const key = JSON.stringify([origin, processed]);
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });

  // --- Parse hostnames -------------------------------------------------------
  const mitmDomains = new Set<string>(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
  const parsedFailures = new Set<string>();
  const parsedHostnames: Array<{ origin: string, hostname: string }> = [];

  for (const { origin, processed } of dedupedUrlRegexPaths) {
    const hostname = getHostnameSafe(processed);

    if (hostname) {
      mitmDomains.add(hostname);
      parsedHostnames.push({ origin, hostname });
    } else {
      parsedFailures.add(`${origin} ${processed} ${hostname}`);
    }
  }

  // Regexps built from the hostnames that pass the sanity filter below; they
  // only decide the colour of each row in the report.
  const mitmDomainsRegExpArray = Array.from(mitmDomains)
    .filter(i => {
      return i.length > 3
        && !i.includes('.mp4') // Special Case
        && i !== '(www.)' // Special Case
        && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
        && !i.startsWith('.')
        && !i.endsWith('.')
        && !i.endsWith('*');
    })
    .map(i => {
      return new RegExp(
        escapeRegExp(i)
          .replaceAll('{www or not}', '(www.)?')
          .replaceAll('\\*', '(.*)')
      );
    });

  const parsedDomainsData: Array<[string, string]> = parsedHostnames.map(
    ({ origin, hostname }): [string, string] => {
      const paint = matchWithRegExpArray(hostname, mitmDomainsRegExpArray) ? green : yellow;
      return [paint(hostname), origin];
    }
  );

  // --- Report ----------------------------------------------------------------
  console.log('Mitm Hostnames:');
  console.log(`hostname = %APPEND% ${Array.from(mitmDomains).join(', ')}`);
  console.log('--------------------');
  console.log('Parsed Sucessed:');
  console.log(table.table(parsedDomainsData, {
    border: table.getBorderCharacters('void'),
    columnDefault: {
      paddingLeft: 0,
      paddingRight: 3
    },
    drawHorizontalLine: () => false
  }));
  console.log('--------------------');
  console.log('Parsed Failed');
  console.log([...parsedFailures].join('\n'));
})();

/** Utility functions */

/**
 * Extracts a hostname from `input` with `getHostname`, rejecting results that
 * contain anything other than whitespace, word characters, `*`, `.` or `-`.
 *
 * @param input Hostname-like pattern to parse.
 * @returns The extracted hostname, `null` when it contains a disallowed
 *   character, or the falsy value returned by `getHostname` unchanged.
 */
function getHostnameSafe(input: string) {
  const hostname = getHostname(input);

  // Nothing to validate: hand back whatever falsy value the parser produced.
  if (!hostname) return hostname;

  const hasDisallowedChar = /[^\s\w*.-]/.test(hostname);
  return hasDisallowedChar ? null : hostname;
}

/**
 * Tells whether `input` is matched by at least one of the given regexps.
 * Regexps are tried in order and evaluation stops at the first match.
 *
 * @param input String to test.
 * @param regexps Candidate regexps; defaults to an empty list.
 * @returns `true` if any regexp matches, `false` otherwise (including when the
 *   list is empty).
 */
function matchWithRegExpArray(input: string, regexps: RegExp[] = []) {
  return regexps.some(pattern => pattern.test(input));
}

/**
 * Escapes the regular-expression metacharacters
 * `$ ( ) * + . ? [ \ ] ^ { | }` in `string` by prefixing each with a backslash,
 * so the result can be embedded in a `RegExp` source and match literally.
 *
 * @param string Text to escape; defaults to the empty string.
 * @returns The escaped text, or the input unchanged when it is empty or has no
 *   metacharacters.
 */
function escapeRegExp(string = '') {
  // Falsy input is returned as-is.
  if (!string) return string;

  // A global replace leaves the string untouched when nothing matches, so no
  // separate "has special characters" check is required.
  return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&');
}