Add checkCF(), which detects the Cloudflare DDoS-protection ("under attack") page and waits, re-checking every 5.5 seconds, until the original page has loaded

This commit is contained in:
Ilya Kreymer 2022-03-14 19:00:51 -07:00
parent 0fcc89fdb4
commit d1e53e6e26
2 changed files with 16 additions and 1 deletions

View file

@ -310,6 +310,8 @@ class Crawler {
await page._client.send("Network.setBypassServiceWorker", {bypass: true}); await page._client.send("Network.setBypassServiceWorker", {bypass: true});
} }
await page.evaluateOnNewDocument("Object.defineProperty(navigator, \"webdriver\", {value: false});");
if (this.params.behaviorOpts && !page.__bx_inited) { if (this.params.behaviorOpts && !page.__bx_inited) {
await page.exposeFunction(BEHAVIOR_LOG_FUNC, (logdata) => this._behaviorLog(logdata)); await page.exposeFunction(BEHAVIOR_LOG_FUNC, (logdata) => this._behaviorLog(logdata));
await page.evaluateOnNewDocument(behaviors + `;\nself.__bx_behaviors.init(${this.params.behaviorOpts});`); await page.evaluateOnNewDocument(behaviors + `;\nself.__bx_behaviors.init(${this.params.behaviorOpts});`);
@ -581,6 +583,8 @@ class Crawler {
const seed = this.params.scopedSeeds[seedId]; const seed = this.params.scopedSeeds[seedId];
await this.checkCF(page);
// skip extraction if at max depth // skip extraction if at max depth
if (seed.isAtMaxDepth(depth) || !selectorOptsList) { if (seed.isAtMaxDepth(depth) || !selectorOptsList) {
return; return;
@ -649,6 +653,17 @@ class Crawler {
} }
} }
async checkCF(page) {
try {
while (await page.$("div.cf-browser-verification.cf-im-under-attack")) {
this.statusLog("Cloudflare Check Detected, waiting for reload...");
await this.sleep(5500);
}
} catch (e) {
console.warn(e);
}
}
async queueUrl(seedId, url, depth, extraHops = 0) { async queueUrl(seedId, url, depth, extraHops = 0) {
if (this.limitHit) { if (this.limitHit) {
return false; return false;

View file

@ -126,7 +126,7 @@ async function main() {
await page.setCacheEnabled(false); await page.setCacheEnabled(false);
if (params.interactive) { if (params.interactive) {
await page.evaluateOnNewDocument('Object.defineProperty(navigator, "webdriver", {value: false});'); await page.evaluateOnNewDocument("Object.defineProperty(navigator, \"webdriver\", {value: false});");
// for testing, inject browsertrix-behaviors // for testing, inject browsertrix-behaviors
await page.evaluateOnNewDocument(behaviors + ";\nself.__bx_behaviors.init();"); await page.evaluateOnNewDocument(behaviors + ";\nself.__bx_behaviors.init();");
} }