// Review notes for this patch (commentary only; the patch text below is unchanged).
//
// crawler.js
//   - Registers a page.evaluateOnNewDocument() script that defines
//     navigator.webdriver with the value false. The call is added immediately
//     before the `this.params.behaviorOpts && !page.__bx_inited` block.
//   - Awaits this.checkCF(page) right after `seed` is looked up and before the
//     "skip extraction if at max depth" early return, so the check runs even
//     when that early return is taken.
//   - Adds checkCF(page): while
//     page.$("div.cf-browser-verification.cf-im-under-attack") resolves to a
//     truthy value, it logs a status message via this.statusLog() and awaits
//     this.sleep(5500), then queries again. Any error thrown inside the loop is
//     caught and passed to console.warn(); nothing is rethrown to the caller.
//   NOTE(review): the while loop has no attempt limit or deadline, so it keeps
//   polling for as long as the selector matches — consider bounding the wait.
//   NOTE(review): 5500 is presumably milliseconds; sleep() is not shown in this
//   patch — confirm against its definition.
//
// create-login-profile.js
//   - Changes only the quoting of the navigator.webdriver override string
//     (single-quoted literal -> double-quoted literal with escaped inner
//     quotes); the resulting string value is the same.
diff --git a/crawler.js b/crawler.js index e864efe2..3b8fedc9 100644 --- a/crawler.js +++ b/crawler.js @@ -310,6 +310,8 @@ class Crawler { await page._client.send("Network.setBypassServiceWorker", {bypass: true}); } + await page.evaluateOnNewDocument("Object.defineProperty(navigator, \"webdriver\", {value: false});"); + if (this.params.behaviorOpts && !page.__bx_inited) { await page.exposeFunction(BEHAVIOR_LOG_FUNC, (logdata) => this._behaviorLog(logdata)); await page.evaluateOnNewDocument(behaviors + `;\nself.__bx_behaviors.init(${this.params.behaviorOpts});`); @@ -581,6 +583,8 @@ class Crawler { const seed = this.params.scopedSeeds[seedId]; + await this.checkCF(page); + // skip extraction if at max depth if (seed.isAtMaxDepth(depth) || !selectorOptsList) { return; @@ -649,6 +653,17 @@ class Crawler { } } + async checkCF(page) { + try { + while (await page.$("div.cf-browser-verification.cf-im-under-attack")) { + this.statusLog("Cloudflare Check Detected, waiting for reload..."); + await this.sleep(5500); + } + } catch (e) { + console.warn(e); + } + } + async queueUrl(seedId, url, depth, extraHops = 0) { if (this.limitHit) { return false; diff --git a/create-login-profile.js b/create-login-profile.js index bfd53728..3157e298 100755 --- a/create-login-profile.js +++ b/create-login-profile.js @@ -126,7 +126,7 @@ async function main() { await page.setCacheEnabled(false); if (params.interactive) { - await page.evaluateOnNewDocument('Object.defineProperty(navigator, "webdriver", {value: false});'); + await page.evaluateOnNewDocument("Object.defineProperty(navigator, \"webdriver\", {value: false});"); // for testing, inject browsertrix-behaviors await page.evaluateOnNewDocument(behaviors + ";\nself.__bx_behaviors.init();"); }