mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-07 21:59:48 +00:00
better handling of net::ERR_HTTP_RESPONSE_CODE_FAILURE: (#934)
- HTTP headers provided but no payload: record the response
- record the page as failed with the status code provided; don't attempt to retry
This commit is contained in:
parent
822de93301
commit
993081d3ee
3 changed files with 26 additions and 3 deletions
|
|
@ -1327,7 +1327,7 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
}
|
}
|
||||||
// if page loaded, considered page finished successfully
|
// if page loaded, considered page finished successfully
|
||||||
// (even if behaviors timed out)
|
// (even if behaviors timed out)
|
||||||
const { loadState, logDetails, depth, url, pageSkipped } = data;
|
const { loadState, logDetails, depth, url, pageSkipped, noRetries } = data;
|
||||||
|
|
||||||
if (data.loadState >= LoadState.FULL_PAGE_LOADED) {
|
if (data.loadState >= LoadState.FULL_PAGE_LOADED) {
|
||||||
await this.writePage(data);
|
await this.writePage(data);
|
||||||
|
|
@ -1347,7 +1347,7 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
if (pageSkipped) {
|
if (pageSkipped) {
|
||||||
await this.crawlState.markExcluded(url);
|
await this.crawlState.markExcluded(url);
|
||||||
} else {
|
} else {
|
||||||
const retry = await this.crawlState.markFailed(url);
|
const retry = await this.crawlState.markFailed(url, noRetries);
|
||||||
|
|
||||||
if (this.healthChecker) {
|
if (this.healthChecker) {
|
||||||
this.healthChecker.incError();
|
this.healthChecker.incError();
|
||||||
|
|
@ -2215,8 +2215,8 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
if (msg !== "logged") {
|
if (msg !== "logged") {
|
||||||
const loadState = data.loadState;
|
const loadState = data.loadState;
|
||||||
|
|
||||||
// excluded in recorder
|
|
||||||
if (msg.startsWith("net::ERR_BLOCKED_BY_RESPONSE")) {
|
if (msg.startsWith("net::ERR_BLOCKED_BY_RESPONSE")) {
|
||||||
|
// excluded in recorder
|
||||||
data.pageSkipped = true;
|
data.pageSkipped = true;
|
||||||
logger.warn("Page Load Blocked, skipping", { msg, loadState });
|
logger.warn("Page Load Blocked, skipping", { msg, loadState });
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -2274,6 +2274,17 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (failed) {
|
if (failed) {
|
||||||
|
const failText = resp.request().failure()?.errorText;
|
||||||
|
if (isChromeError && failText === "net::ERR_HTTP_RESPONSE_CODE_FAILURE") {
|
||||||
|
data.noRetries = true;
|
||||||
|
logger.warn(
|
||||||
|
"Page is an empty non-200 response, not retrying",
|
||||||
|
{ url, status, ...logDetails },
|
||||||
|
"pageStatus",
|
||||||
|
);
|
||||||
|
throw new Error("logged");
|
||||||
|
}
|
||||||
|
|
||||||
return this.pageFailed(
|
return this.pageFailed(
|
||||||
isChromeError ? "Page Crashed on Load" : "Page Invalid Status",
|
isChromeError ? "Page Crashed on Load" : "Page Invalid Status",
|
||||||
retry,
|
retry,
|
||||||
|
|
|
||||||
|
|
@ -464,6 +464,16 @@ export class Recorder extends EventEmitter {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "net::ERR_HTTP_RESPONSE_CODE_FAILURE":
|
||||||
|
logger.warn("Recording empty non-200 status response", {
|
||||||
|
url,
|
||||||
|
status: reqresp.status,
|
||||||
|
errorText,
|
||||||
|
type,
|
||||||
|
...this.logDetails,
|
||||||
|
});
|
||||||
|
return this.serializeToWARC(reqresp);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
this.lastErrorText = errorText;
|
this.lastErrorText = errorText;
|
||||||
logger.warn(
|
logger.warn(
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,8 @@ export class PageState {
|
||||||
|
|
||||||
skipBehaviors = false;
|
skipBehaviors = false;
|
||||||
pageSkipped = false;
|
pageSkipped = false;
|
||||||
|
noRetries = false;
|
||||||
|
|
||||||
asyncLoading = false;
|
asyncLoading = false;
|
||||||
filteredFrames: Frame[] = [];
|
filteredFrames: Frame[] = [];
|
||||||
loadState: LoadState = LoadState.FAILED;
|
loadState: LoadState = LoadState.FAILED;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue