mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
logging: don't log filtered out direct fetch attempt as error (#432)
When directFetchCapture aborts a response by throwing an exception, it now throws `new Error("response-filtered-out")` so that the caller can recognize and ignore it. This exception is used only for direct capture and should not be logged as an error; instead, it is rethrown and handled in the calling function to indicate that the direct fetch was skipped.
This commit is contained in:
parent
ab0f66aa54
commit
3972942f5f
2 changed files with 14 additions and 7 deletions
|
@ -703,7 +703,12 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
return true;
|
||||
}
|
||||
} catch (e) {
|
||||
// ignore failed direct fetch attempt, do browser-based capture
|
||||
// filtered out direct fetch
|
||||
logger.debug(
|
||||
"Direct fetch response not accepted, continuing with browser fetch",
|
||||
logDetails,
|
||||
"fetch",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1084,13 +1084,17 @@ class AsyncFetcher {
|
|||
serializer,
|
||||
),
|
||||
);
|
||||
} catch (e) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} catch (e: any) {
|
||||
await crawlState.removeDupe(ASYNC_FETCH_DUPE_KEY, url!);
|
||||
if (e.message === "response-filtered-out") {
|
||||
throw e;
|
||||
}
|
||||
logger.error(
|
||||
"Streaming Fetch Error",
|
||||
{ url, networkId, filename, ...errJSON(e), ...logDetails },
|
||||
"recorder",
|
||||
);
|
||||
await crawlState.removeDupe(ASYNC_FETCH_DUPE_KEY, url!);
|
||||
} finally {
|
||||
recorder.removeReqResp(networkId);
|
||||
}
|
||||
|
@ -1122,7 +1126,7 @@ class AsyncFetcher {
|
|||
|
||||
if (this.filter && !this.filter(resp) && abort) {
|
||||
abort.abort();
|
||||
throw new Error("invalid response, ignoring fetch");
|
||||
throw new Error("response-filtered-out");
|
||||
}
|
||||
|
||||
if (
|
||||
|
@ -1137,9 +1141,7 @@ class AsyncFetcher {
|
|||
reqresp.payload = new Uint8Array();
|
||||
return;
|
||||
} else if (!resp.body) {
|
||||
logger.error("Empty body, stopping fetch", { url }, "recorder");
|
||||
await this.recorder.crawlState.removeDupe(ASYNC_FETCH_DUPE_KEY, url!);
|
||||
return;
|
||||
throw new Error("fetch body missing, fetch aborted");
|
||||
}
|
||||
|
||||
reqresp.fillFetchResponse(resp);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue