mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00

* origin overrides: ensure 'host' and 'origin' headers are also overridden, set to the *original* host and origin when sent to the destination origin
59 lines
1.6 KiB
JavaScript
59 lines
1.6 KiB
JavaScript
import { errJSON, logger } from "./logger.js";
|
|
|
|
/**
 * Find the first configured override whose origin the request URL belongs to
 * and compute the rewritten URL on the destination origin.
 *
 * A plain prefix test is not enough: "https://example.com" is also a textual
 * prefix of "https://example.com.evil.test/", "https://example.com@evil.test/"
 * and (for default ports) "http://localhost:8080/". The origin prefix must
 * therefore be followed by the end of the URL or by "/", "?" or "#".
 *
 * @param {string} url - Request URL as reported by the intercepted request.
 * @param {{origUrl: URL, destUrl: URL}[]} overrides - Parsed override pairs.
 * @returns {{newUrl: string, orig: URL} | null} Rewritten URL and the matched
 *   original URL, or null when no override applies.
 */
function findOriginOverride(url, overrides) {
  for (const { origUrl, destUrl } of overrides) {
    const prefix = origUrl.origin;
    if (!url.startsWith(prefix)) {
      continue;
    }

    const rest = url.slice(prefix.length);
    // Reject look-alike hosts, userinfo tricks and different ports.
    if (rest !== "" && !"/?#".includes(rest[0])) {
      continue;
    }

    return { newUrl: destUrl.origin + rest, orig: origUrl };
  }

  return null;
}

/**
 * Serves requests for selected origins from a different destination origin.
 *
 * Each override is a string of the form "<original-url>=<destination-url>";
 * only the origin of each URL is used when matching and rewriting.
 */
export class OriginOverride {
  /**
   * @param {string[]} originOverride - List of "<orig>=<dest>" override strings.
   * @throws {TypeError} If either side of an override is not a valid URL.
   */
  constructor(originOverride) {
    this.originOverride = originOverride.map((override) => {
      const [orig, dest] = override.split("=");
      const origUrl = new URL(orig);
      const destUrl = new URL(dest);

      return { origUrl, destUrl };
    });
  }

  /**
   * Register a request interceptor on the page that fetches matching requests
   * from the destination origin and responds with the fetched result.
   *
   * Requests that match no override, and requests whose override fails, are
   * continued unchanged.
   *
   * @param {object} browser - Object exposing `interceptRequest(page, handler)`.
   * @param {object} page - Page to intercept; `page.url()` is used for logging.
   * @returns {Promise<void>}
   */
  async initPage(browser, page) {
    const onRequest = async (request) => {
      try {
        const url = request.url();
        const match = findOriginOverride(url, this.originOverride);

        if (!match) {
          await request.continue({}, -1);
          return;
        }

        const { newUrl, orig } = match;

        // The destination must see the *original* host/origin, not its own.
        const headers = new Headers(request.headers());
        headers.set("host", orig.host);
        if (headers.get("origin")) {
          headers.set("origin", orig.origin);
        }

        // NOTE(review): only headers are forwarded; the intercepted request's
        // method and body are not passed to fetch.
        const resp = await fetch(newUrl, { headers });

        const body = Buffer.from(await resp.arrayBuffer());
        const respHeaders = Object.fromEntries(resp.headers);
        const status = resp.status;

        logger.debug("Origin overridden", { orig: url, dest: newUrl, status, body: body.length }, "originoverride");

        // Awaited so that a rejection is handled by the catch below instead of
        // surfacing as an unhandled promise rejection.
        await request.respond({ body, headers: respHeaders, status }, -1);
      } catch (e) {
        logger.warn("Error overriding origin", { ...errJSON(e), url: page.url() }, "originoverride");
        try {
          await request.continue({}, -1);
        } catch (continueErr) {
          // Best-effort fallback: the request may already have been handled.
          logger.warn("Error continuing request after failed origin override", { ...errJSON(continueErr), url: page.url() }, "originoverride");
        }
      }
    };

    await browser.interceptRequest(page, onRequest);
  }
}
|