browsertrix-crawler/util/originoverride.js
Ilya Kreymer 7b906f921c
Origin Overrides: Ensure Host header also set (#326)
* origin overrides: ensure 'host' and 'origin' headers are also overridden, set to the *original* host and origin when sent to the destination origin
2023-05-30 19:25:37 -07:00

59 lines
1.6 KiB
JavaScript

import { errJSON, logger } from "./logger.js";
export class OriginOverride
{
constructor(originOverride) {
this.originOverride = originOverride.map((override) => {
let [orig, dest] = override.split("=");
const origUrl = new URL(orig);
const destUrl = new URL(dest);
return {origUrl, destUrl};
});
}
async initPage(browser, page) {
const onRequest = async (request) => {
try {
const url = request.url();
let newUrl = null;
let orig = null;
for (const {origUrl, destUrl} of this.originOverride) {
if (url.startsWith(origUrl.origin)) {
newUrl = destUrl.origin + url.slice(origUrl.origin.length);
orig = origUrl;
break;
}
}
if (!newUrl) {
request.continue({}, -1);
return;
}
const headers = new Headers(request.headers());
headers.set("host", orig.host);
if (headers.get("origin")) {
headers.set("origin", orig.origin);
}
const resp = await fetch(newUrl, {headers});
const body = Buffer.from(await resp.arrayBuffer());
const respHeaders = Object.fromEntries(resp.headers);
const status = resp.status;
logger.debug("Origin overridden", {orig: url, dest: newUrl, status, body: body.length}, "originoverride");
request.respond({body, headers: respHeaders, status}, -1);
} catch (e) {
logger.warn("Error overriding origin", {...errJSON(e), url: page.url()}, "originoverride");
request.continue({}, -1);
}
};
await browser.interceptRequest(page, onRequest);
}
}