// Mirror of https://github.com/webrecorder/browsertrix-crawler.git
// (synced 2025-10-19 14:33:17 +00:00; 240 lines, 6 KiB, JavaScript)
import { getStatusText } from "@webrecorder/wabac/src/utils.js";
|
||
|
|
||
|
// Lower-case header names used when looking up and rewriting response headers.
const CONTENT_LENGTH = "content-length";
const CONTENT_TYPE = "content-type";

// Lower-case header names that are not emitted under their own name when a
// header dictionary is built (they are renamed with an "x-orig-" prefix or
// skipped instead).
const EXCLUDE_HEADERS = ["content-encoding", "transfer-encoding"];

// ===========================================================================
/**
 * Collects everything known about one network request and its response,
 * identified by `requestId`, as the pieces arrive through the `fill*` methods.
 * The `get*` / `is*` methods then derive header dictionaries, a raw response
 * header block and simple checks from the collected state.
 *
 * NOTE(review): the `fill*` inputs look like Chrome DevTools Protocol
 * Network / Fetch payloads -- confirm against the callers.
 */
export class RequestResponseInfo
{
  /**
   * @param requestId - identifier of the request this record tracks
   */
  constructor(requestId) {
    this._created = new Date();

    this.requestId = requestId;

    this.ts = null;

    // request data
    this.method = null;
    this.url = null;
    this.protocol = "HTTP/1.1";

    this.requestHeaders = null;
    this.requestHeadersText = null;

    this.postData = null;
    this.hasPostData = false;

    // response data
    this.status = 0;
    this.statusText = null;

    this.responseHeaders = null;
    this.responseHeadersList = null;
    this.responseHeadersText = null;

    this.payload = null;

    this.fromServiceWorker = false;

    this.fetch = false;

    this.resourceType = null;

    // assigned by fillFetchRequestPaused()
    this.frameId = null;

    this.extraOpts = {};

    this.readSize = 0;
    this.expectedSize = 0;

    // set to true to indicate async loading in progress
    this.asyncLoading = false;

    // set to add truncated message
    this.truncated = null;
  }

  /**
   * Copy the request fields (url, method, headers, post data) from
   * `params.request`, and the resource type from `params.type` when present.
   * Request headers that were already set are not overwritten.
   *
   * @param {object} params - object with a `request` member and optional `type`
   */
  fillRequest(params) {
    this.url = params.request.url;
    this.method = params.request.method;
    if (!this.requestHeaders) {
      this.requestHeaders = params.request.headers;
    }
    this.postData = params.request.postData;
    this.hasPostData = params.request.hasPostData;

    if (params.type) {
      this.resourceType = params.type;
    }
  }

  /**
   * Fill both the request and the response status / header list from a single
   * paused-fetch payload, and mark this record as coming from fetch
   * interception (`this.fetch = true`).
   *
   * @param {object} params - object with `request`, `responseStatusCode`,
   *   `responseStatusText`, `responseHeaders` (list of `{name, value}`),
   *   `resourceType` and `frameId`
   */
  fillFetchRequestPaused(params) {
    this.fillRequest(params);

    this.status = params.responseStatusCode;
    this.statusText = params.responseStatusText || getStatusText(this.status);

    this.responseHeadersList = params.responseHeaders;

    this.fetch = true;
    this.resourceType = params.resourceType;

    this.frameId = params.frameId;
  }

  /**
   * Copy response details (url without fragment, status, protocol, headers,
   * service-worker flag, certificate details) from `response`.
   *
   * @param {object} response - response description; `protocol`,
   *   `requestHeaders`, `requestHeadersText`, `headersText` and
   *   `securityDetails` are optional
   */
  fillResponse(response) {
    // if initial fetch was a 200, but now replacing with 304, don't!
    if (response.status === 304 && this.status && this.status !== 304 && this.url) {
      return;
    }

    this.url = response.url.split("#")[0];

    this.status = response.status;
    this.statusText = response.statusText || getStatusText(this.status);

    // keep the existing protocol (default "HTTP/1.1") when none is reported,
    // otherwise the generated status line would start with "undefined"
    if (response.protocol) {
      this.protocol = response.protocol;
    }

    if (response.requestHeaders) {
      this.requestHeaders = response.requestHeaders;
    }
    if (response.requestHeadersText) {
      this.requestHeadersText = response.requestHeadersText;
    }

    this.responseHeaders = response.headers;

    if (response.headersText) {
      this.responseHeadersText = response.headersText;
    }

    this.fromServiceWorker = !!response.fromServiceWorker;

    if (response.securityDetails) {
      const issuer = response.securityDetails.issuer || "";
      const ctc = response.securityDetails.certificateTransparencyCompliance === "compliant" ? "1" : "0";
      this.extraOpts.cert = {issuer, ctc};
    }
  }

  /**
   * @returns {boolean} true when the status is a 3xx redirect (other than 304)
   *   whose `location` response header resolves back to `this.url`
   */
  isSelfRedirect() {
    if (this.status < 300 || this.status >= 400 || this.status === 304) {
      return false;
    }
    try {
      const headers = new Headers(this.responseHeaders);
      const location = headers.get("location");
      // without this check a missing header would be resolved as the
      // relative path "null"
      if (location === null) {
        return false;
      }
      const redirUrl = new URL(location, this.url).href;
      return this.url === redirUrl;
    } catch (e) {
      // unusable headers or URL: treat as "not a self-redirect"
      return false;
    }
  }

  /**
   * Record the IP address space of the response in `extraOpts.ipType`.
   * Headers carried by `params` are currently not copied.
   *
   * @param {object} params - object with `resourceIPAddressSpace`
   */
  fillResponseReceivedExtraInfo(params) {
    this.extraOpts.ipType = params.resourceIPAddressSpace;
  }

  /**
   * Fill the response headers and status from a fetch-style response object.
   *
   * @param {object} response - object with iterable `headers` entries,
   *   `status` and `statusText`
   */
  fillFetchResponse(response) {
    this.responseHeaders = Object.fromEntries(response.headers);
    this.status = response.status;
    this.statusText = response.statusText || getStatusText(this.status);
  }

  /**
   * Replace the request headers with `params.headers`.
   *
   * @param {object} params - object with `headers`
   */
  fillRequestExtraInfo(params) {
    this.requestHeaders = params.headers;
  }

  /**
   * Build the raw response header block: a status line followed by one
   * `name: value` line per header and a terminating blank line, all separated
   * by CRLF. Newlines inside a value are replaced with ", ".
   *
   * Uses `responseHeaders` when set, otherwise falls back to the
   * `responseHeadersList` entries; with neither, only the status line is
   * produced.
   *
   * @returns {string}
   */
  getResponseHeadersText() {
    let headers = `${this.protocol} ${this.status} ${this.statusText}\r\n`;

    let entries = [];
    if (this.responseHeaders) {
      entries = Object.entries(this.responseHeaders);
    } else if (this.responseHeadersList) {
      entries = this.responseHeadersList.map(({name, value}) => [name, value]);
    }

    for (const [name, value] of entries) {
      headers += `${name}: ${value.replace(/\n/g, ", ")}\r\n`;
    }
    headers += "\r\n";
    return headers;
  }

  /**
   * @returns a truthy value when a method and some form of request headers
   *   are known, a falsy value otherwise (the result is not coerced to boolean)
   */
  hasRequest() {
    return this.method && (this.requestHeaders || this.requestHeadersText);
  }

  /**
   * @returns {object} cleaned request headers, see `_getHeadersDict`
   */
  getRequestHeadersDict() {
    return this._getHeadersDict(this.requestHeaders, null);
  }

  /**
   * @param {number} [length] - when truthy, replaces the content-length value
   * @returns {object} cleaned response headers, see `_getHeadersDict`
   */
  getResponseHeadersDict(length) {
    return this._getHeadersDict(this.responseHeaders, this.responseHeadersList, length);
  }

  /**
   * Produce a cleaned name -> value header dictionary:
   *  - names starting with ":" are removed,
   *  - headers listed in EXCLUDE_HEADERS are kept under an "x-orig-" prefixed
   *    name,
   *  - content-length is replaced by `actualContentLength` when that is truthy,
   *  - newlines inside values are replaced with ", ".
   *
   * When `headersDict` is given it is modified in place and returned. When it
   * is missing, a new dictionary with lower-cased names is built from
   * `headersList`. With neither, an empty object is returned.
   *
   * @param {object|null} headersDict - name -> string value
   * @param {Array<{name: string, value: string}>|null} headersList
   * @param {number} [actualContentLength]
   * @returns {object}
   */
  _getHeadersDict(headersDict, headersList, actualContentLength) {
    if (!headersDict && headersList) {
      headersDict = {};

      for (const header of headersList) {
        let headerName = header.name.toLowerCase();
        if (EXCLUDE_HEADERS.includes(headerName)) {
          // keep the value under the renamed key, matching the dict handling below
          headerName = "x-orig-" + headerName;
        } else if (actualContentLength && headerName === CONTENT_LENGTH) {
          headersDict[headerName] = "" + actualContentLength;
          continue;
        }
        headersDict[headerName] = header.value.replace(/\n/g, ", ");
      }
    }

    if (!headersDict) {
      return {};
    }

    // Object.keys() is a snapshot, so adding / deleting keys while looping is safe
    for (const key of Object.keys(headersDict)) {
      if (key[0] === ":") {
        delete headersDict[key];
        continue;
      }
      const keyLower = key.toLowerCase();
      if (EXCLUDE_HEADERS.includes(keyLower)) {
        headersDict["x-orig-" + key] = headersDict[key].replace(/\n/g, ", ");
        delete headersDict[key];
        continue;
      }
      if (actualContentLength && keyLower === CONTENT_LENGTH) {
        headersDict[key] = "" + actualContentLength;
        continue;
      }
      headersDict[key] = headersDict[key].replace(/\n/g, ", ");
    }

    return headersDict;
  }

  /**
   * @returns {boolean} true when a payload is present, its length equals the
   *   numeric content-length response header, and the content-type does not
   *   start with "text/html"
   */
  isValidBinary() {
    if (!this.payload) {
      return false;
    }

    const length = this.payload.length;

    const headers = new Headers(this.getResponseHeadersDict());
    const contentType = headers.get(CONTENT_TYPE);
    const contentLength = headers.get(CONTENT_LENGTH);

    if (Number(contentLength) !== length) {
      return false;
    }

    if (contentType && contentType.startsWith("text/html")) {
      return false;
    }

    return true;
  }
}
|