Include resource type + mime type in page resources list (#468)

The `:pageinfo:<url>` record now includes the MIME type and resource type
(as reported by Chrome) along with the status code for each resource, to
allow better filtering and comparison.
This commit is contained in:
Ilya Kreymer 2024-02-19 19:11:48 -08:00 committed by GitHub
parent 8d2d79a5df
commit a512e92886
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 139 additions and 37 deletions

View file

@ -48,10 +48,17 @@ function logNetwork(msg: string, data: any) {
// logger.debug(msg, data, "recorderNetwork"); // logger.debug(msg, data, "recorderNetwork");
} }
// =================================================================
/**
 * Per-resource entry stored in `PageInfoRecord.urls`, keyed by the
 * resource URL.
 */
export type PageInfoValue = {
// status of the request/response pair, as recorded on the RequestResponseInfo
status: number;
// MIME type of the response, if one could be determined
mime?: string;
// resource type of the request (e.g. "Document", "Stylesheet", "Image"), if known
type?: string;
};
// ================================================================= // =================================================================
export type PageInfoRecord = { export type PageInfoRecord = {
pageid: string; pageid: string;
urls: Record<string, number>; urls: Record<string, PageInfoValue>;
url: string; url: string;
ts?: Date; ts?: Date;
}; };
@ -190,7 +197,7 @@ export class Recorder {
}); });
const reqresp = this.pendingReqResp(params.requestId, true); const reqresp = this.pendingReqResp(params.requestId, true);
if (reqresp) { if (reqresp) {
this.addPageRecord(reqresp); reqresp.resourceType = params.this.addPageRecord(reqresp);
this.removeReqResp(params.requestId); this.removeReqResp(params.requestId);
} }
@ -250,7 +257,7 @@ export class Recorder {
} }
handleResponseReceived(params: Protocol.Network.ResponseReceivedEvent) { handleResponseReceived(params: Protocol.Network.ResponseReceivedEvent) {
const { requestId, response } = params; const { requestId, response, type } = params;
const { mimeType } = response; const { mimeType } = response;
@ -263,7 +270,7 @@ export class Recorder {
return; return;
} }
reqresp.fillResponse(response); reqresp.fillResponse(response, type);
this.addPageRecord(reqresp); this.addPageRecord(reqresp);
} }
@ -280,7 +287,7 @@ export class Recorder {
} }
handleRedirectResponse(params: Protocol.Network.RequestWillBeSentEvent) { handleRedirectResponse(params: Protocol.Network.RequestWillBeSentEvent) {
const { requestId, redirectResponse } = params; const { requestId, redirectResponse, type } = params;
// remove and serialize, but allow reusing requestId // remove and serialize, but allow reusing requestId
// as redirect chain may reuse same requestId for subsequent request // as redirect chain may reuse same requestId for subsequent request
@ -289,7 +296,7 @@ export class Recorder {
return; return;
} }
reqresp.fillResponse(redirectResponse); reqresp.fillResponse(redirectResponse, type);
if (reqresp.isSelfRedirect()) { if (reqresp.isSelfRedirect()) {
logger.warn( logger.warn(
@ -312,6 +319,7 @@ export class Recorder {
} }
const { url } = reqresp; const { url } = reqresp;
reqresp.resourceType = type;
switch (errorText) { switch (errorText) {
case "net::ERR_BLOCKED_BY_CLIENT": case "net::ERR_BLOCKED_BY_CLIENT":
@ -648,8 +656,10 @@ export class Recorder {
} }
addPageRecord(reqresp: RequestResponseInfo) { addPageRecord(reqresp: RequestResponseInfo) {
if (this.isValidUrl(this.pageInfo.url)) { if (this.isValidUrl(reqresp.url)) {
this.pageInfo.urls[reqresp.getCanonURL()] = reqresp.status; const { status, resourceType: type } = reqresp;
const mime = reqresp.getMimeType();
this.pageInfo.urls[reqresp.getCanonURL()] = { status, mime, type };
} }
} }

View file

@ -9,7 +9,7 @@ const CONTENT_TYPE = "content-type";
const EXCLUDE_HEADERS = ["content-encoding", "transfer-encoding"]; const EXCLUDE_HEADERS = ["content-encoding", "transfer-encoding"];
// max URL length for post/put payload-converted URLs // max URL length for post/put payload-converted URLs
const MAX_URL_LENGTH = 4096; export const MAX_URL_LENGTH = 4096;
// max length for single query arg for post/put converted URLs // max length for single query arg for post/put converted URLs
const MAX_ARG_LEN = 512; const MAX_ARG_LEN = 512;
@ -24,6 +24,8 @@ export class RequestResponseInfo {
url!: string; url!: string;
protocol?: string = "HTTP/1.1"; protocol?: string = "HTTP/1.1";
mimeType?: string;
// request data // request data
requestHeaders?: Record<string, string>; requestHeaders?: Record<string, string>;
requestHeadersText?: string; requestHeadersText?: string;
@ -88,7 +90,7 @@ export class RequestResponseInfo {
this.frameId = params.frameId; this.frameId = params.frameId;
} }
fillResponse(response: Protocol.Network.Response) { fillResponse(response: Protocol.Network.Response, type?: string) {
// if initial fetch was a 200, but now replacing with 304, don't! // if initial fetch was a 200, but now replacing with 304, don't!
if ( if (
response.status == 304 && response.status == 304 &&
@ -106,6 +108,10 @@ export class RequestResponseInfo {
this.protocol = response.protocol; this.protocol = response.protocol;
if (type) {
this.resourceType = type;
}
if (response.requestHeaders) { if (response.requestHeaders) {
this.requestHeaders = response.requestHeaders; this.requestHeaders = response.requestHeaders;
} }
@ -246,6 +252,21 @@ export class RequestResponseInfo {
return headersDict; return headersDict;
} }
/**
 * Returns the MIME type of this response, if one can be determined.
 *
 * The explicitly set `mimeType` field takes precedence. Otherwise the value
 * is derived from the `content-type` response header, keeping only the media
 * type and dropping any parameters (e.g. `; charset=utf-8`).
 *
 * @returns the MIME type, or `undefined` when `mimeType` is unset and the
 *   response has no usable `content-type` header.
 */
getMimeType(): string | undefined {
  if (this.mimeType) {
    return this.mimeType;
  }

  const headers = new Headers(this.getResponseHeadersDict());
  const contentType = headers.get(CONTENT_TYPE);

  if (!contentType) {
    return undefined;
  }

  // Keep only the media type. Whitespace is permitted before the ';' that
  // introduces parameters ("text/html ; charset=utf-8"), so trim it to avoid
  // reporting values such as "text/html " that break exact comparisons.
  const [mediaType = ""] = contentType.split(";", 1);
  const mime = mediaType.trim();

  // A header consisting only of parameters carries no media type at all.
  return mime || undefined;
}
isValidBinary() { isValidBinary() {
if (!this.payload) { if (!this.payload) {
return false; return false;

View file

@ -52,22 +52,71 @@ function validateResourcesIndex(json) {
expect(json).toHaveProperty("ts"); expect(json).toHaveProperty("ts");
expect(json).toHaveProperty("urls"); expect(json).toHaveProperty("urls");
expect(json.urls).toEqual({ expect(json.urls).toEqual({
"https://webrecorder.net/": 200, "https://webrecorder.net/": {
"https://webrecorder.net/assets/main.css": 200, status: 200,
"https://webrecorder.net/assets/tools/awp-icon.png": 200, mime: "text/html",
"https://webrecorder.net/assets/wr-logo.svg": 200, type: "Document",
"https://webrecorder.net/assets/tools/browsertrixcrawler.png": 200, },
"https://webrecorder.net/assets/tools/logo-pywb.png": 200, "https://webrecorder.net/assets/fontawesome/all.css": {
"https://webrecorder.net/assets/images/btrix-cloud.png": 200, status: 200,
"https://webrecorder.net/assets/tools/rwp-icon.png": 200, mime: "text/css",
"https://webrecorder.net/assets/fontawesome/all.css": 200, type: "Stylesheet",
"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap": 200, },
"https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap": 200, "https://webrecorder.net/assets/wr-logo.svg": {
"https://stats.browsertrix.com/js/script.js": 200, status: 200,
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2": 200, mime: "image/svg+xml",
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2": 200, type: "Image",
"https://webrecorder.net/assets/favicon.ico": 200, },
"https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2F&d=webrecorder.net": 202, "https://webrecorder.net/assets/tools/awp-icon.png": {
status: 200,
mime: "image/png",
type: "Image",
},
"https://webrecorder.net/assets/tools/logo-pywb.png": {
status: 200,
mime: "image/png",
type: "Image",
},
"https://webrecorder.net/assets/tools/browsertrixcrawler.png": {
status: 200,
mime: "image/png",
type: "Image",
},
"https://webrecorder.net/assets/tools/rwp-icon.png": {
status: 200,
mime: "image/png",
type: "Image",
},
"https://webrecorder.net/assets/images/btrix-cloud.png": {
status: 200,
mime: "image/png",
type: "Image",
},
"https://webrecorder.net/assets/main.css": {
status: 200,
mime: "text/css",
type: "Stylesheet",
},
"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap":
{ status: 200, mime: "text/css", type: "Stylesheet" },
"https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap":
{ status: 200, mime: "text/css", type: "Stylesheet" },
"https://stats.browsertrix.com/js/script.js": {
status: 200,
mime: "application/javascript",
type: "Script",
},
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2":
{ status: 200, mime: "font/woff2", type: "Font" },
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2":
{ status: 200, mime: "font/woff2", type: "Font" },
"https://webrecorder.net/assets/favicon.ico": {
status: 200,
mime: "image/vnd.microsoft.icon",
type: "Other",
},
"https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2F&d=webrecorder.net":
{ status: 202, mime: "text/plain", type: "XHR" },
}); });
} }
@ -77,16 +126,38 @@ function validateResourcesAbout(json) {
expect(json).toHaveProperty("ts"); expect(json).toHaveProperty("ts");
expect(json).toHaveProperty("urls"); expect(json).toHaveProperty("urls");
expect(json.urls).toEqual({ expect(json.urls).toEqual({
"https://webrecorder.net/about": 200, "https://webrecorder.net/about": {
"https://webrecorder.net/assets/main.css": 200, status: 200,
"https://webrecorder.net/assets/fontawesome/all.css": 200, mime: "text/html",
"https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap": 200, type: "Document",
"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap": 200, },
"https://stats.browsertrix.com/js/script.js": 200, "https://webrecorder.net/assets/main.css": {
"https://webrecorder.net/assets/wr-logo.svg": 200, status: 200,
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2": 200, mime: "text/css",
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2": 200, type: "Stylesheet",
//"https://webrecorder.net/assets/favicon.ico": 200, },
//"https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2Fabout&d=webrecorder.net": 202, "https://webrecorder.net/assets/fontawesome/all.css": {
status: 200,
mime: "text/css",
type: "Stylesheet",
},
"https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap":
{ status: 200, mime: "text/css", type: "Stylesheet" },
"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap":
{ status: 200, mime: "text/css", type: "Stylesheet" },
"https://stats.browsertrix.com/js/script.js": {
status: 200,
mime: "application/javascript",
type: "Script",
},
"https://webrecorder.net/assets/wr-logo.svg": {
status: 200,
mime: "image/svg+xml",
type: "Image",
},
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2":
{ status: 200, mime: "font/woff2", type: "Font" },
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2":
{ status: 200, mime: "font/woff2", type: "Font" },
}); });
} }