add pixelRatio storing to extraOpts

This commit is contained in:
Ilya Kreymer 2024-04-03 18:48:56 -07:00
parent 97b95fdf18
commit 103f321dd4
2 changed files with 34 additions and 8 deletions

View file

@ -20,7 +20,7 @@ import { WARCRecord } from "warcio";
import { TempFileBuffer, WARCSerializer } from "warcio/node"; import { TempFileBuffer, WARCSerializer } from "warcio/node";
import { WARCWriter } from "./warcwriter.js"; import { WARCWriter } from "./warcwriter.js";
import { RedisCrawlState, WorkerId } from "./state.js"; import { RedisCrawlState, WorkerId } from "./state.js";
import { CDPSession, Protocol } from "puppeteer-core"; import { CDPSession, Page, Protocol } from "puppeteer-core";
import { Crawler } from "../crawler.js"; import { Crawler } from "../crawler.js";
const MAX_BROWSER_DEFAULT_FETCH_SIZE = 5_000_000; const MAX_BROWSER_DEFAULT_FETCH_SIZE = 5_000_000;
@ -118,6 +118,8 @@ export class Recorder {
pageUrl!: string; pageUrl!: string;
pageid!: string; pageid!: string;
pixelRatio: number = 0;
constructor({ constructor({
workerid, workerid,
writer, writer,
@ -142,7 +144,7 @@ export class Recorder {
this.fetcherQ = new PQueue({ concurrency: 1 }); this.fetcherQ = new PQueue({ concurrency: 1 });
} }
async onCreatePage({ cdp }: { cdp: CDPSession }) { async onCreatePage({ cdp, page }: { cdp: CDPSession; page: Page }) {
// Fetch // Fetch
cdp.on("Fetch.requestPaused", async (params) => { cdp.on("Fetch.requestPaused", async (params) => {
this.handleRequestPaused(params, cdp); this.handleRequestPaused(params, cdp);
@ -204,6 +206,16 @@ export class Recorder {
} }
}); });
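// query window.devicePixelRatio only while no ratio is cached yet (pixelRatio starts at 0); later calls reuse the stored value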
if (!this.pixelRatio) {
try {
const res = await page.evaluate("window.devicePixelRatio");
this.pixelRatio = typeof res === "number" ? res : 1;
} catch (e) {
this.pixelRatio = 1;
}
}
await cdp.send("Target.setAutoAttach", { await cdp.send("Target.setAutoAttach", {
autoAttach: true, autoAttach: true,
waitForDebuggerOnStart: false, waitForDebuggerOnStart: false,
@ -550,9 +562,12 @@ export class Recorder {
return false; return false;
} }
if (url === this.pageUrl && !this.pageInfo.ts) { if (url === this.pageUrl) {
logger.debug("Setting page timestamp", { ts: reqresp.ts, url }); if (!this.pageInfo.ts) {
this.pageInfo.ts = reqresp.ts; logger.debug("Setting page timestamp", { ts: reqresp.ts, url });
this.pageInfo.ts = reqresp.ts;
}
reqresp.extraOpts.pixelRatio = this.pixelRatio;
} }
reqresp.fillFetchRequestPaused(params); reqresp.fillFetchRequestPaused(params);

View file

@ -14,6 +14,19 @@ export const MAX_URL_LENGTH = 4096;
// max length for single query arg for post/put converted URLs // max length for single query arg for post/put converted URLs
const MAX_ARG_LEN = 512; const MAX_ARG_LEN = 512;
// ===========================================================================
/**
 * Optional extra metadata attached to a request/response record through the
 * `extraOpts` field of `RequestResponseInfo`. Every property is optional, so
 * an empty object is a valid value.
 */
export type ExtraOptsType = {
// NOTE(review): presumably details of the server certificate; the exact
// meaning of `ctc` is not visible here — confirm where this is populated.
cert?: { issuer: string; ctc: string };
// NOTE(review): presumably the kind of IP address used for the request — confirm against the code that sets it.
ipType?: string;
// NOTE(review): numeric marker related to rewriting; semantics are not visible here — confirm.
rewritten?: number;
// Pixel ratio recorded for the page; the recorder assigns it on the request
// whose URL equals the page URL, using the value read from window.devicePixelRatio
// (falling back to 1 when that value cannot be read).
pixelRatio?: number;
// for wabac.js video rewriting
maxRes?: number;
maxBand?: number;
maxBitrate?: number;
};
// =========================================================================== // ===========================================================================
export class RequestResponseInfo { export class RequestResponseInfo {
ts: Date = new Date(); ts: Date = new Date();
@ -55,9 +68,7 @@ export class RequestResponseInfo {
resourceType?: string; resourceType?: string;
// TODO: Fix this the next time the file is edited. extraOpts: ExtraOptsType = {};
// eslint-disable-next-line @typescript-eslint/no-explicit-any
extraOpts: Record<string, any> = {};
// stats // stats
readSize: number = 0; readSize: number = 0;