From 103f321dd4dad494a994a3b5eb7fb5bd5d036b10 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Wed, 3 Apr 2024 18:48:56 -0700
Subject: [PATCH] add pixelRatio storing to extraOpts

---
Reviewer notes (ignored by `git am` / `git apply`; not part of the commit):

* recorder.ts: Recorder gains a `pixelRatio` field initialised to 0.
  `onCreatePage` now also destructures `page` and, only while `pixelRatio`
  is still falsy, evaluates `window.devicePixelRatio` in the page. A
  non-number result or a thrown error both fall back to 1, so the lookup
  is not repeated on later calls.
* recorder.ts: when the request URL equals `pageUrl`, the stored ratio is
  copied to `reqresp.extraOpts.pixelRatio`; the existing "set page
  timestamp once" logic is kept, now nested inside that branch.
* reqresp.ts: `extraOpts` is typed as the new `ExtraOptsType` instead of
  `Record<string, any>`, which removes the eslint suppression and TODO.
* NOTE(review): `onCreatePage` now requires `page` in its argument object;
  no caller is touched by this patch -- confirm every call site already
  passes it.
* NOTE(review): this patch was recovered from a whitespace-collapsed copy.
  Line structure and hunk counts are verified, but leading indentation is
  reconstructed assuming 2-space formatting; if context does not match the
  target tree, apply with `git apply --ignore-whitespace`. The author
  e-mail address was lost and is intentionally not guessed.

 src/util/recorder.ts | 25 ++++++++++++++++++++-----
 src/util/reqresp.ts  | 17 ++++++++++++++---
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/util/recorder.ts b/src/util/recorder.ts
index ec2a24c1..15596749 100644
--- a/src/util/recorder.ts
+++ b/src/util/recorder.ts
@@ -20,7 +20,7 @@ import { WARCRecord } from "warcio";
 import { TempFileBuffer, WARCSerializer } from "warcio/node";
 import { WARCWriter } from "./warcwriter.js";
 import { RedisCrawlState, WorkerId } from "./state.js";
-import { CDPSession, Protocol } from "puppeteer-core";
+import { CDPSession, Page, Protocol } from "puppeteer-core";
 import { Crawler } from "../crawler.js";
 
 const MAX_BROWSER_DEFAULT_FETCH_SIZE = 5_000_000;
@@ -118,6 +118,8 @@ export class Recorder {
   pageUrl!: string;
   pageid!: string;
 
+  pixelRatio: number = 0;
+
   constructor({
     workerid,
     writer,
@@ -142,7 +144,7 @@ export class Recorder {
     this.fetcherQ = new PQueue({ concurrency: 1 });
   }
 
-  async onCreatePage({ cdp }: { cdp: CDPSession }) {
+  async onCreatePage({ cdp, page }: { cdp: CDPSession; page: Page }) {
     // Fetch
     cdp.on("Fetch.requestPaused", async (params) => {
       this.handleRequestPaused(params, cdp);
@@ -204,6 +206,16 @@ export class Recorder {
       }
     });
 
+    // just set the first time
+    if (!this.pixelRatio) {
+      try {
+        const res = await page.evaluate("window.devicePixelRatio");
+        this.pixelRatio = typeof res === "number" ? res : 1;
+      } catch (e) {
+        this.pixelRatio = 1;
+      }
+    }
+
     await cdp.send("Target.setAutoAttach", {
       autoAttach: true,
       waitForDebuggerOnStart: false,
@@ -550,9 +562,12 @@ export class Recorder {
       return false;
     }
 
-    if (url === this.pageUrl && !this.pageInfo.ts) {
-      logger.debug("Setting page timestamp", { ts: reqresp.ts, url });
-      this.pageInfo.ts = reqresp.ts;
+    if (url === this.pageUrl) {
+      if (!this.pageInfo.ts) {
+        logger.debug("Setting page timestamp", { ts: reqresp.ts, url });
+        this.pageInfo.ts = reqresp.ts;
+      }
+      reqresp.extraOpts.pixelRatio = this.pixelRatio;
     }
 
     reqresp.fillFetchRequestPaused(params);
diff --git a/src/util/reqresp.ts b/src/util/reqresp.ts
index d8c36ee1..145e55d9 100644
--- a/src/util/reqresp.ts
+++ b/src/util/reqresp.ts
@@ -14,6 +14,19 @@ export const MAX_URL_LENGTH = 4096;
 // max length for single query arg for post/put converted URLs
 const MAX_ARG_LEN = 512;
 
+// ===========================================================================
+export type ExtraOptsType = {
+  cert?: { issuer: string; ctc: string };
+  ipType?: string;
+  rewritten?: number;
+  pixelRatio?: number;
+
+  // for wabac.js video rewriting
+  maxRes?: number;
+  maxBand?: number;
+  maxBitrate?: number;
+};
+
 // ===========================================================================
 export class RequestResponseInfo {
   ts: Date = new Date();
@@ -55,9 +68,7 @@ export class RequestResponseInfo {
 
   resourceType?: string;
 
-  // TODO: Fix this the next time the file is edited.
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  extraOpts: Record<string, any> = {};
+  extraOpts: ExtraOptsType = {};
 
   // stats
   readSize: number = 0;