From 214eb6ca8fbad82488fa66fe927463535b44fb2e Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 22 Nov 2024 10:31:12 -0800 Subject: [PATCH] support removing range from query (via wabac.js 2.20.6): (#724) - fix for archiving facebook video, to match webrecorder/archiveweb.page#272 - permissions: auto-enable permissions to avoid a possible modal (for both profiles and crawling) - deps: update to latest wabac.js + warcio.js --- package.json | 4 ++-- src/util/browser.ts | 15 +++++++++++++++ src/util/recorder.ts | 33 +++++++++++++++++++++++++++++++-- yarn.lock | 38 ++++++++++++-------------------------- 4 files changed, 60 insertions(+), 30 deletions(-) diff --git a/package.json b/package.json index 63dd203f..cb1089a6 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ }, "dependencies": { "@novnc/novnc": "^1.4.0", - "@webrecorder/wabac": "^2.20.0", + "@webrecorder/wabac": "^2.20.6", "browsertrix-behaviors": "^0.6.5", "client-zip": "^2.4.5", "css-selector-parser": "^3.0.5", @@ -37,7 +37,7 @@ "tsc": "^2.0.4", "undici": "^6.18.2", "uuid": "8.3.2", - "warcio": "^2.4.0", + "warcio": "^2.4.2", "ws": "^7.4.4", "yargs": "^17.7.2" }, diff --git a/src/util/browser.ts b/src/util/browser.ts index e2f0e585..6526e295 100644 --- a/src/util/browser.ts +++ b/src/util/browser.ts @@ -407,6 +407,21 @@ export class Browser { this.browser.on("disconnected", () => { this.browser = null; }); + + // common permissions + const permissions = [ + "notifications", + "geolocation", + "camera", + "microphone", + ]; + + for (const name of permissions) { + await this.firstCDP.send("Browser.setPermission", { + permission: { name }, + setting: "granted", + }); + } } async newWindowPageWithCDP(): Promise<{ cdp: CDPSession; page: Page }> { diff --git a/src/util/recorder.ts b/src/util/recorder.ts index c237ce34..f045e05c 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -10,7 +10,12 @@ import { import { fetch, getGlobalDispatcher, Response } from "undici"; -import { 
getCustomRewriter, rewriteDASH, rewriteHLS } from "@webrecorder/wabac"; +import { + getCustomRewriter, + removeRangeAsQuery, + rewriteDASH, + rewriteHLS, +} from "@webrecorder/wabac"; import { WARCRecord } from "warcio"; import { TempFileBuffer, WARCSerializer } from "warcio/node"; @@ -635,6 +640,30 @@ export class Recorder { this.skipRangeUrls.set(url, count + 1); return false; } + } else { + const filteredUrl = removeRangeAsQuery(url); + if (filteredUrl) { + this.removeReqResp(networkId); + + logger.debug( + "Removed range in query, async fetching full URL", + { url, ...this.logDetails }, + "recorder", + ); + + const reqresp = new RequestResponseInfo("0"); + reqresp.fillRequest(params.request, params.resourceType); + reqresp.url = filteredUrl; + reqresp.frameId = params.frameId; + + this.addAsyncFetch({ + reqresp, + recorder: this, + networkId: "0", + cdp, + }); + return false; + } } const reqresp = this.pendingReqResp(networkId); @@ -798,7 +827,7 @@ export class Recorder { return true; } - addAsyncFetch(opts: NetworkLoadAsyncFetchOptions, contentLen: number) { + addAsyncFetch(opts: NetworkLoadAsyncFetchOptions, contentLen: number = -1) { let fetcher: AsyncFetcher; if ( diff --git a/yarn.lock b/yarn.lock index 2a398802..3efff9b1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1312,16 +1312,16 @@ resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== -"@webrecorder/wabac@^2.20.0": - version "2.20.1" - resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.20.1.tgz#58e397e2ef1c33de1bb37aa4f51fc7f3eec8a1f7" - integrity sha512-RX+U6m7aVgvsAfLb9FuLY/PcHCNL5dc1FPaD0GnUiFgswSSe5v4MjIhqJNOnbrJYEcbib81AJfxNuvOyXAJDJQ== +"@webrecorder/wabac@^2.20.6": + version "2.20.6" + resolved 
"https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.20.6.tgz#8b28b58d5ae900faebd7be30224eadc620838f9e" + integrity sha512-bCvHgfnKHFzEuwPkIxdYIVuBmVTzOgxf96DeDTdZkhPI99iP7AUrz/Y6+qgoD9Vjn4usozUff25vdViD5mtiUg== dependencies: "@peculiar/asn1-ecc" "^2.3.4" "@peculiar/asn1-schema" "^2.3.3" "@peculiar/x509" "^1.9.2" "@types/js-levenshtein" "^1.1.3" - "@webrecorder/wombat" "^3.8.3" + "@webrecorder/wombat" "^3.8.7" acorn "^8.10.0" auto-js-ipfs "^2.1.1" base64-js "^1.5.1" @@ -1340,14 +1340,14 @@ path-parser "^6.1.0" process "^0.11.10" stream-browserify "^3.0.0" - warcio "^2.3.1" + warcio "^2.4.2" -"@webrecorder/wombat@^3.8.3": - version "3.8.3" - resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.8.3.tgz#c5a077225d1a70def9fbbbfcd50fa4465d236546" - integrity sha512-dqgoxigB3OdX5JeB3yxJrUNwFwUBlYC+LmGrLEgGeP259MFzXQLD2pmfuqGt5ygWvIv56SrAMV4sUceux07X2A== +"@webrecorder/wombat@^3.8.7": + version "3.8.7" + resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.8.7.tgz#51c7465c589e0020be064121127c7c10a38ec21f" + integrity sha512-bW5V7cBweTkTazOIN8oZZGwHLevsGNv1luY3t0RYdEZhs5BDpTmUHN33zEbrXDOiPUlY3N3I8+73VA+PuxihoQ== dependencies: - warcio "^2.3.1" + warcio "^2.4.0" "@zxing/text-encoding@0.9.0": version "0.9.0" @@ -5281,7 +5281,7 @@ walker@^1.0.8: dependencies: makeerror "1.0.12" -warcio@^2.3.1: +warcio@^2.3.1, warcio@^2.4.0, warcio@^2.4.2: version "2.3.1" resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.3.1.tgz#8ac9de897de1a556161168f2a3938b60929908ca" integrity sha512-PjcWqzXfs6HdWfHi1V/i8MoMmV5M0Csg3rOa2mqCJ1dmCJXswVfQ0VXbEVumwavNIW2oFFj6LJoCHHeL4Ls/zw== @@ -5295,20 +5295,6 @@ warcio@^2.3.1: uuid-random "^1.3.2" yargs "^17.6.2" -warcio@^2.4.0: - version "2.4.0" - resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.0.tgz#13bae2837f1bbf5cf7585f75857e6311d30557bd" - integrity sha512-EfxXCgsnZ35CGf2j99QBMyB6EI98KEQ6YmeER+8Lnv/4KFJ3thT76PiX37HfZVbPJS21JihA0Eddjk9QBQRlPg== - dependencies: - "@types/pako" "^1.0.7" - 
"@types/stream-buffers" "^3.0.7" - base32-encode "^2.0.0" - hash-wasm "^4.9.0" - pako "^1.0.11" - tempy "^3.1.0" - uuid-random "^1.3.2" - yargs "^17.7.2" - web-encoding@^1.1.5: version "1.1.5" resolved "https://registry.yarnpkg.com/web-encoding/-/web-encoding-1.1.5.tgz#fc810cf7667364a6335c939913f5051d3e0c4864"