support removing range from query (via wabac.js 2.20.6) (#724)

- fix for archiving Facebook video, to match
webrecorder/archiveweb.page#272
- permissions: auto-enable permissions to avoid a possible modal prompt (for
both profiles and crawling)
- deps: update to latest wabac.js + warcio.js
This commit is contained in:
Ilya Kreymer 2024-11-22 10:31:12 -08:00 committed by GitHub
parent 0b9cd71c5a
commit 214eb6ca8f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 60 additions and 30 deletions

View file

@ -17,7 +17,7 @@
}, },
"dependencies": { "dependencies": {
"@novnc/novnc": "^1.4.0", "@novnc/novnc": "^1.4.0",
"@webrecorder/wabac": "^2.20.0", "@webrecorder/wabac": "^2.20.6",
"browsertrix-behaviors": "^0.6.5", "browsertrix-behaviors": "^0.6.5",
"client-zip": "^2.4.5", "client-zip": "^2.4.5",
"css-selector-parser": "^3.0.5", "css-selector-parser": "^3.0.5",
@ -37,7 +37,7 @@
"tsc": "^2.0.4", "tsc": "^2.0.4",
"undici": "^6.18.2", "undici": "^6.18.2",
"uuid": "8.3.2", "uuid": "8.3.2",
"warcio": "^2.4.0", "warcio": "^2.4.2",
"ws": "^7.4.4", "ws": "^7.4.4",
"yargs": "^17.7.2" "yargs": "^17.7.2"
}, },

View file

@ -407,6 +407,21 @@ export class Browser {
this.browser.on("disconnected", () => { this.browser.on("disconnected", () => {
this.browser = null; this.browser = null;
}); });
// common permissions
const permissions = [
"notifications",
"geolocation",
"camera",
"microphone",
];
for (const name of permissions) {
await this.firstCDP.send("Browser.setPermission", {
permission: { name },
setting: "granted",
});
}
} }
async newWindowPageWithCDP(): Promise<{ cdp: CDPSession; page: Page }> { async newWindowPageWithCDP(): Promise<{ cdp: CDPSession; page: Page }> {

View file

@ -10,7 +10,12 @@ import {
import { fetch, getGlobalDispatcher, Response } from "undici"; import { fetch, getGlobalDispatcher, Response } from "undici";
import { getCustomRewriter, rewriteDASH, rewriteHLS } from "@webrecorder/wabac"; import {
getCustomRewriter,
removeRangeAsQuery,
rewriteDASH,
rewriteHLS,
} from "@webrecorder/wabac";
import { WARCRecord } from "warcio"; import { WARCRecord } from "warcio";
import { TempFileBuffer, WARCSerializer } from "warcio/node"; import { TempFileBuffer, WARCSerializer } from "warcio/node";
@ -635,6 +640,30 @@ export class Recorder {
this.skipRangeUrls.set(url, count + 1); this.skipRangeUrls.set(url, count + 1);
return false; return false;
} }
} else {
const filteredUrl = removeRangeAsQuery(url);
if (filteredUrl) {
this.removeReqResp(networkId);
logger.debug(
"Removed range in query, async fetching full URL",
{ url, ...this.logDetails },
"recorder",
);
const reqresp = new RequestResponseInfo("0");
reqresp.fillRequest(params.request, params.resourceType);
reqresp.url = filteredUrl;
reqresp.frameId = params.frameId;
this.addAsyncFetch({
reqresp,
recorder: this,
networkId: "0",
cdp,
});
return false;
}
} }
const reqresp = this.pendingReqResp(networkId); const reqresp = this.pendingReqResp(networkId);
@ -798,7 +827,7 @@ export class Recorder {
return true; return true;
} }
addAsyncFetch(opts: NetworkLoadAsyncFetchOptions, contentLen: number) { addAsyncFetch(opts: NetworkLoadAsyncFetchOptions, contentLen: number = -1) {
let fetcher: AsyncFetcher; let fetcher: AsyncFetcher;
if ( if (

View file

@ -1312,16 +1312,16 @@
resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"
integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==
"@webrecorder/wabac@^2.20.0": "@webrecorder/wabac@^2.20.6":
version "2.20.1" version "2.20.6"
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.20.1.tgz#58e397e2ef1c33de1bb37aa4f51fc7f3eec8a1f7" resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.20.6.tgz#8b28b58d5ae900faebd7be30224eadc620838f9e"
integrity sha512-RX+U6m7aVgvsAfLb9FuLY/PcHCNL5dc1FPaD0GnUiFgswSSe5v4MjIhqJNOnbrJYEcbib81AJfxNuvOyXAJDJQ== integrity sha512-bCvHgfnKHFzEuwPkIxdYIVuBmVTzOgxf96DeDTdZkhPI99iP7AUrz/Y6+qgoD9Vjn4usozUff25vdViD5mtiUg==
dependencies: dependencies:
"@peculiar/asn1-ecc" "^2.3.4" "@peculiar/asn1-ecc" "^2.3.4"
"@peculiar/asn1-schema" "^2.3.3" "@peculiar/asn1-schema" "^2.3.3"
"@peculiar/x509" "^1.9.2" "@peculiar/x509" "^1.9.2"
"@types/js-levenshtein" "^1.1.3" "@types/js-levenshtein" "^1.1.3"
"@webrecorder/wombat" "^3.8.3" "@webrecorder/wombat" "^3.8.7"
acorn "^8.10.0" acorn "^8.10.0"
auto-js-ipfs "^2.1.1" auto-js-ipfs "^2.1.1"
base64-js "^1.5.1" base64-js "^1.5.1"
@ -1340,14 +1340,14 @@
path-parser "^6.1.0" path-parser "^6.1.0"
process "^0.11.10" process "^0.11.10"
stream-browserify "^3.0.0" stream-browserify "^3.0.0"
warcio "^2.3.1" warcio "^2.4.2"
"@webrecorder/wombat@^3.8.3": "@webrecorder/wombat@^3.8.7":
version "3.8.3" version "3.8.7"
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.8.3.tgz#c5a077225d1a70def9fbbbfcd50fa4465d236546" resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.8.7.tgz#51c7465c589e0020be064121127c7c10a38ec21f"
integrity sha512-dqgoxigB3OdX5JeB3yxJrUNwFwUBlYC+LmGrLEgGeP259MFzXQLD2pmfuqGt5ygWvIv56SrAMV4sUceux07X2A== integrity sha512-bW5V7cBweTkTazOIN8oZZGwHLevsGNv1luY3t0RYdEZhs5BDpTmUHN33zEbrXDOiPUlY3N3I8+73VA+PuxihoQ==
dependencies: dependencies:
warcio "^2.3.1" warcio "^2.4.0"
"@zxing/text-encoding@0.9.0": "@zxing/text-encoding@0.9.0":
version "0.9.0" version "0.9.0"
@ -5281,7 +5281,7 @@ walker@^1.0.8:
dependencies: dependencies:
makeerror "1.0.12" makeerror "1.0.12"
warcio@^2.3.1: warcio@^2.3.1, warcio@^2.4.0, warcio@^2.4.2:
version "2.3.1" version "2.3.1"
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.3.1.tgz#8ac9de897de1a556161168f2a3938b60929908ca" resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.3.1.tgz#8ac9de897de1a556161168f2a3938b60929908ca"
integrity sha512-PjcWqzXfs6HdWfHi1V/i8MoMmV5M0Csg3rOa2mqCJ1dmCJXswVfQ0VXbEVumwavNIW2oFFj6LJoCHHeL4Ls/zw== integrity sha512-PjcWqzXfs6HdWfHi1V/i8MoMmV5M0Csg3rOa2mqCJ1dmCJXswVfQ0VXbEVumwavNIW2oFFj6LJoCHHeL4Ls/zw==
@ -5295,20 +5295,6 @@ warcio@^2.3.1:
uuid-random "^1.3.2" uuid-random "^1.3.2"
yargs "^17.6.2" yargs "^17.6.2"
warcio@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.0.tgz#13bae2837f1bbf5cf7585f75857e6311d30557bd"
integrity sha512-EfxXCgsnZ35CGf2j99QBMyB6EI98KEQ6YmeER+8Lnv/4KFJ3thT76PiX37HfZVbPJS21JihA0Eddjk9QBQRlPg==
dependencies:
"@types/pako" "^1.0.7"
"@types/stream-buffers" "^3.0.7"
base32-encode "^2.0.0"
hash-wasm "^4.9.0"
pako "^1.0.11"
tempy "^3.1.0"
uuid-random "^1.3.2"
yargs "^17.7.2"
web-encoding@^1.1.5: web-encoding@^1.1.5:
version "1.1.5" version "1.1.5"
resolved "https://registry.yarnpkg.com/web-encoding/-/web-encoding-1.1.5.tgz#fc810cf7667364a6335c939913f5051d3e0c4864" resolved "https://registry.yarnpkg.com/web-encoding/-/web-encoding-1.1.5.tgz#fc810cf7667364a6335c939913f5051d3e0c4864"