mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
support removing range from query (via wabac.js 2.20.6): (#724)
- fix for archiving facebook video, to match webrecorder/archiveweb.page#272 - permissions: auto-enable common permissions to avoid a possible permission modal (for both profiles and crawling) - deps: update to latest wabac.js + warcio.js
This commit is contained in:
parent
0b9cd71c5a
commit
214eb6ca8f
4 changed files with 60 additions and 30 deletions
|
@ -17,7 +17,7 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"@novnc/novnc": "^1.4.0",
|
||||
"@webrecorder/wabac": "^2.20.0",
|
||||
"@webrecorder/wabac": "^2.20.6",
|
||||
"browsertrix-behaviors": "^0.6.5",
|
||||
"client-zip": "^2.4.5",
|
||||
"css-selector-parser": "^3.0.5",
|
||||
|
@ -37,7 +37,7 @@
|
|||
"tsc": "^2.0.4",
|
||||
"undici": "^6.18.2",
|
||||
"uuid": "8.3.2",
|
||||
"warcio": "^2.4.0",
|
||||
"warcio": "^2.4.2",
|
||||
"ws": "^7.4.4",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
|
|
|
@ -407,6 +407,21 @@ export class Browser {
|
|||
this.browser.on("disconnected", () => {
|
||||
this.browser = null;
|
||||
});
|
||||
|
||||
// common permissions
|
||||
const permissions = [
|
||||
"notifications",
|
||||
"geolocation",
|
||||
"camera",
|
||||
"microphone",
|
||||
];
|
||||
|
||||
for (const name of permissions) {
|
||||
await this.firstCDP.send("Browser.setPermission", {
|
||||
permission: { name },
|
||||
setting: "granted",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async newWindowPageWithCDP(): Promise<{ cdp: CDPSession; page: Page }> {
|
||||
|
|
|
@ -10,7 +10,12 @@ import {
|
|||
|
||||
import { fetch, getGlobalDispatcher, Response } from "undici";
|
||||
|
||||
import { getCustomRewriter, rewriteDASH, rewriteHLS } from "@webrecorder/wabac";
|
||||
import {
|
||||
getCustomRewriter,
|
||||
removeRangeAsQuery,
|
||||
rewriteDASH,
|
||||
rewriteHLS,
|
||||
} from "@webrecorder/wabac";
|
||||
|
||||
import { WARCRecord } from "warcio";
|
||||
import { TempFileBuffer, WARCSerializer } from "warcio/node";
|
||||
|
@ -635,6 +640,30 @@ export class Recorder {
|
|||
this.skipRangeUrls.set(url, count + 1);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
const filteredUrl = removeRangeAsQuery(url);
|
||||
if (filteredUrl) {
|
||||
this.removeReqResp(networkId);
|
||||
|
||||
logger.debug(
|
||||
"Removed range in query, async fetching full URL",
|
||||
{ url, ...this.logDetails },
|
||||
"recorder",
|
||||
);
|
||||
|
||||
const reqresp = new RequestResponseInfo("0");
|
||||
reqresp.fillRequest(params.request, params.resourceType);
|
||||
reqresp.url = filteredUrl;
|
||||
reqresp.frameId = params.frameId;
|
||||
|
||||
this.addAsyncFetch({
|
||||
reqresp,
|
||||
recorder: this,
|
||||
networkId: "0",
|
||||
cdp,
|
||||
});
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const reqresp = this.pendingReqResp(networkId);
|
||||
|
@ -798,7 +827,7 @@ export class Recorder {
|
|||
return true;
|
||||
}
|
||||
|
||||
addAsyncFetch(opts: NetworkLoadAsyncFetchOptions, contentLen: number) {
|
||||
addAsyncFetch(opts: NetworkLoadAsyncFetchOptions, contentLen: number = -1) {
|
||||
let fetcher: AsyncFetcher;
|
||||
|
||||
if (
|
||||
|
|
38
yarn.lock
38
yarn.lock
|
@ -1312,16 +1312,16 @@
|
|||
resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"
|
||||
integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==
|
||||
|
||||
"@webrecorder/wabac@^2.20.0":
|
||||
version "2.20.1"
|
||||
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.20.1.tgz#58e397e2ef1c33de1bb37aa4f51fc7f3eec8a1f7"
|
||||
integrity sha512-RX+U6m7aVgvsAfLb9FuLY/PcHCNL5dc1FPaD0GnUiFgswSSe5v4MjIhqJNOnbrJYEcbib81AJfxNuvOyXAJDJQ==
|
||||
"@webrecorder/wabac@^2.20.6":
|
||||
version "2.20.6"
|
||||
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.20.6.tgz#8b28b58d5ae900faebd7be30224eadc620838f9e"
|
||||
integrity sha512-bCvHgfnKHFzEuwPkIxdYIVuBmVTzOgxf96DeDTdZkhPI99iP7AUrz/Y6+qgoD9Vjn4usozUff25vdViD5mtiUg==
|
||||
dependencies:
|
||||
"@peculiar/asn1-ecc" "^2.3.4"
|
||||
"@peculiar/asn1-schema" "^2.3.3"
|
||||
"@peculiar/x509" "^1.9.2"
|
||||
"@types/js-levenshtein" "^1.1.3"
|
||||
"@webrecorder/wombat" "^3.8.3"
|
||||
"@webrecorder/wombat" "^3.8.7"
|
||||
acorn "^8.10.0"
|
||||
auto-js-ipfs "^2.1.1"
|
||||
base64-js "^1.5.1"
|
||||
|
@ -1340,14 +1340,14 @@
|
|||
path-parser "^6.1.0"
|
||||
process "^0.11.10"
|
||||
stream-browserify "^3.0.0"
|
||||
warcio "^2.3.1"
|
||||
warcio "^2.4.2"
|
||||
|
||||
"@webrecorder/wombat@^3.8.3":
|
||||
version "3.8.3"
|
||||
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.8.3.tgz#c5a077225d1a70def9fbbbfcd50fa4465d236546"
|
||||
integrity sha512-dqgoxigB3OdX5JeB3yxJrUNwFwUBlYC+LmGrLEgGeP259MFzXQLD2pmfuqGt5ygWvIv56SrAMV4sUceux07X2A==
|
||||
"@webrecorder/wombat@^3.8.7":
|
||||
version "3.8.7"
|
||||
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.8.7.tgz#51c7465c589e0020be064121127c7c10a38ec21f"
|
||||
integrity sha512-bW5V7cBweTkTazOIN8oZZGwHLevsGNv1luY3t0RYdEZhs5BDpTmUHN33zEbrXDOiPUlY3N3I8+73VA+PuxihoQ==
|
||||
dependencies:
|
||||
warcio "^2.3.1"
|
||||
warcio "^2.4.0"
|
||||
|
||||
"@zxing/text-encoding@0.9.0":
|
||||
version "0.9.0"
|
||||
|
@ -5281,7 +5281,7 @@ walker@^1.0.8:
|
|||
dependencies:
|
||||
makeerror "1.0.12"
|
||||
|
||||
warcio@^2.3.1:
|
||||
warcio@^2.3.1, warcio@^2.4.0, warcio@^2.4.2:
|
||||
version "2.3.1"
|
||||
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.3.1.tgz#8ac9de897de1a556161168f2a3938b60929908ca"
|
||||
integrity sha512-PjcWqzXfs6HdWfHi1V/i8MoMmV5M0Csg3rOa2mqCJ1dmCJXswVfQ0VXbEVumwavNIW2oFFj6LJoCHHeL4Ls/zw==
|
||||
|
@ -5295,20 +5295,6 @@ warcio@^2.3.1:
|
|||
uuid-random "^1.3.2"
|
||||
yargs "^17.6.2"
|
||||
|
||||
warcio@^2.4.0:
|
||||
version "2.4.0"
|
||||
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.0.tgz#13bae2837f1bbf5cf7585f75857e6311d30557bd"
|
||||
integrity sha512-EfxXCgsnZ35CGf2j99QBMyB6EI98KEQ6YmeER+8Lnv/4KFJ3thT76PiX37HfZVbPJS21JihA0Eddjk9QBQRlPg==
|
||||
dependencies:
|
||||
"@types/pako" "^1.0.7"
|
||||
"@types/stream-buffers" "^3.0.7"
|
||||
base32-encode "^2.0.0"
|
||||
hash-wasm "^4.9.0"
|
||||
pako "^1.0.11"
|
||||
tempy "^3.1.0"
|
||||
uuid-random "^1.3.2"
|
||||
yargs "^17.7.2"
|
||||
|
||||
web-encoding@^1.1.5:
|
||||
version "1.1.5"
|
||||
resolved "https://registry.yarnpkg.com/web-encoding/-/web-encoding-1.1.5.tgz#fc810cf7667364a6335c939913f5051d3e0c4864"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue