mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
fix indexing of cookie header: (#714)
- add fields option for adding req.http:cookie and referrer entries to the cdxj
- update to warcio 2.4.0 to support this functionality
This commit is contained in:
parent
60c84b342e
commit
f56d6505c1
3 changed files with 20 additions and 3 deletions
|
@ -37,7 +37,7 @@
|
|||
"tsc": "^2.0.4",
|
||||
"undici": "^6.18.2",
|
||||
"uuid": "8.3.2",
|
||||
"warcio": "^2.3.1",
|
||||
"warcio": "^2.4.0",
|
||||
"ws": "^7.4.4",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@ import fs from "fs";
|
|||
import { Writable } from "stream";
|
||||
import path from "path";
|
||||
|
||||
import { CDXIndexer, WARCRecord } from "warcio";
|
||||
import { CDXIndexer, WARCRecord, DEFAULT_CDX_FIELDS } from "warcio";
|
||||
import { WARCSerializer } from "warcio/node";
|
||||
import { logger, formatErr, LogDetails, LogContext } from "./logger.js";
|
||||
import type { IndexerOffsetLength } from "warcio";
|
||||
|
@ -76,7 +76,10 @@ export class WARCWriter implements IndexerOffsetLength {
|
|||
this.recordLength = 0;
|
||||
|
||||
if (this.warcCdxDir) {
|
||||
this.indexer = new CDXIndexer({ format: "cdxj" });
|
||||
this.indexer = new CDXIndexer({
|
||||
format: "cdxj",
|
||||
fields: [...DEFAULT_CDX_FIELDS, "req.http:cookie", "referrer"],
|
||||
});
|
||||
}
|
||||
|
||||
return filename;
|
||||
|
|
14
yarn.lock
14
yarn.lock
|
@ -5295,6 +5295,20 @@ warcio@^2.3.1:
|
|||
uuid-random "^1.3.2"
|
||||
yargs "^17.6.2"
|
||||
|
||||
warcio@^2.4.0:
|
||||
version "2.4.0"
|
||||
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.0.tgz#13bae2837f1bbf5cf7585f75857e6311d30557bd"
|
||||
integrity sha512-EfxXCgsnZ35CGf2j99QBMyB6EI98KEQ6YmeER+8Lnv/4KFJ3thT76PiX37HfZVbPJS21JihA0Eddjk9QBQRlPg==
|
||||
dependencies:
|
||||
"@types/pako" "^1.0.7"
|
||||
"@types/stream-buffers" "^3.0.7"
|
||||
base32-encode "^2.0.0"
|
||||
hash-wasm "^4.9.0"
|
||||
pako "^1.0.11"
|
||||
tempy "^3.1.0"
|
||||
uuid-random "^1.3.2"
|
||||
yargs "^17.7.2"
|
||||
|
||||
web-encoding@^1.1.5:
|
||||
version "1.1.5"
|
||||
resolved "https://registry.yarnpkg.com/web-encoding/-/web-encoding-1.1.5.tgz#fc810cf7667364a6335c939913f5051d3e0c4864"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue