mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
warcwriter: ensure filehandle is also inited before first use, instead of throwing error
This commit is contained in:
parent
d36564e0b0
commit
34069eaaf9
1 changed files with 13 additions and 11 deletions
|
@ -27,8 +27,8 @@ export class WARCWriter implements IndexerOffsetLength {
|
|||
|
||||
indexer?: CDXIndexer;
|
||||
|
||||
fh?: Writable | null;
|
||||
cdxFH?: Writable | null;
|
||||
fh: Writable | null;
|
||||
cdxFH: Writable | null;
|
||||
|
||||
constructor({
|
||||
archivesDir,
|
||||
|
@ -52,6 +52,8 @@ export class WARCWriter implements IndexerOffsetLength {
|
|||
this.rolloverSize = rolloverSize;
|
||||
|
||||
this.filenameTemplate = filenameTemplate;
|
||||
this.cdxFH = null;
|
||||
this.fh = null;
|
||||
}
|
||||
|
||||
_initNewFile() {
|
||||
|
@ -79,22 +81,23 @@ export class WARCWriter implements IndexerOffsetLength {
|
|||
"writer",
|
||||
);
|
||||
this.filename = this._initNewFile();
|
||||
this.fh = null;
|
||||
this.cdxFH = null;
|
||||
} else if (!this.filename) {
|
||||
this.filename = this._initNewFile();
|
||||
}
|
||||
|
||||
if (!this.fh) {
|
||||
this.fh = fs.createWriteStream(
|
||||
path.join(this.archivesDir, this.filename),
|
||||
);
|
||||
let fh: Writable | null = this.fh;
|
||||
|
||||
if (!fh) {
|
||||
fh = fs.createWriteStream(path.join(this.archivesDir, this.filename));
|
||||
}
|
||||
if (!this.cdxFH && this.tempCdxDir) {
|
||||
this.cdxFH = fs.createWriteStream(
|
||||
path.join(this.tempCdxDir, this.filename + ".cdx"),
|
||||
);
|
||||
}
|
||||
|
||||
return fh;
|
||||
}
|
||||
|
||||
async writeRecordPair(
|
||||
|
@ -117,8 +120,6 @@ export class WARCWriter implements IndexerOffsetLength {
|
|||
responseSerializer = new WARCSerializer(responseRecord, opts);
|
||||
}
|
||||
|
||||
await this.initFH();
|
||||
|
||||
this.recordLength = await this._writeRecord(
|
||||
responseRecord,
|
||||
responseSerializer,
|
||||
|
@ -139,17 +140,18 @@ export class WARCWriter implements IndexerOffsetLength {
|
|||
const opts = { gzip: this.gzip };
|
||||
|
||||
const requestSerializer = new WARCSerializer(record, opts);
|
||||
|
||||
this.recordLength = await this._writeRecord(record, requestSerializer);
|
||||
|
||||
this._writeCDX(record);
|
||||
}
|
||||
|
||||
async _writeRecord(record: WARCRecord, serializer: WARCSerializer) {
|
||||
private async _writeRecord(record: WARCRecord, serializer: WARCSerializer) {
|
||||
let total = 0;
|
||||
const url = record.warcTargetURI;
|
||||
|
||||
if (!this.fh) {
|
||||
throw new Error("writer not initialized");
|
||||
this.fh = await this.initFH();
|
||||
}
|
||||
|
||||
for await (const chunk of serializer) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue