warcwriter: ensure the file handle is also initialized before first use, instead of throwing an error

This commit is contained in:
Ilya Kreymer 2024-02-23 21:21:09 -08:00
parent d36564e0b0
commit 34069eaaf9

View file

@ -27,8 +27,8 @@ export class WARCWriter implements IndexerOffsetLength {
indexer?: CDXIndexer; indexer?: CDXIndexer;
fh?: Writable | null; fh: Writable | null;
cdxFH?: Writable | null; cdxFH: Writable | null;
constructor({ constructor({
archivesDir, archivesDir,
@ -52,6 +52,8 @@ export class WARCWriter implements IndexerOffsetLength {
this.rolloverSize = rolloverSize; this.rolloverSize = rolloverSize;
this.filenameTemplate = filenameTemplate; this.filenameTemplate = filenameTemplate;
this.cdxFH = null;
this.fh = null;
} }
_initNewFile() { _initNewFile() {
@ -79,22 +81,23 @@ export class WARCWriter implements IndexerOffsetLength {
"writer", "writer",
); );
this.filename = this._initNewFile(); this.filename = this._initNewFile();
this.fh = null;
this.cdxFH = null; this.cdxFH = null;
} else if (!this.filename) { } else if (!this.filename) {
this.filename = this._initNewFile(); this.filename = this._initNewFile();
} }
if (!this.fh) { let fh: Writable | null = this.fh;
this.fh = fs.createWriteStream(
path.join(this.archivesDir, this.filename), if (!fh) {
); fh = fs.createWriteStream(path.join(this.archivesDir, this.filename));
} }
if (!this.cdxFH && this.tempCdxDir) { if (!this.cdxFH && this.tempCdxDir) {
this.cdxFH = fs.createWriteStream( this.cdxFH = fs.createWriteStream(
path.join(this.tempCdxDir, this.filename + ".cdx"), path.join(this.tempCdxDir, this.filename + ".cdx"),
); );
} }
return fh;
} }
async writeRecordPair( async writeRecordPair(
@ -117,8 +120,6 @@ export class WARCWriter implements IndexerOffsetLength {
responseSerializer = new WARCSerializer(responseRecord, opts); responseSerializer = new WARCSerializer(responseRecord, opts);
} }
await this.initFH();
this.recordLength = await this._writeRecord( this.recordLength = await this._writeRecord(
responseRecord, responseRecord,
responseSerializer, responseSerializer,
@ -139,17 +140,18 @@ export class WARCWriter implements IndexerOffsetLength {
const opts = { gzip: this.gzip }; const opts = { gzip: this.gzip };
const requestSerializer = new WARCSerializer(record, opts); const requestSerializer = new WARCSerializer(record, opts);
this.recordLength = await this._writeRecord(record, requestSerializer); this.recordLength = await this._writeRecord(record, requestSerializer);
this._writeCDX(record); this._writeCDX(record);
} }
async _writeRecord(record: WARCRecord, serializer: WARCSerializer) { private async _writeRecord(record: WARCRecord, serializer: WARCSerializer) {
let total = 0; let total = 0;
const url = record.warcTargetURI; const url = record.warcTargetURI;
if (!this.fh) { if (!this.fh) {
throw new Error("writer not initialized"); this.fh = await this.initFH();
} }
for await (const chunk of serializer) { for await (const chunk of serializer) {