mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
warcwriter: ensure the file handle is also initialized before first use, instead of throwing an error
This commit is contained in:
parent
d36564e0b0
commit
34069eaaf9
1 changed file with 13 additions and 11 deletions
|
@ -27,8 +27,8 @@ export class WARCWriter implements IndexerOffsetLength {
|
||||||
|
|
||||||
indexer?: CDXIndexer;
|
indexer?: CDXIndexer;
|
||||||
|
|
||||||
fh?: Writable | null;
|
fh: Writable | null;
|
||||||
cdxFH?: Writable | null;
|
cdxFH: Writable | null;
|
||||||
|
|
||||||
constructor({
|
constructor({
|
||||||
archivesDir,
|
archivesDir,
|
||||||
|
@ -52,6 +52,8 @@ export class WARCWriter implements IndexerOffsetLength {
|
||||||
this.rolloverSize = rolloverSize;
|
this.rolloverSize = rolloverSize;
|
||||||
|
|
||||||
this.filenameTemplate = filenameTemplate;
|
this.filenameTemplate = filenameTemplate;
|
||||||
|
this.cdxFH = null;
|
||||||
|
this.fh = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
_initNewFile() {
|
_initNewFile() {
|
||||||
|
@ -79,22 +81,23 @@ export class WARCWriter implements IndexerOffsetLength {
|
||||||
"writer",
|
"writer",
|
||||||
);
|
);
|
||||||
this.filename = this._initNewFile();
|
this.filename = this._initNewFile();
|
||||||
this.fh = null;
|
|
||||||
this.cdxFH = null;
|
this.cdxFH = null;
|
||||||
} else if (!this.filename) {
|
} else if (!this.filename) {
|
||||||
this.filename = this._initNewFile();
|
this.filename = this._initNewFile();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!this.fh) {
|
let fh: Writable | null = this.fh;
|
||||||
this.fh = fs.createWriteStream(
|
|
||||||
path.join(this.archivesDir, this.filename),
|
if (!fh) {
|
||||||
);
|
fh = fs.createWriteStream(path.join(this.archivesDir, this.filename));
|
||||||
}
|
}
|
||||||
if (!this.cdxFH && this.tempCdxDir) {
|
if (!this.cdxFH && this.tempCdxDir) {
|
||||||
this.cdxFH = fs.createWriteStream(
|
this.cdxFH = fs.createWriteStream(
|
||||||
path.join(this.tempCdxDir, this.filename + ".cdx"),
|
path.join(this.tempCdxDir, this.filename + ".cdx"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return fh;
|
||||||
}
|
}
|
||||||
|
|
||||||
async writeRecordPair(
|
async writeRecordPair(
|
||||||
|
@ -117,8 +120,6 @@ export class WARCWriter implements IndexerOffsetLength {
|
||||||
responseSerializer = new WARCSerializer(responseRecord, opts);
|
responseSerializer = new WARCSerializer(responseRecord, opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.initFH();
|
|
||||||
|
|
||||||
this.recordLength = await this._writeRecord(
|
this.recordLength = await this._writeRecord(
|
||||||
responseRecord,
|
responseRecord,
|
||||||
responseSerializer,
|
responseSerializer,
|
||||||
|
@ -139,17 +140,18 @@ export class WARCWriter implements IndexerOffsetLength {
|
||||||
const opts = { gzip: this.gzip };
|
const opts = { gzip: this.gzip };
|
||||||
|
|
||||||
const requestSerializer = new WARCSerializer(record, opts);
|
const requestSerializer = new WARCSerializer(record, opts);
|
||||||
|
|
||||||
this.recordLength = await this._writeRecord(record, requestSerializer);
|
this.recordLength = await this._writeRecord(record, requestSerializer);
|
||||||
|
|
||||||
this._writeCDX(record);
|
this._writeCDX(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
async _writeRecord(record: WARCRecord, serializer: WARCSerializer) {
|
private async _writeRecord(record: WARCRecord, serializer: WARCSerializer) {
|
||||||
let total = 0;
|
let total = 0;
|
||||||
const url = record.warcTargetURI;
|
const url = record.warcTargetURI;
|
||||||
|
|
||||||
if (!this.fh) {
|
if (!this.fh) {
|
||||||
throw new Error("writer not initialized");
|
this.fh = await this.initFH();
|
||||||
}
|
}
|
||||||
|
|
||||||
for await (const chunk of serializer) {
|
for await (const chunk of serializer) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue