mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
replayserver: support serving sw.js directly, make RWP version configurable, using CDN version
replayserver: if local file path specified, support serving local file under /source.{wacz,json}, support range requests
This commit is contained in:
parent
aca1a64c18
commit
bad67a014a
4 changed files with 94 additions and 12 deletions
|
@ -1,7 +1,7 @@
|
|||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<script src="https://replayweb.page/ui.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/replaywebpage@$RWP_VERSION/ui.js"></script>
|
||||
<style>
|
||||
html {
|
||||
width: 100%;
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
// eslint-disable-next-line no-undef
|
||||
importScripts("https://replayweb.page/sw.js");
|
|
@ -405,8 +405,8 @@ export class ReplayCrawler extends Crawler {
|
|||
);
|
||||
|
||||
if (res && this.params.qaDebugImageDiff) {
|
||||
const dir = path.join(this.collDir, pageid || "unknown");
|
||||
await fsp.mkdir(dir);
|
||||
const dir = path.join(this.collDir, "screenshots", pageid || "unknown");
|
||||
await fsp.mkdir(dir, { recursive: true });
|
||||
await fsp.writeFile(path.join(dir, "crawl.png"), PNG.sync.write(crawl));
|
||||
await fsp.writeFile(path.join(dir, "replay.png"), PNG.sync.write(replay));
|
||||
await fsp.writeFile(path.join(dir, "diff.png"), PNG.sync.write(diff));
|
||||
|
@ -503,6 +503,8 @@ export class ReplayCrawler extends Crawler {
|
|||
resourceCounts.replayGood = replayGood;
|
||||
resourceCounts.replayBad = replayBad;
|
||||
|
||||
logger.info("Resource counts", { url, ...resourceCounts }, "replay");
|
||||
|
||||
// if (crawlGood !== replayGood) {
|
||||
// console.log("*** ORIG");
|
||||
// console.log(origResData);
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
import fs from "fs";
|
||||
import fsp from "fs/promises";
|
||||
import http, { IncomingMessage, ServerResponse } from "http";
|
||||
import path from "path";
|
||||
|
||||
const RWP_VERSION = "1.8.14";
|
||||
|
||||
const replayHTML = fs.readFileSync(
|
||||
new URL("../../html/replay.html", import.meta.url),
|
||||
{ encoding: "utf8" },
|
||||
);
|
||||
|
||||
const swJS = fs.readFileSync(new URL("../../html/sw.js", import.meta.url), {
|
||||
encoding: "utf8",
|
||||
});
|
||||
const swJS = `importScripts("https://cdn.jsdelivr.net/npm/replaywebpage@${RWP_VERSION}/sw.js");`;
|
||||
|
||||
// ============================================================================
|
||||
const PORT = 9990;
|
||||
|
@ -16,9 +18,36 @@ const PORT = 9990;
|
|||
// ============================================================================
|
||||
export class ReplayServer {
|
||||
sourceUrl: string;
|
||||
origFileSource: string | null;
|
||||
sourceContentType: string | null;
|
||||
sourceSize?: number;
|
||||
|
||||
constructor(sourceUrl: string) {
|
||||
this.sourceUrl = sourceUrl;
|
||||
constructor(sourceUrlOrFile: string) {
|
||||
if (
|
||||
sourceUrlOrFile.startsWith("http://") ||
|
||||
sourceUrlOrFile.startsWith("https://")
|
||||
) {
|
||||
this.sourceUrl = sourceUrlOrFile;
|
||||
this.origFileSource = null;
|
||||
this.sourceContentType = null;
|
||||
} else {
|
||||
this.origFileSource = sourceUrlOrFile;
|
||||
const ext = path.extname(sourceUrlOrFile);
|
||||
this.sourceUrl = `/source${ext}`;
|
||||
|
||||
switch (ext) {
|
||||
case ".wacz":
|
||||
this.sourceContentType = "application/wacz+zip";
|
||||
break;
|
||||
|
||||
case ".json":
|
||||
this.sourceContentType = "application/json";
|
||||
break;
|
||||
|
||||
default:
|
||||
this.sourceContentType = "application/octet-stream";
|
||||
}
|
||||
}
|
||||
const httpServer = http.createServer((req, res) =>
|
||||
this.handleRequest(req, res),
|
||||
);
|
||||
|
@ -29,7 +58,7 @@ export class ReplayServer {
|
|||
return `http://localhost:${PORT}/`;
|
||||
}
|
||||
|
||||
handleRequest(request: IncomingMessage, response: ServerResponse) {
|
||||
async handleRequest(request: IncomingMessage, response: ServerResponse) {
|
||||
const parsedUrl = new URL(
|
||||
request.url || "",
|
||||
`http://${request.headers.host}`,
|
||||
|
@ -39,7 +68,11 @@ export class ReplayServer {
|
|||
switch (pathname) {
|
||||
case "/":
|
||||
response.writeHead(200, { "Content-Type": "text/html" });
|
||||
response.end(replayHTML.replace("$SOURCE", this.sourceUrl));
|
||||
response.end(
|
||||
replayHTML
|
||||
.replace("$SOURCE", this.sourceUrl)
|
||||
.replace("$RWP_VERSION", RWP_VERSION),
|
||||
);
|
||||
return;
|
||||
|
||||
case "/sw.js":
|
||||
|
@ -50,10 +83,59 @@ export class ReplayServer {
|
|||
response.end(swJS);
|
||||
return;
|
||||
|
||||
case this.sourceUrl:
|
||||
if (this.sourceContentType && this.origFileSource) {
|
||||
if (!this.sourceSize) {
|
||||
const { size } = await fsp.stat(this.origFileSource);
|
||||
this.sourceSize = size;
|
||||
}
|
||||
const { opts, status, contentRange, contentLength } =
|
||||
this.getRespOptsForRequest(request, this.sourceSize);
|
||||
response.writeHead(status, {
|
||||
"Accept-Ranges": "bytes",
|
||||
"Content-Type": this.sourceContentType,
|
||||
"Content-Length": contentLength,
|
||||
"Content-Range": contentRange,
|
||||
});
|
||||
console.log(request.method, contentRange, opts);
|
||||
if (request.method === "GET") {
|
||||
fs.createReadStream(this.origFileSource, opts).pipe(response);
|
||||
} else {
|
||||
response.end();
|
||||
}
|
||||
break;
|
||||
}
|
||||
// falls through
|
||||
|
||||
default:
|
||||
response.writeHead(404, { "Content-Type": "application/json" });
|
||||
response.end(JSON.stringify({ error: "not_found" }));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Work out how to answer a request for the local source file, honoring a
 * `Range: bytes=start-end` header when one is present.
 *
 * Only the first `start-end` / `start-` range in the header is considered;
 * anything else (no header, suffix ranges such as `bytes=-500`, or a range
 * whose end precedes its start) is answered with the full file.
 *
 * @param request - incoming request; only its `range` header is read
 * @param total - size of the source file in bytes
 * @returns `status` (200, 206 or 416), `opts` with the inclusive `start` /
 *   `end` byte offsets to read from the file, the `contentRange` header
 *   value (undefined for a 200 response) and the `contentLength` of the body
 */
getRespOptsForRequest(request: IncomingMessage, total: number) {
  const range = request.headers["range"] || "";
  const array = range.match(/bytes=(\d+)-(\d*)/);

  const opts: { start?: number; end?: number } = {};
  let contentRange: string | undefined = undefined;

  if (array) {
    const start = parseInt(array[1], 10);
    // An empty second group means an open-ended range ("bytes=100-").
    const requestedEnd = array[2] ? parseInt(array[2], 10) : undefined;

    // A range whose end precedes its start is invalid; ignore the header
    // and fall through to the full 200 response.
    if (requestedEnd === undefined || requestedEnd >= start) {
      if (start >= total) {
        // Nothing to serve at or beyond EOF: report the real size and point
        // the read offset at EOF so a stream opened with `opts` is empty.
        opts.start = total;
        return {
          status: 416,
          opts,
          contentRange: `bytes */${total}`,
          contentLength: 0,
        };
      }

      // Use `??` rather than `||` so an explicit end of 0 ("bytes=0-0") is
      // kept, and clamp to the last byte so the advertised length always
      // matches what can actually be read.
      const end = Math.min(requestedEnd ?? total - 1, total - 1);

      opts.start = start;
      opts.end = end;
      contentRange = `bytes ${start}-${end}/${total}`;

      return {
        status: 206,
        opts,
        contentRange,
        contentLength: end - start + 1,
      };
    }
  }

  return { status: 200, opts, contentRange, contentLength: total };
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue