mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
replayserver: serve sw.js directly, make the RWP version configurable, and load RWP from the CDN
replayserver: if a local file path is specified, serve the local file under /source.{wacz,json}, with support for range requests
This commit is contained in:
parent
aca1a64c18
commit
bad67a014a
4 changed files with 94 additions and 12 deletions
|
@ -1,7 +1,7 @@
|
||||||
<!doctype html>
|
<!doctype html>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<script src="https://replayweb.page/ui.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/replaywebpage@$RWP_VERSION/ui.js"></script>
|
||||||
<style>
|
<style>
|
||||||
html {
|
html {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
// eslint-disable-next-line no-undef
|
|
||||||
importScripts("https://replayweb.page/sw.js");
|
|
|
@ -405,8 +405,8 @@ export class ReplayCrawler extends Crawler {
|
||||||
);
|
);
|
||||||
|
|
||||||
if (res && this.params.qaDebugImageDiff) {
|
if (res && this.params.qaDebugImageDiff) {
|
||||||
const dir = path.join(this.collDir, pageid || "unknown");
|
const dir = path.join(this.collDir, "screenshots", pageid || "unknown");
|
||||||
await fsp.mkdir(dir);
|
await fsp.mkdir(dir, { recursive: true });
|
||||||
await fsp.writeFile(path.join(dir, "crawl.png"), PNG.sync.write(crawl));
|
await fsp.writeFile(path.join(dir, "crawl.png"), PNG.sync.write(crawl));
|
||||||
await fsp.writeFile(path.join(dir, "replay.png"), PNG.sync.write(replay));
|
await fsp.writeFile(path.join(dir, "replay.png"), PNG.sync.write(replay));
|
||||||
await fsp.writeFile(path.join(dir, "diff.png"), PNG.sync.write(diff));
|
await fsp.writeFile(path.join(dir, "diff.png"), PNG.sync.write(diff));
|
||||||
|
@ -503,6 +503,8 @@ export class ReplayCrawler extends Crawler {
|
||||||
resourceCounts.replayGood = replayGood;
|
resourceCounts.replayGood = replayGood;
|
||||||
resourceCounts.replayBad = replayBad;
|
resourceCounts.replayBad = replayBad;
|
||||||
|
|
||||||
|
logger.info("Resource counts", { url, ...resourceCounts }, "replay");
|
||||||
|
|
||||||
// if (crawlGood !== replayGood) {
|
// if (crawlGood !== replayGood) {
|
||||||
// console.log("*** ORIG");
|
// console.log("*** ORIG");
|
||||||
// console.log(origResData);
|
// console.log(origResData);
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
import fs from "fs";
|
import fs from "fs";
|
||||||
|
import fsp from "fs/promises";
|
||||||
import http, { IncomingMessage, ServerResponse } from "http";
|
import http, { IncomingMessage, ServerResponse } from "http";
|
||||||
|
import path from "path";
|
||||||
|
|
||||||
|
const RWP_VERSION = "1.8.14";
|
||||||
|
|
||||||
const replayHTML = fs.readFileSync(
|
const replayHTML = fs.readFileSync(
|
||||||
new URL("../../html/replay.html", import.meta.url),
|
new URL("../../html/replay.html", import.meta.url),
|
||||||
{ encoding: "utf8" },
|
{ encoding: "utf8" },
|
||||||
);
|
);
|
||||||
|
|
||||||
const swJS = fs.readFileSync(new URL("../../html/sw.js", import.meta.url), {
|
const swJS = `importScripts("https://cdn.jsdelivr.net/npm/replaywebpage@${RWP_VERSION}/sw.js");`;
|
||||||
encoding: "utf8",
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
const PORT = 9990;
|
const PORT = 9990;
|
||||||
|
@ -16,9 +18,36 @@ const PORT = 9990;
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
export class ReplayServer {
|
export class ReplayServer {
|
||||||
sourceUrl: string;
|
sourceUrl: string;
|
||||||
|
origFileSource: string | null;
|
||||||
|
sourceContentType: string | null;
|
||||||
|
sourceSize?: number;
|
||||||
|
|
||||||
constructor(sourceUrl: string) {
|
constructor(sourceUrlOrFile: string) {
|
||||||
this.sourceUrl = sourceUrl;
|
if (
|
||||||
|
sourceUrlOrFile.startsWith("http://") ||
|
||||||
|
sourceUrlOrFile.startsWith("https://")
|
||||||
|
) {
|
||||||
|
this.sourceUrl = sourceUrlOrFile;
|
||||||
|
this.origFileSource = null;
|
||||||
|
this.sourceContentType = null;
|
||||||
|
} else {
|
||||||
|
this.origFileSource = sourceUrlOrFile;
|
||||||
|
const ext = path.extname(sourceUrlOrFile);
|
||||||
|
this.sourceUrl = `/source${ext}`;
|
||||||
|
|
||||||
|
switch (ext) {
|
||||||
|
case ".wacz":
|
||||||
|
this.sourceContentType = "application/wacz+zip";
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ".json":
|
||||||
|
this.sourceContentType = "application/json";
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
this.sourceContentType = "application/octet-stream";
|
||||||
|
}
|
||||||
|
}
|
||||||
const httpServer = http.createServer((req, res) =>
|
const httpServer = http.createServer((req, res) =>
|
||||||
this.handleRequest(req, res),
|
this.handleRequest(req, res),
|
||||||
);
|
);
|
||||||
|
@ -29,7 +58,7 @@ export class ReplayServer {
|
||||||
return `http://localhost:${PORT}/`;
|
return `http://localhost:${PORT}/`;
|
||||||
}
|
}
|
||||||
|
|
||||||
handleRequest(request: IncomingMessage, response: ServerResponse) {
|
async handleRequest(request: IncomingMessage, response: ServerResponse) {
|
||||||
const parsedUrl = new URL(
|
const parsedUrl = new URL(
|
||||||
request.url || "",
|
request.url || "",
|
||||||
`http://${request.headers.host}`,
|
`http://${request.headers.host}`,
|
||||||
|
@ -39,7 +68,11 @@ export class ReplayServer {
|
||||||
switch (pathname) {
|
switch (pathname) {
|
||||||
case "/":
|
case "/":
|
||||||
response.writeHead(200, { "Content-Type": "text/html" });
|
response.writeHead(200, { "Content-Type": "text/html" });
|
||||||
response.end(replayHTML.replace("$SOURCE", this.sourceUrl));
|
response.end(
|
||||||
|
replayHTML
|
||||||
|
.replace("$SOURCE", this.sourceUrl)
|
||||||
|
.replace("$RWP_VERSION", RWP_VERSION),
|
||||||
|
);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case "/sw.js":
|
case "/sw.js":
|
||||||
|
@ -50,10 +83,59 @@ export class ReplayServer {
|
||||||
response.end(swJS);
|
response.end(swJS);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case this.sourceUrl:
|
||||||
|
if (this.sourceContentType && this.origFileSource) {
|
||||||
|
if (!this.sourceSize) {
|
||||||
|
const { size } = await fsp.stat(this.origFileSource);
|
||||||
|
this.sourceSize = size;
|
||||||
|
}
|
||||||
|
const { opts, status, contentRange, contentLength } =
|
||||||
|
this.getRespOptsForRequest(request, this.sourceSize);
|
||||||
|
response.writeHead(status, {
|
||||||
|
"Accept-Ranges": "bytes",
|
||||||
|
"Content-Type": this.sourceContentType,
|
||||||
|
"Content-Length": contentLength,
|
||||||
|
"Content-Range": contentRange,
|
||||||
|
});
|
||||||
|
console.log(request.method, contentRange, opts);
|
||||||
|
if (request.method === "GET") {
|
||||||
|
fs.createReadStream(this.origFileSource, opts).pipe(response);
|
||||||
|
} else {
|
||||||
|
response.end();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// falls through
|
||||||
|
|
||||||
default:
|
default:
|
||||||
response.writeHead(404, { "Content-Type": "application/json" });
|
response.writeHead(404, { "Content-Type": "application/json" });
|
||||||
response.end(JSON.stringify({ error: "not_found" }));
|
response.end(JSON.stringify({ error: "not_found" }));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Work out how to answer a request for the local source file, honouring a
 * `Range: bytes=start-end` header when one is present.
 *
 * Only the first `bytes=<start>-[<end>]` range in the header is considered;
 * a header that does not match that shape is treated as if no range was sent.
 *
 * @param request - incoming request; only its `range` header is read.
 * @param total - size of the source file in bytes.
 * @returns `status` (206 for a satisfiable range, otherwise 200), `opts`
 *   (inclusive `start`/`end` byte offsets for the read stream; empty for a
 *   full response), `contentRange` (the `Content-Range` header value, or
 *   `undefined` for a full response) and `contentLength`.
 */
getRespOptsForRequest(request: IncomingMessage, total: number) {
  const opts: { start?: number; end?: number } = {};
  const match = (request.headers["range"] || "").match(/bytes=(\d+)-(\d*)/);

  if (match) {
    const lastByte = total - 1;
    const start = parseInt(match[1], 10);
    // An open-ended range ("bytes=100-") runs to the last byte. An explicit
    // end is kept even when it is 0, and is clamped to the last byte so the
    // advertised length never exceeds what the file can supply.
    const end = match[2]
      ? Math.min(parseInt(match[2], 10), lastByte)
      : lastByte;

    // A range that starts past the end of the file, or ends before it
    // starts, cannot be satisfied; ignore it and serve the whole file.
    if (start <= end) {
      opts.start = start;
      opts.end = end;
      return {
        status: 206,
        opts,
        contentRange: `bytes ${start}-${end}/${total}`,
        contentLength: end - start + 1,
      };
    }
  }

  return { status: 200, opts, contentRange: undefined, contentLength: total };
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue