case insensitive params (#27)

* make --generateWACZ and --generateCDX also accept the spellings --generateWacz/--generatewacz and --generateCdx/--generatecdx, via the yargs alias option
* fix eslint config and eslint issues

Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local>
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Emma Dickson 2021-02-17 12:37:07 -05:00 committed by GitHub
parent 4d6dcbc3d6
commit 0688674f6f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 1245 additions and 1258 deletions

14
.eslintrc.js Normal file
View file

@ -0,0 +1,14 @@
module.exports = {
"env": {
"browser": true,
"es2021": true,
"node": true
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 12,
"sourceType": "module"
},
"rules": {
}
};

View file

@ -11,7 +11,7 @@ function autofetcher() {
'video > source[srcset], video > source[data-srcset], video > source[data-src], ' + 'video > source[srcset], video > source[data-srcset], video > source[data-src], ' +
'audio > source[srcset], audio > source[data-srcset], audio > source[data-src]'; 'audio > source[srcset], audio > source[data-srcset], audio > source[data-src]';
const SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/; const SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/; // eslint-disable-line no-useless-escape
const STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi; const STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
const IMPORT_REGEX = /(@import\s*[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi; const IMPORT_REGEX = /(@import\s*[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
@ -35,6 +35,7 @@ function autofetcher() {
} }
async run() { async run() {
/*eslint no-constant-condition: ["error", { "checkLoops": false }]*/
while (true) { while (true) {
this.extractSrcSrcSetAll(document); this.extractSrcSrcSetAll(document);
this.extractStyleSheets(); this.extractStyleSheets();
@ -211,7 +212,7 @@ function autofetcher() {
} }
new AutoFetcher().init(); new AutoFetcher().init();
}; }
// =========================================================================== // ===========================================================================

View file

@ -88,7 +88,7 @@ function autoplay() {
} }
}, 3000); }, 3000);
}; }
// =========================================================================== // ===========================================================================

View file

@ -22,12 +22,12 @@ async function autoScroll() {
class AutoScrollBehavior class AutoScrollBehavior
{ {
async beforeLoad(page, crawler) { async beforeLoad() {
} }
async afterLoad(page, crawler) { async afterLoad(page, crawler) {
try { try {
await Promise.race([page.evaluate(autoscroll), crawler.sleep(30000)]); await Promise.race([page.evaluate(autoScroll), crawler.sleep(30000)]);
} catch (e) { } catch (e) {
console.warn("Autoscroll Behavior Failed", e); console.warn("Autoscroll Behavior Failed", e);
} }

View file

@ -209,12 +209,14 @@ class Crawler {
}, },
"generateCDX": { "generateCDX": {
alias: ["generatecdx", "generateCdx"],
describe: "If set, generate index (CDXJ) for use with pywb after crawl is done", describe: "If set, generate index (CDXJ) for use with pywb after crawl is done",
type: "boolean", type: "boolean",
default: false, default: false,
}, },
"generateWACZ": { "generateWACZ": {
alias: ["generatewacz", "generateWacz"],
describe: "If set, generate wacz", describe: "If set, generate wacz",
type: "boolean", type: "boolean",
default: false, default: false,
@ -464,7 +466,7 @@ class Crawler {
child_process.spawnSync("wb-manager", ["reindex", this.params.collection], {stdio: "inherit", cwd: this.params.cwd}); child_process.spawnSync("wb-manager", ["reindex", this.params.collection], {stdio: "inherit", cwd: this.params.cwd});
} }
if (this.params.generateWACZ) { if (this.params.generateWACZ || this.params.generateWacz || this.params.generatewacz ) {
console.log("Generating WACZ"); console.log("Generating WACZ");
const archiveDir = path.join(this.collDir, "archive"); const archiveDir = path.join(this.collDir, "archive");
@ -476,7 +478,7 @@ class Crawler {
const waczFilename = this.params.collection.concat(".wacz"); const waczFilename = this.params.collection.concat(".wacz");
const waczPath = path.join(this.collDir, waczFilename); const waczPath = path.join(this.collDir, waczFilename);
const argument_list = ["create", "-o", waczPath, "--pages", this.pagesFile, "-f"]; const argument_list = ["create", "-o", waczPath, "--pages", this.pagesFile, "-f"];
warcFileList.forEach((val, index) => argument_list.push(path.join(archiveDir, val))); warcFileList.forEach((val, index) => argument_list.push(path.join(archiveDir, val))); // eslint-disable-line no-unused-vars
// Run the wacz create command // Run the wacz create command
child_process.spawnSync("wacz" , argument_list); child_process.spawnSync("wacz" , argument_list);
@ -557,7 +559,6 @@ class Crawler {
writePage(url, title){ writePage(url, title){
const id = uuidv4(); const id = uuidv4();
const today = new Date();
const row = {"id": id, "url": url, "title": title}; const row = {"id": id, "url": url, "title": title};
const processedRow = JSON.stringify(row).concat("\n"); const processedRow = JSON.stringify(row).concat("\n");
try { try {

View file

@ -11,11 +11,11 @@
"puppeteer-cluster": "^0.22.0", "puppeteer-cluster": "^0.22.0",
"puppeteer-core": "^5.3.1", "puppeteer-core": "^5.3.1",
"sitemapper": "^3.1.2", "sitemapper": "^3.1.2",
"yargs": "^16.0.3", "uuid": "8.3.2",
"uuid": "8.3.2" "yargs": "^16.0.3"
}, },
"devDependencies": { "devDependencies": {
"eslint": "^7.19.0", "eslint": "^7.20.0",
"eslint-plugin-react": "^7.22.0" "eslint-plugin-react": "^7.22.0"
} }
} }

2461
yarn.lock

File diff suppressed because it is too large Load diff