case-insensitive params (#27)

* make --generateWacz, --generateCdx case-insensitive with alias option
* fix eslint config and eslint issues

Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local>
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Emma Dickson 2021-02-17 12:37:07 -05:00 committed by GitHub
parent 4d6dcbc3d6
commit 0688674f6f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 1245 additions and 1258 deletions

14
.eslintrc.js Normal file
View file

@ -0,0 +1,14 @@
module.exports = {
"env": {
"browser": true,
"es2021": true,
"node": true
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 12,
"sourceType": "module"
},
"rules": {
}
};

View file

@ -11,7 +11,7 @@ function autofetcher() {
'video > source[srcset], video > source[data-srcset], video > source[data-src], ' +
'audio > source[srcset], audio > source[data-srcset], audio > source[data-src]';
const SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
const SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/; // eslint-disable-line no-useless-escape
const STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
const IMPORT_REGEX = /(@import\s*[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
@ -35,6 +35,7 @@ function autofetcher() {
}
async run() {
/*eslint no-constant-condition: ["error", { "checkLoops": false }]*/
while (true) {
this.extractSrcSrcSetAll(document);
this.extractStyleSheets();
@ -211,7 +212,7 @@ function autofetcher() {
}
new AutoFetcher().init();
};
}
// ===========================================================================

View file

@ -88,7 +88,7 @@ function autoplay() {
}
}, 3000);
};
}
// ===========================================================================

View file

@ -22,12 +22,12 @@ async function autoScroll() {
class AutoScrollBehavior
{
async beforeLoad(page, crawler) {
async beforeLoad() {
}
async afterLoad(page, crawler) {
try {
await Promise.race([page.evaluate(autoscroll), crawler.sleep(30000)]);
await Promise.race([page.evaluate(autoScroll), crawler.sleep(30000)]);
} catch (e) {
console.warn("Autoscroll Behavior Failed", e);
}

View file

@ -209,12 +209,14 @@ class Crawler {
},
"generateCDX": {
alias: ["generatecdx", "generateCdx"],
describe: "If set, generate index (CDXJ) for use with pywb after crawl is done",
type: "boolean",
default: false,
},
"generateWACZ": {
alias: ["generatewacz", "generateWacz"],
describe: "If set, generate wacz",
type: "boolean",
default: false,
@ -464,7 +466,7 @@ class Crawler {
child_process.spawnSync("wb-manager", ["reindex", this.params.collection], {stdio: "inherit", cwd: this.params.cwd});
}
if (this.params.generateWACZ) {
if (this.params.generateWACZ || this.params.generateWacz || this.params.generatewacz ) {
console.log("Generating WACZ");
const archiveDir = path.join(this.collDir, "archive");
@ -476,7 +478,7 @@ class Crawler {
const waczFilename = this.params.collection.concat(".wacz");
const waczPath = path.join(this.collDir, waczFilename);
const argument_list = ["create", "-o", waczPath, "--pages", this.pagesFile, "-f"];
warcFileList.forEach((val, index) => argument_list.push(path.join(archiveDir, val)));
warcFileList.forEach((val, index) => argument_list.push(path.join(archiveDir, val))); // eslint-disable-line no-unused-vars
// Run the wacz create command
child_process.spawnSync("wacz" , argument_list);
@ -557,7 +559,6 @@ class Crawler {
writePage(url, title){
const id = uuidv4();
const today = new Date();
const row = {"id": id, "url": url, "title": title};
const processedRow = JSON.stringify(row).concat("\n");
try {

View file

@ -11,11 +11,11 @@
"puppeteer-cluster": "^0.22.0",
"puppeteer-core": "^5.3.1",
"sitemapper": "^3.1.2",
"yargs": "^16.0.3",
"uuid": "8.3.2"
},
"uuid": "8.3.2",
"yargs": "^16.0.3"
},
"devDependencies": {
"eslint": "^7.19.0",
"eslint": "^7.20.0",
"eslint-plugin-react": "^7.22.0"
}
}

2461
yarn.lock

File diff suppressed because it is too large Load diff