use same waitUntil defaults as current crawler

This commit is contained in:
renaud gaudin 2021-03-04 10:40:12 +00:00
parent 14fc8ffe0f
commit f746f7b020
2 changed files with 15 additions and 3 deletions

View file

@ -1,3 +1,8 @@
# 1.1.4
- Defaults to `load,networkidle0` for waitUntil param (same as crawler)
- Allows setting comma-separated pairs of values for waitUntil param (e.g. `load,networkidle0`)
# 1.1.3
- Allows same first-level-domain redirects

View file

@ -9,6 +9,7 @@ and then calls the Node based driver
"""
import re
import itertools
from argparse import ArgumentParser
import tempfile
import subprocess
@ -22,6 +23,7 @@ from multiprocessing import Process
from warc2zim.main import warc2zim
import requests
import inotify
import inotify.adapters
from tld import get_fld
@ -113,6 +115,10 @@ class ProgressFileWatcher:
def zimit(args=None):
wait_until_options = ["load", "domcontentloaded", "networkidle0", "networkidle2"]
wait_until_all = wait_until_options + [
f"{a},{b}" for a, b in itertools.combinations(wait_until_options, 2)
]
parser = ArgumentParser(
description="Run a browser-based crawl on the specified URL and convert to ZIM"
)
@ -130,9 +136,10 @@ def zimit(args=None):
parser.add_argument(
"--waitUntil",
help="Puppeteer page.goto() condition to wait for before continuing",
choices=["load", "domcontentloaded", "networkidle0", "networkidle2"],
default="load",
help="Puppeteer page.goto() condition to wait for before continuing. One of "
f"{wait_until_options} or a comma-separated combination of those.",
choices=wait_until_all,
default="load,networkidle0",
)
parser.add_argument(