mirror of
https://github.com/openzim/zimit.git
synced 2025-12-31 04:23:15 +00:00
Fixed #191: --lang to crawler, --zim-lang to warc2zim
This commit is contained in:
parent
941db5fdfc
commit
61dc792653
2 changed files with 19 additions and 0 deletions
|
|
@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
- `--title` to set ZIM title
|
||||
- `--description` to set ZIM description
|
||||
- New crawler options: `--maxPageLimit`, `--delay`, `--diskUtilization`
|
||||
- `--zim-lang` param to set warc2zim's `--lang` (ISO-639-3)
|
||||
|
||||
### Changed
|
||||
|
||||
|
|
@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
- Using `main` warc2zim ⚠️ change before releasing!
|
||||
- Disabled Chrome updates to prevent incidental inclusion of update data in WARC/ZIM (#172)
|
||||
- `--failOnFailedSeed` used unconditionally
|
||||
- `--lang` now passed to crawler (ISO-639-1)
|
||||
|
||||
### Removed
|
||||
|
||||
|
|
|
|||
17
zimit.py
17
zimit.py
|
|
@ -205,6 +205,18 @@ def zimit(args=None):
|
|||
action="store_true",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--lang",
|
||||
help="if set, sets the language used by the browser, should be ISO 639 language[-country] code",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--zim-lang",
|
||||
help="Language metadata of ZIM "
|
||||
"(warc2zim --lang param). ISO-639-3 code. "
|
||||
"Retrieved from homepage if found, fallback to `eng`",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--mobileDevice",
|
||||
help="Emulate mobile device by name from "
|
||||
|
|
@ -348,6 +360,10 @@ def zimit(args=None):
|
|||
warc2zim_args.append("--description")
|
||||
warc2zim_args.append(zimit_args.description)
|
||||
|
||||
if zimit_args.zim_lang:
|
||||
warc2zim_args.append("--lang")
|
||||
warc2zim_args.append(zimit_args.zim_lang)
|
||||
|
||||
print("----------")
|
||||
print("Testing warc2zim args")
|
||||
print("Running: warc2zim " + " ".join(warc2zim_args), flush=True)
|
||||
|
|
@ -482,6 +498,7 @@ def get_node_cmd_line(args):
|
|||
"exclude",
|
||||
"collection",
|
||||
"allowHashUrls",
|
||||
"lang",
|
||||
"mobileDevice",
|
||||
"userAgent",
|
||||
"useSitemap",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue