mirror of
https://github.com/openzim/zimit.git
synced 2025-12-31 04:23:15 +00:00
new crawler folder structure
This commit is contained in:
parent
03abf6050a
commit
2e9c129523
2 changed files with 3 additions and 1 deletions
|
|
@ -4,6 +4,8 @@
|
|||
- Allows setting combinations of values for waitUntil param
|
||||
- Updated warc2zim to 1.3.5
|
||||
- Updated browsertrix-crawler to 0.3.1
|
||||
- WARC files passed to warc2zim are now written to `{temp_root_dir}/collections/capture-*/archive/`, where
|
||||
`capture-*` is dynamic and includes the datetime (from browsertrix-crawler).
|
||||
|
||||
# 1.1.3
|
||||
|
||||
|
|
|
|||
2
zimit.py
2
zimit.py
|
|
@ -279,7 +279,7 @@ def zimit(args=None):
|
|||
print(f"Running browsertrix-crawler crawl: {cmd_line}", flush=True)
|
||||
subprocess.run(cmd_args, check=True)
|
||||
|
||||
warc_files = temp_root_dir / "collections" / "capture" / "archive"
|
||||
warc_files = list(temp_root_dir.rglob("collections/capture-*/archive/"))[-1]
|
||||
warc2zim_args.append(str(warc_files))
|
||||
|
||||
num_files = sum(1 for e in warc_files.iterdir())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue