mirror of
https://github.com/openzim/zimit.git
synced 2025-12-31 04:23:15 +00:00
Added --overwrite flag to zimit
This commit is contained in:
parent
34ce7eb98d
commit
81018f06fa
6 changed files with 110 additions and 2 deletions
|
|
@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Add `--overwrite` flag to overwrite an existing ZIM file (#399)
|
||||
|
||||
### Changed
|
||||
- Fix issues preventing interrupted crawls from being resumed. (#499)
|
||||
- Ensure build directory is used explicitly instead of a randomized subdirectory when passed, and pre-create it if it does not exist.
|
||||
|
|
|
|||
|
|
@ -970,6 +970,12 @@
|
|||
"alias": "name",
|
||||
"pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+)$",
|
||||
"relaxedPattern": "^[A-Za-z0-9._-]+$"
|
||||
},
|
||||
"overwrite": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Overwrite",
|
||||
"description": "Whether to overwrite existing ZIM file if it exists"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -849,6 +849,9 @@ def run(raw_args):
|
|||
warc2zim_args.append("--lang")
|
||||
warc2zim_args.append(known_args.zim_lang)
|
||||
|
||||
if known_args.overwrite:
|
||||
warc2zim_args.append("--overwrite")
|
||||
|
||||
logger.info("----------")
|
||||
logger.info("Testing warc2zim args")
|
||||
logger.info("Running: warc2zim " + " ".join(warc2zim_args))
|
||||
|
|
@ -1036,7 +1039,6 @@ def run(raw_args):
|
|||
warc_files.append(Path(extract_path))
|
||||
|
||||
else:
|
||||
|
||||
logger.info(f"Running browsertrix-crawler crawl: {cmd_line}")
|
||||
crawl = subprocess.run(crawler_args, check=False)
|
||||
if (
|
||||
|
|
@ -1091,7 +1093,7 @@ def run(raw_args):
|
|||
logger.info("----------")
|
||||
logger.info(
|
||||
f"Processing WARC files in/at "
|
||||
f'{" ".join(str(warc_file) for warc_file in warc_files)}'
|
||||
f"{' '.join(str(warc_file) for warc_file in warc_files)}"
|
||||
)
|
||||
warc2zim_args.extend(str(warc_file) for warc_file in warc_files)
|
||||
|
||||
|
|
|
|||
14
tests/conftest.py
Normal file
14
tests/conftest.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
import pytest
|
||||
|
||||
from zimit import zimit as app
|
||||
|
||||
"""
|
||||
Cleanup is disabled because atexit hooks run at the very end of the Python process
|
||||
shutdown. By the time cleanup() is called, the logging module has already closed its
|
||||
file streams.
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def disable_zimit_cleanup(monkeypatch):
|
||||
monkeypatch.setattr(app, "cleanup", lambda: None)
|
||||
BIN
tests/data/example-response.warc
Normal file
BIN
tests/data/example-response.warc
Normal file
Binary file not shown.
83
tests/test_overwrite.py
Normal file
83
tests/test_overwrite.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
from zimit.zimit import run
|
||||
|
||||
TEST_DATA_DIR = pathlib.Path(__file__).parent / "data"
|
||||
|
||||
|
||||
def test_overwrite_flag_behaviour(tmp_path):
|
||||
zim_output = "overwrite-test.zim"
|
||||
output_path = tmp_path / zim_output
|
||||
|
||||
# 1st run → creates file
|
||||
result = run(
|
||||
[
|
||||
"--seeds",
|
||||
"https://example.com",
|
||||
"--warcs",
|
||||
str(TEST_DATA_DIR / "example-response.warc"),
|
||||
"--output",
|
||||
str(tmp_path),
|
||||
"--zim-file",
|
||||
zim_output,
|
||||
"--name",
|
||||
"overwrite-test",
|
||||
]
|
||||
)
|
||||
assert result in (None, 100)
|
||||
assert output_path.exists()
|
||||
|
||||
# 2nd run, no overwrite → should fail
|
||||
with pytest.raises(SystemExit) as exc:
|
||||
run(
|
||||
[
|
||||
"--seeds",
|
||||
"https://example.com",
|
||||
"--warcs",
|
||||
str(TEST_DATA_DIR / "example-response.warc"),
|
||||
"--output",
|
||||
str(tmp_path),
|
||||
"--zim-file",
|
||||
zim_output,
|
||||
"--name",
|
||||
"overwrite-test",
|
||||
]
|
||||
)
|
||||
assert exc.value.code == 2
|
||||
|
||||
# 2nd run repeated without --warcs, still no overwrite → should also fail
|
||||
with pytest.raises(SystemExit) as exc:
|
||||
run(
|
||||
[
|
||||
"--seeds",
|
||||
"https://example.com",
|
||||
"--output",
|
||||
str(tmp_path),
|
||||
"--zim-file",
|
||||
zim_output,
|
||||
"--name",
|
||||
"overwrite-test",
|
||||
]
|
||||
)
|
||||
assert exc.value.code == 2
|
||||
|
||||
# 3rd run, with overwrite → should succeed
|
||||
result = run(
|
||||
[
|
||||
"--seeds",
|
||||
"https://example.com",
|
||||
"--warcs",
|
||||
str(TEST_DATA_DIR / "example-response.warc"),
|
||||
"--output",
|
||||
str(tmp_path),
|
||||
"--zim-file",
|
||||
zim_output,
|
||||
"--name",
|
||||
"overwrite-test",
|
||||
"--overwrite",
|
||||
]
|
||||
)
|
||||
assert result in (None, 100)
|
||||
assert output_path.exists()
|
||||
Loading…
Add table
Add a link
Reference in a new issue