mirror of
https://github.com/openzim/zimit.git
synced 2025-12-31 04:23:15 +00:00
Compare commits
28 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a7e236f0d7 | ||
|
|
81018f06fa | ||
|
|
34ce7eb98d | ||
|
|
5bb068ffea | ||
|
|
aec19d95d2 | ||
|
|
277473884e | ||
|
|
e30a82a91c | ||
|
|
ef004f3863 | ||
|
|
6db73a0a83 | ||
|
|
57a88434e2 | ||
|
|
4595d2a302 | ||
|
|
611d2033f7 | ||
|
|
00845293d6 | ||
|
|
44cf4218cb | ||
|
|
6b520318a2 | ||
|
|
a9805c84c2 | ||
|
|
8630b87a1f | ||
|
|
ad09665c4a | ||
|
|
1d2069a66b | ||
|
|
4ec47cd6dd | ||
|
|
b60dd388e7 | ||
|
|
5624cbf081 | ||
|
|
8c471d9ee2 | ||
|
|
009b8b4bd6 | ||
|
|
0c795b0051 | ||
|
|
b5d87198d8 | ||
|
|
511c3a5021 | ||
|
|
3421ca0212 |
11 changed files with 1158 additions and 12 deletions
45
.github/workflows/update-zim-offliner-definition.yaml
vendored
Normal file
45
.github/workflows/update-zim-offliner-definition.yaml
vendored
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
name: Update ZIMFarm Definitions
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- "offliner-definition.json"
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: "Version to publish"
|
||||
required: false
|
||||
default: "dev"
|
||||
|
||||
jobs:
|
||||
prepare-json:
|
||||
runs-on: ubuntu-24.04
|
||||
outputs:
|
||||
offliner_definition_b64: ${{ steps.read-json.outputs.offliner_definition_b64 }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- id: read-json
|
||||
run: |
|
||||
if [ ! -f "offliner-definition.json" ]; then
|
||||
echo "File not found!" >&2
|
||||
exit 1
|
||||
fi
|
||||
json_b64=$(base64 -w0 <<< "$(jq -c . offliner-definition.json)")
|
||||
echo "offliner_definition_b64=$json_b64" >> $GITHUB_OUTPUT
|
||||
call-workflow:
|
||||
needs: prepare-json
|
||||
uses: openzim/overview/.github/workflows/update-zimfarm-offliner-definition.yaml@main
|
||||
with:
|
||||
version: ${{ github.event_name == 'release' && github.event.release.tag_name || (github.event.inputs.version || 'dev') }}
|
||||
offliner: zimit
|
||||
offliner_definition_b64: ${{ needs.prepare-json.outputs.offliner_definition_b64 }}
|
||||
secrets:
|
||||
zimfarm_ci_secret: ${{ secrets.ZIMFARM_CI_SECRET }}
|
||||
17
CHANGELOG.md
17
CHANGELOG.md
|
|
@ -5,6 +5,23 @@ All notable changes to this project are documented in this file.
|
|||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.2.0).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Added `--overwrite` flag to overwrite existing ZIM file if it exists (#399)
|
||||
|
||||
### Changed
|
||||
- Fix issues preventing interrupted crawls from being resumed. (#499)
|
||||
- Ensure build directory is used explicitly instead of a randomized subdirectory when passed, and pre-create it if it does not exist.
|
||||
- Use all warc_dirs found instead of just the latest so interrupted crawls use all collected pages across runs when an explicit collections directory is not passed.
|
||||
- Don't cleanup an explicitly passed build directory.
|
||||
|
||||
## [3.0.5] - 2024-04-11
|
||||
|
||||
### Changed
|
||||
|
||||
- Upgrade to browsertrix crawler 1.6.0 (#493)
|
||||
|
||||
## [3.0.4] - 2024-04-04
|
||||
|
||||
### Changed
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM webrecorder/browsertrix-crawler:1.5.9
|
||||
FROM webrecorder/browsertrix-crawler:1.6.0
|
||||
LABEL org.opencontainers.image.source=https://github.com/openzim/zimit
|
||||
|
||||
# add deadsnakes ppa for latest Python on Ubuntu
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
Zimit
|
||||
=====
|
||||
|
||||
Zimit is a scraper allowing to create ZIM file from any Web site.
|
||||
Zimit is a scraper allowing to create [ZIM file](https://en.wikipedia.org/wiki/ZIM_(file_format)) from any Web site.
|
||||
|
||||
[](https://www.codefactor.io/repository/github/openzim/zimit)
|
||||
[](https://www.gnu.org/licenses/gpl-3.0)
|
||||
|
|
|
|||
981
offliner-definition.json
Normal file
981
offliner-definition.json
Normal file
|
|
@ -0,0 +1,981 @@
|
|||
{
|
||||
"offliner_id": "zimit",
|
||||
"stdOutput": true,
|
||||
"stdStats": "zimit-progress-file",
|
||||
"flags": {
|
||||
"seeds": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Seeds",
|
||||
"description": "The seed URL(s) to start crawling from. Multiple seed URLs must be separated by a comma (usually not needed, these are just the crawl seeds). First seed URL is used as ZIM homepage"
|
||||
},
|
||||
"seed_file": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Seed File",
|
||||
"description": "If set, read a list of seed urls, one per line. HTTPS URL to an online file."
|
||||
},
|
||||
"lang": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Browser Language",
|
||||
"description": "If set, sets the language used by the browser, should be ISO 639 language[-country] code, e.g. `en` or `en-GB`"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Title",
|
||||
"description": "Custom title for your ZIM. Defaults to title of main page",
|
||||
"minLength": 1,
|
||||
"maxLength": 30
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Description",
|
||||
"description": "Description for ZIM",
|
||||
"minLength": 1,
|
||||
"maxLength": 80
|
||||
},
|
||||
"favicon": {
|
||||
"type": "blob",
|
||||
"kind": "image",
|
||||
"required": false,
|
||||
"title": "Illustration",
|
||||
"description": "URL for Illustration. "
|
||||
},
|
||||
"tags": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "ZIM Tags",
|
||||
"description": "Single string with individual tags separated by a semicolon."
|
||||
},
|
||||
"creator": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Creator",
|
||||
"description": "Name of content creator"
|
||||
},
|
||||
"publisher": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Publisher",
|
||||
"isPublisher": true,
|
||||
"description": "Custom publisher name (ZIM metadata). openZIM otherwise"
|
||||
},
|
||||
"source": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Source",
|
||||
"description": "Source name/URL of content"
|
||||
},
|
||||
"workers": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Workers",
|
||||
"description": "The number of workers to run in parallel. Defaults to 1",
|
||||
"min": 1
|
||||
},
|
||||
"wait_until": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "WaitUntil",
|
||||
"description": "Puppeteer page.goto() condition to wait for before continuing. One of load, domcontentloaded, networkidle0 or networkidle2, or a comma-separated combination of those. Default is load,networkidle2"
|
||||
},
|
||||
"extra_hops": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Extra Hops",
|
||||
"description": "Number of extra 'hops' to follow, beyond the current scope. Default is 0",
|
||||
"min": 0
|
||||
},
|
||||
"page_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Page Limit",
|
||||
"description": "Limit crawl to this number of pages. Default is 0 (no-limit).",
|
||||
"min": 0
|
||||
},
|
||||
"max_page_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Max Page Limit",
|
||||
"description": "Maximum pages to crawl, overriding pageLimit if both are set. Default is 0 (no-limit)",
|
||||
"min": 0
|
||||
},
|
||||
"page_load_timeout": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Page Load Timeout",
|
||||
"description": "Timeout for each page to load (in seconds). Default is 90",
|
||||
"min": 0
|
||||
},
|
||||
"scope_type": {
|
||||
"type": "string-enum",
|
||||
"required": false,
|
||||
"title": "Scope Type",
|
||||
"description": "A predefined scope of the crawl. For more customization, use 'custom' and set scopeIncludeRx/scopeExcludeRx regexes. Default is custom if scopeIncludeRx is set, prefix otherwise.",
|
||||
"choices": [
|
||||
{
|
||||
"title": "Page",
|
||||
"value": "page"
|
||||
},
|
||||
{
|
||||
"title": "Page SPA",
|
||||
"value": "page-spa"
|
||||
},
|
||||
{
|
||||
"title": "Prefix",
|
||||
"value": "prefix"
|
||||
},
|
||||
{
|
||||
"title": "Host",
|
||||
"value": "host"
|
||||
},
|
||||
{
|
||||
"title": "Domain",
|
||||
"value": "domain"
|
||||
},
|
||||
{
|
||||
"title": "Any",
|
||||
"value": "any"
|
||||
},
|
||||
{
|
||||
"title": "Custom",
|
||||
"value": "custom"
|
||||
}
|
||||
]
|
||||
},
|
||||
"scope_include_rx": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Scope Include Regex",
|
||||
"description": "Regex of page URLs that should be included in the crawl (defaults to the immediate directory of seed)"
|
||||
},
|
||||
"scope_exclude_rx": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Scope Exclude Regex",
|
||||
"description": "Regex of page URLs that should be excluded from the crawl"
|
||||
},
|
||||
"allow_hash_urls": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Allow Hashtag URLs",
|
||||
"description": "Allow Hashtag URLs, useful for single-page-application crawling or when different hashtags load dynamic content"
|
||||
},
|
||||
"mobile_device": {
|
||||
"type": "string-enum",
|
||||
"required": false,
|
||||
"title": "As device",
|
||||
"description": "Device to crawl as. See Puppeteer's Device.ts for a list",
|
||||
"choices": [
|
||||
{
|
||||
"title": "Blackberry Playbook",
|
||||
"value": "Blackberry PlayBook"
|
||||
},
|
||||
{
|
||||
"title": "Blackberry Playbook Landscape",
|
||||
"value": "Blackberry PlayBook landscape"
|
||||
},
|
||||
{
|
||||
"title": "Blackberry Z30",
|
||||
"value": "BlackBerry Z30"
|
||||
},
|
||||
{
|
||||
"title": "Blackberry Z30 Landscape",
|
||||
"value": "BlackBerry Z30 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy Note 3",
|
||||
"value": "Galaxy Note 3"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy Note 3 Landscape",
|
||||
"value": "Galaxy Note 3 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy Note II",
|
||||
"value": "Galaxy Note II"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy Note II Landscape",
|
||||
"value": "Galaxy Note II landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S III",
|
||||
"value": "Galaxy S III"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S III Landscape",
|
||||
"value": "Galaxy S III landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S5",
|
||||
"value": "Galaxy S5"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S5 Landscape",
|
||||
"value": "Galaxy S5 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S8",
|
||||
"value": "Galaxy S8"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S8 Landscape",
|
||||
"value": "Galaxy S8 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S9 Plus",
|
||||
"value": "Galaxy S9+"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy S9 Plus Landscape",
|
||||
"value": "Galaxy S9+ landscape"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy Tab S4",
|
||||
"value": "Galaxy Tab S4"
|
||||
},
|
||||
{
|
||||
"title": "Galaxy Tab S4 Landscape",
|
||||
"value": "Galaxy Tab S4 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPad",
|
||||
"value": "iPad"
|
||||
},
|
||||
{
|
||||
"title": "iPad Landscape",
|
||||
"value": "iPad landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPad Gen 6",
|
||||
"value": "iPad (gen 6)"
|
||||
},
|
||||
{
|
||||
"title": "iPad Gen 6 Landscape",
|
||||
"value": "iPad (gen 6) landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPad Gen 7",
|
||||
"value": "iPad (gen 7)"
|
||||
},
|
||||
{
|
||||
"title": "iPad Gen 7 Landscape",
|
||||
"value": "iPad (gen 7) landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPad Mini",
|
||||
"value": "iPad Mini"
|
||||
},
|
||||
{
|
||||
"title": "iPad Mini Landscape",
|
||||
"value": "iPad Mini landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPad Pro",
|
||||
"value": "iPad Pro"
|
||||
},
|
||||
{
|
||||
"title": "iPad Pro Landscape",
|
||||
"value": "iPad Pro landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPad Pro 11",
|
||||
"value": "iPad Pro 11"
|
||||
},
|
||||
{
|
||||
"title": "iPad Pro 11 Landscape",
|
||||
"value": "iPad Pro 11 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 4",
|
||||
"value": "iPhone 4"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 4 Landscape",
|
||||
"value": "iPhone 4 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 5",
|
||||
"value": "iPhone 5"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 5 Landscape",
|
||||
"value": "iPhone 5 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 6",
|
||||
"value": "iPhone 6"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 6 Landscape",
|
||||
"value": "iPhone 6 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 6 Plus",
|
||||
"value": "iPhone 6 Plus"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 6 Plus Landscape",
|
||||
"value": "iPhone 6 Plus landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 7",
|
||||
"value": "iPhone 7"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 7 Landscape",
|
||||
"value": "iPhone 7 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 7 Plus",
|
||||
"value": "iPhone 7 Plus"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 7 Plus Landscape",
|
||||
"value": "iPhone 7 Plus landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 8",
|
||||
"value": "iPhone 8"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 8 Landscape",
|
||||
"value": "iPhone 8 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 8 Plus",
|
||||
"value": "iPhone 8 Plus"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 8 Plus Landscape",
|
||||
"value": "iPhone 8 Plus landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone SE",
|
||||
"value": "iPhone SE"
|
||||
},
|
||||
{
|
||||
"title": "iPhone SE Landscape",
|
||||
"value": "iPhone SE landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone X",
|
||||
"value": "iPhone X"
|
||||
},
|
||||
{
|
||||
"title": "iPhone X Landscape",
|
||||
"value": "iPhone X landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone XR",
|
||||
"value": "iPhone XR"
|
||||
},
|
||||
{
|
||||
"title": "iPhone XR Landscape",
|
||||
"value": "iPhone XR landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 11",
|
||||
"value": "iPhone 11"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 11 Landscape",
|
||||
"value": "iPhone 11 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 11 Pro",
|
||||
"value": "iPhone 11 Pro"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 11 Pro Landscape",
|
||||
"value": "iPhone 11 Pro landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 11 Pro Max",
|
||||
"value": "iPhone 11 Pro Max"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 11 Pro Max Landscape",
|
||||
"value": "iPhone 11 Pro Max landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12",
|
||||
"value": "iPhone 12"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Landscape",
|
||||
"value": "iPhone 12 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Pro",
|
||||
"value": "iPhone 12 Pro"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Pro Landscape",
|
||||
"value": "iPhone 12 Pro landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Pro Max",
|
||||
"value": "iPhone 12 Pro Max"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Pro Max Landscape",
|
||||
"value": "iPhone 12 Pro Max landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Mini",
|
||||
"value": "iPhone 12 Mini"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 12 Mini Landscape",
|
||||
"value": "iPhone 12 Mini landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13",
|
||||
"value": "iPhone 13"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Landscape",
|
||||
"value": "iPhone 13 landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Pro",
|
||||
"value": "iPhone 13 Pro"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Pro Landscape",
|
||||
"value": "iPhone 13 Pro landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Pro Max",
|
||||
"value": "iPhone 13 Pro Max"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Pro Max Landscape",
|
||||
"value": "iPhone 13 Pro Max landscape"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Mini",
|
||||
"value": "iPhone 13 Mini"
|
||||
},
|
||||
{
|
||||
"title": "iPhone 13 Mini Landscape",
|
||||
"value": "iPhone 13 Mini landscape"
|
||||
},
|
||||
{
|
||||
"title": "Jio Phone 2",
|
||||
"value": "JioPhone 2"
|
||||
},
|
||||
{
|
||||
"title": "Jio Phone 2 Landscape",
|
||||
"value": "JioPhone 2 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Kindle Fire HDX",
|
||||
"value": "Kindle Fire HDX"
|
||||
},
|
||||
{
|
||||
"title": "Kindle Fire HDX Landscape",
|
||||
"value": "Kindle Fire HDX landscape"
|
||||
},
|
||||
{
|
||||
"title": "LG Optimus L70",
|
||||
"value": "LG Optimus L70"
|
||||
},
|
||||
{
|
||||
"title": "LG Optimus L70 Landscape",
|
||||
"value": "LG Optimus L70 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Microsoft Lumia 550",
|
||||
"value": "Microsoft Lumia 550"
|
||||
},
|
||||
{
|
||||
"title": "Microsoft Lumia 950",
|
||||
"value": "Microsoft Lumia 950"
|
||||
},
|
||||
{
|
||||
"title": "Microsoft Lumia 950 Landscape",
|
||||
"value": "Microsoft Lumia 950 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 10",
|
||||
"value": "Nexus 10"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 10 Landscape",
|
||||
"value": "Nexus 10 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 4",
|
||||
"value": "Nexus 4"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 4 Landscape",
|
||||
"value": "Nexus 4 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 5",
|
||||
"value": "Nexus 5"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 5 Landscape",
|
||||
"value": "Nexus 5 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 5X",
|
||||
"value": "Nexus 5X"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 5X Landscape",
|
||||
"value": "Nexus 5X landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 6",
|
||||
"value": "Nexus 6"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 6 Landscape",
|
||||
"value": "Nexus 6 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 6P",
|
||||
"value": "Nexus 6P"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 6P Landscape",
|
||||
"value": "Nexus 6P landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 7",
|
||||
"value": "Nexus 7"
|
||||
},
|
||||
{
|
||||
"title": "Nexus 7 Landscape",
|
||||
"value": "Nexus 7 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nokia Lumia 520",
|
||||
"value": "Nokia Lumia 520"
|
||||
},
|
||||
{
|
||||
"title": "Nokia Lumia 520 Landscape",
|
||||
"value": "Nokia Lumia 520 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Nokia N9",
|
||||
"value": "Nokia N9"
|
||||
},
|
||||
{
|
||||
"title": "Nokia N9 Landscape",
|
||||
"value": "Nokia N9 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 2",
|
||||
"value": "Pixel 2"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 2 Landscape",
|
||||
"value": "Pixel 2 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 2 XL",
|
||||
"value": "Pixel 2 XL"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 2 XL Landscape",
|
||||
"value": "Pixel 2 XL landscape"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 3",
|
||||
"value": "Pixel 3"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 3 Landscape",
|
||||
"value": "Pixel 3 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 4",
|
||||
"value": "Pixel 4"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 4 Landscape",
|
||||
"value": "Pixel 4 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 4A 5G",
|
||||
"value": "Pixel 4a (5G)"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 4A 5G Landscape",
|
||||
"value": "Pixel 4a (5G) landscape"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 5",
|
||||
"value": "Pixel 5"
|
||||
},
|
||||
{
|
||||
"title": "Pixel 5 Landscape",
|
||||
"value": "Pixel 5 landscape"
|
||||
},
|
||||
{
|
||||
"title": "Moto G4",
|
||||
"value": "Moto G4"
|
||||
},
|
||||
{
|
||||
"title": "Moto G4 Landscape",
|
||||
"value": "Moto G4 landscape"
|
||||
}
|
||||
]
|
||||
},
|
||||
"select_links": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Select Links",
|
||||
"description": "One or more selectors for extracting links, in the format [css selector]->[property to use],[css selector]->@[attribute to use]"
|
||||
},
|
||||
"click_selector": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Click Selector",
|
||||
"description": "Selector for elements to click when using the autoclick behavior. Default is 'a'"
|
||||
},
|
||||
"block_rules": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Block Rules",
|
||||
"description": "Additional rules for blocking certain URLs from being loaded, by URL regex and optionally via text match in an iframe"
|
||||
},
|
||||
"block_message": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Block Message",
|
||||
"description": "If specified, when a URL is blocked, a record with this error message is added instead"
|
||||
},
|
||||
"block_ads": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Block Ads",
|
||||
"description": "If set, block advertisements from being loaded (based on Steven Black's blocklist). Note that some bad domains are also blocked by zimit configuration even if this option is not set."
|
||||
},
|
||||
"ad_block_message": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Ads Block Message",
|
||||
"description": "If specified, when an ad is blocked, a record with this error message is added instead"
|
||||
},
|
||||
"user_agent": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "User Agent",
|
||||
"description": "Override user-agent with specified"
|
||||
},
|
||||
"user_agent_suffix": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "User Agent Suffix",
|
||||
"description": "Append suffix to existing browser user-agent. Defaults to +Zimit"
|
||||
},
|
||||
"use_sitemap": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Sitemap URL",
|
||||
"description": "Use as sitemap to get additional URLs for the crawl (usually at /sitemap.xml)"
|
||||
},
|
||||
"sitemap_from_date": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Sitemap From Date",
|
||||
"description": "If set, filter URLs from sitemaps to those greater than or equal to (>=) provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)"
|
||||
},
|
||||
"sitemap_to_date": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Sitemap To Date",
|
||||
"description": "If set, filter URLs from sitemaps to those less than or equal to (<=) provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)"
|
||||
},
|
||||
"behavior_timeout": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Behavior Timeout",
|
||||
"description": "If >0, timeout (in seconds) for in-page behavior will run on each page. If 0, a behavior can run until finish. Default is 90.",
|
||||
"min": 0
|
||||
},
|
||||
"post_load_delay": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Post Load Delay",
|
||||
"description": "If >0, amount of time to sleep (in seconds) after page has loaded, before taking screenshots / getting text / running behaviors. Default is 0.",
|
||||
"min": 0
|
||||
},
|
||||
"page_extra_delay": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Page Extra Delay",
|
||||
"description": "If >0, amount of time to sleep (in seconds) after behaviors before moving on to next page. Default is 0.",
|
||||
"min": 0
|
||||
},
|
||||
"dedup_policy": {
|
||||
"type": "string-enum",
|
||||
"required": false,
|
||||
"title": "Dedup Policy",
|
||||
"description": "Deduplication policy. One of skip, revisit or keep. Default is skip",
|
||||
"choices": [
|
||||
{
|
||||
"title": "Skip",
|
||||
"value": "skip"
|
||||
},
|
||||
{
|
||||
"title": "Revisit",
|
||||
"value": "revisit"
|
||||
},
|
||||
{
|
||||
"title": "Keep",
|
||||
"value": "keep"
|
||||
}
|
||||
]
|
||||
},
|
||||
"screenshot": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Screenshot",
|
||||
"description": "Screenshot options for crawler. One of view, thumbnail, fullPage, fullPageFinal or a comma-separated combination of those."
|
||||
},
|
||||
"size_soft_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Size Soft Limit",
|
||||
"description": "If set, save crawl state and stop crawl if WARC size exceeds this value. ZIM will still be created.",
|
||||
"min": 0
|
||||
},
|
||||
"size_hard_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Size Hard Limit",
|
||||
"description": "If set, exit crawler and fail the scraper immediately if WARC size exceeds this value",
|
||||
"min": 0
|
||||
},
|
||||
"disk_utilization": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Disk Utilization",
|
||||
"description": "Save state and exit if disk utilization exceeds this percentage value. Default (if not set) is 90%. Set to 0 to disable disk utilization check.",
|
||||
"min": 0
|
||||
},
|
||||
"time_soft_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Time Soft Limit",
|
||||
"description": "If set, save crawl state and stop crawl if WARC(s) creation takes longer than this value, in seconds. ZIM will still be created.",
|
||||
"min": 0
|
||||
},
|
||||
"time_hard_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Time Hard Limit",
|
||||
"description": "If set, exit crawler and fail the scraper immediately if WARC(s) creation takes longer than this value, in seconds",
|
||||
"min": 0
|
||||
},
|
||||
"net_idle_wait": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Net Idle Wait",
|
||||
"description": "If set, wait for network idle after page load and after behaviors are done (in seconds). If -1 (default), determine based on scope."
|
||||
},
|
||||
"origin_override": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Origin Override",
|
||||
"description": "If set, will redirect requests from each origin in key to origin in the value, eg. https://host:port=http://alt-host:alt-port."
|
||||
},
|
||||
"max_page_retries": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Max Page Retries",
|
||||
"description": "If set, number of times to retry a page that failed to load before page is considered to have failed. Default is 2.",
|
||||
"min": 0
|
||||
},
|
||||
"fail_on_failed_seed": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Fail on failed seed",
|
||||
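"description": "If set, crawler will fail (exit with an error) if any seed fails to load. When combined with --failOnInvalidStatus, seeds returning a 4xx or 5xx response are also treated as failed"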
|
||||
},
|
||||
"fail_on_invalid_status": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Fail on invalid status",
|
||||
"description": "If set, will treat pages with 4xx or 5xx response as failures. When combined with --failOnFailedLimit or --failOnFailedSeed may result in crawl failing due to non-200 responses"
|
||||
},
|
||||
"fail_on_failed_limit": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Fail on failed - Limit",
|
||||
"description": "If set, save state and exit if number of failed pages exceeds this value.",
|
||||
"min": 0
|
||||
},
|
||||
"warcs": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "WARC files",
|
||||
"description": "Comma-separated list of WARC files to use as input."
|
||||
},
|
||||
"verbose": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Verbose mode",
|
||||
"description": "Whether to display additional logs"
|
||||
},
|
||||
"keep": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Keep",
|
||||
"description": "Should be True. Developer option: must be True if we want to keep the WARC files for artifacts archiving.",
|
||||
"default": true
|
||||
},
|
||||
"output": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Output folder",
|
||||
"description": "Output folder for ZIM file(s). Leave it as `/output`",
|
||||
"pattern": "^/output$"
|
||||
},
|
||||
"admin_email": {
|
||||
"type": "email",
|
||||
"required": false,
|
||||
"title": "Admin Email",
|
||||
"description": "Admin Email for crawler: used in UserAgent so website admin can contact us",
|
||||
"default": "contact+zimfarm@kiwix.org"
|
||||
},
|
||||
"profile": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Browser profile",
|
||||
"description": "Path or HTTP(S) URL to tar.gz file which contains the browser profile directory for Browsertrix crawler."
|
||||
},
|
||||
"behaviors": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Behaviors",
|
||||
"description": "Which background behaviors to enable on each page. Defaults to autoplay,autofetch,siteSpecific."
|
||||
},
|
||||
"depth": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Depth",
|
||||
"description": "The depth of the crawl for all seeds. Default is -1 (infinite).",
|
||||
"min": -1
|
||||
},
|
||||
"zim_lang": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "ZIM Language",
|
||||
"description": "Language metadata of ZIM (warc2zim --lang param). ISO-639-3 code. Retrieved from homepage if found, fallback to `eng`",
|
||||
"alias": "zim-lang",
|
||||
"customValidator": "language_code"
|
||||
},
|
||||
"long_description": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Long description",
|
||||
"description": "Optional long description for your ZIM",
|
||||
"minLength": 1,
|
||||
"maxLength": 4000,
|
||||
"alias": "long-description"
|
||||
},
|
||||
"custom_css": {
|
||||
"type": "blob",
|
||||
"kind": "css",
|
||||
"required": false,
|
||||
"title": "Custom CSS",
|
||||
"description": "URL to a CSS file to inject into pages",
|
||||
"alias": "custom-css"
|
||||
},
|
||||
"charsets_to_try": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Charsets to try",
|
||||
"description": "List of charsets to try decode content when charset is not found",
|
||||
"alias": "charsets-to-try"
|
||||
},
|
||||
"ignore_content_header_charsets": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Ignore Content Header Charsets",
|
||||
"description": "Ignore the charsets specified in content headers - first bytes - typically because they are wrong.",
|
||||
"alias": "ignore-content-header-charsets"
|
||||
},
|
||||
"content_header_bytes_length": {
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"title": "Content Header Bytes Length",
|
||||
"description": "How many bytes to consider when searching for content charsets in header (default is 1024).",
|
||||
"alias": "content-header-bytes-length",
|
||||
"min": 0
|
||||
},
|
||||
"ignore_http_header_charsets": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Ignore HTTP Header Charsets",
|
||||
"description": "Ignore the charsets specified in HTTP `Content-Type` headers, typically because they are wrong.",
|
||||
"alias": "ignore-http-header-charsets"
|
||||
},
|
||||
"encoding_aliases": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Encoding Aliases",
|
||||
"description": "List of encoding/charset aliases to decode WARC content. Aliases are used when the encoding specified in upstream server exists in Python under a different name. This parameter is single string, multiple values are separated by a comma, like in alias1=encoding1,alias2=encoding2.",
|
||||
"alias": "encoding-aliases"
|
||||
},
|
||||
"custom_behaviors": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Custom Behaviors",
|
||||
"description": "JS code for custom behaviors to customize crawler. Single string with individual JS files URL/path separated by a comma.",
|
||||
"alias": "custom-behaviours"
|
||||
},
|
||||
"zimit_progress_file": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "Zimit Progress File",
|
||||
"description": "Scraping progress file. Leave it as `/output/task_progress.json`",
|
||||
"alias": "zimit-progress-file",
|
||||
"pattern": "^/output/task_progress\\.json$"
|
||||
},
|
||||
"replay_viewer_source": {
|
||||
"type": "url",
|
||||
"required": false,
|
||||
"title": "Replay Viewer Source",
|
||||
"description": "URL from which to load the ReplayWeb.page replay viewer",
|
||||
"alias": "replay-viewer-source"
|
||||
},
|
||||
"zim_file": {
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"title": "ZIM filename",
|
||||
"description": "ZIM file name (based on --name if not provided). Include {period} to insert date period dynamically",
|
||||
"alias": "zim-file",
|
||||
"pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+_)([a-z0-9\\-\\.]+_|)([\\d]{4}-[\\d]{2}|\\{period\\}).zim$",
|
||||
"relaxedPattern": "^[A-Za-z0-9._-]+$"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"required": true,
|
||||
"title": "ZIM name",
|
||||
"description": "Name of the ZIM.",
|
||||
"alias": "name",
|
||||
"pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+)$",
|
||||
"relaxedPattern": "^[A-Za-z0-9._-]+$"
|
||||
},
|
||||
"overwrite": {
|
||||
"type": "boolean",
|
||||
"required": false,
|
||||
"title": "Overwrite",
|
||||
"description": "Whether to overwrite existing ZIM file if it exists"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -11,7 +11,7 @@ dependencies = [
|
|||
"requests==2.32.3",
|
||||
"inotify==0.2.10",
|
||||
"tld==0.13",
|
||||
"warc2zim==2.2.2",
|
||||
"warc2zim @ git+https://github.com/openzim/warc2zim@main",
|
||||
]
|
||||
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]
|
||||
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
__version__ = "3.0.4"
|
||||
__version__ = "3.0.6-dev0"
|
||||
|
|
|
|||
|
|
@ -796,11 +796,14 @@ def run(raw_args):
|
|||
if known_args.adminEmail:
|
||||
user_agent_suffix += f" {known_args.adminEmail}"
|
||||
|
||||
# make temp dir for this crawl
|
||||
# set temp dir to use for this crawl
|
||||
global temp_root_dir # noqa: PLW0603
|
||||
if known_args.build:
|
||||
temp_root_dir = Path(tempfile.mkdtemp(dir=known_args.build, prefix=".tmp"))
|
||||
# use build dir argument if passed
|
||||
temp_root_dir = Path(known_args.build)
|
||||
temp_root_dir.mkdir(parents=True, exist_ok=True)
|
||||
else:
|
||||
# make new randomized temp dir
|
||||
temp_root_dir = Path(tempfile.mkdtemp(dir=known_args.output, prefix=".tmp"))
|
||||
|
||||
seeds = []
|
||||
|
|
@ -846,6 +849,9 @@ def run(raw_args):
|
|||
warc2zim_args.append("--lang")
|
||||
warc2zim_args.append(known_args.zim_lang)
|
||||
|
||||
if known_args.overwrite:
|
||||
warc2zim_args.append("--overwrite")
|
||||
|
||||
logger.info("----------")
|
||||
logger.info("Testing warc2zim args")
|
||||
logger.info("Running: warc2zim " + " ".join(warc2zim_args))
|
||||
|
|
@ -854,7 +860,8 @@ def run(raw_args):
|
|||
logger.info("Exiting, invalid warc2zim params")
|
||||
return EXIT_CODE_WARC2ZIM_CHECK_FAILED
|
||||
|
||||
if not known_args.keep:
|
||||
# only trigger cleanup when neither a custom build dir nor the keep argument is passed.
|
||||
if not known_args.build and not known_args.keep:
|
||||
atexit.register(cleanup)
|
||||
|
||||
# copy / download custom behaviors to one single folder and configure crawler
|
||||
|
|
@ -1032,7 +1039,6 @@ def run(raw_args):
|
|||
warc_files.append(Path(extract_path))
|
||||
|
||||
else:
|
||||
|
||||
logger.info(f"Running browsertrix-crawler crawl: {cmd_line}")
|
||||
crawl = subprocess.run(crawler_args, check=False)
|
||||
if (
|
||||
|
|
@ -1076,18 +1082,18 @@ def run(raw_args):
|
|||
)
|
||||
elif len(warc_dirs) > 1:
|
||||
logger.info(
|
||||
"Found many WARC files directories, only most recently modified one"
|
||||
" will be used"
|
||||
"Found many WARC files directories, combining pages from all "
|
||||
"of them"
|
||||
)
|
||||
for directory in warc_dirs:
|
||||
logger.info(f"- {directory}")
|
||||
warc_files = [warc_dirs[-1]]
|
||||
warc_files = warc_dirs
|
||||
|
||||
logger.info("")
|
||||
logger.info("----------")
|
||||
logger.info(
|
||||
f"Processing WARC files in/at "
|
||||
f'{" ".join(str(warc_file) for warc_file in warc_files)}'
|
||||
f"{' '.join(str(warc_file) for warc_file in warc_files)}"
|
||||
)
|
||||
warc2zim_args.extend(str(warc_file) for warc_file in warc_files)
|
||||
|
||||
|
|
|
|||
14
tests/conftest.py
Normal file
14
tests/conftest.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
import pytest
|
||||
|
||||
from zimit import zimit as app
|
||||
|
||||
"""
|
||||
cleanup disabled because atexit hooks run at the very end of the Python process
|
||||
shutdown. By the time cleanup() is called, the logging module has already closed its
|
||||
file streams.
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def disable_zimit_cleanup(monkeypatch):
    """Swap zimit's ``cleanup`` for a no-op while a test is running.

    The fixture is ``autouse``, so it applies to every test without being
    requested; ``monkeypatch`` restores the original attribute afterwards.
    """

    def _noop():
        return None

    monkeypatch.setattr(app, "cleanup", _noop)
|
||||
BIN
tests/data/example-response.warc
Normal file
BIN
tests/data/example-response.warc
Normal file
Binary file not shown.
83
tests/test_overwrite.py
Normal file
83
tests/test_overwrite.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
from zimit.zimit import run
|
||||
|
||||
TEST_DATA_DIR = pathlib.Path(__file__).parent / "data"
|
||||
|
||||
|
||||
def test_overwrite_flag_behaviour(tmp_path):
    """Check that an existing ZIM file is only replaced when --overwrite is given.

    The same output file name is targeted by four consecutive ``run`` calls:
    the first creates the file, the next two (with and without ``--warcs``)
    must exit with code 2 because the file already exists, and the last one
    passes ``--overwrite`` and must succeed.
    """
    zim_output = "overwrite-test.zim"
    output_path = tmp_path / zim_output

    # Argument groups shared by the runs below; order matches the CLI calls.
    seed_args = ["--seeds", "https://example.com"]
    warc_args = ["--warcs", str(TEST_DATA_DIR / "example-response.warc")]
    zim_args = [
        "--output",
        str(tmp_path),
        "--zim-file",
        zim_output,
        "--name",
        "overwrite-test",
    ]

    # 1st run → creates file
    first_result = run([*seed_args, *warc_args, *zim_args])
    assert first_result in (None, 100)
    assert output_path.exists()

    # 2nd run, no overwrite → should fail
    with pytest.raises(SystemExit) as exc:
        run([*seed_args, *warc_args, *zim_args])
    assert exc.value.code == 2

    # 2nd run again, this time without --warcs, still no overwrite → should fail
    with pytest.raises(SystemExit) as exc:
        run([*seed_args, *zim_args])
    assert exc.value.code == 2

    # 3rd run, with overwrite → should succeed
    final_result = run([*seed_args, *warc_args, *zim_args, "--overwrite"])
    assert final_result in (None, 100)
    assert output_path.exists()
|
||||
Loading…
Add table
Add a link
Reference in a new issue