diff --git a/.github/workflows/DailyTests.yaml b/.github/workflows/DailyTests.yaml index 0585721..2bc9bc5 100644 --- a/.github/workflows/DailyTests.yaml +++ b/.github/workflows/DailyTests.yaml @@ -18,7 +18,7 @@ jobs: run: docker build -t local-zimit . - name: run crawl of test website - run: docker run -v $PWD/output:/output local-zimit zimit --url https://website.test.openzim.org/ --name tests_eng_test-website --zim-file tests_eng_test-website.zim + run: docker run -v $PWD/output:/output local-zimit zimit --seeds https://website.test.openzim.org/ --name tests_eng_test-website --zim-file tests_eng_test-website.zim - name: archive ZIM uses: actions/upload-artifact@v4 diff --git a/.github/workflows/Publish.yml b/.github/workflows/Publish.yml index b6660d0..1ddb343 100644 --- a/.github/workflows/Publish.yml +++ b/.github/workflows/Publish.yml @@ -5,8 +5,9 @@ on: types: [published] jobs: - publish: - runs-on: ubuntu-22.04 + publish-amd64: + runs-on: ubuntu-24.04 + name: "Publish for AMD64" steps: - uses: actions/checkout@v4 @@ -19,11 +20,34 @@ jobs: latest-on-tag: true restrict-to: openzim/zimit registries: ghcr.io - credentials: + credentials: | GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} repo_description: auto repo_overview: auto platforms: | linux/amd64 - linux/arm64 + + # Disabled for now, see https://github.com/openzim/zimit/issues/463 + # publish-arm64: + # runs-on: ubuntu-24.04 + # name: "Publish for ARM64" + # + # steps: + # - uses: actions/checkout@v4 + # + # - name: Build and push Docker image + # uses: openzim/docker-publish-action@v10 + # with: + # image-name: openzim/zimit + # tag-pattern: /^v([0-9.]+)$/ + # latest-on-tag: true + # restrict-to: openzim/zimit + # registries: ghcr.io + # credentials: | + # GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} + # GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} + # repo_description: auto + # repo_overview: auto + # platforms: | + # linux/arm64 diff --git a/.github/workflows/PublishDockerDevImage.yaml b/.github/workflows/PublishDockerDevImage.yaml index 5e2431e..1cbecea 100644 --- a/.github/workflows/PublishDockerDevImage.yaml +++ b/.github/workflows/PublishDockerDevImage.yaml @@ -7,8 +7,9 @@ on: workflow_dispatch: jobs: - publish: - runs-on: ubuntu-22.04 + publish-amd64: + runs-on: ubuntu-24.04 + name: "Publish for AMD64" steps: - uses: actions/checkout@v4 @@ -21,11 +22,34 @@ jobs: latest-on-tag: false restrict-to: openzim/zimit registries: ghcr.io - credentials: + credentials: | GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} repo_description: auto repo_overview: auto platforms: | linux/amd64 - linux/arm64 + + # Disabled for now, see https://github.com/openzim/zimit/issues/463 + # publish-arm64: + # runs-on: ubuntu-24.04-arm + # name: "Publish for ARM64" + # + # steps: + # - uses: actions/checkout@v4 + # + # - name: Build and push Docker image + # uses: openzim/docker-publish-action@v10 + # with: + # image-name: openzim/zimit + # manual-tag: dev + # latest-on-tag: false + # restrict-to: openzim/zimit + # registries: ghcr.io + # credentials: | + # GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} + # GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} + # repo_description: auto + # repo_overview: auto + # platforms: | + # linux/arm64 diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml index 9e21fa7..8c74b21 100644 --- a/.github/workflows/Tests.yaml +++ b/.github/workflows/Tests.yaml @@ -57,13 +57,25 @@ jobs: uses: actions/checkout@v4 - name: build image - run: docker build 
-t zimit . + run: docker build -t local-zimit . - name: ensure help display without issue - run: docker run -v $PWD/output:/output zimit zimit --help + run: docker run -v $PWD/output:/output local-zimit zimit --help - - name: run crawl - run: docker run -v $PWD/output:/output zimit zimit --url http://isago.rskg.org/ --name isago --zim-file isago.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --statsFilename /output/stats.json --keep + - name: run crawl with soft size limit + run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --sizeSoftLimit 8192 --name tests_en_sizesoftlimit --zim-file tests_en_sizesoftlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_sizesoftlimit.json + + - name: run crawl with hard size limit + run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --sizeHardLimit 8192 --name tests_en_sizehardlimit --zim-file tests_en_sizehardlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_sizehardlimit.json || true + + - name: run crawl with soft time limit + run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --timeSoftLimit 1 --name tests_en_timesoftlimit --zim-file tests_en_timesoftlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_timesoftlimit.json + + - name: run crawl with hard time limit + run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --timeHardLimit 1 --name tests_en_timehardlimit --zim-file tests_en_timehardlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_timehardlimit.json || true + + - name: run standard crawl + run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/http-return-codes.html --name tests_en_onepage --zim-file tests_en_onepage.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats.json --statsFilename /output/crawl.json --warc2zim-progress-file /output/warc2zim.json --keep - name: run integration test suite - run: docker run -v $PWD/tests-integration/integration.py:/app/integration.py -v $PWD/output:/output zimit bash -c "/app/zimit/bin/pip install pytest; /app/zimit/bin/pytest -v /app/integration.py" + run: docker run -v $PWD/tests-integration/integration.py:/app/integration.py -v $PWD/output:/output local-zimit bash -c "/app/zimit/bin/pip install pytest; /app/zimit/bin/pytest -v /app/integration.py" diff --git a/.github/workflows/update-zim-offliner-definition.yaml b/.github/workflows/update-zim-offliner-definition.yaml new file mode 100644 index 0000000..f481354 --- /dev/null +++ b/.github/workflows/update-zim-offliner-definition.yaml @@ -0,0 +1,45 @@ +name: Update ZIMFarm Definitions + +on: + push: + branches: [main] + paths: + - "offliner-definition.json" + release: + types: [published] + + workflow_dispatch: + inputs: + version: + description: "Version to publish" + required: false + default: "dev" + +jobs: + prepare-json: + runs-on: ubuntu-24.04 + outputs: + offliner_definition_b64: ${{ steps.read-json.outputs.offliner_definition_b64 }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - id: read-json + run: | + if [ ! -f "offliner-definition.json" ]; then + echo "File not found!" 
>&2 + exit 1 + fi + json_b64=$(base64 -w0 <<< "$(jq -c . offliner-definition.json)") + echo "offliner_definition_b64=$json_b64" >> $GITHUB_OUTPUT + call-workflow: + needs: prepare-json + uses: openzim/overview/.github/workflows/update-zimfarm-offliner-definition.yaml@main + with: + version: ${{ github.event_name == 'release' && github.event.release.tag_name || (github.event.inputs.version || 'dev') }} + offliner: zimit + offliner_definition_b64: ${{ needs.prepare-json.outputs.offliner_definition_b64 }} + secrets: + zimfarm_ci_secret: ${{ secrets.ZIMFARM_CI_SECRET }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f91d0b..b362d62 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,20 +2,20 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - repo: https://github.com/psf/black - rev: "24.10.0" + rev: "25.1.0" hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.9.4 hooks: - id: ruff - repo: https://github.com/RobertCraigie/pyright-python - rev: v1.1.383 + rev: v1.1.393 hooks: - id: pyright name: pyright (system) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2af6e6f..2a99b30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,92 @@ All notable changes to this project are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.2.0). +## [Unreleased] + +### Added +- Added `--overwrite` flag to overwrite existing ZIM file if it exists (#399) + +### Changed +- Fix issues preventing interrupted crawls from being resumed. (#499) + - Ensure build directory is used explicitly instead of a randomized subdirectory when passed, and pre-create it if it does not exist. + - Use all warc_dirs found instead of just the latest so interrupted crawls use all collected pages across runs when an explicit collections directory is not passed. + - Don't cleanup an explicitly passed build directory. 
+ +## [3.0.5] - 2025-04-11 + +### Changed + +- Upgrade to browsertrix crawler 1.6.0 (#493) + +## [3.0.4] - 2025-04-04 + +### Changed + +- Upgrade to browsertrix crawler 1.5.10 (#491) + +## [3.0.3] - 2025-02-28 + +### Changed + +- Upgrade to browsertrix crawler 1.5.7 (#483) + +## [3.0.2] - 2025-02-27 + +### Changed + +- Upgrade to browsertrix crawler 1.5.6 (#482) + +## [3.0.1] - 2025-02-24 + +### Changed + +- Upgrade to browsertrix crawler 1.5.4 (#476) + +## [3.0.0] - 2025-02-17 + +### Changed + +- Change solution to report partial ZIM to the Zimfarm and other clients (#304) +- Keep temporary folder when crawler or warc2zim fails, even if not asked for (#468) +- Add many missing Browsertrix Crawler arguments; drop default overrides by zimit; drop `--noMobileDevice` setting (not needed anymore) (#433) +- Document all Browsertrix Crawler default argument values (#416) +- Use preferred Browsertrix Crawler argument names: (part of #471) + - `--seeds` instead of `--url` + - `--seedFile` instead of `--urlFile` + - `--pageLimit` instead of `--limit` + - `--pageLoadTimeout` instead of `--timeout` + - `--scopeIncludeRx` instead of `--include` + - `--scopeExcludeRx` instead of `--exclude` + - `--pageExtraDelay` instead of `--delay` +- Remove confusion between zimit, warc2zim and crawler stats filenames (part of #471) + - `--statsFilename` is now the crawler stats file (since it is the same name, just like other arguments) + - `--zimit-progress-file` is now the zimit stats location + - `--warc2zim-progress-file` is the warc2zim stats location + - all are optional values, if not set and needed temporary files are used + +### Fixed + +- Do not create the ZIM when crawl is incomplete (#444) + +## [2.1.8] - 2025-02-07 + +### Changed + +- Upgrade to browsertrix crawler 1.5.1, Python 3.13 and others (#462 + #464) + +## [2.1.7] - 2025-01-10 + +### Changed + +- Upgrade to browsertrix crawler 1.4.2 (#450) +- Upgrade to warc2zim 2.2.0 + +## [2.1.6] - 2024-11-07 + +### Changed + +- Upgrade to browsertrix crawler 1.3.5 (#426) + ## [2.1.5] - 2024-11-01 ### Changed diff --git a/Dockerfile b/Dockerfile index 5be84c4..9666c0b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,16 @@ -FROM webrecorder/browsertrix-crawler:1.3.4 -LABEL org.opencontainers.image.source https://github.com/openzim/zimit +FROM webrecorder/browsertrix-crawler:1.6.0 +LABEL org.opencontainers.image.source=https://github.com/openzim/zimit + +# add deadsnakes ppa for latest Python on Ubuntu +RUN add-apt-repository ppa:deadsnakes/ppa -y RUN apt-get update \ && apt-get install -qqy --no-install-recommends \ libmagic1 \ - python3.12-venv \ + python3.13-venv \ && rm -rf /var/lib/apt/lists/* \ # python setup (in venv not to conflict with browsertrix) - && python3.12 -m venv /app/zimit \ + && python3.13 -m venv /app/zimit \ # placeholder (default output location) && mkdir -p /output \ # disable chrome upgrade diff --git a/README.md b/README.md index e01abbc..188615f 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ Zimit ===== -Zimit is a scraper allowing to create ZIM file from any Web site. +Zimit is a scraper allowing you to create a [ZIM file](https://en.wikipedia.org/wiki/ZIM_(file_format)) from any Web site. 
[![CodeFactor](https://www.codefactor.io/repository/github/openzim/zimit/badge)](https://www.codefactor.io/repository/github/openzim/zimit) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) -[![Docker](https://ghcr-badge.deta.dev/openzim/zimit/latest_tag?label=docker)](https://ghcr.io/openzim/zimit) +[![Docker](https://ghcr-badge.egpl.dev/openzim/zimit/latest_tag?label=docker)](https://ghcr.io/openzim/zimit) Zimit adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing). -Zimit has implemented openZIM's [Python bootstrap, conventions and policies](https://github.com/openzim/_python-bootstrap/docs/Policy.md) **v1.0.1**. +Zimit has implemented openZIM's [Python bootstrap, conventions and policies](https://github.com/openzim/_python-bootstrap/blob/main/docs/Policy.md) **v1.0.1**. Capabilities and known limitations -------------------- @@ -38,24 +38,23 @@ Usage `zimit` is intended to be run in Docker. Docker image is published at https://github.com/orgs/openzim/packages/container/package/zimit. -The image accepts the following parameters, **as well as any of the [warc2zim](https://github.com/openzim/warc2zim) ones**; useful for setting metadata, for instance: +The image accepts the following parameters, **as well as any of the [Browsertrix Crawler](https://crawler.docs.browsertrix.com/user-guide/cli-options/) and [warc2zim](https://github.com/openzim/warc2zim) ones**: -- Required: `--url URL` - the url to be crawled +- Required: `--seeds URL` - the URL to start crawling from; multiple URLs can be separated by a comma (even if **usually not needed**, these are just the **seeds** of the crawl); first seed URL is used as ZIM homepage - Required: `--name` - Name of ZIM file - `--output` - output directory (defaults to `/output`) -- `--limit U` - Limit capture to at most U URLs -- `--behaviors` - Control which browsertrix behaviors are ran (defaults to `autoplay,autofetch,siteSpecific`, adding `autoscroll` to the list is possible to automatically scroll the pages and fetch resources which are lazy loaded) -- `--exclude ` - skip URLs that match the regex from crawling. Can be specified multiple times. An example is `--exclude="(\?q=|signup-landing\?|\?cid=)"`, where URLs that contain either `?q=` or `signup-landing?` or `?cid=` will be excluded. +- `--pageLimit U` - Limit capture to at most U URLs +- `--scopeExcludeRx ` - skip URLs that match the regex from crawling. Can be specified multiple times. An example is `--scopeExcludeRx="(\?q=|signup-landing\?|\?cid=)"`, where URLs that contain either `?q=` or `signup-landing?` or `?cid=` will be excluded. - `--workers N` - number of crawl workers to be run in parallel -- `--wait-until` - Puppeteer setting for how long to wait for page load. See [page.goto waitUntil options](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options). The default is `load`, but for static sites, `--wait-until domcontentloaded` may be used to speed up the crawl (to avoid waiting for ads to load for example). -- `--keep` - if set, keep the WARC files in a temp directory inside the output directory +- `--waitUntil` - Puppeteer setting for how long to wait for page load. See [page.goto waitUntil options](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options). The default is `load`, but for static sites, `--waitUntil domcontentloaded` may be used to speed up the crawl (to avoid waiting for ads to load for example). 
+- `--keep` - in case of failure, WARC files and other temporary files (which are stored as a subfolder of the output directory) are always kept, otherwise they are automatically deleted. Use this flag to always keep WARC files, even in case of success. Example command: ```bash docker run ghcr.io/openzim/zimit zimit --help docker run ghcr.io/openzim/zimit warc2zim --help -docker run -v /output:/output ghcr.io/openzim/zimit zimit --url URL --name myzimfile +docker run -v /output:/output ghcr.io/openzim/zimit zimit --seeds URL --name myzimfile ``` **Note**: Image automatically filters out a large number of ads by using the 3 blocklists from [anudeepND](https://github.com/anudeepND/blacklist). If you don't want this filtering, disable the image's entrypoint in your container (`docker run --entrypoint="" ghcr.io/openzim/zimit ...`). diff --git a/offliner-definition.json b/offliner-definition.json new file mode 100644 index 0000000..4bb68b5 --- /dev/null +++ b/offliner-definition.json @@ -0,0 +1,981 @@ +{ + "offliner_id": "zimit", + "stdOutput": true, + "stdStats": "zimit-progress-file", + "flags": { + "seeds": { + "type": "string", + "required": false, + "title": "Seeds", + "description": "The seed URL(s) to start crawling from. Multiple seed URLs must be separated by a comma (usually not needed, these are just the crawl seeds). First seed URL is used as ZIM homepage" + }, + "seed_file": { + "type": "string", + "required": false, + "title": "Seed File", + "description": "If set, read a list of seed urls, one per line. HTTPS URL to an online file." + }, + "lang": { + "type": "string", + "required": false, + "title": "Browser Language", + "description": "If set, sets the language used by the browser, should be ISO 639 language[-country] code, e.g. `en` or `en-GB`" + }, + "title": { + "type": "string", + "required": false, + "title": "Title", + "description": "Custom title for your ZIM. Defaults to title of main page", + "minLength": 1, + "maxLength": 30 + }, + "description": { + "type": "string", + "required": false, + "title": "Description", + "description": "Description for ZIM", + "minLength": 1, + "maxLength": 80 + }, + "favicon": { + "type": "blob", + "kind": "image", + "required": false, + "title": "Illustration", + "description": "URL for Illustration. " + }, + "tags": { + "type": "string", + "required": false, + "title": "ZIM Tags", + "description": "Single string with individual tags separated by a semicolon." + }, + "creator": { + "type": "string", + "required": false, + "title": "Creator", + "description": "Name of content creator" + }, + "publisher": { + "type": "string", + "required": false, + "title": "Publisher", + "isPublisher": true, + "description": "Custom publisher name (ZIM metadata). openZIM otherwise" + }, + "source": { + "type": "string", + "required": false, + "title": "Source", + "description": "Source name/URL of content" + }, + "workers": { + "type": "integer", + "required": false, + "title": "Workers", + "description": "The number of workers to run in parallel. Defaults to 1", + "min": 1 + }, + "wait_until": { + "type": "string", + "required": false, + "title": "WaitUntil", + "description": "Puppeteer page.goto() condition to wait for before continuing. One of load, domcontentloaded, networkidle0 or networkidle2, or a comma-separated combination of those. Default is load,networkidle2" + }, + "extra_hops": { + "type": "integer", + "required": false, + "title": "Extra Hops", + "description": "Number of extra 'hops' to follow, beyond the current scope. 
Default is 0", + "min": 0 + }, + "page_limit": { + "type": "integer", + "required": false, + "title": "Page Limit", + "description": "Limit crawl to this number of pages. Default is 0 (no-limit).", + "min": 0 + }, + "max_page_limit": { + "type": "integer", + "required": false, + "title": "Max Page Limit", + "description": "Maximum pages to crawl, overriding pageLimit if both are set. Default is 0 (no-limit)", + "min": 0 + }, + "page_load_timeout": { + "type": "integer", + "required": false, + "title": "Page Load Timeout", + "description": "Timeout for each page to load (in seconds). Default is 90", + "min": 0 + }, + "scope_type": { + "type": "string-enum", + "required": false, + "title": "Scope Type", + "description": "A predfined scope of the crawl. For more customization, use 'custom' and set scopeIncludeRx/scopeExcludeRx regexes. Default is custom if scopeIncludeRx is set, prefix otherwise.", + "choices": [ + { + "title": "Page", + "value": "page" + }, + { + "title": "Page SPA", + "value": "page-spa" + }, + { + "title": "Prefix", + "value": "prefix" + }, + { + "title": "Host", + "value": "host" + }, + { + "title": "Domain", + "value": "domain" + }, + { + "title": "Any", + "value": "any" + }, + { + "title": "Custom", + "value": "custom" + } + ] + }, + "scope_include_rx": { + "type": "string", + "required": false, + "title": "Scope Include Regex", + "description": "Regex of page URLs that should be included in the crawl (defaults to the immediate directory of seed)" + }, + "scope_exclude_rx": { + "type": "string", + "required": false, + "title": "Scope Exclude Regex", + "description": "Regex of page URLs that should be excluded from the crawl" + }, + "allow_hash_urls": { + "type": "boolean", + "required": false, + "title": "Allow Hashtag URLs", + "description": "Allow Hashtag URLs, useful for single-page-application crawling or when different hashtags load dynamic content" + }, + "mobile_device": { + "type": "string-enum", + "required": false, + "title": "As device", + "description": "Device to crawl as. 
See Puppeteer's Device.ts for a list", + "choices": [ + { + "title": "Blackberry Playbook", + "value": "Blackberry PlayBook" + }, + { + "title": "Blackberry Playbook Landscape", + "value": "Blackberry PlayBook landscape" + }, + { + "title": "Blackberry Z30", + "value": "BlackBerry Z30" + }, + { + "title": "Blackberry Z30 Landscape", + "value": "BlackBerry Z30 landscape" + }, + { + "title": "Galaxy Note 3", + "value": "Galaxy Note 3" + }, + { + "title": "Galaxy Note 3 Landscape", + "value": "Galaxy Note 3 landscape" + }, + { + "title": "Galaxy Note II", + "value": "Galaxy Note II" + }, + { + "title": "Galaxy Note II Landscape", + "value": "Galaxy Note II landscape" + }, + { + "title": "Galaxy S III", + "value": "Galaxy S III" + }, + { + "title": "Galaxy S III Landscape", + "value": "Galaxy S III landscape" + }, + { + "title": "Galaxy S5", + "value": "Galaxy S5" + }, + { + "title": "Galaxy S5 Landscape", + "value": "Galaxy S5 landscape" + }, + { + "title": "Galaxy S8", + "value": "Galaxy S8" + }, + { + "title": "Galaxy S8 Landscape", + "value": "Galaxy S8 landscape" + }, + { + "title": "Galaxy S9 Plus", + "value": "Galaxy S9+" + }, + { + "title": "Galaxy S9 Plus Landscape", + "value": "Galaxy S9+ landscape" + }, + { + "title": "Galaxy Tab S4", + "value": "Galaxy Tab S4" + }, + { + "title": "Galaxy Tab S4 Landscape", + "value": "Galaxy Tab S4 landscape" + }, + { + "title": "iPad", + "value": "iPad" + }, + { + "title": "iPad Landscape", + "value": "iPad landscape" + }, + { + "title": "iPad Gen 6", + "value": "iPad (gen 6)" + }, + { + "title": "iPad Gen 6 Landscape", + "value": "iPad (gen 6) landscape" + }, + { + "title": "iPad Gen 7", + "value": "iPad (gen 7)" + }, + { + "title": "iPad Gen 7 Landscape", + "value": "iPad (gen 7) landscape" + }, + { + "title": "iPad Mini", + "value": "iPad Mini" + }, + { + "title": "iPad Mini Landscape", + "value": "iPad Mini landscape" + }, + { + "title": "iPad Pro", + "value": "iPad Pro" + }, + { + "title": "iPad Pro Landscape", + "value": "iPad Pro landscape" + }, + { + "title": "iPad Pro 11", + "value": "iPad Pro 11" + }, + { + "title": "iPad Pro 11 Landscape", + "value": "iPad Pro 11 landscape" + }, + { + "title": "iPhone 4", + "value": "iPhone 4" + }, + { + "title": "iPhone 4 Landscape", + "value": "iPhone 4 landscape" + }, + { + "title": "iPhone 5", + "value": "iPhone 5" + }, + { + "title": "iPhone 5 Landscape", + "value": "iPhone 5 landscape" + }, + { + "title": "iPhone 6", + "value": "iPhone 6" + }, + { + "title": "iPhone 6 Landscape", + "value": "iPhone 6 landscape" + }, + { + "title": "iPhone 6 Plus", + "value": "iPhone 6 Plus" + }, + { + "title": "iPhone 6 Plus Landscape", + "value": "iPhone 6 Plus landscape" + }, + { + "title": "iPhone 7", + "value": "iPhone 7" + }, + { + "title": "iPhone 7 Landscape", + "value": "iPhone 7 landscape" + }, + { + "title": "iPhone 7 Plus", + "value": "iPhone 7 Plus" + }, + { + "title": "iPhone 7 Plus Landscape", + "value": "iPhone 7 Plus landscape" + }, + { + "title": "iPhone 8", + "value": "iPhone 8" + }, + { + "title": "iPhone 8 Landscape", + "value": "iPhone 8 landscape" + }, + { + "title": "iPhone 8 Plus", + "value": "iPhone 8 Plus" + }, + { + "title": "iPhone 8 Plus Landscape", + "value": "iPhone 8 Plus landscape" + }, + { + "title": "iPhone SE", + "value": "iPhone SE" + }, + { + "title": "iPhone SE Landscape", + "value": "iPhone SE landscape" + }, + { + "title": "iPhone X", + "value": "iPhone X" + }, + { + "title": "iPhone X Landscape", + "value": "iPhone X landscape" + }, + { + "title": "iPhone XR", + "value": 
"iPhone XR" + }, + { + "title": "iPhone XR Landscape", + "value": "iPhone XR landscape" + }, + { + "title": "iPhone 11", + "value": "iPhone 11" + }, + { + "title": "iPhone 11 Landscape", + "value": "iPhone 11 landscape" + }, + { + "title": "iPhone 11 Pro", + "value": "iPhone 11 Pro" + }, + { + "title": "iPhone 11 Pro Landscape", + "value": "iPhone 11 Pro landscape" + }, + { + "title": "iPhone 11 Pro Max", + "value": "iPhone 11 Pro Max" + }, + { + "title": "iPhone 11 Pro Max Landscape", + "value": "iPhone 11 Pro Max landscape" + }, + { + "title": "iPhone 12", + "value": "iPhone 12" + }, + { + "title": "iPhone 12 Landscape", + "value": "iPhone 12 landscape" + }, + { + "title": "iPhone 12 Pro", + "value": "iPhone 12 Pro" + }, + { + "title": "iPhone 12 Pro Landscape", + "value": "iPhone 12 Pro landscape" + }, + { + "title": "iPhone 12 Pro Max", + "value": "iPhone 12 Pro Max" + }, + { + "title": "iPhone 12 Pro Max Landscape", + "value": "iPhone 12 Pro Max landscape" + }, + { + "title": "iPhone 12 Mini", + "value": "iPhone 12 Mini" + }, + { + "title": "iPhone 12 Mini Landscape", + "value": "iPhone 12 Mini landscape" + }, + { + "title": "iPhone 13", + "value": "iPhone 13" + }, + { + "title": "iPhone 13 Landscape", + "value": "iPhone 13 landscape" + }, + { + "title": "iPhone 13 Pro", + "value": "iPhone 13 Pro" + }, + { + "title": "iPhone 13 Pro Landscape", + "value": "iPhone 13 Pro landscape" + }, + { + "title": "iPhone 13 Pro Max", + "value": "iPhone 13 Pro Max" + }, + { + "title": "iPhone 13 Pro Max Landscape", + "value": "iPhone 13 Pro Max landscape" + }, + { + "title": "iPhone 13 Mini", + "value": "iPhone 13 Mini" + }, + { + "title": "iPhone 13 Mini Landscape", + "value": "iPhone 13 Mini landscape" + }, + { + "title": "Jio Phone 2", + "value": "JioPhone 2" + }, + { + "title": "Jio Phone 2 Landscape", + "value": "JioPhone 2 landscape" + }, + { + "title": "Kindle Fire HDX", + "value": "Kindle Fire HDX" + }, + { + "title": "Kindle Fire HDX Landscape", + "value": "Kindle Fire HDX landscape" + }, + { + "title": "LG Optimus L70", + "value": "LG Optimus L70" + }, + { + "title": "LG Optimus L70 Landscape", + "value": "LG Optimus L70 landscape" + }, + { + "title": "Microsoft Lumia 550", + "value": "Microsoft Lumia 550" + }, + { + "title": "Microsoft Lumia 950", + "value": "Microsoft Lumia 950" + }, + { + "title": "Microsoft Lumia 950 Landscape", + "value": "Microsoft Lumia 950 landscape" + }, + { + "title": "Nexus 10", + "value": "Nexus 10" + }, + { + "title": "Nexus 10 Landscape", + "value": "Nexus 10 landscape" + }, + { + "title": "Nexus 4", + "value": "Nexus 4" + }, + { + "title": "Nexus 4 Landscape", + "value": "Nexus 4 landscape" + }, + { + "title": "Nexus 5", + "value": "Nexus 5" + }, + { + "title": "Nexus 5 Landscape", + "value": "Nexus 5 landscape" + }, + { + "title": "Nexus 5X", + "value": "Nexus 5X" + }, + { + "title": "Nexus 5X Landscape", + "value": "Nexus 5X landscape" + }, + { + "title": "Nexus 6", + "value": "Nexus 6" + }, + { + "title": "Nexus 6 Landscape", + "value": "Nexus 6 landscape" + }, + { + "title": "Nexus 6P", + "value": "Nexus 6P" + }, + { + "title": "Nexus 6P Landscape", + "value": "Nexus 6P landscape" + }, + { + "title": "Nexus 7", + "value": "Nexus 7" + }, + { + "title": "Nexus 7 Landscape", + "value": "Nexus 7 landscape" + }, + { + "title": "Nokia Lumia 520", + "value": "Nokia Lumia 520" + }, + { + "title": "Nokia Lumia 520 Landscape", + "value": "Nokia Lumia 520 landscape" + }, + { + "title": "Nokia N9", + "value": "Nokia N9" + }, + { + "title": "Nokia N9 Landscape", + 
"value": "Nokia N9 landscape" + }, + { + "title": "Pixel 2", + "value": "Pixel 2" + }, + { + "title": "Pixel 2 Landscape", + "value": "Pixel 2 landscape" + }, + { + "title": "Pixel 2 XL", + "value": "Pixel 2 XL" + }, + { + "title": "Pixel 2 XL Landscape", + "value": "Pixel 2 XL landscape" + }, + { + "title": "Pixel 3", + "value": "Pixel 3" + }, + { + "title": "Pixel 3 Landscape", + "value": "Pixel 3 landscape" + }, + { + "title": "Pixel 4", + "value": "Pixel 4" + }, + { + "title": "Pixel 4 Landscape", + "value": "Pixel 4 landscape" + }, + { + "title": "Pixel 4A 5G", + "value": "Pixel 4a (5G)" + }, + { + "title": "Pixel 4A 5G Landscape", + "value": "Pixel 4a (5G) landscape" + }, + { + "title": "Pixel 5", + "value": "Pixel 5" + }, + { + "title": "Pixel 5 Landscape", + "value": "Pixel 5 landscape" + }, + { + "title": "Moto G4", + "value": "Moto G4" + }, + { + "title": "Moto G4 Landscape", + "value": "Moto G4 landscape" + } + ] + }, + "select_links": { + "type": "string", + "required": false, + "title": "Select Links", + "description": "One or more selectors for extracting links, in the format [css selector]->[property to use],[css selector]->@[attribute to use]" + }, + "click_selector": { + "type": "string", + "required": false, + "title": "Click Selector", + "description": "Selector for elements to click when using the autoclick behavior. Default is 'a'" + }, + "block_rules": { + "type": "string", + "required": false, + "title": "Block Rules", + "description": "Additional rules for blocking certain URLs from being loaded, by URL regex and optionally via text match in an iframe" + }, + "block_message": { + "type": "string", + "required": false, + "title": "Block Message", + "description": "If specified, when a URL is blocked, a record with this error message is added instead" + }, + "block_ads": { + "type": "boolean", + "required": false, + "title": "Block Ads", + "description": "If set, block advertisements from being loaded (based on Stephen Black's blocklist). Note that some bad domains are also blocked by zimit configuration even if this option is not set." + }, + "ad_block_message": { + "type": "string", + "required": false, + "title": "Ads Block Message", + "description": "If specified, when an ad is blocked, a record with this error message is added instead" + }, + "user_agent": { + "type": "string", + "required": false, + "title": "User Agent", + "description": "Override user-agent with specified" + }, + "user_agent_suffix": { + "type": "string", + "required": false, + "title": "User Agent Suffix", + "description": "Append suffix to existing browser user-agent. Defaults to +Zimit" + }, + "use_sitemap": { + "type": "string", + "required": false, + "title": "Sitemap URL", + "description": "Use as sitemap to get additional URLs for the crawl (usually at /sitemap.xml)" + }, + "sitemap_from_date": { + "type": "string", + "required": false, + "title": "Sitemap From Date", + "description": "If set, filter URLs from sitemaps to those greater than or equal to (>=) provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)" + }, + "sitemap_to_date": { + "type": "string", + "required": false, + "title": "Sitemap To Date", + "description": "If set, filter URLs from sitemaps to those less than or equal to (<=) provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)" + }, + "behavior_timeout": { + "type": "integer", + "required": false, + "title": "Behavior Timeout", + "description": "If >0, timeout (in seconds) for in-page behavior will run on each page. 
If 0, a behavior can run until finish. Default is 90.", + "min": 0 + }, + "post_load_delay": { + "type": "integer", + "required": false, + "title": "Post Load Delay", + "description": "If >0, amount of time to sleep (in seconds) after page has loaded, before taking screenshots / getting text / running behaviors. Default is 0.", + "min": 0 + }, + "page_extra_delay": { + "type": "integer", + "required": false, + "title": "Page Extra Delay", + "description": "If >0, amount of time to sleep (in seconds) after behaviors before moving on to next page. Default is 0.", + "min": 0 + }, + "dedup_policy": { + "type": "string-enum", + "required": false, + "title": "Dedup Policy", + "description": "Deduplication policy. One of skip, revisit or keep. Default is skip", + "choices": [ + { + "title": "Skip", + "value": "skip" + }, + { + "title": "Revisit", + "value": "revisit" + }, + { + "title": "Keep", + "value": "keep" + } + ] + }, + "screenshot": { + "type": "string", + "required": false, + "title": "Screenshot", + "description": "Screenshot options for crawler. One of view, thumbnail, fullPage, fullPageFinal or a comma-separated combination of those." + }, + "size_soft_limit": { + "type": "integer", + "required": false, + "title": "Size Soft Limit", + "description": "If set, save crawl state and stop crawl if WARC size exceeds this value. ZIM will still be created.", + "min": 0 + }, + "size_hard_limit": { + "type": "integer", + "required": false, + "title": "Size Hard Limit", + "description": "If set, exit crawler and fail the scraper immediately if WARC size exceeds this value", + "min": 0 + }, + "disk_utilization": { + "type": "integer", + "required": false, + "title": "Disk Utilization", + "description": "Save state and exit if disk utilization exceeds this percentage value. Default (if not set) is 90%. Set to 0 to disable disk utilization check.", + "min": 0 + }, + "time_soft_limit": { + "type": "integer", + "required": false, + "title": "Time Soft Limit", + "description": "If set, save crawl state and stop crawl if WARC(s) creation takes longer than this value, in seconds. ZIM will still be created.", + "min": 0 + }, + "time_hard_limit": { + "type": "integer", + "required": false, + "title": "Time Hard Limit", + "description": "If set, exit crawler and fail the scraper immediately if WARC(s) creation takes longer than this value, in seconds", + "min": 0 + }, + "net_idle_wait": { + "type": "integer", + "required": false, + "title": "Net Idle Wait", + "description": "If set, wait for network idle after page load and after behaviors are done (in seconds). If -1 (default), determine based on scope." + }, + "origin_override": { + "type": "string", + "required": false, + "title": "Origin Override", + "description": "If set, will redirect requests from each origin in key to origin in the value, eg. https://host:port=http://alt-host:alt-port." + }, + "max_page_retries": { + "type": "integer", + "required": false, + "title": "Max Page Retries", + "description": "If set, number of times to retry a page that failed to load before page is considered to have failed. Default is 2.", + "min": 0 + }, + "fail_on_failed_seed": { + "type": "boolean", + "required": false, + "title": "Fail on failed seed", + "description": "If set, crawler will fail with exit code 1 if any seed fails. When combined with Fail on invalid status, will result in crawl failing with exit code 1 if any seed has a 4xx/5xx response" + }, + "fail_on_invalid_status": { + "type": "boolean", + "required": false, + "title": "Fail on invalid status", + "description": "If set, will treat pages with 4xx or 5xx response as failures. 
When combined with --failOnFailedLimit or --failOnFailedSeed may result in crawl failing due to non-200 responses" + }, + "fail_on_failed_limit": { + "type": "integer", + "required": false, + "title": "Fail on failed - Limit", + "description": "If set, save state and exit if number of failed pages exceeds this value.", + "min": 0 + }, + "warcs": { + "type": "string", + "required": false, + "title": "WARC files", + "description": "Comma-separated list of WARC files to use as input." + }, + "verbose": { + "type": "boolean", + "required": false, + "title": "Verbose mode", + "description": "Whether to display additional logs" + }, + "keep": { + "type": "boolean", + "required": false, + "title": "Keep", + "description": "Should be True. Developer option: must be True if we want to keep the WARC files for artifacts archiving.", + "default": true + }, + "output": { + "type": "string", + "required": false, + "title": "Output folder", + "description": "Output folder for ZIM file(s). Leave it as `/output`", + "pattern": "^/output$" + }, + "admin_email": { + "type": "email", + "required": false, + "title": "Admin Email", + "description": "Admin Email for crawler: used in UserAgent so website admin can contact us", + "default": "contact+zimfarm@kiwix.org" + }, + "profile": { + "type": "string", + "required": false, + "title": "Browser profile", + "description": "Path or HTTP(S) URL to tar.gz file which contains the browser profile directory for Browsertrix crawler." + }, + "behaviors": { + "type": "string", + "required": false, + "title": "Behaviors", + "description": "Which background behaviors to enable on each page. Defaults to autoplay,autofetch,siteSpecific." + }, + "depth": { + "type": "integer", + "required": false, + "title": "Depth", + "description": "The depth of the crawl for all seeds. Default is -1 (infinite).", + "min": -1 + }, + "zim_lang": { + "type": "string", + "required": false, + "title": "ZIM Language", + "description": "Language metadata of ZIM (warc2zim --lang param). ISO-639-3 code. 
Retrieved from homepage if found, fallback to `eng`", + "alias": "zim-lang", + "customValidator": "language_code" + }, + "long_description": { + "type": "string", + "required": false, + "title": "Long description", + "description": "Optional long description for your ZIM", + "minLength": 1, + "maxLength": 4000, + "alias": "long-description" + }, + "custom_css": { + "type": "blob", + "kind": "css", + "required": false, + "title": "Custom CSS", + "description": "URL to a CSS file to inject into pages", + "alias": "custom-css" + }, + "charsets_to_try": { + "type": "string", + "required": false, + "title": "Charsets to try", + "description": "List of charsets to try decode content when charset is not found", + "alias": "charsets-to-try" + }, + "ignore_content_header_charsets": { + "type": "boolean", + "required": false, + "title": "Ignore Content Header Charsets", + "description": "Ignore the charsets specified in content headers - first bytes - typically because they are wrong.", + "alias": "ignore-content-header-charsets" + }, + "content_header_bytes_length": { + "type": "integer", + "required": false, + "title": "Content Header Bytes Length", + "description": "How many bytes to consider when searching for content charsets in header (default is 1024).", + "alias": "content-header-bytes-length", + "min": 0 + }, + "ignore_http_header_charsets": { + "type": "boolean", + "required": false, + "title": "Ignore HTTP Header Charsets", + "description": "Ignore the charsets specified in HTTP `Content-Type` headers, typically because they are wrong.", + "alias": "ignore-http-header-charsets" + }, + "encoding_aliases": { + "type": "string", + "required": false, + "title": "Encoding Aliases", + "description": "List of encoding/charset aliases to decode WARC content. Aliases are used when the encoding specified in upstream server exists in Python under a different name. This parameter is single string, multiple values are separated by a comma, like in alias1=encoding1,alias2=encoding2.", + "alias": "encoding-aliases" + }, + "custom_behaviors": { + "type": "string", + "required": false, + "title": "Custom Behaviors", + "description": "JS code for custom behaviors to customize crawler. Single string with individual JS files URL/path separated by a comma.", + "alias": "custom-behaviours" + }, + "zimit_progress_file": { + "type": "string", + "required": false, + "title": "Zimit Progress File", + "description": "Scraping progress file. Leave it as `/output/task_progress.json`", + "alias": "zimit-progress-file", + "pattern": "^/output/task_progress\\.json$" + }, + "replay_viewer_source": { + "type": "url", + "required": false, + "title": "Replay Viewer Source", + "description": "URL from which to load the ReplayWeb.page replay viewer from", + "alias": "replay-viewer-source" + }, + "zim_file": { + "type": "string", + "required": false, + "title": "ZIM filename", + "description": "ZIM file name (based on --name if not provided). 
Include {period} to insert date period dynamically", + "alias": "zim-file", + "pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+_)([a-z0-9\\-\\.]+_|)([\\d]{4}-[\\d]{2}|\\{period\\}).zim$", + "relaxedPattern": "^[A-Za-z0-9._-]+$" + }, + "name": { + "type": "string", + "required": true, + "title": "ZIM name", + "description": "Name of the ZIM.", + "alias": "name", + "pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+)$", + "relaxedPattern": "^[A-Za-z0-9._-]+$" + }, + "overwrite": { + "type": "boolean", + "required": false, + "title": "Overwrite", + "description": "Whether to overwrite existing ZIM file if it exists" + } + } +} diff --git a/pyproject.toml b/pyproject.toml index b213161..e4e7696 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,17 +1,17 @@ [build-system] -requires = ["hatchling", "hatch-openzim==0.2.0"] +requires = ["hatchling", "hatch-openzim"] build-backend = "hatchling.build" [project] name = "zimit" -requires-python = ">=3.12,<3.13" +requires-python = ">=3.13,<3.14" description = "Make ZIM file from any website through crawling" readme = "README.md" dependencies = [ "requests==2.32.3", "inotify==0.2.10", "tld==0.13", - "warc2zim==2.1.3", + "warc2zim @ git+https://github.com/openzim/warc2zim@main", ] dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] @@ -26,20 +26,20 @@ scripts = [ "invoke==2.2.0", ] lint = [ - "black==24.10.0", - "ruff==0.6.9", + "black==25.1.0", + "ruff==0.9.4", ] check = [ - "pyright==1.1.383", + "pyright==1.1.393", ] test = [ - "pytest==8.3.3", - "coverage==7.6.1", + "pytest==8.3.4", + "coverage==7.6.10", ] dev = [ - "pre-commit==4.0.0", - "debugpy==1.8.6", - "selenium==4.25.0", # used in daily tests, convenient for dev purpose (autocompletion) + "pre-commit==4.1.0", + "debugpy==1.8.12", + "selenium==4.28.1", # used in daily tests, convenient for dev purpose (autocompletion) "zimit[scripts]", "zimit[lint]", "zimit[test]", @@ -95,10 +95,10 @@ all = "inv checkall --args '{args}'" [tool.black] line-length = 88 -target-version = ['py312'] +target-version = ['py313'] [tool.ruff] -target-version = "py312" +target-version = "py313" line-length = 88 src = ["src"] @@ -221,5 +221,5 @@ exclude_lines = [ include = ["src", "tests", "tasks.py"] exclude = [".env/**", ".venv/**"] extraPaths = ["src"] -pythonVersion = "3.12" +pythonVersion = "3.13" typeCheckingMode="basic" diff --git a/src/zimit/__about__.py b/src/zimit/__about__.py index 0b167e6..281b1bb 100644 --- a/src/zimit/__about__.py +++ b/src/zimit/__about__.py @@ -1 +1 @@ -__version__ = "2.1.5" +__version__ = "3.0.6-dev0" diff --git a/src/zimit/constants.py b/src/zimit/constants.py index f81905a..35baeb9 100644 --- a/src/zimit/constants.py +++ b/src/zimit/constants.py @@ -3,7 +3,8 @@ import logging from zimscraperlib.logging import getLogger EXIT_CODE_WARC2ZIM_CHECK_FAILED = 2 -EXIT_CODE_CRAWLER_LIMIT_HIT = 11 +EXIT_CODE_CRAWLER_SIZE_LIMIT_HIT = 14 +EXIT_CODE_CRAWLER_TIME_LIMIT_HIT = 15 NORMAL_WARC2ZIM_EXIT_CODE = 100 REQUESTS_TIMEOUT = 10 diff --git a/src/zimit/zimit.py b/src/zimit/zimit.py index 44c6d4f..b205007 100755 --- a/src/zimit/zimit.py +++ b/src/zimit/zimit.py @@ -25,26 +25,28 @@ from zimscraperlib.uri import rebuild_uri from zimit.__about__ import __version__ from zimit.constants import ( - EXIT_CODE_CRAWLER_LIMIT_HIT, + EXIT_CODE_CRAWLER_SIZE_LIMIT_HIT, + EXIT_CODE_CRAWLER_TIME_LIMIT_HIT, EXIT_CODE_WARC2ZIM_CHECK_FAILED, NORMAL_WARC2ZIM_EXIT_CODE, logger, ) from zimit.utils import download_file +temp_root_dir: Path | None = None + class 
ProgressFileWatcher: - def __init__(self, output_dir: Path, stats_path: Path): - self.crawl_path = output_dir / "crawl.json" - self.warc2zim_path = output_dir / "warc2zim.json" - self.stats_path = stats_path - - if not self.stats_path.is_absolute(): - self.stats_path = output_dir / self.stats_path + def __init__( + self, crawl_stats_path: Path, warc2zim_stats_path, zimit_stats_path: Path + ): + self.crawl_stats_path = crawl_stats_path + self.warc2zim_stats_path = warc2zim_stats_path + self.zimit_stats_path = zimit_stats_path # touch them all so inotify is not unhappy on add_watch - self.crawl_path.touch() - self.warc2zim_path.touch() + self.crawl_stats_path.touch() + self.warc2zim_stats_path.touch() self.process = None def stop(self): @@ -56,40 +58,28 @@ class ProgressFileWatcher: def watch(self): self.process = Process( target=self.inotify_watcher, - args=(str(self.crawl_path), str(self.warc2zim_path), str(self.stats_path)), + args=( + str(self.crawl_stats_path), + str(self.warc2zim_stats_path), + str(self.zimit_stats_path), + ), ) self.process.daemon = True self.process.start() - @staticmethod - def inotify_watcher(crawl_fpath: str, warc2zim_fpath: str, output_fpath: str): + def inotify_watcher(self, crawl_fpath: str, warc2zim_fpath: str, zimit_fpath: str): ino = inotify.adapters.Inotify() ino.add_watch(crawl_fpath, inotify.constants.IN_MODIFY) # pyright: ignore ino.add_watch(warc2zim_fpath, inotify.constants.IN_MODIFY) # pyright: ignore - class Limit: - def __init__(self): - self.max = self.hit = None - - @property - def as_dict(self): - return {"max": self.max, "hit": self.hit} - - # limit is only reported by crawl but needs to be reported up - limit = Limit() - - def crawl_conv(data, limit): + def crawl_conv(data): # we consider crawl to be 90% of the workload so total = craw_total * 90% - # limit = {"max": data["limit"]["max"], "hit": data["limit"]["hit"]} - limit.max = data["limit"]["max"] - limit.hit = data["limit"]["hit"] return { "done": data["crawled"], "total": int(data["total"] / 0.9), - "limit": limit.as_dict, } - def warc2zim_conv(data, limit): + def warc2zim_conv(data): # we consider warc2zim to be 10% of the workload so # warc2zim_total = 10% and total = 90 + warc2zim_total * 10% return { @@ -98,7 +88,6 @@ class ProgressFileWatcher: * (0.9 + (float(data["written"]) / data["total"]) / 10) ), "total": data["total"], - "limit": limit.as_dict, } for _, _, fpath, _ in ino.event_gen(yield_nones=False): # pyright: ignore @@ -108,128 +97,305 @@ class ProgressFileWatcher: # open input and output separatly as to not clear output on error with open(fpath) as ifh: try: - out = func(json.load(ifh), limit) + out = func(json.load(ifh)) except Exception: # nosec # noqa: S112 # simply ignore progress update should an error arise # might be malformed input for instance continue if not out: continue - with open(output_fpath, "w") as ofh: + with open(zimit_fpath, "w") as ofh: json.dump(out, ofh) +def cleanup(): + if not temp_root_dir: + logger.warning("Temporary root dir not already set, cannot clean this up") + return + logger.info("") + logger.info("----------") + logger.info(f"Cleanup, removing temp dir: {temp_root_dir}") + shutil.rmtree(temp_root_dir) + + +def cancel_cleanup(): + logger.info( + f"Temporary files have been kept in {temp_root_dir}, please clean them" + " up manually once you don't need them anymore" + ) + atexit.unregister(cleanup) + + def run(raw_args): parser = ArgumentParser( description="Run a browser-based crawl on the specified URL and convert to ZIM" ) - 
parser.add_argument("-u", "--url", help="The URL to start crawling from") - parser.add_argument("--title", help="ZIM title") - parser.add_argument("--description", help="ZIM description") + parser.add_argument( + "--seeds", + help="The seed URL(s) to start crawling from. Multile seed URL must be " + "separated by a comma (usually not needed, these are just the crawl seeds). " + "First seed URL is used as ZIM homepage", + ) + + parser.add_argument("--title", help="WARC and ZIM title") + parser.add_argument("--description", help="WARC and ZIM description") parser.add_argument("--long-description", help="ZIM long description metadata") parser.add_argument( - "--urlFile", - help="If set, read a list of seed urls, one per line, from the specified", + "--seedFile", + help="If set, read a list of seed urls, one per line. Can be a local file or " + "the HTTP(s) URL to an online file.", ) - parser.add_argument("-w", "--workers", type=int, help="Number of parallel workers") + parser.add_argument( + "-w", "--workers", type=int, help="Number of parallel workers. Default is 1." + ) + + parser.add_argument( + "--crawlId", + help="A user provided ID for this crawl or crawl configuration (can also be " + "set via CRAWL_ID env var, defaults to machine hostname)", + ) parser.add_argument( "--waitUntil", help="Puppeteer page.goto() condition to wait for before continuing. One of " "load, domcontentloaded, networkidle0 or networkidle2, or a " - "comma-separated combination of those.", - default="load", + "comma-separated combination of those. Default is load,networkidle2", ) parser.add_argument( - "--depth", help="The depth of the crawl for all seeds", type=int, default=-1 + "--depth", + help="The depth of the crawl for all seeds. Default is -1 (infinite).", + type=int, ) parser.add_argument( "--extraHops", - help="Number of extra 'hops' to follow, beyond the current scope", + help="Number of extra 'hops' to follow, beyond the current scope. " + "Default is 0.", type=int, ) - parser.add_argument("--limit", help="Limit crawl to this number of pages", type=int) + parser.add_argument( + "--pageLimit", + help="Limit crawl to this number of pages. Default is 0 (no limit).", + type=int, + ) parser.add_argument( "--maxPageLimit", - help="Maximum pages to crawl, overriding pageLimit if both are set", + help="Maximum pages to crawl, overriding pageLimit if both are set. Default is " + "0 (no limit)", type=int, ) parser.add_argument( - "--timeout", - help="Timeout for each page to load (in seconds)", + "--pageLoadTimeout", + help="Timeout for each page to load (in seconds). Default is 90 secs.", type=int, - default=90, ) parser.add_argument( "--scopeType", help="A predfined scope of the crawl. For more customization, " - "use 'custom' and set scopeIncludeRx regexes", + "use 'custom' and set scopeIncludeRx/scopeExcludeRx regexes. 
Default is custom" + "if scopeIncludeRx is set, prefix otherwise.", choices=["page", "page-spa", "prefix", "host", "domain", "any", "custom"], ) parser.add_argument( - "--include", - help="Regex of page URLs that should be " - "included in the crawl (defaults to " + "--scopeIncludeRx", + help="Regex of page URLs that should be included in the crawl (defaults to " "the immediate directory of URL)", ) parser.add_argument( - "--exclude", + "--scopeExcludeRx", help="Regex of page URLs that should be excluded from the crawl", ) + parser.add_argument( + "--allowHashUrls", + help="Allow Hashtag URLs, useful for single-page-application crawling or " + "when different hashtags load dynamic content", + action="store_true", + ) + + parser.add_argument( + "--selectLinks", + help="One or more selectors for extracting links, in the format " + "[css selector]->[property to use],[css selector]->@[attribute to use]", + ) + + parser.add_argument( + "--clickSelector", + help="Selector for elements to click when using the autoclick behavior. Default" + " is 'a'", + ) + + parser.add_argument( + "--blockRules", + help="Additional rules for blocking certain URLs from being loaded, by URL " + "regex and optionally via text match in an iframe", + ) + + parser.add_argument( + "--blockMessage", + help="If specified, when a URL is blocked, a record with this error message is" + " added instead", + ) + + parser.add_argument( + "--blockAds", + help="If set, block advertisements from being loaded (based on Stephen Black's" + " blocklist). Note that some bad domains are also blocked by zimit" + " configuration even if this option is not set.", + ) + + parser.add_argument( + "--adBlockMessage", + help="If specified, when an ad is blocked, a record with this error message is" + " added instead", + ) + parser.add_argument( "--collection", help="Collection name to crawl to (replay will be accessible " - "under this name in pywb preview) instead of crawl-@ts", + "under this name in pywb preview). Default is crawl-@ts.", ) parser.add_argument( - "--allowHashUrls", - help="Allow Hashtag URLs, useful for " - "single-page-application crawling or " - "when different hashtags load dynamic " - "content", + "--headless", + help="Run in headless mode, otherwise start xvfb", action="store_true", ) parser.add_argument( - "--lang", - help="if set, sets the language used by the browser, should be ISO 639 " - "language[-country] code", + "--driver", + help="Custom driver for the crawler, if any", ) parser.add_argument( - "--zim-lang", - help="Language metadata of ZIM " - "(warc2zim --lang param). ISO-639-3 code. " - "Retrieved from homepage if found, fallback to `eng`", + "--generateCDX", + help="If set, generate index (CDXJ) for use with pywb after crawl is done", + action="store_true", ) + parser.add_argument( + "--combineWARC", + help="If set, combine the warcs", + action="store_true", + ) + + parser.add_argument( + "--rolloverSize", + help="If set, declare the rollover size. 
Default is 1000000000.", + type=int, + ) + + parser.add_argument( + "--generateWACZ", + help="If set, generate WACZ on disk", + action="store_true", + ) + + parser.add_argument( + "--logging", + help="Crawler logging configuration", + ) + + parser.add_argument( + "--logLevel", + help="Comma-separated list of log levels to include in logs", + ) + + parser.add_argument( + "--logContext", + help="Comma-separated list of contexts to include in logs", + choices=[ + "general", + "worker", + "recorder", + "recorderNetwork", + "writer", + "state", + "redis", + "storage", + "text", + "exclusion", + "screenshots", + "screencast", + "originOverride", + "healthcheck", + "browser", + "blocking", + "behavior", + "behaviorScript", + "jsError", + "fetch", + "pageStatus", + "memoryStatus", + "crawlStatus", + "links", + "sitemap", + "wacz", + "replay", + "proxy", + ], + ) + + parser.add_argument( + "--logExcludeContext", + help="Comma-separated list of contexts to NOT include in logs. Default is " + "recorderNetwork,jsError,screencast", + choices=[ + "general", + "worker", + "recorder", + "recorderNetwork", + "writer", + "state", + "redis", + "storage", + "text", + "exclusion", + "screenshots", + "screencast", + "originOverride", + "healthcheck", + "browser", + "blocking", + "behavior", + "behaviorScript", + "jsError", + "fetch", + "pageStatus", + "memoryStatus", + "crawlStatus", + "links", + "sitemap", + "wacz", + "replay", + "proxy", + ], + ) + + parser.add_argument( + "--text", + help="Extract initial (default) or final text to pages.jsonl or WARC resource" + " record(s)", + ) + + # cwd is manipulated directly by zimit, based on --output / --build, we do not want + # to expose this setting + parser.add_argument( "--mobileDevice", help="Emulate mobile device by name from " "https://github.com/puppeteer/puppeteer/blob/" "main/packages/puppeteer-core/src/common/Device.ts", - default="Pixel 2", - ) - - parser.add_argument( - "--noMobileDevice", - help="Do not emulate a mobile device (use at your own risk, behavior is" - "uncertain)", - action="store_true", - default=False, ) parser.add_argument( @@ -251,27 +417,71 @@ def run(raw_args): "(usually /sitemap.xml)", ) + parser.add_argument( + "--sitemapFromDate", + help="If set, filter URLs from sitemaps to those greater than or equal to (>=)" + " provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)", + ) + + parser.add_argument( + "--sitemapToDate", + help="If set, filter URLs from sitemaps to those less than or equal to (<=) " + "provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)", + ) + + parser.add_argument( + "--statsFilename", + help="If set, output crawl stats as JSON to this file. Relative filename " + "resolves to output directory, see --output.", + ) + + parser.add_argument( + "--zimit-progress-file", + help="If set, output zimit stats as JSON to this file. Forces the creation of" + "crawler and warc2zim stats as well. If --statsFilename and/or " + "--warc2zim-progress-file are not set, default temporary files will be used. " + "Relative filename resolves to output directory, see --output.", + ) + + parser.add_argument( + "--warc2zim-progress-file", + help="If set, output warc2zim stats as JSON to this file. Relative filename " + "resolves to output directory, see --output.", + ) + parser.add_argument( "--behaviors", - help="Which background behaviors to enable on each page", - default="autoplay,autofetch,siteSpecific", + help="Which background behaviors to enable on each page. 
Default is autoplay," + "autofetch,autoscroll,siteSpecific", ) parser.add_argument( "--behaviorTimeout", help="If >0, timeout (in seconds) for in-page behavior will run on each page. " - "If 0, a behavior can run until finish", + "If 0, a behavior can run until finish. Default is 90.", type=int, - default=90, ) parser.add_argument( - "--delay", - help="If >0, amount of time to sleep (in seconds) after behaviors " - "before moving on to next page", + "--postLoadDelay", + help="If >0, amount of time to sleep (in seconds) after page has loaded, before" + " taking screenshots / getting text / running behaviors. Default is 0.", type=int, ) + parser.add_argument( + "--pageExtraDelay", + help="If >0, amount of time to sleep (in seconds) after behaviors " + "before moving on to next page. Default is 0.", + type=int, + ) + + parser.add_argument( + "--dedupPolicy", + help="Deduplication policy. Default is skip", + choices=["skip", "revisit", "keep"], + ) + parser.add_argument( "--profile", help="Path or HTTP(S) URL to tar.gz file which contains the browser profile " @@ -279,8 +489,61 @@ def run(raw_args): ) parser.add_argument( - "--sizeLimit", - help="If set, save state and exit if size limit exceeds this value", + "--screenshot", + help="Screenshot options for crawler. One of view, thumbnail, fullPage, " + "fullPageFinal or a comma-separated combination of those.", + ) + + parser.add_argument( + "--screencastPort", + help="If set to a non-zero value, starts an HTTP server with screencast " + "accessible on this port.", + type=int, + ) + + parser.add_argument( + "--screencastRedis", + help="If set, will use the state store redis pubsub for screencasting", + action="store_true", + ) + + parser.add_argument( + "--warcInfo", + help="Optional fields added to the warcinfo record in combined WARCs", + ) + + parser.add_argument( + "--saveState", + help="If the crawl state should be serialized to the crawls/ directory. " + "Defaults to 'partial', only saved when crawl is interrupted", + choices=["never", "partial", "always"], + ) + + parser.add_argument( + "--saveStateInterval", + help="If save state is set to 'always', also save state during the crawl at " + "this interval (in seconds). Default to 300.", + type=int, + ) + + parser.add_argument( + "--saveStateHistory", + help="Number of save states to keep during the duration of a crawl. " + "Default to 5.", + type=int, + ) + + size_group = parser.add_mutually_exclusive_group() + size_group.add_argument( + "--sizeSoftLimit", + help="If set, save crawl state and stop crawl if WARC size exceeds this value. " + "ZIM will still be created.", + type=int, + ) + size_group.add_argument( + "--sizeHardLimit", + help="If set, exit crawler and fail the scraper immediately if WARC size " + "exceeds this value", type=int, ) @@ -292,9 +555,17 @@ def run(raw_args): default=90, ) - parser.add_argument( - "--timeLimit", - help="If set, save state and exit after time limit, in seconds", + time_group = parser.add_mutually_exclusive_group() + time_group.add_argument( + "--timeSoftLimit", + help="If set, save crawl state and stop crawl if WARC WARC(s) creation takes " + "longer than this value, in seconds. 
ZIM will still be created.", + type=int, + ) + time_group.add_argument( + "--timeHardLimit", + help="If set, exit crawler and fail the scraper immediately if WARC(s) creation" + " takes longer than this value, in seconds", type=int, ) @@ -309,16 +580,150 @@ def run(raw_args): help="overwrite current crawl data: if set, existing collection directory " "will be deleted before crawl is started", action="store_true", - default=False, + ) + + parser.add_argument( + "--waitOnDone", + help="if set, wait for interrupt signal when finished instead of exiting", + action="store_true", + ) + + parser.add_argument( + "--restartsOnError", + help="if set, assume will be restarted if interrupted, don't run post-crawl " + "processes on interrupt", + action="store_true", + ) + + parser.add_argument( + "--netIdleWait", + help="If set, wait for network idle after page load and after behaviors are " + "done (in seconds). if -1 (default), determine based on scope.", + type=int, + ) + + parser.add_argument( + "--lang", + help="if set, sets the language used by the browser, should be ISO 639 " + "language[-country] code", + ) + + parser.add_argument( + "--originOverride", + help="if set, will redirect requests from each origin in key to origin in the " + "value, eg. --originOverride https://host:port=http://alt-host:alt-port", + ) + + parser.add_argument( + "--logErrorsToRedis", + help="If set, write error messages to redis", + action="store_true", + ) + + parser.add_argument( + "--writePagesToRedis", + help="If set, write page objects to redis", + action="store_true", + ) + + parser.add_argument( + "--maxPageRetries", + help="If set, number of times to retry a page that failed to load before page" + " is considered to have failed. Default is 2.", + type=int, + ) + + parser.add_argument( + "--failOnFailedSeed", + help="If set, crawler will fail with exit code 1 if any seed fails. When " + "combined with --failOnInvalidStatus, will result in crawl failing with exit " + "code 1 if any seed has a 4xx/5xx response", + action="store_true", + ) + + parser.add_argument( + "--failOnFailedLimit", + help="If set, save state and exit if number of failed pages exceeds this value", + action="store_true", + ) + + parser.add_argument( + "--failOnInvalidStatus", + help="If set, will treat pages with 4xx or 5xx response as failures. When " + "combined with --failOnFailedLimit or --failOnFailedSeed may result in crawl " + "failing due to non-200 responses", + action="store_true", + ) + + # customBehaviors not included because it has special handling + # debugAccessRedis not included due to custom redis engine in zimit + + parser.add_argument( + "--debugAccessBrowser", + help="if set, allow debugging browser on port 9222 via CDP", + action="store_true", + ) + + parser.add_argument( + "--warcPrefix", + help="prefix for WARC files generated, including WARCs added to WACZ", + ) + + parser.add_argument( + "--serviceWorker", + help="service worker handling: disabled, enabled or disabled-if-profile. " + "Default: disabled.", + ) + + parser.add_argument( + "--proxyServer", + help="if set, will use specified proxy server. Takes precedence over any env " + "var proxy settings", + ) + + parser.add_argument( + "--dryRun", + help="If true, no archive data is written to disk, only pages and logs (and " + "optionally saved state).", + action="store_true", + ) + + parser.add_argument( + "--qaSource", + help="Required for QA mode. 
Path to the source WACZ or multi WACZ file for QA", + ) + + parser.add_argument( + "--qaDebugImageDiff", + help="if specified, will write crawl.png, replay.png and diff.png for each " + "page where they're different", + action="store_true", + ) + + parser.add_argument( + "--sshProxyPrivateKeyFile", + help="path to SSH private key for SOCKS5 over SSH proxy connection", + ) + + parser.add_argument( + "--sshProxyKnownHostsFile", + help="path to SSH known hosts file for SOCKS5 over SSH proxy connection", ) parser.add_argument( "--keep", - help="If set, keep WARC files after crawl, don't delete", + help="In case of failure, WARC files and other temporary files (which are " + "stored as a subfolder of output directory) are always kept, otherwise " + "they are automatically deleted. Use this flag to always keep WARC files, " + "even in case of success.", action="store_true", ) - parser.add_argument("--output", help="Output directory for ZIM", default="/output") + parser.add_argument( + "--output", + help="Output directory for ZIM. Default to /output.", + default="/output", + ) parser.add_argument( "--build", @@ -332,11 +737,6 @@ def run(raw_args): help="[warc2zim] Custom CSS file URL/path to inject into all articles", ) - parser.add_argument( - "--statsFilename", - help="If set, output stats as JSON to this file", - ) - parser.add_argument( "--config", help="Path to YAML config file. If set, browsertrix-crawler will use this file" @@ -351,8 +751,10 @@ def run(raw_args): ) parser.add_argument( - "--logging", - help="Crawler logging configuration", + "--zim-lang", + help="Language metadata of ZIM " + "(warc2zim --lang param). ISO-639-3 code. " + "Retrieved from homepage if found, fallback to `eng`", ) parser.add_argument( @@ -369,7 +771,16 @@ def run(raw_args): "path/URLs separated by comma", ) - zimit_args, warc2zim_args = parser.parse_known_args(raw_args) + parser.add_argument( + "--acceptable-crawler-exit-codes", + help="Non-zero crawler exit codes to consider as acceptable to continue with " + " conversion of WARC to ZIM. Flag partialZim will be set in statsFilename (if " + " used). 
Single value with individual error codes separated by comma", + ) + + # by design, all unknown args are for warc2zim ; known one are either for crawler + # or shared + known_args, warc2zim_args = parser.parse_known_args(raw_args) # pass a scraper suffix to warc2zim so that both zimit and warc2zim versions are # associated with the ZIM ; make it a CSV for easier parsing @@ -377,39 +788,69 @@ def run(raw_args): warc2zim_args.append(f"zimit {__version__}") # pass url and output to warc2zim also - if zimit_args.output: + if known_args.output: warc2zim_args.append("--output") - warc2zim_args.append(zimit_args.output) + warc2zim_args.append(known_args.output) - url = zimit_args.url + user_agent_suffix = known_args.userAgentSuffix + if known_args.adminEmail: + user_agent_suffix += f" {known_args.adminEmail}" - user_agent_suffix = zimit_args.userAgentSuffix - if zimit_args.adminEmail: - user_agent_suffix += f" {zimit_args.adminEmail}" + # set temp dir to use for this crawl + global temp_root_dir # noqa: PLW0603 + if known_args.build: + # use build dir argument if passed + temp_root_dir = Path(known_args.build) + temp_root_dir.mkdir(parents=True, exist_ok=True) + else: + # make new randomized temp dir + temp_root_dir = Path(tempfile.mkdtemp(dir=known_args.output, prefix=".tmp")) - if url: - url = get_cleaned_url(url) - warc2zim_args.append("--url") - warc2zim_args.append(url) + seeds = [] + if known_args.seeds: + seeds += [get_cleaned_url(url) for url in known_args.seeds.split(",")] + if known_args.seedFile: + if re.match(r"^https?\://", known_args.seedFile): + with tempfile.NamedTemporaryFile( + dir=temp_root_dir, + prefix="seeds_", + suffix=".txt", + delete_on_close=True, + ) as filename: + seed_file = Path(filename.name) + download_file(known_args.seedFile, seed_file) + seeds += [ + get_cleaned_url(url) for url in seed_file.read_text().splitlines() + ] + else: + seeds += [ + get_cleaned_url(url) + for url in Path(known_args.seedFile).read_text().splitlines() + ] + warc2zim_args.append("--url") + warc2zim_args.append(seeds[0]) - if zimit_args.custom_css: - warc2zim_args += ["--custom-css", zimit_args.custom_css] + if known_args.custom_css: + warc2zim_args += ["--custom-css", known_args.custom_css] - if zimit_args.title: + if known_args.title: warc2zim_args.append("--title") - warc2zim_args.append(zimit_args.title) + warc2zim_args.append(known_args.title) - if zimit_args.description: + if known_args.description: warc2zim_args.append("--description") - warc2zim_args.append(zimit_args.description) + warc2zim_args.append(known_args.description) - if zimit_args.long_description: + if known_args.long_description: warc2zim_args.append("--long-description") - warc2zim_args.append(zimit_args.long_description) + warc2zim_args.append(known_args.long_description) - if zimit_args.zim_lang: + if known_args.zim_lang: warc2zim_args.append("--lang") - warc2zim_args.append(zimit_args.zim_lang) + warc2zim_args.append(known_args.zim_lang) + + if known_args.overwrite: + warc2zim_args.append("--overwrite") logger.info("----------") logger.info("Testing warc2zim args") @@ -419,29 +860,17 @@ def run(raw_args): logger.info("Exiting, invalid warc2zim params") return EXIT_CODE_WARC2ZIM_CHECK_FAILED - # make temp dir for this crawl - if zimit_args.build: - temp_root_dir = Path(tempfile.mkdtemp(dir=zimit_args.build, prefix=".tmp")) - else: - temp_root_dir = Path(tempfile.mkdtemp(dir=zimit_args.output, prefix=".tmp")) - - if not zimit_args.keep: - - def cleanup(): - logger.info("") - logger.info("----------") - 
logger.info(f"Cleanup, removing temp dir: {temp_root_dir}") - shutil.rmtree(temp_root_dir) - + # only trigger cleanup when the keep argument is passed without a custom build dir. + if not known_args.build and not known_args.keep: atexit.register(cleanup) # copy / download custom behaviors to one single folder and configure crawler - if zimit_args.custom_behaviors: + if known_args.custom_behaviors: behaviors_dir = temp_root_dir / "custom-behaviors" behaviors_dir.mkdir() for custom_behavior in [ custom_behavior.strip() - for custom_behavior in zimit_args.custom_behaviors.split(",") + for custom_behavior in known_args.custom_behaviors.split(",") ]: behaviors_file = tempfile.NamedTemporaryFile( dir=behaviors_dir, @@ -461,55 +890,100 @@ def run(raw_args): f"to {behaviors_file.name}" ) shutil.copy(custom_behavior, behaviors_file.name) - zimit_args.customBehaviors = str(behaviors_dir) + known_args.customBehaviors = str(behaviors_dir) else: - zimit_args.customBehaviors = None + known_args.customBehaviors = None - cmd_args = get_node_cmd_line(zimit_args) - if url: - cmd_args.append("--url") - cmd_args.append(url) + crawler_args = get_crawler_cmd_line(known_args) + for seed in seeds: + crawler_args.append("--seeds") + crawler_args.append(seed) - cmd_args.append("--userAgentSuffix") - cmd_args.append(user_agent_suffix) + crawler_args.append("--userAgentSuffix") + crawler_args.append(user_agent_suffix) - if not zimit_args.noMobileDevice: - cmd_args.append("--mobileDevice") - cmd_args.append(zimit_args.mobileDevice) + crawler_args.append("--cwd") + crawler_args.append(str(temp_root_dir)) - cmd_args.append("--cwd") - cmd_args.append(str(temp_root_dir)) + output_dir = Path(known_args.output) + warc2zim_stats_file = ( + Path(known_args.warc2zim_progress_file) + if known_args.warc2zim_progress_file + else temp_root_dir / "warc2zim.json" + ) + if not warc2zim_stats_file.is_absolute(): + warc2zim_stats_file = output_dir / warc2zim_stats_file + warc2zim_stats_file.parent.mkdir(parents=True, exist_ok=True) + warc2zim_stats_file.unlink(missing_ok=True) - # setup inotify crawler progress watcher - if zimit_args.statsFilename: + crawler_stats_file = ( + Path(known_args.statsFilename) + if known_args.statsFilename + else temp_root_dir / "crawl.json" + ) + if not crawler_stats_file.is_absolute(): + crawler_stats_file = output_dir / crawler_stats_file + crawler_stats_file.parent.mkdir(parents=True, exist_ok=True) + crawler_stats_file.unlink(missing_ok=True) + + zimit_stats_file = ( + Path(known_args.zimit_progress_file) + if known_args.zimit_progress_file + else temp_root_dir / "stats.json" + ) + if not zimit_stats_file.is_absolute(): + zimit_stats_file = output_dir / zimit_stats_file + zimit_stats_file.parent.mkdir(parents=True, exist_ok=True) + zimit_stats_file.unlink(missing_ok=True) + + if known_args.zimit_progress_file: + # setup inotify crawler progress watcher watcher = ProgressFileWatcher( - Path(zimit_args.output), Path(zimit_args.statsFilename) + zimit_stats_path=zimit_stats_file, + crawl_stats_path=crawler_stats_file, + warc2zim_stats_path=warc2zim_stats_file, + ) + logger.info( + f"Writing zimit progress to {watcher.zimit_stats_path}, crawler progress to" + f" {watcher.crawl_stats_path} and warc2zim progress to " + f"{watcher.warc2zim_stats_path}" ) - logger.info(f"Writing progress to {watcher.stats_path}") # update crawler command - cmd_args.append("--statsFilename") - cmd_args.append(str(watcher.crawl_path)) + crawler_args.append("--statsFilename") + crawler_args.append(str(crawler_stats_file)) # 
update warc2zim command warc2zim_args.append("-v") warc2zim_args.append("--progress-file") - warc2zim_args.append(str(watcher.warc2zim_path)) + warc2zim_args.append(str(warc2zim_stats_file)) watcher.watch() + else: + if known_args.statsFilename: + logger.info(f"Writing crawler progress to {crawler_stats_file}") + crawler_args.append("--statsFilename") + crawler_args.append(str(crawler_stats_file)) + if known_args.warc2zim_progress_file: + logger.info(f"Writing warc2zim progress to {warc2zim_stats_file}") + warc2zim_args.append("-v") + warc2zim_args.append("--progress-file") + warc2zim_args.append(str(warc2zim_stats_file)) - cmd_line = " ".join(cmd_args) + cmd_line = " ".join(crawler_args) logger.info("") logger.info("----------") logger.info( f"Output to tempdir: {temp_root_dir} - " - f"{'will keep' if zimit_args.keep else 'will delete'}" + f"{'will keep' if known_args.keep else 'will delete'}" ) + partial_zim = False + # if warc files are passed, do not run browsertrix crawler but fetch the files if # they are provided as an HTTP URL + extract the archive if it is a tar.gz warc_files: list[Path] = [] - if zimit_args.warcs: + if known_args.warcs: for warc_location in [ - warc_location.strip() for warc_location in zimit_args.warcs.split(",") + warc_location.strip() for warc_location in known_args.warcs.split(",") ]: suffix = "".join(Path(urllib.parse.urlparse(warc_location).path).suffixes) if suffix not in {".tar", ".tar.gz", ".warc", ".warc.gz"}: @@ -565,17 +1039,36 @@ def run(raw_args): warc_files.append(Path(extract_path)) else: - logger.info(f"Running browsertrix-crawler crawl: {cmd_line}") - crawl = subprocess.run(cmd_args, check=False) - if crawl.returncode == EXIT_CODE_CRAWLER_LIMIT_HIT: - logger.info("crawl interupted by a limit") + crawl = subprocess.run(crawler_args, check=False) + if ( + crawl.returncode == EXIT_CODE_CRAWLER_SIZE_LIMIT_HIT + and known_args.sizeSoftLimit + ): + logger.info( + "Crawl size soft limit hit. Continuing with warc2zim conversion." + ) + if known_args.zimit_progress_file: + partial_zim = True + elif ( + crawl.returncode == EXIT_CODE_CRAWLER_TIME_LIMIT_HIT + and known_args.timeSoftLimit + ): + logger.info( + "Crawl time soft limit hit. Continuing with warc2zim conversion." 
+ ) + if known_args.zimit_progress_file: + partial_zim = True elif crawl.returncode != 0: - raise subprocess.CalledProcessError(crawl.returncode, cmd_args) + logger.error( + f"Crawl returned an error: {crawl.returncode}, scraper exiting" + ) + cancel_cleanup() + return crawl.returncode - if zimit_args.collection: + if known_args.collection: warc_files = [ - temp_root_dir.joinpath(f"collections/{zimit_args.collection}/archive/") + temp_root_dir.joinpath(f"collections/{known_args.collection}/archive/") ] else: @@ -589,24 +1082,36 @@ def run(raw_args): ) elif len(warc_dirs) > 1: logger.info( - "Found many WARC files directories, only most recently modified one" - " will be used" + "Found many WARC files directories, combining pages from all " + "of them" ) for directory in warc_dirs: logger.info(f"- {directory}") - warc_files = [warc_dirs[-1]] + warc_files = warc_dirs logger.info("") logger.info("----------") logger.info( f"Processing WARC files in/at " - f'{" ".join(str(warc_file) for warc_file in warc_files)}' + f"{' '.join(str(warc_file) for warc_file in warc_files)}" ) warc2zim_args.extend(str(warc_file) for warc_file in warc_files) logger.info(f"Calling warc2zim with these args: {warc2zim_args}") - return warc2zim(warc2zim_args) + warc2zim_exit_code = warc2zim(warc2zim_args) + + if known_args.zimit_progress_file: + stats_content = json.loads(zimit_stats_file.read_bytes()) + stats_content["partialZim"] = partial_zim + zimit_stats_file.write_text(json.dumps(stats_content)) + + # also call cancel_cleanup when --keep, even if it is not supposed to be registered, + # so that we will display temporary files location just like in other situations + if warc2zim_exit_code or known_args.keep: + cancel_cleanup() + + return warc2zim_exit_code def get_cleaned_url(url: str): @@ -621,39 +1126,92 @@ def get_cleaned_url(url: str): return parsed_url.geturl() -def get_node_cmd_line(args): - node_cmd = ["crawl", "--failOnFailedSeed"] +def get_crawler_cmd_line(args): + """Build the command line for Browsertrix crawler""" + node_cmd = ["crawl"] for arg in [ - "workers", - "waitUntil", - "urlFile", "title", "description", + "workers", + "crawlId", + "waitUntil", "depth", "extraHops", - "limit", + "pageLimit", "maxPageLimit", - "timeout", + "pageLoadTimeout", "scopeType", - "include", - "exclude", + "scopeIncludeRx", + "scopeExcludeRx", "collection", "allowHashUrls", - "lang", + "selectLinks", + "clickSelector", + "blockRules", + "blockMessage", + "blockAds", + "adBlockMessage", + "collection", + "headless", + "driver", + "generateCDX", + "combineWARC", + "rolloverSize", + "generateWACZ", + "logging", + "logLevel", + "logContext", + "logExcludeContext", + "text", + "mobileDevice", "userAgent", + # userAgentSuffix (manipulated), "useSitemap", + "sitemapFromDate", + "sitemapToDate", + # statsFilename (manipulated), "behaviors", "behaviorTimeout", - "delay", + "postLoadDelay", + "pageExtraDelay", + "dedupPolicy", "profile", - "sizeLimit", + "screenshot", + "screencastPort", + "screencastRedis", + "warcInfo", + "saveState", + "saveStateInterval", + "saveStateHistory", + "sizeSoftLimit", + "sizeHardLimit", "diskUtilization", - "timeLimit", + "timeSoftLimit", + "timeHardLimit", "healthCheckPort", "overwrite", - "config", - "logging", + "waitOnDone", + "restartsOnError", + "netIdleWait", + "lang", + "originOverride", + "logErrorsToRedis", + "writePagesToRedis", + "maxPageRetries", + "failOnFailedSeed", + "failOnFailedLimit", + "failOnInvalidStatus", + "debugAccessBrowser", + "warcPrefix", + "serviceWorker", + 
"proxyServer", + "dryRun", + "qaSource", + "qaDebugImageDiff", + "sshProxyPrivateKeyFile", + "sshProxyKnownHostsFile", "customBehaviors", + "config", ]: value = getattr(args, arg) if arg == "userAgent": @@ -668,7 +1226,14 @@ def get_node_cmd_line(args): continue if value is None or (isinstance(value, bool) and value is False): continue - node_cmd.append("--" + arg) + node_cmd.append( + "--" + + ( + "sizeLimit" + if arg in ["sizeSoftLimit", "sizeHardLimit"] + else "timeLimit" if arg in ["timeSoftLimit", "timeHardLimit"] else arg + ) + ) if not isinstance(value, bool): node_cmd.append(str(value)) @@ -685,7 +1250,7 @@ def sigint_handler(*args): # noqa: ARG001 def zimit(): - run(sys.argv[1:]) + sys.exit(run(sys.argv[1:])) signal.signal(signal.SIGINT, sigint_handler) diff --git a/tests-daily/Dockerfile b/tests-daily/Dockerfile index f6118fe..22d45ef 100644 --- a/tests-daily/Dockerfile +++ b/tests-daily/Dockerfile @@ -1,5 +1,5 @@ # Let's extract kiwix-tools as usual on alpine temporary build container -FROM alpine:3.18 as kiwix-serve +FROM alpine:3.21 as kiwix-serve LABEL org.opencontainers.image.source https://github.com/openzim/kiwix-tools # TARGETPLATFORM is injected by docker build @@ -30,7 +30,7 @@ RUN set -e && \ curl -k -L $url | tar -xz -C /kiwix-serve --strip-components 1 # Build real "workload" container -FROM python:3.12-slim-bookworm +FROM python:3.13-slim-bookworm # Add kiwix-serve COPY --from=kiwix-serve /kiwix-serve /usr/local/bin @@ -70,6 +70,6 @@ RUN rm /tmp/chrome-linux64.zip /tmp/chromedriver-linux64.zip /tmp/versions.json RUN \ python -m pip install --no-cache-dir -U \ pip \ - selenium==4.23.0 \ - pytest==8.2.2 \ + selenium==4.28.1 \ + pytest==8.3.4 \ && mkdir -p /work diff --git a/tests-integration/integration.py b/tests-integration/integration.py index 16ab337..7e79f52 100644 --- a/tests-integration/integration.py +++ b/tests-integration/integration.py @@ -1,30 +1,55 @@ import glob import json import os +from pathlib import Path +import pytest from warcio import ArchiveIterator from zimscraperlib.zim import Archive -def test_is_file(): +@pytest.mark.parametrize( + "filename", + [ + pytest.param("/output/tests_en_onepage.zim", id="onepage"), + pytest.param("/output/tests_en_sizesoftlimit.zim", id="sizesoftlimit"), + pytest.param("/output/tests_en_timesoftlimit.zim", id="timesoftlimit"), + ], +) +def test_zim_created(filename): """Ensure ZIM file exists""" - assert os.path.isfile("/output/isago.zim") + assert os.path.isfile(filename) + + +@pytest.mark.parametrize( + "filename", + [ + pytest.param("/output/tests_en_sizehardlimit.zim", id="sizehardlimit"), + pytest.param("/output/tests_en_timehardlimit.zim", id="timehardlimit"), + ], +) +def test_zim_not_created(filename): + """Ensure ZIM file does not exists""" + assert not os.path.exists(filename) def test_zim_main_page(): - """Main page specified, http://isago.rskg.org/, was a redirect to https + """Main page specified, http://website.test.openzim.org/http-return-codes.html, + was a redirect to https Ensure main page is the redirected page""" - main_entry = Archive("/output/isago.zim").main_entry + main_entry = Archive(Path("/output/tests_en_onepage.zim")).main_entry assert main_entry.is_redirect - assert main_entry.get_redirect_entry().path == "isago.rskg.org/" + assert ( + main_entry.get_redirect_entry().path + == "website.test.openzim.org/http-return-codes.html" + ) def test_zim_scraper(): - """Main page specified, http://isago.rskg.org/, was a redirect to https - Ensure main page is the redirected page""" + """Check 
content of scraper metadata""" - zim_fh = Archive("/output/isago.zim") + zim_fh = Archive(Path("/output/tests_en_onepage.zim")) scraper = zim_fh.get_text_metadata("Scraper") assert "zimit " in scraper assert "warc2zim " in scraper @@ -33,18 +58,28 @@ def test_zim_scraper(): def test_files_list(): """Check that expected files are present in the ZIM at proper path""" - zim_fh = Archive("/output/isago.zim") + zim_fh = Archive(Path("/output/tests_en_onepage.zim")) for expected_entry in [ "_zim_static/__wb_module_decl.js", "_zim_static/wombat.js", "_zim_static/wombatSetup.js", - "isago.rskg.org/", - "isago.rskg.org/a-propos", - "isago.rskg.org/conseils", - "isago.rskg.org/faq", - "isago.rskg.org/static/favicon256.png", - "isago.rskg.org/static/tarifs-isago.pdf", - "maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css", + "website.test.openzim.org/http-return-codes.html", + "website.test.openzim.org/200-response", + "website.test.openzim.org/201-response", + "website.test.openzim.org/202-response", + "website.test.openzim.org/301-external-redirect-ok", + "website.test.openzim.org/301-internal-redirect-ok", + "website.test.openzim.org/302-external-redirect-ok", + "website.test.openzim.org/302-internal-redirect-ok", + "website.test.openzim.org/307-external-redirect-ok", + "website.test.openzim.org/307-internal-redirect-ok", + "website.test.openzim.org/308-external-redirect-ok", + "website.test.openzim.org/308-internal-redirect-ok", + "website.test.openzim.org/http-return-codes.html", + "website.test.openzim.org/icons/favicon.ico", + "website.test.openzim.org/icons/site.webmanifest", + "website.test.openzim.org/internal_redirect_target.html", + "www.example.com/", ]: assert zim_fh.get_content(expected_entry) @@ -71,24 +106,40 @@ def test_user_agent(): assert found -def test_stats_output(): - with open("/output/crawl.json") as fh: - assert json.loads(fh.read()) == { - "crawled": 5, - "pending": 0, - "pendingPages": [], - "total": 5, - "failed": 0, - "limit": {"max": 0, "hit": False}, - } - with open("/output/warc2zim.json") as fh: - assert json.loads(fh.read()) == { - "written": 7, - "total": 7, - } - with open("/output/stats.json") as fh: - assert json.loads(fh.read()) == { - "done": 7, - "total": 7, - "limit": {"max": 0, "hit": False}, - } +def test_stats_output_standard(): + assert json.loads(Path("/output/crawl.json").read_bytes()) == { + "crawled": 17, + "pending": 0, + "pendingPages": [], + "total": 35, + "failed": 18, + "limit": {"max": 0, "hit": False}, + } + + assert json.loads(Path("/output/warc2zim.json").read_bytes()) == { + "written": 8, + "total": 8, + } + + assert json.loads(Path("/output/stats.json").read_bytes()) == { + "done": 8, + "total": 8, + "partialZim": False, + } + + +@pytest.mark.parametrize( + "filename", + [ + pytest.param("/output/stats_sizesoftlimit.json", id="sizesoftlimit"), + pytest.param("/output/stats_timesoftlimit.json", id="timesoftlimit"), + ], +) +def test_stats_output_softlimit(filename): + file = Path(filename) + assert file.exists + content = json.loads(file.read_bytes()) + assert "done" in content + assert "total" in content + assert "partialZim" in content + assert content["partialZim"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d51650d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,14 @@ +import pytest + +from zimit import zimit as app + +""" + cleanup disabled because atexit hooks run at the very end of the Python process + shutdown. 
By the time cleanup() is called, the logging module has already closed its + file streams. +""" + + +@pytest.fixture(autouse=True) +def disable_zimit_cleanup(monkeypatch): + monkeypatch.setattr(app, "cleanup", lambda: None) diff --git a/tests/data/example-response.warc b/tests/data/example-response.warc new file mode 100644 index 0000000..143b947 Binary files /dev/null and b/tests/data/example-response.warc differ diff --git a/tests/test_overwrite.py b/tests/test_overwrite.py new file mode 100644 index 0000000..e41baca --- /dev/null +++ b/tests/test_overwrite.py @@ -0,0 +1,83 @@ +import pathlib + +import pytest + +from zimit.zimit import run + +TEST_DATA_DIR = pathlib.Path(__file__).parent / "data" + + +def test_overwrite_flag_behaviour(tmp_path): + zim_output = "overwrite-test.zim" + output_path = tmp_path / zim_output + + # 1st run → creates file + result = run( + [ + "--seeds", + "https://example.com", + "--warcs", + str(TEST_DATA_DIR / "example-response.warc"), + "--output", + str(tmp_path), + "--zim-file", + zim_output, + "--name", + "overwrite-test", + ] + ) + assert result in (None, 100) + assert output_path.exists() + + # 2nd run, no overwrite → should fail + with pytest.raises(SystemExit) as exc: + run( + [ + "--seeds", + "https://example.com", + "--warcs", + str(TEST_DATA_DIR / "example-response.warc"), + "--output", + str(tmp_path), + "--zim-file", + zim_output, + "--name", + "overwrite-test", + ] + ) + assert exc.value.code == 2 + + # 2nd run, no overwrite → should fail + with pytest.raises(SystemExit) as exc: + run( + [ + "--seeds", + "https://example.com", + "--output", + str(tmp_path), + "--zim-file", + zim_output, + "--name", + "overwrite-test", + ] + ) + assert exc.value.code == 2 + + # 3rd run, with overwrite → should succeed + result = run( + [ + "--seeds", + "https://example.com", + "--warcs", + str(TEST_DATA_DIR / "example-response.warc"), + "--output", + str(tmp_path), + "--zim-file", + zim_output, + "--name", + "overwrite-test", + "--overwrite", + ] + ) + assert result in (None, 100) + assert output_path.exists()
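
For reference, a minimal sketch of how the renamed limit and progress options defined in this diff could be exercised through the same run() entry point that tests/test_overwrite.py uses above. The seed URL, output path, names, and limit values below are illustrative placeholders only and not part of the patch; the flag names are taken from the argparse definitions earlier in the diff.

    from zimit.zimit import run

    # Hypothetical invocation sketch: crawl a single seed, stop the crawl (but
    # still build the ZIM) once the WARC data exceeds ~1 GB or the crawl runs
    # longer than an hour, and write zimit's aggregated progress file, which is
    # resolved relative to --output per the help text above. All values are
    # placeholders chosen for illustration.
    run(
        [
            "--seeds", "https://example.com",
            "--output", "/output",
            "--name", "limits-demo",             # placeholder ZIM name
            "--zim-file", "limits-demo.zim",     # placeholder ZIM filename
            "--sizeSoftLimit", "1000000000",     # WARC size threshold, in bytes
            "--timeSoftLimit", "3600",           # crawl time threshold, in seconds
            "--zimit-progress-file", "stats.json",
        ]
    )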