2025-12-31 12:33:15 +00:00
19 changed files with 278 additions and 2166 deletions
--- a/.github/workflows/DailyTests.yaml
+++ b/.github/workflows/DailyTests.yaml
@ -18,7 +18,7 @@ jobs:
        run: docker build -t local-zimit .

      - name: run crawl of test website
-        run: docker run -v $PWD/output:/output local-zimit zimit --seeds https://website.test.openzim.org/ --name tests_eng_test-website --zim-file tests_eng_test-website.zim
+        run: docker run -v $PWD/output:/output local-zimit zimit --url https://website.test.openzim.org/ --name tests_eng_test-website --zim-file tests_eng_test-website.zim

      - name: archive ZIM
        uses: actions/upload-artifact@v4
--- a/.github/workflows/Publish.yml
+++ b/.github/workflows/Publish.yml
@ -5,9 +5,8 @@ on:
    types: [published]

 jobs:
-  publish-amd64:
-    runs-on: ubuntu-24.04
-    name: "Publish for AMD64"
+  publish:
+    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v4
@ -20,34 +19,11 @@ jobs:
          latest-on-tag: true
          restrict-to: openzim/zimit
          registries: ghcr.io
-          credentials: |
+          credentials: |
            GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
            GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
          repo_description: auto
          repo_overview: auto
          platforms: |
            linux/amd64
-
-  # Disabled for now, see https://github.com/openzim/zimit/issues/463
-  # publish-arm64:
-  #   runs-on: ubuntu-24.04
-  #   name: "Publish for ARM64"
-  #
-  #   steps:
-  #     - uses: actions/checkout@v4
-  #
-  #     - name: Build and push Docker image
-  #       uses: openzim/docker-publish-action@v10
-  #       with:
-  #         image-name: openzim/zimit
-  #         tag-pattern: /^v([0-9.]+)$/
-  #         latest-on-tag: true
-  #         restrict-to: openzim/zimit
-  #         registries: ghcr.io
-  #         credentials: |
-  #           GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
-  #           GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
-  #         repo_description: auto
-  #         repo_overview: auto
-  #         platforms: |
-  #           linux/arm64
+            linux/arm64
--- a/.github/workflows/PublishDockerDevImage.yaml
+++ b/.github/workflows/PublishDockerDevImage.yaml
@ -7,9 +7,8 @@ on:
  workflow_dispatch:

 jobs:
-  publish-amd64:
-    runs-on: ubuntu-24.04
-    name: "Publish for AMD64"
+  publish:
+    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v4
@ -22,34 +21,11 @@ jobs:
          latest-on-tag: false
          restrict-to: openzim/zimit
          registries: ghcr.io
-          credentials: |
+          credentials: |
            GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
            GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
          repo_description: auto
          repo_overview: auto
          platforms: |
            linux/amd64
-
-  # Disabled for now, see https://github.com/openzim/zimit/issues/463
-  # publish-arm64:
-  #   runs-on: ubuntu-24.04-arm
-  #   name: "Publish for ARM64"
-  #
-  #   steps:
-  #     - uses: actions/checkout@v4
-  #
-  #     - name: Build and push Docker image
-  #       uses: openzim/docker-publish-action@v10
-  #       with:
-  #         image-name: openzim/zimit
-  #         manual-tag: dev
-  #         latest-on-tag: false
-  #         restrict-to: openzim/zimit
-  #         registries: ghcr.io
-  #         credentials: |
-  #           GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
-  #           GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
-  #         repo_description: auto
-  #         repo_overview: auto
-  #         platforms: |
-  #           linux/arm64
+            linux/arm64
--- a/.github/workflows/Tests.yaml
+++ b/.github/workflows/Tests.yaml
@ -57,25 +57,13 @@ jobs:
        uses: actions/checkout@v4

      - name: build image
-        run: docker build -t local-zimit .
+        run: docker build -t zimit .

      - name: ensure help display without issue
-        run: docker run -v $PWD/output:/output local-zimit zimit --help
+        run: docker run -v $PWD/output:/output zimit zimit --help

-      - name: run crawl with soft size limit
-        run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --sizeSoftLimit 8192 --name tests_en_sizesoftlimit --zim-file tests_en_sizesoftlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_sizesoftlimit.json
-
-      - name: run crawl with hard size limit
-        run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --sizeHardLimit 8192 --name tests_en_sizehardlimit --zim-file tests_en_sizehardlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_sizehardlimit.json || true
-
-      - name: run crawl with soft time limit
-        run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --timeSoftLimit 1 --name tests_en_timesoftlimit --zim-file tests_en_timesoftlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_timesoftlimit.json
-
-      - name: run crawl with hard time limit
-        run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/ --timeHardLimit 1 --name tests_en_timehardlimit --zim-file tests_en_timehardlimit.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats_timehardlimit.json || true
-
-      - name: run standard crawl
-        run: docker run -v $PWD/output:/output local-zimit zimit --seeds http://website.test.openzim.org/http-return-codes.html --name tests_en_onepage --zim-file tests_en_onepage.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --zimit-progress-file /output/stats.json --statsFilename /output/crawl.json --warc2zim-progress-file /output/warc2zim.json --keep
+      - name: run crawl
+        run: docker run -v $PWD/output:/output zimit zimit --url http://isago.rskg.org/ --name isago --zim-file isago.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --statsFilename /output/stats.json --keep

      - name: run integration test suite
-        run: docker run -v $PWD/tests-integration/integration.py:/app/integration.py -v $PWD/output:/output local-zimit bash -c "/app/zimit/bin/pip install pytest; /app/zimit/bin/pytest -v /app/integration.py"
+        run: docker run -v $PWD/tests-integration/integration.py:/app/integration.py -v $PWD/output:/output zimit bash -c "/app/zimit/bin/pip install pytest; /app/zimit/bin/pytest -v /app/integration.py"
--- a/.github/workflows/update-zim-offliner-definition.yaml
+++ b/.github/workflows/update-zim-offliner-definition.yaml
@ -1,45 +0,0 @@
-name: Update ZIMFarm Definitions
-
-on:
-  push:
-    branches: [main]
-    paths:
-      - "offliner-definition.json"
-  release:
-    types: [published]
-
-  workflow_dispatch:
-    inputs:
-      version:
-        description: "Version to publish"
-        required: false
-        default: "dev"
-
-jobs:
-  prepare-json:
-    runs-on: ubuntu-24.04
-    outputs:
-      offliner_definition_b64: ${{ steps.read-json.outputs.offliner_definition_b64 }}
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - id: read-json
-        run: |
-          if [ ! -f "offliner-definition.json" ]; then
-            echo "File not found!" >&2
-            exit 1
-          fi
-          json_b64=$(base64 -w0 <<< "$(jq -c . offliner-definition.json)")
-          echo "offliner_definition_b64=$json_b64" >> $GITHUB_OUTPUT
-  call-workflow:
-    needs: prepare-json
-    uses: openzim/overview/.github/workflows/update-zimfarm-offliner-definition.yaml@main
-    with:
-      version: ${{ github.event_name == 'release' && github.event.release.tag_name || (github.event.inputs.version || 'dev') }}
-      offliner: zimit
-      offliner_definition_b64: ${{ needs.prepare-json.outputs.offliner_definition_b64 }}
-    secrets:
-      zimfarm_ci_secret: ${{ secrets.ZIMFARM_CI_SECRET }}
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -2,20 +2,20 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v5.0.0
+  rev: v4.4.0
  hooks:
  -   id: trailing-whitespace
  -   id: end-of-file-fixer
 - repo: https://github.com/psf/black
-  rev: "25.1.0"
+  rev: "24.10.0"
  hooks:
  -   id: black
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.9.4
+  rev: v0.6.9
  hooks:
  - id: ruff
 - repo: https://github.com/RobertCraigie/pyright-python
-  rev: v1.1.393
+  rev: v1.1.383
  hooks:
  - id: pyright
    name: pyright (system)
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,92 +5,6 @@ All notable changes to this project are documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.2.0).

-## [Unreleased]
-
-### Added
- Added `--overwrite` flag to overwrite existing ZIM file if it exists (#399)
-
-### Changed
- Fix issues preventing interrupted crawls from being resumed. (#499)
-  - Ensure build directory is used explicitly instead of a randomized subdirectory when passed, and pre-create it if it does not exist.
-  - Use all warc_dirs found instead of just the latest so interrupted crawls use all collected pages across runs when an explicit collections directory is not passed.
-  - Don't cleanup an explicitly passed build directory.
-
-## [3.0.5] - 2024-04-11
-
-### Changed
-
- Upgrade to browsertrix crawler 1.6.0 (#493)
-
-## [3.0.4] - 2024-04-04
-
-### Changed
-
- Upgrade to browsertrix crawler 1.5.10 (#491)
-
-## [3.0.3] - 2024-02-28
-
-### Changed
-
- Upgrade to browsertrix crawler 1.5.7 (#483)
-
-## [3.0.2] - 2024-02-27
-
-### Changed
-
- Upgrade to browsertrix crawler 1.5.6 (#482)
-
-## [3.0.1] - 2024-02-24
-
-### Changed
-
- Upgrade to browsertrix crawler 1.5.4 (#476)
-
-## [3.0.0] - 2024-02-17
-
-### Changed
-
- Change solution to report partial ZIM to the Zimfarm and other clients (#304)
- Keep temporary folder when crawler or warc2zim fails, even if not asked for (#468)
- Add many missing Browsertrix Crawler arguments ; drop default overrides by zimit ; drop `--noMobileDevice` setting (not needed anymore) (#433)
- Document all Browsertrix Crawler default arguments values (#416)
- Use preferred Browsertrix Crawler arguments names: (part of #471)
-  - `--seeds` instead of `--url`
-  - `--seedFile` instead of `--urlFile`
-  - `--pageLimit` instead of `--limit`
-  - `--pageLoadTimeout` instead of `--timeout`
-  - `--scopeIncludeRx` instead of `--include`
-  - `--scopeExcludeRx` instead of `--exclude`
-  - `--pageExtraDelay` instead of `--delay`
- Remove confusion between zimit, warc2zim and crawler stats filenames (part of #471)
-  - `--statsFilename` is now the crawler stats file (since it is the same name, just like other arguments)
-  - `--zimit-progress-file` is now the zimit stats location
-  - `--warc2zim-progress-file` is the warc2zim stats location
-  - all are optional values, if not set and needed temporary files are used
-
-### Fixed
-
- Do not create the ZIM when crawl is incomplete (#444)
-
-## [2.1.8] - 2024-02-07
-
-### Changed
-
- Upgrade to browsertrix crawler 1.5.1, Python 3.13 and others (#462 + #464)
-
-## [2.1.7] - 2024-01-10
-
-### Changed
-
- Upgrade to browsertrix crawler 1.4.2 (#450)
- Upgrade to warc2zim 2.2.0
-
-## [2.1.6] - 2024-11-07
-
-### Changed
-
- Upgrade to browsertrix crawler 1.3.5 (#426)
-
 ## [2.1.5] - 2024-11-01

 ### Changed
--- a/Dockerfile
+++ b/Dockerfile
@ -1,16 +1,13 @@
-FROM webrecorder/browsertrix-crawler:1.6.0
-LABEL org.opencontainers.image.source=https://github.com/openzim/zimit
-
-# add deadsnakes ppa for latest Python on Ubuntu
-RUN add-apt-repository ppa:deadsnakes/ppa -y
+FROM webrecorder/browsertrix-crawler:1.3.4
+LABEL org.opencontainers.image.source=https://github.com/openzim/zimit

 RUN apt-get update \
 && apt-get install -qqy --no-install-recommends \
      libmagic1 \
-      python3.13-venv \
+      python3.12-venv \
 && rm -rf /var/lib/apt/lists/* \
 # python setup (in venv not to conflict with browsertrix)
- && python3.13 -m venv /app/zimit \
+ && python3.12 -m venv /app/zimit \
 # placeholder (default output location)
 && mkdir -p /output \
 # disable chrome upgrade
--- a/README.md
+++ b/README.md
@ -1,15 +1,15 @@
 Zimit
 =====

-Zimit is a scraper allowing to create [ZIM file](https://en.wikipedia.org/wiki/ZIM_(file_format)) from any Web site.
+Zimit is a scraper that creates a ZIM file from any website.

 [![CodeFactor](https://www.codefactor.io/repository/github/openzim/zimit/badge)](https://www.codefactor.io/repository/github/openzim/zimit)
 [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
-[![Docker](https://ghcr-badge.egpl.dev/openzim/zimit/latest_tag?label=docker)](https://ghcr.io/openzim/zimit)
+[![Docker](https://ghcr-badge.egpl.dev/openzim/zimit/latest_tag?label=docker)](https://ghcr.io/openzim/zimit)

 Zimit adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing).

-Zimit has implemented openZIM's [Python bootstrap, conventions and policies](https://github.com/openzim/_python-bootstrap/blob/main/docs/Policy.md) **v1.0.1**.
+Zimit has implemented openZIM's [Python bootstrap, conventions and policies](https://github.com/openzim/_python-bootstrap/blob/main/docs/Policy.md) **v1.0.1**.

 Capabilities and known limitations
 --------------------
@ -38,23 +38,24 @@ Usage

 `zimit` is intended to be run in Docker. Docker image is published at https://github.com/orgs/openzim/packages/container/package/zimit.

-The image accepts the following parameters, **as well as any of the [Browsertrix crawler](https://crawler.docs.browsertrix.com/user-guide/cli-options/) and [warc2zim](https://github.com/openzim/warc2zim) ones**:
+The image accepts the following parameters, **as well as any of the [warc2zim](https://github.com/openzim/warc2zim) ones**; useful for setting metadata, for instance:

- Required: `--seeds URL` - the url to start crawling from ; multiple URLs can be separated by a comma (even if **usually not needed**, these are just the **seeds** of the crawl) ; first seed URL is used as ZIM homepage
+- Required: `--url URL` - the url to be crawled
 - Required: `--name` - Name of ZIM file
 - `--output` - output directory (defaults to `/output`)
- `--pageLimit U` - Limit capture to at most U URLs
- `--scopeExcludeRx <regex>` - skip URLs that match the regex from crawling. Can be specified multiple times. An example is `--scopeExcludeRx="(\?q=|signup-landing\?|\?cid=)"`, where URLs that contain either `?q=` or `signup-landing?` or `?cid=` will be excluded.
+- `--limit U` - Limit capture to at most U URLs
+- `--behaviors` - Control which browsertrix behaviors are run (defaults to `autoplay,autofetch,siteSpecific`, adding `autoscroll` to the list is possible to automatically scroll the pages and fetch resources which are lazy loaded)
+- `--exclude <regex>` - skip URLs that match the regex from crawling. Can be specified multiple times. An example is `--exclude="(\?q=|signup-landing\?|\?cid=)"`, where URLs that contain either `?q=` or `signup-landing?` or `?cid=` will be excluded.
 - `--workers N` - number of crawl workers to be run in parallel
- `--waitUntil` - Puppeteer setting for how long to wait for page load. See [page.goto waitUntil options](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options). The default is `load`, but for static sites, `--waitUntil domcontentloaded` may be used to speed up the crawl (to avoid waiting for ads to load for example).
- `--keep` - in case of failure, WARC files and other temporary files (which are stored as a subfolder of output directory) are always kept, otherwise they are automatically deleted. Use this flag to always keep WARC files, even in case of success.
+- `--wait-until` - Puppeteer setting for how long to wait for page load. See [page.goto waitUntil options](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagegotourl-options). The default is `load`, but for static sites, `--wait-until domcontentloaded` may be used to speed up the crawl (to avoid waiting for ads to load for example).
+- `--keep` - if set, keep the WARC files in a temp directory inside the output directory

 Example command:

 ```bash
 docker run ghcr.io/openzim/zimit zimit --help
 docker run ghcr.io/openzim/zimit warc2zim --help
-docker run  -v /output:/output ghcr.io/openzim/zimit zimit --seeds URL --name myzimfile
+docker run  -v /output:/output ghcr.io/openzim/zimit zimit --url URL --name myzimfile
 ```

 **Note**: Image automatically filters out a large number of ads by using the 3 blocklists from [anudeepND](https://github.com/anudeepND/blacklist). If you don't want this filtering, disable the image's entrypoint in your container (`docker run --entrypoint="" ghcr.io/openzim/zimit ...`).
--- a/offliner-definition.json
+++ b/offliner-definition.json
@ -1,981 +0,0 @@
-{
-  "offliner_id": "zimit",
-  "stdOutput": true,
-  "stdStats": "zimit-progress-file",
-  "flags": {
-    "seeds": {
-      "type": "string",
-      "required": false,
-      "title": "Seeds",
-      "description": "The seed URL(s) to start crawling from. Multile seed URL must be separated by a comma (usually not needed, these are just the crawl seeds). First seed URL is used as ZIM homepage"
-    },
-    "seed_file": {
-      "type": "string",
-      "required": false,
-      "title": "Seed File",
-      "description": "If set, read a list of seed urls, one per line. HTTPS URL to an online file."
-    },
-    "lang": {
-      "type": "string",
-      "required": false,
-      "title": "Browser Language",
-      "description": "If set, sets the language used by the browser, should be ISO 639 language[-country] code, e.g. `en` or `en-GB`"
-    },
-    "title": {
-      "type": "string",
-      "required": false,
-      "title": "Title",
-      "description": "Custom title for your ZIM. Defaults to title of main page",
-      "minLength": 1,
-      "maxLength": 30
-    },
-    "description": {
-      "type": "string",
-      "required": false,
-      "title": "Description",
-      "description": "Description for ZIM",
-      "minLength": 1,
-      "maxLength": 80
-    },
-    "favicon": {
-      "type": "blob",
-      "kind": "image",
-      "required": false,
-      "title": "Illustration",
-      "description": "URL for Illustration. "
-    },
-    "tags": {
-      "type": "string",
-      "required": false,
-      "title": "ZIM Tags",
-      "description": "Single string with individual tags separated by a semicolon."
-    },
-    "creator": {
-      "type": "string",
-      "required": false,
-      "title": "Creator",
-      "description": "Name of content creator"
-    },
-    "publisher": {
-      "type": "string",
-      "required": false,
-      "title": "Publisher",
-      "isPublisher": true,
-      "description": "Custom publisher name (ZIM metadata). openZIM otherwise"
-    },
-    "source": {
-      "type": "string",
-      "required": false,
-      "title": "Source",
-      "description": "Source name/URL of content"
-    },
-    "workers": {
-      "type": "integer",
-      "required": false,
-      "title": "Workers",
-      "description": "The number of workers to run in parallel. Defaults to 1",
-      "min": 1
-    },
-    "wait_until": {
-      "type": "string",
-      "required": false,
-      "title": "WaitUntil",
-      "description": "Puppeteer page.goto() condition to wait for before continuing. One of load, domcontentloaded, networkidle0 or networkidle2, or a comma-separated combination of those. Default is load,networkidle2"
-    },
-    "extra_hops": {
-      "type": "integer",
-      "required": false,
-      "title": "Extra Hops",
-      "description": "Number of extra 'hops' to follow, beyond the current scope. Default is 0",
-      "min": 0
-    },
-    "page_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Page Limit",
-      "description": "Limit crawl to this number of pages. Default is 0 (no-limit).",
-      "min": 0
-    },
-    "max_page_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Max Page Limit",
-      "description": "Maximum pages to crawl, overriding pageLimit if both are set. Default is 0 (no-limit)",
-      "min": 0
-    },
-    "page_load_timeout": {
-      "type": "integer",
-      "required": false,
-      "title": "Page Load Timeout",
-      "description": "Timeout for each page to load (in seconds). Default is 90",
-      "min": 0
-    },
-    "scope_type": {
-      "type": "string-enum",
-      "required": false,
-      "title": "Scope Type",
-      "description": "A predfined scope of the crawl. For more customization, use 'custom' and set scopeIncludeRx/scopeExcludeRx regexes. Default is custom if scopeIncludeRx is set, prefix otherwise.",
-      "choices": [
-        {
-          "title": "Page",
-          "value": "page"
-        },
-        {
-          "title": "Page SPA",
-          "value": "page-spa"
-        },
-        {
-          "title": "Prefix",
-          "value": "prefix"
-        },
-        {
-          "title": "Host",
-          "value": "host"
-        },
-        {
-          "title": "Domain",
-          "value": "domain"
-        },
-        {
-          "title": "Any",
-          "value": "any"
-        },
-        {
-          "title": "Custom",
-          "value": "custom"
-        }
-      ]
-    },
-    "scope_include_rx": {
-      "type": "string",
-      "required": false,
-      "title": "Scope Include Regex",
-      "description": "Regex of page URLs that should be included in the crawl (defaults to the immediate directory of seed)"
-    },
-    "scope_exclude_rx": {
-      "type": "string",
-      "required": false,
-      "title": "Scope Exclude Regex",
-      "description": "Regex of page URLs that should be excluded from the crawl"
-    },
-    "allow_hash_urls": {
-      "type": "boolean",
-      "required": false,
-      "title": "Allow Hashtag URLs",
-      "description": "Allow Hashtag URLs, useful for single-page-application crawling or when different hashtags load dynamic content"
-    },
-    "mobile_device": {
-      "type": "string-enum",
-      "required": false,
-      "title": "As device",
-      "description": "Device to crawl as. See Pupeeter's Device.ts for a list",
-      "choices": [
-        {
-          "title": "Blackberry Playbook",
-          "value": "Blackberry PlayBook"
-        },
-        {
-          "title": "Blackberry Playbook Landscape",
-          "value": "Blackberry PlayBook landscape"
-        },
-        {
-          "title": "Blackberry Z30",
-          "value": "BlackBerry Z30"
-        },
-        {
-          "title": "Blackberry Z30 Landscape",
-          "value": "BlackBerry Z30 landscape"
-        },
-        {
-          "title": "Galaxy Note 3",
-          "value": "Galaxy Note 3"
-        },
-        {
-          "title": "Galaxy Note 3 Landscape",
-          "value": "Galaxy Note 3 landscape"
-        },
-        {
-          "title": "Galaxy Note II",
-          "value": "Galaxy Note II"
-        },
-        {
-          "title": "Galaxy Note II Landscape",
-          "value": "Galaxy Note II landscape"
-        },
-        {
-          "title": "Galaxy S III",
-          "value": "Galaxy S III"
-        },
-        {
-          "title": "Galaxy S III Landscape",
-          "value": "Galaxy S III landscape"
-        },
-        {
-          "title": "Galaxy S5",
-          "value": "Galaxy S5"
-        },
-        {
-          "title": "Galaxy S5 Landscape",
-          "value": "Galaxy S5 landscape"
-        },
-        {
-          "title": "Galaxy S8",
-          "value": "Galaxy S8"
-        },
-        {
-          "title": "Galaxy S8 Landscape",
-          "value": "Galaxy S8 landscape"
-        },
-        {
-          "title": "Galaxy S9 Plus",
-          "value": "Galaxy S9+"
-        },
-        {
-          "title": "Galaxy S9 Plus Landscape",
-          "value": "Galaxy S9+ landscape"
-        },
-        {
-          "title": "Galaxy Tab S4",
-          "value": "Galaxy Tab S4"
-        },
-        {
-          "title": "Galaxy Tab S4 Landscape",
-          "value": "Galaxy Tab S4 landscape"
-        },
-        {
-          "title": "iPad",
-          "value": "iPad"
-        },
-        {
-          "title": "iPad Landscape",
-          "value": "iPad landscape"
-        },
-        {
-          "title": "iPad Gen 6",
-          "value": "iPad (gen 6)"
-        },
-        {
-          "title": "iPad Gen 6 Landscape",
-          "value": "iPad (gen 6) landscape"
-        },
-        {
-          "title": "iPad Gen 7",
-          "value": "iPad (gen 7)"
-        },
-        {
-          "title": "iPad Gen 7 Landscape",
-          "value": "iPad (gen 7) landscape"
-        },
-        {
-          "title": "iPad Mini",
-          "value": "iPad Mini"
-        },
-        {
-          "title": "iPad Mini Landscape",
-          "value": "iPad Mini landscape"
-        },
-        {
-          "title": "iPad Pro",
-          "value": "iPad Pro"
-        },
-        {
-          "title": "iPad Pro Landscape",
-          "value": "iPad Pro landscape"
-        },
-        {
-          "title": "iPad Pro 11",
-          "value": "iPad Pro 11"
-        },
-        {
-          "title": "iPad Pro 11 Landscape",
-          "value": "iPad Pro 11 landscape"
-        },
-        {
-          "title": "iPhone 4",
-          "value": "iPhone 4"
-        },
-        {
-          "title": "iPhone 4 Landscape",
-          "value": "iPhone 4 landscape"
-        },
-        {
-          "title": "iPhone 5",
-          "value": "iPhone 5"
-        },
-        {
-          "title": "iPhone 5 Landscape",
-          "value": "iPhone 5 landscape"
-        },
-        {
-          "title": "iPhone 6",
-          "value": "iPhone 6"
-        },
-        {
-          "title": "iPhone 6 Landscape",
-          "value": "iPhone 6 landscape"
-        },
-        {
-          "title": "iPhone 6 Plus",
-          "value": "iPhone 6 Plus"
-        },
-        {
-          "title": "iPhone 6 Plus Landscape",
-          "value": "iPhone 6 Plus landscape"
-        },
-        {
-          "title": "iPhone 7",
-          "value": "iPhone 7"
-        },
-        {
-          "title": "iPhone 7 Landscape",
-          "value": "iPhone 7 landscape"
-        },
-        {
-          "title": "iPhone 7 Plus",
-          "value": "iPhone 7 Plus"
-        },
-        {
-          "title": "iPhone 7 Plus Landscape",
-          "value": "iPhone 7 Plus landscape"
-        },
-        {
-          "title": "iPhone 8",
-          "value": "iPhone 8"
-        },
-        {
-          "title": "iPhone 8 Landscape",
-          "value": "iPhone 8 landscape"
-        },
-        {
-          "title": "iPhone 8 Plus",
-          "value": "iPhone 8 Plus"
-        },
-        {
-          "title": "iPhone 8 Plus Landscape",
-          "value": "iPhone 8 Plus landscape"
-        },
-        {
-          "title": "iPhone SE",
-          "value": "iPhone SE"
-        },
-        {
-          "title": "iPhone SE Landscape",
-          "value": "iPhone SE landscape"
-        },
-        {
-          "title": "iPhone X",
-          "value": "iPhone X"
-        },
-        {
-          "title": "iPhone X Landscape",
-          "value": "iPhone X landscape"
-        },
-        {
-          "title": "iPhone XR",
-          "value": "iPhone XR"
-        },
-        {
-          "title": "iPhone XR Landscape",
-          "value": "iPhone XR landscape"
-        },
-        {
-          "title": "iPhone 11",
-          "value": "iPhone 11"
-        },
-        {
-          "title": "iPhone 11 Landscape",
-          "value": "iPhone 11 landscape"
-        },
-        {
-          "title": "iPhone 11 Pro",
-          "value": "iPhone 11 Pro"
-        },
-        {
-          "title": "iPhone 11 Pro Landscape",
-          "value": "iPhone 11 Pro landscape"
-        },
-        {
-          "title": "iPhone 11 Pro Max",
-          "value": "iPhone 11 Pro Max"
-        },
-        {
-          "title": "iPhone 11 Pro Max Landscape",
-          "value": "iPhone 11 Pro Max landscape"
-        },
-        {
-          "title": "iPhone 12",
-          "value": "iPhone 12"
-        },
-        {
-          "title": "iPhone 12 Landscape",
-          "value": "iPhone 12 landscape"
-        },
-        {
-          "title": "iPhone 12 Pro",
-          "value": "iPhone 12 Pro"
-        },
-        {
-          "title": "iPhone 12 Pro Landscape",
-          "value": "iPhone 12 Pro landscape"
-        },
-        {
-          "title": "iPhone 12 Pro Max",
-          "value": "iPhone 12 Pro Max"
-        },
-        {
-          "title": "iPhone 12 Pro Max Landscape",
-          "value": "iPhone 12 Pro Max landscape"
-        },
-        {
-          "title": "iPhone 12 Mini",
-          "value": "iPhone 12 Mini"
-        },
-        {
-          "title": "iPhone 12 Mini Landscape",
-          "value": "iPhone 12 Mini landscape"
-        },
-        {
-          "title": "iPhone 13",
-          "value": "iPhone 13"
-        },
-        {
-          "title": "iPhone 13 Landscape",
-          "value": "iPhone 13 landscape"
-        },
-        {
-          "title": "iPhone 13 Pro",
-          "value": "iPhone 13 Pro"
-        },
-        {
-          "title": "iPhone 13 Pro Landscape",
-          "value": "iPhone 13 Pro landscape"
-        },
-        {
-          "title": "iPhone 13 Pro Max",
-          "value": "iPhone 13 Pro Max"
-        },
-        {
-          "title": "iPhone 13 Pro Max Landscape",
-          "value": "iPhone 13 Pro Max landscape"
-        },
-        {
-          "title": "iPhone 13 Mini",
-          "value": "iPhone 13 Mini"
-        },
-        {
-          "title": "iPhone 13 Mini Landscape",
-          "value": "iPhone 13 Mini landscape"
-        },
-        {
-          "title": "Jio Phone 2",
-          "value": "JioPhone 2"
-        },
-        {
-          "title": "Jio Phone 2 Landscape",
-          "value": "JioPhone 2 landscape"
-        },
-        {
-          "title": "Kindle Fire HDX",
-          "value": "Kindle Fire HDX"
-        },
-        {
-          "title": "Kindle Fire HDX Landscape",
-          "value": "Kindle Fire HDX landscape"
-        },
-        {
-          "title": "LG Optimus L70",
-          "value": "LG Optimus L70"
-        },
-        {
-          "title": "LG Optimus L70 Landscape",
-          "value": "LG Optimus L70 landscape"
-        },
-        {
-          "title": "Microsoft Lumia 550",
-          "value": "Microsoft Lumia 550"
-        },
-        {
-          "title": "Microsoft Lumia 950",
-          "value": "Microsoft Lumia 950"
-        },
-        {
-          "title": "Microsoft Lumia 950 Landscape",
-          "value": "Microsoft Lumia 950 landscape"
-        },
-        {
-          "title": "Nexus 10",
-          "value": "Nexus 10"
-        },
-        {
-          "title": "Nexus 10 Landscape",
-          "value": "Nexus 10 landscape"
-        },
-        {
-          "title": "Nexus 4",
-          "value": "Nexus 4"
-        },
-        {
-          "title": "Nexus 4 Landscape",
-          "value": "Nexus 4 landscape"
-        },
-        {
-          "title": "Nexus 5",
-          "value": "Nexus 5"
-        },
-        {
-          "title": "Nexus 5 Landscape",
-          "value": "Nexus 5 landscape"
-        },
-        {
-          "title": "Nexus 5X",
-          "value": "Nexus 5X"
-        },
-        {
-          "title": "Nexus 5X Landscape",
-          "value": "Nexus 5X landscape"
-        },
-        {
-          "title": "Nexus 6",
-          "value": "Nexus 6"
-        },
-        {
-          "title": "Nexus 6 Landscape",
-          "value": "Nexus 6 landscape"
-        },
-        {
-          "title": "Nexus 6P",
-          "value": "Nexus 6P"
-        },
-        {
-          "title": "Nexus 6P Landscape",
-          "value": "Nexus 6P landscape"
-        },
-        {
-          "title": "Nexus 7",
-          "value": "Nexus 7"
-        },
-        {
-          "title": "Nexus 7 Landscape",
-          "value": "Nexus 7 landscape"
-        },
-        {
-          "title": "Nokia Lumia 520",
-          "value": "Nokia Lumia 520"
-        },
-        {
-          "title": "Nokia Lumia 520 Landscape",
-          "value": "Nokia Lumia 520 landscape"
-        },
-        {
-          "title": "Nokia N9",
-          "value": "Nokia N9"
-        },
-        {
-          "title": "Nokia N9 Landscape",
-          "value": "Nokia N9 landscape"
-        },
-        {
-          "title": "Pixel 2",
-          "value": "Pixel 2"
-        },
-        {
-          "title": "Pixel 2 Landscape",
-          "value": "Pixel 2 landscape"
-        },
-        {
-          "title": "Pixel 2 XL",
-          "value": "Pixel 2 XL"
-        },
-        {
-          "title": "Pixel 2 XL Landscape",
-          "value": "Pixel 2 XL landscape"
-        },
-        {
-          "title": "Pixel 3",
-          "value": "Pixel 3"
-        },
-        {
-          "title": "Pixel 3 Landscape",
-          "value": "Pixel 3 landscape"
-        },
-        {
-          "title": "Pixel 4",
-          "value": "Pixel 4"
-        },
-        {
-          "title": "Pixel 4 Landscape",
-          "value": "Pixel 4 landscape"
-        },
-        {
-          "title": "Pixel 4A 5G",
-          "value": "Pixel 4a (5G)"
-        },
-        {
-          "title": "Pixel 4A 5G Landscape",
-          "value": "Pixel 4a (5G) landscape"
-        },
-        {
-          "title": "Pixel 5",
-          "value": "Pixel 5"
-        },
-        {
-          "title": "Pixel 5 Landscape",
-          "value": "Pixel 5 landscape"
-        },
-        {
-          "title": "Moto G4",
-          "value": "Moto G4"
-        },
-        {
-          "title": "Moto G4 Landscape",
-          "value": "Moto G4 landscape"
-        }
-      ]
-    },
-    "select_links": {
-      "type": "string",
-      "required": false,
-      "title": "Select Links",
-      "description": "One or more selectors for extracting links, in the format [css selector]->[property to use],[css selector]->@[attribute to use]"
-    },
-    "click_selector": {
-      "type": "string",
-      "required": false,
-      "title": "Click Selector",
-      "description": "Selector for elements to click when using the autoclick behavior. Default is 'a'"
-    },
-    "block_rules": {
-      "type": "string",
-      "required": false,
-      "title": "Block Rules",
-      "description": "Additional rules for blocking certain URLs from being loaded, by URL regex and optionally via text match in an iframe"
-    },
-    "block_message": {
-      "type": "string",
-      "required": false,
-      "title": "Block Message",
-      "description": "If specified, when a URL is blocked, a record with this error message is added instead"
-    },
-    "block_ads": {
-      "type": "boolean",
-      "required": false,
-      "title": "Block Ads",
-      "description": "If set, block advertisements from being loaded (based on Stephen Black's blocklist). Note that some bad domains are also blocked by zimit configuration even if this option is not set."
-    },
-    "ad_block_message": {
-      "type": "string",
-      "required": false,
-      "title": "Ads Block Message",
-      "description": "If specified, when an ad is blocked, a record with this error message is added instead"
-    },
-    "user_agent": {
-      "type": "string",
-      "required": false,
-      "title": "User Agent",
-      "description": "Override user-agent with specified"
-    },
-    "user_agent_suffix": {
-      "type": "string",
-      "required": false,
-      "title": "User Agent Suffix",
-      "description": "Append suffix to existing browser user-agent. Defaults to +Zimit"
-    },
-    "use_sitemap": {
-      "type": "string",
-      "required": false,
-      "title": "Sitemap URL",
-      "description": "Use as sitemap to get additional URLs for the crawl (usually at /sitemap.xml)"
-    },
-    "sitemap_from_date": {
-      "type": "string",
-      "required": false,
-      "title": "Sitemap From Date",
-      "description": "If set, filter URLs from sitemaps to those greater than or equal to (>=) provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)"
-    },
-    "sitemap_to_date": {
-      "type": "string",
-      "required": false,
-      "title": "Sitemap To Date",
-      "description": "If set, filter URLs from sitemaps to those less than or equal to (<=) provided ISO Date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or partial date)"
-    },
-    "behavior_timeout": {
-      "type": "integer",
-      "required": false,
-      "title": "Behavior Timeout",
-      "description": "If >0, timeout (in seconds) for in-page behavior will run on each page. If 0, a behavior can run until finish. Default is 90.",
-      "min": 0
-    },
-    "post_load_delay": {
-      "type": "integer",
-      "required": false,
-      "title": "Post Load Delay",
-      "description": "If >0, amount of time to sleep (in seconds) after page has loaded, before taking screenshots / getting text / running behaviors. Default is 0.",
-      "min": 0
-    },
-    "page_extra_delay": {
-      "type": "integer",
-      "required": false,
-      "title": "Page Extra Delay",
-      "description": "If >0, amount of time to sleep (in seconds) after behaviors before moving on to next page. Default is 0.",
-      "min": 0
-    },
-    "dedup_policy": {
-      "type": "string-enum",
-      "required": false,
-      "title": "Dedup Policy",
-      "description": "Deduplication policy. One of skip, revisit or keep. Default is skip",
-      "choices": [
-        {
-          "title": "Skip",
-          "value": "skip"
-        },
-        {
-          "title": "Revisit",
-          "value": "revisit"
-        },
-        {
-          "title": "Keep",
-          "value": "keep"
-        }
-      ]
-    },
-    "screenshot": {
-      "type": "string",
-      "required": false,
-      "title": "Screenshot",
-      "description": "Screenshot options for crawler. One of view, thumbnail, fullPage, fullPageFinal or a comma-separated combination of those."
-    },
-    "size_soft_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Size Soft Limit",
-      "description": "If set, save crawl state and stop crawl if WARC size exceeds this value. ZIM will still be created.",
-      "min": 0
-    },
-    "size_hard_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Size Hard Limit",
-      "description": "If set, exit crawler and fail the scraper immediately if WARC size exceeds this value",
-      "min": 0
-    },
-    "disk_utilization": {
-      "type": "integer",
-      "required": false,
-      "title": "Disk Utilization",
-      "description": "Save state and exit if disk utilization exceeds this percentage value. Default (if not set) is 90%. Set to 0 to disable disk utilization check.",
-      "min": 0
-    },
-    "time_soft_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Time Soft Limit",
-      "description": "If set, save crawl state and stop crawl if WARC(s) creation takes longer than this value, in seconds. ZIM will still be created.",
-      "min": 0
-    },
-    "time_hard_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Time Hard Limit",
-      "description": "If set, exit crawler and fail the scraper immediately if WARC(s) creation takes longer than this value, in seconds",
-      "min": 0
-    },
-    "net_idle_wait": {
-      "type": "integer",
-      "required": false,
-      "title": "Net Idle Wait",
-      "description": "If set, wait for network idle after page load and after behaviors are done (in seconds). If -1 (default), determine based on scope."
-    },
-    "origin_override": {
-      "type": "string",
-      "required": false,
-      "title": "Origin Override",
-      "description": "If set, will redirect requests from each origin in key to origin in the value, eg. https://host:port=http://alt-host:alt-port."
-    },
-    "max_page_retries": {
-      "type": "integer",
-      "required": false,
-      "title": "Max Page Retries",
-      "description": "If set, number of times to retry a page that failed to load before page is considered to have failed. Default is 2.",
-      "min": 0
-    },
-    "fail_on_failed_seed": {
-      "type": "boolean",
-      "required": false,
-      "title": "Fail on failed seed",
-      "description": "Whether to display additional logs"
-    },
-    "fail_on_invalid_status": {
-      "type": "boolean",
-      "required": false,
-      "title": "Fail on invalid status",
-      "description": "If set, will treat pages with 4xx or 5xx response as failures. When combined with --failOnFailedLimit or --failOnFailedSeed may result in crawl failing due to non-200 responses"
-    },
-    "fail_on_failed_limit": {
-      "type": "integer",
-      "required": false,
-      "title": "Fail on failed - Limit",
-      "description": "If set, save state and exit if number of failed pages exceeds this value.",
-      "min": 0
-    },
-    "warcs": {
-      "type": "string",
-      "required": false,
-      "title": "WARC files",
-      "description": "Comma-separated list of WARC files to use as input."
-    },
-    "verbose": {
-      "type": "boolean",
-      "required": false,
-      "title": "Verbose mode",
-      "description": "Whether to display additional logs"
-    },
-    "keep": {
-      "type": "boolean",
-      "required": false,
-      "title": "Keep",
-      "description": "Should be True. Developer option: must be True if we want to keep the WARC files for artifacts archiving.",
-      "default": true
-    },
-    "output": {
-      "type": "string",
-      "required": false,
-      "title": "Output folder",
-      "description": "Output folder for ZIM file(s). Leave it as `/output`",
-      "pattern": "^/output$"
-    },
-    "admin_email": {
-      "type": "email",
-      "required": false,
-      "title": "Admin Email",
-      "description": "Admin Email for crawler: used in UserAgent so website admin can contact us",
-      "default": "contact+zimfarm@kiwix.org"
-    },
-    "profile": {
-      "type": "string",
-      "required": false,
-      "title": "Browser profile",
-      "description": "Path or HTTP(S) URL to tar.gz file which contains the browser profile directory for Browsertrix crawler."
-    },
-    "behaviors": {
-      "type": "string",
-      "required": false,
-      "title": "Behaviors",
-      "description": "Which background behaviors to enable on each page. Defaults to autoplay,autofetch,siteSpecific."
-    },
-    "depth": {
-      "type": "integer",
-      "required": false,
-      "title": "Depth",
-      "description": "The depth of the crawl for all seeds. Default is -1 (infinite).",
-      "min": -1
-    },
-    "zim_lang": {
-      "type": "string",
-      "required": false,
-      "title": "ZIM Language",
-      "description": "Language metadata of ZIM (warc2zim --lang param). ISO-639-3 code. Retrieved from homepage if found, fallback to `eng`",
-      "alias": "zim-lang",
-      "customValidator": "language_code"
-    },
-    "long_description": {
-      "type": "string",
-      "required": false,
-      "title": "Long description",
-      "description": "Optional long description for your ZIM",
-      "minLength": 1,
-      "maxLength": 4000,
-      "alias": "long-description"
-    },
-    "custom_css": {
-      "type": "blob",
-      "kind": "css",
-      "required": false,
-      "title": "Custom CSS",
-      "description": "URL to a CSS file to inject into pages",
-      "alias": "custom-css"
-    },
-    "charsets_to_try": {
-      "type": "string",
-      "required": false,
-      "title": "Charsets to try",
-      "description": "List of charsets to try decode content when charset is not found",
-      "alias": "charsets-to-try"
-    },
-    "ignore_content_header_charsets": {
-      "type": "boolean",
-      "required": false,
-      "title": "Ignore Content Header Charsets",
-      "description": "Ignore the charsets specified in content headers - first bytes - typically because they are wrong.",
-      "alias": "ignore-content-header-charsets"
-    },
-    "content_header_bytes_length": {
-      "type": "integer",
-      "required": false,
-      "title": "Content Header Bytes Length",
-      "description": "How many bytes to consider when searching for content charsets in header (default is 1024).",
-      "alias": "content-header-bytes-length",
-      "min": 0
-    },
-    "ignore_http_header_charsets": {
-      "type": "boolean",
-      "required": false,
-      "title": "Ignore HTTP Header Charsets",
-      "description": "Ignore the charsets specified in HTTP `Content-Type` headers, typically because they are wrong.",
-      "alias": "ignore-http-header-charsets"
-    },
-    "encoding_aliases": {
-      "type": "string",
-      "required": false,
-      "title": "Encoding Aliases",
-      "description": "List of encoding/charset aliases to decode WARC content. Aliases are used when the encoding specified in upstream server exists in Python under a different name. This parameter is single string, multiple values are separated by a comma, like in alias1=encoding1,alias2=encoding2.",
-      "alias": "encoding-aliases"
-    },
-    "custom_behaviors": {
-      "type": "string",
-      "required": false,
-      "title": "Custom Behaviors",
-      "description": "JS code for custom behaviors to customize crawler. Single string with individual JS files URL/path separated by a comma.",
-      "alias": "custom-behaviours"
-    },
-    "zimit_progress_file": {
-      "type": "string",
-      "required": false,
-      "title": "Zimit Progress File",
-      "description": "Scraping progress file. Leave it as `/output/task_progress.json`",
-      "alias": "zimit-progress-file",
-      "pattern": "^/output/task_progress\\.json$"
-    },
-    "replay_viewer_source": {
-      "type": "url",
-      "required": false,
-      "title": "Replay Viewer Source",
-      "description": "URL from which to load the ReplayWeb.page replay viewer from",
-      "alias": "replay-viewer-source"
-    },
-    "zim_file": {
-      "type": "string",
-      "required": false,
-      "title": "ZIM filename",
-      "description": "ZIM file name (based on --name if not provided). Include {period} to insert date period dynamically",
-      "alias": "zim-file",
-      "pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+_)([a-z0-9\\-\\.]+_|)([\\d]{4}-[\\d]{2}|\\{period\\}).zim$",
-      "relaxedPattern": "^[A-Za-z0-9._-]+$"
-    },
-    "name": {
-      "type": "string",
-      "required": true,
-      "title": "ZIM name",
-      "description": "Name of the ZIM.",
-      "alias": "name",
-      "pattern": "^([a-z0-9\\-\\.]+_)([a-z\\-]+_)([a-z0-9\\-\\.]+)$",
-      "relaxedPattern": "^[A-Za-z0-9._-]+$"
-    },
-    "overwrite": {
-      "type": "boolean",
-      "required": false,
-      "title": "Overwrite",
-      "description": "Whether to overwrite existing ZIM file if it exists"
-    }
-  }
-}
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,17 +1,17 @@
 [build-system]
-requires = ["hatchling", "hatch-openzim"]
+requires = ["hatchling", "hatch-openzim==0.2.0"]
 build-backend = "hatchling.build"

 [project]
 name = "zimit"
-requires-python = ">=3.13,<3.14"
+requires-python = ">=3.12,<3.13"
 description = "Make ZIM file from any website through crawling"
 readme = "README.md"
 dependencies = [
  "requests==2.32.3",
  "inotify==0.2.10",
  "tld==0.13",
-  "warc2zim @ git+https://github.com/openzim/warc2zim@main",
+  "warc2zim==2.1.3",
 ]
 dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]

@ -26,20 +26,20 @@ scripts = [
  "invoke==2.2.0",
 ]
 lint = [
-  "black==25.1.0",
-  "ruff==0.9.4",
+  "black==24.10.0",
+  "ruff==0.6.9",
 ]
 check = [
-  "pyright==1.1.393",
+  "pyright==1.1.383",
 ]
 test = [
-  "pytest==8.3.4",
-  "coverage==7.6.10",
+  "pytest==8.3.3",
+  "coverage==7.6.1",
 ]
 dev = [
-  "pre-commit==4.1.0",
-  "debugpy==1.8.12",
-  "selenium==4.28.1", # used in daily tests, convenient for dev purpose (autocompletion)
+  "pre-commit==4.0.0",
+  "debugpy==1.8.6",
+  "selenium==4.25.0", # used in daily tests, convenient for dev purpose (autocompletion)
  "zimit[scripts]",
  "zimit[lint]",
  "zimit[test]",
@ -95,10 +95,10 @@ all = "inv checkall --args '{args}'"

 [tool.black]
 line-length = 88
-target-version = ['py313']
+target-version = ['py312']

 [tool.ruff]
-target-version = "py313"
+target-version = "py312"
 line-length = 88
 src = ["src"]

@ -221,5 +221,5 @@ exclude_lines = [
 include = ["src", "tests", "tasks.py"]
 exclude = [".env/**", ".venv/**"]
 extraPaths = ["src"]
-pythonVersion = "3.13"
+pythonVersion = "3.12"
 typeCheckingMode="basic"
--- a/src/zimit/about.py
+++ b/src/zimit/about.py
@ -1 +1 @@
-__version__ = "3.0.6-dev0"
+__version__ = "2.1.5"
--- a/src/zimit/constants.py
+++ b/src/zimit/constants.py
@ -3,8 +3,7 @@ import logging
 from zimscraperlib.logging import getLogger

 EXIT_CODE_WARC2ZIM_CHECK_FAILED = 2
-EXIT_CODE_CRAWLER_SIZE_LIMIT_HIT = 14
-EXIT_CODE_CRAWLER_TIME_LIMIT_HIT = 15
+EXIT_CODE_CRAWLER_LIMIT_HIT = 11
 NORMAL_WARC2ZIM_EXIT_CODE = 100
 REQUESTS_TIMEOUT = 10

--- a/src/zimit/zimit.py
+++ b/src/zimit/zimit.py
--- a/tests-daily/Dockerfile
+++ b/tests-daily/Dockerfile
@ -1,5 +1,5 @@
 # Let's extract kiwix-tools as usual on alpine temporary build container
-FROM alpine:3.21 as kiwix-serve
+FROM alpine:3.18 as kiwix-serve
 LABEL org.opencontainers.image.source https://github.com/openzim/kiwix-tools

 # TARGETPLATFORM is injected by docker build
@ -30,7 +30,7 @@ RUN set -e && \
    curl -k -L $url | tar -xz -C /kiwix-serve --strip-components 1

 # Build real "workload" container
-FROM python:3.13-slim-bookworm
+FROM python:3.12-slim-bookworm

 # Add kiwix-serve
 COPY --from=kiwix-serve /kiwix-serve /usr/local/bin
@ -70,6 +70,6 @@ RUN rm /tmp/chrome-linux64.zip /tmp/chromedriver-linux64.zip /tmp/versions.json
 RUN \
   python -m pip install --no-cache-dir -U \
     pip \
-     selenium==4.28.1 \
-     pytest==8.3.4 \
+     selenium==4.23.0 \
+     pytest==8.2.2 \
 && mkdir -p /work
--- a/tests-integration/integration.py
+++ b/tests-integration/integration.py
@ -1,55 +1,30 @@
 import glob
 import json
 import os
-from pathlib import Path

-import pytest
 from warcio import ArchiveIterator
 from zimscraperlib.zim import Archive


-@pytest.mark.parametrize(
-    "filename",
-    [
-        pytest.param("/output/tests_en_onepage.zim", id="onepage"),
-        pytest.param("/output/tests_en_sizesoftlimit.zim", id="sizesoftlimit"),
-        pytest.param("/output/tests_en_timesoftlimit.zim", id="timesoftlimit"),
-    ],
-)
-def test_zim_created(filename):
+def test_is_file():
    """Ensure ZIM file exists"""
-    assert os.path.isfile(filename)
-
-
-@pytest.mark.parametrize(
-    "filename",
-    [
-        pytest.param("/output/tests_en_sizehardlimit.zim", id="sizehardlimit"),
-        pytest.param("/output/tests_en_timehardlimit.zim", id="timehardlimit"),
-    ],
-)
-def test_zim_not_created(filename):
-    """Ensure ZIM file does not exists"""
-    assert not os.path.exists(filename)
+    assert os.path.isfile("/output/isago.zim")


 def test_zim_main_page():
-    """Main page specified, http://website.test.openzim.org/http-return-codes.html,
-    was a redirect to https
+    """Main page specified, http://isago.rskg.org/, was a redirect to https
    Ensure main page is the redirected page"""

-    main_entry = Archive(Path("/output/tests_en_onepage.zim")).main_entry
+    main_entry = Archive("/output/isago.zim").main_entry
    assert main_entry.is_redirect
-    assert (
-        main_entry.get_redirect_entry().path
-        == "website.test.openzim.org/http-return-codes.html"
-    )
+    assert main_entry.get_redirect_entry().path == "isago.rskg.org/"


 def test_zim_scraper():
-    """Check content of scraper metadata"""
+    """Main page specified, http://isago.rskg.org/, was a redirect to https
+    Ensure main page is the redirected page"""

-    zim_fh = Archive(Path("/output/tests_en_onepage.zim"))
+    zim_fh = Archive("/output/isago.zim")
    scraper = zim_fh.get_text_metadata("Scraper")
    assert "zimit " in scraper
    assert "warc2zim " in scraper
@ -58,28 +33,18 @@ def test_zim_scraper():

 def test_files_list():
    """Check that expected files are present in the ZIM at proper path"""
-    zim_fh = Archive(Path("/output/tests_en_onepage.zim"))
+    zim_fh = Archive("/output/isago.zim")
    for expected_entry in [
        "_zim_static/__wb_module_decl.js",
        "_zim_static/wombat.js",
        "_zim_static/wombatSetup.js",
-        "website.test.openzim.org/http-return-codes.html",
-        "website.test.openzim.org/200-response",
-        "website.test.openzim.org/201-response",
-        "website.test.openzim.org/202-response",
-        "website.test.openzim.org/301-external-redirect-ok",
-        "website.test.openzim.org/301-internal-redirect-ok",
-        "website.test.openzim.org/302-external-redirect-ok",
-        "website.test.openzim.org/302-internal-redirect-ok",
-        "website.test.openzim.org/307-external-redirect-ok",
-        "website.test.openzim.org/307-internal-redirect-ok",
-        "website.test.openzim.org/308-external-redirect-ok",
-        "website.test.openzim.org/308-internal-redirect-ok",
-        "website.test.openzim.org/http-return-codes.html",
-        "website.test.openzim.org/icons/favicon.ico",
-        "website.test.openzim.org/icons/site.webmanifest",
-        "website.test.openzim.org/internal_redirect_target.html",
-        "www.example.com/",
+        "isago.rskg.org/",
+        "isago.rskg.org/a-propos",
+        "isago.rskg.org/conseils",
+        "isago.rskg.org/faq",
+        "isago.rskg.org/static/favicon256.png",
+        "isago.rskg.org/static/tarifs-isago.pdf",
+        "maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css",
    ]:
        assert zim_fh.get_content(expected_entry)

@ -106,40 +71,24 @@ def test_user_agent():
    assert found


-def test_stats_output_standard():
-    assert json.loads(Path("/output/crawl.json").read_bytes()) == {
-        "crawled": 17,
-        "pending": 0,
-        "pendingPages": [],
-        "total": 35,
-        "failed": 18,
-        "limit": {"max": 0, "hit": False},
-    }
-
-    assert json.loads(Path("/output/warc2zim.json").read_bytes()) == {
-        "written": 8,
-        "total": 8,
-    }
-
-    assert json.loads(Path("/output/stats.json").read_bytes()) == {
-        "done": 8,
-        "total": 8,
-        "partialZim": False,
-    }
-
-
-@pytest.mark.parametrize(
-    "filename",
-    [
-        pytest.param("/output/stats_sizesoftlimit.json", id="sizesoftlimit"),
-        pytest.param("/output/stats_timesoftlimit.json", id="timesoftlimit"),
-    ],
-)
-def test_stats_output_softlimit(filename):
-    file = Path(filename)
-    assert file.exists
-    content = json.loads(file.read_bytes())
-    assert "done" in content
-    assert "total" in content
-    assert "partialZim" in content
-    assert content["partialZim"]
+def test_stats_output():
+    with open("/output/crawl.json") as fh:
+        assert json.loads(fh.read()) == {
+            "crawled": 5,
+            "pending": 0,
+            "pendingPages": [],
+            "total": 5,
+            "failed": 0,
+            "limit": {"max": 0, "hit": False},
+        }
+    with open("/output/warc2zim.json") as fh:
+        assert json.loads(fh.read()) == {
+            "written": 7,
+            "total": 7,
+        }
+    with open("/output/stats.json") as fh:
+        assert json.loads(fh.read()) == {
+            "done": 7,
+            "total": 7,
+            "limit": {"max": 0, "hit": False},
+        }
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -1,14 +0,0 @@
-import pytest
-
-from zimit import zimit as app
-
-"""
- cleanup disabled because atexit hooks run at the very end of the Python process
- shutdown. By the time cleanup() is called, the logging module has already closed its
- file streams.
-"""
-
-
-@pytest.fixture(autouse=True)
-def disable_zimit_cleanup(monkeypatch):
-    monkeypatch.setattr(app, "cleanup", lambda: None)
--- a/tests/data/example-response.warc
+++ b/tests/data/example-response.warc
--- a/tests/test_overwrite.py
+++ b/tests/test_overwrite.py
@ -1,83 +0,0 @@
-import pathlib
-
-import pytest
-
-from zimit.zimit import run
-
-TEST_DATA_DIR = pathlib.Path(__file__).parent / "data"
-
-
-def test_overwrite_flag_behaviour(tmp_path):
-    zim_output = "overwrite-test.zim"
-    output_path = tmp_path / zim_output
-
-    # 1st run → creates file
-    result = run(
-        [
-            "--seeds",
-            "https://example.com",
-            "--warcs",
-            str(TEST_DATA_DIR / "example-response.warc"),
-            "--output",
-            str(tmp_path),
-            "--zim-file",
-            zim_output,
-            "--name",
-            "overwrite-test",
-        ]
-    )
-    assert result in (None, 100)
-    assert output_path.exists()
-
-    # 2nd run, no overwrite → should fail
-    with pytest.raises(SystemExit) as exc:
-        run(
-            [
-                "--seeds",
-                "https://example.com",
-                "--warcs",
-                str(TEST_DATA_DIR / "example-response.warc"),
-                "--output",
-                str(tmp_path),
-                "--zim-file",
-                zim_output,
-                "--name",
-                "overwrite-test",
-            ]
-        )
-    assert exc.value.code == 2
-
-    # 2nd run, no overwrite → should fail
-    with pytest.raises(SystemExit) as exc:
-        run(
-            [
-                "--seeds",
-                "https://example.com",
-                "--output",
-                str(tmp_path),
-                "--zim-file",
-                zim_output,
-                "--name",
-                "overwrite-test",
-            ]
-        )
-    assert exc.value.code == 2
-
-    # 3rd run, with overwrite → should succeed
-    result = run(
-        [
-            "--seeds",
-            "https://example.com",
-            "--warcs",
-            str(TEST_DATA_DIR / "example-response.warc"),
-            "--output",
-            str(tmp_path),
-            "--zim-file",
-            zim_output,
-            "--name",
-            "overwrite-test",
-            "--overwrite",
-        ]
-    )
-    assert result in (None, 100)
-    assert output_path.exists()