Move integration tests to test website

This commit is contained in:
benoit74 2025-01-09 10:41:05 +00:00
parent 00d2433383
commit 8d42a8dd93
No known key found for this signature in database
GPG key ID: B89606434FC7B530
2 changed files with 49 additions and 36 deletions

View file

@@ -63,7 +63,7 @@ jobs:
run: docker run -v $PWD/output:/output zimit zimit --help
- name: run crawl
run: docker run -v $PWD/output:/output zimit zimit --url http://isago.rskg.org/ --name isago --zim-file isago.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --statsFilename /output/stats.json --keep
run: docker run -v $PWD/output:/output zimit zimit --url http://website.test.openzim.org/http-return-codes.html --name tests_en_onepage --zim-file tests_en_onepage.zim --adminEmail test@example.com --mobileDevice "Pixel 5" --statsFilename /output/stats.json --keep
- name: run integration test suite
run: docker run -v $PWD/tests-integration/integration.py:/app/integration.py -v $PWD/output:/output zimit bash -c "/app/zimit/bin/pip install pytest; /app/zimit/bin/pytest -v /app/integration.py"

View file

@@ -1,6 +1,7 @@
import glob
import json
import os
from pathlib import Path
from warcio import ArchiveIterator
from zimscraperlib.zim import Archive
@@ -8,23 +9,26 @@ from zimscraperlib.zim import Archive
def test_is_file():
"""Ensure ZIM file exists"""
assert os.path.isfile("/output/isago.zim")
assert os.path.isfile("/output/tests_en_onepage.zim")
def test_zim_main_page():
"""Main page specified, http://isago.rskg.org/, was a redirect to https
"""Main page specified, http://website.test.openzim.org/http-return-codes.html,
was a redirect to https
Ensure main page is the redirected page"""
main_entry = Archive("/output/isago.zim").main_entry
main_entry = Archive("/output/tests_en_onepage.zim").main_entry
assert main_entry.is_redirect
assert main_entry.get_redirect_entry().path == "isago.rskg.org/"
assert (
main_entry.get_redirect_entry().path
== "website.test.openzim.org/http-return-codes.html"
)
def test_zim_scraper():
"""Main page specified, http://isago.rskg.org/, was a redirect to https
Ensure main page is the redirected page"""
"""Check content of scraper metadata"""
zim_fh = Archive("/output/isago.zim")
zim_fh = Archive("/output/tests_en_onepage.zim")
scraper = zim_fh.get_text_metadata("Scraper")
assert "zimit " in scraper
assert "warc2zim " in scraper
@@ -33,18 +37,28 @@ def test_zim_scraper():
def test_files_list():
"""Check that expected files are present in the ZIM at proper path"""
zim_fh = Archive("/output/isago.zim")
zim_fh = Archive("/output/tests_en_onepage.zim")
for expected_entry in [
"_zim_static/__wb_module_decl.js",
"_zim_static/wombat.js",
"_zim_static/wombatSetup.js",
"isago.rskg.org/",
"isago.rskg.org/a-propos",
"isago.rskg.org/conseils",
"isago.rskg.org/faq",
"isago.rskg.org/static/favicon256.png",
"isago.rskg.org/static/tarifs-isago.pdf",
"maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css",
"website.test.openzim.org/http-return-codes.html",
"website.test.openzim.org/200-response",
"website.test.openzim.org/201-response",
"website.test.openzim.org/202-response",
"website.test.openzim.org/301-external-redirect-ok",
"website.test.openzim.org/301-internal-redirect-ok",
"website.test.openzim.org/302-external-redirect-ok",
"website.test.openzim.org/302-internal-redirect-ok",
"website.test.openzim.org/307-external-redirect-ok",
"website.test.openzim.org/307-internal-redirect-ok",
"website.test.openzim.org/308-external-redirect-ok",
"website.test.openzim.org/308-internal-redirect-ok",
"website.test.openzim.org/http-return-codes.html",
"website.test.openzim.org/icons/favicon.ico",
"website.test.openzim.org/icons/site.webmanifest",
"website.test.openzim.org/internal_redirect_target.html",
"www.example.com/",
]:
assert zim_fh.get_content(expected_entry)
@@ -72,23 +86,22 @@ def test_user_agent():
def test_stats_output():
with open("/output/crawl.json") as fh:
assert json.loads(fh.read()) == {
"crawled": 5,
"pending": 0,
"pendingPages": [],
"total": 5,
"failed": 0,
"limit": {"max": 0, "hit": False},
}
with open("/output/warc2zim.json") as fh:
assert json.loads(fh.read()) == {
"written": 7,
"total": 7,
}
with open("/output/stats.json") as fh:
assert json.loads(fh.read()) == {
"done": 7,
"total": 7,
"limit": {"max": 0, "hit": False},
}
assert json.loads(Path("/output/crawl.json").read_bytes()) == {
"crawled": 35,
"pending": 0,
"pendingPages": [],
"total": 35,
"failed": 18,
"limit": {"max": 0, "hit": False},
}
assert json.loads(Path("/output/warc2zim.json").read_bytes()) == {
"written": 8,
"total": 8,
}
assert json.loads(Path("/output/stats.json").read_bytes()) == {
"done": 8,
"total": 8,
"limit": {"max": 0, "hit": False},
}