Merge pull request #406 from openzim/html_as_fetch

HTML document can be retrieved as `fetch`
This commit is contained in:
benoit74 2024-10-08 13:16:18 +02:00 committed by GitHub
commit 38e590232d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 119 additions and 4 deletions

View file

@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Enrich test website with img srcset situations (in preparation for #403)
### Fixed
- HTML document can be retrieved as `fetch` resource type (#405)
## [2.1.1] - 2024-09-05
### Changed

View file

@ -154,7 +154,7 @@ class Rewriter:
def get_resourcetype_rewrite_mode(self, record, resourcetype, mimetype):
"""Get current record rewrite mode based on WARC-Resource-Type and mimetype"""
if resourcetype in ["document", "xhr"] and mimetype == "text/html":
if resourcetype in ["document", "xhr", "fetch"] and mimetype == "text/html":
# TODO : Handle header "Accept" == "application/json"
if getattr(record, "method", "GET") == "GET":
return "html"

View file

@ -9,7 +9,7 @@ This module contains the different Items we may want to add to a Zim archive.
from pathlib import Path
from jinja2.environment import Template
from libzim.writer import Hint # pyright: ignore[reportMissingImports]
from libzim.writer import Hint # pyright: ignore[reportMissingModuleSource]
from warcio.recordloader import ArcWarcRecord
from zimscraperlib.types import get_mime_for_name
from zimscraperlib.zim.items import StaticItem

View file

@ -0,0 +1,102 @@
<!DOCTYPE html>
<html lang="en">
<!-- Test page: images declared through img srcset and picture/source elements. -->
<head>
<meta charset="utf-8" />
<title>Test website</title>
<link
rel="apple-touch-icon"
sizes="180x180"
href="./icons/apple-touch-icon.png"
/>
<link
rel="icon"
type="image/png"
sizes="32x32"
href="./icons/favicon-32x32.png"
/>
<link
rel="icon"
type="image/png"
sizes="16x16"
href="./icons/favicon-16x16.png"
/>
<link rel="manifest" href="./icons/site.webmanifest" />
<link rel="shortcut icon" href="./icons/favicon.ico" />
</head>
<body>
<p>
This page contains tests around varying images based on screen / device.
</p>
<h2>Image srcset</h2>
<p>
An image should be displayed below at all screen sizes and pixel ratios.
</p>
<!-- Case 1: multi-line srcset mixing one candidate without a descriptor with
explicit 1x and 2x pixel-density candidates, plus a plain src. -->
<img
srcset="
./images/image1.png,
./images/image1-1x.png 1x,
./images/image1-2x.png 2x
"
src="./images/image1.png"
alt="an image"
/>
<p>
Another image should be displayed below at all screen sizes and pixel
ratios.
</p>
<!-- Case 2: srcset with a single candidate using a fractional (1.5x)
pixel-density descriptor; src points at a different file. -->
<img
srcset="./images/image4-1.5x.png 1.5x"
src="./images/image4.png"
alt="an image"
/>
<h2>Picture sources - with srcset pixel ratio</h2>
<p>
An image should be displayed below at all screen sizes and pixel ratios.
</p>
<!-- Case 3: the pixel-density srcset lives on a <source> inside <picture>;
the nested <img> only carries a plain src. -->
<picture>
<source
srcset="
./images/image2.png,
./images/image2-1x.png 1x,
./images/image2-2x.png 2x
"
type="image/png"
/>
<img src="./images/image2.png" alt="an image" />
</picture>
<h2>Picture sources - with media queries</h2>
<p>
An image should be displayed below at all screen sizes and pixel ratios.
</p>
<!-- Case 4: three <source> elements gated by min-width media queries
(1280px, 600px, 0px), listed from widest to narrowest, each with its own
image file; the nested <img> references a fourth file. -->
<picture>
<source
srcset="./images/image3-high.png"
media="all and (min-width: 1280px)"
type="image/png"
/>
<source
srcset="./images/image3-medium.png"
media="all and (min-width: 600px)"
type="image/png"
/>
<source
srcset="./images/image3-small.png"
media="all and (min-width: 0px)"
type="image/png"
/>
<img src="./images/image3.png" alt="an image" />
</picture>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View file

@ -50,6 +50,7 @@
<li><a href="./bad-redirections.html">Bad redirections</a></li>
<li><a href="./content-types/index.html">Handling of content types</a></li>
<li><a href="./http-equiv-redirect.html">Redirect with http-equiv meta directive</a></li>
<li><a href="./image-srcset.html">Image with srcset</a></li>
</ul>
</body>

View file

@ -202,11 +202,15 @@ class TestWarc2Zim:
elif record.rec_type == "response":
# We must have a payload
assert payload
payload_content = payload.content.tobytes()
payload_content = (
payload.content.tobytes() # pyright:ignore[reportAttributeAccessIssue]
)
# if HTML, still need to account for the head insert, otherwise should
# have exact match
if payload.mimetype.startswith("text/html"):
if payload.mimetype.startswith( # pyright:ignore[reportAttributeAccessIssue]
"text/html"
):
assert head_insert in payload_content
elif record.rec_type == "resource":
# we do not want to embed resources "as-is"