Merge pull request #406 from openzim/html_as_fetch
HTML document can be retrieved as `fetch`
|
@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Enrich test website with img srcset situations (in preparation for #403)
|
||||
|
||||
### Fixed
|
||||
|
||||
- HTML document can be retrieved as `fetch` resource type (#405)
|
||||
|
||||
## [2.1.1] - 2024-09-05
|
||||
|
||||
### Changed
|
||||
|
|
|
@ -154,7 +154,7 @@ class Rewriter:
|
|||
def get_resourcetype_rewrite_mode(self, record, resourcetype, mimetype):
|
||||
"""Get current record rewrite mode based on WARC-Resource-Type and mimetype"""
|
||||
|
||||
if resourcetype in ["document", "xhr"] and mimetype == "text/html":
|
||||
if resourcetype in ["document", "xhr", "fetch"] and mimetype == "text/html":
|
||||
# TODO : Handle header "Accept" == "application/json"
|
||||
if getattr(record, "method", "GET") == "GET":
|
||||
return "html"
|
||||
|
|
|
@ -9,7 +9,7 @@ This module contains the different Items we may want to add to a Zim archive.
|
|||
from pathlib import Path
|
||||
|
||||
from jinja2.environment import Template
|
||||
from libzim.writer import Hint # pyright: ignore[reportMissingImports]
|
||||
from libzim.writer import Hint # pyright: ignore[reportMissingModuleSource]
|
||||
from warcio.recordloader import ArcWarcRecord
|
||||
from zimscraperlib.types import get_mime_for_name
|
||||
from zimscraperlib.zim.items import StaticItem
|
||||
|
|
102
test-website/content/image-srcset.html
Normal file
|
@ -0,0 +1,102 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Test website</title>
|
||||
<link
|
||||
rel="apple-touch-icon"
|
||||
sizes="180x180"
|
||||
href="./icons/apple-touch-icon.png"
|
||||
/>
|
||||
<link
|
||||
rel="icon"
|
||||
type="image/png"
|
||||
sizes="32x32"
|
||||
href="./icons/favicon-32x32.png"
|
||||
/>
|
||||
<link
|
||||
rel="icon"
|
||||
type="image/png"
|
||||
sizes="16x16"
|
||||
href="./icons/favicon-16x16.png"
|
||||
/>
|
||||
<link rel="manifest" href="./icons/site.webmanifest" />
|
||||
<link rel="shortcut icon" href="./icons/favicon.ico" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>
|
||||
This page contains tests around varying images based on screen / device.
|
||||
</p>
|
||||
|
||||
<h2>Image srcset</h2>
|
||||
|
||||
<p>
|
||||
An image should be displayed below at all screen sizes and pixel ratios.
|
||||
</p>
|
||||
|
||||
<img
|
||||
srcset="
|
||||
./images/image1.png,
|
||||
./images/image1-1x.png 1x,
|
||||
./images/image1-2x.png 2x
|
||||
"
|
||||
src="./images/image1.png"
|
||||
alt="an image"
|
||||
/>
|
||||
|
||||
<p>
|
||||
Another image should be displayed below at all screen sizes and pixel
|
||||
ratios.
|
||||
</p>
|
||||
|
||||
<img
|
||||
srcset="./images/image4-1.5x.png 1.5x"
|
||||
src="./images/image4.png"
|
||||
alt="an image"
|
||||
/>
|
||||
|
||||
<h2>Picture sources - with srcset pixel ratio</h2>
|
||||
|
||||
<p>
|
||||
An image should be displayed below at all screen sizes and pixel ratios.
|
||||
</p>
|
||||
|
||||
<picture>
|
||||
<source
|
||||
srcset="
|
||||
./images/image2.png,
|
||||
./images/image2-1x.png 1x,
|
||||
./images/image2-2x.png 2x
|
||||
"
|
||||
type="image/png"
|
||||
/>
|
||||
<img src="./images/image2.png" alt="an image" />
|
||||
</picture>
|
||||
|
||||
<h2>Picture sources - with media queries</h2>
|
||||
|
||||
<p>
|
||||
An image should be displayed below at all screen sizes and pixel ratios.
|
||||
</p>
|
||||
|
||||
<picture>
|
||||
<source
|
||||
srcset="./images/image3-high.png"
|
||||
media="all and (min-width: 1280px)"
|
||||
type="image/png"
|
||||
/>
|
||||
<source
|
||||
srcset="./images/image3-medium.png"
|
||||
media="all and (min-width: 600px)"
|
||||
type="image/png"
|
||||
/>
|
||||
<source
|
||||
srcset="./images/image3-small.png"
|
||||
media="all and (min-width: 0px)"
|
||||
type="image/png"
|
||||
/>
|
||||
<img src="./images/image3.png" alt="an image" />
|
||||
</picture>
|
||||
</body>
|
||||
</html>
|
BIN
test-website/content/images/image1-1x.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
test-website/content/images/image1-2x.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
test-website/content/images/image2-1x.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
test-website/content/images/image2-2x.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
test-website/content/images/image2.png
Normal file
After Width: | Height: | Size: 28 KiB |
BIN
test-website/content/images/image3-high.png
Normal file
After Width: | Height: | Size: 21 KiB |
BIN
test-website/content/images/image3-medium.png
Normal file
After Width: | Height: | Size: 22 KiB |
BIN
test-website/content/images/image3-small.png
Normal file
After Width: | Height: | Size: 21 KiB |
BIN
test-website/content/images/image3.png
Normal file
After Width: | Height: | Size: 28 KiB |
BIN
test-website/content/images/image4-1.5x.png
Normal file
After Width: | Height: | Size: 21 KiB |
BIN
test-website/content/images/image4.png
Normal file
After Width: | Height: | Size: 28 KiB |
|
@ -50,6 +50,7 @@
|
|||
<li><a href="./bad-redirections.html">Bad redirections</a></li>
|
||||
<li><a href="./content-types/index.html">Handling of content types</a></li>
|
||||
<li><a href="./http-equiv-redirect.html">Redirect with http-equiv meta directive</a></li>
|
||||
<li><a href="./image-srcset.html">Image with srcset</a></li>
|
||||
</ul>
|
||||
</body>
|
||||
|
||||
|
|
|
@ -202,11 +202,15 @@ class TestWarc2Zim:
|
|||
elif record.rec_type == "response":
|
||||
# We must have a payload
|
||||
assert payload
|
||||
payload_content = payload.content.tobytes()
|
||||
payload_content = (
|
||||
payload.content.tobytes() # pyright:ignore[reportAttributeAccessIssue]
|
||||
)
|
||||
|
||||
# if HTML, still need to account for the head insert, otherwise should
|
||||
# have exact match
|
||||
if payload.mimetype.startswith("text/html"):
|
||||
if payload.mimetype.startswith( # pyright:ignore[reportAttributeAccessIssue]
|
||||
"text/html"
|
||||
):
|
||||
assert head_insert in payload_content
|
||||
elif record.rec_type == "resource":
|
||||
# we do not want to embed resources "as-is"
|
||||
|
|