mirror of
https://github.com/openzim/warc2zim.git
synced 2025-10-19 14:33:17 +00:00
1503 lines
46 KiB
Python
1503 lines
46 KiB
Python
from collections.abc import Callable
|
|
from textwrap import dedent
|
|
|
|
import pytest
|
|
|
|
from warc2zim.content_rewriting.html import (
|
|
AttrNameAndValue,
|
|
AttrsList,
|
|
HtmlRewriter,
|
|
HTMLRewritingRules,
|
|
extract_base_href,
|
|
format_attr,
|
|
get_attr_value_from,
|
|
rewrite_meta_http_equiv_redirect,
|
|
)
|
|
from warc2zim.url_rewriting import ArticleUrlRewriter, HttpUrl, ZimPath
|
|
|
|
from .utils import ContentForTests
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests("A simple string without url"),
|
|
ContentForTests(
|
|
"<html><body><p>This is a sentence with a http://exemple.com/path link</p>"
|
|
"</body></html>"
|
|
),
|
|
ContentForTests(
|
|
'<a data-source="http://exemple.com/path">A link we should not rewrite</a>'
|
|
),
|
|
ContentForTests(
|
|
'<p style="background: url(some/image.png);">A url (relative) in a inline '
|
|
"style</p>"
|
|
),
|
|
ContentForTests("<p style></p>"),
|
|
ContentForTests(
|
|
"<style>p { /* A comment with a http://link.org/ */ "
|
|
'background: url("some/image.png") ; }</style>'
|
|
),
|
|
ContentForTests("<a href></a>"),
|
|
ContentForTests(
|
|
"""<a type="a"type">This is a sample attribute with a quote in its"""
|
|
"value and which is not a URL</a>"
|
|
),
|
|
ContentForTests("<img src />"),
|
|
ContentForTests("<code><script></code>"),
|
|
ContentForTests(
|
|
"<p> This is a smiley (🙂) and it html hex value (🙂) </p>"
|
|
),
|
|
ContentForTests(
|
|
'<script type="json">{"window": "https://kiwix.org/path"}</script>'
|
|
),
|
|
ContentForTests(
|
|
'<script type="application/json">{"window": "https://kiwix.org/path"}'
|
|
"</script>"
|
|
),
|
|
ContentForTests(
|
|
'<script type="application/i_dont_know_you">'
|
|
'{"window": "https://kiwix.org/path"}</script>'
|
|
),
|
|
]
|
|
)
|
|
def no_rewrite_content(request):
|
|
yield request.param
|
|
|
|
|
|
def test_no_rewrite(no_rewrite_content, no_js_notify):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl(f"http://{no_rewrite_content.article_url}"), set()
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(no_rewrite_content.input_str)
|
|
.content
|
|
== no_rewrite_content.expected_str
|
|
)
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests(
|
|
"<p style='background: url(\"some/image.png\")'>A link in a inline style"
|
|
"</p>",
|
|
'<p style="background: url("some/image.png");">'
|
|
"A link in a inline style</p>",
|
|
),
|
|
ContentForTests(
|
|
"<p style=\"background: url('some/image.png')\">"
|
|
"A link in a inline style</p>",
|
|
'<p style="background: url("some/image.png");">'
|
|
"A link in a inline style</p>",
|
|
),
|
|
ContentForTests(
|
|
"<ul style='list-style: \">\"'>",
|
|
'<ul style="list-style: ">";">',
|
|
),
|
|
]
|
|
)
|
|
def escaped_content(request):
|
|
yield request.param
|
|
|
|
|
|
def test_escaped_content(escaped_content, no_js_notify):
|
|
transformed = (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(HttpUrl(f"http://{escaped_content.article_url}"), set()),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(escaped_content.input_str)
|
|
.content
|
|
)
|
|
assert transformed == escaped_content.expected_str
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests(
|
|
'<script>document.title="HELLO";</script>',
|
|
(
|
|
"<script>var _____WB$wombat$assign$function_____ = function(name) "
|
|
"{return (self._wb_wombat && self._wb_wombat.local_init && "
|
|
"self._wb_wombat.local_init(name)) || self[name]; };\n"
|
|
"if (!self.__WB_pmw) { self.__WB_pmw = function(obj) "
|
|
"{ this.__WB_source = obj; return this; } }\n"
|
|
"{\n"
|
|
"""let window = _____WB$wombat$assign$function_____("window");\n"""
|
|
"let globalThis = _____WB$wombat$assign$function_____"
|
|
"""("globalThis");\n"""
|
|
"""let self = _____WB$wombat$assign$function_____("self");\n"""
|
|
"""let document = _____WB$wombat$assign$function_____("document");\n"""
|
|
"""let location = _____WB$wombat$assign$function_____("location");\n"""
|
|
"""let top = _____WB$wombat$assign$function_____("top");\n"""
|
|
"""let parent = _____WB$wombat$assign$function_____("parent");\n"""
|
|
"""let frames = _____WB$wombat$assign$function_____("frames");\n"""
|
|
"""let opener = _____WB$wombat$assign$function_____("opener");\n"""
|
|
"let arguments;\n\n"
|
|
"""document.title="HELLO";\n"""
|
|
"}</script>"
|
|
),
|
|
),
|
|
ContentForTests(
|
|
'<script type="application/javascript">document.title="HELLO";</script>',
|
|
(
|
|
"""<script type="application/javascript">"""
|
|
"var _____WB$wombat$assign$function_____ = function(name) "
|
|
""
|
|
"{return (self._wb_wombat && self._wb_wombat.local_init && "
|
|
"self._wb_wombat.local_init(name)) || self[name]; };\n"
|
|
"if (!self.__WB_pmw) { self.__WB_pmw = function(obj) "
|
|
"{ this.__WB_source = obj; return this; } }\n"
|
|
"{\n"
|
|
"""let window = _____WB$wombat$assign$function_____("window");\n"""
|
|
"let globalThis = _____WB$wombat$assign$function_____"
|
|
"""("globalThis");\n"""
|
|
"""let self = _____WB$wombat$assign$function_____("self");\n"""
|
|
"""let document = _____WB$wombat$assign$function_____("document");\n"""
|
|
"""let location = _____WB$wombat$assign$function_____("location");\n"""
|
|
"""let top = _____WB$wombat$assign$function_____("top");\n"""
|
|
"""let parent = _____WB$wombat$assign$function_____("parent");\n"""
|
|
"""let frames = _____WB$wombat$assign$function_____("frames");\n"""
|
|
"""let opener = _____WB$wombat$assign$function_____("opener");\n"""
|
|
"let arguments;\n\n"
|
|
"""document.title="HELLO";\n"""
|
|
"}</script>"
|
|
),
|
|
),
|
|
ContentForTests(
|
|
'<script type="text/javascript">document.title="HELLO";</script>',
|
|
(
|
|
"""<script type="text/javascript">"""
|
|
"var _____WB$wombat$assign$function_____ = function(name) "
|
|
"{return (self._wb_wombat && self._wb_wombat.local_init && "
|
|
"self._wb_wombat.local_init(name)) || self[name]; };\n"
|
|
"if (!self.__WB_pmw) { self.__WB_pmw = function(obj) "
|
|
"{ this.__WB_source = obj; return this; } }\n"
|
|
"{\n"
|
|
"""let window = _____WB$wombat$assign$function_____("window");\n"""
|
|
"let globalThis = _____WB$wombat$assign$function_____"
|
|
"""("globalThis");\n"""
|
|
"""let self = _____WB$wombat$assign$function_____("self");\n"""
|
|
"""let document = _____WB$wombat$assign$function_____("document");\n"""
|
|
"""let location = _____WB$wombat$assign$function_____("location");\n"""
|
|
"""let top = _____WB$wombat$assign$function_____("top");\n"""
|
|
"""let parent = _____WB$wombat$assign$function_____("parent");\n"""
|
|
"""let frames = _____WB$wombat$assign$function_____("frames");\n"""
|
|
"""let opener = _____WB$wombat$assign$function_____("opener");\n"""
|
|
"let arguments;\n\n"
|
|
"""document.title="HELLO";\n"""
|
|
"}</script>"
|
|
),
|
|
),
|
|
ContentForTests(
|
|
'<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.0/jquery.min.js"'
|
|
' integrity="sha512-3gJwYpMe3QewGELv8k/BX9vcqhryRdzRMxVfq6ngyWXwo03GFEzjsUm'
|
|
'8Q7RZcHPHksttq7/GFoxjCVUjkjvPdw=="'
|
|
' crossorigin="anonymous" referrerpolicy="no-referrer"></script>',
|
|
'<script src="../cdnjs.cloudflare.com/ajax/libs/jquery/3.7.0/jquery.min.js"'
|
|
' crossorigin="anonymous" referrerpolicy="no-referrer"></script>',
|
|
),
|
|
ContentForTests(
|
|
'<link rel="preload" src="https://cdnjs.cloudflare.com/jquery.min.js"'
|
|
' integrity="sha512-3gJwYpMe3QewGELv8k/BX9vcqhryRdzRMxVfq6ngyWXwo03GFEzjsUm'
|
|
'8Q7RZcHPHksttq7/GFoxjCVUjkjvPdw=="></link>',
|
|
'<link rel="preload" src="../cdnjs.cloudflare.com/jquery.min.js"></link>',
|
|
),
|
|
]
|
|
)
|
|
def js_rewrites(request):
|
|
yield request.param
|
|
|
|
|
|
def test_js_rewrites(js_rewrites, no_js_notify):
|
|
transformed = (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(HttpUrl(f"http://{js_rewrites.article_url}"), set()),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(js_rewrites.input_str)
|
|
.content
|
|
)
|
|
assert transformed == js_rewrites.expected_str
|
|
|
|
|
|
def long_path_replace_test_content(input_: str, rewriten_url: str, article_url: str):
|
|
expected = input_.replace("http://exemple.com/a/long/path", rewriten_url)
|
|
return ContentForTests(input_, expected, article_url)
|
|
|
|
|
|
lprtc = long_path_replace_test_content
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
# Normalized path is "exemple.com/a/long/path"
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"a/long/path",
|
|
"exemple.com",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"../exemple.com/a/long/path",
|
|
"kiwix.org",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"../exemple.com/a/long/path",
|
|
"kiwix.org/",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"a/long/path",
|
|
"exemple.com/",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"a/long/path",
|
|
"exemple.com/a",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"long/path",
|
|
"exemple.com/a/",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"long/path",
|
|
"exemple.com/a/long",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"path",
|
|
"exemple.com/a/long/",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"path",
|
|
"exemple.com/a/long/path",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
".",
|
|
"exemple.com/a/long/path/yes",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"../../long/path",
|
|
"exemple.com/a/very/long/path",
|
|
),
|
|
lprtc(
|
|
'<a href="http://exemple.com/a/long/path">A link to rewrite</a>',
|
|
"../../exemple.com/a/long/path",
|
|
"kiwix.org/another/path",
|
|
),
|
|
]
|
|
)
|
|
def rewrite_url(request):
|
|
yield request.param
|
|
|
|
|
|
def test_rewrite(rewrite_url, no_js_notify):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl(f"http://{rewrite_url.article_url}"),
|
|
{ZimPath("exemple.com/a/long/path")},
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(rewrite_url.input_str)
|
|
.content
|
|
== rewrite_url.expected_str
|
|
)
|
|
|
|
|
|
def test_extract_title(no_js_notify):
|
|
content = """<html>
|
|
<head>
|
|
<title>Page title</title>
|
|
</head>
|
|
<body>
|
|
<title>Wrong page title</title>
|
|
</body>
|
|
</html>"""
|
|
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl("http://example.com"),
|
|
{ZimPath("exemple.com/a/long/path")},
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(content)
|
|
.title
|
|
== "Page title"
|
|
)
|
|
|
|
|
|
def test_rewrite_attributes(no_js_notify):
|
|
rewriter = HtmlRewriter(
|
|
ArticleUrlRewriter(HttpUrl("http://kiwix.org/"), {ZimPath("kiwix.org/foo")}),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
|
|
assert (
|
|
rewriter.rewrite("<a href='https://kiwix.org/foo'>A link</a>").content
|
|
== '<a href="foo">A link</a>'
|
|
)
|
|
|
|
assert (
|
|
rewriter.rewrite("<img src='https://kiwix.org/foo'></img>").content
|
|
== '<img src="foo"></img>'
|
|
)
|
|
|
|
assert (
|
|
rewriter.rewrite(
|
|
"<img srcset='https://kiwix.org/img-480w.jpg 480w, "
|
|
"https://kiwix.org/img-800w.jpg 800w'></img>"
|
|
).content
|
|
== '<img srcset="img-480w.jpg 480w, img-800w.jpg 800w"></img>'
|
|
)
|
|
|
|
|
|
def test_rewrite_css(no_js_notify):
|
|
output = (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(HttpUrl("http://kiwix.org/"), set()),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(
|
|
"<style>p { /* A comment with a http://link.org/ */ "
|
|
"background: url('some/image.png') ; }</style>",
|
|
)
|
|
.content
|
|
)
|
|
assert (
|
|
output == "<style>p { /* A comment with a http://link.org/ */ "
|
|
'background: url("some/image.png") ; }</style>'
|
|
)
|
|
|
|
|
|
def test_head_insert(no_js_notify):
|
|
content = """<html>
|
|
<head>
|
|
<title>A test content</title>
|
|
</head>
|
|
<body></body>
|
|
</html>"""
|
|
|
|
content = dedent(content)
|
|
|
|
url_rewriter = ArticleUrlRewriter(HttpUrl("http://kiwix.org/"), set())
|
|
assert (
|
|
HtmlRewriter(url_rewriter, "", "", no_js_notify).rewrite(content).content
|
|
== content
|
|
)
|
|
|
|
assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", "", no_js_notify).rewrite(
|
|
content
|
|
).content == content.replace("<head>", "<head>PRE_HEAD_INSERT")
|
|
assert HtmlRewriter(url_rewriter, "", "POST_HEAD_INSERT", no_js_notify).rewrite(
|
|
content
|
|
).content == content.replace("</head>", "POST_HEAD_INSERT</head>")
|
|
assert HtmlRewriter(
|
|
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", no_js_notify
|
|
).rewrite(content).content == content.replace(
|
|
"<head>", "<head>PRE_HEAD_INSERT"
|
|
).replace(
|
|
"</head>", "POST_HEAD_INSERT</head>"
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"js_src,expected_js_module_path",
|
|
[
|
|
("my-module-script.js", "kiwix.org/my_folder/my-module-script.js"),
|
|
("./my-module-script.js", "kiwix.org/my_folder/my-module-script.js"),
|
|
("../my-module-script.js", "kiwix.org/my-module-script.js"),
|
|
("../../../my-module-script.js", "kiwix.org/my-module-script.js"),
|
|
("/my-module-script.js", "kiwix.org/my-module-script.js"),
|
|
("//myserver.com/my-module-script.js", "myserver.com/my-module-script.js"),
|
|
(
|
|
"https://myserver.com/my-module-script.js",
|
|
"myserver.com/my-module-script.js",
|
|
),
|
|
],
|
|
)
|
|
def test_js_module_detected_script(js_src, expected_js_module_path):
|
|
|
|
js_modules = []
|
|
|
|
def custom_notify(zim_path: ZimPath):
|
|
js_modules.append(zim_path)
|
|
|
|
HtmlRewriter(
|
|
url_rewriter=ArticleUrlRewriter(
|
|
HttpUrl("http://kiwix.org/my_folder/my_article.html"), set()
|
|
),
|
|
pre_head_insert="",
|
|
post_head_insert="",
|
|
notify_js_module=custom_notify,
|
|
).rewrite(f'<script type="module" src="{js_src}"></script>')
|
|
|
|
assert len(js_modules) == 1
|
|
assert js_modules[0].value == expected_js_module_path
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"js_src,expected_js_module_path",
|
|
[
|
|
("my-module-script.js", "kiwix.org/my_folder/my-module-script.js"),
|
|
("./my-module-script.js", "kiwix.org/my_folder/my-module-script.js"),
|
|
("../my-module-script.js", "kiwix.org/my-module-script.js"),
|
|
("../../../my-module-script.js", "kiwix.org/my-module-script.js"),
|
|
("/my-module-script.js", "kiwix.org/my-module-script.js"),
|
|
("//myserver.com/my-module-script.js", "myserver.com/my-module-script.js"),
|
|
(
|
|
"https://myserver.com/my-module-script.js",
|
|
"myserver.com/my-module-script.js",
|
|
),
|
|
],
|
|
)
|
|
def test_js_module_detected_module_preload(js_src, expected_js_module_path):
|
|
|
|
js_modules = []
|
|
|
|
def custom_notify(zim_path: ZimPath):
|
|
js_modules.append(zim_path)
|
|
|
|
HtmlRewriter(
|
|
url_rewriter=ArticleUrlRewriter(
|
|
HttpUrl("http://kiwix.org/my_folder/my_article.html"), set()
|
|
),
|
|
pre_head_insert="",
|
|
post_head_insert="",
|
|
notify_js_module=custom_notify,
|
|
).rewrite(f'<link rel="modulepreload" src="{js_src}"></script>')
|
|
|
|
assert len(js_modules) == 1
|
|
assert js_modules[0].value == expected_js_module_path
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"script_src",
|
|
[
|
|
('<script src="whatever.js"></script>'),
|
|
('<script>console.log("HELLO")</script>'),
|
|
('<script type="json" src="whatever.json"></script>'),
|
|
],
|
|
)
|
|
def test_no_js_module_detected(script_src):
|
|
|
|
js_modules = []
|
|
|
|
def custom_notify(zim_path: ZimPath):
|
|
js_modules.append(zim_path)
|
|
|
|
HtmlRewriter(
|
|
url_rewriter=ArticleUrlRewriter(
|
|
HttpUrl("http://kiwix.org/my_folder/my_article.html"), set()
|
|
),
|
|
pre_head_insert="",
|
|
post_head_insert="",
|
|
notify_js_module=custom_notify,
|
|
).rewrite(script_src)
|
|
|
|
assert len(js_modules) == 0
|
|
|
|
|
|
def test_js_module_base_href_src():
|
|
|
|
js_modules = []
|
|
|
|
def custom_notify(zim_path: ZimPath):
|
|
js_modules.append(zim_path)
|
|
|
|
HtmlRewriter(
|
|
url_rewriter=ArticleUrlRewriter(
|
|
HttpUrl("http://kiwix.org/my_folder/my_article.html"), set()
|
|
),
|
|
pre_head_insert="",
|
|
post_head_insert="",
|
|
notify_js_module=custom_notify,
|
|
).rewrite(
|
|
"""<head>
|
|
<base href="../my_other_folder/">
|
|
<script type="module" src="my-module-script.js"></script>"""
|
|
)
|
|
|
|
assert len(js_modules) == 1
|
|
assert js_modules[0].value == "kiwix.org/my_other_folder/my-module-script.js"
|
|
|
|
|
|
def test_js_module_base_href_inline():
|
|
|
|
js_modules = []
|
|
|
|
def custom_notify(zim_path: ZimPath):
|
|
js_modules.append(zim_path)
|
|
|
|
HtmlRewriter(
|
|
url_rewriter=ArticleUrlRewriter(
|
|
HttpUrl("http://kiwix.org/my_folder/my_article.html"), set()
|
|
),
|
|
pre_head_insert="",
|
|
post_head_insert="",
|
|
notify_js_module=custom_notify,
|
|
).rewrite(
|
|
"""<head><base href="../my_other_folder/">
|
|
<script type="module">
|
|
import * from "./my-module-script.js";
|
|
</script>
|
|
"""
|
|
)
|
|
|
|
assert len(js_modules) == 1
|
|
assert js_modules[0].value == "kiwix.org/my_other_folder/my-module-script.js"
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"html_content, expected_base_href",
|
|
[
|
|
pytest.param("", None, id="empty_content"),
|
|
pytest.param("<html></html>", None, id="empty_html"),
|
|
pytest.param(
|
|
"<html><head><title>Foo</title></head></html>", None, id="no_base"
|
|
),
|
|
pytest.param(
|
|
'<html><head><base href="../.."></head></html>', "../..", id="standard_case"
|
|
),
|
|
pytest.param(
|
|
'<html><head><base href="../..">', "../..", id="malformed_head"
|
|
), # malformed HTML is OK
|
|
pytest.param(
|
|
'<html><base href="../..">', "../..", id="very_malformed_head"
|
|
), # even very malformed HTML is OK
|
|
pytest.param(
|
|
'<base href="../..">', "../..", id="base_at_root"
|
|
), # even very malformed HTML is OK
|
|
pytest.param(
|
|
'<html><body><base href="../.."></body></html>', None, id="base_in_body"
|
|
), # but base in body is ignored
|
|
pytest.param(
|
|
'<html><head><base target="_blank" href="../.."></head></html>',
|
|
"../..",
|
|
id="base_with_target_before",
|
|
),
|
|
pytest.param(
|
|
'<html><head><base href="../.." target="_blank" ></head></html>',
|
|
"../..",
|
|
id="base_with_target_after",
|
|
),
|
|
pytest.param(
|
|
'<html><head><base href="../.." href=".."></head></html>',
|
|
"../..",
|
|
id="base_with_two_href",
|
|
),
|
|
pytest.param(
|
|
'<html><head><base href="../.."><base href=".."></head></html>',
|
|
"../..",
|
|
id="two_bases_with_href",
|
|
),
|
|
pytest.param(
|
|
'<html><head><base target="_blank"><base href="../.."></head></html>',
|
|
"../..",
|
|
id="href_in_second_base",
|
|
),
|
|
pytest.param(
|
|
'<html><head><base target="_blank"><base href="../.."><base href="..">'
|
|
"</head></html>",
|
|
"../..",
|
|
id="href_in_second_base_second_href_ignored",
|
|
),
|
|
],
|
|
)
|
|
def test_extract_base_href(html_content, expected_base_href):
|
|
assert extract_base_href(html_content) == expected_base_href
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests(
|
|
'<html><head><base href="./"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head></head><body><a href="foo.html"></a></body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="../"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head></head><body><a href="../foo.html"></a></body></html>',
|
|
"kiwix.org/a/index.html",
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="./" target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head><base target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="./"><base target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head><base target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="./"><base href="../" target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head><base target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base target="_blank"><base href="./"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head><base target="_blank"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="./"><base target="_blank"><base target="_foo">'
|
|
"</head>"
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
'<html><head><base target="_blank"><base target="_foo"></head>'
|
|
'<body><a href="foo.html"></a></body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="../"></head>'
|
|
'<body><script src="foo.js"></script></body></html>',
|
|
'<html><head></head><body><script src="../foo.js"></script></body></html>',
|
|
"kiwix.org/a/index.html",
|
|
),
|
|
ContentForTests(
|
|
'<html><head><base href="../"></head>'
|
|
'<body><style>background: url("foo.css");}</style></body></html>',
|
|
'<html><head></head><body><style>background: url("../foo.css");}</style>'
|
|
"</body></html>",
|
|
"kiwix.org/a/index.html",
|
|
),
|
|
ContentForTests(
|
|
'<html><head> <link rel="shortcut icon" href="favicon.ico">'
|
|
'<base href="../"></head><body></body></html>',
|
|
'<html><head> <link rel="shortcut icon" href="../favicon.ico">'
|
|
"</head><body></body></html>",
|
|
"kiwix.org/a/index.html",
|
|
),
|
|
]
|
|
)
|
|
def rewrite_base_href_content(request):
|
|
yield request.param
|
|
|
|
|
|
def test_rewrite_base_href(rewrite_base_href_content, no_js_notify):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl(f"http://{rewrite_base_href_content.article_url}"),
|
|
{
|
|
ZimPath("kiwix.org/foo.html"),
|
|
ZimPath("kiwix.org/foo.js"),
|
|
ZimPath("kiwix.org/foo.css"),
|
|
ZimPath("kiwix.org/foo.png"),
|
|
ZimPath("kiwix.org/favicon.png"),
|
|
},
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(rewrite_base_href_content.input_str)
|
|
.content
|
|
== rewrite_base_href_content.expected_str
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"input_content,expected_output",
|
|
[
|
|
pytest.param(
|
|
"""<a type="whatever"></a>""",
|
|
"""<a type="whatever"></a>""",
|
|
id="double_quoted_attr",
|
|
),
|
|
pytest.param(
|
|
"<a type='whatever'></a>",
|
|
"""<a type="whatever"></a>""",
|
|
id="single_quoted_attr",
|
|
),
|
|
pytest.param(
|
|
"""<a type="wha"tever"></a>""",
|
|
"""<a type="wha"tever"></a>""",
|
|
id="uppercase_named_reference_in_attr",
|
|
),
|
|
pytest.param(
|
|
"""<a type="wha"tever"></a>""",
|
|
"""<a type="wha"tever"></a>""",
|
|
id="numeric_reference_in_attr",
|
|
),
|
|
pytest.param(
|
|
"""<a type="whaÆtever"></a>""",
|
|
"""<a type="whaÆtever"></a>""",
|
|
id="numeric_reference_in_attr",
|
|
),
|
|
pytest.param(
|
|
"""<img src="image.png?param1=value1¶m2=value2">""",
|
|
"""<img src="image.png%3Fparam1%3Dvalue1%C2%B6m2%3Dvalue2">""",
|
|
id="badly_escaped_src",
|
|
),
|
|
],
|
|
)
|
|
def test_simple_rewrite(input_content, expected_output, no_js_notify):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(HttpUrl("http://example.com"), set()),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(input_content)
|
|
.content
|
|
== expected_output
|
|
)
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests(
|
|
"""<img onclick="">""",
|
|
),
|
|
ContentForTests(
|
|
"""<img on-whatever="foo">""",
|
|
),
|
|
ContentForTests(
|
|
"""<img on="foo">""",
|
|
),
|
|
ContentForTests(
|
|
"""<img to="foo">""",
|
|
),
|
|
ContentForTests(
|
|
"""<img onclick="document.location.href='./index.html';">""",
|
|
(
|
|
"""<img onclick="var _____WB$wombat$assign$function_____ = """
|
|
"function(name) {return (self._wb_wombat && "
|
|
"self._wb_wombat.local_init && "
|
|
"self._wb_wombat.local_init(name)) || self[name]; };\n"
|
|
"if (!self.__WB_pmw) { self.__WB_pmw = function(obj) "
|
|
"{ this.__WB_source = obj; return this; } }\n"
|
|
"{\n"
|
|
"let window = _____WB$wombat$assign$function_____("window");"
|
|
"\n"
|
|
"let globalThis = _____WB$wombat$assign$function_____"
|
|
"("globalThis");\n"
|
|
"let self = _____WB$wombat$assign$function_____("self");\n"
|
|
"let document = "
|
|
"_____WB$wombat$assign$function_____("document");\n"
|
|
"let location = "
|
|
"_____WB$wombat$assign$function_____("location");\n"
|
|
"let top = _____WB$wombat$assign$function_____("top");\n"
|
|
"let parent = "
|
|
"_____WB$wombat$assign$function_____("parent");\n"
|
|
"let frames = "
|
|
"_____WB$wombat$assign$function_____("frames");\n"
|
|
"let opener = "
|
|
"_____WB$wombat$assign$function_____("opener");\n"
|
|
"let arguments;\n\n"
|
|
"document.location.href='./index.html';\n"
|
|
"""}">"""
|
|
), # NOTA: quotes and ampersand are escaped since we are inside HTML attr
|
|
),
|
|
]
|
|
)
|
|
def rewrite_onxxx_content(request):
|
|
yield request.param
|
|
|
|
|
|
def test_rewrite_onxxx_event(rewrite_onxxx_content, no_js_notify):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl(f"http://{rewrite_onxxx_content.article_url}"),
|
|
{
|
|
ZimPath("kiwix.org/foo.html"),
|
|
ZimPath("kiwix.org/foo.js"),
|
|
ZimPath("kiwix.org/foo.css"),
|
|
ZimPath("kiwix.org/foo.png"),
|
|
ZimPath("kiwix.org/favicon.png"),
|
|
},
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(rewrite_onxxx_content.input_str)
|
|
.content
|
|
== rewrite_onxxx_content.expected_str
|
|
)
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests(
|
|
'<html><head><meta charset="UTF-8"></head><body>whatever</body></html>',
|
|
),
|
|
ContentForTests(
|
|
'<html><head><meta charset="ISO-8859-1"></head>'
|
|
"<body>whatever</body></html>",
|
|
'<html><head><meta charset="UTF-8"></head><body>whatever</body></html>',
|
|
),
|
|
ContentForTests(
|
|
"<html><head>"
|
|
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
|
|
"</head><body>whatever</body></html>",
|
|
),
|
|
ContentForTests(
|
|
"<html><head>"
|
|
'<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">'
|
|
"</head><body>whatever</body></html>",
|
|
"<html><head>"
|
|
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
|
|
"</head><body>whatever</body></html>",
|
|
),
|
|
ContentForTests(
|
|
'<html><head><bla charset="ISO-8859-1"></head><body>whatever</body></html>',
|
|
), # do not rewrite other tags mentionning a charset
|
|
ContentForTests(
|
|
"<html><head>"
|
|
'<meta http-equiv="Foo" content="text/html; charset=ISO-8859-1">'
|
|
"</head><body>whatever</body></html>",
|
|
), # do not rewrite other http-equiv mentionning a charset
|
|
]
|
|
)
|
|
def rewrite_meta_charset_content(request):
|
|
yield request.param
|
|
|
|
|
|
def test_rewrite_meta_charset(rewrite_meta_charset_content, no_js_notify):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl(f"http://{rewrite_meta_charset_content.article_url}"),
|
|
set(),
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(rewrite_meta_charset_content.input_str)
|
|
.content
|
|
== rewrite_meta_charset_content.expected_str
|
|
)
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
ContentForTests(
|
|
'<html><head><meta http-equiv="refresh" '
|
|
'content="3;url=https://kiwix.org/somepage" />'
|
|
"</head><body>whatever</body></html>",
|
|
'<html><head><meta http-equiv="refresh" '
|
|
'content="3;url=somepage" />'
|
|
"</head><body>whatever</body></html>",
|
|
),
|
|
]
|
|
)
|
|
def rewrite_meta_http_equiv_redirect_full_content(request):
|
|
yield request.param
|
|
|
|
|
|
def test_rewrite_meta_http_equiv_redirect_full(
|
|
rewrite_meta_http_equiv_redirect_full_content, no_js_notify
|
|
):
|
|
assert (
|
|
HtmlRewriter(
|
|
ArticleUrlRewriter(
|
|
HttpUrl(
|
|
f"http://{rewrite_meta_http_equiv_redirect_full_content.article_url}"
|
|
),
|
|
{ZimPath("kiwix.org/somepage")},
|
|
),
|
|
"",
|
|
"",
|
|
no_js_notify,
|
|
)
|
|
.rewrite(rewrite_meta_http_equiv_redirect_full_content.input_str)
|
|
.content
|
|
== rewrite_meta_http_equiv_redirect_full_content.expected_str
|
|
)
|
|
|
|
|
|
rules = HTMLRewritingRules()
|
|
|
|
|
|
@rules.drop_attribute()
|
|
def drop_all_named_attribute(attr_name: str):
|
|
return attr_name == "all_named"
|
|
|
|
|
|
@rules.drop_attribute()
|
|
def drop_all_tag_name_attribute(tag: str):
|
|
return tag == "all_tag"
|
|
|
|
|
|
@rules.drop_attribute()
|
|
def drop_tag_name_attribute(tag: str, attr_name: str):
|
|
return tag == "drop_tag" and attr_name == "drop_name"
|
|
|
|
|
|
@rules.drop_attribute()
|
|
def drop_attr_name_and_value_attribute(attr_name: str, attr_value: str | None):
|
|
return (
|
|
attr_name == "drop_value"
|
|
and attr_value is not None
|
|
and attr_value.startswith("drop")
|
|
)
|
|
|
|
|
|
@rules.drop_attribute()
|
|
def drop_if_other_attribute(attr_name: str, attrs: AttrsList):
|
|
return attr_name == "drop_if_other" and any(
|
|
other_name == "other" for other_name, _ in attrs
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"tag, attr_name, attr_value, attrs, should_drop",
|
|
[
|
|
pytest.param("all_tag", "foo", "bar", [], True, id="drop_by_tag_name"),
|
|
pytest.param("other_tag", "foo", "bar", [], False, id="dont_drop_by_tag_name"),
|
|
pytest.param("foo", "all_named", "bar", [], True, id="drop_by_attr_name"),
|
|
pytest.param(
|
|
"foo", "other_name", "bar", [], False, id="dont_drop_by_attr_name"
|
|
),
|
|
pytest.param(
|
|
"drop_tag", "drop_name", "bar", [], True, id="drop_by_tag_and_attr_name"
|
|
),
|
|
pytest.param(
|
|
"drop_tag", "foo", "bar", [], False, id="dont_drop_by_tag_and_attr_name"
|
|
),
|
|
pytest.param("foo", "drop_value", "drop_me", [], True, id="drop_by_attr_value"),
|
|
pytest.param(
|
|
"foo", "drop_value", "dont_drop", [], False, id="dont_drop_by_attr_value"
|
|
),
|
|
pytest.param(
|
|
"foo", "drop_value", "dont_drop", [], False, id="dont_drop_by_attr_value"
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"drop_if_other",
|
|
"bar",
|
|
[("foo", None), ("other", "foo"), ("bar", "foo")],
|
|
True,
|
|
id="drop_if_other",
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"drop_if_other",
|
|
"bar",
|
|
[("foo", None), ("bar", "foo")],
|
|
False,
|
|
id="dont_drop_if_not_other",
|
|
),
|
|
],
|
|
)
|
|
def test_html_drop_rules(
|
|
tag: str,
|
|
attr_name: str,
|
|
attr_value: str | None,
|
|
attrs: AttrsList,
|
|
*,
|
|
should_drop: bool,
|
|
):
|
|
assert (
|
|
rules._do_drop_attribute(
|
|
tag=tag, attr_name=attr_name, attr_value=attr_value, attrs=attrs
|
|
)
|
|
is should_drop
|
|
)
|
|
|
|
|
|
def test_bad_html_drop_rules_argument_name():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* is unsupported in function"):
|
|
|
|
@bad_rules.drop_attribute()
|
|
def bad_signature(foo: str) -> bool:
|
|
return foo == "bar"
|
|
|
|
|
|
def test_bad_html_drop_rules_argument_type():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* in function .* must be of type"):
|
|
|
|
@bad_rules.drop_attribute()
|
|
def bad_signature(attr_name: int) -> bool:
|
|
return attr_name == "bar"
|
|
|
|
|
|
@rules.rewrite_attribute()
|
|
def rewrite_tag_value(attr_name: str) -> AttrNameAndValue | None:
|
|
if attr_name != "aname":
|
|
return
|
|
return (attr_name, "foo")
|
|
|
|
|
|
@rules.rewrite_attribute()
|
|
def rewrite_tag_name(attr_name: str, attr_value: str | None) -> AttrNameAndValue | None:
|
|
if attr_name != "bad_name":
|
|
return
|
|
return ("good_name", attr_value)
|
|
|
|
|
|
@rules.rewrite_attribute()
|
|
def rewrite_call_notify(
|
|
attr_name: str,
|
|
notify_js_module: Callable[[ZimPath], None],
|
|
) -> AttrNameAndValue | None:
|
|
if attr_name != "call_notify":
|
|
return
|
|
notify_js_module(ZimPath("foo"))
|
|
return
|
|
|
|
|
|
@rules.rewrite_attribute()
|
|
def rewrite_value_with_base_href(
|
|
attr_name: str,
|
|
base_href: str | None,
|
|
) -> AttrNameAndValue | None:
|
|
if attr_name != "get_base_href":
|
|
return
|
|
return (attr_name, base_href)
|
|
|
|
|
|
@rules.rewrite_attribute()
|
|
def rewrite_attr2_value_with_attr1_value(
|
|
attr_name: str,
|
|
attrs: AttrsList,
|
|
) -> AttrNameAndValue | None:
|
|
if attr_name != "attr2":
|
|
return
|
|
return (attr_name, get_attr_value_from(attrs, "attr1"))
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"tag, attr_name, attr_value, attrs, base_href, expected_result, should_notify",
|
|
[
|
|
pytest.param(
|
|
"foo",
|
|
"aname",
|
|
"bar",
|
|
[],
|
|
"",
|
|
("aname", "foo"),
|
|
False,
|
|
id="rewrite_tag_value",
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"bad_name",
|
|
"bar",
|
|
[],
|
|
"",
|
|
("good_name", "bar"),
|
|
False,
|
|
id="rewrite_tag_name",
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"call_notify",
|
|
"bar",
|
|
[],
|
|
"",
|
|
("call_notify", "bar"),
|
|
True,
|
|
id="call_notify",
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"get_base_href",
|
|
"bar",
|
|
[],
|
|
"base_href_value",
|
|
("get_base_href", "base_href_value"),
|
|
False,
|
|
id="rewrite_value_with_base_href",
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"attr2",
|
|
"bar",
|
|
[("attr1", "value1")],
|
|
"base_href_value",
|
|
("attr2", "value1"),
|
|
False,
|
|
id="rewrite_attr2_value_with_attr1_value",
|
|
),
|
|
],
|
|
)
|
|
def test_html_attribute_rewrite_rules(
|
|
tag: str,
|
|
attr_name: str,
|
|
attr_value: str | None,
|
|
attrs: AttrsList,
|
|
base_href: str,
|
|
expected_result: AttrNameAndValue,
|
|
*,
|
|
should_notify: bool,
|
|
simple_url_rewriter,
|
|
js_rewriter,
|
|
css_rewriter,
|
|
):
|
|
notified_paths = []
|
|
|
|
def notify(path: ZimPath):
|
|
notified_paths.append(path)
|
|
|
|
url_rewriter = simple_url_rewriter("http://www.example.com")
|
|
js_rewriter = js_rewriter(
|
|
url_rewriter=url_rewriter, base_href=base_href, notify_js_module=notify
|
|
)
|
|
css_rewriter = css_rewriter(url_rewriter=url_rewriter, base_href=base_href)
|
|
|
|
assert (
|
|
rules._do_attribute_rewrite(
|
|
tag=tag,
|
|
attr_name=attr_name,
|
|
attr_value=attr_value,
|
|
attrs=attrs,
|
|
js_rewriter=js_rewriter,
|
|
css_rewriter=css_rewriter,
|
|
url_rewriter=url_rewriter,
|
|
base_href=base_href,
|
|
notify_js_module=notify,
|
|
)
|
|
== expected_result
|
|
)
|
|
assert (len(notified_paths) > 0) == should_notify
|
|
|
|
|
|
def test_bad_html_attribute_rewrite_rules_argument_name():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* is unsupported in function"):
|
|
|
|
@bad_rules.rewrite_attribute()
|
|
def bad_signature(foo: str) -> AttrNameAndValue | None:
|
|
return (foo, "bar")
|
|
|
|
|
|
def test_bad_html_attribute_rewrite_rules_argument_type():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* in function .* must be of type"):
|
|
|
|
@bad_rules.rewrite_attribute()
|
|
def bad_signature(attr_name: int) -> AttrNameAndValue | None:
|
|
return (f"{attr_name}", "bar")
|
|
|
|
|
|
@rules.rewrite_tag()
|
|
def rewrite1_tag(
|
|
tag: str,
|
|
) -> str | None:
|
|
if tag != "rewrite1":
|
|
return
|
|
return "<rewriten attr1=value1 />"
|
|
|
|
|
|
@rules.rewrite_tag()
|
|
def rewrite2_tag(
|
|
tag: str,
|
|
attrs: AttrsList,
|
|
*,
|
|
auto_close: bool,
|
|
) -> str | None:
|
|
if tag != "rewrite2":
|
|
return
|
|
|
|
return (
|
|
f"<rewriten {' '.join(format_attr(*attr) for attr in attrs)}"
|
|
f"{'/>' if auto_close else '>'}"
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"tag, attrs, auto_close, expected_result",
|
|
[
|
|
pytest.param(
|
|
"foo",
|
|
[],
|
|
False,
|
|
None,
|
|
id="do_not_rewrite_foo_tag",
|
|
),
|
|
pytest.param(
|
|
"rewrite1",
|
|
[("attr2", "value2")],
|
|
False,
|
|
"<rewriten attr1=value1 />",
|
|
id="rewrite1_tag",
|
|
),
|
|
pytest.param(
|
|
"rewrite2",
|
|
[("attr2", "value2")],
|
|
False,
|
|
'<rewriten attr2="value2">',
|
|
id="rewrite2_tag_no_close",
|
|
),
|
|
pytest.param(
|
|
"rewrite2",
|
|
[("attr2", "value2")],
|
|
True,
|
|
'<rewriten attr2="value2"/>',
|
|
id="rewrite2_tag_auto_close",
|
|
),
|
|
],
|
|
)
|
|
def test_html_tag_rewrite_rules(
|
|
tag: str,
|
|
attrs: AttrsList,
|
|
*,
|
|
auto_close: bool,
|
|
expected_result: str | None,
|
|
):
|
|
assert (
|
|
rules._do_tag_rewrite(
|
|
tag=tag,
|
|
attrs=attrs,
|
|
auto_close=auto_close,
|
|
)
|
|
== expected_result
|
|
)
|
|
|
|
|
|
def test_bad_html_tag_rewrite_rules_argument_name():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* is unsupported in function"):
|
|
|
|
@bad_rules.rewrite_tag()
|
|
def bad_signature(foo: str) -> str:
|
|
return foo
|
|
|
|
|
|
def test_bad_html_tag_rewrite_rules_argument_type():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* in function .* must be of type"):
|
|
|
|
@bad_rules.rewrite_tag()
|
|
def bad_signature(attrs: int) -> str:
|
|
return f"{attrs}"
|
|
|
|
|
|
@rules.rewrite_data()
|
|
def rewrite_data_html_rewrite_context(
|
|
html_rewrite_context: str | None,
|
|
) -> str | None:
|
|
if html_rewrite_context != "rewrite":
|
|
return
|
|
return "rewritten data"
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"html_rewrite_context, base_href, data, expected_result",
|
|
[
|
|
pytest.param(
|
|
"foo",
|
|
"bar",
|
|
"something",
|
|
None,
|
|
id="do_not_rewrite_foo_context",
|
|
),
|
|
pytest.param(
|
|
None,
|
|
"bar",
|
|
"something",
|
|
None,
|
|
id="do_not_rewrite_none_context",
|
|
),
|
|
pytest.param(
|
|
"rewrite",
|
|
"bar",
|
|
"something",
|
|
"rewritten data",
|
|
id="rewrite_data_html_rewrite_context",
|
|
),
|
|
],
|
|
)
|
|
def test_html_data_rewrite_rules(
|
|
html_rewrite_context: str | None,
|
|
base_href: str,
|
|
data: str,
|
|
*,
|
|
expected_result: str | None,
|
|
simple_url_rewriter,
|
|
js_rewriter,
|
|
css_rewriter,
|
|
):
|
|
notified_paths = []
|
|
|
|
def notify(path: ZimPath):
|
|
notified_paths.append(path)
|
|
|
|
url_rewriter = simple_url_rewriter("http://www.example.com")
|
|
js_rewriter = js_rewriter(
|
|
url_rewriter=url_rewriter, base_href=base_href, notify_js_module=notify
|
|
)
|
|
css_rewriter = css_rewriter(url_rewriter=url_rewriter, base_href=base_href)
|
|
|
|
assert (
|
|
rules._do_data_rewrite(
|
|
html_rewrite_context=html_rewrite_context,
|
|
data=data,
|
|
css_rewriter=css_rewriter,
|
|
js_rewriter=js_rewriter,
|
|
url_rewriter=url_rewriter,
|
|
)
|
|
== expected_result
|
|
)
|
|
|
|
|
|
def test_bad_html_data_rewrite_rules_argument_name():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* is unsupported in function"):
|
|
|
|
@bad_rules.rewrite_data()
|
|
def bad_signature(foo: str) -> str | None:
|
|
return foo
|
|
|
|
|
|
def test_bad_html_data_rewrite_rules_argument_type():
|
|
bad_rules = HTMLRewritingRules()
|
|
|
|
with pytest.raises(TypeError, match="Parameter .* in function .* must be of type"):
|
|
|
|
@bad_rules.rewrite_data()
|
|
def bad_signature(data: int) -> str | None:
|
|
return f"{data}"
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"tag, attr_name, attr_value, attrs, expected_result",
|
|
[
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
"1;url=http://www.example.com/somewhere",
|
|
[("http-equiv", "refresh")],
|
|
("content", "1;url=http://www.example.com/somewhererewritten"),
|
|
id="nomimal_case",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
" 1 ; url = http://www.example.com/somewhere ",
|
|
[("http-equiv", "refresh")],
|
|
("content", "1;url=http://www.example.com/somewhererewritten"),
|
|
id="nomimal_case_with_spaces",
|
|
),
|
|
pytest.param(
|
|
"foo",
|
|
"content",
|
|
"1;url=http://www.example.com/somewhere",
|
|
[("http-equiv", "refresh")],
|
|
None,
|
|
id="do_not_rewrite_foo_tag",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"foo",
|
|
"1;url=http://www.example.com/somewhere",
|
|
[("http-equiv", "refresh")],
|
|
None,
|
|
id="do_not_rewrite_foo_attribute",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
"1;url=http://www.example.com/somewhere",
|
|
[("http-equiv", "foo")],
|
|
None,
|
|
id="do_not_rewrite_http_equiv_not_refresh",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
"1;url=http://www.example.com/somewhere",
|
|
[],
|
|
None,
|
|
id="do_not_rewrite_no_http_equiv",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
None,
|
|
[("http-equiv", "refresh")],
|
|
None,
|
|
id="do_not_rewrite_missing_attribute",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
"",
|
|
[("http-equiv", "refresh")],
|
|
None,
|
|
id="do_not_rewrite_empty_attribute",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
"1",
|
|
[("http-equiv", "refresh")],
|
|
None,
|
|
id="do_not_rewrite_attribute_without_url",
|
|
),
|
|
pytest.param(
|
|
"meta",
|
|
"content",
|
|
"1;foo=http://www.example.com/somewhere",
|
|
[("http-equiv", "refresh")],
|
|
None,
|
|
id="do_not_rewrite_bad_attribute",
|
|
),
|
|
],
|
|
)
|
|
def test_rewrite_meta_http_equiv_redirect_rule(
|
|
tag: str,
|
|
attr_name: str,
|
|
attr_value: str | None,
|
|
attrs: AttrsList,
|
|
expected_result: AttrNameAndValue | None,
|
|
simple_url_rewriter,
|
|
):
|
|
url_rewriter = simple_url_rewriter("http://www.example.com", suffix="rewritten")
|
|
|
|
assert (
|
|
rewrite_meta_http_equiv_redirect(
|
|
tag=tag,
|
|
attr_name=attr_name,
|
|
attr_value=attr_value,
|
|
attrs=attrs,
|
|
url_rewriter=url_rewriter,
|
|
base_href=None,
|
|
)
|
|
== expected_result
|
|
)
|