Fix linter / type checker issues

benoit74 2025-02-03 14:50:58 +00:00
parent 4c584cab75
commit cd3251b978
No known key found for this signature in database (GPG key ID: B89606434FC7B530)
9 changed files with 22 additions and 17 deletions


@@ -2,8 +2,9 @@ import re
import sys
from pathlib import Path
from zimscraperlib.rewriting.url_rewriting import ZimPath
from warc2zim.constants import logger
from warc2zim.url_rewriting import ZimPath
def notify(_: ZimPath):


@@ -1,4 +1,4 @@
""" html rewrite test utility
"""html rewrite test utility
This utility takes a given HTML content as input, base64 encoded, its original URL, and
rewrites its content.
@@ -17,9 +17,10 @@ import logging
import sys
from pathlib import Path
from zimscraperlib.rewriting.html import HtmlRewriter
from zimscraperlib.rewriting.url_rewriting import ArticleUrlRewriter, HttpUrl, ZimPath
from warc2zim.constants import logger
from warc2zim.content_rewriting.html import HtmlRewriter
from warc2zim.url_rewriting import ArticleUrlRewriter, HttpUrl, ZimPath
from warc2zim.utils import to_string
@@ -37,7 +38,9 @@ def main(path_to_content: str, article_url: str, encoding: str | None = None):
content = Path(path_to_content)
url_rewriter = ArticleUrlRewriter(
HttpUrl(article_url), existing_zim_paths=set(), missing_zim_paths=set()
article_url=HttpUrl(article_url),
existing_zim_paths=set(),
missing_zim_paths=set(),
)
html_rewriter = HtmlRewriter(url_rewriter, "", None, notify)


@@ -1,4 +1,4 @@
""" MIA English exclude list
"""MIA English exclude list
This utility computes the list of all subpages/languages that must be ignored for the
English ZIM of The Marxists Internet Archive (MIA) at www.marxists.org.
@@ -23,9 +23,9 @@ soup = BeautifulSoup(resp.text, "html.parser")
subfolders = set()
REGEX = re.compile(r"\.\.\/(?P<subfolder>.*?)\/")
for anchor in soup.find_all("a"):
if not anchor.has_attr("href"):
if not anchor.has_attr("href"): # pyright: ignore
continue
if match := REGEX.match(anchor["href"]):
if match := REGEX.match(anchor["href"]): # pyright: ignore
subfolders.add(match.group("subfolder"))
print("|".join(sorted(subfolders))) # noqa: T201


@@ -1,4 +1,4 @@
from zimscraperlib import getLogger
from zimscraperlib.logging import getLogger
# Shared logger with default log level at this stage
logger = getLogger("warc2zim")


@@ -1,7 +1,7 @@
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 nu
""" warc2zim conversion utility
"""warc2zim conversion utility
This utility provides a conversion from WARC records to ZIM files.
WARC record are directly stored in a zim file as:
@@ -739,7 +739,7 @@ class Converter:
lang_elem = soup.find("html", attrs={"lang": True})
if lang_elem:
self.language = parse_language(
lang_elem.attrs[ # pyright: ignore[reportGeneralTypeIssues ,reportAttributeAccessIssue]
lang_elem.attrs[ # pyright: ignore[reportArgumentType, reportAttributeAccessIssue]
"lang"
]
)
@@ -751,7 +751,7 @@
)
if lang_elem:
self.language = parse_language(
lang_elem.attrs[ # pyright: ignore[reportGeneralTypeIssues ,reportAttributeAccessIssue]
lang_elem.attrs[ # pyright: ignore[reportArgumentType ,reportAttributeAccessIssue]
"content"
]
)
@@ -761,7 +761,7 @@
lang_elem = soup.find("meta", {"name": "language", "content": True})
if lang_elem:
self.language = parse_language(
lang_elem.attrs[ # pyright: ignore[reportGeneralTypeIssues ,reportAttributeAccessIssue]
lang_elem.attrs[ # pyright: ignore[reportArgumentType ,reportAttributeAccessIssue]
"content"
]
)


@@ -1,7 +1,7 @@
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 nu
""" warc2zim's item classes
"""warc2zim's item classes
This module contains the differents Item we may want to add to a Zim archive.
"""


@@ -27,7 +27,7 @@ JSONP_CALLBACK_REGEX = re.compile(r"[?].*(?:callback|jsonp)=([^&]+)", re.I)
def no_title(
function: Callable[..., str | bytes]
function: Callable[..., str | bytes],
) -> Callable[..., tuple[str, str | bytes]]:
"""Decorator for methods transforming content without extracting a title.


@@ -258,7 +258,7 @@ class CharsetsTestData:
expected_strings: list[str]
def get_testdata() -> Generator[CharsetsTestData, None, None]:
def get_testdata() -> Generator[CharsetsTestData]:
data = json.loads(
(Path(__file__).parent / "encodings" / "definition.json").read_bytes()
)
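The shortened return annotation relies on the send and return type parameters of collections.abc.Generator defaulting to None, so Generator[CharsetsTestData] means the same as Generator[CharsetsTestData, None, None]. A minimal illustration, assuming Python 3.13+ or a type checker whose typeshed carries those defaults (count_to is just a made-up helper name):

    from collections.abc import Generator

    def count_to(n: int) -> Generator[int]:  # same as Generator[int, None, None]
        # A plain generator: nothing is ever sent into it and it returns None.
        yield from range(n)

    print(list(count_to(3)))  # prints [0, 1, 2]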


@@ -10,8 +10,9 @@ from urllib.parse import unquote
import pytest
import requests
from zimscraperlib.image.conversion import convert_image, convert_svg2png, resize_image
from zimscraperlib.image.conversion import convert_image, convert_svg2png
from zimscraperlib.image.probing import format_for
from zimscraperlib.image.transformation import resize_image
from zimscraperlib.zim import Archive
from warc2zim.__about__ import __version__