Fixed #83: find home if main_url contains fragment

This commit is contained in:
renaud gaudin 2021-01-25 11:20:27 +00:00
parent 0eaedad7a7
commit 9eec367b59
2 changed files with 7 additions and 2 deletions

View file

@@ -1,6 +1,11 @@
warc2zim
===
# 1.3.5
* don't crash on records without WARC-Target-URI
* fixed failure to find the main page if the main URL contains a fragment
# 1.3.4
* Added `--custom-css` option

View file

@@ -32,7 +32,7 @@ import re
import io
import time
from argparse import ArgumentParser
from urllib.parse import urlsplit, urljoin, urlunsplit
from urllib.parse import urlsplit, urljoin, urlunsplit, urldefrag
import pkg_resources
import requests
@@ -499,7 +499,7 @@ class WARC2Zim:
):
self.main_url = url
if self.main_url != url:
if urldefrag(self.main_url).url != url:
continue
# if we get here, found record for the main page