2024-06-11 06:51:41 +00:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
from warc2zim.language import parse_language
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("input_lang", "expected_lang"),
    [
        # English given as a 2-letter code, a 3-letter code and a full name.
        pytest.param("en", "eng", id="english_2_chars"),
        pytest.param("eng", "eng", id="english_3_chars"),
        pytest.param("English", "eng", id="english_full_1"),
        # Chinese in the same forms, plus a regional variant tag and a
        # mixed-case full name.
        pytest.param("zh", "zho", id="chinese_2_chars"),
        pytest.param("zh-hans", "zho", id="chinese_variant"),
        pytest.param("zho", "zho", id="chinese_3_chars"),
        pytest.param("Chinese", "zho", id="chinese_full_1"),
        pytest.param("chinEse", "zho", id="chinese_full_2"),
        # A name that is not recognized is expected to yield "eng".
        pytest.param("patois", "eng", id="unrecognized_bad_name"),
        # Comma-separated lists: unknown entries are dropped, surrounding
        # spaces are ignored and duplicates are collapsed.
        pytest.param("unknown,fra,unknown", "fra", id="ignore_unknown"),
        pytest.param("eng,fra", "eng,fra", id="two_langs_1"),
        pytest.param("fra,eng", "fra,eng", id="two_langs_2"),  # order must be preserved
        pytest.param(" eng , fra ", "eng,fra", id="two_langs_spaces"),
        pytest.param("eng,fra,English", "eng,fra", id="duplicates"),
        # ";" is not treated as a separator, so the whole value is expected
        # to yield "eng", like any other unrecognized input.
        pytest.param("eng;fra", "eng", id="unrecognized_bad_separator"),
    ],
)
def test_parse_language(input_lang, expected_lang):
    """Check that ``parse_language`` turns each input into the expected string.

    Args:
        input_lang: Raw language value handed to ``parse_language``.
        expected_lang: Comma-separated three-letter codes the call must return.
    """
    parsed = parse_language(input_lang)
    assert parsed == expected_lang
|