[3.13] gh-130197: pygettext: Test the --escape option (GH-131902) (GH-132032)

(cherry picked from commit 87d9983994)
This commit is contained in:
Tomas R. 2025-04-02 22:30:26 +02:00 committed by GitHub
parent 75d453b0ab
commit 2d909c356f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 143 additions and 15 deletions

View file

@ -0,0 +1,45 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: escapes.py:5
msgid ""
"\"\t\n"
"\r\\"
msgstr ""
#: escapes.py:8
msgid ""
"\000\001\002\003\004\005\006\007\010\t\n"
"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
msgstr ""
#: escapes.py:13
msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
msgstr ""
#: escapes.py:17
msgid "\177"
msgstr ""
#: escapes.py:20
msgid "€   ÿ"
msgstr ""
#: escapes.py:23
msgid "α ㄱ 𓂀"
msgstr ""

View file

@ -0,0 +1,45 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: escapes.py:5
msgid ""
"\"\t\n"
"\r\\"
msgstr ""
#: escapes.py:8
msgid ""
"\000\001\002\003\004\005\006\007\010\t\n"
"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
msgstr ""
#: escapes.py:13
msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
msgstr ""
#: escapes.py:17
msgid "\177"
msgstr ""
#: escapes.py:20
msgid "\302\200 \302\240 \303\277"
msgstr ""
#: escapes.py:23
msgid "\316\261 \343\204\261 \360\223\202\200"
msgstr ""

View file

@ -0,0 +1,23 @@
from gettext import gettext as _  # bind the function, not the module, so the _() calls below are valid
# Special characters that are always escaped in the POT file
_('"\t\n\r\\')
# All ASCII control characters 0-31
_('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n'
'\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15'
'\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f')
# All printable ASCII characters 32-126
_(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
# ASCII char 127 (DEL)
_('\x7f')
# Some characters in the 128-255 range (octal-escaped only under --escape)
_('\x80 \xa0 ÿ')
# Some characters >= 256, encoded in UTF-8 as 2, 3 and 4 bytes, respectively
_('α ㄱ 𓂀')

View file

@ -369,15 +369,8 @@ class _(object):
def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'):
output_file = input_file.with_suffix('.pot')
with self.subTest(input_file=f'i18n_data/{input_file}'):
contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None):
Path(input_file.name).write_text(contents)
assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
for input_file, output_file, output in extract_from_snapshots():
with self.subTest(input_file=input_file):
expected = output_file.read_text(encoding='utf-8')
self.assert_POT_equal(expected, output)
@ -408,15 +401,37 @@ def test_files_list(self):
self.assertNotIn(text3, data)
def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'):
output_file = input_file.with_suffix('.pot')
def extract_from_snapshots():
snapshots = {
'messages.py': ('--docstrings',),
'fileloc.py': ('--docstrings',),
'docstrings.py': ('--docstrings',),
# == Test character escaping
# Escape ascii and unicode:
'escapes.py': ('--escape',),
# Escape only ascii and let unicode pass through:
('escapes.py', 'ascii-escapes.pot'): (),
}
for filename, args in snapshots.items():
if isinstance(filename, tuple):
filename, output_file = filename
output_file = DATA_DIR / output_file
input_file = DATA_DIR / filename
else:
input_file = DATA_DIR / filename
output_file = input_file.with_suffix('.pot')
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
assert_python_ok('-Xutf8', Test_pygettext.script, *args,
input_file.name)
yield (input_file, output_file,
Path('messages.pot').read_text(encoding='utf-8'))
def update_POT_snapshots():
for _, output_file, output in extract_from_snapshots():
output = normalize_POT_file(output)
output_file.write_text(output, encoding='utf-8')

View file

@ -207,7 +207,7 @@ def make_escapes(pass_nonascii):
global escapes, escape
if pass_nonascii:
# Allow non-ascii characters to pass through so that e.g. 'msgid
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod = 128
escape = escape_ascii