mirror of
https://github.com/python/cpython.git
synced 2025-10-20 08:23:47 +00:00
gh-137729: Fix support for locales with @-modifiers (GH-137253)
This commit is contained in:
parent
bc2872445b
commit
0c8fecc4cf
6 changed files with 164 additions and 16 deletions
|
@ -42,7 +42,7 @@ The :mod:`locale` module defines the following exception and functions:
|
||||||
If *locale* is a pair, it is converted to a locale name using
|
If *locale* is a pair, it is converted to a locale name using
|
||||||
the locale aliasing engine.
|
the locale aliasing engine.
|
||||||
The language code has the same format as a :ref:`locale name <locale_name>`,
|
The language code has the same format as a :ref:`locale name <locale_name>`,
|
||||||
but without encoding and ``@``-modifier.
|
but without encoding.
|
||||||
The language code and encoding can be ``None``.
|
The language code and encoding can be ``None``.
|
||||||
|
|
||||||
If *locale* is omitted or ``None``, the current setting for *category* is
|
If *locale* is omitted or ``None``, the current setting for *category* is
|
||||||
|
@ -58,6 +58,9 @@ The :mod:`locale` module defines the following exception and functions:
|
||||||
specified in the :envvar:`LANG` environment variable). If the locale is not
|
specified in the :envvar:`LANG` environment variable). If the locale is not
|
||||||
changed thereafter, using multithreading should not cause problems.
|
changed thereafter, using multithreading should not cause problems.
|
||||||
|
|
||||||
|
.. versionchanged:: next
|
||||||
|
Support language codes with ``@``-modifiers.
|
||||||
|
|
||||||
|
|
||||||
.. function:: localeconv()
|
.. function:: localeconv()
|
||||||
|
|
||||||
|
@ -366,11 +369,15 @@ The :mod:`locale` module defines the following exception and functions:
|
||||||
values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`.
|
values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`.
|
||||||
|
|
||||||
The language code has the same format as a :ref:`locale name <locale_name>`,
|
The language code has the same format as a :ref:`locale name <locale_name>`,
|
||||||
but without encoding and ``@``-modifier.
|
but without encoding.
|
||||||
The language code and encoding may be ``None`` if their values cannot be
|
The language code and encoding may be ``None`` if their values cannot be
|
||||||
determined.
|
determined.
|
||||||
The "C" locale is represented as ``(None, None)``.
|
The "C" locale is represented as ``(None, None)``.
|
||||||
|
|
||||||
|
.. versionchanged:: next
|
||||||
|
``@``-modifiers are no longer silently removed, but included in
|
||||||
|
the language code.
|
||||||
|
|
||||||
|
|
||||||
.. function:: getpreferredencoding(do_setlocale=True)
|
.. function:: getpreferredencoding(do_setlocale=True)
|
||||||
|
|
||||||
|
|
|
@ -274,6 +274,15 @@ http.cookies
|
||||||
(Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.)
|
(Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.)
|
||||||
|
|
||||||
|
|
||||||
|
locale
|
||||||
|
------
|
||||||
|
|
||||||
|
* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers.
|
||||||
|
``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`,
|
||||||
|
but included in the language code.
|
||||||
|
(Contributed by Serhiy Storchaka in :gh:`137729`.)
|
||||||
|
|
||||||
|
|
||||||
math
|
math
|
||||||
----
|
----
|
||||||
|
|
||||||
|
|
|
@ -375,12 +375,14 @@ def _replace_encoding(code, encoding):
|
||||||
def _append_modifier(code, modifier):
|
def _append_modifier(code, modifier):
|
||||||
if modifier == 'euro':
|
if modifier == 'euro':
|
||||||
if '.' not in code:
|
if '.' not in code:
|
||||||
return code + '.ISO8859-15'
|
# Linux appears to require keeping the "@euro" modifier in place,
|
||||||
|
# even when using the ".ISO8859-15" encoding.
|
||||||
|
return code + '.ISO8859-15@euro'
|
||||||
_, _, encoding = code.partition('.')
|
_, _, encoding = code.partition('.')
|
||||||
if encoding in ('ISO8859-15', 'UTF-8'):
|
if encoding == 'UTF-8':
|
||||||
return code
|
return code
|
||||||
if encoding == 'ISO8859-1':
|
if encoding == 'ISO8859-1':
|
||||||
return _replace_encoding(code, 'ISO8859-15')
|
code = _replace_encoding(code, 'ISO8859-15')
|
||||||
return code + '@' + modifier
|
return code + '@' + modifier
|
||||||
|
|
||||||
def normalize(localename):
|
def normalize(localename):
|
||||||
|
@ -485,13 +487,18 @@ def _parse_localename(localename):
|
||||||
# Deal with locale modifiers
|
# Deal with locale modifiers
|
||||||
code, modifier = code.split('@', 1)
|
code, modifier = code.split('@', 1)
|
||||||
if modifier == 'euro' and '.' not in code:
|
if modifier == 'euro' and '.' not in code:
|
||||||
# Assume Latin-9 for @euro locales. This is bogus,
|
# Assume ISO8859-15 for @euro locales. Do note that some systems
|
||||||
# since some systems may use other encodings for these
|
# may use other encodings for these locales, so this may not always
|
||||||
# locales. Also, we ignore other modifiers.
|
# be correct.
|
||||||
return code, 'iso-8859-15'
|
return code + '@euro', 'ISO8859-15'
|
||||||
|
else:
|
||||||
|
modifier = ''
|
||||||
|
|
||||||
if '.' in code:
|
if '.' in code:
|
||||||
return tuple(code.split('.')[:2])
|
code, encoding = code.split('.')[:2]
|
||||||
|
if modifier:
|
||||||
|
code += '@' + modifier
|
||||||
|
return code, encoding
|
||||||
elif code == 'C':
|
elif code == 'C':
|
||||||
return None, None
|
return None, None
|
||||||
elif code == 'UTF-8':
|
elif code == 'UTF-8':
|
||||||
|
@ -516,7 +523,14 @@ def _build_localename(localetuple):
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
return language
|
return language
|
||||||
else:
|
else:
|
||||||
return language + '.' + encoding
|
if '@' in language:
|
||||||
|
language, modifier = language.split('@', 1)
|
||||||
|
else:
|
||||||
|
modifier = ''
|
||||||
|
localename = language + '.' + encoding
|
||||||
|
if modifier:
|
||||||
|
localename += '@' + modifier
|
||||||
|
return localename
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
raise TypeError('Locale must be None, a string, or an iterable of '
|
raise TypeError('Locale must be None, a string, or an iterable of '
|
||||||
'two strings -- language code, encoding.') from None
|
'two strings -- language code, encoding.') from None
|
||||||
|
@ -888,6 +902,12 @@ def getpreferredencoding(do_setlocale=True):
|
||||||
# SS 2025-06-10:
|
# SS 2025-06-10:
|
||||||
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
|
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
|
||||||
# on all platforms.
|
# on all platforms.
|
||||||
|
#
|
||||||
|
# SS 2025-07-30:
|
||||||
|
# Remove conflicts with GNU libc.
|
||||||
|
#
|
||||||
|
# removed 'el_gr@euro'
|
||||||
|
# removed 'uz_uz@cyrillic'
|
||||||
|
|
||||||
locale_alias = {
|
locale_alias = {
|
||||||
'a3': 'az_AZ.KOI8-C',
|
'a3': 'az_AZ.KOI8-C',
|
||||||
|
@ -1021,7 +1041,6 @@ def getpreferredencoding(do_setlocale=True):
|
||||||
'el': 'el_GR.ISO8859-7',
|
'el': 'el_GR.ISO8859-7',
|
||||||
'el_cy': 'el_CY.ISO8859-7',
|
'el_cy': 'el_CY.ISO8859-7',
|
||||||
'el_gr': 'el_GR.ISO8859-7',
|
'el_gr': 'el_GR.ISO8859-7',
|
||||||
'el_gr@euro': 'el_GR.ISO8859-15',
|
|
||||||
'en': 'en_US.ISO8859-1',
|
'en': 'en_US.ISO8859-1',
|
||||||
'en_ag': 'en_AG.UTF-8',
|
'en_ag': 'en_AG.UTF-8',
|
||||||
'en_au': 'en_AU.ISO8859-1',
|
'en_au': 'en_AU.ISO8859-1',
|
||||||
|
@ -1456,7 +1475,6 @@ def getpreferredencoding(do_setlocale=True):
|
||||||
'ur_pk': 'ur_PK.CP1256',
|
'ur_pk': 'ur_PK.CP1256',
|
||||||
'uz': 'uz_UZ.UTF-8',
|
'uz': 'uz_UZ.UTF-8',
|
||||||
'uz_uz': 'uz_UZ.UTF-8',
|
'uz_uz': 'uz_UZ.UTF-8',
|
||||||
'uz_uz@cyrillic': 'uz_UZ.UTF-8',
|
|
||||||
've': 've_ZA.UTF-8',
|
've': 've_ZA.UTF-8',
|
||||||
've_za': 've_ZA.UTF-8',
|
've_za': 've_ZA.UTF-8',
|
||||||
'vi': 'vi_VN.TCVN',
|
'vi': 'vi_VN.TCVN',
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
from test import support
|
||||||
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
|
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
|
||||||
from test.support.warnings_helper import check_warnings
|
from test.support.warnings_helper import check_warnings
|
||||||
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
|
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
|
||||||
|
@ -425,8 +426,8 @@ def test_hyphenated_encoding(self):
|
||||||
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
|
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
|
||||||
|
|
||||||
def test_euro_modifier(self):
|
def test_euro_modifier(self):
|
||||||
self.check('de_DE@euro', 'de_DE.ISO8859-15')
|
self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
|
||||||
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
|
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
|
||||||
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')
|
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')
|
||||||
|
|
||||||
def test_latin_modifier(self):
|
def test_latin_modifier(self):
|
||||||
|
@ -534,6 +535,105 @@ def test_setlocale_long_encoding(self):
|
||||||
with self.assertRaises(locale.Error):
|
with self.assertRaises(locale.Error):
|
||||||
locale.setlocale(locale.LC_ALL, loc2)
|
locale.setlocale(locale.LC_ALL, loc2)
|
||||||
|
|
||||||
|
@support.subTests('localename,localetuple', [
|
||||||
|
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
|
||||||
|
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
|
||||||
|
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
|
||||||
|
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
|
||||||
|
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
|
||||||
|
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
|
||||||
|
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
|
||||||
|
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
|
||||||
|
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
|
||||||
|
('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
|
||||||
|
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
|
||||||
|
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
|
||||||
|
('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
|
||||||
|
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
|
||||||
|
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
|
||||||
|
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
|
||||||
|
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
|
||||||
|
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
|
||||||
|
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
|
||||||
|
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
|
||||||
|
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
|
||||||
|
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
|
||||||
|
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
|
||||||
|
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
|
||||||
|
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
|
||||||
|
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
|
||||||
|
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
|
||||||
|
('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
|
||||||
|
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
|
||||||
|
('be_BY.UTF-8@latin', ('be_BY@latin', None)),
|
||||||
|
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
|
||||||
|
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
|
||||||
|
('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
|
||||||
|
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
|
||||||
|
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
|
||||||
|
('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
|
||||||
|
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
|
||||||
|
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
|
||||||
|
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
|
||||||
|
])
|
||||||
|
def test_setlocale_with_modifier(self, localename, localetuple):
|
||||||
|
try:
|
||||||
|
locale.setlocale(locale.LC_CTYPE, localename)
|
||||||
|
except locale.Error as exc:
|
||||||
|
self.skipTest(str(exc))
|
||||||
|
loc = locale.setlocale(locale.LC_CTYPE, localetuple)
|
||||||
|
self.assertEqual(loc, localename)
|
||||||
|
|
||||||
|
loctuple = locale.getlocale(locale.LC_CTYPE)
|
||||||
|
loc = locale.setlocale(locale.LC_CTYPE, loctuple)
|
||||||
|
self.assertEqual(loc, localename)
|
||||||
|
|
||||||
|
@support.subTests('localename,localetuple', [
|
||||||
|
('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
|
||||||
|
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
|
||||||
|
('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
|
||||||
|
('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
|
||||||
|
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
|
||||||
|
('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
|
||||||
|
('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
|
||||||
|
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
|
||||||
|
('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
|
||||||
|
('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
|
||||||
|
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
|
||||||
|
('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
|
||||||
|
('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
|
||||||
|
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
|
||||||
|
('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
|
||||||
|
('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
|
||||||
|
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
|
||||||
|
('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
|
||||||
|
('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
|
||||||
|
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
|
||||||
|
('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
|
||||||
|
('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
|
||||||
|
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
|
||||||
|
('be_BY@latin', ('be_BY@latin', 'UTF-8')),
|
||||||
|
('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
|
||||||
|
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
|
||||||
|
('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
|
||||||
|
('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
|
||||||
|
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
|
||||||
|
('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
|
||||||
|
('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
|
||||||
|
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
|
||||||
|
('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
|
||||||
|
])
|
||||||
|
def test_getlocale_with_modifier(self, localename, localetuple):
|
||||||
|
try:
|
||||||
|
locale.setlocale(locale.LC_CTYPE, localename)
|
||||||
|
except locale.Error as exc:
|
||||||
|
self.skipTest(str(exc))
|
||||||
|
loctuple = locale.getlocale(locale.LC_CTYPE)
|
||||||
|
self.assertEqual(loctuple, localetuple)
|
||||||
|
|
||||||
|
locale.setlocale(locale.LC_CTYPE, loctuple)
|
||||||
|
self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)
|
||||||
|
|
||||||
|
|
||||||
class TestMiscellaneous(unittest.TestCase):
|
class TestMiscellaneous(unittest.TestCase):
|
||||||
def test_defaults_UTF8(self):
|
def test_defaults_UTF8(self):
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
:func:`locale.setlocale` now supports language codes with ``@``-modifiers.
|
||||||
|
``@``-modifiers are no longer silently removed in :func:`locale.getlocale`,
|
||||||
|
but included in the language code.
|
|
@ -44,6 +44,13 @@ def parse(filename):
|
||||||
# Ignore one letter locale mappings (except for 'c')
|
# Ignore one letter locale mappings (except for 'c')
|
||||||
if len(locale) == 1 and locale != 'c':
|
if len(locale) == 1 and locale != 'c':
|
||||||
continue
|
continue
|
||||||
|
if '@' in locale and '@' not in alias:
|
||||||
|
# Do not simply remove the "@euro" modifier.
|
||||||
|
# Glibc generates separate locales with the "@euro" modifier, and
|
||||||
|
# does not always generate a locale without it with the same encoding.
|
||||||
|
# It can also affect collation.
|
||||||
|
if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
|
||||||
|
alias += '@euro'
|
||||||
# Normalize encoding, if given
|
# Normalize encoding, if given
|
||||||
if '.' in locale:
|
if '.' in locale:
|
||||||
lang, encoding = locale.split('.')[:2]
|
lang, encoding = locale.split('.')[:2]
|
||||||
|
@ -51,6 +58,10 @@ def parse(filename):
|
||||||
encoding = encoding.replace('_', '')
|
encoding = encoding.replace('_', '')
|
||||||
locale = lang + '.' + encoding
|
locale = lang + '.' + encoding
|
||||||
data[locale] = alias
|
data[locale] = alias
|
||||||
|
# Conflict with glibc.
|
||||||
|
data.pop('el_gr@euro', None)
|
||||||
|
data.pop('uz_uz@cyrillic', None)
|
||||||
|
data.pop('uz_uz.utf8@cyrillic', None)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def parse_glibc_supported(filename):
|
def parse_glibc_supported(filename):
|
||||||
|
@ -81,7 +92,7 @@ def parse_glibc_supported(filename):
|
||||||
# Add an encoding to alias
|
# Add an encoding to alias
|
||||||
alias, _, modifier = alias.partition('@')
|
alias, _, modifier = alias.partition('@')
|
||||||
alias = _locale._replace_encoding(alias, alias_encoding)
|
alias = _locale._replace_encoding(alias, alias_encoding)
|
||||||
if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
|
if modifier:
|
||||||
alias += '@' + modifier
|
alias += '@' + modifier
|
||||||
data[locale] = alias
|
data[locale] = alias
|
||||||
return data
|
return data
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue