mirror of
https://github.com/python/cpython.git
synced 2025-10-19 16:03:42 +00:00
closes gh-138706: update Unicode to 17.0.0 (#138719)
This commit is contained in:
parent
e0f54a608e
commit
5bd4bf04c4
11 changed files with 22094 additions and 22031 deletions
|
@ -1843,9 +1843,9 @@ expression support in the :mod:`re` module).
|
|||
lowercase, :meth:`lower` would do nothing to ``'ß'``; :meth:`casefold`
|
||||
converts it to ``"ss"``.
|
||||
|
||||
The casefolding algorithm is
|
||||
`described in section 3.13 'Default Case Folding' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__.
|
||||
The casefolding algorithm is `described in section 3.13.3 'Default Case
|
||||
Folding' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G53253>`__.
|
||||
|
||||
.. versionadded:: 3.3
|
||||
|
||||
|
@ -2056,7 +2056,7 @@ expression support in the :mod:`re` module).
|
|||
property being one of "Lm", "Lt", "Lu", "Ll", or "Lo". Note that this is different
|
||||
from the `Alphabetic property defined in section 4.10 'Letters, Alphabetic, and
|
||||
Ideographic' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-4/#G91002>`_.
|
||||
<https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-4/#G91002>`__.
|
||||
|
||||
|
||||
.. method:: str.isascii()
|
||||
|
@ -2196,9 +2196,9 @@ expression support in the :mod:`re` module).
|
|||
Return a copy of the string with all the cased characters [4]_ converted to
|
||||
lowercase.
|
||||
|
||||
The lowercasing algorithm used is
|
||||
`described in section 3.13 'Default Case Folding' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__.
|
||||
The lowercasing algorithm used is `described in section 3.13.2 'Default Case
|
||||
Conversion' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G34078>`__.
|
||||
|
||||
|
||||
.. method:: str.lstrip(chars=None, /)
|
||||
|
@ -2561,9 +2561,9 @@ expression support in the :mod:`re` module).
|
|||
character(s) is not "Lu" (Letter, uppercase), but e.g. "Lt" (Letter,
|
||||
titlecase).
|
||||
|
||||
The uppercasing algorithm used is
|
||||
`described in section 3.13 'Default Case Folding' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__.
|
||||
The uppercasing algorithm used is `described in section 3.13.2 'Default Case
|
||||
Conversion' of the Unicode Standard
|
||||
<https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G34078>`__.
|
||||
|
||||
|
||||
.. method:: str.zfill(width, /)
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
|
||||
This module provides access to the Unicode Character Database (UCD) which
|
||||
defines character properties for all Unicode characters. The data contained in
|
||||
this database is compiled from the `UCD version 16.0.0
|
||||
<https://www.unicode.org/Public/16.0.0/ucd>`_.
|
||||
this database is compiled from the `UCD version 17.0.0
|
||||
<https://www.unicode.org/Public/17.0.0/ucd>`__.
|
||||
|
||||
The module uses the same names and symbols as defined by Unicode
|
||||
Standard Annex #44, `"Unicode Character Database"
|
||||
|
@ -211,6 +211,6 @@ In addition, the module exposes the following constant:
|
|||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#] https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt
|
||||
.. [#] https://www.unicode.org/Public/17.0.0/ucd/NameAliases.txt
|
||||
|
||||
.. [#] https://www.unicode.org/Public/16.0.0/ucd/NamedSequences.txt
|
||||
.. [#] https://www.unicode.org/Public/17.0.0/ucd/NamedSequences.txt
|
||||
|
|
|
@ -384,8 +384,8 @@ Character Database.
|
|||
|
||||
|
||||
.. _UAX-31: https://www.unicode.org/reports/tr31/
|
||||
.. _PropList.txt: https://www.unicode.org/Public/16.0.0/ucd/PropList.txt
|
||||
.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt
|
||||
.. _PropList.txt: https://www.unicode.org/Public/17.0.0/ucd/PropList.txt
|
||||
.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt
|
||||
.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms
|
||||
|
||||
|
||||
|
@ -793,7 +793,7 @@ with the given *name*::
|
|||
This sequence cannot appear in :ref:`bytes literals <bytes-literal>`.
|
||||
|
||||
.. versionchanged:: 3.3
|
||||
Support for `name aliases <https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt>`__
|
||||
Support for `name aliases <https://www.unicode.org/Public/17.0.0/ucd/NameAliases.txt>`__
|
||||
has been added.
|
||||
|
||||
.. _string-escape-long-hex:
|
||||
|
|
|
@ -648,6 +648,12 @@ typing
|
|||
(Contributed by Nikita Sobolev in :gh:`137191`.)
|
||||
|
||||
|
||||
unicodedata
|
||||
-----------
|
||||
|
||||
* The Unicode database has been updated to Unicode 17.0.0.
|
||||
|
||||
|
||||
wave
|
||||
----
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
class UnicodeMethodsTest(unittest.TestCase):
|
||||
|
||||
# Update this if the database changes.
|
||||
expectedchecksum = '9e43ee3929471739680c0e705482b4ae1c4122e4'
|
||||
expectedchecksum = '8b2615a9fc627676cbc0b6fac0191177df97ef5f'
|
||||
|
||||
@requires_resource('cpu')
|
||||
def test_method_checksum(self):
|
||||
|
@ -77,7 +77,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
|
|||
|
||||
# Update this if the database changes. Make sure to do a full rebuild
|
||||
# (e.g. 'make distclean && make') to get the correct checksum.
|
||||
expectedchecksum = '23ab09ed4abdf93db23b97359108ed630dd8311d'
|
||||
expectedchecksum = '65670ae03a324c5f9e826a4de3e25bae4d73c9b7'
|
||||
|
||||
@requires_resource('cpu')
|
||||
def test_function_checksum(self):
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Update :mod:`unicodedata` database to Unicode 17.0.0.
|
|
@ -1020,13 +1020,14 @@ is_unified_ideograph(Py_UCS4 code)
|
|||
(0x3400 <= code && code <= 0x4DBF) || /* CJK Ideograph Extension A */
|
||||
(0x4E00 <= code && code <= 0x9FFF) || /* CJK Ideograph */
|
||||
(0x20000 <= code && code <= 0x2A6DF) || /* CJK Ideograph Extension B */
|
||||
(0x2A700 <= code && code <= 0x2B739) || /* CJK Ideograph Extension C */
|
||||
(0x2A700 <= code && code <= 0x2B73F) || /* CJK Ideograph Extension C */
|
||||
(0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
|
||||
(0x2B820 <= code && code <= 0x2CEA1) || /* CJK Ideograph Extension E */
|
||||
(0x2B820 <= code && code <= 0x2CEAD) || /* CJK Ideograph Extension E */
|
||||
(0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
|
||||
(0x2EBF0 <= code && code <= 0x2EE5D) || /* CJK Ideograph Extension I */
|
||||
(0x30000 <= code && code <= 0x3134A) || /* CJK Ideograph Extension G */
|
||||
(0x31350 <= code && code <= 0x323AF); /* CJK Ideograph Extension H */
|
||||
(0x31350 <= code && code <= 0x323AF) || /* CJK Ideograph Extension H */
|
||||
(0x323B0 <= code && code <= 0x33479); /* CJK Ideograph Extension J */
|
||||
}
|
||||
|
||||
/* macros used to determine if the given code point is in the PUA range that
|
||||
|
|
5919
Modules/unicodedata_db.h
generated
5919
Modules/unicodedata_db.h
generated
File diff suppressed because it is too large
Load diff
34081
Modules/unicodename_db.h
generated
34081
Modules/unicodename_db.h
generated
File diff suppressed because it is too large
Load diff
4066
Objects/unicodetype_db.h
generated
4066
Objects/unicodetype_db.h
generated
File diff suppressed because it is too large
Load diff
|
@ -44,7 +44,7 @@
|
|||
# * Doc/library/stdtypes.rst,
|
||||
# * Doc/library/unicodedata.rst
|
||||
# * Doc/reference/lexical_analysis.rst (three occurrences)
|
||||
UNIDATA_VERSION = "16.0.0"
|
||||
UNIDATA_VERSION = "17.0.0"
|
||||
UNICODE_DATA = "UnicodeData%s.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
|
||||
|
@ -104,13 +104,14 @@
|
|||
('3400', '4DBF'), # CJK Ideograph Extension A
|
||||
('4E00', '9FFF'), # CJK Ideograph
|
||||
('20000', '2A6DF'), # CJK Ideograph Extension B
|
||||
('2A700', '2B739'), # CJK Ideograph Extension C
|
||||
('2A700', '2B73F'), # CJK Ideograph Extension C
|
||||
('2B740', '2B81D'), # CJK Ideograph Extension D
|
||||
('2B820', '2CEA1'), # CJK Ideograph Extension E
|
||||
('2B820', '2CEAD'), # CJK Ideograph Extension E
|
||||
('2CEB0', '2EBE0'), # CJK Ideograph Extension F
|
||||
('2EBF0', '2EE5D'), # CJK Ideograph Extension I
|
||||
('30000', '3134A'), # CJK Ideograph Extension G
|
||||
('31350', '323AF'), # CJK Ideograph Extension H
|
||||
('323B0', '33479'), # CJK Ideograph Extension J
|
||||
]
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue