This commit is contained in:
L. Grobol 2025-12-08 06:10:43 +02:00 committed by GitHub
commit c4868f10c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 8 additions and 15 deletions

View file

@ -2479,12 +2479,6 @@ expression support in the :mod:`re` module).
+-----------------------+-----------------------------+
| ``\f`` or ``\x0c`` | Form Feed |
+-----------------------+-----------------------------+
| ``\x1c`` | File Separator |
+-----------------------+-----------------------------+
| ``\x1d`` | Group Separator |
+-----------------------+-----------------------------+
| ``\x1e`` | Record Separator |
+-----------------------+-----------------------------+
| ``\x85`` | Next Line (C1 Control Code) |
+-----------------------+-----------------------------+
| ``\u2028`` | Line Separator |

View file

@ -0,0 +1,4 @@
Remove the Unicode characters that have bidirectional class B (Paragraph
Separator) but are not mandatory line breaks (U+001C, U+001D and U+001E) from
the set of line-breaking characters. ``str.splitlines()`` no longer splits on
these characters.

View file

@ -2367,7 +2367,7 @@ static const unsigned short index1[] = {
static const unsigned short index2[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 2, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4,
0, 0, 0, 0, 1, 1, 1, 1, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 5, 4, 4, 4, 4, 4, 4, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 4, 4, 4, 5, 17, 5, 18, 18, 18, 18, 18, 18, 18, 18, 18,
@ -6581,8 +6581,7 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
}
/* Returns 1 for Unicode characters having the line break
* property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
* type 'B', 0 otherwise.
* property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.
*/
int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
{
@ -6591,9 +6590,6 @@ int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
case 0x000B:
case 0x000C:
case 0x000D:
case 0x001C:
case 0x001D:
case 0x001E:
case 0x0085:
case 0x2028:
case 0x2029:

View file

@ -438,7 +438,7 @@ def makeunicodetype(unicode, trace):
flags |= ALPHA_MASK
if "Lowercase" in properties:
flags |= LOWER_MASK
if 'Line_Break' in properties or bidirectional == "B":
if 'Line_Break' in properties:
flags |= LINEBREAK_MASK
linebreaks.append(char)
if category == "Zs" or bidirectional in ("WS", "B", "S"):
@ -604,8 +604,7 @@ def makeunicodetype(unicode, trace):
# Generate code for _PyUnicode_IsLinebreak()
fprint("/* Returns 1 for Unicode characters having the line break")
fprint(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional")
fprint(" * type 'B', 0 otherwise.")
fprint(" * property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.")
fprint(" */")
fprint('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)')
fprint('{')