mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
Merge 286ab7ba6e into 7099af8f5e
This commit is contained in:
commit
c4868f10c1
4 changed files with 8 additions and 15 deletions
|
|
@@ -2479,12 +2479,6 @@ expression support in the :mod:`re` module).
|
|||
+-----------------------+-----------------------------+
|
||||
| ``\f`` or ``\x0c`` | Form Feed |
|
||||
+-----------------------+-----------------------------+
|
||||
| ``\x1c`` | File Separator |
|
||||
+-----------------------+-----------------------------+
|
||||
| ``\x1d`` | Group Separator |
|
||||
+-----------------------+-----------------------------+
|
||||
| ``\x1e`` | Record Separator |
|
||||
+-----------------------+-----------------------------+
|
||||
| ``\x85`` | Next Line (C1 Control Code) |
|
||||
+-----------------------+-----------------------------+
|
||||
| ``\u2028`` | Line Separator |
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,4 @@
|
|||
Remove Unicode characters that have the bidirectional B property but are not
|
||||
mandatory line breakers (U+001C, U+001D and U+001E) from the list of
|
||||
line-breaking characters. ``str.splitlines()`` will not break on these
|
||||
characters any more.
|
||||
8
Objects/unicodetype_db.h
generated
8
Objects/unicodetype_db.h
generated
|
|
@@ -2367,7 +2367,7 @@ static const unsigned short index1[] = {
|
|||
|
||||
static const unsigned short index2[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 2, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4,
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4,
|
||||
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 5, 4, 4, 4, 4, 4, 4, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 4, 4, 4, 5, 17, 5, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
|
|
@@ -6581,8 +6581,7 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
|
|||
}
|
||||
|
||||
/* Returns 1 for Unicode characters having the line break
|
||||
* property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
|
||||
* type 'B', 0 otherwise.
|
||||
* property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.
|
||||
*/
|
||||
int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
|
||||
{
|
||||
|
|
@@ -6591,9 +6590,6 @@ int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
|
|||
case 0x000B:
|
||||
case 0x000C:
|
||||
case 0x000D:
|
||||
case 0x001C:
|
||||
case 0x001D:
|
||||
case 0x001E:
|
||||
case 0x0085:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
|
|
|
|||
|
|
@@ -438,7 +438,7 @@ def makeunicodetype(unicode, trace):
|
|||
flags |= ALPHA_MASK
|
||||
if "Lowercase" in properties:
|
||||
flags |= LOWER_MASK
|
||||
if 'Line_Break' in properties or bidirectional == "B":
|
||||
if 'Line_Break' in properties:
|
||||
flags |= LINEBREAK_MASK
|
||||
linebreaks.append(char)
|
||||
if category == "Zs" or bidirectional in ("WS", "B", "S"):
|
||||
|
|
@@ -604,8 +604,7 @@ def makeunicodetype(unicode, trace):
|
|||
|
||||
# Generate code for _PyUnicode_IsLinebreak()
|
||||
fprint("/* Returns 1 for Unicode characters having the line break")
|
||||
fprint(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional")
|
||||
fprint(" * type 'B', 0 otherwise.")
|
||||
fprint(" * property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.")
|
||||
fprint(" */")
|
||||
fprint('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)')
|
||||
fprint('{')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue