Stop treating characters as line breaks in linebreak/_PyUnicode_IsLinebreak when they have bidirectional type B but no 'BK', 'CR', 'LF' or 'NL' line break property

This affects exactly three characters: U+001C "FILE SEPARATOR", U+001D "GROUP SEPARATOR" and U+001E "RECORD SEPARATOR". All three have the Combining Mark (CM) line breaking property, meaning that they should *not* be treated as line breaks.
This commit is contained in:
L. Grobol 2025-04-10 15:17:17 +02:00
parent 5fbe23ee4e
commit e535e89b90

View file

@@ -437,7 +437,7 @@ def makeunicodetype(unicode, trace):
flags |= ALPHA_MASK
if "Lowercase" in properties:
flags |= LOWER_MASK
if 'Line_Break' in properties or bidirectional == "B":
if 'Line_Break' in properties:
flags |= LINEBREAK_MASK
linebreaks.append(char)
if category == "Zs" or bidirectional in ("WS", "B", "S"):
@@ -603,8 +603,7 @@ def makeunicodetype(unicode, trace):
# Generate code for _PyUnicode_IsLinebreak()
fprint("/* Returns 1 for Unicode characters having the line break")
fprint(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional")
fprint(" * type 'B', 0 otherwise.")
fprint(" * property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.")
fprint(" */")
fprint('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)')
fprint('{')