Remove from linebreak/_PyUnicode_IsLinebreak the characters that are bidirectional B but do not have a line break property of BK, CR, LF or NL

This affects exactly three characters: U+001C "FILE SEPARATOR", U+001D "GROUP SEPARATOR" and U+001E "RECORD SEPARATOR". All of them have the Line_Break class CM (Combining Mark), meaning that they should *not* be treated as line breaks.
2025-12-08 06:10:17 +00:00 · 2025-04-10 15:17:17 +02:00 · 2025-04-10 15:17:17 +02:00 · e535e89b90
commit e535e89b90
parent 5fbe23ee4e
1 changed files with 2 additions and 3 deletions
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -437,7 +437,7 @@ def makeunicodetype(unicode, trace):
                flags |= ALPHA_MASK
            if "Lowercase" in properties:
                flags |= LOWER_MASK
-            if 'Line_Break' in properties or bidirectional == "B":
+            if 'Line_Break' in properties:
                flags |= LINEBREAK_MASK
                linebreaks.append(char)
            if category == "Zs" or bidirectional in ("WS", "B", "S"):
@@ -603,8 +603,7 @@ def makeunicodetype(unicode, trace):

        # Generate code for _PyUnicode_IsLinebreak()
        fprint("/* Returns 1 for Unicode characters having the line break")
-        fprint(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional")
-        fprint(" * type 'B', 0 otherwise.")
+        fprint(" * property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.")
        fprint(" */")
        fprint('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)')
        fprint('{')