From e535e89b901d71cb7ae591776507e9b35cb71f84 Mon Sep 17 00:00:00 2001
From: "L. Grobol" <lgrobol@tuta.com>
Date: Thu, 10 Apr 2025 15:17:17 +0200
Subject: [PATCH 1/4] Remove from linebreak/_PyUnicode_IsLinebreak characters
 that are bidirectional B but don't have the line break property

This is exactly three characters: U+001C "FILE SEPARATOR", U+001D
"GROUP SEPARATOR" and U+001E "RECORD SEPARATOR". All three have the
Line_Break property value CM (Combining Mark) rather than one of the
mandatory break values BK, CR, LF or NL, so they should *not* be
treated as line breaks.
---
 Tools/unicode/makeunicodedata.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 889ae8fc869..ba6c494c375 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -437,7 +437,7 @@ def makeunicodetype(unicode, trace):
                 flags |= ALPHA_MASK
             if "Lowercase" in properties:
                 flags |= LOWER_MASK
-            if 'Line_Break' in properties or bidirectional == "B":
+            if 'Line_Break' in properties:
                 flags |= LINEBREAK_MASK
                 linebreaks.append(char)
             if category == "Zs" or bidirectional in ("WS", "B", "S"):
@@ -603,8 +603,7 @@ def makeunicodetype(unicode, trace):
 
         # Generate code for _PyUnicode_IsLinebreak()
         fprint("/* Returns 1 for Unicode characters having the line break")
-        fprint(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional")
-        fprint(" * type 'B', 0 otherwise.")
+        fprint(" * property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.")
         fprint(" */")
         fprint('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)')
         fprint('{')

From 22633de8b339d6672da8081ddbbf360d76ed72e0 Mon Sep 17 00:00:00 2001
From: "L. Grobol" <lgrobol@tuta.com>
Date: Thu, 10 Apr 2025 15:23:21 +0200
Subject: [PATCH 2/4] remove the offending characters from the splitlines table

---
 Doc/library/stdtypes.rst | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 48d179c2703..ee07debe1e0 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -2295,12 +2295,6 @@ expression support in the :mod:`re` module).
    +-----------------------+-----------------------------+
    | ``\f`` or ``\x0c``    | Form Feed                   |
    +-----------------------+-----------------------------+
-   | ``\x1c``              | File Separator              |
-   +-----------------------+-----------------------------+
-   | ``\x1d``              | Group Separator             |
-   +-----------------------+-----------------------------+
-   | ``\x1e``              | Record Separator            |
-   +-----------------------+-----------------------------+
    | ``\x85``              | Next Line (C1 Control Code) |
    +-----------------------+-----------------------------+
    | ``\u2028``            | Line Separator              |

From dfe0b2e5319459a3c6f5291f740495c35c12b84d Mon Sep 17 00:00:00 2001
From: "L. Grobol" <lgrobol@tuta.com>
Date: Thu, 10 Apr 2025 15:47:56 +0200
Subject: [PATCH 3/4] update unicodetype_db.h

---
 Objects/unicodetype_db.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
index 5be810dd674..93de16bc42b 100644
--- a/Objects/unicodetype_db.h
+++ b/Objects/unicodetype_db.h
@@ -2971,7 +2971,7 @@ static const unsigned short index1[] = {
 
 static const unsigned short index2[] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 2, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4,
+    0, 0, 0, 0, 1, 1, 1, 1, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 5, 4,
     6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 5, 4, 4, 4, 4, 4, 4, 16, 16, 16, 16,
     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
     16, 16, 16, 16, 4, 4, 4, 5, 17, 5, 18, 18, 18, 18, 18, 18, 18, 18, 18,
@@ -6711,8 +6711,7 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
 }
 
 /* Returns 1 for Unicode characters having the line break
- * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
- * type 'B', 0 otherwise.
+ * property 'BK', 'CR', 'LF' or 'NL', 0 otherwise.
  */
 int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
 {
@@ -6721,9 +6720,6 @@ int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
     case 0x000B:
     case 0x000C:
     case 0x000D:
-    case 0x001C:
-    case 0x001D:
-    case 0x001E:
     case 0x0085:
     case 0x2028:
     case 0x2029:

From eadb0e21039dd0316188264649187f4ea9a2f751 Mon Sep 17 00:00:00 2001
From: "L. Grobol" <lgrobol@tuta.com>
Date: Thu, 10 Apr 2025 18:27:53 +0200
Subject: [PATCH 4/4] add tentative news blurb

---
 .../2025-04-10-18-26-49.gh-issue-66428.sJ9yJn.rst             | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-10-18-26-49.gh-issue-66428.sJ9yJn.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-10-18-26-49.gh-issue-66428.sJ9yJn.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-10-18-26-49.gh-issue-66428.sJ9yJn.rst
new file mode 100644
index 00000000000..96a504e504c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-10-18-26-49.gh-issue-66428.sJ9yJn.rst
@@ -0,0 +1,4 @@
+Remove the Unicode characters U+001C, U+001D and U+001E, which have the
+bidirectional class B but no mandatory line break property, from the list of
+line-breaking characters. :meth:`str.splitlines` no longer splits on these
+characters.