From c5ec267311f9312c75ebef330dd210cc64ec0c24 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Date: Tue, 14 Oct 2025 15:48:29 +0100
Subject: [PATCH] [3.13] gh-101828: Fix `jisx0213` codecs removing null
 characters (gh-139340) (gh-140112)

* [3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340)
(cherry picked from commit 87eadce3e0309d80a95e85d70a00028b5dca9907)

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>

* Restore accidentally removed line
---
 Lib/test/multibytecodec_support.py              | 17 +++++++++++++++++
 ...25-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst |  3 +++
 Modules/cjkcodecs/_codecs_iso2022.c             | 11 +++++++----
 Modules/cjkcodecs/_codecs_jp.c                  |  9 +++++++--
 4 files changed, 34 insertions(+), 6 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst

diff --git a/Lib/test/multibytecodec_support.py b/Lib/test/multibytecodec_support.py
index dbf0cc428e3..6b4c57d0b4b 100644
--- a/Lib/test/multibytecodec_support.py
+++ b/Lib/test/multibytecodec_support.py
@@ -282,6 +282,23 @@ def test_incrementalencoder_del_segfault(self):
         with self.assertRaises(AttributeError):
             del e.errors
 
+    def test_null_terminator(self):
+        # gh-101828: "\0" following other text must still encode to b'\x00'.
+        text = "フルーツ"
+        try:
+            text.encode(self.encoding)
+        except UnicodeEncodeError:
+            text = "Python is cool"  # fallback if the codec rejects the katakana
+        encode_w_null = (text + "\0").encode(self.encoding)
+        encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
+        self.assertTrue(encode_w_null.endswith(b'\x00'))
+        self.assertEqual(encode_w_null, encode_plus_null)
+
+        encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)  # "\0" mid-string and at end
+        encode_plus_null_2 = encode_plus_null + encode_plus_null
+        self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
+        self.assertEqual(encode_w_null_2, encode_plus_null_2)
+
 
 class TestBase_Mapping(unittest.TestCase):
     pass_enctest = []
diff --git a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
new file mode 100644
index 00000000000..1d100180c07
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
@@ -0,0 +1,3 @@
+Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
+``'euc_jis_2004'`` codecs removing null characters,
+which were wrongly treated as part of multi-character sequences.
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index e8835ad0909..bdbaca2c421 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
         return coded;
 
     case 2: /* second character of unicode pair */
-        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
-                                jisx0213_pair_encmap, JISX0213_ENCPAIRS);
-        if (coded != DBCINV)
-            return coded;
+        if (data[1] != 0) { /* Don't consume null char as part of pair */
+            coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+                                    jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+            if (coded != DBCINV) {
+                return coded;
+            }
+        }
         /* fall through */
 
     case -1: /* flush unterminated */
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index f7127487aa5..cd77888d551 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -192,8 +192,11 @@ ENCODER(euc_jis_2004)
                                 JISX0213_ENCPAIRS);
                             if (code == DBCINV)
                                 return 1;
-                        } else
+                        }
+                        else if (c2 != 0) {
+                            /* Don't consume null char as part of pair */
                             insize = 2;
+                        }
                     }
                 }
             }
@@ -611,8 +614,10 @@ ENCODER(shift_jis_2004)
                             if (code == DBCINV)
                                 return 1;
                             }
-                            else
+                            else if (ch2 != 0) {
+                                /* Don't consume null char as part of pair */
                                 insize = 2;
+                            }
                         }
                     }
                 }