gh-139156: Optimize the UTF-7 encoder (#139253)

Remove base64SetO and base64WhiteSpace parameters.
2025-11-01 14:11:41 +00:00 · 2025-09-24 17:57:29 +02:00 · 2025-09-24 17:57:29 +02:00 · 8d83b7df3f
commit 8d83b7df3f
parent c7b11b7546
3 changed files with 6 additions and 13 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4670,15 +4670,12 @@ char utf7_category[128] = {

 /* ENCODE_DIRECT: this character should be encoded as itself.  The
 * answer depends on whether we are encoding set O as itself, and also
- * on whether we are encoding whitespace as itself.  RFC2152 makes it
+ * on whether we are encoding whitespace as itself.  RFC 2152 makes it
 * clear that the answers to these questions vary between
 * applications, so this code needs to be flexible.  */

-#define ENCODE_DIRECT(c, directO, directWS)             \
-    ((c) < 128 && (c) > 0 &&                            \
-     ((utf7_category[(c)] == 0) ||                      \
-      (directWS && (utf7_category[(c)] == 2)) ||        \
-      (directO && (utf7_category[(c)] == 1))))
+#define ENCODE_DIRECT(c) \
+    ((c) < 128 && (c) > 0 && ((utf7_category[(c)] != 3)))

 PyObject *
 PyUnicode_DecodeUTF7(const char *s,
@@ -4895,8 +4892,6 @@ PyUnicode_DecodeUTF7Stateful(const char *s,

 PyObject *
 _PyUnicode_EncodeUTF7(PyObject *str,
-                      int base64SetO,
-                      int base64WhiteSpace,
                      const char *errors)
 {
    Py_ssize_t len = PyUnicode_GET_LENGTH(str);
@@ -4923,7 +4918,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);

        if (inShift) {
-            if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
+            if (ENCODE_DIRECT(ch)) {
                /* shifting out */
                if (base64bits) { /* output remaining bits */
                    *out++ = TO_BASE64(base64buffer << (6-base64bits));
@@ -4947,7 +4942,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
                *out++ = '+';
                        *out++ = '-';
            }
-            else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
+            else if (ENCODE_DIRECT(ch)) {
                *out++ = (char) ch;
            }
            else {