mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 10:44:55 +00:00 
			
		
		
		
	gh-129349: Accept bytes in bytes.fromhex()/bytearray.fromhex() (#129844)
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
		
							parent
							
								
									405a2d74cb
								
							
						
					
					
						commit
						e0637cebe5
					
				
					 8 changed files with 90 additions and 69 deletions
				
			
		|  | @ -2744,6 +2744,10 @@ data and are closely related to string objects in a variety of other ways. | ||||||
|          :meth:`bytes.fromhex` now skips all ASCII whitespace in the string, |          :meth:`bytes.fromhex` now skips all ASCII whitespace in the string, | ||||||
|          not just spaces. |          not just spaces. | ||||||
| 
 | 
 | ||||||
|  |       .. versionchanged:: next | ||||||
|  |          :meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and | ||||||
|  |          :term:`bytes-like objects <bytes-like object>` as input. | ||||||
|  | 
 | ||||||
|    A reverse conversion function exists to transform a bytes object into its |    A reverse conversion function exists to transform a bytes object into its | ||||||
|    hexadecimal representation. |    hexadecimal representation. | ||||||
| 
 | 
 | ||||||
|  | @ -2829,6 +2833,10 @@ objects. | ||||||
|          :meth:`bytearray.fromhex` now skips all ASCII whitespace in the string, |          :meth:`bytearray.fromhex` now skips all ASCII whitespace in the string, | ||||||
|          not just spaces. |          not just spaces. | ||||||
| 
 | 
 | ||||||
|  |       .. versionchanged:: next | ||||||
|  |          :meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and | ||||||
|  |          :term:`bytes-like objects <bytes-like object>` as input. | ||||||
|  | 
 | ||||||
|    A reverse conversion function exists to transform a bytearray object into its |    A reverse conversion function exists to transform a bytearray object into its | ||||||
|    hexadecimal representation. |    hexadecimal representation. | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -354,6 +354,10 @@ Other language changes | ||||||
|   (with :func:`format` or :ref:`f-strings`). |   (with :func:`format` or :ref:`f-strings`). | ||||||
|   (Contributed by Sergey B Kirpichev in :gh:`87790`.) |   (Contributed by Sergey B Kirpichev in :gh:`87790`.) | ||||||
| 
 | 
 | ||||||
|  | * The :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` methods now accept | ||||||
|  |   ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. | ||||||
|  |   (Contributed by Daniel Pope in :gh:`129349`.) | ||||||
|  | 
 | ||||||
| * ``\B`` in :mod:`regular expression <re>` now matches empty input string. | * ``\B`` in :mod:`regular expression <re>` now matches empty input string. | ||||||
|   Now it is always the opposite of ``\b``. |   Now it is always the opposite of ``\b``. | ||||||
|   (Contributed by Serhiy Storchaka in :gh:`124130`.) |   (Contributed by Serhiy Storchaka in :gh:`124130`.) | ||||||
|  |  | ||||||
|  | @ -450,13 +450,34 @@ def test_fromhex(self): | ||||||
| 
 | 
 | ||||||
|         # check that ASCII whitespace is ignored |         # check that ASCII whitespace is ignored | ||||||
|         self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b) |         self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b) | ||||||
|  |         self.assertEqual(self.type2test.fromhex(b' 1A\n2B\t30\v'), b) | ||||||
|         for c in "\x09\x0A\x0B\x0C\x0D\x20": |         for c in "\x09\x0A\x0B\x0C\x0D\x20": | ||||||
|             self.assertEqual(self.type2test.fromhex(c), self.type2test()) |             self.assertEqual(self.type2test.fromhex(c), self.type2test()) | ||||||
|         for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028": |         for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028": | ||||||
|             self.assertRaises(ValueError, self.type2test.fromhex, c) |             self.assertRaises(ValueError, self.type2test.fromhex, c) | ||||||
| 
 | 
 | ||||||
|  |         # Check that we can parse bytes and bytearray | ||||||
|  |         tests = [ | ||||||
|  |             ("bytes", bytes), | ||||||
|  |             ("bytearray", bytearray), | ||||||
|  |             ("memoryview", memoryview), | ||||||
|  |             ("array.array", lambda bs: array.array('B', bs)), | ||||||
|  |         ] | ||||||
|  |         for name, factory in tests: | ||||||
|  |             with self.subTest(name=name): | ||||||
|  |                 self.assertEqual(self.type2test.fromhex(factory(b' 1A 2B 30 ')), b) | ||||||
|  | 
 | ||||||
|  |         # Invalid bytes are rejected | ||||||
|  |         for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0": | ||||||
|  |             b = bytes([30, 31, u8]) | ||||||
|  |             self.assertRaises(ValueError, self.type2test.fromhex, b) | ||||||
|  | 
 | ||||||
|         self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') |         self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') | ||||||
|         self.assertRaises(TypeError, self.type2test.fromhex, b'1B') |         with self.assertRaisesRegex( | ||||||
|  |             TypeError, | ||||||
|  |             r'fromhex\(\) argument must be str or bytes-like, not tuple', | ||||||
|  |         ): | ||||||
|  |             self.type2test.fromhex(()) | ||||||
|         self.assertRaises(ValueError, self.type2test.fromhex, 'a') |         self.assertRaises(ValueError, self.type2test.fromhex, 'a') | ||||||
|         self.assertRaises(ValueError, self.type2test.fromhex, 'rt') |         self.assertRaises(ValueError, self.type2test.fromhex, 'rt') | ||||||
|         self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd') |         self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd') | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accept ASCII | ||||||
|  | :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. | ||||||
|  | @ -2533,7 +2533,7 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends) | ||||||
| @classmethod | @classmethod | ||||||
| bytearray.fromhex | bytearray.fromhex | ||||||
| 
 | 
 | ||||||
|     string: unicode |     string: object | ||||||
|     / |     / | ||||||
| 
 | 
 | ||||||
| Create a bytearray object from a string of hexadecimal numbers. | Create a bytearray object from a string of hexadecimal numbers. | ||||||
|  | @ -2543,8 +2543,8 @@ Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef') | ||||||
| [clinic start generated code]*/ | [clinic start generated code]*/ | ||||||
| 
 | 
 | ||||||
| static PyObject * | static PyObject * | ||||||
| bytearray_fromhex_impl(PyTypeObject *type, PyObject *string) | bytearray_fromhex(PyTypeObject *type, PyObject *string) | ||||||
| /*[clinic end generated code: output=8f0f0b6d30fb3ba0 input=f033a16d1fb21f48]*/ | /*[clinic end generated code: output=da84dc708e9c4b36 input=7e314e5b2d7ab484]*/ | ||||||
| { | { | ||||||
|     PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type); |     PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type); | ||||||
|     if (type != &PyByteArray_Type && result != NULL) { |     if (type != &PyByteArray_Type && result != NULL) { | ||||||
|  |  | ||||||
|  | @ -2484,7 +2484,7 @@ bytes_splitlines_impl(PyBytesObject *self, int keepends) | ||||||
| @classmethod | @classmethod | ||||||
| bytes.fromhex | bytes.fromhex | ||||||
| 
 | 
 | ||||||
|     string: unicode |     string: object | ||||||
|     / |     / | ||||||
| 
 | 
 | ||||||
| Create a bytes object from a string of hexadecimal numbers. | Create a bytes object from a string of hexadecimal numbers. | ||||||
|  | @ -2494,8 +2494,8 @@ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'. | ||||||
| [clinic start generated code]*/ | [clinic start generated code]*/ | ||||||
| 
 | 
 | ||||||
| static PyObject * | static PyObject * | ||||||
| bytes_fromhex_impl(PyTypeObject *type, PyObject *string) | bytes_fromhex(PyTypeObject *type, PyObject *string) | ||||||
| /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/ | /*[clinic end generated code: output=d458ec88195da6b3 input=f37d98ed51088a21]*/ | ||||||
| { | { | ||||||
|     PyObject *result = _PyBytes_FromHex(string, 0); |     PyObject *result = _PyBytes_FromHex(string, 0); | ||||||
|     if (type != &PyBytes_Type && result != NULL) { |     if (type != &PyBytes_Type && result != NULL) { | ||||||
|  | @ -2510,37 +2510,55 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | ||||||
|     char *buf; |     char *buf; | ||||||
|     Py_ssize_t hexlen, invalid_char; |     Py_ssize_t hexlen, invalid_char; | ||||||
|     unsigned int top, bot; |     unsigned int top, bot; | ||||||
|     const Py_UCS1 *str, *end; |     const Py_UCS1 *str, *start, *end; | ||||||
|     _PyBytesWriter writer; |     _PyBytesWriter writer; | ||||||
|  |     Py_buffer view; | ||||||
|  |     view.obj = NULL; | ||||||
| 
 | 
 | ||||||
|     _PyBytesWriter_Init(&writer); |     _PyBytesWriter_Init(&writer); | ||||||
|     writer.use_bytearray = use_bytearray; |     writer.use_bytearray = use_bytearray; | ||||||
| 
 | 
 | ||||||
|     assert(PyUnicode_Check(string)); |     if (PyUnicode_Check(string)) { | ||||||
|     hexlen = PyUnicode_GET_LENGTH(string); |         hexlen = PyUnicode_GET_LENGTH(string); | ||||||
| 
 | 
 | ||||||
|     if (!PyUnicode_IS_ASCII(string)) { |         if (!PyUnicode_IS_ASCII(string)) { | ||||||
|         const void *data = PyUnicode_DATA(string); |             const void *data = PyUnicode_DATA(string); | ||||||
|         int kind = PyUnicode_KIND(string); |             int kind = PyUnicode_KIND(string); | ||||||
|         Py_ssize_t i; |             Py_ssize_t i; | ||||||
| 
 | 
 | ||||||
|         /* search for the first non-ASCII character */ |             /* search for the first non-ASCII character */ | ||||||
|         for (i = 0; i < hexlen; i++) { |             for (i = 0; i < hexlen; i++) { | ||||||
|             if (PyUnicode_READ(kind, data, i) >= 128) |                 if (PyUnicode_READ(kind, data, i) >= 128) | ||||||
|                 break; |                     break; | ||||||
|  |             } | ||||||
|  |             invalid_char = i; | ||||||
|  |             goto error; | ||||||
|         } |         } | ||||||
|         invalid_char = i; |  | ||||||
|         goto error; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); |         assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); | ||||||
|     str = PyUnicode_1BYTE_DATA(string); |         str = PyUnicode_1BYTE_DATA(string); | ||||||
|  |     } | ||||||
|  |     else if (PyObject_CheckBuffer(string)) { | ||||||
|  |         if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) { | ||||||
|  |             return NULL; | ||||||
|  |         } | ||||||
|  |         hexlen = view.len; | ||||||
|  |         str = view.buf; | ||||||
|  |     } | ||||||
|  |     else { | ||||||
|  |         PyErr_Format(PyExc_TypeError, | ||||||
|  |                      "fromhex() argument must be str or bytes-like, not %T", | ||||||
|  |                      string); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     /* This overestimates if there are spaces */ |     /* This overestimates if there are spaces */ | ||||||
|     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); |     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); | ||||||
|     if (buf == NULL) |     if (buf == NULL) { | ||||||
|         return NULL; |         goto release_buffer; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|  |     start = str; | ||||||
|     end = str + hexlen; |     end = str + hexlen; | ||||||
|     while (str < end) { |     while (str < end) { | ||||||
|         /* skip over spaces in the input */ |         /* skip over spaces in the input */ | ||||||
|  | @ -2554,7 +2572,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | ||||||
| 
 | 
 | ||||||
|         top = _PyLong_DigitValue[*str]; |         top = _PyLong_DigitValue[*str]; | ||||||
|         if (top >= 16) { |         if (top >= 16) { | ||||||
|             invalid_char = str - PyUnicode_1BYTE_DATA(string); |             invalid_char = str - start; | ||||||
|             goto error; |             goto error; | ||||||
|         } |         } | ||||||
|         str++; |         str++; | ||||||
|  | @ -2565,7 +2583,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | ||||||
|             if (str >= end){ |             if (str >= end){ | ||||||
|                 invalid_char = -1; |                 invalid_char = -1; | ||||||
|             } else { |             } else { | ||||||
|                 invalid_char = str - PyUnicode_1BYTE_DATA(string); |                 invalid_char = str - start; | ||||||
|             } |             } | ||||||
|             goto error; |             goto error; | ||||||
|         } |         } | ||||||
|  | @ -2574,6 +2592,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | ||||||
|         *buf++ = (unsigned char)((top << 4) + bot); |         *buf++ = (unsigned char)((top << 4) + bot); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     if (view.obj != NULL) { | ||||||
|  |        PyBuffer_Release(&view); | ||||||
|  |     } | ||||||
|     return _PyBytesWriter_Finish(&writer, buf); |     return _PyBytesWriter_Finish(&writer, buf); | ||||||
| 
 | 
 | ||||||
|   error: |   error: | ||||||
|  | @ -2586,6 +2607,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | ||||||
|                      "fromhex() arg at position %zd", invalid_char); |                      "fromhex() arg at position %zd", invalid_char); | ||||||
|     } |     } | ||||||
|     _PyBytesWriter_Dealloc(&writer); |     _PyBytesWriter_Dealloc(&writer); | ||||||
|  | 
 | ||||||
|  |   release_buffer: | ||||||
|  |     if (view.obj != NULL) { | ||||||
|  |         PyBuffer_Release(&view); | ||||||
|  |     } | ||||||
|     return NULL; |     return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										22
									
								
								Objects/clinic/bytearrayobject.c.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										22
									
								
								Objects/clinic/bytearrayobject.c.h
									
										
									
										generated
									
									
									
								
							|  | @ -1601,26 +1601,6 @@ PyDoc_STRVAR(bytearray_fromhex__doc__, | ||||||
| #define BYTEARRAY_FROMHEX_METHODDEF    \ | #define BYTEARRAY_FROMHEX_METHODDEF    \ | ||||||
|     {"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__}, |     {"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__}, | ||||||
| 
 | 
 | ||||||
| static PyObject * |  | ||||||
| bytearray_fromhex_impl(PyTypeObject *type, PyObject *string); |  | ||||||
| 
 |  | ||||||
| static PyObject * |  | ||||||
| bytearray_fromhex(PyTypeObject *type, PyObject *arg) |  | ||||||
| { |  | ||||||
|     PyObject *return_value = NULL; |  | ||||||
|     PyObject *string; |  | ||||||
| 
 |  | ||||||
|     if (!PyUnicode_Check(arg)) { |  | ||||||
|         _PyArg_BadArgument("fromhex", "argument", "str", arg); |  | ||||||
|         goto exit; |  | ||||||
|     } |  | ||||||
|     string = arg; |  | ||||||
|     return_value = bytearray_fromhex_impl(type, string); |  | ||||||
| 
 |  | ||||||
| exit: |  | ||||||
|     return return_value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| PyDoc_STRVAR(bytearray_hex__doc__, | PyDoc_STRVAR(bytearray_hex__doc__, | ||||||
| "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" | "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" | ||||||
| "--\n" | "--\n" | ||||||
|  | @ -1789,4 +1769,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) | ||||||
| { | { | ||||||
|     return bytearray_sizeof_impl((PyByteArrayObject *)self); |     return bytearray_sizeof_impl((PyByteArrayObject *)self); | ||||||
| } | } | ||||||
| /*[clinic end generated code: output=7c924a56e0a8bfe6 input=a9049054013a1b77]*/ | /*[clinic end generated code: output=13a4231325b7d3c1 input=a9049054013a1b77]*/ | ||||||
|  |  | ||||||
							
								
								
									
										22
									
								
								Objects/clinic/bytesobject.c.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										22
									
								
								Objects/clinic/bytesobject.c.h
									
										
									
										generated
									
									
									
								
							|  | @ -1204,26 +1204,6 @@ PyDoc_STRVAR(bytes_fromhex__doc__, | ||||||
| #define BYTES_FROMHEX_METHODDEF    \ | #define BYTES_FROMHEX_METHODDEF    \ | ||||||
|     {"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__}, |     {"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__}, | ||||||
| 
 | 
 | ||||||
| static PyObject * |  | ||||||
| bytes_fromhex_impl(PyTypeObject *type, PyObject *string); |  | ||||||
| 
 |  | ||||||
| static PyObject * |  | ||||||
| bytes_fromhex(PyTypeObject *type, PyObject *arg) |  | ||||||
| { |  | ||||||
|     PyObject *return_value = NULL; |  | ||||||
|     PyObject *string; |  | ||||||
| 
 |  | ||||||
|     if (!PyUnicode_Check(arg)) { |  | ||||||
|         _PyArg_BadArgument("fromhex", "argument", "str", arg); |  | ||||||
|         goto exit; |  | ||||||
|     } |  | ||||||
|     string = arg; |  | ||||||
|     return_value = bytes_fromhex_impl(type, string); |  | ||||||
| 
 |  | ||||||
| exit: |  | ||||||
|     return return_value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| PyDoc_STRVAR(bytes_hex__doc__, | PyDoc_STRVAR(bytes_hex__doc__, | ||||||
| "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" | "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" | ||||||
| "--\n" | "--\n" | ||||||
|  | @ -1404,4 +1384,4 @@ skip_optional_pos: | ||||||
| exit: | exit: | ||||||
|     return return_value; |     return return_value; | ||||||
| } | } | ||||||
| /*[clinic end generated code: output=61cb2cf6506df4c6 input=a9049054013a1b77]*/ | /*[clinic end generated code: output=967aae4b46423586 input=a9049054013a1b77]*/ | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Daniel Pope
						Daniel Pope