mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	gh-129349: Accept bytes in bytes.fromhex()/bytearray.fromhex() (#129844)
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
		
							parent
							
								
									405a2d74cb
								
							
						
					
					
						commit
						e0637cebe5
					
				
					 8 changed files with 90 additions and 69 deletions
				
			
		|  | @ -2744,6 +2744,10 @@ data and are closely related to string objects in a variety of other ways. | |||
|          :meth:`bytes.fromhex` now skips all ASCII whitespace in the string, | ||||
|          not just spaces. | ||||
| 
 | ||||
|       .. versionchanged:: next | ||||
|          :meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and | ||||
|          :term:`bytes-like objects <bytes-like object>` as input. | ||||
| 
 | ||||
|    A reverse conversion function exists to transform a bytes object into its | ||||
|    hexadecimal representation. | ||||
| 
 | ||||
|  | @ -2829,6 +2833,10 @@ objects. | |||
|          :meth:`bytearray.fromhex` now skips all ASCII whitespace in the string, | ||||
|          not just spaces. | ||||
| 
 | ||||
|       .. versionchanged:: next | ||||
|          :meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and | ||||
|          :term:`bytes-like objects <bytes-like object>` as input. | ||||
| 
 | ||||
|    A reverse conversion function exists to transform a bytearray object into its | ||||
|    hexadecimal representation. | ||||
| 
 | ||||
|  |  | |||
|  | @ -354,6 +354,10 @@ Other language changes | |||
|   (with :func:`format` or :ref:`f-strings`). | ||||
|   (Contributed by Sergey B Kirpichev in :gh:`87790`.) | ||||
| 
 | ||||
| * The :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` methods now accept | ||||
|   ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. | ||||
|   (Contributed by Daniel Pope in :gh:`129349`.) | ||||
| 
 | ||||
| * ``\B`` in :mod:`regular expression <re>` now matches empty input string. | ||||
|   Now it is always the opposite of ``\b``. | ||||
|   (Contributed by Serhiy Storchaka in :gh:`124130`.) | ||||
|  |  | |||
|  | @ -450,13 +450,34 @@ def test_fromhex(self): | |||
| 
 | ||||
|         # check that ASCII whitespace is ignored | ||||
|         self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b) | ||||
|         self.assertEqual(self.type2test.fromhex(b' 1A\n2B\t30\v'), b) | ||||
|         for c in "\x09\x0A\x0B\x0C\x0D\x20": | ||||
|             self.assertEqual(self.type2test.fromhex(c), self.type2test()) | ||||
|         for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028": | ||||
|             self.assertRaises(ValueError, self.type2test.fromhex, c) | ||||
| 
 | ||||
|         # Check that we can parse bytes-like objects (bytes, bytearray, memoryview, array.array) | ||||
|         tests = [ | ||||
|             ("bytes", bytes), | ||||
|             ("bytearray", bytearray), | ||||
|             ("memoryview", memoryview), | ||||
|             ("array.array", lambda bs: array.array('B', bs)), | ||||
|         ] | ||||
|         for name, factory in tests: | ||||
|             with self.subTest(name=name): | ||||
|                 self.assertEqual(self.type2test.fromhex(factory(b' 1A 2B 30 ')), b) | ||||
| 
 | ||||
|         # Invalid bytes are rejected | ||||
|         for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0": | ||||
|             b = bytes([30, 31, u8]) | ||||
|             self.assertRaises(ValueError, self.type2test.fromhex, b) | ||||
| 
 | ||||
|         self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') | ||||
|         self.assertRaises(TypeError, self.type2test.fromhex, b'1B') | ||||
|         with self.assertRaisesRegex( | ||||
|             TypeError, | ||||
|             r'fromhex\(\) argument must be str or bytes-like, not tuple', | ||||
|         ): | ||||
|             self.type2test.fromhex(()) | ||||
|         self.assertRaises(ValueError, self.type2test.fromhex, 'a') | ||||
|         self.assertRaises(ValueError, self.type2test.fromhex, 'rt') | ||||
|         self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd') | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accept ASCII | ||||
| :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. | ||||
|  | @ -2533,7 +2533,7 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends) | |||
| @classmethod | ||||
| bytearray.fromhex | ||||
| 
 | ||||
|     string: unicode | ||||
|     string: object | ||||
|     / | ||||
| 
 | ||||
| Create a bytearray object from a string of hexadecimal numbers. | ||||
|  | @ -2543,8 +2543,8 @@ Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef') | |||
| [clinic start generated code]*/ | ||||
| 
 | ||||
| static PyObject * | ||||
| bytearray_fromhex_impl(PyTypeObject *type, PyObject *string) | ||||
| /*[clinic end generated code: output=8f0f0b6d30fb3ba0 input=f033a16d1fb21f48]*/ | ||||
| bytearray_fromhex(PyTypeObject *type, PyObject *string) | ||||
| /*[clinic end generated code: output=da84dc708e9c4b36 input=7e314e5b2d7ab484]*/ | ||||
| { | ||||
|     PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type); | ||||
|     if (type != &PyByteArray_Type && result != NULL) { | ||||
|  |  | |||
|  | @ -2484,7 +2484,7 @@ bytes_splitlines_impl(PyBytesObject *self, int keepends) | |||
| @classmethod | ||||
| bytes.fromhex | ||||
| 
 | ||||
|     string: unicode | ||||
|     string: object | ||||
|     / | ||||
| 
 | ||||
| Create a bytes object from a string of hexadecimal numbers. | ||||
|  | @ -2494,8 +2494,8 @@ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'. | |||
| [clinic start generated code]*/ | ||||
| 
 | ||||
| static PyObject * | ||||
| bytes_fromhex_impl(PyTypeObject *type, PyObject *string) | ||||
| /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/ | ||||
| bytes_fromhex(PyTypeObject *type, PyObject *string) | ||||
| /*[clinic end generated code: output=d458ec88195da6b3 input=f37d98ed51088a21]*/ | ||||
| { | ||||
|     PyObject *result = _PyBytes_FromHex(string, 0); | ||||
|     if (type != &PyBytes_Type && result != NULL) { | ||||
|  | @ -2510,37 +2510,55 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | |||
|     char *buf; | ||||
|     Py_ssize_t hexlen, invalid_char; | ||||
|     unsigned int top, bot; | ||||
|     const Py_UCS1 *str, *end; | ||||
|     const Py_UCS1 *str, *start, *end; | ||||
|     _PyBytesWriter writer; | ||||
|     Py_buffer view; | ||||
|     view.obj = NULL; | ||||
| 
 | ||||
|     _PyBytesWriter_Init(&writer); | ||||
|     writer.use_bytearray = use_bytearray; | ||||
| 
 | ||||
|     assert(PyUnicode_Check(string)); | ||||
|     hexlen = PyUnicode_GET_LENGTH(string); | ||||
|     if (PyUnicode_Check(string)) { | ||||
|         hexlen = PyUnicode_GET_LENGTH(string); | ||||
| 
 | ||||
|     if (!PyUnicode_IS_ASCII(string)) { | ||||
|         const void *data = PyUnicode_DATA(string); | ||||
|         int kind = PyUnicode_KIND(string); | ||||
|         Py_ssize_t i; | ||||
|         if (!PyUnicode_IS_ASCII(string)) { | ||||
|             const void *data = PyUnicode_DATA(string); | ||||
|             int kind = PyUnicode_KIND(string); | ||||
|             Py_ssize_t i; | ||||
| 
 | ||||
|         /* search for the first non-ASCII character */ | ||||
|         for (i = 0; i < hexlen; i++) { | ||||
|             if (PyUnicode_READ(kind, data, i) >= 128) | ||||
|                 break; | ||||
|             /* search for the first non-ASCII character */ | ||||
|             for (i = 0; i < hexlen; i++) { | ||||
|                 if (PyUnicode_READ(kind, data, i) >= 128) | ||||
|                     break; | ||||
|             } | ||||
|             invalid_char = i; | ||||
|             goto error; | ||||
|         } | ||||
|         invalid_char = i; | ||||
|         goto error; | ||||
|     } | ||||
| 
 | ||||
|     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); | ||||
|     str = PyUnicode_1BYTE_DATA(string); | ||||
|         assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); | ||||
|         str = PyUnicode_1BYTE_DATA(string); | ||||
|     } | ||||
|     else if (PyObject_CheckBuffer(string)) { | ||||
|         if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) { | ||||
|             return NULL; | ||||
|         } | ||||
|         hexlen = view.len; | ||||
|         str = view.buf; | ||||
|     } | ||||
|     else { | ||||
|         PyErr_Format(PyExc_TypeError, | ||||
|                      "fromhex() argument must be str or bytes-like, not %T", | ||||
|                      string); | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     /* This overestimates if there are spaces */ | ||||
|     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); | ||||
|     if (buf == NULL) | ||||
|         return NULL; | ||||
|     if (buf == NULL) { | ||||
|         goto release_buffer; | ||||
|     } | ||||
| 
 | ||||
|     start = str; | ||||
|     end = str + hexlen; | ||||
|     while (str < end) { | ||||
|         /* skip over spaces in the input */ | ||||
|  | @ -2554,7 +2572,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | |||
| 
 | ||||
|         top = _PyLong_DigitValue[*str]; | ||||
|         if (top >= 16) { | ||||
|             invalid_char = str - PyUnicode_1BYTE_DATA(string); | ||||
|             invalid_char = str - start; | ||||
|             goto error; | ||||
|         } | ||||
|         str++; | ||||
|  | @ -2565,7 +2583,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | |||
|             if (str >= end){ | ||||
|                 invalid_char = -1; | ||||
|             } else { | ||||
|                 invalid_char = str - PyUnicode_1BYTE_DATA(string); | ||||
|                 invalid_char = str - start; | ||||
|             } | ||||
|             goto error; | ||||
|         } | ||||
|  | @ -2574,6 +2592,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | |||
|         *buf++ = (unsigned char)((top << 4) + bot); | ||||
|     } | ||||
| 
 | ||||
|     if (view.obj != NULL) { | ||||
|        PyBuffer_Release(&view); | ||||
|     } | ||||
|     return _PyBytesWriter_Finish(&writer, buf); | ||||
| 
 | ||||
|   error: | ||||
|  | @ -2586,6 +2607,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) | |||
|                      "fromhex() arg at position %zd", invalid_char); | ||||
|     } | ||||
|     _PyBytesWriter_Dealloc(&writer); | ||||
| 
 | ||||
|   release_buffer: | ||||
|     if (view.obj != NULL) { | ||||
|         PyBuffer_Release(&view); | ||||
|     } | ||||
|     return NULL; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										22
									
								
								Objects/clinic/bytearrayobject.c.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										22
									
								
								Objects/clinic/bytearrayobject.c.h
									
										
									
										generated
									
									
									
								
							|  | @ -1601,26 +1601,6 @@ PyDoc_STRVAR(bytearray_fromhex__doc__, | |||
| #define BYTEARRAY_FROMHEX_METHODDEF    \ | ||||
|     {"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__}, | ||||
| 
 | ||||
| static PyObject * | ||||
| bytearray_fromhex_impl(PyTypeObject *type, PyObject *string); | ||||
| 
 | ||||
| static PyObject * | ||||
| bytearray_fromhex(PyTypeObject *type, PyObject *arg) | ||||
| { | ||||
|     PyObject *return_value = NULL; | ||||
|     PyObject *string; | ||||
| 
 | ||||
|     if (!PyUnicode_Check(arg)) { | ||||
|         _PyArg_BadArgument("fromhex", "argument", "str", arg); | ||||
|         goto exit; | ||||
|     } | ||||
|     string = arg; | ||||
|     return_value = bytearray_fromhex_impl(type, string); | ||||
| 
 | ||||
| exit: | ||||
|     return return_value; | ||||
| } | ||||
| 
 | ||||
| PyDoc_STRVAR(bytearray_hex__doc__, | ||||
| "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" | ||||
| "--\n" | ||||
|  | @ -1789,4 +1769,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) | |||
| { | ||||
|     return bytearray_sizeof_impl((PyByteArrayObject *)self); | ||||
| } | ||||
| /*[clinic end generated code: output=7c924a56e0a8bfe6 input=a9049054013a1b77]*/ | ||||
| /*[clinic end generated code: output=13a4231325b7d3c1 input=a9049054013a1b77]*/ | ||||
|  |  | |||
							
								
								
									
										22
									
								
								Objects/clinic/bytesobject.c.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										22
									
								
								Objects/clinic/bytesobject.c.h
									
										
									
										generated
									
									
									
								
							|  | @ -1204,26 +1204,6 @@ PyDoc_STRVAR(bytes_fromhex__doc__, | |||
| #define BYTES_FROMHEX_METHODDEF    \ | ||||
|     {"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__}, | ||||
| 
 | ||||
| static PyObject * | ||||
| bytes_fromhex_impl(PyTypeObject *type, PyObject *string); | ||||
| 
 | ||||
| static PyObject * | ||||
| bytes_fromhex(PyTypeObject *type, PyObject *arg) | ||||
| { | ||||
|     PyObject *return_value = NULL; | ||||
|     PyObject *string; | ||||
| 
 | ||||
|     if (!PyUnicode_Check(arg)) { | ||||
|         _PyArg_BadArgument("fromhex", "argument", "str", arg); | ||||
|         goto exit; | ||||
|     } | ||||
|     string = arg; | ||||
|     return_value = bytes_fromhex_impl(type, string); | ||||
| 
 | ||||
| exit: | ||||
|     return return_value; | ||||
| } | ||||
| 
 | ||||
| PyDoc_STRVAR(bytes_hex__doc__, | ||||
| "hex($self, /, sep=<unrepresentable>, bytes_per_sep=1)\n" | ||||
| "--\n" | ||||
|  | @ -1404,4 +1384,4 @@ skip_optional_pos: | |||
| exit: | ||||
|     return return_value; | ||||
| } | ||||
| /*[clinic end generated code: output=61cb2cf6506df4c6 input=a9049054013a1b77]*/ | ||||
| /*[clinic end generated code: output=967aae4b46423586 input=a9049054013a1b77]*/ | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Daniel Pope
						Daniel Pope