mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	Fixed issue #1564: The set implementation should special-case PyUnicode instead of PyString
I moved unicode_eq to stringlib/eq.h to keep the function static and possibly inlined for setobject.c and dictobject.c. I also removed the unused _PyString_Eq function. If it's required in the future, it can be added to eq.h.
This commit is contained in:
		
							parent
							
								
									110194048e
								
							
						
					
					
						commit
						0ded5b54bb
					
				
					 7 changed files with 85 additions and 45 deletions
				
			
		|  | @ -58,7 +58,6 @@ PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int); | ||||||
| PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); | PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); | ||||||
| PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); | PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); | ||||||
| PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t); | PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t); | ||||||
| PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*); |  | ||||||
| PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); | PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); | ||||||
| PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, | PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, | ||||||
| 						  int, char**, int*); | 						  int, char**, int*); | ||||||
|  |  | ||||||
|  | @ -7,6 +7,7 @@ | ||||||
| import os | import os | ||||||
| from random import randrange, shuffle | from random import randrange, shuffle | ||||||
| import sys | import sys | ||||||
|  | import warnings | ||||||
| 
 | 
 | ||||||
| class PassThru(Exception): | class PassThru(Exception): | ||||||
|     pass |     pass | ||||||
|  | @ -817,6 +818,44 @@ def setUp(self): | ||||||
|         self.length = 3 |         self.length = 3 | ||||||
|         self.repr   = None |         self.repr   = None | ||||||
| 
 | 
 | ||||||
|  | #------------------------------------------------------------------------------ | ||||||
|  | 
 | ||||||
|  | class TestBasicOpsString(TestBasicOps): | ||||||
|  |     def setUp(self): | ||||||
|  |         self.case   = "string set" | ||||||
|  |         self.values = ["a", "b", "c"] | ||||||
|  |         self.set    = set(self.values) | ||||||
|  |         self.dup    = set(self.values) | ||||||
|  |         self.length = 3 | ||||||
|  |         self.repr   = "{'a', 'c', 'b'}" | ||||||
|  | 
 | ||||||
|  | #------------------------------------------------------------------------------ | ||||||
|  | 
 | ||||||
|  | class TestBasicOpsBytes(TestBasicOps): | ||||||
|  |     def setUp(self): | ||||||
|  |         self.case   = "string set" | ||||||
|  |         self.values = [b"a", b"b", b"c"] | ||||||
|  |         self.set    = set(self.values) | ||||||
|  |         self.dup    = set(self.values) | ||||||
|  |         self.length = 3 | ||||||
|  |         self.repr   = "{b'a', b'c', b'b'}" | ||||||
|  | 
 | ||||||
|  | #------------------------------------------------------------------------------ | ||||||
|  | 
 | ||||||
|  | class TestBasicOpsMixedStringBytes(TestBasicOps): | ||||||
|  |     def setUp(self): | ||||||
|  |         self.warning_filters = warnings.filters[:] | ||||||
|  |         warnings.simplefilter('ignore', BytesWarning) | ||||||
|  |         self.case   = "string and bytes set" | ||||||
|  |         self.values = ["a", "b", b"a", b"b"] | ||||||
|  |         self.set    = set(self.values) | ||||||
|  |         self.dup    = set(self.values) | ||||||
|  |         self.length = 4 | ||||||
|  |         self.repr   = "{'a', b'a', 'b', b'b'}" | ||||||
|  | 
 | ||||||
|  |     def tearDown(self): | ||||||
|  |         warnings.filters = self.warning_filters | ||||||
|  | 
 | ||||||
| #============================================================================== | #============================================================================== | ||||||
| 
 | 
 | ||||||
| def baditer(): | def baditer(): | ||||||
|  | @ -1581,6 +1620,9 @@ def test_main(verbose=None): | ||||||
|         TestBasicOpsSingleton, |         TestBasicOpsSingleton, | ||||||
|         TestBasicOpsTuple, |         TestBasicOpsTuple, | ||||||
|         TestBasicOpsTriple, |         TestBasicOpsTriple, | ||||||
|  |         TestBasicOpsString, | ||||||
|  |         TestBasicOpsBytes, | ||||||
|  |         TestBasicOpsMixedStringBytes, | ||||||
|         TestBinaryOps, |         TestBinaryOps, | ||||||
|         TestUpdateOps, |         TestUpdateOps, | ||||||
|         TestMutate, |         TestMutate, | ||||||
|  |  | ||||||
|  | @ -14,6 +14,9 @@ Core and Builtins | ||||||
| 
 | 
 | ||||||
| - Issue #1573: Improper use of the keyword-only syntax makes the parser crash | - Issue #1573: Improper use of the keyword-only syntax makes the parser crash | ||||||
| 
 | 
 | ||||||
|  | - Issue #1564: The set implementation should special-case PyUnicode instead | ||||||
|  |   of PyString | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| Extension Modules | Extension Modules | ||||||
| ----------------- | ----------------- | ||||||
|  |  | ||||||
|  | @ -8,6 +8,7 @@ | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| #include "Python.h" | #include "Python.h" | ||||||
|  | #include "stringlib/eq.h" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /* Set a key error with the specified argument, wrapping it in a
 | /* Set a key error with the specified argument, wrapping it in a
 | ||||||
|  | @ -327,25 +328,6 @@ lookdict(PyDictObject *mp, PyObject *key, register long hash) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Return 1 if two unicode objects are equal, 0 if not. */ |  | ||||||
| static int |  | ||||||
| unicode_eq(PyObject *aa, PyObject *bb) |  | ||||||
| { |  | ||||||
| 	PyUnicodeObject *a = (PyUnicodeObject *)aa; |  | ||||||
| 	PyUnicodeObject *b = (PyUnicodeObject *)bb; |  | ||||||
| 
 |  | ||||||
| 	if (a->length != b->length) |  | ||||||
| 		return 0; |  | ||||||
| 	if (a->length == 0) |  | ||||||
| 		return 1; |  | ||||||
| 	if (a->str[0] != b->str[0]) |  | ||||||
| 		return 0; |  | ||||||
| 	if (a->length == 1) |  | ||||||
| 		return 1; |  | ||||||
| 	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Hacked up version of lookdict which can assume keys are always |  * Hacked up version of lookdict which can assume keys are always | ||||||
|  * unicodes; this assumption allows testing for errors during |  * unicodes; this assumption allows testing for errors during | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "Python.h" | #include "Python.h" | ||||||
| #include "structmember.h" | #include "structmember.h" | ||||||
|  | #include "stringlib/eq.h" | ||||||
| 
 | 
 | ||||||
| /* Set a key error with the specified argument, wrapping it in a
 | /* Set a key error with the specified argument, wrapping it in a
 | ||||||
|  * tuple automatically so that tuple keys are not unpacked as the |  * tuple automatically so that tuple keys are not unpacked as the | ||||||
|  | @ -55,6 +56,7 @@ _PySet_Dummy(void) | ||||||
| static PySetObject *free_sets[MAXFREESETS]; | static PySetObject *free_sets[MAXFREESETS]; | ||||||
| static int num_free_sets = 0; | static int num_free_sets = 0; | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
| The basic lookup function used by all operations. | The basic lookup function used by all operations. | ||||||
| This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. | This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. | ||||||
|  | @ -144,12 +146,12 @@ set_lookkey(PySetObject *so, PyObject *key, register long hash) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Hacked up version of set_lookkey which can assume keys are always strings; |  * Hacked up version of set_lookkey which can assume keys are always unicode; | ||||||
|  * This means we can always use _PyString_Eq directly and not have to check to |  * This means we can always use unicode_eq directly and not have to check to | ||||||
|  * see if the comparison altered the table. |  * see if the comparison altered the table. | ||||||
|  */ |  */ | ||||||
| static setentry * | static setentry * | ||||||
| set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash) | ||||||
| { | { | ||||||
| 	register Py_ssize_t i; | 	register Py_ssize_t i; | ||||||
| 	register size_t perturb; | 	register size_t perturb; | ||||||
|  | @ -158,11 +160,11 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | ||||||
| 	setentry *table = so->table; | 	setentry *table = so->table; | ||||||
| 	register setentry *entry; | 	register setentry *entry; | ||||||
| 
 | 
 | ||||||
| 	/* Make sure this function doesn't have to handle non-string keys,
 | 	/* Make sure this function doesn't have to handle non-unicode keys,
 | ||||||
| 	   including subclasses of str; e.g., one reason to subclass | 	   including subclasses of str; e.g., one reason to subclass | ||||||
| 	   strings is to override __eq__, and for speed we don't cater to | 	   strings is to override __eq__, and for speed we don't cater to | ||||||
| 	   that here. */ | 	   that here. */ | ||||||
| 	if (!PyString_CheckExact(key)) { | 	if (!PyUnicode_CheckExact(key)) { | ||||||
| 		so->lookup = set_lookkey; | 		so->lookup = set_lookkey; | ||||||
| 		return set_lookkey(so, key, hash); | 		return set_lookkey(so, key, hash); | ||||||
| 	} | 	} | ||||||
|  | @ -173,7 +175,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | ||||||
| 	if (entry->key == dummy) | 	if (entry->key == dummy) | ||||||
| 		freeslot = entry; | 		freeslot = entry; | ||||||
| 	else { | 	else { | ||||||
| 		if (entry->hash == hash && _PyString_Eq(entry->key, key)) | 		if (entry->hash == hash && unicode_eq(entry->key, key)) | ||||||
| 			return entry; | 			return entry; | ||||||
| 		freeslot = NULL; | 		freeslot = NULL; | ||||||
| 	} | 	} | ||||||
|  | @ -188,7 +190,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | ||||||
| 		if (entry->key == key | 		if (entry->key == key | ||||||
| 		    || (entry->hash == hash | 		    || (entry->hash == hash | ||||||
| 			&& entry->key != dummy | 			&& entry->key != dummy | ||||||
| 			&& _PyString_Eq(entry->key, key))) | 			&& unicode_eq(entry->key, key))) | ||||||
| 			return entry; | 			return entry; | ||||||
| 		if (entry->key == dummy && freeslot == NULL) | 		if (entry->key == dummy && freeslot == NULL) | ||||||
| 			freeslot = entry; | 			freeslot = entry; | ||||||
|  | @ -375,8 +377,8 @@ set_add_key(register PySetObject *so, PyObject *key) | ||||||
| 	register long hash; | 	register long hash; | ||||||
| 	register Py_ssize_t n_used; | 	register Py_ssize_t n_used; | ||||||
| 
 | 
 | ||||||
| 	if (!PyString_CheckExact(key) || | 	if (!PyUnicode_CheckExact(key) || | ||||||
| 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) { | 	    (hash = ((PyUnicodeObject *) key)->hash) == -1) { | ||||||
| 		hash = PyObject_Hash(key); | 		hash = PyObject_Hash(key); | ||||||
| 		if (hash == -1) | 		if (hash == -1) | ||||||
| 			return -1; | 			return -1; | ||||||
|  | @ -422,8 +424,9 @@ set_discard_key(PySetObject *so, PyObject *key) | ||||||
| 	PyObject *old_key; | 	PyObject *old_key; | ||||||
| 
 | 
 | ||||||
| 	assert (PyAnySet_Check(so)); | 	assert (PyAnySet_Check(so)); | ||||||
| 	if (!PyString_CheckExact(key) || | 
 | ||||||
| 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) { | 	if (!PyUnicode_CheckExact(key) || | ||||||
|  | 	    (hash = ((PyUnicodeObject *) key)->hash) == -1) { | ||||||
| 		hash = PyObject_Hash(key); | 		hash = PyObject_Hash(key); | ||||||
| 		if (hash == -1) | 		if (hash == -1) | ||||||
| 			return -1; | 			return -1; | ||||||
|  | @ -668,8 +671,8 @@ set_contains_key(PySetObject *so, PyObject *key) | ||||||
| 	long hash; | 	long hash; | ||||||
| 	setentry *entry; | 	setentry *entry; | ||||||
| 
 | 
 | ||||||
| 	if (!PyString_CheckExact(key) || | 	if (!PyUnicode_CheckExact(key) || | ||||||
| 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) { | 	    (hash = ((PyUnicodeObject *) key)->hash) == -1) { | ||||||
| 		hash = PyObject_Hash(key); | 		hash = PyObject_Hash(key); | ||||||
| 		if (hash == -1) | 		if (hash == -1) | ||||||
| 			return -1; | 			return -1; | ||||||
|  | @ -989,7 +992,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable) | ||||||
| 		INIT_NONZERO_SET_SLOTS(so); | 		INIT_NONZERO_SET_SLOTS(so); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	so->lookup = set_lookkey_string; | 	so->lookup = set_lookkey_unicode; | ||||||
| 	so->weakreflist = NULL; | 	so->weakreflist = NULL; | ||||||
| 
 | 
 | ||||||
| 	if (iterable != NULL) { | 	if (iterable != NULL) { | ||||||
|  | @ -1352,7 +1355,7 @@ set_isdisjoint(PySetObject *so, PyObject *other) | ||||||
| 	while ((key = PyIter_Next(it)) != NULL) { | 	while ((key = PyIter_Next(it)) != NULL) { | ||||||
| 		int rv; | 		int rv; | ||||||
| 		setentry entry; | 		setentry entry; | ||||||
| 		long hash = PyObject_Hash(key); | 		long hash = PyObject_Hash(key);; | ||||||
| 
 | 
 | ||||||
| 		if (hash == -1) { | 		if (hash == -1) { | ||||||
| 			Py_DECREF(key); | 			Py_DECREF(key); | ||||||
|  |  | ||||||
							
								
								
									
										21
									
								
								Objects/stringlib/eq.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								Objects/stringlib/eq.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | /* Fast unicode equality function optimized for dictobject.c and setobject.c */ | ||||||
|  | 
 | ||||||
|  | /* Return 1 if two unicode objects are equal, 0 if not.
 | ||||||
|  |  * unicode_eq() is called when the hash of two unicode objects is equal. | ||||||
|  |  */ | ||||||
|  | Py_LOCAL_INLINE(int) | ||||||
|  | unicode_eq(PyObject *aa, PyObject *bb) | ||||||
|  | { | ||||||
|  | 	register PyUnicodeObject *a = (PyUnicodeObject *)aa; | ||||||
|  | 	register PyUnicodeObject *b = (PyUnicodeObject *)bb; | ||||||
|  | 
 | ||||||
|  | 	if (a->length != b->length) | ||||||
|  | 		return 0; | ||||||
|  | 	if (a->length == 0) | ||||||
|  | 		return 1; | ||||||
|  | 	if (a->str[0] != b->str[0]) | ||||||
|  | 		return 0; | ||||||
|  | 	if (a->length == 1) | ||||||
|  | 		return 1; | ||||||
|  | 	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; | ||||||
|  | } | ||||||
|  | @ -877,16 +877,6 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op) | ||||||
| 	return result; | 	return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int |  | ||||||
| _PyString_Eq(PyObject *o1, PyObject *o2) |  | ||||||
| { |  | ||||||
| 	PyStringObject *a = (PyStringObject*) o1; |  | ||||||
| 	PyStringObject *b = (PyStringObject*) o2; |  | ||||||
| 	return Py_Size(a) == Py_Size(b) |  | ||||||
| 		&& *a->ob_sval == *b->ob_sval |  | ||||||
| 		&& memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static long | static long | ||||||
| string_hash(PyStringObject *a) | string_hash(PyStringObject *a) | ||||||
| { | { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Christian Heimes
						Christian Heimes