mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Fixed issue #1564: The set implementation should special-case PyUnicode instead of PyString
I moved the unicode_eq to stringlib/eq.h to keep the function static and possible inline for setobject.c and dictobject.h. I also removed the unused _PyString_Eq method. If it's required in the future it can be added to eq.h
This commit is contained in:
		
							parent
							
								
									110194048e
								
							
						
					
					
						commit
						0ded5b54bb
					
				
					 7 changed files with 85 additions and 45 deletions
				
			
		|  | @ -58,7 +58,6 @@ PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int); | |||
| PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); | ||||
| PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); | ||||
| PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t); | ||||
| PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*); | ||||
| PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); | ||||
| PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, | ||||
| 						  int, char**, int*); | ||||
|  |  | |||
|  | @ -7,6 +7,7 @@ | |||
| import os | ||||
| from random import randrange, shuffle | ||||
| import sys | ||||
| import warnings | ||||
| 
 | ||||
| class PassThru(Exception): | ||||
|     pass | ||||
|  | @ -817,6 +818,44 @@ def setUp(self): | |||
|         self.length = 3 | ||||
|         self.repr   = None | ||||
| 
 | ||||
| #------------------------------------------------------------------------------ | ||||
| 
 | ||||
| class TestBasicOpsString(TestBasicOps): | ||||
|     def setUp(self): | ||||
|         self.case   = "string set" | ||||
|         self.values = ["a", "b", "c"] | ||||
|         self.set    = set(self.values) | ||||
|         self.dup    = set(self.values) | ||||
|         self.length = 3 | ||||
|         self.repr   = "{'a', 'c', 'b'}" | ||||
| 
 | ||||
| #------------------------------------------------------------------------------ | ||||
| 
 | ||||
| class TestBasicOpsBytes(TestBasicOps): | ||||
|     def setUp(self): | ||||
|         self.case   = "string set" | ||||
|         self.values = [b"a", b"b", b"c"] | ||||
|         self.set    = set(self.values) | ||||
|         self.dup    = set(self.values) | ||||
|         self.length = 3 | ||||
|         self.repr   = "{b'a', b'c', b'b'}" | ||||
| 
 | ||||
| #------------------------------------------------------------------------------ | ||||
| 
 | ||||
| class TestBasicOpsMixedStringBytes(TestBasicOps): | ||||
|     def setUp(self): | ||||
|         self.warning_filters = warnings.filters[:] | ||||
|         warnings.simplefilter('ignore', BytesWarning) | ||||
|         self.case   = "string and bytes set" | ||||
|         self.values = ["a", "b", b"a", b"b"] | ||||
|         self.set    = set(self.values) | ||||
|         self.dup    = set(self.values) | ||||
|         self.length = 4 | ||||
|         self.repr   = "{'a', b'a', 'b', b'b'}" | ||||
| 
 | ||||
|     def tearDown(self): | ||||
|         warnings.filters = self.warning_filters | ||||
| 
 | ||||
| #============================================================================== | ||||
| 
 | ||||
| def baditer(): | ||||
|  | @ -1581,6 +1620,9 @@ def test_main(verbose=None): | |||
|         TestBasicOpsSingleton, | ||||
|         TestBasicOpsTuple, | ||||
|         TestBasicOpsTriple, | ||||
|         TestBasicOpsString, | ||||
|         TestBasicOpsBytes, | ||||
|         TestBasicOpsMixedStringBytes, | ||||
|         TestBinaryOps, | ||||
|         TestUpdateOps, | ||||
|         TestMutate, | ||||
|  |  | |||
|  | @ -14,6 +14,9 @@ Core and Builtins | |||
| 
 | ||||
| - Issue #1573: Improper use of the keyword-only syntax makes the parser crash | ||||
| 
 | ||||
| - Issue #1564: The set implementation should special-case PyUnicode instead | ||||
|   of PyString | ||||
| 
 | ||||
| 
 | ||||
| Extension Modules | ||||
| ----------------- | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ | |||
| */ | ||||
| 
 | ||||
| #include "Python.h" | ||||
| #include "stringlib/eq.h" | ||||
| 
 | ||||
| 
 | ||||
| /* Set a key error with the specified argument, wrapping it in a
 | ||||
|  | @ -327,25 +328,6 @@ lookdict(PyDictObject *mp, PyObject *key, register long hash) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /* Return 1 if two unicode objects are equal, 0 if not. */ | ||||
| static int | ||||
| unicode_eq(PyObject *aa, PyObject *bb) | ||||
| { | ||||
| 	PyUnicodeObject *a = (PyUnicodeObject *)aa; | ||||
| 	PyUnicodeObject *b = (PyUnicodeObject *)bb; | ||||
| 
 | ||||
| 	if (a->length != b->length) | ||||
| 		return 0; | ||||
| 	if (a->length == 0) | ||||
| 		return 1; | ||||
| 	if (a->str[0] != b->str[0]) | ||||
| 		return 0; | ||||
| 	if (a->length == 1) | ||||
| 		return 1; | ||||
| 	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * Hacked up version of lookdict which can assume keys are always | ||||
|  * unicodes; this assumption allows testing for errors during | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ | |||
| 
 | ||||
| #include "Python.h" | ||||
| #include "structmember.h" | ||||
| #include "stringlib/eq.h" | ||||
| 
 | ||||
| /* Set a key error with the specified argument, wrapping it in a
 | ||||
|  * tuple automatically so that tuple keys are not unpacked as the | ||||
|  | @ -55,6 +56,7 @@ _PySet_Dummy(void) | |||
| static PySetObject *free_sets[MAXFREESETS]; | ||||
| static int num_free_sets = 0; | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
| The basic lookup function used by all operations. | ||||
| This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. | ||||
|  | @ -144,12 +146,12 @@ set_lookkey(PySetObject *so, PyObject *key, register long hash) | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Hacked up version of set_lookkey which can assume keys are always strings; | ||||
|  * This means we can always use _PyString_Eq directly and not have to check to | ||||
|  * Hacked up version of set_lookkey which can assume keys are always unicode; | ||||
|  * This means we can always use unicode_eq directly and not have to check to | ||||
|  * see if the comparison altered the table. | ||||
|  */ | ||||
| static setentry * | ||||
| set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | ||||
| set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash) | ||||
| { | ||||
| 	register Py_ssize_t i; | ||||
| 	register size_t perturb; | ||||
|  | @ -158,11 +160,11 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | |||
| 	setentry *table = so->table; | ||||
| 	register setentry *entry; | ||||
| 
 | ||||
| 	/* Make sure this function doesn't have to handle non-string keys,
 | ||||
| 	/* Make sure this function doesn't have to handle non-unicode keys,
 | ||||
| 	   including subclasses of str; e.g., one reason to subclass | ||||
| 	   strings is to override __eq__, and for speed we don't cater to | ||||
| 	   that here. */ | ||||
| 	if (!PyString_CheckExact(key)) { | ||||
| 	if (!PyUnicode_CheckExact(key)) { | ||||
| 		so->lookup = set_lookkey; | ||||
| 		return set_lookkey(so, key, hash); | ||||
| 	} | ||||
|  | @ -173,7 +175,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | |||
| 	if (entry->key == dummy) | ||||
| 		freeslot = entry; | ||||
| 	else { | ||||
| 		if (entry->hash == hash && _PyString_Eq(entry->key, key)) | ||||
| 		if (entry->hash == hash && unicode_eq(entry->key, key)) | ||||
| 			return entry; | ||||
| 		freeslot = NULL; | ||||
| 	} | ||||
|  | @ -188,7 +190,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) | |||
| 		if (entry->key == key | ||||
| 		    || (entry->hash == hash | ||||
| 			&& entry->key != dummy | ||||
| 			&& _PyString_Eq(entry->key, key))) | ||||
| 			&& unicode_eq(entry->key, key))) | ||||
| 			return entry; | ||||
| 		if (entry->key == dummy && freeslot == NULL) | ||||
| 			freeslot = entry; | ||||
|  | @ -375,8 +377,8 @@ set_add_key(register PySetObject *so, PyObject *key) | |||
| 	register long hash; | ||||
| 	register Py_ssize_t n_used; | ||||
| 
 | ||||
| 	if (!PyString_CheckExact(key) || | ||||
| 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) { | ||||
| 	if (!PyUnicode_CheckExact(key) || | ||||
| 	    (hash = ((PyUnicodeObject *) key)->hash) == -1) { | ||||
| 		hash = PyObject_Hash(key); | ||||
| 		if (hash == -1) | ||||
| 			return -1; | ||||
|  | @ -422,8 +424,9 @@ set_discard_key(PySetObject *so, PyObject *key) | |||
| 	PyObject *old_key; | ||||
| 
 | ||||
| 	assert (PyAnySet_Check(so)); | ||||
| 	if (!PyString_CheckExact(key) || | ||||
| 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) { | ||||
| 
 | ||||
| 	if (!PyUnicode_CheckExact(key) || | ||||
| 	    (hash = ((PyUnicodeObject *) key)->hash) == -1) { | ||||
| 		hash = PyObject_Hash(key); | ||||
| 		if (hash == -1) | ||||
| 			return -1; | ||||
|  | @ -668,8 +671,8 @@ set_contains_key(PySetObject *so, PyObject *key) | |||
| 	long hash; | ||||
| 	setentry *entry; | ||||
| 
 | ||||
| 	if (!PyString_CheckExact(key) || | ||||
| 	    (hash = ((PyStringObject *) key)->ob_shash) == -1) { | ||||
| 	if (!PyUnicode_CheckExact(key) || | ||||
| 	    (hash = ((PyUnicodeObject *) key)->hash) == -1) { | ||||
| 		hash = PyObject_Hash(key); | ||||
| 		if (hash == -1) | ||||
| 			return -1; | ||||
|  | @ -989,7 +992,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable) | |||
| 		INIT_NONZERO_SET_SLOTS(so); | ||||
| 	} | ||||
| 
 | ||||
| 	so->lookup = set_lookkey_string; | ||||
| 	so->lookup = set_lookkey_unicode; | ||||
| 	so->weakreflist = NULL; | ||||
| 
 | ||||
| 	if (iterable != NULL) { | ||||
|  | @ -1352,7 +1355,7 @@ set_isdisjoint(PySetObject *so, PyObject *other) | |||
| 	while ((key = PyIter_Next(it)) != NULL) { | ||||
| 		int rv; | ||||
| 		setentry entry; | ||||
| 		long hash = PyObject_Hash(key); | ||||
| 		long hash = PyObject_Hash(key);; | ||||
| 
 | ||||
| 		if (hash == -1) { | ||||
| 			Py_DECREF(key); | ||||
|  |  | |||
							
								
								
									
										21
									
								
								Objects/stringlib/eq.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								Objects/stringlib/eq.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | |||
| /* Fast unicode equal function optimized for dictobject.c and setobject.c */ | ||||
| 
 | ||||
| /* Return 1 if two unicode objects are equal, 0 if not.
 | ||||
|  * unicode_eq() is called when the hash of two unicode objects is equal. | ||||
|  */ | ||||
| Py_LOCAL_INLINE(int) | ||||
| unicode_eq(PyObject *aa, PyObject *bb) | ||||
| { | ||||
| 	register PyUnicodeObject *a = (PyUnicodeObject *)aa; | ||||
| 	register PyUnicodeObject *b = (PyUnicodeObject *)bb; | ||||
| 
 | ||||
| 	if (a->length != b->length) | ||||
| 		return 0; | ||||
| 	if (a->length == 0) | ||||
| 		return 1; | ||||
| 	if (a->str[0] != b->str[0]) | ||||
| 		return 0; | ||||
| 	if (a->length == 1) | ||||
| 		return 1; | ||||
| 	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; | ||||
| } | ||||
|  | @ -877,16 +877,6 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op) | |||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| int | ||||
| _PyString_Eq(PyObject *o1, PyObject *o2) | ||||
| { | ||||
| 	PyStringObject *a = (PyStringObject*) o1; | ||||
| 	PyStringObject *b = (PyStringObject*) o2; | ||||
| 	return Py_Size(a) == Py_Size(b) | ||||
| 		&& *a->ob_sval == *b->ob_sval | ||||
| 		&& memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0; | ||||
| } | ||||
| 
 | ||||
| static long | ||||
| string_hash(PyStringObject *a) | ||||
| { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Christian Heimes
						Christian Heimes