diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst new file mode 100644 index 00000000000..8e5ec0bfe87 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst @@ -0,0 +1,3 @@ +Optimize set difference_update for the case when the other set is much +larger than the base set. (Suggested by Evgeny Kapun with code contributed +by Michele OrrĂ¹). diff --git a/Objects/setobject.c b/Objects/setobject.c index 56858dbccfe..fafc2fa9e46 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1463,9 +1463,25 @@ set_difference_update_internal(PySetObject *so, PyObject *other) setentry *entry; Py_ssize_t pos = 0; - while (set_next((PySetObject *)other, &pos, &entry)) - if (set_discard_entry(so, entry->key, entry->hash) < 0) + /* Optimization: When the other set is more than 8 times + larger than the base set, replace the other set with + interesection of the two sets. + */ + if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) { + other = set_intersection(so, other); + if (other == NULL) return -1; + } else { + Py_INCREF(other); + } + + while (set_next((PySetObject *)other, &pos, &entry)) + if (set_discard_entry(so, entry->key, entry->hash) < 0) { + Py_DECREF(other); + return -1; + } + + Py_DECREF(other); } else { PyObject *key, *it; it = PyObject_GetIter(other);