mirror of
https://github.com/python/cpython.git
synced 2026-01-06 15:32:22 +00:00
Get rid of the superstitious "~" in dict hashing's "i = (~hash) & mask".
The comment following used to say:
/* We use ~hash instead of hash, as degenerate hash functions, such
as for ints <sigh>, can have lots of leading zeros. It's not
really a performance risk, but better safe than sorry.
12-Dec-00 tim: so ~hash produces lots of leading ones instead --
what's the gain? */
That is, there was never a good reason for doing it. And to the contrary,
as explained on Python-Dev last December, it tended to make the *sum*
(i + incr) & mask (which is the first table index examined in case of
collison) the same "too often" across distinct hashes.
Changing to the simpler "i = hash & mask" reduced the number of string-dict
collisions (== # number of times we go around the lookup for-loop) from about
6 million to 5 million during a full run of the test suite (these are
approximate because the test suite does some random stuff from run to run).
The number of collisions in non-string dicts also decreased, but not as
dramatically.
Note that this may, for a given dict, change the order (wrt previous
releases) of entries exposed by .keys(), .values() and .items(). A number
of std tests suffered bogus failures as a result. For dicts keyed by
small ints, or (less so) by characters, the order is much more likely to be
in increasing order of key now; e.g.,
>>> d = {}
>>> for i in range(10):
... d[i] = i
...
>>> d
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
>>>
Unfortunately. people may latch on to that in small examples and draw a
bogus conclusion.
test_support.py
Moved test_extcall's sortdict() into test_support, made it stronger,
and imported sortdict into other std tests that needed it.
test_unicode.py
Excluced cp875 from the "roundtrip over range(128)" test, because
cp875 doesn't have a well-defined inverse for unicode("?", "cp875").
See Python-Dev for excruciating details.
Cookie.py
Chaged various output functions to sort dicts before building
strings from them.
test_extcall
Fiddled the expected-result file. This remains sensitive to native
dict ordering, because, e.g., if there are multiple errors in a
keyword-arg dict (and test_extcall sets up many cases like that), the
specific error Python complains about first depends on native dict
ordering.
This commit is contained in:
parent
0194ad5c7d
commit
2f228e75e4
11 changed files with 64 additions and 46 deletions
|
|
@ -70,8 +70,8 @@
|
|||
>>> C["fig"] = "newton"
|
||||
>>> C["sugar"] = "wafer"
|
||||
>>> print C
|
||||
Set-Cookie: sugar=wafer;
|
||||
Set-Cookie: fig=newton;
|
||||
Set-Cookie: sugar=wafer;
|
||||
|
||||
Notice that the printable representation of a Cookie is the
|
||||
appropriate format for a Set-Cookie: header. This is the
|
||||
|
|
@ -93,8 +93,8 @@
|
|||
>>> C = Cookie.SmartCookie()
|
||||
>>> C.load("chips=ahoy; vienna=finger")
|
||||
>>> print C
|
||||
Set-Cookie: vienna=finger;
|
||||
Set-Cookie: chips=ahoy;
|
||||
Set-Cookie: vienna=finger;
|
||||
|
||||
The load() method is darn-tootin smart about identifying cookies
|
||||
within a string. Escaped quotation marks, nested semicolons, and other
|
||||
|
|
@ -493,7 +493,9 @@ def OutputString(self, attrs=None):
|
|||
# Now add any defined attributes
|
||||
if attrs is None:
|
||||
attrs = self._reserved_keys
|
||||
for K,V in self.items():
|
||||
items = self.items()
|
||||
items.sort()
|
||||
for K,V in items:
|
||||
if V == "": continue
|
||||
if K not in attrs: continue
|
||||
if K == "expires" and type(V) == type(1):
|
||||
|
|
@ -586,7 +588,9 @@ def __setitem__(self, key, value):
|
|||
def output(self, attrs=None, header="Set-Cookie:", sep="\n"):
|
||||
"""Return a string suitable for HTTP."""
|
||||
result = []
|
||||
for K,V in self.items():
|
||||
items = self.items()
|
||||
items.sort()
|
||||
for K,V in items:
|
||||
result.append( V.output(attrs, header) )
|
||||
return string.join(result, sep)
|
||||
# end output
|
||||
|
|
@ -595,14 +599,18 @@ def output(self, attrs=None, header="Set-Cookie:", sep="\n"):
|
|||
|
||||
def __repr__(self):
|
||||
L = []
|
||||
for K,V in self.items():
|
||||
items = self.items()
|
||||
items.sort()
|
||||
for K,V in items:
|
||||
L.append( '%s=%s' % (K,repr(V.value) ) )
|
||||
return '<%s: %s>' % (self.__class__.__name__, string.join(L))
|
||||
|
||||
def js_output(self, attrs=None):
|
||||
"""Return a string suitable for JavaScript."""
|
||||
result = []
|
||||
for K,V in self.items():
|
||||
items = self.items()
|
||||
items.sort()
|
||||
for K,V in items:
|
||||
result.append( V.js_output(attrs) )
|
||||
return string.join(result, "")
|
||||
# end js_output
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
test_cookie
|
||||
<SimpleCookie: vienna='finger' chips='ahoy'>
|
||||
Set-Cookie: vienna=finger;
|
||||
<SimpleCookie: chips='ahoy' vienna='finger'>
|
||||
Set-Cookie: chips=ahoy;
|
||||
vienna 'finger' 'finger'
|
||||
Set-Cookie: vienna=finger;
|
||||
chips 'ahoy' 'ahoy'
|
||||
Set-Cookie: chips=ahoy;
|
||||
vienna 'finger' 'finger'
|
||||
Set-Cookie: vienna=finger;
|
||||
<SimpleCookie: keebler='E=mc2; L="Loves"; fudge=\n;'>
|
||||
Set-Cookie: keebler="E=mc2; L=\"Loves\"; fudge=\012;";
|
||||
keebler 'E=mc2; L="Loves"; fudge=\n;' 'E=mc2; L="Loves"; fudge=\n;'
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ za () {} -> za() takes exactly 1 argument (0 given)
|
|||
za () {'a': 'aa'} -> ok za aa B D E V a
|
||||
za () {'d': 'dd'} -> za() got an unexpected keyword argument 'd'
|
||||
za () {'a': 'aa', 'd': 'dd'} -> za() got an unexpected keyword argument 'd'
|
||||
za () {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> za() got an unexpected keyword argument 'd'
|
||||
za () {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> za() got an unexpected keyword argument 'b'
|
||||
za (1, 2) {} -> za() takes exactly 1 argument (2 given)
|
||||
za (1, 2) {'a': 'aa'} -> za() takes exactly 1 non-keyword argument (2 given)
|
||||
za (1, 2) {'d': 'dd'} -> za() takes exactly 1 non-keyword argument (2 given)
|
||||
|
|
@ -59,8 +59,8 @@ zade () {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zade() got an unexpected
|
|||
zade (1, 2) {} -> ok zade 1 B 2 e V e
|
||||
zade (1, 2) {'a': 'aa'} -> zade() got multiple values for keyword argument 'a'
|
||||
zade (1, 2) {'d': 'dd'} -> zade() got multiple values for keyword argument 'd'
|
||||
zade (1, 2) {'a': 'aa', 'd': 'dd'} -> zade() got multiple values for keyword argument 'd'
|
||||
zade (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zade() got multiple values for keyword argument 'd'
|
||||
zade (1, 2) {'a': 'aa', 'd': 'dd'} -> zade() got multiple values for keyword argument 'a'
|
||||
zade (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zade() got multiple values for keyword argument 'a'
|
||||
zade (1, 2, 3, 4, 5) {} -> zade() takes at most 3 arguments (5 given)
|
||||
zade (1, 2, 3, 4, 5) {'a': 'aa'} -> zade() takes at most 3 non-keyword arguments (5 given)
|
||||
zade (1, 2, 3, 4, 5) {'d': 'dd'} -> zade() takes at most 3 non-keyword arguments (5 given)
|
||||
|
|
@ -75,7 +75,7 @@ zabk (1, 2) {} -> ok zabk 1 2 D E V {}
|
|||
zabk (1, 2) {'a': 'aa'} -> zabk() got multiple values for keyword argument 'a'
|
||||
zabk (1, 2) {'d': 'dd'} -> ok zabk 1 2 D E V {'d': 'dd'}
|
||||
zabk (1, 2) {'a': 'aa', 'd': 'dd'} -> zabk() got multiple values for keyword argument 'a'
|
||||
zabk (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabk() got multiple values for keyword argument 'b'
|
||||
zabk (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabk() got multiple values for keyword argument 'a'
|
||||
zabk (1, 2, 3, 4, 5) {} -> zabk() takes exactly 2 arguments (5 given)
|
||||
zabk (1, 2, 3, 4, 5) {'a': 'aa'} -> zabk() takes exactly 2 non-keyword arguments (5 given)
|
||||
zabk (1, 2, 3, 4, 5) {'d': 'dd'} -> zabk() takes exactly 2 non-keyword arguments (5 given)
|
||||
|
|
@ -90,12 +90,12 @@ zabdv (1, 2) {} -> ok zabdv 1 2 d E () e
|
|||
zabdv (1, 2) {'a': 'aa'} -> zabdv() got multiple values for keyword argument 'a'
|
||||
zabdv (1, 2) {'d': 'dd'} -> ok zabdv 1 2 dd E () d
|
||||
zabdv (1, 2) {'a': 'aa', 'd': 'dd'} -> zabdv() got multiple values for keyword argument 'a'
|
||||
zabdv (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdv() got an unexpected keyword argument 'e'
|
||||
zabdv (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdv() got multiple values for keyword argument 'a'
|
||||
zabdv (1, 2, 3, 4, 5) {} -> ok zabdv 1 2 3 E (4, 5) e
|
||||
zabdv (1, 2, 3, 4, 5) {'a': 'aa'} -> zabdv() got multiple values for keyword argument 'a'
|
||||
zabdv (1, 2, 3, 4, 5) {'d': 'dd'} -> zabdv() got multiple values for keyword argument 'd'
|
||||
zabdv (1, 2, 3, 4, 5) {'a': 'aa', 'd': 'dd'} -> zabdv() got multiple values for keyword argument 'd'
|
||||
zabdv (1, 2, 3, 4, 5) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdv() got multiple values for keyword argument 'd'
|
||||
zabdv (1, 2, 3, 4, 5) {'a': 'aa', 'd': 'dd'} -> zabdv() got multiple values for keyword argument 'a'
|
||||
zabdv (1, 2, 3, 4, 5) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdv() got multiple values for keyword argument 'a'
|
||||
zabdevk () {} -> zabdevk() takes at least 2 arguments (0 given)
|
||||
zabdevk () {'a': 'aa'} -> zabdevk() takes at least 2 non-keyword arguments (1 given)
|
||||
zabdevk () {'d': 'dd'} -> zabdevk() takes at least 2 non-keyword arguments (0 given)
|
||||
|
|
@ -105,9 +105,9 @@ zabdevk (1, 2) {} -> ok zabdevk 1 2 d e () {}
|
|||
zabdevk (1, 2) {'a': 'aa'} -> zabdevk() got multiple values for keyword argument 'a'
|
||||
zabdevk (1, 2) {'d': 'dd'} -> ok zabdevk 1 2 dd e () {}
|
||||
zabdevk (1, 2) {'a': 'aa', 'd': 'dd'} -> zabdevk() got multiple values for keyword argument 'a'
|
||||
zabdevk (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdevk() got multiple values for keyword argument 'b'
|
||||
zabdevk (1, 2) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdevk() got multiple values for keyword argument 'a'
|
||||
zabdevk (1, 2, 3, 4, 5) {} -> ok zabdevk 1 2 3 4 (5,) {}
|
||||
zabdevk (1, 2, 3, 4, 5) {'a': 'aa'} -> zabdevk() got multiple values for keyword argument 'a'
|
||||
zabdevk (1, 2, 3, 4, 5) {'d': 'dd'} -> zabdevk() got multiple values for keyword argument 'd'
|
||||
zabdevk (1, 2, 3, 4, 5) {'a': 'aa', 'd': 'dd'} -> zabdevk() got multiple values for keyword argument 'd'
|
||||
zabdevk (1, 2, 3, 4, 5) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdevk() got multiple values for keyword argument 'd'
|
||||
zabdevk (1, 2, 3, 4, 5) {'a': 'aa', 'd': 'dd'} -> zabdevk() got multiple values for keyword argument 'a'
|
||||
zabdevk (1, 2, 3, 4, 5) {'a': 'aa', 'b': 'bb', 'd': 'dd', 'e': 'ee'} -> zabdevk() got multiple values for keyword argument 'a'
|
||||
|
|
|
|||
|
|
@ -20,7 +20,9 @@
|
|||
C = Cookie.SimpleCookie() ; C.load(data)
|
||||
print repr(C)
|
||||
print str(C)
|
||||
for k, v in dict.items():
|
||||
items = dict.items()
|
||||
items.sort()
|
||||
for k, v in items:
|
||||
print ' ', k, repr( C[k].value ), repr(v)
|
||||
verify(C[k].value == v)
|
||||
print C[k]
|
||||
|
|
|
|||
|
|
@ -1,14 +1,6 @@
|
|||
from test_support import verify, verbose, TestFailed
|
||||
from test_support import verify, verbose, TestFailed, sortdict
|
||||
from UserList import UserList
|
||||
|
||||
def sortdict(d):
|
||||
keys = d.keys()
|
||||
keys.sort()
|
||||
lst = []
|
||||
for k in keys:
|
||||
lst.append("%r: %r" % (k, d[k]))
|
||||
return "{%s}" % ", ".join(lst)
|
||||
|
||||
def f(*a, **k):
|
||||
print a, sortdict(k)
|
||||
|
||||
|
|
@ -228,8 +220,9 @@ def method(self, arg1, arg2):
|
|||
lambda x: '%s="%s"' % (x, x), defargs)
|
||||
if vararg: arglist.append('*' + vararg)
|
||||
if kwarg: arglist.append('**' + kwarg)
|
||||
decl = 'def %s(%s): print "ok %s", a, b, d, e, v, k' % (
|
||||
name, ', '.join(arglist), name)
|
||||
decl = (('def %s(%s): print "ok %s", a, b, d, e, v, ' +
|
||||
'type(k) is type ("") and k or sortdict(k)')
|
||||
% (name, ', '.join(arglist), name))
|
||||
exec(decl)
|
||||
func = eval(name)
|
||||
funcs.append(func)
|
||||
|
|
|
|||
|
|
@ -5,9 +5,11 @@
|
|||
|
||||
from xml.parsers import expat
|
||||
|
||||
from test_support import sortdict
|
||||
|
||||
class Outputter:
|
||||
def StartElementHandler(self, name, attrs):
|
||||
print 'Start element:\n\t', repr(name), attrs
|
||||
print 'Start element:\n\t', repr(name), sortdict(attrs)
|
||||
|
||||
def EndElementHandler(self, name):
|
||||
print 'End element:\n\t', repr(name)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from test_support import verbose
|
||||
from test_support import verbose, sortdict
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore", "the regex module is deprecated",
|
||||
DeprecationWarning, __name__)
|
||||
|
|
@ -40,7 +40,7 @@
|
|||
print cre.group(1, 2)
|
||||
print cre.group('one', 'two')
|
||||
print 'realpat:', cre.realpat
|
||||
print 'groupindex:', cre.groupindex
|
||||
print 'groupindex:', sortdict(cre.groupindex)
|
||||
|
||||
re = 'world'
|
||||
cre = regex.compile(re)
|
||||
|
|
|
|||
|
|
@ -90,6 +90,14 @@ def verify(condition, reason='test failed'):
|
|||
if not condition:
|
||||
raise TestFailed(reason)
|
||||
|
||||
def sortdict(dict):
|
||||
"Like repr(dict), but in sorted order."
|
||||
items = dict.items()
|
||||
items.sort()
|
||||
reprpairs = ["%r: %r" % pair for pair in items]
|
||||
withcommas = ", ".join(reprpairs)
|
||||
return "{%s}" % withcommas
|
||||
|
||||
def check_syntax(statement):
|
||||
try:
|
||||
compile(statement, '<string>', 'exec')
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
from test_support import verify, verbose
|
||||
from test_support import verify, verbose, TestFailed
|
||||
import sys
|
||||
|
||||
def test(method, input, output, *args):
|
||||
|
|
@ -493,11 +493,14 @@ def __str__(self):
|
|||
'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||||
|
||||
'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||||
'cp1006', 'cp875', 'iso8859_8',
|
||||
'cp1006', 'iso8859_8',
|
||||
|
||||
### These have undefined mappings:
|
||||
#'cp424',
|
||||
|
||||
### These fail the round-trip:
|
||||
#'cp875'
|
||||
|
||||
):
|
||||
try:
|
||||
verify(unicode(s,encoding).encode(encoding) == s)
|
||||
|
|
|
|||
10
Misc/NEWS
10
Misc/NEWS
|
|
@ -23,6 +23,16 @@ Core
|
|||
usually for the better, but may also cause numerically unstable
|
||||
algorithms to break.
|
||||
|
||||
- The implementation of dicts suffers fewer collisions, which has speed
|
||||
benefits. However, the order in which dict entries appear in dict.keys(),
|
||||
dict.values() and dict.items() may differ from previous releases for a
|
||||
given dict. Nothing is defined about this order, so no program should
|
||||
rely on it. Nevertheless, it's easy to write test cases that rely on the
|
||||
order by accident, typically because of printing the str() or repr() of a
|
||||
dict to an "expected results" file. See Lib/test/test_support.py's new
|
||||
sortdict(dict) function for a simple way to display a dict in sorted
|
||||
order.
|
||||
|
||||
- Dictionary objects now support the "in" operator: "x in dict" means
|
||||
the same as dict.has_key(x).
|
||||
|
||||
|
|
|
|||
|
|
@ -188,12 +188,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
|
|||
/* We must come up with (i, incr) such that 0 <= i < ma_size
|
||||
and 0 < incr < ma_size and both are a function of hash.
|
||||
i is the initial table index and incr the initial probe offset. */
|
||||
i = (~hash) & mask;
|
||||
/* We use ~hash instead of hash, as degenerate hash functions, such
|
||||
as for ints <sigh>, can have lots of leading zeros. It's not
|
||||
really a performance risk, but better safe than sorry.
|
||||
12-Dec-00 tim: so ~hash produces lots of leading ones instead --
|
||||
what's the gain? */
|
||||
i = hash & mask;
|
||||
ep = &ep0[i];
|
||||
if (ep->me_key == NULL || ep->me_key == key)
|
||||
return ep;
|
||||
|
|
@ -301,10 +296,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
|
|||
}
|
||||
/* We must come up with (i, incr) such that 0 <= i < ma_size
|
||||
and 0 < incr < ma_size and both are a function of hash */
|
||||
i = (~hash) & mask;
|
||||
/* We use ~hash instead of hash, as degenerate hash functions, such
|
||||
as for ints <sigh>, can have lots of leading zeros. It's not
|
||||
really a performance risk, but better safe than sorry. */
|
||||
i = hash & mask;
|
||||
ep = &ep0[i];
|
||||
if (ep->me_key == NULL || ep->me_key == key)
|
||||
return ep;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue