mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)
This commit is contained in:
		
							parent
							
								
									accea7dc2b
								
							
						
					
					
						commit
						a07da09ad5
					
				
					 10 changed files with 25 additions and 21 deletions
				
			
		|  | @ -692,10 +692,9 @@ Here are the rules in detail: | ||||||
|   as ``-hash(-x)``.  If the resulting hash is ``-1``, replace it with |   as ``-hash(-x)``.  If the resulting hash is ``-1``, replace it with | ||||||
|   ``-2``. |   ``-2``. | ||||||
| 
 | 
 | ||||||
| - The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf`` | - The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf`` | ||||||
|   and ``sys.hash_info.nan`` are used as hash values for positive |   are used as hash values for positive | ||||||
|   infinity, negative infinity, or nans (respectively).  (All hashable |   infinity or negative infinity (respectively). | ||||||
|   nans have the same hash value.) |  | ||||||
| 
 | 
 | ||||||
| - For a :class:`complex` number ``z``, the hash values of the real | - For a :class:`complex` number ``z``, the hash values of the real | ||||||
|   and imaginary parts are combined by computing ``hash(z.real) + |   and imaginary parts are combined by computing ``hash(z.real) + | ||||||
|  | @ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`:: | ||||||
|        """Compute the hash of a float x.""" |        """Compute the hash of a float x.""" | ||||||
| 
 | 
 | ||||||
|        if math.isnan(x): |        if math.isnan(x): | ||||||
|            return sys.hash_info.nan |            return super().__hash__() | ||||||
|        elif math.isinf(x): |        elif math.isinf(x): | ||||||
|            return sys.hash_info.inf if x > 0 else -sys.hash_info.inf |            return sys.hash_info.inf if x > 0 else -sys.hash_info.inf | ||||||
|        else: |        else: | ||||||
|  |  | ||||||
|  | @ -855,7 +855,7 @@ always available. | ||||||
|    +---------------------+--------------------------------------------------+ |    +---------------------+--------------------------------------------------+ | ||||||
|    | :const:`inf`        | hash value returned for a positive infinity      | |    | :const:`inf`        | hash value returned for a positive infinity      | | ||||||
|    +---------------------+--------------------------------------------------+ |    +---------------------+--------------------------------------------------+ | ||||||
|    | :const:`nan`        | hash value returned for a nan                    | |    | :const:`nan`        | (this attribute is no longer used)               | | ||||||
|    +---------------------+--------------------------------------------------+ |    +---------------------+--------------------------------------------------+ | ||||||
|    | :const:`imag`       | multiplier used for the imaginary part of a      | |    | :const:`imag`       | multiplier used for the imaginary part of a      | | ||||||
|    |                     | complex number                                   | |    |                     | complex number                                   | | ||||||
|  |  | ||||||
|  | @ -7,7 +7,7 @@ extern "C" { | ||||||
| 
 | 
 | ||||||
| /* Helpers for hash functions */ | /* Helpers for hash functions */ | ||||||
| #ifndef Py_LIMITED_API | #ifndef Py_LIMITED_API | ||||||
| PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); | PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double); | ||||||
| PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); | PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); | ||||||
| // Similar to _Py_HashPointer(), but don't replace -1 with -2
 | // Similar to _Py_HashPointer(), but don't replace -1 with -2
 | ||||||
| PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*); | PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*); | ||||||
|  | @ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); | ||||||
| 
 | 
 | ||||||
| #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) | #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) | ||||||
| #define _PyHASH_INF 314159 | #define _PyHASH_INF 314159 | ||||||
| #define _PyHASH_NAN 0 |  | ||||||
| #define _PyHASH_IMAG _PyHASH_MULTIPLIER | #define _PyHASH_IMAG _PyHASH_MULTIPLIER | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -951,7 +951,7 @@ def __hash__(self): | ||||||
|             if self.is_snan(): |             if self.is_snan(): | ||||||
|                 raise TypeError('Cannot hash a signaling NaN value.') |                 raise TypeError('Cannot hash a signaling NaN value.') | ||||||
|             elif self.is_nan(): |             elif self.is_nan(): | ||||||
|                 return _PyHASH_NAN |                 return super().__hash__() | ||||||
|             else: |             else: | ||||||
|                 if self._sign: |                 if self._sign: | ||||||
|                     return -_PyHASH_INF |                     return -_PyHASH_INF | ||||||
|  |  | ||||||
|  | @ -0,0 +1,3 @@ | ||||||
|  | Hashes of NaN values now depend on object identity.  Formerly, they always | ||||||
|  | hashed to 0 even though NaN values are not equal to one another.  Having the | ||||||
|  | same hash for unequal values caused pile-ups in hash tables. | ||||||
|  | @ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v) | ||||||
|     #error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS" |     #error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS" | ||||||
| #endif | #endif | ||||||
|     const Py_hash_t py_hash_inf = 314159; |     const Py_hash_t py_hash_inf = 314159; | ||||||
|     const Py_hash_t py_hash_nan = 0; |  | ||||||
|     mpd_uint_t ten_data[1] = {10}; |     mpd_uint_t ten_data[1] = {10}; | ||||||
|     mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA, |     mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA, | ||||||
|                  0, 2, 1, 1, ten_data}; |                  0, 2, 1, 1, ten_data}; | ||||||
|  | @ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v) | ||||||
|             return -1; |             return -1; | ||||||
|         } |         } | ||||||
|         else if (mpd_isnan(MPD(v))) { |         else if (mpd_isnan(MPD(v))) { | ||||||
|             return py_hash_nan; |             return _Py_HashPointer(v); | ||||||
|         } |         } | ||||||
|         else { |         else { | ||||||
|             return py_hash_inf * mpd_arith_sign(MPD(v)); |             return py_hash_inf * mpd_arith_sign(MPD(v)); | ||||||
|  | @ -5939,5 +5938,3 @@ PyInit__decimal(void) | ||||||
| 
 | 
 | ||||||
|     return NULL; /* GCOV_NOT_REACHED */ |     return NULL; /* GCOV_NOT_REACHED */ | ||||||
| } | } | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|  |  | ||||||
|  | @ -412,10 +412,10 @@ static Py_hash_t | ||||||
| complex_hash(PyComplexObject *v) | complex_hash(PyComplexObject *v) | ||||||
| { | { | ||||||
|     Py_uhash_t hashreal, hashimag, combined; |     Py_uhash_t hashreal, hashimag, combined; | ||||||
|     hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real); |     hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real); | ||||||
|     if (hashreal == (Py_uhash_t)-1) |     if (hashreal == (Py_uhash_t)-1) | ||||||
|         return -1; |         return -1; | ||||||
|     hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag); |     hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag); | ||||||
|     if (hashimag == (Py_uhash_t)-1) |     if (hashimag == (Py_uhash_t)-1) | ||||||
|         return -1; |         return -1; | ||||||
|     /* Note:  if the imaginary part is 0, hashimag is 0 now,
 |     /* Note:  if the imaginary part is 0, hashimag is 0 now,
 | ||||||
|  |  | ||||||
|  | @ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op) | ||||||
| static Py_hash_t | static Py_hash_t | ||||||
| float_hash(PyFloatObject *v) | float_hash(PyFloatObject *v) | ||||||
| { | { | ||||||
|     return _Py_HashDouble(v->ob_fval); |     return _Py_HashDouble((PyObject *)v, v->ob_fval); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static PyObject * | static PyObject * | ||||||
|  |  | ||||||
|  | @ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; | ||||||
|    If the result of the reduction is infinity (this is impossible for |    If the result of the reduction is infinity (this is impossible for | ||||||
|    integers, floats and Decimals) then use the predefined hash value |    integers, floats and Decimals) then use the predefined hash value | ||||||
|    _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. |    _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. | ||||||
|    _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the |    _PyHASH_INF and -_PyHASH_INF are also used for the | ||||||
|    hashes of float and Decimal infinities and nans. |    hashes of float and Decimal infinities. | ||||||
|  | 
 | ||||||
|  |    NaNs hash with a pointer hash.  Having distinct hash values prevents | ||||||
|  |    catastrophic pileups from distinct NaN instances which used to always | ||||||
|  |    have the same hash value but would compare unequal. | ||||||
| 
 | 
 | ||||||
|    A selling point for the above strategy is that it makes it possible |    A selling point for the above strategy is that it makes it possible | ||||||
|    to compute hashes of decimal and binary floating-point numbers |    to compute hashes of decimal and binary floating-point numbers | ||||||
|  | @ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; | ||||||
| 
 | 
 | ||||||
|    */ |    */ | ||||||
| 
 | 
 | ||||||
|  | Py_hash_t _Py_HashPointer(const void *); | ||||||
|  | 
 | ||||||
| Py_hash_t | Py_hash_t | ||||||
| _Py_HashDouble(double v) | _Py_HashDouble(PyObject *inst, double v) | ||||||
| { | { | ||||||
|     int e, sign; |     int e, sign; | ||||||
|     double m; |     double m; | ||||||
|  | @ -93,7 +99,7 @@ _Py_HashDouble(double v) | ||||||
|         if (Py_IS_INFINITY(v)) |         if (Py_IS_INFINITY(v)) | ||||||
|             return v > 0 ? _PyHASH_INF : -_PyHASH_INF; |             return v > 0 ? _PyHASH_INF : -_PyHASH_INF; | ||||||
|         else |         else | ||||||
|             return _PyHASH_NAN; |             return _Py_HashPointer(inst); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     m = frexp(v, &e); |     m = frexp(v, &e); | ||||||
|  |  | ||||||
|  | @ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate) | ||||||
|     PyStructSequence_SET_ITEM(hash_info, field++, |     PyStructSequence_SET_ITEM(hash_info, field++, | ||||||
|                               PyLong_FromLong(_PyHASH_INF)); |                               PyLong_FromLong(_PyHASH_INF)); | ||||||
|     PyStructSequence_SET_ITEM(hash_info, field++, |     PyStructSequence_SET_ITEM(hash_info, field++, | ||||||
|                               PyLong_FromLong(_PyHASH_NAN)); |                               PyLong_FromLong(0));  // This is no longer used
 | ||||||
|     PyStructSequence_SET_ITEM(hash_info, field++, |     PyStructSequence_SET_ITEM(hash_info, field++, | ||||||
|                               PyLong_FromLong(_PyHASH_IMAG)); |                               PyLong_FromLong(_PyHASH_IMAG)); | ||||||
|     PyStructSequence_SET_ITEM(hash_info, field++, |     PyStructSequence_SET_ITEM(hash_info, field++, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Raymond Hettinger
						Raymond Hettinger