| 
									
										
										
										
											2020-06-08 16:30:33 +02:00
										 /* Bit and byte utilities.

   Byte swap functions, which reverse the order of bytes:

   - _Py_bswap16(uint16_t)
   - _Py_bswap32(uint32_t)
   - _Py_bswap64(uint64_t)
*/
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-15 14:33:48 +02:00
										 |  |  | #ifndef Py_INTERNAL_BITUTILS_H
 | 
					
						
							|  |  |  | #define Py_INTERNAL_BITUTILS_H
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  | #ifdef __cplusplus
 | 
					
						
							|  |  |  | extern "C" { | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifndef Py_BUILD_CORE
 | 
					
						
							|  |  |  | #  error "this header requires Py_BUILD_CORE define"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 15:38:17 +01:00
										 /* Detect GCC-compatible compilers which provide every byte-swap builtin.
   __builtin_bswap32() and __builtin_bswap64() are available since GCC 4.3,
   but __builtin_bswap16() is only available since GCC 4.8, so GCC 4.8 is
   the oldest version accepted here. */
#if defined(__GNUC__) \
      && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)))
#  define _PY_HAVE_BUILTIN_BSWAP
#endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _MSC_VER
 | 
					
						
							| 
									
										
										
										
											2023-08-21 20:05:59 +02:00
										 |  |  | #  include <intrin.h>             // _byteswap_uint64()
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-21 20:05:59 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  | static inline uint16_t | 
					
						
							|  |  |  | _Py_bswap16(uint16_t word) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2020-11-13 15:38:17 +01:00
										 |  |  | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  |     return __builtin_bswap16(word); | 
					
						
							|  |  |  | #elif defined(_MSC_VER)
 | 
					
						
							|  |  |  |     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short)); | 
					
						
							|  |  |  |     return _byteswap_ushort(word); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     // Portable implementation which doesn't rely on circular bit shift
 | 
					
						
							|  |  |  |     return ( ((word & UINT16_C(0x00FF)) << 8) | 
					
						
							|  |  |  |            | ((word & UINT16_C(0xFF00)) >> 8)); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline uint32_t | 
					
						
							|  |  |  | _Py_bswap32(uint32_t word) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2020-11-13 15:38:17 +01:00
										 |  |  | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  |     return __builtin_bswap32(word); | 
					
						
							|  |  |  | #elif defined(_MSC_VER)
 | 
					
						
							|  |  |  |     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long)); | 
					
						
							|  |  |  |     return _byteswap_ulong(word); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     // Portable implementation which doesn't rely on circular bit shift
 | 
					
						
							|  |  |  |     return ( ((word & UINT32_C(0x000000FF)) << 24) | 
					
						
							|  |  |  |            | ((word & UINT32_C(0x0000FF00)) <<  8) | 
					
						
							|  |  |  |            | ((word & UINT32_C(0x00FF0000)) >>  8) | 
					
						
							|  |  |  |            | ((word & UINT32_C(0xFF000000)) >> 24)); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline uint64_t | 
					
						
							|  |  |  | _Py_bswap64(uint64_t word) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2020-11-13 15:38:17 +01:00
										 |  |  | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  |     return __builtin_bswap64(word); | 
					
						
							|  |  |  | #elif defined(_MSC_VER)
 | 
					
						
							|  |  |  |     return _byteswap_uint64(word); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     // Portable implementation which doesn't rely on circular bit shift
 | 
					
						
							|  |  |  |     return ( ((word & UINT64_C(0x00000000000000FF)) << 56) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0x000000000000FF00)) << 40) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0x0000000000FF0000)) << 24) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0x00000000FF000000)) <<  8) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0x000000FF00000000)) >>  8) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0x0000FF0000000000)) >> 24) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0x00FF000000000000)) >> 40) | 
					
						
							|  |  |  |            | ((word & UINT64_C(0xFF00000000000000)) >> 56)); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-08 16:30:33 +02:00
										 // Population count: return the number of bits set to 1 in 'x', also known as
// the Hamming weight.
//
// Implementation note. To keep the implementation simple, CPUID is not used
// to test whether the x86 POPCNT instruction can be used. For example, the
// Visual Studio __popcnt() intrinsic is not used for this reason. The clang
// and GCC builtin functions can use the x86 POPCNT instruction if the target
// architecture has SSE4a or newer.
static inline int
_Py_popcount32(uint32_t x)
{
#if (defined(__clang__) || defined(__GNUC__)) && SIZEOF_INT >= 4
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
    return __builtin_popcount(x);
#elif (defined(__clang__) || defined(__GNUC__))
    // 'int' is narrower than 4 bytes here. The C standard guarantees that
    // unsigned long will always be big enough to hold a uint32_t value
    // without losing information.
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
    return __builtin_popcountl(x);
#else
    // 32-bit SWAR (SIMD Within A Register) popcount

    // Binary: 0 1 0 1 ...
    const uint32_t MASK_1BIT = 0x55555555;
    // Binary: 00 11 00 11 ...
    const uint32_t MASK_2BITS = 0x33333333;
    // Binary: 0000 1111 0000 1111 ...
    const uint32_t MASK_4BITS = 0x0F0F0F0F;

    // Each 2-bit field holds the count of the 2 bits it replaced
    const uint32_t per_2bits = x - ((x >> 1) & MASK_1BIT);
    // Each 4-bit field holds the count of the 4 bits it replaced
    const uint32_t per_4bits = (per_2bits & MASK_2BITS)
                               + ((per_2bits >> 2) & MASK_2BITS);
    // Each byte holds the count of the 8 bits it replaced
    const uint32_t per_byte = (per_4bits + (per_4bits >> 4)) & MASK_4BITS;

    // Sum of the 4 byte counts: the multiplication accumulates them into the
    // most significant byte, which the shift then extracts.
    //
    // Take care when considering changes to the next line. Portability and
    // correctness are delicate here, thanks to C's "integer promotions" (C99
    // §6.3.1.1p2). On machines where the `int` type is wider than 32 bits,
    // the uint32_t operand is promoted to `int`, and following C's "usual
    // arithmetic conversions" (C99 §6.3.1.8), the multiplication is performed
    // on two `unsigned int` operands. In that case the cast back to
    // `uint32_t` is critical: it keeps only the least significant 32 bits.
    // On machines where `int` is no wider than 32 bits, the multiplication is
    // of two 32-bit unsigned integer types and the cast is a no-op. In both
    // cases, the risk of undefined behaviour due to overflow of a
    // multiplication of signed integer types is avoided.
    return (uint32_t)(per_byte * 0x01010101U) >> 24;
#endif
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-15 14:33:48 +02:00
										 // Return the bit length of 'x': the smallest integer k such that x < 2**k.
// For x != 0, this is the 1-based position of the most significant 1 bit,
// equivalent to floor(log2(x)) + 1. For x == 0, the result is 0.
static inline int
_Py_bit_length(unsigned long x)
{
#if (defined(__clang__) || defined(__GNUC__))
    // __builtin_clzl() is available since GCC 3.4, but calling it with
    // x == 0 is undefined behavior: handle zero first.
    if (x == 0) {
        return 0;
    }
    return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
#elif defined(_MSC_VER)
    // _BitScanReverse() is documented to search 32 bits.
    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
    unsigned long msb;
    if (!_BitScanReverse(&msb, x)) {
        // No set bit was found
        return 0;
    }
    // 'msb' is a 0-based bit index
    return (int)msb + 1;
#else
    // Bit length of each value in the range [0; 31]
    const int BIT_LENGTH_TABLE[32] = {
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
    };
    int length = 0;
    // While 'x' is too large for the table, it has at least 6 bits:
    // drop the 6 lowest bits and account for them.
    for (; x >= 32; x >>= 6) {
        length += 6;
    }
    return length + BIT_LENGTH_TABLE[x];
#endif
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-17 17:47:20 +02:00
										 |  |  | #ifdef __cplusplus
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2020-06-15 14:33:48 +02:00
										 |  |  | #endif /* !Py_INTERNAL_BITUTILS_H */
 |