| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  | /* Finding the optimal width of unicode characters in a buffer */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if STRINGLIB_IS_UNICODE
 | 
					
						
							|  |  |  | 
 | 
					
						
/* Mask to quickly check whether a C 'long' contains a
   non-ASCII char, i.e. a byte with its high bit set. */
					
						
							|  |  |  | #if (SIZEOF_LONG == 8)
 | 
					
						
							| 
									
										
										
										
											2012-07-07 14:08:48 +02:00
										 |  |  | # define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  | #elif (SIZEOF_LONG == 4)
 | 
					
						
							| 
									
										
										
										
											2012-07-07 14:08:48 +02:00
										 |  |  | # define UCS1_ASCII_CHAR_MASK 0x80808080UL
 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  | #else
 | 
					
						
							|  |  |  | # error C 'long' size should be either 4 or 8!
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if STRINGLIB_SIZEOF_CHAR == 1
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Py_LOCAL_INLINE(Py_UCS4) | 
					
						
							|  |  |  | STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const unsigned char *p = (const unsigned char *) begin; | 
					
						
							| 
									
										
										
										
											2012-09-20 20:56:47 +02:00
										 |  |  |     const unsigned char *aligned_end = | 
					
						
							|  |  |  |             (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     while (p < end) { | 
					
						
							| 
									
										
										
										
											2012-09-20 20:56:47 +02:00
										 |  |  |         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  |             /* Help register allocation */ | 
					
						
							| 
									
										
										
										
											2013-08-13 20:18:52 +02:00
										 |  |  |             const unsigned char *_p = p; | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  |             while (_p < aligned_end) { | 
					
						
							|  |  |  |                 unsigned long value = *(unsigned long *) _p; | 
					
						
							|  |  |  |                 if (value & UCS1_ASCII_CHAR_MASK) | 
					
						
							|  |  |  |                     return 255; | 
					
						
							|  |  |  |                 _p += SIZEOF_LONG; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             p = _p; | 
					
						
							|  |  |  |             if (p == end) | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (*p++ & 0x80) | 
					
						
							|  |  |  |             return 255; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return 127; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #undef ASCII_CHAR_MASK
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else /* STRINGLIB_SIZEOF_CHAR == 1 */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define MASK_ASCII 0xFFFFFF80
 | 
					
						
							|  |  |  | #define MASK_UCS1 0xFFFFFF00
 | 
					
						
							|  |  |  | #define MASK_UCS2 0xFFFF0000
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define MAX_CHAR_ASCII 0x7f
 | 
					
						
							|  |  |  | #define MAX_CHAR_UCS1  0xff
 | 
					
						
							|  |  |  | #define MAX_CHAR_UCS2  0xffff
 | 
					
						
							|  |  |  | #define MAX_CHAR_UCS4  0x10ffff
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Py_LOCAL_INLINE(Py_UCS4) | 
					
						
							|  |  |  | STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #if STRINGLIB_SIZEOF_CHAR == 2
 | 
					
						
							|  |  |  |     const Py_UCS4 mask_limit = MASK_UCS1; | 
					
						
							|  |  |  |     const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; | 
					
						
							|  |  |  | #elif STRINGLIB_SIZEOF_CHAR == 4
 | 
					
						
							|  |  |  |     const Py_UCS4 mask_limit = MASK_UCS2; | 
					
						
							|  |  |  |     const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2013-08-13 20:18:52 +02:00
										 |  |  |     Py_UCS4 mask; | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  |     Py_ssize_t n = end - begin; | 
					
						
							|  |  |  |     const STRINGLIB_CHAR *p = begin; | 
					
						
							| 
									
										
										
										
											2012-09-20 20:56:47 +02:00
										 |  |  |     const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); | 
					
						
							| 
									
										
										
										
											2011-10-13 00:02:27 +02:00
										 |  |  |     Py_UCS4 max_char; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     max_char = MAX_CHAR_ASCII; | 
					
						
							|  |  |  |     mask = MASK_ASCII; | 
					
						
							|  |  |  |     while (p < unrolled_end) { | 
					
						
							|  |  |  |         STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; | 
					
						
							|  |  |  |         if (bits & mask) { | 
					
						
							|  |  |  |             if (mask == mask_limit) { | 
					
						
							|  |  |  |                 /* Limit reached */ | 
					
						
							|  |  |  |                 return max_char_limit; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             if (mask == MASK_ASCII) { | 
					
						
							|  |  |  |                 max_char = MAX_CHAR_UCS1; | 
					
						
							|  |  |  |                 mask = MASK_UCS1; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 /* mask can't be MASK_UCS2 because of mask_limit above */ | 
					
						
							|  |  |  |                 assert(mask == MASK_UCS1); | 
					
						
							|  |  |  |                 max_char = MAX_CHAR_UCS2; | 
					
						
							|  |  |  |                 mask = MASK_UCS2; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             /* We check the new mask on the same chars in the next iteration */ | 
					
						
							|  |  |  |             continue; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         p += 4; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     while (p < end) { | 
					
						
							|  |  |  |         if (p[0] & mask) { | 
					
						
							|  |  |  |             if (mask == mask_limit) { | 
					
						
							|  |  |  |                 /* Limit reached */ | 
					
						
							|  |  |  |                 return max_char_limit; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             if (mask == MASK_ASCII) { | 
					
						
							|  |  |  |                 max_char = MAX_CHAR_UCS1; | 
					
						
							|  |  |  |                 mask = MASK_UCS1; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 /* mask can't be MASK_UCS2 because of mask_limit above */ | 
					
						
							|  |  |  |                 assert(mask == MASK_UCS1); | 
					
						
							|  |  |  |                 max_char = MAX_CHAR_UCS2; | 
					
						
							|  |  |  |                 mask = MASK_UCS2; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             /* We check the new mask on the same chars in the next iteration */ | 
					
						
							|  |  |  |             continue; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         p++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return max_char; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #undef MASK_ASCII
 | 
					
						
							|  |  |  | #undef MASK_UCS1
 | 
					
						
							|  |  |  | #undef MASK_UCS2
 | 
					
						
							|  |  |  | #undef MAX_CHAR_ASCII
 | 
					
						
							|  |  |  | #undef MAX_CHAR_UCS1
 | 
					
						
							|  |  |  | #undef MAX_CHAR_UCS2
 | 
					
						
							|  |  |  | #undef MAX_CHAR_UCS4
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #endif /* STRINGLIB_SIZEOF_CHAR == 1 */
 | 
					
						
							|  |  |  | #endif /* STRINGLIB_IS_UNICODE */
 | 
					
						
							|  |  |  | 
 |