mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Issue 2526, float.__format__ 'n' specifier does not support thousands grouping.
Implemented grouping, with tests. Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions.
This commit is contained in:
		
							parent
							
								
									48f6276ddc
								
							
						
					
					
						commit
						0a95063d73
					
				
					 2 changed files with 240 additions and 129 deletions
				
			
		|  | @ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) | |||
| 	return val; | ||||
| } | ||||
| 
 | ||||
| /* Given a string that may have a decimal point in the current
 | ||||
|    locale, change it back to a dot.  Since the string cannot get | ||||
|    longer, no need for a maximum buffer size parameter. */ | ||||
| Py_LOCAL_INLINE(void) | ||||
| change_decimal_from_locale_to_dot(char* buffer) | ||||
| { | ||||
| 	struct lconv *locale_data = localeconv(); | ||||
| 	const char *decimal_point = locale_data->decimal_point; | ||||
| 
 | ||||
| 	if (decimal_point[0] != '.' || decimal_point[1] != 0) { | ||||
| 		size_t decimal_point_len = strlen(decimal_point); | ||||
| 
 | ||||
| 		if (*buffer == '+' || *buffer == '-') | ||||
| 			buffer++; | ||||
| 		while (isdigit(Py_CHARMASK(*buffer))) | ||||
| 			buffer++; | ||||
| 		if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { | ||||
| 			*buffer = '.'; | ||||
| 			buffer++; | ||||
| 			if (decimal_point_len > 1) { | ||||
| 				/* buffer needs to get smaller */ | ||||
| 				size_t rest_len = strlen(buffer + | ||||
| 						     (decimal_point_len - 1)); | ||||
| 				memmove(buffer, | ||||
| 					buffer + (decimal_point_len - 1), | ||||
| 					rest_len); | ||||
| 				buffer[rest_len] = 0; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* From the C99 standard, section 7.19.6:
 | ||||
| The exponent always contains at least two digits, and only as many more digits | ||||
|  | @ -194,6 +226,189 @@ as necessary to represent the exponent. | |||
| */ | ||||
| #define MIN_EXPONENT_DIGITS 2 | ||||
| 
 | ||||
| /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
 | ||||
|    in length. */ | ||||
| Py_LOCAL_INLINE(void) | ||||
| ensure_minumim_exponent_length(char* buffer, size_t buf_size) | ||||
| { | ||||
| 	char *p = strpbrk(buffer, "eE"); | ||||
| 	if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { | ||||
| 		char *start = p + 2; | ||||
| 		int exponent_digit_cnt = 0; | ||||
| 		int leading_zero_cnt = 0; | ||||
| 		int in_leading_zeros = 1; | ||||
| 		int significant_digit_cnt; | ||||
| 
 | ||||
| 		/* Skip over the exponent and the sign. */ | ||||
| 		p += 2; | ||||
| 
 | ||||
| 		/* Find the end of the exponent, keeping track of leading
 | ||||
| 		   zeros. */ | ||||
| 		while (*p && isdigit(Py_CHARMASK(*p))) { | ||||
| 			if (in_leading_zeros && *p == '0') | ||||
| 				++leading_zero_cnt; | ||||
| 			if (*p != '0') | ||||
| 				in_leading_zeros = 0; | ||||
| 			++p; | ||||
| 			++exponent_digit_cnt; | ||||
| 		} | ||||
| 
 | ||||
| 		significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; | ||||
| 		if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { | ||||
| 			/* If there are 2 exactly digits, we're done,
 | ||||
| 			   regardless of what they contain */ | ||||
| 		} | ||||
| 		else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { | ||||
| 			int extra_zeros_cnt; | ||||
| 
 | ||||
| 			/* There are more than 2 digits in the exponent.  See
 | ||||
| 			   if we can delete some of the leading zeros */ | ||||
| 			if (significant_digit_cnt < MIN_EXPONENT_DIGITS) | ||||
| 				significant_digit_cnt = MIN_EXPONENT_DIGITS; | ||||
| 			extra_zeros_cnt = exponent_digit_cnt - | ||||
| 				significant_digit_cnt; | ||||
| 
 | ||||
| 			/* Delete extra_zeros_cnt worth of characters from the
 | ||||
| 			   front of the exponent */ | ||||
| 			assert(extra_zeros_cnt >= 0); | ||||
| 
 | ||||
| 			/* Add one to significant_digit_cnt to copy the
 | ||||
| 			   trailing 0 byte, thus setting the length */ | ||||
| 			memmove(start, | ||||
| 				start + extra_zeros_cnt, | ||||
| 				significant_digit_cnt + 1); | ||||
| 		} | ||||
| 		else { | ||||
| 			/* If there are fewer than 2 digits, add zeros
 | ||||
| 			   until there are 2, if there's enough room */ | ||||
| 			int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; | ||||
| 			if (start + zeros + exponent_digit_cnt + 1 | ||||
| 			      < buffer + buf_size) { | ||||
| 				memmove(start + zeros, start, | ||||
| 					exponent_digit_cnt + 1); | ||||
| 				memset(start, '0', zeros); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* Ensure that buffer has a decimal point in it.  The decimal point
 | ||||
|    will not be in the current locale, it will always be '.' */ | ||||
| Py_LOCAL_INLINE(void) | ||||
| ensure_decimal_point(char* buffer, size_t buf_size) | ||||
| { | ||||
| 	int insert_count = 0; | ||||
| 	char* chars_to_insert; | ||||
| 
 | ||||
| 	/* search for the first non-digit character */ | ||||
| 	char *p = buffer; | ||||
| 	while (*p && isdigit(Py_CHARMASK(*p))) | ||||
| 		++p; | ||||
| 
 | ||||
| 	if (*p == '.') { | ||||
| 		if (isdigit(Py_CHARMASK(*(p+1)))) { | ||||
| 			/* Nothing to do, we already have a decimal
 | ||||
| 			   point and a digit after it */ | ||||
| 		} | ||||
| 		else { | ||||
| 			/* We have a decimal point, but no following
 | ||||
| 			   digit.  Insert a zero after the decimal. */ | ||||
| 			++p; | ||||
| 			chars_to_insert = "0"; | ||||
| 			insert_count = 1; | ||||
| 		} | ||||
| 	} | ||||
| 	else { | ||||
| 		chars_to_insert = ".0"; | ||||
| 		insert_count = 2; | ||||
| 	} | ||||
| 	if (insert_count) { | ||||
| 		size_t buf_len = strlen(buffer); | ||||
| 		if (buf_len + insert_count + 1 >= buf_size) { | ||||
| 			/* If there is not enough room in the buffer
 | ||||
| 			   for the additional text, just skip it.  It's | ||||
| 			   not worth generating an error over. */ | ||||
| 		} | ||||
| 		else { | ||||
| 			memmove(p + insert_count, p, | ||||
| 				buffer + strlen(buffer) - p + 1); | ||||
| 			memcpy(p, chars_to_insert, insert_count); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* Add the locale specific grouping characters to buffer.  Note
 | ||||
|    that any decimal point (if it's present) in buffer is already | ||||
|    locale-specific.  Return 0 on error, else 1. */ | ||||
| Py_LOCAL_INLINE(int) | ||||
| add_thousands_grouping(char* buffer, size_t buf_size) | ||||
| { | ||||
| 	struct lconv *locale_data = localeconv(); | ||||
| 	const char *grouping = locale_data->grouping; | ||||
| 	const char *thousands_sep = locale_data->thousands_sep; | ||||
| 	size_t thousands_sep_len = strlen(thousands_sep); | ||||
| 	const char *decimal_point = locale_data->decimal_point; | ||||
| 	char *pend = buffer + strlen(buffer); /* current end of buffer */ | ||||
| 	char *pmax = buffer + buf_size;       /* max of buffer */ | ||||
| 	char current_grouping; | ||||
| 
 | ||||
| 	/* Find the decimal point, if any.  We're only concerned
 | ||||
| 	   about the characters to the left of the decimal when | ||||
| 	   adding grouping. */ | ||||
| 	char *p = strstr(buffer, decimal_point); | ||||
| 	if (!p) { | ||||
| 		/* No decimal, use the entire string. */ | ||||
| 
 | ||||
| 		/* If any exponent, adjust p. */ | ||||
| 		p = strpbrk(buffer, "eE"); | ||||
| 		if (!p) | ||||
| 			/* No exponent and no decimal.  Use the entire
 | ||||
| 			   string. */ | ||||
| 			p = pend; | ||||
| 	} | ||||
| 	/* At this point, p points just past the right-most character we
 | ||||
| 	   want to format.  We need to add the grouping string for the | ||||
| 	   characters between buffer and p. */ | ||||
| 
 | ||||
| 	/* Starting at p and working right-to-left, keep track of
 | ||||
| 	   what grouping needs to be added and insert that. */ | ||||
| 	current_grouping = *grouping++; | ||||
| 
 | ||||
| 	/* If the first character is 0, perform no grouping at all. */ | ||||
| 	if (current_grouping == 0) | ||||
| 		return 1; | ||||
| 
 | ||||
| 	while (p - buffer > current_grouping) { | ||||
| 		/* Always leave buffer and pend valid at the end of this
 | ||||
| 		   loop, since we might leave with a return statement. */ | ||||
| 
 | ||||
| 		/* Is there room to insert thousands_sep_len chars?. */ | ||||
| 		if (pmax - pend <= thousands_sep_len) | ||||
| 			/* No room. */ | ||||
| 			return 0; | ||||
| 
 | ||||
| 		/* Move the rest of the string down. */ | ||||
| 		p -= current_grouping; | ||||
| 		memmove(p + thousands_sep_len, | ||||
| 			p, | ||||
| 			pend - p + 1); | ||||
| 		/* Adjust end pointer. */ | ||||
| 		pend += thousands_sep_len; | ||||
| 		/* Copy the thousands_sep chars into the buffer. */ | ||||
| 		memcpy(p, thousands_sep, thousands_sep_len); | ||||
| 
 | ||||
| 		/* Move to the next grouping character, unless we're
 | ||||
| 		   repeating (which is designated by a grouping of 0). */ | ||||
| 		if (*grouping != 0) { | ||||
| 			current_grouping = *grouping++; | ||||
| 			if (current_grouping == CHAR_MAX) | ||||
| 				/* We're done. */ | ||||
| 				return 1; | ||||
| 		} | ||||
| 	} | ||||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| /* see FORMATBUFLEN in unicodeobject.c */ | ||||
| #define FLOAT_FORMATBUFLEN 120 | ||||
| 
 | ||||
|  | @ -222,7 +437,6 @@ PyOS_ascii_formatd(char       *buffer, | |||
| 		   const char *format,  | ||||
| 		   double      d) | ||||
| { | ||||
| 	char *p; | ||||
| 	char format_char; | ||||
| 	size_t format_len = strlen(format); | ||||
| 
 | ||||
|  | @ -277,144 +491,31 @@ PyOS_ascii_formatd(char       *buffer, | |||
| 	/* Have PyOS_snprintf do the hard work */ | ||||
| 	PyOS_snprintf(buffer, buf_size, format, d); | ||||
| 
 | ||||
| 	/* Get the current local, and find the decimal point character (or
 | ||||
| 	   string?).  Convert that string back to a dot.  Do not do this if | ||||
| 	   using the 'n' (number) format code. */ | ||||
| 	if (format_char != 'n') { | ||||
| 		struct lconv *locale_data = localeconv(); | ||||
| 		const char *decimal_point = locale_data->decimal_point; | ||||
| 		size_t decimal_point_len = strlen(decimal_point); | ||||
| 		size_t rest_len; | ||||
| 	/* Do various fixups on the return string */ | ||||
| 
 | ||||
| 		assert(decimal_point_len != 0); | ||||
| 
 | ||||
| 		if (decimal_point[0] != '.' || decimal_point[1] != 0) { | ||||
| 			p = buffer; | ||||
| 
 | ||||
| 			if (*p == '+' || *p == '-') | ||||
| 				p++; | ||||
| 
 | ||||
| 			while (isdigit(Py_CHARMASK(*p))) | ||||
| 				p++; | ||||
| 
 | ||||
| 			if (strncmp(p, decimal_point, | ||||
| 				    decimal_point_len) == 0) { | ||||
| 				*p = '.'; | ||||
| 				p++; | ||||
| 				if (decimal_point_len > 1) { | ||||
| 					rest_len = strlen(p + | ||||
| 						      (decimal_point_len - 1)); | ||||
| 					memmove(p, p + (decimal_point_len - 1), | ||||
| 						rest_len); | ||||
| 					p[rest_len] = 0; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	/* Get the current locale, and find the decimal point string.
 | ||||
| 	   Convert that string back to a dot.  Do not do this if using the | ||||
| 	   'n' (number) format code, since we want to keep the localized | ||||
| 	   decimal point in that case. */ | ||||
| 	if (format_char != 'n') | ||||
| 		change_decimal_from_locale_to_dot(buffer); | ||||
| 
 | ||||
| 	/* If an exponent exists, ensure that the exponent is at least
 | ||||
| 	   MIN_EXPONENT_DIGITS digits, providing the buffer is large enough | ||||
| 	   for the extra zeros.  Also, if there are more than | ||||
| 	   MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get | ||||
| 	   back to MIN_EXPONENT_DIGITS */ | ||||
| 	p = strpbrk(buffer, "eE"); | ||||
| 	if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { | ||||
| 		char *start = p + 2; | ||||
| 		int exponent_digit_cnt = 0; | ||||
| 		int leading_zero_cnt = 0; | ||||
| 		int in_leading_zeros = 1; | ||||
| 		int significant_digit_cnt; | ||||
| 
 | ||||
| 		p += 2; | ||||
| 		while (*p && isdigit(Py_CHARMASK(*p))) { | ||||
| 			if (in_leading_zeros && *p == '0') | ||||
| 				++leading_zero_cnt; | ||||
| 			if (*p != '0') | ||||
| 				in_leading_zeros = 0; | ||||
| 			++p; | ||||
| 			++exponent_digit_cnt; | ||||
| 		} | ||||
| 
 | ||||
| 		significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; | ||||
| 		if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { | ||||
| 			/* If there are 2 exactly digits, we're done,
 | ||||
| 			   regardless of what they contain */ | ||||
| 		} | ||||
| 		else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { | ||||
| 			int extra_zeros_cnt; | ||||
| 
 | ||||
| 			/* There are more than 2 digits in the exponent.  See
 | ||||
| 			   if we can delete some of the leading zeros */ | ||||
| 			if (significant_digit_cnt < MIN_EXPONENT_DIGITS) | ||||
| 				significant_digit_cnt = MIN_EXPONENT_DIGITS; | ||||
| 			extra_zeros_cnt = exponent_digit_cnt - | ||||
| 				significant_digit_cnt; | ||||
| 
 | ||||
| 			/* Delete extra_zeros_cnt worth of characters from the
 | ||||
| 			   front of the exponent */ | ||||
| 			assert(extra_zeros_cnt >= 0); | ||||
| 
 | ||||
| 			/* Add one to significant_digit_cnt to copy the
 | ||||
| 			   trailing 0 byte, thus setting the length */ | ||||
| 			memmove(start, | ||||
| 				start + extra_zeros_cnt, | ||||
| 				significant_digit_cnt + 1); | ||||
| 		} | ||||
| 		else { | ||||
| 			/* If there are fewer than 2 digits, add zeros
 | ||||
| 			   until there are 2, if there's enough room */ | ||||
| 			int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; | ||||
| 			if (start + zeros + exponent_digit_cnt + 1 | ||||
| 			      < buffer + buf_size) { | ||||
| 				memmove(start + zeros, start, | ||||
| 					exponent_digit_cnt + 1); | ||||
| 				memset(start, '0', zeros); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	ensure_minumim_exponent_length(buffer, buf_size); | ||||
| 
 | ||||
| 	/* If format_char is 'Z', make sure we have at least one character
 | ||||
| 	   after the decimal point (and make sure we have a decimal point). */ | ||||
| 	if (format_char == 'Z') { | ||||
| 		int insert_count = 0; | ||||
| 		char* chars_to_insert; | ||||
| 	if (format_char == 'Z') | ||||
| 		ensure_decimal_point(buffer, buf_size); | ||||
| 
 | ||||
| 		/* search for the first non-digit character */ | ||||
| 		p = buffer; | ||||
| 		while (*p && isdigit(Py_CHARMASK(*p))) | ||||
| 			++p; | ||||
| 
 | ||||
| 		if (*p == '.') { | ||||
| 			if (isdigit(Py_CHARMASK(*(p+1)))) { | ||||
| 				/* Nothing to do, we already have a decimal
 | ||||
| 				   point and a digit after it */ | ||||
| 			} | ||||
| 			else { | ||||
| 				/* We have a decimal point, but no following
 | ||||
| 				   digit.  Insert a zero after the decimal. */ | ||||
| 				++p; | ||||
| 				chars_to_insert = "0"; | ||||
| 				insert_count = 1; | ||||
| 			} | ||||
| 		} | ||||
| 		else { | ||||
| 			chars_to_insert = ".0"; | ||||
| 			insert_count = 2; | ||||
| 		} | ||||
| 		if (insert_count) { | ||||
| 			size_t buf_len = strlen(buffer); | ||||
| 			if (buf_len + insert_count + 1 >= buf_size) { | ||||
| 				/* If there is not enough room in the buffer
 | ||||
| 				   for the additional text, just skip it.  It's | ||||
| 				   not worth generating an error over. */ | ||||
| 			} | ||||
| 			else { | ||||
| 				memmove(p + insert_count, p, | ||||
| 					buffer + strlen(buffer) - p + 1); | ||||
| 				memcpy(p, chars_to_insert, insert_count); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	/* If format_char is 'n', add the thousands grouping. */ | ||||
| 	if (format_char == 'n') | ||||
| 		if (!add_thousands_grouping(buffer, buf_size)) | ||||
| 			return NULL; | ||||
| 
 | ||||
| 	return buffer; | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Eric Smith
						Eric Smith