mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	bpo-30647: Check nl_langinfo(CODESET) in locale coercion (GH-2374)
- On some versions of FreeBSD, setting the "UTF-8" locale succeeds, but a subsequent "nl_langinfo(CODESET)" call fails.
- Adding a check for this in the coercion logic means that coercion will happen on systems where the check succeeds, and will be skipped otherwise.
- That way, CPython should automatically adapt to changes in platform behaviour, rather than needing a new release to enable coercion at build time.
- This also allows UTF-8 to be re-enabled as a coercion target, restoring the locale coercion behaviour on Mac OS X.
This commit is contained in:
		
							parent
							
								
									f7d090c165
								
							
						
					
					
						commit
						18974c35ad
					
				
					 2 changed files with 29 additions and 15 deletions
				
			
		| 
						 | 
					@ -1,6 +1,7 @@
 | 
				
			||||||
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
 | 
					# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import unittest
 | 
					import unittest
 | 
				
			||||||
 | 
					import locale
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import sysconfig
 | 
					import sysconfig
 | 
				
			||||||
| 
						 | 
					@ -32,24 +33,34 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# In order to get the warning messages to match up as expected, the candidate
 | 
					# In order to get the warning messages to match up as expected, the candidate
 | 
				
			||||||
# order here must match the target locale order in Python/pylifecycle.c
 | 
					# order here must match the target locale order in Python/pylifecycle.c
 | 
				
			||||||
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
 | 
					_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
 | 
				
			||||||
 | 
					 | 
				
			||||||
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
 | 
					 | 
				
			||||||
#                 problems encountered on *BSD systems with those test cases
 | 
					 | 
				
			||||||
# For additional details see:
 | 
					 | 
				
			||||||
#     nl_langinfo CODESET error: https://bugs.python.org/issue30647
 | 
					 | 
				
			||||||
#     locale handling differences: https://bugs.python.org/issue30672
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# There's no reliable cross-platform way of checking locale alias
 | 
					# There's no reliable cross-platform way of checking locale alias
 | 
				
			||||||
# lists, so the only way of knowing which of these locales will work
 | 
					# lists, so the only way of knowing which of these locales will work
 | 
				
			||||||
# is to try them with locale.setlocale(). We do that in a subprocess
 | 
					# is to try them with locale.setlocale(). We do that in a subprocess
 | 
				
			||||||
# to avoid altering the locale of the test runner.
 | 
					# to avoid altering the locale of the test runner.
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# If the relevant locale module attributes exist, and we're not on a platform
 | 
				
			||||||
 | 
					# where we expect it to always succeed, we also check that
 | 
				
			||||||
 | 
					# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter
 | 
				
			||||||
 | 
					# will skip locale coercion for that particular target locale
 | 
				
			||||||
 | 
					_check_nl_langinfo_CODESET = bool(
 | 
				
			||||||
 | 
					    sys.platform not in ("darwin", "linux") and
 | 
				
			||||||
 | 
					    hasattr(locale, "nl_langinfo") and
 | 
				
			||||||
 | 
					    hasattr(locale, "CODESET")
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _set_locale_in_subprocess(locale_name):
 | 
					def _set_locale_in_subprocess(locale_name):
 | 
				
			||||||
    cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
 | 
					    cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
 | 
				
			||||||
 | 
					    if _check_nl_langinfo_CODESET:
 | 
				
			||||||
 | 
					        # If there's no valid CODESET, we expect coercion to be skipped
 | 
				
			||||||
 | 
					        cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
 | 
				
			||||||
    cmd = cmd_fmt.format(locale_name)
 | 
					    cmd = cmd_fmt.format(locale_name)
 | 
				
			||||||
    result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
 | 
					    result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
 | 
				
			||||||
    return result.rc == 0
 | 
					    return result.rc == 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
 | 
					_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
 | 
				
			||||||
_EncodingDetails = namedtuple("EncodingDetails", _fields)
 | 
					_EncodingDetails = namedtuple("EncodingDetails", _fields)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -399,17 +399,10 @@ typedef struct _CandidateLocale {
 | 
				
			||||||
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
 | 
					static _LocaleCoercionTarget _TARGET_LOCALES[] = {
 | 
				
			||||||
    {"C.UTF-8"},
 | 
					    {"C.UTF-8"},
 | 
				
			||||||
    {"C.utf8"},
 | 
					    {"C.utf8"},
 | 
				
			||||||
    /* {"UTF-8"}, */
 | 
					    {"UTF-8"},
 | 
				
			||||||
    {NULL}
 | 
					    {NULL}
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
 | 
					 | 
				
			||||||
 *                 problems encountered on *BSD systems with those test cases
 | 
					 | 
				
			||||||
 * For additional details see:
 | 
					 | 
				
			||||||
 *     nl_langinfo CODESET error: https://bugs.python.org/issue30647
 | 
					 | 
				
			||||||
 *     locale handling differences: https://bugs.python.org/issue30672
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static char *
 | 
					static char *
 | 
				
			||||||
get_default_standard_stream_error_handler(void)
 | 
					get_default_standard_stream_error_handler(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -490,6 +483,16 @@ _Py_CoerceLegacyLocale(void)
 | 
				
			||||||
                const char *new_locale = setlocale(LC_CTYPE,
 | 
					                const char *new_locale = setlocale(LC_CTYPE,
 | 
				
			||||||
                                                   target->locale_name);
 | 
					                                                   target->locale_name);
 | 
				
			||||||
                if (new_locale != NULL) {
 | 
					                if (new_locale != NULL) {
 | 
				
			||||||
 | 
					#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET)
 | 
				
			||||||
 | 
					                    /* Also ensure that nl_langinfo works in this locale */
 | 
				
			||||||
 | 
					                    char *codeset = nl_langinfo(CODESET);
 | 
				
			||||||
 | 
					                    if (!codeset || *codeset == '\0') {
 | 
				
			||||||
 | 
					                        /* CODESET is not set or empty, so skip coercion */
 | 
				
			||||||
 | 
					                        new_locale = NULL;
 | 
				
			||||||
 | 
					                        setlocale(LC_CTYPE, "");
 | 
				
			||||||
 | 
					                        continue;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
                    /* Successfully configured locale, so make it the default */
 | 
					                    /* Successfully configured locale, so make it the default */
 | 
				
			||||||
                    _coerce_default_locale_settings(target);
 | 
					                    _coerce_default_locale_settings(target);
 | 
				
			||||||
                    return;
 | 
					                    return;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue