mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	 e662c398d8
			
		
	
	
		e662c398d8
		
			
		
	
	
	
	
		
			
			If the nl_langinfo(CODESET) function returns an empty string, Python now uses UTF-8 as the filesystem encoding. In May 2010 (commit b744ba1d14), I modified Python to log a warning and use UTF-8 as the filesystem encoding (instead of None) if nl_langinfo(CODESET) returns an empty string. In August 2020 (commit 94908bbc15), I modified Python startup to fail with a fatal error and a specific error message if nl_langinfo(CODESET) returns an empty string. The intent was to prevent guessing the encoding and also to investigate user configurations where this case happens. In 10 years (2010 to 2020), I saw zero user reports about the error message related to nl_langinfo(CODESET) returning an empty string. Today, UTF-8 has become the de facto standard and it's safe to assume that the user expects UTF-8. For example, nl_langinfo(CODESET) can return an empty string on macOS if the LC_CTYPE locale is not supported, and UTF-8 is the default encoding on macOS. While this change is unlikely to affect anyone in practice, it should make UTF-8 lovers happy ;-) Also rewrite the documentation explaining how Python selects the filesystem encoding and error handler.
		
			
				
	
	
		
			59 lines
		
	
	
	
		
			1.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			59 lines
		
	
	
	
		
			1.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef Py_INTERNAL_FILEUTILS_H
 | |
| #define Py_INTERNAL_FILEUTILS_H
 | |
| #ifdef __cplusplus
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
| #ifndef Py_BUILD_CORE
 | |
| #  error "Py_BUILD_CORE must be defined to include this header"
 | |
| #endif
 | |
| 
 | |
| #include <locale.h>   /* struct lconv */
 | |
| /* Exported `int` variable (declared with PyAPI_DATA); defined elsewhere.
 | |
|    NOTE(review): the name suggests a flag tied to the filesystem default
 | |
|    encode-errors handler; its exact meaning is not visible in this header --
 | |
|    confirm against its definition. */
 | |
| PyAPI_DATA(int) _Py_HasFileSystemDefaultEncodeErrors;
 | |
| /* Prototype only; the definition lives outside this header.
 | |
|    NOTE(review): from the name and signature this presumably decodes the
 | |
|    UTF-8 bytes `arg` (`arglen` bytes) into a wide-character string stored in
 | |
|    *wstr, with its length in *wlen, an error description in *reason and
 | |
|    `errors` selecting the error handler. The meaning of the int result and
 | |
|    who frees *wstr are not visible here -- confirm in the implementation. */
 | |
| PyAPI_FUNC(int) _Py_DecodeUTF8Ex(
 | |
|     const char *arg,
 | |
|     Py_ssize_t arglen,
 | |
|     wchar_t **wstr,
 | |
|     size_t *wlen,
 | |
|     const char **reason,
 | |
|     _Py_error_handler errors);
 | |
| /* Prototype only; the definition lives outside this header.
 | |
|    NOTE(review): judging by the name this is the encoding counterpart of
 | |
|    _Py_DecodeUTF8Ex() -- presumably it encodes the wide-character string
 | |
|    `text` to UTF-8 and stores the result in *str. `error_pos` and `reason`
 | |
|    look like error-reporting out-parameters, `raw_malloc` like an allocator
 | |
|    selector, and `errors` is the error handler. Return codes and ownership
 | |
|    of *str are not visible here -- confirm in the implementation. */
 | |
| PyAPI_FUNC(int) _Py_EncodeUTF8Ex(
 | |
|     const wchar_t *text,
 | |
|     char **str,
 | |
|     size_t *error_pos,
 | |
|     const char **reason,
 | |
|     int raw_malloc,
 | |
|     _Py_error_handler errors);
 | |
| /* Prototype only; returns a wchar_t pointer.
 | |
|    NOTE(review): by its name this presumably decodes `arg` (`arglen` bytes
 | |
|    of UTF-8) using the surrogateescape error handler and reports the result
 | |
|    length through *wlen. The failure value and the deallocator for the
 | |
|    returned string are not shown in this header -- confirm. */
 | |
| PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
 | |
|     const char *arg,
 | |
|     Py_ssize_t arglen,
 | |
|     size_t *wlen);
 | |
| /* Takes no arguments and returns an int.
 | |
|    NOTE(review): presumably reports the "force ASCII" mode that
 | |
|    _Py_ResetForceASCII() resets; the meaning of the returned value is not
 | |
|    visible in this header -- confirm. */
 | |
| PyAPI_FUNC(int) _Py_GetForceASCII(void);
 | |
| 
 | |
| /* Reset "force ASCII" mode (if it was initialized).
 | |
| 
 | |
|    This function should be called when Python changes the LC_CTYPE locale,
 | |
|    so the "force ASCII" mode can be detected again on the new locale
 | |
|    encoding.
 | |
| 
 | |
|    Takes no arguments and returns no value. */
 | |
| PyAPI_FUNC(void) _Py_ResetForceASCII(void);
 | |
| 
 | |
| /* Takes a `struct lconv` pointer (the type comes from <locale.h>) and two
 | |
|    PyObject* out-parameters named `decimal_point` and `thousands_sep`.
 | |
|    NOTE(review): presumably fills the two outputs from the matching fields
 | |
|    of `lc`; the int return convention and the reference ownership of the
 | |
|    outputs are not visible in this header -- confirm. */
 | |
| PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
 | |
|     struct lconv *lc,
 | |
|     PyObject **decimal_point,
 | |
|     PyObject **thousands_sep);
 | |
| /* Takes two ints, `first` and `last`, and returns no value.
 | |
|    NOTE(review): by name this presumably closes the file descriptors in the
 | |
|    range first..last; whether `last` is inclusive and how errors are handled
 | |
|    is not visible in this header -- confirm. */
 | |
| PyAPI_FUNC(void) _Py_closerange(int first, int last);
 | |
| /* Two accessors that take no arguments: the first returns a wchar_t
 | |
|    pointer, the second a PyObject pointer.
 | |
|    NOTE(review): by name both presumably return the current locale
 | |
|    encoding in different representations; who owns the returned memory or
 | |
|    reference and what is returned on failure is not shown here -- confirm. */
 | |
| PyAPI_FUNC(wchar_t*) _Py_GetLocaleEncoding(void);
 | |
| PyAPI_FUNC(PyObject*) _Py_GetLocaleEncodingObject(void);
 | |
| 
 | |
| #ifdef __cplusplus
 | |
| }
 | |
| #endif
 | |
| #endif /* !Py_INTERNAL_FILEUTILS_H */
 |