# Patch summary (Issue #26045): give a clearer error when http.client is
# asked to send a str request body that cannot be encoded as Latin-1.
#
# Lib/http/client.py
#   * Adds the private helper _encode(data, name='data').  It returns
#     data.encode("latin-1"); on UnicodeEncodeError it raises a new
#     UnicodeEncodeError carrying the same encoding/object/start/end but
#     with a reason string that shows the offending slice
#     data[err.start:err.end] (repr limited by "%.20r") and suggests
#     calling <name>.encode('utf-8').  "from None" suppresses the
#     original exception as chained context.
#   * _send_request() now encodes a str body with _encode(body, 'body')
#     instead of body.encode('iso-8859-1').  "latin-1" and "iso-8859-1"
#     name the same Python codec, so the bytes produced for valid input
#     are unchanged; only the failure message differs.
#
# Lib/test/test_httplib.py
#   * The loop over dir(client) in test_all() now also skips names that
#     start with "_", in addition to the existing blacklist.
#     NOTE(review): presumably this keeps the new private _encode helper
#     out of the public-names check -- the rest of the test is not
#     visible here, confirm against the full file.
#
# Misc/NEWS
#   * Adds the Issue #26045 entry under the "Library" section.
#
# NOTE(review): as seen here the whole patch is collapsed onto a single
# line; the original line breaks (including blank context lines) must be
# restored before a patch tool can apply it.  The patch text below is
# left untouched.
diff --git a/Lib/http/client.py b/Lib/http/client.py index f69e7bac3b4..36e4e313105 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -146,6 +146,21 @@ _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'} +def _encode(data, name='data'): + """Call data.encode("latin-1") but show a better error message.""" + try: + return data.encode("latin-1") + except UnicodeEncodeError as err: + raise UnicodeEncodeError( + err.encoding, + err.object, + err.start, + err.end, + "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') " + "if you want to send it encoded in UTF-8." % + (name.title(), data[err.start:err.end], name)) from None + + class HTTPMessage(email.message.Message): # XXX The only usage of this method is in # http.server.CGIHTTPRequestHandler. Maybe move the code there so @@ -1173,7 +1188,7 @@ def _send_request(self, method, url, body, headers): if isinstance(body, str): # RFC 2616 Section 3.7.1 says that text default has a # default charset of iso-8859-1. - body = body.encode('iso-8859-1') + body = _encode(body, 'body') self.endheaders(body) def getresponse(self): diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index d809414b637..295b9fb1468 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1042,7 +1042,7 @@ def test_all(self): # intentionally omitted for simplicity blacklist = {"HTTPMessage", "parse_headers"} for name in dir(client): - if name in blacklist: + if name.startswith("_") or name in blacklist: continue module_object = getattr(client, name) if getattr(module_object, "__module__", None) == "http.client": diff --git a/Misc/NEWS b/Misc/NEWS index 7a198d61341..efeff0a7cca 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -170,6 +170,9 @@ Core and Builtins Library ------- +- Issue #26045: Add UTF-8 suggestion to error message when posting a + non-Latin-1 string with http.client. + - Issue #26039: Added zipfile.ZipInfo.from_file() and zipinfo.ZipInfo.is_dir(). Patch by Thomas Kluyver.