mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	GH-84850: Remove urllib.request.URLopener and FancyURLopener (#125739)
				
					
				
			This commit is contained in:
		
							parent
							
								
									a99dd23c1f
								
							
						
					
					
						commit
						4d771977b1
					
				
					 7 changed files with 44 additions and 987 deletions
				
			
		|  | @ -145,10 +145,6 @@ although there is currently no date scheduled for their removal. | |||
|   * ``splitvalue()`` | ||||
|   * ``to_bytes()`` | ||||
| 
 | ||||
| * :mod:`urllib.request`: :class:`~urllib.request.URLopener` and | ||||
|   :class:`~urllib.request.FancyURLopener` style of invoking requests is | ||||
|   deprecated. Use newer :func:`~urllib.request.urlopen` functions and methods. | ||||
| 
 | ||||
| * :mod:`wsgiref`: ``SimpleHandler.stdout.write()`` should not do partial | ||||
|   writes. | ||||
| 
 | ||||
|  |  | |||
|  | @ -67,8 +67,7 @@ The :mod:`urllib.request` module defines the following functions: | |||
|    the response headers as it is specified in the documentation for | ||||
|    :class:`~http.client.HTTPResponse`. | ||||
| 
 | ||||
|    For FTP, file, and data URLs and requests explicitly handled by legacy | ||||
|    :class:`URLopener` and :class:`FancyURLopener` classes, this function | ||||
|    For FTP, file, and data URLs, this function | ||||
|    returns a :class:`urllib.response.addinfourl` object. | ||||
| 
 | ||||
|    Raises :exc:`~urllib.error.URLError` on protocol errors. | ||||
|  | @ -1339,7 +1338,7 @@ environment settings:: | |||
| 
 | ||||
|    >>> import urllib.request | ||||
|    >>> proxies = {'http': 'http://proxy.example.com:8080/'} | ||||
|    >>> opener = urllib.request.FancyURLopener(proxies) | ||||
|    >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies)) | ||||
|    >>> with opener.open("http://www.python.org") as f: | ||||
|    ...     f.read().decode('utf-8') | ||||
|    ... | ||||
|  | @ -1347,7 +1346,7 @@ environment settings:: | |||
| The following example uses no proxies at all, overriding environment settings:: | ||||
| 
 | ||||
|    >>> import urllib.request | ||||
|    >>> opener = urllib.request.FancyURLopener({}) | ||||
|    >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({})) | ||||
|    >>> with opener.open("http://www.python.org/") as f: | ||||
|    ...     f.read().decode('utf-8') | ||||
|    ... | ||||
|  | @ -1412,121 +1411,6 @@ some point in the future. | |||
|    Cleans up temporary files that may have been left behind by previous | ||||
|    calls to :func:`urlretrieve`. | ||||
| 
 | ||||
| .. class:: URLopener(proxies=None, **x509) | ||||
| 
 | ||||
|    .. deprecated:: 3.3 | ||||
| 
 | ||||
|    Base class for opening and reading URLs.  Unless you need to support opening | ||||
|    objects using schemes other than :file:`http:`, :file:`ftp:`, or :file:`file:`, | ||||
|    you probably want to use :class:`FancyURLopener`. | ||||
| 
 | ||||
|    By default, the :class:`URLopener` class sends a :mailheader:`User-Agent` header | ||||
|    of ``urllib/VVV``, where *VVV* is the :mod:`urllib` version number. | ||||
|    Applications can define their own :mailheader:`User-Agent` header by subclassing | ||||
|    :class:`URLopener` or :class:`FancyURLopener` and setting the class attribute | ||||
|    :attr:`version` to an appropriate string value in the subclass definition. | ||||
| 
 | ||||
|    The optional *proxies* parameter should be a dictionary mapping scheme names to | ||||
|    proxy URLs, where an empty dictionary turns proxies off completely.  Its default | ||||
|    value is ``None``, in which case environmental proxy settings will be used if | ||||
|    present, as discussed in the definition of :func:`urlopen`, above. | ||||
| 
 | ||||
|    Additional keyword parameters, collected in *x509*, may be used for | ||||
|    authentication of the client when using the :file:`https:` scheme.  The keywords | ||||
|    *key_file* and *cert_file* are supported to provide an  SSL key and certificate; | ||||
|    both are needed to support client authentication. | ||||
| 
 | ||||
|    :class:`URLopener` objects will raise an :exc:`OSError` exception if the server | ||||
|    returns an error code. | ||||
| 
 | ||||
|    .. method:: open(fullurl, data=None) | ||||
| 
 | ||||
|       Open *fullurl* using the appropriate protocol.  This method sets up cache and | ||||
|       proxy information, then calls the appropriate open method with its input | ||||
|       arguments.  If the scheme is not recognized, :meth:`open_unknown` is called. | ||||
|       The *data* argument has the same meaning as the *data* argument of | ||||
|       :func:`urlopen`. | ||||
| 
 | ||||
|       This method always quotes *fullurl* using :func:`~urllib.parse.quote`. | ||||
| 
 | ||||
|    .. method:: open_unknown(fullurl, data=None) | ||||
| 
 | ||||
|       Overridable interface to open unknown URL types. | ||||
| 
 | ||||
| 
 | ||||
|    .. method:: retrieve(url, filename=None, reporthook=None, data=None) | ||||
| 
 | ||||
|       Retrieves the contents of *url* and places it in *filename*.  The return value | ||||
|       is a tuple consisting of a local filename and either an | ||||
|       :class:`email.message.Message` object containing the response headers (for remote | ||||
|       URLs) or ``None`` (for local URLs).  The caller must then open and read the | ||||
|       contents of *filename*.  If *filename* is not given and the URL refers to a | ||||
|       local file, the input filename is returned.  If the URL is non-local and | ||||
|       *filename* is not given, the filename is the output of :func:`tempfile.mktemp` | ||||
|       with a suffix that matches the suffix of the last path component of the input | ||||
|       URL.  If *reporthook* is given, it must be a function accepting three numeric | ||||
|       parameters: A chunk number, the maximum size chunks are read in and the total size of the download | ||||
|       (-1 if unknown).  It will be called once at the start and after each chunk of data is read from the | ||||
|       network.  *reporthook* is ignored for local URLs. | ||||
| 
 | ||||
|       If the *url* uses the :file:`http:` scheme identifier, the optional *data* | ||||
|       argument may be given to specify a ``POST`` request (normally the request type | ||||
|       is ``GET``).  The *data* argument must be in standard | ||||
|       :mimetype:`application/x-www-form-urlencoded` format; see the | ||||
|       :func:`urllib.parse.urlencode` function. | ||||
| 
 | ||||
| 
 | ||||
|    .. attribute:: version | ||||
| 
 | ||||
|       Variable that specifies the user agent of the opener object.  To get | ||||
|       :mod:`urllib` to tell servers that it is a particular user agent, set this in a | ||||
|       subclass as a class variable or in the constructor before calling the base | ||||
|       constructor. | ||||
| 
 | ||||
| 
 | ||||
| .. class:: FancyURLopener(...) | ||||
| 
 | ||||
|    .. deprecated:: 3.3 | ||||
| 
 | ||||
|    :class:`FancyURLopener` subclasses :class:`URLopener` providing default handling | ||||
|    for the following HTTP response codes: 301, 302, 303, 307 and 401.  For the 30x | ||||
|    response codes listed above, the :mailheader:`Location` header is used to fetch | ||||
|    the actual URL.  For 401 response codes (authentication required), basic HTTP | ||||
|    authentication is performed.  For the 30x response codes, recursion is bounded | ||||
|    by the value of the *maxtries* attribute, which defaults to 10. | ||||
| 
 | ||||
|    For all other response codes, the method :meth:`~BaseHandler.http_error_default` is called | ||||
|    which you can override in subclasses to handle the error appropriately. | ||||
| 
 | ||||
|    .. note:: | ||||
| 
 | ||||
|       According to the letter of :rfc:`2616`, 301 and 302 responses to POST requests | ||||
|       must not be automatically redirected without confirmation by the user.  In | ||||
|       reality, browsers do allow automatic redirection of these responses, changing | ||||
|       the POST to a GET, and :mod:`urllib` reproduces this behaviour. | ||||
| 
 | ||||
|    The parameters to the constructor are the same as those for :class:`URLopener`. | ||||
| 
 | ||||
|    .. note:: | ||||
| 
 | ||||
|       When performing basic authentication, a :class:`FancyURLopener` instance calls | ||||
|       its :meth:`prompt_user_passwd` method.  The default implementation asks the | ||||
|       users for the required information on the controlling terminal.  A subclass may | ||||
|       override this method to support more appropriate behavior if needed. | ||||
| 
 | ||||
|    The :class:`FancyURLopener` class offers one additional method that should be | ||||
|    overloaded to provide the appropriate behavior: | ||||
| 
 | ||||
|    .. method:: prompt_user_passwd(host, realm) | ||||
| 
 | ||||
|       Return information needed to authenticate the user at the given host in the | ||||
|       specified security realm.  The return value should be a tuple, ``(user, | ||||
|       password)``, which can be used for basic authentication. | ||||
| 
 | ||||
|       The implementation prompts for this information on the terminal; an application | ||||
|       should override this method to use an appropriate interaction model in the local | ||||
|       environment. | ||||
| 
 | ||||
| 
 | ||||
| :mod:`urllib.request` Restrictions | ||||
| ---------------------------------- | ||||
|  | @ -1578,8 +1462,7 @@ some point in the future. | |||
|   you try to fetch a file whose read permissions make it inaccessible; the FTP | ||||
|   code will try to read it, fail with a 550 error, and then perform a directory | ||||
|   listing for the unreadable file. If fine-grained control is needed, consider | ||||
|   using the :mod:`ftplib` module, subclassing :class:`FancyURLopener`, or changing | ||||
|   *_urlopener* to meet your needs. | ||||
|   using the :mod:`ftplib` module. | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -769,6 +769,10 @@ urllib | |||
| * Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`. | ||||
|   It had previously raised a :exc:`DeprecationWarning` since Python 3.11. | ||||
|   (Contributed by Nikita Sobolev in :gh:`118827`.) | ||||
| * Remove deprecated :class:`!URLopener` and :class:`!FancyURLopener` classes | ||||
|   from :mod:`urllib.request`. They had previously raised a | ||||
|   :exc:`DeprecationWarning` since Python 3.3. | ||||
|   (Contributed by Barney Gale in :gh:`84850`.) | ||||
| 
 | ||||
| Others | ||||
| ------ | ||||
|  |  | |||
|  | @ -7,11 +7,9 @@ | |||
| import email.message | ||||
| import io | ||||
| import unittest | ||||
| from unittest.mock import patch | ||||
| from test import support | ||||
| from test.support import os_helper | ||||
| from test.support import socket_helper | ||||
| from test.support import warnings_helper | ||||
| import os | ||||
| try: | ||||
|     import ssl | ||||
|  | @ -20,7 +18,6 @@ | |||
| import sys | ||||
| import tempfile | ||||
| 
 | ||||
| from base64 import b64encode | ||||
| import collections | ||||
| 
 | ||||
| 
 | ||||
|  | @ -35,32 +32,6 @@ def hexescape(char): | |||
|         hex_repr = "0%s" % hex_repr | ||||
|     return "%" + hex_repr | ||||
| 
 | ||||
| # Shortcut for testing FancyURLopener | ||||
| _urlopener = None | ||||
| 
 | ||||
| 
 | ||||
| def urlopen(url, data=None, proxies=None): | ||||
|     """urlopen(url [, data]) -> open file-like object""" | ||||
|     global _urlopener | ||||
|     if proxies is not None: | ||||
|         opener = urllib.request.FancyURLopener(proxies=proxies) | ||||
|     elif not _urlopener: | ||||
|         opener = FancyURLopener() | ||||
|         _urlopener = opener | ||||
|     else: | ||||
|         opener = _urlopener | ||||
|     if data is None: | ||||
|         return opener.open(url) | ||||
|     else: | ||||
|         return opener.open(url, data) | ||||
| 
 | ||||
| 
 | ||||
| def FancyURLopener(): | ||||
|     with warnings_helper.check_warnings( | ||||
|             ('FancyURLopener style of invoking requests is deprecated.', | ||||
|             DeprecationWarning)): | ||||
|         return urllib.request.FancyURLopener() | ||||
| 
 | ||||
| 
 | ||||
| def fakehttp(fakedata, mock_close=False): | ||||
|     class FakeSocket(io.BytesIO): | ||||
|  | @ -119,26 +90,6 @@ def unfakehttp(self): | |||
|         http.client.HTTPConnection = self._connection_class | ||||
| 
 | ||||
| 
 | ||||
| class FakeFTPMixin(object): | ||||
|     def fakeftp(self): | ||||
|         class FakeFtpWrapper(object): | ||||
|             def __init__(self,  user, passwd, host, port, dirs, timeout=None, | ||||
|                      persistent=True): | ||||
|                 pass | ||||
| 
 | ||||
|             def retrfile(self, file, type): | ||||
|                 return io.BytesIO(), 0 | ||||
| 
 | ||||
|             def close(self): | ||||
|                 pass | ||||
| 
 | ||||
|         self._ftpwrapper_class = urllib.request.ftpwrapper | ||||
|         urllib.request.ftpwrapper = FakeFtpWrapper | ||||
| 
 | ||||
|     def unfakeftp(self): | ||||
|         urllib.request.ftpwrapper = self._ftpwrapper_class | ||||
| 
 | ||||
| 
 | ||||
| class urlopen_FileTests(unittest.TestCase): | ||||
|     """Test urlopen() opening a temporary file. | ||||
| 
 | ||||
|  | @ -158,7 +109,7 @@ def setUp(self): | |||
|             f.close() | ||||
|         self.pathname = os_helper.TESTFN | ||||
|         self.quoted_pathname = urllib.parse.quote(self.pathname) | ||||
|         self.returned_obj = urlopen("file:%s" % self.quoted_pathname) | ||||
|         self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname) | ||||
| 
 | ||||
|     def tearDown(self): | ||||
|         """Shut down the open object""" | ||||
|  | @ -205,7 +156,7 @@ def test_headers(self): | |||
|         self.assertIsInstance(self.returned_obj.headers, email.message.Message) | ||||
| 
 | ||||
|     def test_url(self): | ||||
|         self.assertEqual(self.returned_obj.url, self.quoted_pathname) | ||||
|         self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname) | ||||
| 
 | ||||
|     def test_status(self): | ||||
|         self.assertIsNone(self.returned_obj.status) | ||||
|  | @ -214,7 +165,7 @@ def test_info(self): | |||
|         self.assertIsInstance(self.returned_obj.info(), email.message.Message) | ||||
| 
 | ||||
|     def test_geturl(self): | ||||
|         self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname) | ||||
|         self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname) | ||||
| 
 | ||||
|     def test_getcode(self): | ||||
|         self.assertIsNone(self.returned_obj.getcode()) | ||||
|  | @ -339,13 +290,13 @@ def test_getproxies_environment_prefer_lowercase(self): | |||
|         self.assertEqual('http://somewhere:3128', proxies['http']) | ||||
| 
 | ||||
| 
 | ||||
| class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): | ||||
| class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): | ||||
|     """Test urlopen() opening a fake http connection.""" | ||||
| 
 | ||||
|     def check_read(self, ver): | ||||
|         self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!") | ||||
|         try: | ||||
|             fp = urlopen("http://python.org/") | ||||
|             fp = urllib.request.urlopen("http://python.org/") | ||||
|             self.assertEqual(fp.readline(), b"Hello!") | ||||
|             self.assertEqual(fp.readline(), b"") | ||||
|             self.assertEqual(fp.geturl(), 'http://python.org/') | ||||
|  | @ -366,8 +317,8 @@ def test_url_fragment(self): | |||
|     def test_willclose(self): | ||||
|         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") | ||||
|         try: | ||||
|             resp = urlopen("http://www.python.org") | ||||
|             self.assertTrue(resp.fp.will_close) | ||||
|             resp = urllib.request.urlopen("http://www.python.org") | ||||
|             self.assertTrue(resp.will_close) | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|  | @ -392,9 +343,6 @@ def test_url_path_with_control_char_rejected(self): | |||
|                 with self.assertRaisesRegex( | ||||
|                     InvalidURL, f"contain control.*{escaped_char_repr}"): | ||||
|                     urllib.request.urlopen(f"https:{schemeless_url}") | ||||
|                 # This code path quotes the URL so there is no injection. | ||||
|                 resp = urlopen(f"http:{schemeless_url}") | ||||
|                 self.assertNotIn(char, resp.geturl()) | ||||
|             finally: | ||||
|                 self.unfakehttp() | ||||
| 
 | ||||
|  | @ -416,11 +364,6 @@ def test_url_path_with_newline_header_injection_rejected(self): | |||
|                 urllib.request.urlopen(f"http:{schemeless_url}") | ||||
|             with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): | ||||
|                 urllib.request.urlopen(f"https:{schemeless_url}") | ||||
|             # This code path quotes the URL so there is no injection. | ||||
|             resp = urlopen(f"http:{schemeless_url}") | ||||
|             self.assertNotIn(' ', resp.geturl()) | ||||
|             self.assertNotIn('\r', resp.geturl()) | ||||
|             self.assertNotIn('\n', resp.geturl()) | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|  | @ -435,9 +378,9 @@ def test_url_host_with_control_char_rejected(self): | |||
|                 InvalidURL = http.client.InvalidURL | ||||
|                 with self.assertRaisesRegex( | ||||
|                     InvalidURL, f"contain control.*{escaped_char_repr}"): | ||||
|                     urlopen(f"http:{schemeless_url}") | ||||
|                     urllib.request.urlopen(f"http:{schemeless_url}") | ||||
|                 with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"): | ||||
|                     urlopen(f"https:{schemeless_url}") | ||||
|                     urllib.request.urlopen(f"https:{schemeless_url}") | ||||
|             finally: | ||||
|                 self.unfakehttp() | ||||
| 
 | ||||
|  | @ -450,9 +393,9 @@ def test_url_host_with_newline_header_injection_rejected(self): | |||
|             InvalidURL = http.client.InvalidURL | ||||
|             with self.assertRaisesRegex( | ||||
|                 InvalidURL, r"contain control.*\\r"): | ||||
|                 urlopen(f"http:{schemeless_url}") | ||||
|                 urllib.request.urlopen(f"http:{schemeless_url}") | ||||
|             with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): | ||||
|                 urlopen(f"https:{schemeless_url}") | ||||
|                 urllib.request.urlopen(f"https:{schemeless_url}") | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|  | @ -476,7 +419,7 @@ def test_read_bogus(self): | |||
| Content-Type: text/html; charset=iso-8859-1 | ||||
| ''', mock_close=True) | ||||
|         try: | ||||
|             self.assertRaises(OSError, urlopen, "http://python.org/") | ||||
|             self.assertRaises(OSError, urllib.request.urlopen, "http://python.org/") | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|  | @ -492,20 +435,20 @@ def test_invalid_redirect(self): | |||
|         try: | ||||
|             msg = "Redirection to url 'file:" | ||||
|             with self.assertRaisesRegex(urllib.error.HTTPError, msg): | ||||
|                 urlopen("http://python.org/") | ||||
|                 urllib.request.urlopen("http://python.org/") | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|     def test_redirect_limit_independent(self): | ||||
|         # Ticket #12923: make sure independent requests each use their | ||||
|         # own retry limit. | ||||
|         for i in range(FancyURLopener().maxtries): | ||||
|         for i in range(urllib.request.HTTPRedirectHandler.max_redirections): | ||||
|             self.fakehttp(b'''HTTP/1.1 302 Found | ||||
| Location: file://guidocomputer.athome.com:/python/license | ||||
| Connection: close | ||||
| ''', mock_close=True) | ||||
|             try: | ||||
|                 self.assertRaises(urllib.error.HTTPError, urlopen, | ||||
|                 self.assertRaises(urllib.error.HTTPError, urllib.request.urlopen, | ||||
|                     "http://something") | ||||
|             finally: | ||||
|                 self.unfakehttp() | ||||
|  | @ -515,14 +458,14 @@ def test_empty_socket(self): | |||
|         # data. (#1680230) | ||||
|         self.fakehttp(b'') | ||||
|         try: | ||||
|             self.assertRaises(OSError, urlopen, "http://something") | ||||
|             self.assertRaises(OSError, urllib.request.urlopen, "http://something") | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|     def test_missing_localfile(self): | ||||
|         # Test for #10836 | ||||
|         with self.assertRaises(urllib.error.URLError) as e: | ||||
|             urlopen('file://localhost/a/file/which/doesnot/exists.py') | ||||
|             urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py') | ||||
|         self.assertTrue(e.exception.filename) | ||||
|         self.assertTrue(e.exception.reason) | ||||
| 
 | ||||
|  | @ -531,71 +474,28 @@ def test_file_notexists(self): | |||
|         tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') | ||||
|         try: | ||||
|             self.assertTrue(os.path.exists(tmp_file)) | ||||
|             with urlopen(tmp_fileurl) as fobj: | ||||
|             with urllib.request.urlopen(tmp_fileurl) as fobj: | ||||
|                 self.assertTrue(fobj) | ||||
|         finally: | ||||
|             os.close(fd) | ||||
|             os.unlink(tmp_file) | ||||
|         self.assertFalse(os.path.exists(tmp_file)) | ||||
|         with self.assertRaises(urllib.error.URLError): | ||||
|             urlopen(tmp_fileurl) | ||||
|             urllib.request.urlopen(tmp_fileurl) | ||||
| 
 | ||||
|     def test_ftp_nohost(self): | ||||
|         test_ftp_url = 'ftp:///path' | ||||
|         with self.assertRaises(urllib.error.URLError) as e: | ||||
|             urlopen(test_ftp_url) | ||||
|             urllib.request.urlopen(test_ftp_url) | ||||
|         self.assertFalse(e.exception.filename) | ||||
|         self.assertTrue(e.exception.reason) | ||||
| 
 | ||||
|     def test_ftp_nonexisting(self): | ||||
|         with self.assertRaises(urllib.error.URLError) as e: | ||||
|             urlopen('ftp://localhost/a/file/which/doesnot/exists.py') | ||||
|             urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py') | ||||
|         self.assertFalse(e.exception.filename) | ||||
|         self.assertTrue(e.exception.reason) | ||||
| 
 | ||||
|     @patch.object(urllib.request, 'MAXFTPCACHE', 0) | ||||
|     def test_ftp_cache_pruning(self): | ||||
|         self.fakeftp() | ||||
|         try: | ||||
|             urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, []) | ||||
|             urlopen('ftp://localhost') | ||||
|         finally: | ||||
|             self.unfakeftp() | ||||
| 
 | ||||
|     def test_userpass_inurl(self): | ||||
|         self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") | ||||
|         try: | ||||
|             fp = urlopen("http://user:pass@python.org/") | ||||
|             self.assertEqual(fp.readline(), b"Hello!") | ||||
|             self.assertEqual(fp.readline(), b"") | ||||
|             self.assertEqual(fp.geturl(), 'http://user:pass@python.org/') | ||||
|             self.assertEqual(fp.getcode(), 200) | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|     def test_userpass_inurl_w_spaces(self): | ||||
|         self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") | ||||
|         try: | ||||
|             userpass = "a b:c d" | ||||
|             url = "http://{}@python.org/".format(userpass) | ||||
|             fakehttp_wrapper = http.client.HTTPConnection | ||||
|             authorization = ("Authorization: Basic %s\r\n" % | ||||
|                              b64encode(userpass.encode("ASCII")).decode("ASCII")) | ||||
|             fp = urlopen(url) | ||||
|             # The authorization header must be in place | ||||
|             self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8")) | ||||
|             self.assertEqual(fp.readline(), b"Hello!") | ||||
|             self.assertEqual(fp.readline(), b"") | ||||
|             # the spaces are quoted in URL so no match | ||||
|             self.assertNotEqual(fp.geturl(), url) | ||||
|             self.assertEqual(fp.getcode(), 200) | ||||
|         finally: | ||||
|             self.unfakehttp() | ||||
| 
 | ||||
|     def test_URLopener_deprecation(self): | ||||
|         with warnings_helper.check_warnings(('',DeprecationWarning)): | ||||
|             urllib.request.URLopener() | ||||
| 
 | ||||
| 
 | ||||
| class urlopen_DataTests(unittest.TestCase): | ||||
|     """Test urlopen() opening a data URL.""" | ||||
|  | @ -1620,56 +1520,6 @@ def test_thishost(self): | |||
|         self.assertIsInstance(urllib.request.thishost(), tuple) | ||||
| 
 | ||||
| 
 | ||||
| class URLopener_Tests(FakeHTTPMixin, unittest.TestCase): | ||||
|     """Testcase to test the open method of URLopener class.""" | ||||
| 
 | ||||
|     def test_quoted_open(self): | ||||
|         class DummyURLopener(urllib.request.URLopener): | ||||
|             def open_spam(self, url): | ||||
|                 return url | ||||
|         with warnings_helper.check_warnings( | ||||
|                 ('DummyURLopener style of invoking requests is deprecated.', | ||||
|                 DeprecationWarning)): | ||||
|             self.assertEqual(DummyURLopener().open( | ||||
|                 'spam://example/ /'),'//example/%20/') | ||||
| 
 | ||||
|             # test the safe characters are not quoted by urlopen | ||||
|             self.assertEqual(DummyURLopener().open( | ||||
|                 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), | ||||
|                 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") | ||||
| 
 | ||||
|     @warnings_helper.ignore_warnings(category=DeprecationWarning) | ||||
|     def test_urlopener_retrieve_file(self): | ||||
|         with os_helper.temp_dir() as tmpdir: | ||||
|             fd, tmpfile = tempfile.mkstemp(dir=tmpdir) | ||||
|             os.close(fd) | ||||
|             fileurl = "file:" + urllib.request.pathname2url(tmpfile) | ||||
|             filename, _ = urllib.request.URLopener().retrieve(fileurl) | ||||
|             # Some buildbots have TEMP folder that uses a lowercase drive letter. | ||||
|             self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile)) | ||||
| 
 | ||||
|     @warnings_helper.ignore_warnings(category=DeprecationWarning) | ||||
|     def test_urlopener_retrieve_remote(self): | ||||
|         url = "http://www.python.org/file.txt" | ||||
|         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") | ||||
|         self.addCleanup(self.unfakehttp) | ||||
|         filename, _ = urllib.request.URLopener().retrieve(url) | ||||
|         self.assertEqual(os.path.splitext(filename)[1], ".txt") | ||||
| 
 | ||||
|     @warnings_helper.ignore_warnings(category=DeprecationWarning) | ||||
|     def test_local_file_open(self): | ||||
|         # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme | ||||
|         class DummyURLopener(urllib.request.URLopener): | ||||
|             def open_local_file(self, url): | ||||
|                 return url | ||||
|         for url in ('local_file://example', 'local-file://example'): | ||||
|             self.assertRaises(OSError, urllib.request.urlopen, url) | ||||
|             self.assertRaises(OSError, urllib.request.URLopener().open, url) | ||||
|             self.assertRaises(OSError, urllib.request.URLopener().retrieve, url) | ||||
|             self.assertRaises(OSError, DummyURLopener().open, url) | ||||
|             self.assertRaises(OSError, DummyURLopener().retrieve, url) | ||||
| 
 | ||||
| 
 | ||||
| class RequestTests(unittest.TestCase): | ||||
|     """Unit tests for urllib.request.Request.""" | ||||
| 
 | ||||
|  |  | |||
|  | @ -5,6 +5,7 @@ | |||
| 
 | ||||
| import contextlib | ||||
| import socket | ||||
| import urllib.error | ||||
| import urllib.parse | ||||
| import urllib.request | ||||
| import os | ||||
|  | @ -101,13 +102,10 @@ def test_getcode(self): | |||
|         # test getcode() with the fancy opener to get 404 error codes | ||||
|         URL = self.url + "XXXinvalidXXX" | ||||
|         with socket_helper.transient_internet(URL): | ||||
|             with self.assertWarns(DeprecationWarning): | ||||
|                 open_url = urllib.request.FancyURLopener().open(URL) | ||||
|             try: | ||||
|                 code = open_url.getcode() | ||||
|             finally: | ||||
|                 open_url.close() | ||||
|             self.assertEqual(code, 404) | ||||
|             with self.assertRaises(urllib.error.URLError) as e: | ||||
|                 with urllib.request.urlopen(URL): | ||||
|                     pass | ||||
|             self.assertEqual(e.exception.code, 404) | ||||
| 
 | ||||
|     @support.requires_resource('walltime') | ||||
|     def test_bad_address(self): | ||||
|  |  | |||
|  | @ -83,6 +83,7 @@ | |||
| 
 | ||||
| import base64 | ||||
| import bisect | ||||
| import contextlib | ||||
| import email | ||||
| import hashlib | ||||
| import http.client | ||||
|  | @ -94,15 +95,13 @@ | |||
| import sys | ||||
| import time | ||||
| import tempfile | ||||
| import contextlib | ||||
| import warnings | ||||
| 
 | ||||
| 
 | ||||
| from urllib.error import URLError, HTTPError, ContentTooShortError | ||||
| from urllib.parse import ( | ||||
|     urlparse, urlsplit, urljoin, unwrap, quote, unquote, | ||||
|     _splittype, _splithost, _splitport, _splituser, _splitpasswd, | ||||
|     _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes, | ||||
|     _splitattr, _splitvalue, _splittag, | ||||
|     unquote_to_bytes, urlunparse) | ||||
| from urllib.response import addinfourl, addclosehook | ||||
| 
 | ||||
|  | @ -128,7 +127,7 @@ | |||
|     'urlopen', 'install_opener', 'build_opener', | ||||
|     'pathname2url', 'url2pathname', 'getproxies', | ||||
|     # Legacy interface | ||||
|     'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', | ||||
|     'urlretrieve', 'urlcleanup', | ||||
| ] | ||||
| 
 | ||||
| # used in User-Agent header sent | ||||
|  | @ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, | |||
|     the reason phrase returned by the server --- instead of the response | ||||
|     headers as it is specified in the documentation for HTTPResponse. | ||||
| 
 | ||||
|     For FTP, file, and data URLs and requests explicitly handled by legacy | ||||
|     URLopener and FancyURLopener classes, this function returns a | ||||
|     For FTP, file, and data URLs, this function returns a | ||||
|     urllib.response.addinfourl object. | ||||
| 
 | ||||
|     Note that None may be returned if no handler handles the request (though | ||||
|  | @ -940,6 +938,7 @@ def _parse_realm(self, header): | |||
|         for mo in AbstractBasicAuthHandler.rx.finditer(header): | ||||
|             scheme, quote, realm = mo.groups() | ||||
|             if quote not in ['"', "'"]: | ||||
|                 import warnings | ||||
|                 warnings.warn("Basic Auth Realm was unquoted", | ||||
|                               UserWarning, 3) | ||||
| 
 | ||||
|  | @ -1495,7 +1494,7 @@ def open_local_file(self, req): | |||
|                     origurl = 'file://' + filename | ||||
|                 return addinfourl(open(localfile, 'rb'), headers, origurl) | ||||
|         except OSError as exp: | ||||
|             raise URLError(exp) | ||||
|             raise URLError(exp, exp.filename) | ||||
|         raise URLError('file not on local host') | ||||
| 
 | ||||
| def _safe_gethostbyname(host): | ||||
|  | @ -1647,8 +1646,6 @@ def data_open(self, req): | |||
| 
 | ||||
| # Code move from the old urllib module | ||||
| 
 | ||||
| MAXFTPCACHE = 10        # Trim the ftp cache beyond this size | ||||
| 
 | ||||
| # Helper for non-unix systems | ||||
| if os.name == 'nt': | ||||
|     from nturl2path import url2pathname, pathname2url | ||||
|  | @ -1668,678 +1665,6 @@ def pathname2url(pathname): | |||
|         return quote(pathname) | ||||
| 
 | ||||
| 
 | ||||
| ftpcache = {} | ||||
| 
 | ||||
| 
 | ||||
| class URLopener: | ||||
|     """Class to open URLs. | ||||
|     This is a class rather than just a subroutine because we may need | ||||
|     more than one set of global protocol-specific options. | ||||
|     Note -- this is a base class for those who don't want the | ||||
|     automatic handling of errors type 302 (relocated) and 401 | ||||
|     (authorization needed).""" | ||||
| 
 | ||||
|     __tempfiles = None | ||||
| 
 | ||||
|     version = "Python-urllib/%s" % __version__ | ||||
| 
 | ||||
|     # Constructor | ||||
|     def __init__(self, proxies=None, **x509): | ||||
|         msg = "%(class)s style of invoking requests is deprecated. " \ | ||||
|               "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} | ||||
|         warnings.warn(msg, DeprecationWarning, stacklevel=3) | ||||
|         if proxies is None: | ||||
|             proxies = getproxies() | ||||
|         assert hasattr(proxies, 'keys'), "proxies must be a mapping" | ||||
|         self.proxies = proxies | ||||
|         self.key_file = x509.get('key_file') | ||||
|         self.cert_file = x509.get('cert_file') | ||||
|         self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')] | ||||
|         self.__tempfiles = [] | ||||
|         self.__unlink = os.unlink # See cleanup() | ||||
|         self.tempcache = None | ||||
|         # Undocumented feature: if you assign {} to tempcache, | ||||
|         # it is used to cache files retrieved with | ||||
|         # self.retrieve().  This is not enabled by default | ||||
|         # since it does not work for changing documents (and I | ||||
|         # haven't got the logic to check expiration headers | ||||
|         # yet). | ||||
|         self.ftpcache = ftpcache | ||||
|         # Undocumented feature: you can use a different | ||||
|         # ftp cache by assigning to the .ftpcache member; | ||||
|         # in case you want logically independent URL openers | ||||
|         # XXX This is not threadsafe.  Bah. | ||||
| 
 | ||||
|     def __del__(self): | ||||
|         self.close() | ||||
| 
 | ||||
|     def close(self): | ||||
|         self.cleanup() | ||||
| 
 | ||||
|     def cleanup(self): | ||||
|         # This code sometimes runs when the rest of this module | ||||
|         # has already been deleted, so it can't use any globals | ||||
|         # or import anything. | ||||
|         if self.__tempfiles: | ||||
|             for file in self.__tempfiles: | ||||
|                 try: | ||||
|                     self.__unlink(file) | ||||
|                 except OSError: | ||||
|                     pass | ||||
|             del self.__tempfiles[:] | ||||
|         if self.tempcache: | ||||
|             self.tempcache.clear() | ||||
| 
 | ||||
|     def addheader(self, *args): | ||||
|         """Add a header to be used by the HTTP interface only | ||||
|         e.g. u.addheader('Accept', 'sound/basic')""" | ||||
|         self.addheaders.append(args) | ||||
| 
 | ||||
|     # External interface | ||||
|     def open(self, fullurl, data=None): | ||||
|         """Use URLopener().open(file) instead of open(file, 'r').""" | ||||
|         fullurl = unwrap(_to_bytes(fullurl)) | ||||
|         fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") | ||||
|         if self.tempcache and fullurl in self.tempcache: | ||||
|             filename, headers = self.tempcache[fullurl] | ||||
|             fp = open(filename, 'rb') | ||||
|             return addinfourl(fp, headers, fullurl) | ||||
|         urltype, url = _splittype(fullurl) | ||||
|         if not urltype: | ||||
|             urltype = 'file' | ||||
|         if urltype in self.proxies: | ||||
|             proxy = self.proxies[urltype] | ||||
|             urltype, proxyhost = _splittype(proxy) | ||||
|             host, selector = _splithost(proxyhost) | ||||
|             url = (host, fullurl) # Signal special case to open_*() | ||||
|         else: | ||||
|             proxy = None | ||||
|         name = 'open_' + urltype | ||||
|         self.type = urltype | ||||
|         name = name.replace('-', '_') | ||||
|         if not hasattr(self, name) or name == 'open_local_file': | ||||
|             if proxy: | ||||
|                 return self.open_unknown_proxy(proxy, fullurl, data) | ||||
|             else: | ||||
|                 return self.open_unknown(fullurl, data) | ||||
|         try: | ||||
|             if data is None: | ||||
|                 return getattr(self, name)(url) | ||||
|             else: | ||||
|                 return getattr(self, name)(url, data) | ||||
|         except (HTTPError, URLError): | ||||
|             raise | ||||
|         except OSError as msg: | ||||
|             raise OSError('socket error', msg) from msg | ||||
| 
 | ||||
|     def open_unknown(self, fullurl, data=None): | ||||
|         """Overridable interface to open unknown URL type.""" | ||||
|         type, url = _splittype(fullurl) | ||||
|         raise OSError('url error', 'unknown url type', type) | ||||
| 
 | ||||
|     def open_unknown_proxy(self, proxy, fullurl, data=None): | ||||
|         """Overridable interface to open unknown URL type.""" | ||||
|         type, url = _splittype(fullurl) | ||||
|         raise OSError('url error', 'invalid proxy for %s' % type, proxy) | ||||
| 
 | ||||
|     # External interface | ||||
|     def retrieve(self, url, filename=None, reporthook=None, data=None): | ||||
|         """retrieve(url) returns (filename, headers) for a local object | ||||
|         or (tempfilename, headers) for a remote object.""" | ||||
|         url = unwrap(_to_bytes(url)) | ||||
|         if self.tempcache and url in self.tempcache: | ||||
|             return self.tempcache[url] | ||||
|         type, url1 = _splittype(url) | ||||
|         if filename is None and (not type or type == 'file'): | ||||
|             try: | ||||
|                 fp = self.open_local_file(url1) | ||||
|                 hdrs = fp.info() | ||||
|                 fp.close() | ||||
|                 return url2pathname(_splithost(url1)[1]), hdrs | ||||
|             except OSError: | ||||
|                 pass | ||||
|         fp = self.open(url, data) | ||||
|         try: | ||||
|             headers = fp.info() | ||||
|             if filename: | ||||
|                 tfp = open(filename, 'wb') | ||||
|             else: | ||||
|                 garbage, path = _splittype(url) | ||||
|                 garbage, path = _splithost(path or "") | ||||
|                 path, garbage = _splitquery(path or "") | ||||
|                 path, garbage = _splitattr(path or "") | ||||
|                 suffix = os.path.splitext(path)[1] | ||||
|                 (fd, filename) = tempfile.mkstemp(suffix) | ||||
|                 self.__tempfiles.append(filename) | ||||
|                 tfp = os.fdopen(fd, 'wb') | ||||
|             try: | ||||
|                 result = filename, headers | ||||
|                 if self.tempcache is not None: | ||||
|                     self.tempcache[url] = result | ||||
|                 bs = 1024*8 | ||||
|                 size = -1 | ||||
|                 read = 0 | ||||
|                 blocknum = 0 | ||||
|                 if "content-length" in headers: | ||||
|                     size = int(headers["Content-Length"]) | ||||
|                 if reporthook: | ||||
|                     reporthook(blocknum, bs, size) | ||||
|                 while block := fp.read(bs): | ||||
|                     read += len(block) | ||||
|                     tfp.write(block) | ||||
|                     blocknum += 1 | ||||
|                     if reporthook: | ||||
|                         reporthook(blocknum, bs, size) | ||||
|             finally: | ||||
|                 tfp.close() | ||||
|         finally: | ||||
|             fp.close() | ||||
| 
 | ||||
|         # raise exception if actual size does not match content-length header | ||||
|         if size >= 0 and read < size: | ||||
|             raise ContentTooShortError( | ||||
|                 "retrieval incomplete: got only %i out of %i bytes" | ||||
|                 % (read, size), result) | ||||
| 
 | ||||
|         return result | ||||
| 
 | ||||
|     # Each method named open_<type> knows how to open that type of URL | ||||
| 
 | ||||
|     def _open_generic_http(self, connection_factory, url, data): | ||||
|         """Make an HTTP connection using connection_class. | ||||
| 
 | ||||
|         This is an internal method that should be called from | ||||
|         open_http() or open_https(). | ||||
| 
 | ||||
|         Arguments: | ||||
|         - connection_factory should take a host name and return an | ||||
|           HTTPConnection instance. | ||||
|         - url is the url to retrieval or a host, relative-path pair. | ||||
|         - data is payload for a POST request or None. | ||||
|         """ | ||||
| 
 | ||||
|         user_passwd = None | ||||
|         proxy_passwd= None | ||||
|         if isinstance(url, str): | ||||
|             host, selector = _splithost(url) | ||||
|             if host: | ||||
|                 user_passwd, host = _splituser(host) | ||||
|                 host = unquote(host) | ||||
|             realhost = host | ||||
|         else: | ||||
|             host, selector = url | ||||
|             # check whether the proxy contains authorization information | ||||
|             proxy_passwd, host = _splituser(host) | ||||
|             # now we proceed with the url we want to obtain | ||||
|             urltype, rest = _splittype(selector) | ||||
|             url = rest | ||||
|             user_passwd = None | ||||
|             if urltype.lower() != 'http': | ||||
|                 realhost = None | ||||
|             else: | ||||
|                 realhost, rest = _splithost(rest) | ||||
|                 if realhost: | ||||
|                     user_passwd, realhost = _splituser(realhost) | ||||
|                 if user_passwd: | ||||
|                     selector = "%s://%s%s" % (urltype, realhost, rest) | ||||
|                 if proxy_bypass(realhost): | ||||
|                     host = realhost | ||||
| 
 | ||||
|         if not host: raise OSError('http error', 'no host given') | ||||
| 
 | ||||
|         if proxy_passwd: | ||||
|             proxy_passwd = unquote(proxy_passwd) | ||||
|             proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') | ||||
|         else: | ||||
|             proxy_auth = None | ||||
| 
 | ||||
|         if user_passwd: | ||||
|             user_passwd = unquote(user_passwd) | ||||
|             auth = base64.b64encode(user_passwd.encode()).decode('ascii') | ||||
|         else: | ||||
|             auth = None | ||||
|         http_conn = connection_factory(host) | ||||
|         headers = {} | ||||
|         if proxy_auth: | ||||
|             headers["Proxy-Authorization"] = "Basic %s" % proxy_auth | ||||
|         if auth: | ||||
|             headers["Authorization"] =  "Basic %s" % auth | ||||
|         if realhost: | ||||
|             headers["Host"] = realhost | ||||
| 
 | ||||
|         # Add Connection:close as we don't support persistent connections yet. | ||||
|         # This helps in closing the socket and avoiding ResourceWarning | ||||
| 
 | ||||
|         headers["Connection"] = "close" | ||||
| 
 | ||||
|         for header, value in self.addheaders: | ||||
|             headers[header] = value | ||||
| 
 | ||||
|         if data is not None: | ||||
|             headers["Content-Type"] = "application/x-www-form-urlencoded" | ||||
|             http_conn.request("POST", selector, data, headers) | ||||
|         else: | ||||
|             http_conn.request("GET", selector, headers=headers) | ||||
| 
 | ||||
|         try: | ||||
|             response = http_conn.getresponse() | ||||
|         except http.client.BadStatusLine: | ||||
|             # something went wrong with the HTTP status line | ||||
|             raise URLError("http protocol error: bad status line") | ||||
| 
 | ||||
|         # According to RFC 2616, "2xx" code indicates that the client's | ||||
|         # request was successfully received, understood, and accepted. | ||||
|         if 200 <= response.status < 300: | ||||
|             return addinfourl(response, response.msg, "http:" + url, | ||||
|                               response.status) | ||||
|         else: | ||||
|             return self.http_error( | ||||
|                 url, response.fp, | ||||
|                 response.status, response.reason, response.msg, data) | ||||
| 
 | ||||
|     def open_http(self, url, data=None): | ||||
|         """Use HTTP protocol.""" | ||||
|         return self._open_generic_http(http.client.HTTPConnection, url, data) | ||||
| 
 | ||||
|     def http_error(self, url, fp, errcode, errmsg, headers, data=None): | ||||
|         """Handle http errors. | ||||
| 
 | ||||
|         Derived class can override this, or provide specific handlers | ||||
|         named http_error_DDD where DDD is the 3-digit error code.""" | ||||
|         # First check if there's a specific handler for this error | ||||
|         name = 'http_error_%d' % errcode | ||||
|         if hasattr(self, name): | ||||
|             method = getattr(self, name) | ||||
|             if data is None: | ||||
|                 result = method(url, fp, errcode, errmsg, headers) | ||||
|             else: | ||||
|                 result = method(url, fp, errcode, errmsg, headers, data) | ||||
|             if result: return result | ||||
|         return self.http_error_default(url, fp, errcode, errmsg, headers) | ||||
| 
 | ||||
|     def http_error_default(self, url, fp, errcode, errmsg, headers): | ||||
|         """Default error handler: close the connection and raise OSError.""" | ||||
|         fp.close() | ||||
|         raise HTTPError(url, errcode, errmsg, headers, None) | ||||
| 
 | ||||
|     if _have_ssl: | ||||
|         def _https_connection(self, host): | ||||
|             if self.key_file or self.cert_file: | ||||
|                 http_version = http.client.HTTPSConnection._http_vsn | ||||
|                 context = http.client._create_https_context(http_version) | ||||
|                 context.load_cert_chain(self.cert_file, self.key_file) | ||||
|                 # cert and key file means the user wants to authenticate. | ||||
|                 # enable TLS 1.3 PHA implicitly even for custom contexts. | ||||
|                 if context.post_handshake_auth is not None: | ||||
|                     context.post_handshake_auth = True | ||||
|             else: | ||||
|                 context = None | ||||
|             return http.client.HTTPSConnection(host, context=context) | ||||
| 
 | ||||
|         def open_https(self, url, data=None): | ||||
|             """Use HTTPS protocol.""" | ||||
|             return self._open_generic_http(self._https_connection, url, data) | ||||
| 
 | ||||
|     def open_file(self, url): | ||||
|         """Use local file or FTP depending on form of URL.""" | ||||
|         if not isinstance(url, str): | ||||
|             raise URLError('file error: proxy support for file protocol currently not implemented') | ||||
|         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': | ||||
|             raise ValueError("file:// scheme is supported only on localhost") | ||||
|         else: | ||||
|             return self.open_local_file(url) | ||||
| 
 | ||||
|     def open_local_file(self, url): | ||||
|         """Use local file.""" | ||||
|         import email.utils | ||||
|         import mimetypes | ||||
|         host, file = _splithost(url) | ||||
|         localname = url2pathname(file) | ||||
|         try: | ||||
|             stats = os.stat(localname) | ||||
|         except OSError as e: | ||||
|             raise URLError(e.strerror, e.filename) | ||||
|         size = stats.st_size | ||||
|         modified = email.utils.formatdate(stats.st_mtime, usegmt=True) | ||||
|         mtype = mimetypes.guess_type(url)[0] | ||||
|         headers = email.message_from_string( | ||||
|             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % | ||||
|             (mtype or 'text/plain', size, modified)) | ||||
|         if not host: | ||||
|             urlfile = file | ||||
|             if file[:1] == '/': | ||||
|                 urlfile = 'file://' + file | ||||
|             return addinfourl(open(localname, 'rb'), headers, urlfile) | ||||
|         host, port = _splitport(host) | ||||
|         if (not port | ||||
|            and socket.gethostbyname(host) in ((localhost(),) + thishost())): | ||||
|             urlfile = file | ||||
|             if file[:1] == '/': | ||||
|                 urlfile = 'file://' + file | ||||
|             elif file[:2] == './': | ||||
|                 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) | ||||
|             return addinfourl(open(localname, 'rb'), headers, urlfile) | ||||
|         raise URLError('local file error: not on local host') | ||||
| 
 | ||||
|     def open_ftp(self, url): | ||||
|         """Use FTP protocol.""" | ||||
|         if not isinstance(url, str): | ||||
|             raise URLError('ftp error: proxy support for ftp protocol currently not implemented') | ||||
|         import mimetypes | ||||
|         host, path = _splithost(url) | ||||
|         if not host: raise URLError('ftp error: no host given') | ||||
|         host, port = _splitport(host) | ||||
|         user, host = _splituser(host) | ||||
|         if user: user, passwd = _splitpasswd(user) | ||||
|         else: passwd = None | ||||
|         host = unquote(host) | ||||
|         user = unquote(user or '') | ||||
|         passwd = unquote(passwd or '') | ||||
|         host = socket.gethostbyname(host) | ||||
|         if not port: | ||||
|             import ftplib | ||||
|             port = ftplib.FTP_PORT | ||||
|         else: | ||||
|             port = int(port) | ||||
|         path, attrs = _splitattr(path) | ||||
|         path = unquote(path) | ||||
|         dirs = path.split('/') | ||||
|         dirs, file = dirs[:-1], dirs[-1] | ||||
|         if dirs and not dirs[0]: dirs = dirs[1:] | ||||
|         if dirs and not dirs[0]: dirs[0] = '/' | ||||
|         key = user, host, port, '/'.join(dirs) | ||||
|         # XXX thread unsafe! | ||||
|         if len(self.ftpcache) > MAXFTPCACHE: | ||||
|             # Prune the cache, rather arbitrarily | ||||
|             for k in list(self.ftpcache): | ||||
|                 if k != key: | ||||
|                     v = self.ftpcache[k] | ||||
|                     del self.ftpcache[k] | ||||
|                     v.close() | ||||
|         try: | ||||
|             if key not in self.ftpcache: | ||||
|                 self.ftpcache[key] = \ | ||||
|                     ftpwrapper(user, passwd, host, port, dirs) | ||||
|             if not file: type = 'D' | ||||
|             else: type = 'I' | ||||
|             for attr in attrs: | ||||
|                 attr, value = _splitvalue(attr) | ||||
|                 if attr.lower() == 'type' and \ | ||||
|                    value in ('a', 'A', 'i', 'I', 'd', 'D'): | ||||
|                     type = value.upper() | ||||
|             (fp, retrlen) = self.ftpcache[key].retrfile(file, type) | ||||
|             mtype = mimetypes.guess_type("ftp:" + url)[0] | ||||
|             headers = "" | ||||
|             if mtype: | ||||
|                 headers += "Content-Type: %s\n" % mtype | ||||
|             if retrlen is not None and retrlen >= 0: | ||||
|                 headers += "Content-Length: %d\n" % retrlen | ||||
|             headers = email.message_from_string(headers) | ||||
|             return addinfourl(fp, headers, "ftp:" + url) | ||||
|         except ftperrors() as exp: | ||||
|             raise URLError(f'ftp error: {exp}') from exp | ||||
| 
 | ||||
|     def open_data(self, url, data=None): | ||||
|         """Use "data" URL.""" | ||||
|         if not isinstance(url, str): | ||||
|             raise URLError('data error: proxy support for data protocol currently not implemented') | ||||
|         # ignore POSTed data | ||||
|         # | ||||
|         # syntax of data URLs: | ||||
|         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data | ||||
|         # mediatype := [ type "/" subtype ] *( ";" parameter ) | ||||
|         # data      := *urlchar | ||||
|         # parameter := attribute "=" value | ||||
|         try: | ||||
|             [type, data] = url.split(',', 1) | ||||
|         except ValueError: | ||||
|             raise OSError('data error', 'bad data URL') | ||||
|         if not type: | ||||
|             type = 'text/plain;charset=US-ASCII' | ||||
|         semi = type.rfind(';') | ||||
|         if semi >= 0 and '=' not in type[semi:]: | ||||
|             encoding = type[semi+1:] | ||||
|             type = type[:semi] | ||||
|         else: | ||||
|             encoding = '' | ||||
|         msg = [] | ||||
|         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', | ||||
|                                             time.gmtime(time.time()))) | ||||
|         msg.append('Content-type: %s' % type) | ||||
|         if encoding == 'base64': | ||||
|             # XXX is this encoding/decoding ok? | ||||
|             data = base64.decodebytes(data.encode('ascii')).decode('latin-1') | ||||
|         else: | ||||
|             data = unquote(data) | ||||
|         msg.append('Content-Length: %d' % len(data)) | ||||
|         msg.append('') | ||||
|         msg.append(data) | ||||
|         msg = '\n'.join(msg) | ||||
|         headers = email.message_from_string(msg) | ||||
|         f = io.StringIO(msg) | ||||
|         #f.fileno = None     # needed for addinfourl | ||||
|         return addinfourl(f, headers, url) | ||||
| 
 | ||||
| 
 | ||||
| class FancyURLopener(URLopener): | ||||
|     """Derived class with handlers for errors we can handle (perhaps).""" | ||||
| 
 | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         URLopener.__init__(self, *args, **kwargs) | ||||
|         self.auth_cache = {} | ||||
|         self.tries = 0 | ||||
|         self.maxtries = 10 | ||||
| 
 | ||||
|     def http_error_default(self, url, fp, errcode, errmsg, headers): | ||||
|         """Default error handling -- don't raise an exception.""" | ||||
|         return addinfourl(fp, headers, "http:" + url, errcode) | ||||
| 
 | ||||
|     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): | ||||
|         """Error 302 -- relocated (temporarily).""" | ||||
|         self.tries += 1 | ||||
|         try: | ||||
|             if self.maxtries and self.tries >= self.maxtries: | ||||
|                 if hasattr(self, "http_error_500"): | ||||
|                     meth = self.http_error_500 | ||||
|                 else: | ||||
|                     meth = self.http_error_default | ||||
|                 return meth(url, fp, 500, | ||||
|                             "Internal Server Error: Redirect Recursion", | ||||
|                             headers) | ||||
|             result = self.redirect_internal(url, fp, errcode, errmsg, | ||||
|                                             headers, data) | ||||
|             return result | ||||
|         finally: | ||||
|             self.tries = 0 | ||||
| 
 | ||||
|     def redirect_internal(self, url, fp, errcode, errmsg, headers, data): | ||||
|         if 'location' in headers: | ||||
|             newurl = headers['location'] | ||||
|         elif 'uri' in headers: | ||||
|             newurl = headers['uri'] | ||||
|         else: | ||||
|             return | ||||
|         fp.close() | ||||
| 
 | ||||
|         # In case the server sent a relative URL, join with original: | ||||
|         newurl = urljoin(self.type + ":" + url, newurl) | ||||
| 
 | ||||
|         urlparts = urlparse(newurl) | ||||
| 
 | ||||
|         # For security reasons, we don't allow redirection to anything other | ||||
|         # than http, https and ftp. | ||||
| 
 | ||||
|         # We are using newer HTTPError with older redirect_internal method | ||||
|         # This older method will get deprecated in 3.3 | ||||
| 
 | ||||
|         if urlparts.scheme not in ('http', 'https', 'ftp', ''): | ||||
|             raise HTTPError(newurl, errcode, | ||||
|                             errmsg + | ||||
|                             " Redirection to url '%s' is not allowed." % newurl, | ||||
|                             headers, fp) | ||||
| 
 | ||||
|         return self.open(newurl) | ||||
| 
 | ||||
|     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): | ||||
|         """Error 301 -- also relocated (permanently).""" | ||||
|         return self.http_error_302(url, fp, errcode, errmsg, headers, data) | ||||
| 
 | ||||
|     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): | ||||
|         """Error 303 -- also relocated (essentially identical to 302).""" | ||||
|         return self.http_error_302(url, fp, errcode, errmsg, headers, data) | ||||
| 
 | ||||
|     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): | ||||
|         """Error 307 -- relocated, but turn POST into error.""" | ||||
|         if data is None: | ||||
|             return self.http_error_302(url, fp, errcode, errmsg, headers, data) | ||||
|         else: | ||||
|             return self.http_error_default(url, fp, errcode, errmsg, headers) | ||||
| 
 | ||||
|     def http_error_308(self, url, fp, errcode, errmsg, headers, data=None): | ||||
|         """Error 308 -- relocated, but turn POST into error.""" | ||||
|         if data is None: | ||||
|             return self.http_error_301(url, fp, errcode, errmsg, headers, data) | ||||
|         else: | ||||
|             return self.http_error_default(url, fp, errcode, errmsg, headers) | ||||
| 
 | ||||
|     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None, | ||||
|             retry=False): | ||||
|         """Error 401 -- authentication required. | ||||
|         This function supports Basic authentication only.""" | ||||
|         if 'www-authenticate' not in headers: | ||||
|             URLopener.http_error_default(self, url, fp, | ||||
|                                          errcode, errmsg, headers) | ||||
|         stuff = headers['www-authenticate'] | ||||
|         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) | ||||
|         if not match: | ||||
|             URLopener.http_error_default(self, url, fp, | ||||
|                                          errcode, errmsg, headers) | ||||
|         scheme, realm = match.groups() | ||||
|         if scheme.lower() != 'basic': | ||||
|             URLopener.http_error_default(self, url, fp, | ||||
|                                          errcode, errmsg, headers) | ||||
|         if not retry: | ||||
|             URLopener.http_error_default(self, url, fp, errcode, errmsg, | ||||
|                     headers) | ||||
|         name = 'retry_' + self.type + '_basic_auth' | ||||
|         if data is None: | ||||
|             return getattr(self,name)(url, realm) | ||||
|         else: | ||||
|             return getattr(self,name)(url, realm, data) | ||||
| 
 | ||||
|     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None, | ||||
|             retry=False): | ||||
|         """Error 407 -- proxy authentication required. | ||||
|         This function supports Basic authentication only.""" | ||||
|         if 'proxy-authenticate' not in headers: | ||||
|             URLopener.http_error_default(self, url, fp, | ||||
|                                          errcode, errmsg, headers) | ||||
|         stuff = headers['proxy-authenticate'] | ||||
|         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) | ||||
|         if not match: | ||||
|             URLopener.http_error_default(self, url, fp, | ||||
|                                          errcode, errmsg, headers) | ||||
|         scheme, realm = match.groups() | ||||
|         if scheme.lower() != 'basic': | ||||
|             URLopener.http_error_default(self, url, fp, | ||||
|                                          errcode, errmsg, headers) | ||||
|         if not retry: | ||||
|             URLopener.http_error_default(self, url, fp, errcode, errmsg, | ||||
|                     headers) | ||||
|         name = 'retry_proxy_' + self.type + '_basic_auth' | ||||
|         if data is None: | ||||
|             return getattr(self,name)(url, realm) | ||||
|         else: | ||||
|             return getattr(self,name)(url, realm, data) | ||||
| 
 | ||||
|     def retry_proxy_http_basic_auth(self, url, realm, data=None): | ||||
|         host, selector = _splithost(url) | ||||
|         newurl = 'http://' + host + selector | ||||
|         proxy = self.proxies['http'] | ||||
|         urltype, proxyhost = _splittype(proxy) | ||||
|         proxyhost, proxyselector = _splithost(proxyhost) | ||||
|         i = proxyhost.find('@') + 1 | ||||
|         proxyhost = proxyhost[i:] | ||||
|         user, passwd = self.get_user_passwd(proxyhost, realm, i) | ||||
|         if not (user or passwd): return None | ||||
|         proxyhost = "%s:%s@%s" % (quote(user, safe=''), | ||||
|                                   quote(passwd, safe=''), proxyhost) | ||||
|         self.proxies['http'] = 'http://' + proxyhost + proxyselector | ||||
|         if data is None: | ||||
|             return self.open(newurl) | ||||
|         else: | ||||
|             return self.open(newurl, data) | ||||
| 
 | ||||
|     def retry_proxy_https_basic_auth(self, url, realm, data=None): | ||||
|         host, selector = _splithost(url) | ||||
|         newurl = 'https://' + host + selector | ||||
|         proxy = self.proxies['https'] | ||||
|         urltype, proxyhost = _splittype(proxy) | ||||
|         proxyhost, proxyselector = _splithost(proxyhost) | ||||
|         i = proxyhost.find('@') + 1 | ||||
|         proxyhost = proxyhost[i:] | ||||
|         user, passwd = self.get_user_passwd(proxyhost, realm, i) | ||||
|         if not (user or passwd): return None | ||||
|         proxyhost = "%s:%s@%s" % (quote(user, safe=''), | ||||
|                                   quote(passwd, safe=''), proxyhost) | ||||
|         self.proxies['https'] = 'https://' + proxyhost + proxyselector | ||||
|         if data is None: | ||||
|             return self.open(newurl) | ||||
|         else: | ||||
|             return self.open(newurl, data) | ||||
| 
 | ||||
|     def retry_http_basic_auth(self, url, realm, data=None): | ||||
|         host, selector = _splithost(url) | ||||
|         i = host.find('@') + 1 | ||||
|         host = host[i:] | ||||
|         user, passwd = self.get_user_passwd(host, realm, i) | ||||
|         if not (user or passwd): return None | ||||
|         host = "%s:%s@%s" % (quote(user, safe=''), | ||||
|                              quote(passwd, safe=''), host) | ||||
|         newurl = 'http://' + host + selector | ||||
|         if data is None: | ||||
|             return self.open(newurl) | ||||
|         else: | ||||
|             return self.open(newurl, data) | ||||
| 
 | ||||
|     def retry_https_basic_auth(self, url, realm, data=None): | ||||
|         host, selector = _splithost(url) | ||||
|         i = host.find('@') + 1 | ||||
|         host = host[i:] | ||||
|         user, passwd = self.get_user_passwd(host, realm, i) | ||||
|         if not (user or passwd): return None | ||||
|         host = "%s:%s@%s" % (quote(user, safe=''), | ||||
|                              quote(passwd, safe=''), host) | ||||
|         newurl = 'https://' + host + selector | ||||
|         if data is None: | ||||
|             return self.open(newurl) | ||||
|         else: | ||||
|             return self.open(newurl, data) | ||||
| 
 | ||||
|     def get_user_passwd(self, host, realm, clear_cache=0): | ||||
|         key = realm + '@' + host.lower() | ||||
|         if key in self.auth_cache: | ||||
|             if clear_cache: | ||||
|                 del self.auth_cache[key] | ||||
|             else: | ||||
|                 return self.auth_cache[key] | ||||
|         user, passwd = self.prompt_user_passwd(host, realm) | ||||
|         if user or passwd: self.auth_cache[key] = (user, passwd) | ||||
|         return user, passwd | ||||
| 
 | ||||
|     def prompt_user_passwd(self, host, realm): | ||||
|         """Override this in a GUI environment!""" | ||||
|         import getpass | ||||
|         try: | ||||
|             user = input("Enter username for %s at %s: " % (realm, host)) | ||||
|             passwd = getpass.getpass("Enter password for %s in %s at %s: " % | ||||
|                 (user, realm, host)) | ||||
|             return user, passwd | ||||
|         except KeyboardInterrupt: | ||||
|             print() | ||||
|             return None, None | ||||
| 
 | ||||
| 
 | ||||
| # Utility functions | ||||
| 
 | ||||
| _localhost = None | ||||
|  | @ -2485,9 +1810,7 @@ def getproxies_environment(): | |||
|     """Return a dictionary of scheme -> proxy server URL mappings. | ||||
| 
 | ||||
|     Scan the environment for variables named <scheme>_proxy; | ||||
|     this seems to be the standard convention.  If you need a | ||||
|     different way, you can pass a proxies dictionary to the | ||||
|     [Fancy]URLopener constructor. | ||||
|     this seems to be the standard convention. | ||||
|     """ | ||||
|     # in order to prefer lowercase variables, process environment in | ||||
|     # two passes: first matches any, second pass matches lowercase only | ||||
|  |  | |||
|  | @ -0,0 +1,3 @@ | |||
| Remove :class:`!URLopener` and :class:`!FancyURLopener` classes from | ||||
| :mod:`urllib.request`. They had previously raised :exc:`DeprecationWarning` | ||||
| since Python 3.3. | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Barney Gale
						Barney Gale