Make a new urllib package.

It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed.  The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser.  The urllib.request.urlopen()
function uses the url opener from urllib2.

Note that the unit tests have not been renamed for the
beta, but they will be renamed in the future.

Joint work with Senthil Kumaran.
This commit is contained in:
Jeremy Hylton 2008-06-18 20:49:58 +00:00
parent a656d2cd89
commit 1afc169616
40 changed files with 3190 additions and 3536 deletions

View file

@ -35,7 +35,7 @@
from io import StringIO from io import StringIO
import sys import sys
import os import os
import urllib import urllib.parse
import email.parser import email.parser
__all__ = ["MiniFieldStorage", "FieldStorage", __all__ = ["MiniFieldStorage", "FieldStorage",
@ -216,8 +216,8 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
else: else:
continue continue
if len(nv[1]) or keep_blank_values: if len(nv[1]) or keep_blank_values:
name = urllib.unquote(nv[0].replace('+', ' ')) name = urllib.parse.unquote(nv[0].replace('+', ' '))
value = urllib.unquote(nv[1].replace('+', ' ')) value = urllib.parse.unquote(nv[1].replace('+', ' '))
r.append((name, value)) r.append((name, value))
return r return r

View file

@ -7,8 +7,9 @@
__revision__ = "$Id$" __revision__ = "$Id$"
import os, string, urllib2, getpass, urlparse import os, string, getpass
import io import io
import urllib.parse, urllib.request
from distutils.core import PyPIRCCommand from distutils.core import PyPIRCCommand
from distutils.errors import * from distutils.errors import *
@ -94,7 +95,8 @@ def _set_config(self):
def classifiers(self): def classifiers(self):
''' Fetch the list of classifiers from the server. ''' Fetch the list of classifiers from the server.
''' '''
response = urllib2.urlopen(self.repository+'?:action=list_classifiers') url = self.repository+'?:action=list_classifiers'
response = urllib.request.urlopen(url)
print(response.read()) print(response.read())
def verify_metadata(self): def verify_metadata(self):
@ -166,8 +168,8 @@ def send_metadata(self):
password = getpass.getpass('Password: ') password = getpass.getpass('Password: ')
# set up the authentication # set up the authentication
auth = urllib2.HTTPPasswordMgr() auth = urllib.request.HTTPPasswordMgr()
host = urlparse.urlparse(self.repository)[1] host = urllib.parse.urlparse(self.repository)[1]
auth.add_password(self.realm, host, username, password) auth.add_password(self.realm, host, username, password)
# send the info to the server and report the result # send the info to the server and report the result
code, result = self.post_to_server(self.build_post_data('submit'), code, result = self.post_to_server(self.build_post_data('submit'),
@ -276,20 +278,20 @@ def post_to_server(self, data, auth=None):
'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary, 'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary,
'Content-length': str(len(body)) 'Content-length': str(len(body))
} }
req = urllib2.Request(self.repository, body, headers) req = urllib.request.Request(self.repository, body, headers)
# handle HTTP and include the Basic Auth handler # handle HTTP and include the Basic Auth handler
opener = urllib2.build_opener( opener = urllib.request.build_opener(
urllib2.HTTPBasicAuthHandler(password_mgr=auth) urllib.request.HTTPBasicAuthHandler(password_mgr=auth)
) )
data = '' data = ''
try: try:
result = opener.open(req) result = opener.open(req)
except urllib2.HTTPError as e: except urllib.error.HTTPError as e:
if self.show_response: if self.show_response:
data = e.fp.read() data = e.fp.read()
result = e.code, e.msg result = e.code, e.msg
except urllib2.URLError as e: except urllib.error.URLError as e:
result = 500, str(e) result = 500, str(e)
else: else:
if self.show_response: if self.show_response:

View file

@ -13,7 +13,7 @@
import configparser import configparser
import http.client import http.client
import base64 import base64
import urlparse import urllib.parse
class upload(PyPIRCCommand): class upload(PyPIRCCommand):
@ -145,10 +145,11 @@ def upload_file(self, command, pyversion, filename):
self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO) self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO)
# build the Request # build the Request
# We can't use urllib2 since we need to send the Basic # We can't use urllib since we need to send the Basic
# auth right with the first request # auth right with the first request
# TODO(jhylton): Can we fix urllib?
schema, netloc, url, params, query, fragments = \ schema, netloc, url, params, query, fragments = \
urlparse.urlparse(self.repository) urllib.parse.urlparse(self.repository)
assert not params and not query and not fragments assert not params and not query and not fragments
if schema == 'http': if schema == 'http':
http = http.client.HTTPConnection(netloc) http = http.client.HTTPConnection(netloc)

View file

@ -25,6 +25,7 @@
import base64 import base64
import random import random
import socket import socket
import urllib.parse
import warnings import warnings
from io import StringIO from io import StringIO
@ -218,8 +219,7 @@ def encode_rfc2231(s, charset=None, language=None):
charset is given but not language, the string is encoded using the empty charset is given but not language, the string is encoded using the empty
string for language. string for language.
""" """
import urllib s = urllib.parse.quote(s, safe='')
s = urllib.quote(s, safe='')
if charset is None and language is None: if charset is None and language is None:
return s return s
if language is None: if language is None:
@ -234,7 +234,6 @@ def decode_params(params):
params is a sequence of 2-tuples containing (param name, string value). params is a sequence of 2-tuples containing (param name, string value).
""" """
import urllib
# Copy params so we don't mess with the original # Copy params so we don't mess with the original
params = params[:] params = params[:]
new_params = [] new_params = []
@ -272,7 +271,7 @@ def decode_params(params):
# language specifiers at the beginning of the string. # language specifiers at the beginning of the string.
for num, s, encoded in continuations: for num, s, encoded in continuations:
if encoded: if encoded:
s = urllib.unquote(s) s = urllib.parse.unquote(s)
extended = True extended = True
value.append(s) value.append(s)
value = quote(EMPTYSTRING.join(value)) value = quote(EMPTYSTRING.join(value))

View file

@ -70,7 +70,7 @@
import socket import socket
import email.parser import email.parser
import email.message import email.message
from urlparse import urlsplit from urllib.parse import urlsplit
import warnings import warnings
__all__ = ["HTTPResponse", "HTTPConnection", __all__ = ["HTTPResponse", "HTTPConnection",

View file

@ -28,7 +28,10 @@
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
import re, urlparse, copy, time, urllib import copy
import re
import time
import urllib.parse, urllib.request
try: try:
import threading as _threading import threading as _threading
except ImportError: except ImportError:
@ -580,7 +583,7 @@ def request_host(request):
""" """
url = request.get_full_url() url = request.get_full_url()
host = urlparse.urlparse(url)[1] host = urllib.parse.urlparse(url)[1]
if host == "": if host == "":
host = request.get_header("Host", "") host = request.get_header("Host", "")
@ -602,13 +605,11 @@ def eff_request_host(request):
def request_path(request): def request_path(request):
"""request-URI, as defined by RFC 2965.""" """request-URI, as defined by RFC 2965."""
url = request.get_full_url() url = request.get_full_url()
#scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url) path, parameters, query, frag = urllib.parse.urlparse(url)[2:]
#req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
path, parameters, query, frag = urlparse.urlparse(url)[2:]
if parameters: if parameters:
path = "%s;%s" % (path, parameters) path = "%s;%s" % (path, parameters)
path = escape_path(path) path = escape_path(path)
req_path = urlparse.urlunparse(("", "", path, "", query, frag)) req_path = urllib.parse.urlunparse(("", "", path, "", query, frag))
if not req_path.startswith("/"): if not req_path.startswith("/"):
# fix bad RFC 2396 absoluteURI # fix bad RFC 2396 absoluteURI
req_path = "/"+req_path req_path = "/"+req_path
@ -644,7 +645,7 @@ def escape_path(path):
# And here, kind of: draft-fielding-uri-rfc2396bis-03 # And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05) # (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718) # (And here, for new URI schemes: RFC 2718)
path = urllib.quote(path, HTTP_PATH_SAFE) path = urllib.parse.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
return path return path
@ -1197,8 +1198,7 @@ class CookieJar:
"""Collection of HTTP cookies. """Collection of HTTP cookies.
You may not need to know about this class: try You may not need to know about this class: try
urllib2.build_opener(HTTPCookieProcessor).open(url). urllib.request.build_opener(HTTPCookieProcessor).open(url).
""" """
non_word_re = re.compile(r"\W") non_word_re = re.compile(r"\W")

View file

@ -93,7 +93,7 @@
import time import time
import socket # For gethostbyaddr() import socket # For gethostbyaddr()
import shutil import shutil
import urllib import urllib.parse
import select import select
import mimetypes import mimetypes
import posixpath import posixpath
@ -683,7 +683,7 @@ def list_directory(self, path):
return None return None
list.sort(key=lambda a: a.lower()) list.sort(key=lambda a: a.lower())
r = [] r = []
displaypath = cgi.escape(urllib.unquote(self.path)) displaypath = cgi.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">') r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath) r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath) r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@ -699,7 +699,7 @@ def list_directory(self, path):
displayname = name + "@" displayname = name + "@"
# Note: a link to a directory displays with @ and links with / # Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n' r.append('<li><a href="%s">%s</a>\n'
% (urllib.quote(linkname), cgi.escape(displayname))) % (urllib.parse.quote(linkname), cgi.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n") r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding() enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc) encoded = ''.join(r).encode(enc)
@ -723,7 +723,7 @@ def translate_path(self, path):
# abandon query parameters # abandon query parameters
path = path.split('?',1)[0] path = path.split('?',1)[0]
path = path.split('#',1)[0] path = path.split('#',1)[0]
path = posixpath.normpath(urllib.unquote(path)) path = posixpath.normpath(urllib.parse.unquote(path))
words = path.split('/') words = path.split('/')
words = filter(None, words) words = filter(None, words)
path = os.getcwd() path = os.getcwd()
@ -947,7 +947,7 @@ def run_cgi(self):
env['SERVER_PROTOCOL'] = self.protocol_version env['SERVER_PROTOCOL'] = self.protocol_version
env['SERVER_PORT'] = str(self.server.server_port) env['SERVER_PORT'] = str(self.server.server_port)
env['REQUEST_METHOD'] = self.command env['REQUEST_METHOD'] = self.command
uqrest = urllib.unquote(rest) uqrest = urllib.parse.unquote(rest)
env['PATH_INFO'] = uqrest env['PATH_INFO'] = uqrest
env['PATH_TRANSLATED'] = self.translate_path(uqrest) env['PATH_TRANSLATED'] = self.translate_path(uqrest)
env['SCRIPT_NAME'] = scriptname env['SCRIPT_NAME'] = scriptname

View file

@ -2,7 +2,7 @@
Do not import directly; use urllib instead.""" Do not import directly; use urllib instead."""
import urllib import urllib.parse
import os import os
__all__ = ["url2pathname","pathname2url"] __all__ = ["url2pathname","pathname2url"]
@ -13,7 +13,7 @@ def url2pathname(pathname):
# #
# XXXX The .. handling should be fixed... # XXXX The .. handling should be fixed...
# #
tp = urllib.splittype(pathname)[0] tp = urllib.parsesplittype(pathname)[0]
if tp and tp != 'file': if tp and tp != 'file':
raise RuntimeError('Cannot convert non-local URL to pathname') raise RuntimeError('Cannot convert non-local URL to pathname')
# Turn starting /// into /, an empty hostname means current host # Turn starting /// into /, an empty hostname means current host
@ -47,7 +47,7 @@ def url2pathname(pathname):
i = i + 1 i = i + 1
rv = ':' + ':'.join(components) rv = ':' + ':'.join(components)
# and finally unquote slashes and other funny characters # and finally unquote slashes and other funny characters
return urllib.unquote(rv) return urllib.parseunquote(rv)
def pathname2url(pathname): def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL """OS-specific conversion from a file system path to a relative URL
@ -73,8 +73,8 @@ def pathname2url(pathname):
return '/'.join(components) return '/'.join(components)
def _pncomp2url(component): def _pncomp2url(component):
component = urllib.quote(component[:31], safe='') # We want to quote slashes # We want to quote slashes
return component return urllib.parsequote(component[:31], safe='')
def test(): def test():
for url in ["index.html", for url in ["index.html",

View file

@ -24,7 +24,7 @@
import os import os
import posixpath import posixpath
import urllib import urllib.parse
__all__ = [ __all__ = [
"guess_type","guess_extension","guess_all_extensions", "guess_type","guess_extension","guess_all_extensions",
@ -104,7 +104,7 @@ def guess_type(self, url, strict=True):
Optional `strict' argument when False adds a bunch of commonly found, Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types. but non-standard types.
""" """
scheme, url = urllib.splittype(url) scheme, url = urllib.parse.splittype(url)
if scheme == 'data': if scheme == 'data':
# syntax of data URLs: # syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data

View file

@ -725,7 +725,7 @@ def run_the_test():
def dash_R_cleanup(fs, ps, pic, abcs): def dash_R_cleanup(fs, ps, pic, abcs):
import gc, copyreg import gc, copyreg
import _strptime, linecache import _strptime, linecache
import urlparse, urllib, urllib2, mimetypes, doctest import urllib.parse, urllib.request, mimetypes, doctest
import struct, filecmp, _abcoll import struct, filecmp, _abcoll
from distutils.dir_util import _path_created from distutils.dir_util import _path_created
from weakref import WeakSet from weakref import WeakSet
@ -758,9 +758,8 @@ def dash_R_cleanup(fs, ps, pic, abcs):
_path_created.clear() _path_created.clear()
re.purge() re.purge()
_strptime._regex_cache.clear() _strptime._regex_cache.clear()
urlparse.clear_cache() urllib.parse.clear_cache()
urllib.urlcleanup() urllib.request.urlcleanup()
urllib2.install_opener(None)
linecache.clearcache() linecache.clearcache()
mimetypes._default_mime_types() mimetypes._default_mime_types()
filecmp._cache.clear() filecmp._cache.clear()

View file

@ -352,10 +352,10 @@ def check_syntax_error(testcase, statement):
testcase.fail('Missing SyntaxError: "%s"' % statement) testcase.fail('Missing SyntaxError: "%s"' % statement)
def open_urlresource(url, *args, **kw): def open_urlresource(url, *args, **kw):
import urllib, urlparse import urllib.request, urllib.parse
requires('urlfetch') requires('urlfetch')
filename = urlparse.urlparse(url)[2].split('/')[-1] # '/': it's URL! filename = urllib.parse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
for path in [os.path.curdir, os.path.pardir]: for path in [os.path.curdir, os.path.pardir]:
fn = os.path.join(path, filename) fn = os.path.join(path, filename)
@ -363,7 +363,7 @@ def open_urlresource(url, *args, **kw):
return open(fn, *args, **kw) return open(fn, *args, **kw)
print('\tfetching %s ...' % url, file=get_original_stdout()) print('\tfetching %s ...' % url, file=get_original_stdout())
fn, _ = urllib.urlretrieve(url, filename) fn, _ = urllib.request.urlretrieve(url, filename)
return open(fn, *args, **kw) return open(fn, *args, **kw)

View file

@ -111,7 +111,7 @@ def test_all(self):
self.check_all("re") self.check_all("re")
self.check_all("reprlib") self.check_all("reprlib")
self.check_all("rlcompleter") self.check_all("rlcompleter")
self.check_all("robotparser") self.check_all("urllib.robotparser")
self.check_all("sched") self.check_all("sched")
self.check_all("shelve") self.check_all("shelve")
self.check_all("shlex") self.check_all("shlex")
@ -134,8 +134,6 @@ def test_all(self):
self.check_all("traceback") self.check_all("traceback")
self.check_all("tty") self.check_all("tty")
self.check_all("unittest") self.check_all("unittest")
self.check_all("urllib")
self.check_all("urlparse")
self.check_all("uu") self.check_all("uu")
self.check_all("warnings") self.check_all("warnings")
self.check_all("wave") self.check_all("wave")

View file

@ -1,6 +1,6 @@
"""Tests for http/cookiejar.py.""" """Tests for http/cookiejar.py."""
import re, os, time, urllib2 import re, os, time, urllib.request
from unittest import TestCase from unittest import TestCase
from test import support from test import support
@ -206,7 +206,7 @@ def interact_netscape(cookiejar, url, *set_cookie_hdrs):
def _interact(cookiejar, url, set_cookie_hdrs, hdr_name): def _interact(cookiejar, url, set_cookie_hdrs, hdr_name):
"""Perform a single request / response cycle, returning Cookie: header.""" """Perform a single request / response cycle, returning Cookie: header."""
req = urllib2.Request(url) req = urllib.request.Request(url)
cookiejar.add_cookie_header(req) cookiejar.add_cookie_header(req)
cookie_hdr = req.get_header("Cookie", "") cookie_hdr = req.get_header("Cookie", "")
headers = [] headers = []
@ -330,7 +330,7 @@ def test_domain_return_ok(self):
("http://foo/", "foo.local", True), ("http://foo/", "foo.local", True),
("http://foo/", ".local", True), ("http://foo/", ".local", True),
]: ]:
request = urllib2.Request(url) request = urllib.request.Request(url)
r = pol.domain_return_ok(domain, request) r = pol.domain_return_ok(domain, request)
if ok: self.assert_(r) if ok: self.assert_(r)
else: self.assert_(not r) else: self.assert_(not r)
@ -547,46 +547,48 @@ def test_escape_path(self):
def test_request_path(self): def test_request_path(self):
# with parameters # with parameters
req = urllib2.Request("http://www.example.com/rheum/rhaponicum;" req = urllib.request.Request(
"foo=bar;sing=song?apples=pears&spam=eggs#ni") "http://www.example.com/rheum/rhaponicum;"
"foo=bar;sing=song?apples=pears&spam=eggs#ni")
self.assertEquals(request_path(req), "/rheum/rhaponicum;" self.assertEquals(request_path(req), "/rheum/rhaponicum;"
"foo=bar;sing=song?apples=pears&spam=eggs#ni") "foo=bar;sing=song?apples=pears&spam=eggs#ni")
# without parameters # without parameters
req = urllib2.Request("http://www.example.com/rheum/rhaponicum?" req = urllib.request.Request(
"apples=pears&spam=eggs#ni") "http://www.example.com/rheum/rhaponicum?"
"apples=pears&spam=eggs#ni")
self.assertEquals(request_path(req), "/rheum/rhaponicum?" self.assertEquals(request_path(req), "/rheum/rhaponicum?"
"apples=pears&spam=eggs#ni") "apples=pears&spam=eggs#ni")
# missing final slash # missing final slash
req = urllib2.Request("http://www.example.com") req = urllib.request.Request("http://www.example.com")
self.assertEquals(request_path(req), "/") self.assertEquals(request_path(req), "/")
def test_request_port(self): def test_request_port(self):
req = urllib2.Request("http://www.acme.com:1234/", req = urllib.request.Request("http://www.acme.com:1234/",
headers={"Host": "www.acme.com:4321"}) headers={"Host": "www.acme.com:4321"})
self.assertEquals(request_port(req), "1234") self.assertEquals(request_port(req), "1234")
req = urllib2.Request("http://www.acme.com/", req = urllib.request.Request("http://www.acme.com/",
headers={"Host": "www.acme.com:4321"}) headers={"Host": "www.acme.com:4321"})
self.assertEquals(request_port(req), DEFAULT_HTTP_PORT) self.assertEquals(request_port(req), DEFAULT_HTTP_PORT)
def test_request_host(self): def test_request_host(self):
# this request is illegal (RFC2616, 14.2.3) # this request is illegal (RFC2616, 14.2.3)
req = urllib2.Request("http://1.1.1.1/", req = urllib.request.Request("http://1.1.1.1/",
headers={"Host": "www.acme.com:80"}) headers={"Host": "www.acme.com:80"})
# libwww-perl wants this response, but that seems wrong (RFC 2616, # libwww-perl wants this response, but that seems wrong (RFC 2616,
# section 5.2, point 1., and RFC 2965 section 1, paragraph 3) # section 5.2, point 1., and RFC 2965 section 1, paragraph 3)
#self.assertEquals(request_host(req), "www.acme.com") #self.assertEquals(request_host(req), "www.acme.com")
self.assertEquals(request_host(req), "1.1.1.1") self.assertEquals(request_host(req), "1.1.1.1")
req = urllib2.Request("http://www.acme.com/", req = urllib.request.Request("http://www.acme.com/",
headers={"Host": "irrelevant.com"}) headers={"Host": "irrelevant.com"})
self.assertEquals(request_host(req), "www.acme.com") self.assertEquals(request_host(req), "www.acme.com")
# not actually sure this one is valid Request object, so maybe should # not actually sure this one is valid Request object, so maybe should
# remove test for no host in url in request_host function? # remove test for no host in url in request_host function?
req = urllib2.Request("/resource.html", req = urllib.request.Request("/resource.html",
headers={"Host": "www.acme.com"}) headers={"Host": "www.acme.com"})
self.assertEquals(request_host(req), "www.acme.com") self.assertEquals(request_host(req), "www.acme.com")
# port shouldn't be in request-host # port shouldn't be in request-host
req = urllib2.Request("http://www.acme.com:2345/resource.html", req = urllib.request.Request("http://www.acme.com:2345/resource.html",
headers={"Host": "www.acme.com:5432"}) headers={"Host": "www.acme.com:5432"})
self.assertEquals(request_host(req), "www.acme.com") self.assertEquals(request_host(req), "www.acme.com")
def test_is_HDN(self): def test_is_HDN(self):
@ -766,24 +768,24 @@ def test_domain_allow(self):
blocked_domains=["acme.com"], blocked_domains=["acme.com"],
allowed_domains=["www.acme.com"])) allowed_domains=["www.acme.com"]))
req = urllib2.Request("http://acme.com/") req = urllib.request.Request("http://acme.com/")
headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"]
res = FakeResponse(headers, "http://acme.com/") res = FakeResponse(headers, "http://acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
self.assertEquals(len(c), 0) self.assertEquals(len(c), 0)
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
self.assertEquals(len(c), 1) self.assertEquals(len(c), 1)
req = urllib2.Request("http://www.coyote.com/") req = urllib.request.Request("http://www.coyote.com/")
res = FakeResponse(headers, "http://www.coyote.com/") res = FakeResponse(headers, "http://www.coyote.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
self.assertEquals(len(c), 1) self.assertEquals(len(c), 1)
# set a cookie with non-allowed domain... # set a cookie with non-allowed domain...
req = urllib2.Request("http://www.coyote.com/") req = urllib.request.Request("http://www.coyote.com/")
res = FakeResponse(headers, "http://www.coyote.com/") res = FakeResponse(headers, "http://www.coyote.com/")
cookies = c.make_cookies(res, req) cookies = c.make_cookies(res, req)
c.set_cookie(cookies[0]) c.set_cookie(cookies[0])
@ -798,7 +800,7 @@ def test_domain_block(self):
c = CookieJar(policy=pol) c = CookieJar(policy=pol)
headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"]
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
self.assertEquals(len(c), 0) self.assertEquals(len(c), 0)
@ -808,11 +810,11 @@ def test_domain_block(self):
self.assertEquals(len(c), 1) self.assertEquals(len(c), 1)
c.clear() c.clear()
req = urllib2.Request("http://www.roadrunner.net/") req = urllib.request.Request("http://www.roadrunner.net/")
res = FakeResponse(headers, "http://www.roadrunner.net/") res = FakeResponse(headers, "http://www.roadrunner.net/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
self.assertEquals(len(c), 1) self.assertEquals(len(c), 1)
req = urllib2.Request("http://www.roadrunner.net/") req = urllib.request.Request("http://www.roadrunner.net/")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assert_((req.has_header("Cookie") and self.assert_((req.has_header("Cookie") and
req.has_header("Cookie2"))) req.has_header("Cookie2")))
@ -823,7 +825,7 @@ def test_domain_block(self):
self.assertEquals(len(c), 1) self.assertEquals(len(c), 1)
# set a cookie with blocked domain... # set a cookie with blocked domain...
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
cookies = c.make_cookies(res, req) cookies = c.make_cookies(res, req)
c.set_cookie(cookies[0]) c.set_cookie(cookies[0])
@ -866,7 +868,7 @@ def test_missing_final_slash(self):
url = "http://www.acme.com" url = "http://www.acme.com"
c = CookieJar(DefaultCookiePolicy(rfc2965=True)) c = CookieJar(DefaultCookiePolicy(rfc2965=True))
interact_2965(c, url, "foo=bar; Version=1") interact_2965(c, url, "foo=bar; Version=1")
req = urllib2.Request(url) req = urllib.request.Request(url)
self.assertEquals(len(c), 1) self.assertEquals(len(c), 1)
c.add_cookie_header(req) c.add_cookie_header(req)
self.assert_(req.has_header("Cookie")) self.assert_(req.has_header("Cookie"))
@ -1009,7 +1011,7 @@ def test_bad_cookie_header(self):
def cookiejar_from_cookie_headers(headers): def cookiejar_from_cookie_headers(headers):
c = CookieJar() c = CookieJar()
req = urllib2.Request("http://www.example.com/") req = urllib.request.Request("http://www.example.com/")
r = FakeResponse(headers, "http://www.example.com/") r = FakeResponse(headers, "http://www.example.com/")
c.extract_cookies(r, req) c.extract_cookies(r, req)
return c return c
@ -1080,9 +1082,9 @@ def test_netscape_example_1(self):
c = CookieJar(DefaultCookiePolicy(rfc2965 = True)) c = CookieJar(DefaultCookiePolicy(rfc2965 = True))
#req = urllib2.Request("http://1.1.1.1/", #req = urllib.request.Request("http://1.1.1.1/",
# headers={"Host": "www.acme.com:80"}) # headers={"Host": "www.acme.com:80"})
req = urllib2.Request("http://www.acme.com:80/", req = urllib.request.Request("http://www.acme.com:80/",
headers={"Host": "www.acme.com:80"}) headers={"Host": "www.acme.com:80"})
headers.append( headers.append(
@ -1091,7 +1093,7 @@ def test_netscape_example_1(self):
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE") self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE")
@ -1101,7 +1103,7 @@ def test_netscape_example_1(self):
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/foo/bar") req = urllib.request.Request("http://www.acme.com/foo/bar")
c.add_cookie_header(req) c.add_cookie_header(req)
h = req.get_header("Cookie") h = req.get_header("Cookie")
@ -1112,7 +1114,7 @@ def test_netscape_example_1(self):
res = FakeResponse(headers, "http://www.acme.com") res = FakeResponse(headers, "http://www.acme.com")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req) c.add_cookie_header(req)
h = req.get_header("Cookie") h = req.get_header("Cookie")
@ -1120,7 +1122,7 @@ def test_netscape_example_1(self):
"CUSTOMER=WILE_E_COYOTE" in h and "CUSTOMER=WILE_E_COYOTE" in h and
"SHIPPING=FEDEX" not in h) "SHIPPING=FEDEX" not in h)
req = urllib2.Request("http://www.acme.com/foo/") req = urllib.request.Request("http://www.acme.com/foo/")
c.add_cookie_header(req) c.add_cookie_header(req)
h = req.get_header("Cookie") h = req.get_header("Cookie")
@ -1155,13 +1157,13 @@ def test_netscape_example_2(self):
c = CookieJar() c = CookieJar()
headers = [] headers = []
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/")
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/") req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"), self.assertEquals(req.get_header("Cookie"),
@ -1172,7 +1174,7 @@ def test_netscape_example_2(self):
res = FakeResponse(headers, "http://www.acme.com/") res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://www.acme.com/ammo") req = urllib.request.Request("http://www.acme.com/ammo")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assert_(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*" self.assert_(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*"
@ -1503,7 +1505,7 @@ def test_netscape_misc(self):
# Some additional Netscape cookies tests. # Some additional Netscape cookies tests.
c = CookieJar() c = CookieJar()
headers = [] headers = []
req = urllib2.Request("http://foo.bar.acme.com/foo") req = urllib.request.Request("http://foo.bar.acme.com/foo")
# Netscape allows a host part that contains dots # Netscape allows a host part that contains dots
headers.append("Set-Cookie: Customer=WILE_E_COYOTE; domain=.acme.com") headers.append("Set-Cookie: Customer=WILE_E_COYOTE; domain=.acme.com")
@ -1517,7 +1519,7 @@ def test_netscape_misc(self):
res = FakeResponse(headers, "http://www.acme.com/foo") res = FakeResponse(headers, "http://www.acme.com/foo")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://foo.bar.acme.com/foo") req = urllib.request.Request("http://foo.bar.acme.com/foo")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assert_( self.assert_(
"PART_NUMBER=3,4" in req.get_header("Cookie") and "PART_NUMBER=3,4" in req.get_header("Cookie") and
@ -1559,12 +1561,12 @@ def test_empty_path(self):
c = CookieJar(DefaultCookiePolicy(rfc2965 = True)) c = CookieJar(DefaultCookiePolicy(rfc2965 = True))
headers = [] headers = []
req = urllib2.Request("http://www.ants.com/") req = urllib.request.Request("http://www.ants.com/")
headers.append("Set-Cookie: JSESSIONID=ABCDERANDOM123; Path=") headers.append("Set-Cookie: JSESSIONID=ABCDERANDOM123; Path=")
res = FakeResponse(headers, "http://www.ants.com/") res = FakeResponse(headers, "http://www.ants.com/")
c.extract_cookies(res, req) c.extract_cookies(res, req)
req = urllib2.Request("http://www.ants.com/") req = urllib.request.Request("http://www.ants.com/")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"), self.assertEquals(req.get_header("Cookie"),
@ -1572,7 +1574,7 @@ def test_empty_path(self):
self.assertEquals(req.get_header("Cookie2"), '$Version="1"') self.assertEquals(req.get_header("Cookie2"), '$Version="1"')
# missing path in the request URI # missing path in the request URI
req = urllib2.Request("http://www.ants.com:8080") req = urllib.request.Request("http://www.ants.com:8080")
c.add_cookie_header(req) c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"), self.assertEquals(req.get_header("Cookie"),
@ -1585,7 +1587,7 @@ def test_session_cookies(self):
# Check session cookies are deleted properly by # Check session cookies are deleted properly by
# CookieJar.clear_session_cookies method # CookieJar.clear_session_cookies method
req = urllib2.Request('http://www.perlmeister.com/scripts') req = urllib.request.Request('http://www.perlmeister.com/scripts')
headers = [] headers = []
headers.append("Set-Cookie: s1=session;Path=/scripts") headers.append("Set-Cookie: s1=session;Path=/scripts")
headers.append("Set-Cookie: p1=perm; Domain=.perlmeister.com;" headers.append("Set-Cookie: p1=perm; Domain=.perlmeister.com;"

View file

@ -11,7 +11,7 @@
import sys import sys
import base64 import base64
import shutil import shutil
import urllib import urllib.parse
import http.client import http.client
import tempfile import tempfile
import threading import threading
@ -322,7 +322,8 @@ def test_headers_and_content(self):
(res.read(), res.getheader('Content-type'), res.status)) (res.read(), res.getheader('Content-type'), res.status))
def test_post(self): def test_post(self):
params = urllib.urlencode({'spam' : 1, 'eggs' : 'python', 'bacon' : 123456}) params = urllib.parse.urlencode(
{'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
headers = {'Content-type' : 'application/x-www-form-urlencoded'} headers = {'Content-type' : 'application/x-www-form-urlencoded'}
res = self.request('/cgi-bin/file2.py', 'POST', params, headers) res = self.request('/cgi-bin/file2.py', 'POST', params, headers)

View file

@ -247,22 +247,22 @@ def testImpWrapper(self):
i = ImpWrapper() i = ImpWrapper()
sys.meta_path.append(i) sys.meta_path.append(i)
sys.path_hooks.append(ImpWrapper) sys.path_hooks.append(ImpWrapper)
mnames = ("colorsys", "urlparse", "distutils.core") mnames = ("colorsys", "urllib.parse", "distutils.core")
for mname in mnames: for mname in mnames:
parent = mname.split(".")[0] parent = mname.split(".")[0]
for n in list(sys.modules.keys()): for n in list(sys.modules):
if n.startswith(parent): if n.startswith(parent):
del sys.modules[n] del sys.modules[n]
for mname in mnames: for mname in mnames:
m = __import__(mname, globals(), locals(), ["__dummy__"]) m = __import__(mname, globals(), locals(), ["__dummy__"])
m.__loader__ # to make sure we actually handled the import m.__loader__ # to make sure we actually handled the import
# Delete urllib from modules because urlparse was imported above. ## # Delete urllib from modules because urlparse was imported above.
# Without this hack, test_socket_ssl fails if run in this order: ## # Without this hack, test_socket_ssl fails if run in this order:
# regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl ## # regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
try: ## try:
del sys.modules['urllib'] ## del sys.modules['urllib']
except KeyError: ## except KeyError:
pass ## pass
def test_main(): def test_main():
support.run_unittest(ImportHooksTestCase) support.run_unittest(ImportHooksTestCase)

View file

@ -156,16 +156,6 @@ def test_others(self):
# These were once about the 10 longest modules # These were once about the 10 longest modules
cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator
cm('cgi', ignore=('log',)) # set with = in module cm('cgi', ignore=('log',)) # set with = in module
cm('urllib', ignore=('_CFNumberToInt32',
'_CStringFromCFString',
'_CFSetup',
'getproxies_registry',
'proxy_bypass_registry',
'proxy_bypass_macosx_sysconf',
'open_https',
'_https_connection',
'getproxies_macosx_sysconf',
'getproxies_internetconfig',)) # not on all platforms
cm('pickle') cm('pickle')
cm('aifc', ignore=('openfp',)) # set with = in module cm('aifc', ignore=('openfp',)) # set with = in module
cm('sre_parse', ignore=('dump',)) # from sre_constants import * cm('sre_parse', ignore=('dump',)) # from sre_constants import *

View file

@ -1,5 +1,6 @@
import unittest, robotparser
import io import io
import unittest
import urllib.robotparser
from test import support from test import support
class RobotTestCase(unittest.TestCase): class RobotTestCase(unittest.TestCase):
@ -34,7 +35,7 @@ def RobotTest(index, robots_txt, good_urls, bad_urls,
agent="test_robotparser"): agent="test_robotparser"):
lines = io.StringIO(robots_txt).readlines() lines = io.StringIO(robots_txt).readlines()
parser = robotparser.RobotFileParser() parser = urllib.robotparser.RobotFileParser()
parser.parse(lines) parser.parse(lines)
for url in good_urls: for url in good_urls:
tests.addTest(RobotTestCase(index, parser, url, 1, agent)) tests.addTest(RobotTestCase(index, parser, url, 1, agent))
@ -140,7 +141,7 @@ def runTest(self):
support.requires('network') support.requires('network')
# whole site is password-protected. # whole site is password-protected.
url = 'http://mueblesmoraleda.com' url = 'http://mueblesmoraleda.com'
parser = robotparser.RobotFileParser() parser = urllib.robotparser.RobotFileParser()
parser.set_url(url) parser.set_url(url)
parser.read() parser.read()
self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False) self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)

View file

@ -10,7 +10,7 @@
import time import time
import os import os
import pprint import pprint
import urllib, urlparse import urllib.parse, urllib.request
import shutil import shutil
import traceback import traceback
import asyncore import asyncore
@ -440,8 +440,8 @@ def translate_path(self, path):
""" """
# abandon query parameters # abandon query parameters
path = urlparse.urlparse(path)[2] path = urllib.parse.urlparse(path)[2]
path = os.path.normpath(urllib.unquote(path)) path = os.path.normpath(urllib.parse.unquote(path))
words = path.split('/') words = path.split('/')
words = filter(None, words) words = filter(None, words)
path = self.root path = self.root
@ -943,7 +943,7 @@ def testSocketServer(self):
# now fetch the same data from the HTTPS server # now fetch the same data from the HTTPS server
url = 'https://%s:%d/%s' % ( url = 'https://%s:%d/%s' % (
HOST, server.port, os.path.split(CERTFILE)[1]) HOST, server.port, os.path.split(CERTFILE)[1])
f = urllib.urlopen(url) f = urllib.request.urlopen(url)
dlen = f.info().get("content-length") dlen = f.info().get("content-length")
if dlen and (int(dlen) > 0): if dlen and (int(dlen) > 0):
d2 = f.read(int(dlen)) d2 = f.read(int(dlen))

View file

@ -1,6 +1,7 @@
"""Regresssion tests for urllib""" """Regresssion tests for urllib"""
import urllib import urllib.parse
import urllib.request
import http.client import http.client
import email.message import email.message
import io import io
@ -16,6 +17,23 @@ def hexescape(char):
hex_repr = "0%s" % hex_repr hex_repr = "0%s" % hex_repr
return "%" + hex_repr return "%" + hex_repr
# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open *url* through a ``FancyURLopener``.  A call that passes
    *proxies* gets an opener built just for that call; every other call
    shares one module-level opener that is created on first use.  *data*
    is forwarded to ``opener.open()`` only when it is not None.
    """
    global _urlopener
    if proxies is None:
        if not _urlopener:
            # First proxy-less call: build the shared opener and keep it.
            _urlopener = urllib.request.FancyURLopener()
        opener = _urlopener
    else:
        # Built per call and never stored in _urlopener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    request_args = (url,) if data is None else (url, data)
    return opener.open(*request_args)
class urlopen_FileTests(unittest.TestCase): class urlopen_FileTests(unittest.TestCase):
"""Test urlopen() opening a temporary file. """Test urlopen() opening a temporary file.
@ -25,15 +43,16 @@ class urlopen_FileTests(unittest.TestCase):
""" """
def setUp(self):
    # Create a temp file to use for testing.
    # The payload names the concrete test class, so the bytes written to
    # disk can later be compared against self.text.
    self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                      "ascii")
    # The with-block closes the file even if write() raises.
    with open(support.TESTFN, 'wb') as f:
        f.write(self.text)
    self.pathname = support.TESTFN
    # Opened through the module-level urlopen() helper.
    self.returned_obj = urlopen("file:%s" % self.pathname)
def tearDown(self): def tearDown(self):
"""Shut down the open object""" """Shut down the open object"""
@ -119,7 +138,7 @@ def unfakehttp(self):
def test_read(self): def test_read(self):
self.fakehttp(b"Hello!") self.fakehttp(b"Hello!")
try: try:
fp = urllib.urlopen("http://python.org/") fp = urlopen("http://python.org/")
self.assertEqual(fp.readline(), b"Hello!") self.assertEqual(fp.readline(), b"Hello!")
self.assertEqual(fp.readline(), b"") self.assertEqual(fp.readline(), b"")
self.assertEqual(fp.geturl(), 'http://python.org/') self.assertEqual(fp.geturl(), 'http://python.org/')
@ -136,7 +155,7 @@ def test_read_bogus(self):
Content-Type: text/html; charset=iso-8859-1 Content-Type: text/html; charset=iso-8859-1
''') ''')
try: try:
self.assertRaises(IOError, urllib.urlopen, "http://python.org/") self.assertRaises(IOError, urlopen, "http://python.org/")
finally: finally:
self.unfakehttp() self.unfakehttp()
@ -145,7 +164,7 @@ def test_empty_socket(self):
# data. (#1680230) # data. (#1680230)
self.fakehttp(b'') self.fakehttp(b'')
try: try:
self.assertRaises(IOError, urllib.urlopen, "http://something") self.assertRaises(IOError, urlopen, "http://something")
finally: finally:
self.unfakehttp() self.unfakehttp()
@ -180,7 +199,8 @@ def tearDown(self):
except: pass except: pass
def constructLocalFileUrl(self, filePath):
    """Return a ``file://`` URL for *filePath*.

    The path is made absolute with os.path.abspath() and converted with
    urllib.request.pathname2url() before the scheme prefix is added.
    """
    absolute_path = os.path.abspath(filePath)
    return "file://%s" % urllib.request.pathname2url(absolute_path)
def createNewTempFile(self, data=b""): def createNewTempFile(self, data=b""):
"""Creates a new temporary file containing the specified data, """Creates a new temporary file containing the specified data,
@ -204,7 +224,7 @@ def registerFileForCleanUp(self, fileName):
def test_basic(self): def test_basic(self):
# Make sure that a local file just gets its own location returned and # Make sure that a local file just gets its own location returned and
# a headers value is returned. # a headers value is returned.
result = urllib.urlretrieve("file:%s" % support.TESTFN) result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
self.assertEqual(result[0], support.TESTFN) self.assertEqual(result[0], support.TESTFN)
self.assert_(isinstance(result[1], email.message.Message), self.assert_(isinstance(result[1], email.message.Message),
"did not get a email.message.Message instance as second " "did not get a email.message.Message instance as second "
@ -214,7 +234,7 @@ def test_copy(self):
# Test that setting the filename argument works. # Test that setting the filename argument works.
second_temp = "%s.2" % support.TESTFN second_temp = "%s.2" % support.TESTFN
self.registerFileForCleanUp(second_temp) self.registerFileForCleanUp(second_temp)
result = urllib.urlretrieve(self.constructLocalFileUrl( result = urllib.request.urlretrieve(self.constructLocalFileUrl(
support.TESTFN), second_temp) support.TESTFN), second_temp)
self.assertEqual(second_temp, result[0]) self.assertEqual(second_temp, result[0])
self.assert_(os.path.exists(second_temp), "copy of the file was not " self.assert_(os.path.exists(second_temp), "copy of the file was not "
@ -238,7 +258,8 @@ def hooktester(count, block_size, total_size, count_holder=[0]):
count_holder[0] = count_holder[0] + 1 count_holder[0] = count_holder[0] + 1
second_temp = "%s.2" % support.TESTFN second_temp = "%s.2" % support.TESTFN
self.registerFileForCleanUp(second_temp) self.registerFileForCleanUp(second_temp)
urllib.urlretrieve(self.constructLocalFileUrl(support.TESTFN), urllib.request.urlretrieve(
self.constructLocalFileUrl(support.TESTFN),
second_temp, hooktester) second_temp, hooktester)
def test_reporthook_0_bytes(self): def test_reporthook_0_bytes(self):
@ -247,7 +268,7 @@ def test_reporthook_0_bytes(self):
def hooktester(count, block_size, total_size, _report=report): def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size)) _report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile() srcFileName = self.createNewTempFile()
urllib.urlretrieve(self.constructLocalFileUrl(srcFileName), urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester) support.TESTFN, hooktester)
self.assertEqual(len(report), 1) self.assertEqual(len(report), 1)
self.assertEqual(report[0][2], 0) self.assertEqual(report[0][2], 0)
@ -261,7 +282,7 @@ def test_reporthook_5_bytes(self):
def hooktester(count, block_size, total_size, _report=report): def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size)) _report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile(b"x" * 5) srcFileName = self.createNewTempFile(b"x" * 5)
urllib.urlretrieve(self.constructLocalFileUrl(srcFileName), urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester) support.TESTFN, hooktester)
self.assertEqual(len(report), 2) self.assertEqual(len(report), 2)
self.assertEqual(report[0][1], 8192) self.assertEqual(report[0][1], 8192)
@ -275,7 +296,7 @@ def test_reporthook_8193_bytes(self):
def hooktester(count, block_size, total_size, _report=report): def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size)) _report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile(b"x" * 8193) srcFileName = self.createNewTempFile(b"x" * 8193)
urllib.urlretrieve(self.constructLocalFileUrl(srcFileName), urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester) support.TESTFN, hooktester)
self.assertEqual(len(report), 3) self.assertEqual(len(report), 3)
self.assertEqual(report[0][1], 8192) self.assertEqual(report[0][1], 8192)
@ -284,10 +305,10 @@ def hooktester(count, block_size, total_size, _report=report):
class QuotingTests(unittest.TestCase): class QuotingTests(unittest.TestCase):
"""Tests for urllib.quote() and urllib.quote_plus() """Tests for urllib.quote() and urllib.quote_plus()
According to RFC 2396 ("Uniform Resource Identifiers), to escape a According to RFC 2396 (Uniform Resource Identifiers), to escape a
character you write it as '%' + <2 character US-ASCII hex value>. The Python character you write it as '%' + <2 character US-ASCII hex value>.
code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly. The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
Case does not matter on the hex letters. character properly. Case does not matter on the hex letters.
The various character sets specified are: The various character sets specified are:
@ -313,24 +334,24 @@ def test_never_quote(self):
"abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz",
"0123456789", "0123456789",
"_.-"]) "_.-"])
result = urllib.quote(do_not_quote) result = urllib.parse.quote(do_not_quote)
self.assertEqual(do_not_quote, result, self.assertEqual(do_not_quote, result,
"using quote(): %s != %s" % (do_not_quote, result)) "using quote(): %s != %s" % (do_not_quote, result))
result = urllib.quote_plus(do_not_quote) result = urllib.parse.quote_plus(do_not_quote)
self.assertEqual(do_not_quote, result, self.assertEqual(do_not_quote, result,
"using quote_plus(): %s != %s" % (do_not_quote, result)) "using quote_plus(): %s != %s" % (do_not_quote, result))
def test_default_safe(self): def test_default_safe(self):
# Test '/' is default value for 'safe' parameter # Test '/' is default value for 'safe' parameter
self.assertEqual(urllib.quote.__defaults__[0], '/') self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
def test_safe(self): def test_safe(self):
# Test setting 'safe' parameter does what it should do # Test setting 'safe' parameter does what it should do
quote_by_default = "<>" quote_by_default = "<>"
result = urllib.quote(quote_by_default, safe=quote_by_default) result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
self.assertEqual(quote_by_default, result, self.assertEqual(quote_by_default, result,
"using quote(): %s != %s" % (quote_by_default, result)) "using quote(): %s != %s" % (quote_by_default, result))
result = urllib.quote_plus(quote_by_default, safe=quote_by_default) result = urllib.parse.quote_plus(quote_by_default, safe=quote_by_default)
self.assertEqual(quote_by_default, result, self.assertEqual(quote_by_default, result,
"using quote_plus(): %s != %s" % "using quote_plus(): %s != %s" %
(quote_by_default, result)) (quote_by_default, result))
@ -343,11 +364,11 @@ def test_default_quoting(self):
should_quote.append(chr(127)) # For 0x7F should_quote.append(chr(127)) # For 0x7F
should_quote = ''.join(should_quote) should_quote = ''.join(should_quote)
for char in should_quote: for char in should_quote:
result = urllib.quote(char) result = urllib.parse.quote(char)
self.assertEqual(hexescape(char), result, self.assertEqual(hexescape(char), result,
"using quote(): %s should be escaped to %s, not %s" % "using quote(): %s should be escaped to %s, not %s" %
(char, hexescape(char), result)) (char, hexescape(char), result))
result = urllib.quote_plus(char) result = urllib.parse.quote_plus(char)
self.assertEqual(hexescape(char), result, self.assertEqual(hexescape(char), result,
"using quote_plus(): " "using quote_plus(): "
"%s should be escapes to %s, not %s" % "%s should be escapes to %s, not %s" %
@ -355,7 +376,7 @@ def test_default_quoting(self):
del should_quote del should_quote
partial_quote = "ab[]cd" partial_quote = "ab[]cd"
expected = "ab%5B%5Dcd" expected = "ab%5B%5Dcd"
result = urllib.quote(partial_quote) result = urllib.parse.quote(partial_quote)
self.assertEqual(expected, result, self.assertEqual(expected, result,
"using quote(): %s != %s" % (expected, result)) "using quote(): %s != %s" % (expected, result))
self.assertEqual(expected, result, self.assertEqual(expected, result,
@ -364,26 +385,26 @@ def test_default_quoting(self):
def test_quoting_space(self):
    # Make sure quote() and quote_plus() handle spaces as specified in
    # their unique way
    escaped_space = hexescape(' ')

    # A lone space: quote() percent-escapes it, quote_plus() emits '+'.
    result = urllib.parse.quote(' ')
    self.assertEqual(result, escaped_space,
                     "using quote(): %s != %s" % (result, escaped_space))
    result = urllib.parse.quote_plus(' ')
    self.assertEqual(result, '+',
                     "using quote_plus(): %s != +" % result)

    # Spaces embedded in a longer string follow the same two rules.
    given = "a b cd e f"
    expect = given.replace(' ', escaped_space)
    result = urllib.parse.quote(given)
    self.assertEqual(expect, result,
                     "using quote(): %s != %s" % (expect, result))
    expect = given.replace(' ', '+')
    result = urllib.parse.quote_plus(given)
    self.assertEqual(expect, result,
                     "using quote_plus(): %s != %s" % (expect, result))
def test_quoting_plus(self): def test_quoting_plus(self):
self.assertEqual(urllib.quote_plus('alpha+beta gamma'), self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
'alpha%2Bbeta+gamma') 'alpha%2Bbeta+gamma')
self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'), self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
'alpha+beta+gamma') 'alpha+beta+gamma')
class UnquotingTests(unittest.TestCase): class UnquotingTests(unittest.TestCase):
@ -399,21 +420,21 @@ def test_unquoting(self):
for num in range(128): for num in range(128):
given = hexescape(chr(num)) given = hexescape(chr(num))
expect = chr(num) expect = chr(num)
result = urllib.unquote(given) result = urllib.parse.unquote(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using unquote(): %s != %s" % (expect, result)) "using unquote(): %s != %s" % (expect, result))
result = urllib.unquote_plus(given) result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % "using unquote_plus(): %s != %s" %
(expect, result)) (expect, result))
escape_list.append(given) escape_list.append(given)
escape_string = ''.join(escape_list) escape_string = ''.join(escape_list)
del escape_list del escape_list
result = urllib.unquote(escape_string) result = urllib.parse.unquote(escape_string)
self.assertEqual(result.count('%'), 1, self.assertEqual(result.count('%'), 1,
"using quote(): not all characters escaped; %s" % "using quote(): not all characters escaped; %s" %
result) result)
result = urllib.unquote(escape_string) result = urllib.parse.unquote(escape_string)
self.assertEqual(result.count('%'), 1, self.assertEqual(result.count('%'), 1,
"using unquote(): not all characters escaped: " "using unquote(): not all characters escaped: "
"%s" % result) "%s" % result)
@ -423,10 +444,10 @@ def test_unquoting_parts(self):
# interspersed # interspersed
given = 'ab%sd' % hexescape('c') given = 'ab%sd' % hexescape('c')
expect = "abcd" expect = "abcd"
result = urllib.unquote(given) result = urllib.parse.unquote(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using quote(): %s != %s" % (expect, result)) "using quote(): %s != %s" % (expect, result))
result = urllib.unquote_plus(given) result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % (expect, result)) "using unquote_plus(): %s != %s" % (expect, result))
@ -434,16 +455,16 @@ def test_unquoting_plus(self):
# Test difference between unquote() and unquote_plus() # Test difference between unquote() and unquote_plus()
given = "are+there+spaces..." given = "are+there+spaces..."
expect = given expect = given
result = urllib.unquote(given) result = urllib.parse.unquote(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using unquote(): %s != %s" % (expect, result)) "using unquote(): %s != %s" % (expect, result))
expect = given.replace('+', ' ') expect = given.replace('+', ' ')
result = urllib.unquote_plus(given) result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % (expect, result)) "using unquote_plus(): %s != %s" % (expect, result))
def test_unquote_with_unicode(self): def test_unquote_with_unicode(self):
r = urllib.unquote('br%C3%BCckner_sapporo_20050930.doc') r = urllib.parse.unquote('br%C3%BCckner_sapporo_20050930.doc')
self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc') self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
class urlencode_Tests(unittest.TestCase): class urlencode_Tests(unittest.TestCase):
@ -462,7 +483,7 @@ def help_inputtype(self, given, test_type):
""" """
expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
result = urllib.urlencode(given) result = urllib.parse.urlencode(given)
for expected in expect_somewhere: for expected in expect_somewhere:
self.assert_(expected in result, self.assert_(expected in result,
"testing %s: %s not found in %s" % "testing %s: %s not found in %s" %
@ -495,20 +516,20 @@ def test_quoting(self):
# Make sure keys and values are quoted using quote_plus() # Make sure keys and values are quoted using quote_plus()
given = {"&":"="} given = {"&":"="}
expect = "%s=%s" % (hexescape('&'), hexescape('=')) expect = "%s=%s" % (hexescape('&'), hexescape('='))
result = urllib.urlencode(given) result = urllib.parse.urlencode(given)
self.assertEqual(expect, result) self.assertEqual(expect, result)
given = {"key name":"A bunch of pluses"} given = {"key name":"A bunch of pluses"}
expect = "key+name=A+bunch+of+pluses" expect = "key+name=A+bunch+of+pluses"
result = urllib.urlencode(given) result = urllib.parse.urlencode(given)
self.assertEqual(expect, result) self.assertEqual(expect, result)
def test_doseq(self): def test_doseq(self):
# Test that passing True for 'doseq' parameter works correctly # Test that passing True for 'doseq' parameter works correctly
given = {'sequence':['1', '2', '3']} given = {'sequence':['1', '2', '3']}
expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3'])) expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
result = urllib.urlencode(given) result = urllib.parse.urlencode(given)
self.assertEqual(expect, result) self.assertEqual(expect, result)
result = urllib.urlencode(given, True) result = urllib.parse.urlencode(given, True)
for value in given["sequence"]: for value in given["sequence"]:
expect = "sequence=%s" % value expect = "sequence=%s" % value
self.assert_(expect in result, self.assert_(expect in result,
@ -523,11 +544,11 @@ def test_basic(self):
# Make sure simple tests pass # Make sure simple tests pass
expected_path = os.path.join("parts", "of", "a", "path") expected_path = os.path.join("parts", "of", "a", "path")
expected_url = "parts/of/a/path" expected_url = "parts/of/a/path"
result = urllib.pathname2url(expected_path) result = urllib.request.pathname2url(expected_path)
self.assertEqual(expected_url, result, self.assertEqual(expected_url, result,
"pathname2url() failed; %s != %s" % "pathname2url() failed; %s != %s" %
(result, expected_url)) (result, expected_url))
result = urllib.url2pathname(expected_url) result = urllib.request.url2pathname(expected_url)
self.assertEqual(expected_path, result, self.assertEqual(expected_path, result,
"url2pathame() failed; %s != %s" % "url2pathame() failed; %s != %s" %
(result, expected_path)) (result, expected_path))
@ -536,25 +557,25 @@ def test_quoting(self):
# Test automatic quoting and unquoting works for pathname2url() and # Test automatic quoting and unquoting works for pathname2url() and
# url2pathname() respectively # url2pathname() respectively
given = os.path.join("needs", "quot=ing", "here") given = os.path.join("needs", "quot=ing", "here")
expect = "needs/%s/here" % urllib.quote("quot=ing") expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
result = urllib.pathname2url(given) result = urllib.request.pathname2url(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"pathname2url() failed; %s != %s" % "pathname2url() failed; %s != %s" %
(expect, result)) (expect, result))
expect = given expect = given
result = urllib.url2pathname(result) result = urllib.request.url2pathname(result)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"url2pathname() failed; %s != %s" % "url2pathname() failed; %s != %s" %
(expect, result)) (expect, result))
given = os.path.join("make sure", "using_quote") given = os.path.join("make sure", "using_quote")
expect = "%s/using_quote" % urllib.quote("make sure") expect = "%s/using_quote" % urllib.parse.quote("make sure")
result = urllib.pathname2url(given) result = urllib.request.pathname2url(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"pathname2url() failed; %s != %s" % "pathname2url() failed; %s != %s" %
(expect, result)) (expect, result))
given = "make+sure/using_unquote" given = "make+sure/using_unquote"
expect = os.path.join("make+sure", "using_unquote") expect = os.path.join("make+sure", "using_unquote")
result = urllib.url2pathname(given) result = urllib.request.url2pathname(given)
self.assertEqual(expect, result, self.assertEqual(expect, result,
"url2pathname() failed; %s != %s" % "url2pathname() failed; %s != %s" %
(expect, result)) (expect, result))

View file

@ -5,8 +5,8 @@
import io import io
import socket import socket
import urllib2 import urllib.request
from urllib2 import Request, OpenerDirector from urllib.request import Request, OpenerDirector
# XXX # XXX
# Request # Request
@ -17,10 +17,10 @@ class TrivialTests(unittest.TestCase):
def test_trivial(self): def test_trivial(self):
# A couple trivial tests # A couple trivial tests
self.assertRaises(ValueError, urllib2.urlopen, 'bogus url') self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
# XXX Name hacking to get this to work on Windows. # XXX Name hacking to get this to work on Windows.
fname = os.path.abspath(urllib2.__file__).replace('\\', '/') fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
if fname[1:2] == ":": if fname[1:2] == ":":
fname = fname[2:] fname = fname[2:]
# And more hacking to get it to work on MacOS. This assumes # And more hacking to get it to work on MacOS. This assumes
@ -29,18 +29,21 @@ def test_trivial(self):
fname = '/' + fname.replace(':', '/') fname = '/' + fname.replace(':', '/')
file_url = "file://%s" % fname file_url = "file://%s" % fname
f = urllib2.urlopen(file_url) f = urllib.request.urlopen(file_url)
buf = f.read() buf = f.read()
f.close() f.close()
def test_parse_http_list(self): def test_parse_http_list(self):
tests = [('a,b,c', ['a', 'b', 'c']), tests = [
('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']), ('a,b,c', ['a', 'b', 'c']),
('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']), ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])] ('a, b, "c", "d", "e,f", g, h',
['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
('a="b\\"c", d="e\\,f", g="h\\\\i"',
['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
for string, list in tests: for string, list in tests:
self.assertEquals(urllib2.parse_http_list(string), list) self.assertEquals(urllib.request.parse_http_list(string), list)
def test_request_headers_dict(): def test_request_headers_dict():
@ -107,7 +110,7 @@ def test_request_headers_methods():
def test_password_manager(self): def test_password_manager(self):
""" """
>>> mgr = urllib2.HTTPPasswordMgr() >>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password >>> add = mgr.add_password
>>> add("Some Realm", "http://example.com/", "joe", "password") >>> add("Some Realm", "http://example.com/", "joe", "password")
>>> add("Some Realm", "http://example.com/ni", "ni", "ni") >>> add("Some Realm", "http://example.com/ni", "ni", "ni")
@ -172,7 +175,7 @@ def test_password_manager(self):
def test_password_manager_default_port(self): def test_password_manager_default_port(self):
""" """
>>> mgr = urllib2.HTTPPasswordMgr() >>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password >>> add = mgr.add_password
The point to note here is that we can't guess the default port if there's The point to note here is that we can't guess the default port if there's
@ -288,7 +291,7 @@ def handle(self, fn_name, action, *args, **kwds):
res = MockResponse(200, "OK", {}, "") res = MockResponse(200, "OK", {}, "")
return self.parent.error("http", args[0], res, code, "", {}) return self.parent.error("http", args[0], res, code, "", {})
elif action == "raise": elif action == "raise":
raise urllib2.URLError("blah") raise urllib.error.URLError("blah")
assert False assert False
def close(self): pass def close(self): pass
def add_parent(self, parent): def add_parent(self, parent):
@ -337,7 +340,7 @@ def build_test_opener(*handler_instances):
opener.add_handler(h) opener.add_handler(h)
return opener return opener
class MockHTTPHandler(urllib2.BaseHandler): class MockHTTPHandler(urllib.request.BaseHandler):
# useful for testing redirections and auth # useful for testing redirections and auth
# sends supplied headers and code as first response # sends supplied headers and code as first response
# sends 200 OK as second response # sends 200 OK as second response
@ -392,7 +395,7 @@ def test_badly_named_methods(self):
# TypeError in real code; here, returning self from these mock # TypeError in real code; here, returning self from these mock
# methods would either cause no exception, or AttributeError. # methods would either cause no exception, or AttributeError.
from urllib2 import URLError from urllib.error import URLError
o = OpenerDirector() o = OpenerDirector()
meth_spec = [ meth_spec = [
@ -400,7 +403,7 @@ def test_badly_named_methods(self):
[("redirect_request", "return self")], [("redirect_request", "return self")],
] ]
handlers = add_ordered_mock_handlers(o, meth_spec) handlers = add_ordered_mock_handlers(o, meth_spec)
o.add_handler(urllib2.UnknownHandler()) o.add_handler(urllib.request.UnknownHandler())
for scheme in "do", "proxy", "redirect": for scheme in "do", "proxy", "redirect":
self.assertRaises(URLError, o.open, scheme+"://example.com/") self.assertRaises(URLError, o.open, scheme+"://example.com/")
@ -458,7 +461,7 @@ def test_raise(self):
handlers = add_ordered_mock_handlers(o, meth_spec) handlers = add_ordered_mock_handlers(o, meth_spec)
req = Request("http://example.com/") req = Request("http://example.com/")
self.assertRaises(urllib2.URLError, o.open, req) self.assertRaises(urllib.error.URLError, o.open, req)
self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})]) self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
## def test_error(self): ## def test_error(self):
@ -529,8 +532,7 @@ def test_processors(self):
def sanepathname2url(path): def sanepathname2url(path):
import urllib urlpath = urllib.request.pathname2url(path)
urlpath = urllib.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"): if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:] urlpath = urlpath[2:]
# XXX don't ask me about the mac... # XXX don't ask me about the mac...
@ -545,7 +547,7 @@ def retrfile(self, filename, filetype):
self.filename, self.filetype = filename, filetype self.filename, self.filetype = filename, filetype
return io.StringIO(self.data), len(self.data) return io.StringIO(self.data), len(self.data)
class NullFTPHandler(urllib2.FTPHandler): class NullFTPHandler(urllib.request.FTPHandler):
def __init__(self, data): self.data = data def __init__(self, data): self.data = data
def connect_ftp(self, user, passwd, host, port, dirs, def connect_ftp(self, user, passwd, host, port, dirs,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT): timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
@ -587,7 +589,7 @@ def connect_ftp(self, user, passwd, host, port, dirs,
def test_file(self): def test_file(self):
import email.utils, socket import email.utils, socket
h = urllib2.FileHandler() h = urllib.request.FileHandler()
o = h.parent = MockOpener() o = h.parent = MockOpener()
TESTFN = support.TESTFN TESTFN = support.TESTFN
@ -644,12 +646,12 @@ def test_file(self):
finally: finally:
f.close() f.close()
self.assertRaises(urllib2.URLError, self.assertRaises(urllib.error.URLError,
h.file_open, Request(url)) h.file_open, Request(url))
finally: finally:
os.remove(TESTFN) os.remove(TESTFN)
h = urllib2.FileHandler() h = urllib.request.FileHandler()
o = h.parent = MockOpener() o = h.parent = MockOpener()
# XXXX why does // mean ftp (and /// mean not ftp!), and where # XXXX why does // mean ftp (and /// mean not ftp!), and where
# is file: scheme specified? I think this is really a bug, and # is file: scheme specified? I think this is really a bug, and
@ -668,7 +670,7 @@ def test_file(self):
try: try:
h.file_open(req) h.file_open(req)
# XXXX remove OSError when bug fixed # XXXX remove OSError when bug fixed
except (urllib2.URLError, OSError): except (urllib.error.URLError, OSError):
self.assert_(not ftp) self.assert_(not ftp)
else: else:
self.assert_(o.req is req) self.assert_(o.req is req)
@ -685,6 +687,7 @@ def read(self):
return '' return ''
class MockHTTPClass: class MockHTTPClass:
def __init__(self): def __init__(self):
self.level = 0
self.req_headers = [] self.req_headers = []
self.data = None self.data = None
self.raise_on_endheaders = False self.raise_on_endheaders = False
@ -707,7 +710,7 @@ def request(self, method, url, body=None, headers={}):
def getresponse(self): def getresponse(self):
return MockHTTPResponse(MockFile(), {}, 200, "OK") return MockHTTPResponse(MockFile(), {}, 200, "OK")
h = urllib2.AbstractHTTPHandler() h = urllib.request.AbstractHTTPHandler()
o = h.parent = MockOpener() o = h.parent = MockOpener()
url = "http://example.com/" url = "http://example.com/"
@ -737,7 +740,7 @@ def getresponse(self):
# check socket.error converted to URLError # check socket.error converted to URLError
http.raise_on_endheaders = True http.raise_on_endheaders = True
self.assertRaises(urllib2.URLError, h.do_open, http, req) self.assertRaises(urllib.error.URLError, h.do_open, http, req)
# check adding of standard headers # check adding of standard headers
o.addheaders = [("Spam", "eggs")] o.addheaders = [("Spam", "eggs")]
@ -768,7 +771,7 @@ def getresponse(self):
self.assertEqual(req.unredirected_hdrs["Spam"], "foo") self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
def test_errors(self): def test_errors(self):
h = urllib2.HTTPErrorProcessor() h = urllib.request.HTTPErrorProcessor()
o = h.parent = MockOpener() o = h.parent = MockOpener()
url = "http://example.com/" url = "http://example.com/"
@ -794,7 +797,7 @@ def test_errors(self):
def test_cookies(self): def test_cookies(self):
cj = MockCookieJar() cj = MockCookieJar()
h = urllib2.HTTPCookieProcessor(cj) h = urllib.request.HTTPCookieProcessor(cj)
o = h.parent = MockOpener() o = h.parent = MockOpener()
req = Request("http://example.com/") req = Request("http://example.com/")
@ -810,7 +813,7 @@ def test_cookies(self):
def test_redirect(self): def test_redirect(self):
from_url = "http://example.com/a.html" from_url = "http://example.com/a.html"
to_url = "http://example.com/b.html" to_url = "http://example.com/b.html"
h = urllib2.HTTPRedirectHandler() h = urllib.request.HTTPRedirectHandler()
o = h.parent = MockOpener() o = h.parent = MockOpener()
# ordinary redirect behaviour # ordinary redirect behaviour
@ -825,7 +828,7 @@ def test_redirect(self):
try: try:
method(req, MockFile(), code, "Blah", method(req, MockFile(), code, "Blah",
MockHeaders({"location": to_url})) MockHeaders({"location": to_url}))
except urllib2.HTTPError: except urllib.error.HTTPError:
# 307 in response to POST requires user OK # 307 in response to POST requires user OK
self.assert_(code == 307 and data is not None) self.assert_(code == 307 and data is not None)
self.assertEqual(o.req.get_full_url(), to_url) self.assertEqual(o.req.get_full_url(), to_url)
@ -860,9 +863,9 @@ def redirect(h, req, url=to_url):
while 1: while 1:
redirect(h, req, "http://example.com/") redirect(h, req, "http://example.com/")
count = count + 1 count = count + 1
except urllib2.HTTPError: except urllib.error.HTTPError:
# don't stop until max_repeats, because cookies may introduce state # don't stop until max_repeats, because cookies may introduce state
self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats) self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
# detect endless non-repeating chain of redirects # detect endless non-repeating chain of redirects
req = Request(from_url, origin_req_host="example.com") req = Request(from_url, origin_req_host="example.com")
@ -871,9 +874,9 @@ def redirect(h, req, url=to_url):
while 1: while 1:
redirect(h, req, "http://example.com/%d" % count) redirect(h, req, "http://example.com/%d" % count)
count = count + 1 count = count + 1
except urllib2.HTTPError: except urllib.error.HTTPError:
self.assertEqual(count, self.assertEqual(count,
urllib2.HTTPRedirectHandler.max_redirections) urllib.request.HTTPRedirectHandler.max_redirections)
def test_cookie_redirect(self): def test_cookie_redirect(self):
# cookies shouldn't leak into redirected requests # cookies shouldn't leak into redirected requests
@ -883,16 +886,16 @@ def test_cookie_redirect(self):
cj = CookieJar() cj = CookieJar()
interact_netscape(cj, "http://www.example.com/", "spam=eggs") interact_netscape(cj, "http://www.example.com/", "spam=eggs")
hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n") hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
hdeh = urllib2.HTTPDefaultErrorHandler() hdeh = urllib.request.HTTPDefaultErrorHandler()
hrh = urllib2.HTTPRedirectHandler() hrh = urllib.request.HTTPRedirectHandler()
cp = urllib2.HTTPCookieProcessor(cj) cp = urllib.request.HTTPCookieProcessor(cj)
o = build_test_opener(hh, hdeh, hrh, cp) o = build_test_opener(hh, hdeh, hrh, cp)
o.open("http://www.example.com/") o.open("http://www.example.com/")
self.assert_(not hh.req.has_header("Cookie")) self.assert_(not hh.req.has_header("Cookie"))
def test_proxy(self): def test_proxy(self):
o = OpenerDirector() o = OpenerDirector()
ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128")) ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
o.add_handler(ph) o.add_handler(ph)
meth_spec = [ meth_spec = [
[("http_open", "return response")] [("http_open", "return response")]
@ -910,7 +913,7 @@ def test_proxy(self):
def test_basic_auth(self, quote_char='"'): def test_basic_auth(self, quote_char='"'):
opener = OpenerDirector() opener = OpenerDirector()
password_manager = MockPasswordManager() password_manager = MockPasswordManager()
auth_handler = urllib2.HTTPBasicAuthHandler(password_manager) auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
realm = "ACME Widget Store" realm = "ACME Widget Store"
http_handler = MockHTTPHandler( http_handler = MockHTTPHandler(
401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
@ -928,10 +931,10 @@ def test_basic_auth_with_single_quoted_realm(self):
def test_proxy_basic_auth(self): def test_proxy_basic_auth(self):
opener = OpenerDirector() opener = OpenerDirector()
ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128")) ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
opener.add_handler(ph) opener.add_handler(ph)
password_manager = MockPasswordManager() password_manager = MockPasswordManager()
auth_handler = urllib2.ProxyBasicAuthHandler(password_manager) auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
realm = "ACME Networks" realm = "ACME Networks"
http_handler = MockHTTPHandler( http_handler = MockHTTPHandler(
407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm) 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
@ -958,15 +961,15 @@ def __init__(self):
self.recorded = [] self.recorded = []
def record(self, info): def record(self, info):
self.recorded.append(info) self.recorded.append(info)
class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler): class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
def http_error_401(self, *args, **kwds): def http_error_401(self, *args, **kwds):
self.parent.record("digest") self.parent.record("digest")
urllib2.HTTPDigestAuthHandler.http_error_401(self, urllib.request.HTTPDigestAuthHandler.http_error_401(self,
*args, **kwds) *args, **kwds)
class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler): class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
def http_error_401(self, *args, **kwds): def http_error_401(self, *args, **kwds):
self.parent.record("basic") self.parent.record("basic")
urllib2.HTTPBasicAuthHandler.http_error_401(self, urllib.request.HTTPBasicAuthHandler.http_error_401(self,
*args, **kwds) *args, **kwds)
opener = RecordingOpenerDirector() opener = RecordingOpenerDirector()
@ -1030,13 +1033,13 @@ def _test_basic_auth(self, opener, auth_handler, auth_header,
class MiscTests(unittest.TestCase): class MiscTests(unittest.TestCase):
def test_build_opener(self): def test_build_opener(self):
class MyHTTPHandler(urllib2.HTTPHandler): pass class MyHTTPHandler(urllib.request.HTTPHandler): pass
class FooHandler(urllib2.BaseHandler): class FooHandler(urllib.request.BaseHandler):
def foo_open(self): pass def foo_open(self): pass
class BarHandler(urllib2.BaseHandler): class BarHandler(urllib.request.BaseHandler):
def bar_open(self): pass def bar_open(self): pass
build_opener = urllib2.build_opener build_opener = urllib.request.build_opener
o = build_opener(FooHandler, BarHandler) o = build_opener(FooHandler, BarHandler)
self.opener_has_handler(o, FooHandler) self.opener_has_handler(o, FooHandler)
@ -1054,14 +1057,14 @@ def bar_open(self): pass
# a particular case of overriding: default handlers can be passed # a particular case of overriding: default handlers can be passed
# in explicitly # in explicitly
o = build_opener() o = build_opener()
self.opener_has_handler(o, urllib2.HTTPHandler) self.opener_has_handler(o, urllib.request.HTTPHandler)
o = build_opener(urllib2.HTTPHandler) o = build_opener(urllib.request.HTTPHandler)
self.opener_has_handler(o, urllib2.HTTPHandler) self.opener_has_handler(o, urllib.request.HTTPHandler)
o = build_opener(urllib2.HTTPHandler()) o = build_opener(urllib.request.HTTPHandler())
self.opener_has_handler(o, urllib2.HTTPHandler) self.opener_has_handler(o, urllib.request.HTTPHandler)
# Issue2670: multiple handlers sharing the same base class # Issue2670: multiple handlers sharing the same base class
class MyOtherHTTPHandler(urllib2.HTTPHandler): pass class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
o = build_opener(MyHTTPHandler, MyOtherHTTPHandler) o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
self.opener_has_handler(o, MyHTTPHandler) self.opener_has_handler(o, MyHTTPHandler)
self.opener_has_handler(o, MyOtherHTTPHandler) self.opener_has_handler(o, MyOtherHTTPHandler)
@ -1077,7 +1080,7 @@ def opener_has_handler(self, opener, handler_class):
def test_main(verbose=None): def test_main(verbose=None):
from test import test_urllib2 from test import test_urllib2
support.run_doctest(test_urllib2, verbose) support.run_doctest(test_urllib2, verbose)
support.run_doctest(urllib2, verbose) support.run_doctest(urllib.request, verbose)
tests = (TrivialTests, tests = (TrivialTests,
OpenerDirectorTests, OpenerDirectorTests,
HandlerTests, HandlerTests,

View file

@ -2,8 +2,8 @@
import email import email
import threading import threading
import urlparse import urllib.parse
import urllib2 import urllib.request
import http.server import http.server
import unittest import unittest
import hashlib import hashlib
@ -45,7 +45,7 @@ def __init__(self, request_handler):
self._stop_server = False self._stop_server = False
self.ready = threading.Event() self.ready = threading.Event()
request_handler.protocol_version = "HTTP/1.0" request_handler.protocol_version = "HTTP/1.0"
self.httpd = LoopbackHttpServer(('127.0.0.1', 0), self.httpd = LoopbackHttpServer(("127.0.0.1", 0),
request_handler) request_handler)
#print "Serving HTTP on %s port %s" % (self.httpd.server_name, #print "Serving HTTP on %s port %s" % (self.httpd.server_name,
# self.httpd.server_port) # self.httpd.server_port)
@ -154,11 +154,11 @@ def handle_request(self, request_handler):
if len(self._users) == 0: if len(self._users) == 0:
return True return True
if 'Proxy-Authorization' not in request_handler.headers: if "Proxy-Authorization" not in request_handler.headers:
return self._return_auth_challenge(request_handler) return self._return_auth_challenge(request_handler)
else: else:
auth_dict = self._create_auth_dict( auth_dict = self._create_auth_dict(
request_handler.headers['Proxy-Authorization'] request_handler.headers["Proxy-Authorization"]
) )
if auth_dict["username"] in self._users: if auth_dict["username"] in self._users:
password = self._users[ auth_dict["username"] ] password = self._users[ auth_dict["username"] ]
@ -199,12 +199,12 @@ class FakeProxyHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args): def log_message(self, format, *args):
# Uncomment the next line for debugging. # Uncomment the next line for debugging.
#sys.stderr.write(format % args) # sys.stderr.write(format % args)
pass pass
def do_GET(self): def do_GET(self):
(scm, netloc, path, params, query, fragment) = urlparse.urlparse( (scm, netloc, path, params, query, fragment) = urllib.parse.urlparse(
self.path, 'http') self.path, "http")
self.short_path = path self.short_path = path
if self.digest_auth_handler.handle_request(self): if self.digest_auth_handler.handle_request(self):
self.send_response(200, "OK") self.send_response(200, "OK")
@ -234,9 +234,10 @@ def setUp(self):
self.server.start() self.server.start()
self.server.ready.wait() self.server.ready.wait()
proxy_url = "http://127.0.0.1:%d" % self.server.port proxy_url = "http://127.0.0.1:%d" % self.server.port
handler = urllib2.ProxyHandler({"http" : proxy_url}) handler = urllib.request.ProxyHandler({"http" : proxy_url})
self._digest_auth_handler = urllib2.ProxyDigestAuthHandler() self._digest_auth_handler = urllib.request.ProxyDigestAuthHandler()
self.opener = urllib2.build_opener(handler, self._digest_auth_handler) self.opener = urllib.request.build_opener(
handler, self._digest_auth_handler)
def tearDown(self): def tearDown(self):
self.server.stop() self.server.stop()
@ -245,13 +246,13 @@ def test_proxy_with_bad_password_raises_httperror(self):
self._digest_auth_handler.add_password(self.REALM, self.URL, self._digest_auth_handler.add_password(self.REALM, self.URL,
self.USER, self.PASSWD+"bad") self.USER, self.PASSWD+"bad")
FakeProxyHandler.digest_auth_handler.set_qop("auth") FakeProxyHandler.digest_auth_handler.set_qop("auth")
self.assertRaises(urllib2.HTTPError, self.assertRaises(urllib.error.HTTPError,
self.opener.open, self.opener.open,
self.URL) self.URL)
def test_proxy_with_no_password_raises_httperror(self): def test_proxy_with_no_password_raises_httperror(self):
FakeProxyHandler.digest_auth_handler.set_qop("auth") FakeProxyHandler.digest_auth_handler.set_qop("auth")
self.assertRaises(urllib2.HTTPError, self.assertRaises(urllib.error.HTTPError,
self.opener.open, self.opener.open,
self.URL) self.URL)
@ -270,7 +271,7 @@ def test_proxy_qop_auth_int_works_or_throws_urlerror(self):
FakeProxyHandler.digest_auth_handler.set_qop("auth-int") FakeProxyHandler.digest_auth_handler.set_qop("auth-int")
try: try:
result = self.opener.open(self.URL) result = self.opener.open(self.URL)
except urllib2.URLError: except urllib.error.URLError:
# It's okay if we don't support auth-int, but we certainly # It's okay if we don't support auth-int, but we certainly
# shouldn't receive any kind of exception here other than # shouldn't receive any kind of exception here other than
# a URLError. # a URLError.
@ -296,7 +297,7 @@ def do_GET(self):
self.wfile.write(body) self.wfile.write(body)
def do_POST(self): def do_POST(self):
content_length = self.headers['Content-Length'] content_length = self.headers["Content-Length"]
post_data = self.rfile.read(int(content_length)) post_data = self.rfile.read(int(content_length))
self.do_GET() self.do_GET()
self.requests.append(post_data) self.requests.append(post_data)
@ -311,7 +312,7 @@ def send_head(self):
for (header, value) in headers: for (header, value) in headers:
self.send_header(header, value % self.port) self.send_header(header, value % self.port)
if body: if body:
self.send_header('Content-type', 'text/plain') self.send_header("Content-type", "text/plain")
self.end_headers() self.end_headers()
return body return body
self.end_headers() self.end_headers()
@ -332,7 +333,22 @@ class TestUrlopen(unittest.TestCase):
for transparent redirection have been written. for transparent redirection have been written.
""" """
def start_server(self, responses): def setUp(self):
self.server = None
def tearDown(self):
if self.server is not None:
self.server.stop()
def urlopen(self, url, data=None):
f = urllib.request.urlopen(url, data)
result = f.read()
f.close()
return result
def start_server(self, responses=None):
if responses is None:
responses = [(200, [], b"we don't care")]
handler = GetRequestHandler(responses) handler = GetRequestHandler(responses)
self.server = LoopbackHttpServerThread(handler) self.server = LoopbackHttpServerThread(handler)
@ -342,106 +358,71 @@ def start_server(self, responses):
handler.port = port handler.port = port
return handler return handler
def test_redirection(self): def test_redirection(self):
expected_response = b'We got here...' expected_response = b"We got here..."
responses = [ responses = [
(302, [('Location', 'http://localhost:%s/somewhere_else')], ''), (302, [("Location", "http://localhost:%s/somewhere_else")], ""),
(200, [], expected_response) (200, [], expected_response)
] ]
handler = self.start_server(responses) handler = self.start_server(responses)
data = self.urlopen("http://localhost:%s/" % handler.port)
try: self.assertEquals(data, expected_response)
f = urllib2.urlopen('http://localhost:%s/' % handler.port) self.assertEquals(handler.requests, ["/", "/somewhere_else"])
data = f.read()
f.close()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/', '/somewhere_else'])
finally:
self.server.stop()
def test_404(self): def test_404(self):
expected_response = b'Bad bad bad...' expected_response = b"Bad bad bad..."
handler = self.start_server([(404, [], expected_response)]) handler = self.start_server([(404, [], expected_response)])
try: try:
try: self.urlopen("http://localhost:%s/weeble" % handler.port)
urllib2.urlopen('http://localhost:%s/weeble' % handler.port) except urllib.error.URLError as f:
except urllib2.URLError as f: data = f.read()
data = f.read() f.close()
f.close() else:
else: self.fail("404 should raise URLError")
self.fail('404 should raise URLError')
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/weeble'])
finally:
self.server.stop()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ["/weeble"])
def test_200(self): def test_200(self):
expected_response = b'pycon 2008...' expected_response = b"pycon 2008..."
handler = self.start_server([(200, [], expected_response)]) handler = self.start_server([(200, [], expected_response)])
data = self.urlopen("http://localhost:%s/bizarre" % handler.port)
try: self.assertEquals(data, expected_response)
f = urllib2.urlopen('http://localhost:%s/bizarre' % handler.port) self.assertEquals(handler.requests, ["/bizarre"])
data = f.read()
f.close()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/bizarre'])
finally:
self.server.stop()
def test_200_with_parameters(self): def test_200_with_parameters(self):
expected_response = b'pycon 2008...' expected_response = b"pycon 2008..."
handler = self.start_server([(200, [], expected_response)]) handler = self.start_server([(200, [], expected_response)])
data = self.urlopen("http://localhost:%s/bizarre" % handler.port,
try: b"get=with_feeling")
f = urllib2.urlopen('http://localhost:%s/bizarre' % handler.port, b'get=with_feeling') self.assertEquals(data, expected_response)
data = f.read() self.assertEquals(handler.requests, ["/bizarre", b"get=with_feeling"])
f.close()
self.assertEquals(data, expected_response)
self.assertEquals(handler.requests, ['/bizarre', b'get=with_feeling'])
finally:
self.server.stop()
def test_sending_headers(self): def test_sending_headers(self):
handler = self.start_server([(200, [], b"we don't care")]) handler = self.start_server()
req = urllib.request.Request("http://localhost:%s/" % handler.port,
try: headers={"Range": "bytes=20-39"})
req = urllib2.Request("http://localhost:%s/" % handler.port, urllib.request.urlopen(req)
headers={'Range': 'bytes=20-39'}) self.assertEqual(handler.headers_received["Range"], "bytes=20-39")
urllib2.urlopen(req)
self.assertEqual(handler.headers_received['Range'], 'bytes=20-39')
finally:
self.server.stop()
def test_basic(self): def test_basic(self):
handler = self.start_server([(200, [], b"we don't care")]) handler = self.start_server()
open_url = urllib.request.urlopen("http://localhost:%s" % handler.port)
for attr in ("read", "close", "info", "geturl"):
self.assert_(hasattr(open_url, attr), "object returned from "
"urlopen lacks the %s attribute" % attr)
try: try:
open_url = urllib2.urlopen("http://localhost:%s" % handler.port) self.assert_(open_url.read(), "calling 'read' failed")
for attr in ("read", "close", "info", "geturl"):
self.assert_(hasattr(open_url, attr), "object returned from "
"urlopen lacks the %s attribute" % attr)
try:
self.assert_(open_url.read(), "calling 'read' failed")
finally:
open_url.close()
finally: finally:
self.server.stop() open_url.close()
def test_info(self): def test_info(self):
handler = self.start_server([(200, [], b"we don't care")]) handler = self.start_server()
try: try:
open_url = urllib2.urlopen("http://localhost:%s" % handler.port) open_url = urllib.request.urlopen(
"http://localhost:%s" % handler.port)
info_obj = open_url.info() info_obj = open_url.info()
self.assert_(isinstance(info_obj, email.message.Message), self.assert_(isinstance(info_obj, email.message.Message),
"object returned by 'info' is not an instance of " "object returned by 'info' is not an instance of "
@ -452,15 +433,10 @@ def test_info(self):
def test_geturl(self): def test_geturl(self):
# Make sure same URL as opened is returned by geturl. # Make sure same URL as opened is returned by geturl.
handler = self.start_server([(200, [], b"we don't care")]) handler = self.start_server()
open_url = urllib.request.urlopen("http://localhost:%s" % handler.port)
try: url = open_url.geturl()
open_url = urllib2.urlopen("http://localhost:%s" % handler.port) self.assertEqual(url, "http://localhost:%s" % handler.port)
url = open_url.geturl()
self.assertEqual(url, "http://localhost:%s" % handler.port)
finally:
self.server.stop()
def test_bad_address(self): def test_bad_address(self):
# Make sure proper exception is raised when connecting to a bogus # Make sure proper exception is raised when connecting to a bogus
@ -472,17 +448,10 @@ def test_bad_address(self):
# started failing then. One hopes the .invalid # started failing then. One hopes the .invalid
# domain will be spared to serve its defined # domain will be spared to serve its defined
# purpose. # purpose.
# urllib2.urlopen, "http://www.sadflkjsasadf.com/") urllib.request.urlopen,
urllib2.urlopen, "http://www.python.invalid./") "http://www.python.invalid./")
def test_main(): def test_main():
# We will NOT depend on the network resource flag
# (Lib/test/regrtest.py -u network) since all tests here are only
# localhost. However, if this is a bad rationale, then uncomment
# the next line.
#support.requires("network")
support.run_unittest(ProxyAuthTests) support.run_unittest(ProxyAuthTests)
support.run_unittest(TestUrlopen) support.run_unittest(TestUrlopen)

View file

@ -4,10 +4,11 @@
from test import support from test import support
from test.test_urllib2 import sanepathname2url from test.test_urllib2 import sanepathname2url
import socket
import urllib2
import sys
import os import os
import socket
import sys
import urllib.error
import urllib.request
def _retry_thrice(func, exc, *args, **kwargs): def _retry_thrice(func, exc, *args, **kwargs):
@ -28,7 +29,8 @@ def wrapped(*args, **kwargs):
# Connecting to remote hosts is flaky. Make it more robust by retrying # Connecting to remote hosts is flaky. Make it more robust by retrying
# the connection several times. # the connection several times.
_urlopen_with_retry = _wrap_with_retry_thrice(urllib2.urlopen, urllib2.URLError) _urlopen_with_retry = _wrap_with_retry_thrice(urllib.request.urlopen,
urllib.error.URLError)
class AuthTests(unittest.TestCase): class AuthTests(unittest.TestCase):
@ -78,16 +80,11 @@ def test_close(self):
# calling .close() on urllib2's response objects should close the # calling .close() on urllib2's response objects should close the
# underlying socket # underlying socket
# delve deep into response to fetch socket._socketobject
response = _urlopen_with_retry("http://www.python.org/") response = _urlopen_with_retry("http://www.python.org/")
abused_fileobject = response.fp sock = response.fp
httpresponse = abused_fileobject.raw self.assert_(not sock.closed)
self.assert_(httpresponse.__class__ is http.client.HTTPResponse)
fileobject = httpresponse.fp
self.assert_(not fileobject.closed)
response.close() response.close()
self.assert_(fileobject.closed) self.assert_(sock.closed)
class OtherNetworkTests(unittest.TestCase): class OtherNetworkTests(unittest.TestCase):
def setUp(self): def setUp(self):
@ -116,8 +113,9 @@ def test_file(self):
f.write('hi there\n') f.write('hi there\n')
f.close() f.close()
urls = [ urls = [
'file:'+sanepathname2url(os.path.abspath(TESTFN)), 'file:' + sanepathname2url(os.path.abspath(TESTFN)),
('file:///nonsensename/etc/passwd', None, urllib2.URLError), ('file:///nonsensename/etc/passwd', None,
urllib.error.URLError),
] ]
self._test_urls(urls, self._extra_handlers(), retry=True) self._test_urls(urls, self._extra_handlers(), retry=True)
finally: finally:
@ -157,9 +155,9 @@ def _test_urls(self, urls, handlers, retry=True):
import logging import logging
debug = logging.getLogger("test_urllib2").debug debug = logging.getLogger("test_urllib2").debug
urlopen = urllib2.build_opener(*handlers).open urlopen = urllib.request.build_opener(*handlers).open
if retry: if retry:
urlopen = _wrap_with_retry_thrice(urlopen, urllib2.URLError) urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)
for url in urls: for url in urls:
if isinstance(url, tuple): if isinstance(url, tuple):
@ -186,7 +184,7 @@ def _test_urls(self, urls, handlers, retry=True):
def _extra_handlers(self): def _extra_handlers(self):
handlers = [] handlers = []
cfh = urllib2.CacheFTPHandler() cfh = urllib.request.CacheFTPHandler()
cfh.setTimeout(1) cfh.setTimeout(1)
handlers.append(cfh) handlers.append(cfh)
@ -197,7 +195,7 @@ class TimeoutTest(unittest.TestCase):
def test_http_basic(self): def test_http_basic(self):
self.assertTrue(socket.getdefaulttimeout() is None) self.assertTrue(socket.getdefaulttimeout() is None)
u = _urlopen_with_retry("http://www.python.org") u = _urlopen_with_retry("http://www.python.org")
self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None) self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_default_timeout(self): def test_http_default_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None) self.assertTrue(socket.getdefaulttimeout() is None)
@ -206,7 +204,7 @@ def test_http_default_timeout(self):
u = _urlopen_with_retry("http://www.python.org") u = _urlopen_with_retry("http://www.python.org")
finally: finally:
socket.setdefaulttimeout(None) socket.setdefaulttimeout(None)
self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 60) self.assertEqual(u.fp._sock.gettimeout(), 60)
def test_http_no_timeout(self): def test_http_no_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None) self.assertTrue(socket.getdefaulttimeout() is None)
@ -215,11 +213,11 @@ def test_http_no_timeout(self):
u = _urlopen_with_retry("http://www.python.org", timeout=None) u = _urlopen_with_retry("http://www.python.org", timeout=None)
finally: finally:
socket.setdefaulttimeout(None) socket.setdefaulttimeout(None)
self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None) self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_timeout(self): def test_http_timeout(self):
u = _urlopen_with_retry("http://www.python.org", timeout=120) u = _urlopen_with_retry("http://www.python.org", timeout=120)
self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 120) self.assertEqual(u.fp._sock.gettimeout(), 120)
FTP_HOST = "ftp://ftp.mirror.nl/pub/mirror/gnu/" FTP_HOST = "ftp://ftp.mirror.nl/pub/mirror/gnu/"

View file

@ -4,7 +4,7 @@
from test import support from test import support
import socket import socket
import urllib import urllib.request
import sys import sys
import os import os
import email.message import email.message
@ -36,11 +36,11 @@ def tearDown(self):
socket.setdefaulttimeout(None) socket.setdefaulttimeout(None)
def testURLread(self): def testURLread(self):
f = _open_with_retry(urllib.urlopen, "http://www.python.org/") f = _open_with_retry(urllib.request.urlopen, "http://www.python.org/")
x = f.read() x = f.read()
class urlopenNetworkTests(unittest.TestCase): class urlopenNetworkTests(unittest.TestCase):
"""Tests urllib.urlopen using the network. """Tests urllib.reqest.urlopen using the network.
These tests are not exhaustive. Assuming that testing using files does a These tests are not exhaustive. Assuming that testing using files does a
good job overall of some of the basic interface features. There are no good job overall of some of the basic interface features. There are no
@ -55,7 +55,7 @@ class urlopenNetworkTests(unittest.TestCase):
""" """
def urlopen(self, *args): def urlopen(self, *args):
return _open_with_retry(urllib.urlopen, *args) return _open_with_retry(urllib.request.urlopen, *args)
def test_basic(self): def test_basic(self):
# Simple test expected to pass. # Simple test expected to pass.
@ -105,7 +105,7 @@ def test_geturl(self):
def test_getcode(self): def test_getcode(self):
# test getcode() with the fancy opener to get 404 error codes # test getcode() with the fancy opener to get 404 error codes
URL = "http://www.python.org/XXXinvalidXXX" URL = "http://www.python.org/XXXinvalidXXX"
open_url = urllib.FancyURLopener().open(URL) open_url = urllib.request.FancyURLopener().open(URL)
try: try:
code = open_url.getcode() code = open_url.getcode()
finally: finally:
@ -114,7 +114,7 @@ def test_getcode(self):
def test_fileno(self): def test_fileno(self):
if (sys.platform in ('win32',) or if (sys.platform in ('win32',) or
not hasattr(os, 'fdopen')): not hasattr(os, 'fdopen')):
# On Windows, socket handles are not file descriptors; this # On Windows, socket handles are not file descriptors; this
# test can't pass on Windows. # test can't pass on Windows.
return return
@ -142,13 +142,14 @@ def test_bad_address(self):
# domain will be spared to serve its defined # domain will be spared to serve its defined
# purpose. # purpose.
# urllib.urlopen, "http://www.sadflkjsasadf.com/") # urllib.urlopen, "http://www.sadflkjsasadf.com/")
urllib.urlopen, "http://www.python.invalid./") urllib.request.urlopen,
"http://www.python.invalid./")
class urlretrieveNetworkTests(unittest.TestCase): class urlretrieveNetworkTests(unittest.TestCase):
"""Tests urllib.urlretrieve using the network.""" """Tests urllib.request.urlretrieve using the network."""
def urlretrieve(self, *args): def urlretrieve(self, *args):
return _open_with_retry(urllib.urlretrieve, *args) return _open_with_retry(urllib.request.urlretrieve, *args)
def test_basic(self): def test_basic(self):
# Test basic functionality. # Test basic functionality.

View file

@ -2,7 +2,7 @@
from test import support from test import support
import unittest import unittest
import urlparse import urllib.parse
RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q" RFC2396_BASE = "http://a/b/c/d;p?q"
@ -10,19 +10,19 @@
class UrlParseTestCase(unittest.TestCase): class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split): def checkRoundtrips(self, url, parsed, split):
result = urlparse.urlparse(url) result = urllib.parse.urlparse(url)
self.assertEqual(result, parsed) self.assertEqual(result, parsed)
t = (result.scheme, result.netloc, result.path, t = (result.scheme, result.netloc, result.path,
result.params, result.query, result.fragment) result.params, result.query, result.fragment)
self.assertEqual(t, parsed) self.assertEqual(t, parsed)
# put it back together and it should be the same # put it back together and it should be the same
result2 = urlparse.urlunparse(result) result2 = urllib.parse.urlunparse(result)
self.assertEqual(result2, url) self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl()) self.assertEqual(result2, result.geturl())
# the result of geturl() is a fixpoint; we can always parse it # the result of geturl() is a fixpoint; we can always parse it
# again to get the same result: # again to get the same result:
result3 = urlparse.urlparse(result.geturl()) result3 = urllib.parse.urlparse(result.geturl())
self.assertEqual(result3.geturl(), result.geturl()) self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result) self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme) self.assertEqual(result3.scheme, result.scheme)
@ -37,17 +37,17 @@ def checkRoundtrips(self, url, parsed, split):
self.assertEqual(result3.port, result.port) self.assertEqual(result3.port, result.port)
# check the roundtrip using urlsplit() as well # check the roundtrip using urlsplit() as well
result = urlparse.urlsplit(url) result = urllib.parse.urlsplit(url)
self.assertEqual(result, split) self.assertEqual(result, split)
t = (result.scheme, result.netloc, result.path, t = (result.scheme, result.netloc, result.path,
result.query, result.fragment) result.query, result.fragment)
self.assertEqual(t, split) self.assertEqual(t, split)
result2 = urlparse.urlunsplit(result) result2 = urllib.parse.urlunsplit(result)
self.assertEqual(result2, url) self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl()) self.assertEqual(result2, result.geturl())
# check the fixpoint property of re-parsing the result of geturl() # check the fixpoint property of re-parsing the result of geturl()
result3 = urlparse.urlsplit(result.geturl()) result3 = urllib.parse.urlsplit(result.geturl())
self.assertEqual(result3.geturl(), result.geturl()) self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result) self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme) self.assertEqual(result3.scheme, result.scheme)
@ -83,7 +83,7 @@ def test_roundtrips(self):
self.checkRoundtrips(url, parsed, split) self.checkRoundtrips(url, parsed, split)
def test_http_roundtrips(self): def test_http_roundtrips(self):
# urlparse.urlsplit treats 'http:' as an optimized special case, # urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following. # so we test both 'http:' and 'https:' in all the following.
# Three cheers for white box knowledge! # Three cheers for white box knowledge!
testcases = [ testcases = [
@ -111,13 +111,13 @@ def test_http_roundtrips(self):
self.checkRoundtrips(url, parsed, split) self.checkRoundtrips(url, parsed, split)
def checkJoin(self, base, relurl, expected): def checkJoin(self, base, relurl, expected):
self.assertEqual(urlparse.urljoin(base, relurl), expected, self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
(base, relurl, expected)) (base, relurl, expected))
def test_unparse_parse(self): def test_unparse_parse(self):
for u in ['Python', './Python']: for u in ['Python', './Python']:
self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u) self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u) self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
def test_RFC1808(self): def test_RFC1808(self):
# "normal" cases from RFC 1808: # "normal" cases from RFC 1808:
@ -223,11 +223,11 @@ def test_urldefrag(self):
(RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
(RFC2396_BASE, 'http://a/b/c/d;p?q', ''), (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
]: ]:
self.assertEqual(urlparse.urldefrag(url), (defrag, frag)) self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))
def test_urlsplit_attributes(self): def test_urlsplit_attributes(self):
url = "HTTP://WWW.PYTHON.ORG/doc/#frag" url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
p = urlparse.urlsplit(url) p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http") self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "WWW.PYTHON.ORG") self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
self.assertEqual(p.path, "/doc/") self.assertEqual(p.path, "/doc/")
@ -242,7 +242,7 @@ def test_urlsplit_attributes(self):
#self.assertEqual(p.geturl(), url) #self.assertEqual(p.geturl(), url)
url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url) p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http") self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User:Pass@www.python.org:080") self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/") self.assertEqual(p.path, "/doc/")
@ -259,7 +259,7 @@ def test_urlsplit_attributes(self):
# and request email addresses as usernames. # and request email addresses as usernames.
url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url) p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http") self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080") self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/") self.assertEqual(p.path, "/doc/")
@ -274,11 +274,11 @@ def test_urlsplit_attributes(self):
def test_attributes_bad_port(self): def test_attributes_bad_port(self):
"""Check handling of non-integer ports.""" """Check handling of non-integer ports."""
p = urlparse.urlsplit("http://www.example.net:foo") p = urllib.parse.urlsplit("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo") self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port) self.assertRaises(ValueError, lambda: p.port)
p = urlparse.urlparse("http://www.example.net:foo") p = urllib.parse.urlparse("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo") self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port) self.assertRaises(ValueError, lambda: p.port)
@ -289,7 +289,7 @@ def test_attributes_without_netloc(self):
# scheme://netloc syntax, the netloc and related attributes # scheme://netloc syntax, the netloc and related attributes
# should be left empty. # should be left empty.
uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
p = urlparse.urlsplit(uri) p = urllib.parse.urlsplit(uri)
self.assertEqual(p.netloc, "") self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None) self.assertEqual(p.username, None)
self.assertEqual(p.password, None) self.assertEqual(p.password, None)
@ -297,7 +297,7 @@ def test_attributes_without_netloc(self):
self.assertEqual(p.port, None) self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), uri) self.assertEqual(p.geturl(), uri)
p = urlparse.urlparse(uri) p = urllib.parse.urlparse(uri)
self.assertEqual(p.netloc, "") self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None) self.assertEqual(p.username, None)
self.assertEqual(p.password, None) self.assertEqual(p.password, None)
@ -307,7 +307,7 @@ def test_attributes_without_netloc(self):
def test_noslash(self): def test_noslash(self):
# Issue 1637: http://foo.com?query is legal # Issue 1637: http://foo.com?query is legal
self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"), self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
('http', 'example.com', '', '', 'blahblah=/foo', '')) ('http', 'example.com', '', '', 'blahblah=/foo', ''))
def test_main(): def test_main():

View file

@ -111,8 +111,10 @@ def test_dump_big_int(self):
(int(2**34),)) (int(2**34),))
xmlrpclib.dumps((xmlrpclib.MAXINT, xmlrpclib.MININT)) xmlrpclib.dumps((xmlrpclib.MAXINT, xmlrpclib.MININT))
self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MAXINT+1,)) self.assertRaises(OverflowError, xmlrpclib.dumps,
self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MININT-1,)) (xmlrpclib.MAXINT+1,))
self.assertRaises(OverflowError, xmlrpclib.dumps,
(xmlrpclib.MININT-1,))
def dummy_write(s): def dummy_write(s):
pass pass
@ -120,9 +122,10 @@ def dummy_write(s):
m = xmlrpclib.Marshaller() m = xmlrpclib.Marshaller()
m.dump_int(xmlrpclib.MAXINT, dummy_write) m.dump_int(xmlrpclib.MAXINT, dummy_write)
m.dump_int(xmlrpclib.MININT, dummy_write) m.dump_int(xmlrpclib.MININT, dummy_write)
self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MAXINT+1, dummy_write) self.assertRaises(OverflowError, m.dump_int,
self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MININT-1, dummy_write) xmlrpclib.MAXINT+1, dummy_write)
self.assertRaises(OverflowError, m.dump_int,
xmlrpclib.MININT-1, dummy_write)
def test_dump_none(self): def test_dump_none(self):
value = alist + [None] value = alist + [None]
@ -132,7 +135,6 @@ def test_dump_none(self):
xmlrpclib.loads(strg)[0][0]) xmlrpclib.loads(strg)[0][0])
self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,)) self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,))
class HelperTestCase(unittest.TestCase): class HelperTestCase(unittest.TestCase):
def test_escape(self): def test_escape(self):
self.assertEqual(xmlrpclib.escape("a&b"), "a&amp;b") self.assertEqual(xmlrpclib.escape("a&b"), "a&amp;b")
@ -160,7 +162,6 @@ def test_dotted_attribute(self):
# private methods # private methods
self.assertRaises(AttributeError, self.assertRaises(AttributeError,
xmlrpc.server.resolve_dotted_attribute, str, '__add') xmlrpc.server.resolve_dotted_attribute, str, '__add')
self.assert_(xmlrpc.server.resolve_dotted_attribute(str, 'title')) self.assert_(xmlrpc.server.resolve_dotted_attribute(str, 'title'))
class DateTimeTestCase(unittest.TestCase): class DateTimeTestCase(unittest.TestCase):
@ -170,7 +171,8 @@ def test_default(self):
def test_time(self): def test_time(self):
d = 1181399930.036952 d = 1181399930.036952
t = xmlrpclib.DateTime(d) t = xmlrpclib.DateTime(d)
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d))) self.assertEqual(str(t),
time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
def test_time_tuple(self): def test_time_tuple(self):
d = (2007,6,9,10,38,50,5,160,0) d = (2007,6,9,10,38,50,5,160,0)
@ -180,7 +182,7 @@ def test_time_tuple(self):
def test_time_struct(self): def test_time_struct(self):
d = time.localtime(1181399930.036952) d = time.localtime(1181399930.036952)
t = xmlrpclib.DateTime(d) t = xmlrpclib.DateTime(d)
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d)) self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
def test_datetime_datetime(self): def test_datetime_datetime(self):
d = datetime.datetime(2007,1,2,3,4,5) d = datetime.datetime(2007,1,2,3,4,5)
@ -350,12 +352,12 @@ def XXXtest_404(self):
self.assertEqual(response.reason, 'Not Found') self.assertEqual(response.reason, 'Not Found')
def test_introspection1(self): def test_introspection1(self):
expected_methods = set(['pow', 'div', 'my_function', 'add',
'system.listMethods', 'system.methodHelp',
'system.methodSignature', 'system.multicall'])
try: try:
p = xmlrpclib.ServerProxy('http://localhost:%d' % PORT) p = xmlrpclib.ServerProxy('http://localhost:%d' % PORT)
meth = p.system.listMethods() meth = p.system.listMethods()
expected_methods = set(['pow', 'div', 'my_function', 'add',
'system.listMethods', 'system.methodHelp',
'system.methodSignature', 'system.multicall'])
self.assertEqual(set(meth), expected_methods) self.assertEqual(set(meth), expected_methods)
except (xmlrpclib.ProtocolError, socket.error) as e: except (xmlrpclib.ProtocolError, socket.error) as e:
# ignore failures due to non-blocking socket 'unavailable' errors # ignore failures due to non-blocking socket 'unavailable' errors
@ -593,7 +595,8 @@ def test_cgi_xmlrpc_response(self):
# will respond exception, if so, our goal is achieved ;) # will respond exception, if so, our goal is achieved ;)
handle = open(support.TESTFN, "r").read() handle = open(support.TESTFN, "r").read()
# start with 44th char so as not to get http header, we just need only xml # start with 44th char so as not to get http header, we just
# need only xml
self.assertRaises(xmlrpclib.Fault, xmlrpclib.loads, handle[44:]) self.assertRaises(xmlrpclib.Fault, xmlrpclib.loads, handle[44:])
os.remove("xmldata.txt") os.remove("xmldata.txt")

File diff suppressed because it is too large Load diff

0
Lib/urllib/__init__.py Normal file
View file

59
Lib/urllib/error.py Normal file
View file

@ -0,0 +1,59 @@
"""Exception classes raised by urllib.
The base exception class is URLError, which inherits from IOError. It
doesn't define any behavior of its own, but is the base class for all
exceptions defined in this package.
HTTPError is an exception class that is also a valid HTTP response
instance. It behaves this way because HTTP protocol errors are valid
responses, with a status code, headers, and a body. In some contexts,
an application may want to handle an exception like a regular
response.
"""
import urllib.response
# do these error classes make sense?
# make sure all of the IOError stuff is overridden. we just want to be
# subtypes.
class URLError(IOError):
    """Base class for the exceptions defined by urllib.

    reason   -- the cause of the failure; interpolated into str(self)
    filename -- optional; stored as self.filename only when not None
    """
    # URLError is a sub-type of IOError, but it doesn't share any of
    # the implementation, so __init__ and __str__ are overridden here.
    # self.args is set for compatibility with other EnvironmentError
    # subclasses, although it does not follow the usual layout of errno
    # in slot 0 and strerror in slot 1.

    def __init__(self, reason, filename=None):
        self.reason = reason
        self.args = (reason,)
        if filename is None:
            return
        self.filename = filename

    def __str__(self):
        return '<urlopen error %s>' % self.reason
class HTTPError(URLError, urllib.response.addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return.

    url, code, msg, hdrs and fp are stored on the instance as
    filename, code, msg, hdrs and fp respectively.
    """

    def __init__(self, url, code, msg, hdrs, fp):
        self.filename = url
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is None:
            return
        urllib.response.addinfourl.__init__(self, fp, hdrs, url, code)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
class ContentTooShortError(URLError):
    """Raised when the downloaded size does not match content-length.

    message -- passed to URLError as the reason
    content -- stored unchanged as self.content
    """

    def __init__(self, message, content):
        super().__init__(message)
        self.content = content

View file

@ -259,6 +259,311 @@ def urldefrag(url):
return url, '' return url, ''
# Lookup table from every two-character hex escape -- lower-case,
# upper-case and mixed-case spellings alike -- to the character it encodes.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """Replace %xx escapes in s by the single character they encode.

    unquote('abc%20def') -> 'abc def'.

    The hex digits may be given in either case ('%7e', '%7E', '%aB').
    A '%' that is not followed by two hex digits is left untouched.
    """
    res = s.split('%')
    # res[0] is the text before the first '%'; every later piece starts
    # with whatever followed a '%' in the input.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: put the '%' back unchanged.
            res[i] = '%' + item
    return "".join(res)
def unquote_plus(s):
    """Like unquote(), but first turn every '+' into a space.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    return unquote(s.replace('+', ' '))
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
_safe_quoters = {}

class Quoter:
    """Callable that maps one character to its quoted form.

    Characters found in safe or always_safe are returned unchanged.
    Other characters with a code point below 256 become '%XX'.
    Characters above that are encoded as UTF-8, one '%XX' per byte.
    Results for code points below 256 are memoized in self.cache.
    """

    def __init__(self, safe):
        self.cache = {}
        self.safe = safe + always_safe

    def __call__(self, c):
        if c in self.cache:
            return self.cache[c]
        code = ord(c)
        if code >= 256:
            # Not cached: escape each byte of the UTF-8 encoding.
            return "".join(['%%%02X' % byte for byte in c.encode("utf-8")])
        quoted = c if c in self.safe else '%%%02X' % code
        self.cache[c] = quoted
        return quoted
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every part of a URL (path, query, ...) has its own set of reserved
    characters that need quoting.  RFC 2396, Uniform Resource
    Identifiers (URI): Generic Syntax, lists these reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them.

    By default this function is meant for quoting the path section of
    a URL, so '/' is left alone: it is a reserved character, but in a
    path the existing slashes are normally being used in their
    reserved role.
    """
    cachekey = (safe, always_safe)
    quoter = _safe_quoters.get(cachekey)
    if quoter is None:
        # First use of this safe set: build a Quoter and remember it.
        quoter = Quoter(safe)
        _safe_quoters[cachekey] = quoter
    return ''.join(map(quoter, s))
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unescaped while quoting, then turn them into '+'.
    escaped = quote(s, safe + ' ')
    return escaped.replace(' ', '+')
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError if query is neither a mapping nor a sequence of
    tuples (for example a non-empty string).
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError as err:
            # Re-raise with a clearer message while keeping the traceback
            # of the error that was actually caught.
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(
                                err.__traceback__)

    l = []
    for k, v in query:
        k = quote_plus(str(k))
        if not doseq:
            # preserve old behavior: every value is stringified as a whole
            l.append(k + '=' + quote_plus(str(v)))
        elif isinstance(v, str):
            # a string is a sequence too, but must stay a single value
            l.append(k + '=' + quote_plus(v))
        else:
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                l.append(k + '=' + quote_plus(str(v)))
            else:
                # one key=value pair per element of the sequence
                for elt in v:
                    l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
# '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# urllib.parse.unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    A str argument must be pure ASCII, otherwise UnicodeError is raised.
    Arguments of any other type are returned unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of toBytes()
    if not isinstance(url, str):
        return url
    try:
        # Round-trip through ASCII purely to validate the characters.
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, then an enclosing '<...>' pair if
    present, then a leading 'URL:' prefix if present.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  When url has no leading
    'scheme:' prefix the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme = found.group(1)
    remainder = url[len(scheme) + 1:]
    return scheme.lower(), remainder
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote().  The split happens at the
    last '@'.  Returns (None, host) when host contains no '@'.

    The result is always a real tuple, so it can be indexed and compared
    as well as unpacked.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match:
        # map() is lazy in Python 3; materialize it so that both return
        # paths hand back the same kind of object.
        return tuple(map(unquote, match.group(1, 2)))
    return None, host
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Returns (user, None) when
    there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and must consist of digits only;
    otherwise the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None if there is a ':' but no valid number after it."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    hostname, port_text = found.group(1, 2)
    if not port_text:
        # A trailing ':' with nothing after it counts as an invalid port.
        return hostname, None
    try:
        return hostname, int(port_text)
    except ValueError:
        return hostname, None
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when url
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        # Raw string: '\?' is not a valid escape in an ordinary string
        # literal, and the regex needs the backslash to reach re as-is.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when url
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'."""
    path, *attrs = url.split(';')
    return path, attrs
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when
    there is no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
test_input = """ test_input = """
http://a/b/c/d http://a/b/c/d

2295
Lib/urllib/request.py Normal file

File diff suppressed because it is too large Load diff

83
Lib/urllib/response.py Normal file
View file

@ -0,0 +1,83 @@
"""Response classes used by urllib.
The base class, addbase, defines a minimal file-like interface,
including read() and readline(). The typical response object is an
addinfourl instance, which defines an info() method that returns
headers and a geturl() method that returns the url.
"""
class addbase(object):
    """Base class for addinfo and addclosehook.

    Wraps the file-like object fp and re-exports its read() and
    readline(), plus readlines() and fileno() when fp provides them.
    When fp has no fileno(), self.fileno() returns None.  Iteration and
    next() are delegated to fp.
    """

    # XXX Add a method to expose the timeout on the underlying socket?

    def __init__(self, fp):
        # TODO(jhylton): Is there a better way to delegate using io?
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        # TODO(jhylton): Make sure an object with readlines() is also iterable
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None

    # __iter__ and __next__ must be defined on the class: iter(), next()
    # and the for statement look special methods up on the type, so
    # binding fp.__iter__ / fp.__next__ as instance attributes has no
    # effect on them.
    def __iter__(self):
        return iter(self.fp)

    def __next__(self):
        return next(self.fp)

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Drop the delegated methods and close the wrapped file.

        Safe to call more than once; later calls find fp already None.
        """
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        # Test identity rather than truthiness so that a file-like object
        # that happens to evaluate as false is still closed.
        if self.fp is not None:
            self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has
    been closed; hook and arguments are then cleared.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.hookargs = hookargs
        self.closehook = closehook

    def close(self):
        addbase.close(self)
        hook = self.closehook
        if not hook:
            # No hook given, or it has already run.
            return
        hook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
class addinfo(addbase):
    """Class to add an info() method to an open file.

    info() returns the headers object passed to the constructor.
    """

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        return self.headers
class addinfourl(addbase):
    """Class to add info(), geturl() and getcode() methods to an open file.

    headers, url and code are stored as given; code defaults to None.
    """

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def geturl(self):
        return self.url

    def getcode(self):
        return self.code

    def info(self):
        return self.headers

View file

@ -9,8 +9,8 @@
The robots.txt Exclusion Protocol is implemented as specified in The robots.txt Exclusion Protocol is implemented as specified in
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
""" """
import urlparse
import urllib import urllib.parse, urllib.request
__all__ = ["RobotFileParser"] __all__ = ["RobotFileParser"]
@ -48,24 +48,19 @@ def modified(self):
def set_url(self, url): def set_url(self, url):
"""Sets the URL referring to a robots.txt file.""" """Sets the URL referring to a robots.txt file."""
self.url = url self.url = url
self.host, self.path = urlparse.urlparse(url)[1:3] self.host, self.path = urllib.parse.urlparse(url)[1:3]
def read(self): def read(self):
"""Reads the robots.txt URL and feeds it to the parser.""" """Reads the robots.txt URL and feeds it to the parser."""
opener = URLopener() try:
f = opener.open(self.url) f = urllib.request.urlopen(self.url)
lines = [] except urllib.error.HTTPError as err:
line = f.readline() if err.code in (401, 403):
while line: self.disallow_all = True
lines.append(line.strip()) elif err.code >= 400:
line = f.readline() self.allow_all = True
self.errcode = opener.errcode else:
if self.errcode in (401, 403): self.parse(f.read().splitlines())
self.disallow_all = True
elif self.errcode >= 400:
self.allow_all = True
elif self.errcode == 200 and lines:
self.parse(lines)
def _add_entry(self, entry): def _add_entry(self, entry):
if "*" in entry.useragents: if "*" in entry.useragents:
@ -75,15 +70,15 @@ def _add_entry(self, entry):
self.entries.append(entry) self.entries.append(entry)
def parse(self, lines): def parse(self, lines):
"""parse the input lines from a robots.txt file. """Parse the input lines from a robots.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines.""" We allow that a user-agent: line is not preceded by
one or more blank lines.
"""
state = 0 state = 0
linenumber = 0
entry = Entry() entry = Entry()
for line in lines: for line in lines:
linenumber = linenumber + 1
if not line: if not line:
if state == 1: if state == 1:
entry = Entry() entry = Entry()
@ -102,7 +97,7 @@ def parse(self, lines):
line = line.split(':', 1) line = line.split(':', 1)
if len(line) == 2: if len(line) == 2:
line[0] = line[0].strip().lower() line[0] = line[0].strip().lower()
line[1] = urllib.unquote(line[1].strip()) line[1] = urllib.parse.unquote(line[1].strip())
if line[0] == "user-agent": if line[0] == "user-agent":
if state == 2: if state == 2:
self._add_entry(entry) self._add_entry(entry)
@ -128,7 +123,7 @@ def can_fetch(self, useragent, url):
return True return True
# search for given user agent matches # search for given user agent matches
# the first match counts # the first match counts
url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/" url = urllib.parse.quote(urllib.parse.urlparse(urllib.parse.unquote(url))[2]) or "/"
for entry in self.entries: for entry in self.entries:
if entry.applies_to(useragent): if entry.applies_to(useragent):
return entry.allowance(url) return entry.allowance(url)
@ -138,7 +133,6 @@ def can_fetch(self, useragent, url):
# agent not found ==> access granted # agent not found ==> access granted
return True return True
def __str__(self): def __str__(self):
return ''.join([str(entry) + "\n" for entry in self.entries]) return ''.join([str(entry) + "\n" for entry in self.entries])
@ -150,7 +144,7 @@ def __init__(self, path, allowance):
if path == '' and not allowance: if path == '' and not allowance:
# an empty value means allow all # an empty value means allow all
allowance = True allowance = True
self.path = urllib.quote(path) self.path = urllib.parse.quote(path)
self.allowance = allowance self.allowance = allowance
def applies_to(self, filename): def applies_to(self, filename):
@ -195,18 +189,3 @@ def allowance(self, filename):
if line.applies_to(filename): if line.applies_to(filename):
return line.allowance return line.allowance
return True return True
class URLopener(urllib.FancyURLopener):
def __init__(self, *args):
urllib.FancyURLopener.__init__(self, *args)
self.errcode = 200
def prompt_user_passwd(self, host, realm):
## If robots.txt file is accessible only with a password,
## we act as if the file wasn't there.
return None, None
def http_error_default(self, url, fp, errcode, errmsg, headers):
self.errcode = errcode
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
errmsg, headers)

File diff suppressed because it is too large Load diff

View file

@ -11,7 +11,8 @@
""" """
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
import urllib, sys import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler from wsgiref.handlers import SimpleHandler
__version__ = "0.1" __version__ = "0.1"
@ -93,7 +94,7 @@ def get_environ(self):
else: else:
path,query = self.path,'' path,query = self.path,''
env['PATH_INFO'] = urllib.unquote(path) env['PATH_INFO'] = urllib.parse.unquote(path)
env['QUERY_STRING'] = query env['QUERY_STRING'] = query
host = self.address_string() host = self.address_string()

View file

@ -50,7 +50,7 @@ def guess_scheme(environ):
def application_uri(environ): def application_uri(environ):
"""Return the application's base URI (no PATH_INFO or QUERY_STRING)""" """Return the application's base URI (no PATH_INFO or QUERY_STRING)"""
url = environ['wsgi.url_scheme']+'://' url = environ['wsgi.url_scheme']+'://'
from urllib import quote from urllib.parse import quote
if environ.get('HTTP_HOST'): if environ.get('HTTP_HOST'):
url += environ['HTTP_HOST'] url += environ['HTTP_HOST']
@ -70,7 +70,7 @@ def application_uri(environ):
def request_uri(environ, include_query=1): def request_uri(environ, include_query=1):
"""Return the full request URI, optionally including the query string""" """Return the full request URI, optionally including the query string"""
url = application_uri(environ) url = application_uri(environ)
from urllib import quote from urllib.parse import quote
path_info = quote(environ.get('PATH_INFO','')) path_info = quote(environ.get('PATH_INFO',''))
if not environ.get('SCRIPT_NAME'): if not environ.get('SCRIPT_NAME'):
url += path_info[1:] url += path_info[1:]

View file

@ -190,8 +190,8 @@ def parse(self, input):
options.errorHandler = self.errorHandler options.errorHandler = self.errorHandler
fp = input.byteStream fp = input.byteStream
if fp is None and options.systemId: if fp is None and options.systemId:
import urllib2 import urllib.request
fp = urllib2.urlopen(input.systemId) fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options) return self._parse_bytestream(fp, options)
def parseWithContext(self, input, cnode, action): def parseWithContext(self, input, cnode, action):
@ -223,14 +223,14 @@ def resolveEntity(self, publicId, systemId):
source.encoding = self._guess_media_encoding(source) source.encoding = self._guess_media_encoding(source)
# determine the base URI if we can # determine the base URI if we can
import posixpath, urlparse import posixpath, urllib.parse
parts = urlparse.urlparse(systemId) parts = urllib.parse.urlparse(systemId)
scheme, netloc, path, params, query, fragment = parts scheme, netloc, path, params, query, fragment = parts
# XXX should we check the scheme here as well? # XXX should we check the scheme here as well?
if path and not path.endswith("/"): if path and not path.endswith("/"):
path = posixpath.dirname(path) + "/" path = posixpath.dirname(path) + "/"
parts = scheme, netloc, path, params, query, fragment parts = scheme, netloc, path, params, query, fragment
source.baseURI = urlparse.urlunparse(parts) source.baseURI = urllib.parse.urlunparse(parts)
return source return source
@ -242,8 +242,8 @@ def _get_opener(self):
return self._opener return self._opener
def _create_opener(self): def _create_opener(self):
import urllib2 import urllib.request
return urllib2.build_opener() return urllib.request.build_opener()
def _guess_media_encoding(self, source): def _guess_media_encoding(self, source):
info = source.byteStream.info() info = source.byteStream.info()

View file

@ -3,7 +3,7 @@
convenience of application and driver writers. convenience of application and driver writers.
""" """
import os, urlparse, urllib import os, urllib.parse, urllib.request
from . import handler from . import handler
from . import xmlreader from . import xmlreader
@ -289,8 +289,8 @@ def prepare_input_source(source, base = ""):
source.setSystemId(sysidfilename) source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb") f = open(sysidfilename, "rb")
else: else:
source.setSystemId(urlparse.urljoin(base, sysid)) source.setSystemId(urllib.parse.urljoin(base, sysid))
f = urllib.urlopen(source.getSystemId()) f = urllib.request.urlopen(source.getSystemId())
source.setByteStream(f) source.setByteStream(f)

View file

@ -1160,12 +1160,12 @@ def get_host_info(self, host):
if isinstance(host, tuple): if isinstance(host, tuple):
host, x509 = host host, x509 = host
import urllib import urllib.parse
auth, host = urllib.splituser(host) auth, host = urllib.parse.splituser(host)
if auth: if auth:
import base64 import base64
auth = base64.encodestring(urllib.unquote(auth)) auth = base64.encodestring(urllib.parse.unquote(auth))
auth = "".join(auth.split()) # get rid of whitespace auth = "".join(auth.split()) # get rid of whitespace
extra_headers = [ extra_headers = [
("Authorization", "Basic " + auth) ("Authorization", "Basic " + auth)
@ -1321,11 +1321,11 @@ def __init__(self, uri, transport=None, encoding=None, verbose=0,
# establish a "logical" server connection # establish a "logical" server connection
# get the url # get the url
import urllib import urllib.parse
type, uri = urllib.splittype(uri) type, uri = urllib.parse.splittype(uri)
if type not in ("http", "https"): if type not in ("http", "https"):
raise IOError("unsupported XML-RPC protocol") raise IOError("unsupported XML-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri) self.__host, self.__handler = urllib.parse.splithost(uri)
if not self.__handler: if not self.__handler:
self.__handler = "/RPC2" self.__handler = "/RPC2"

View file

@ -809,7 +809,7 @@ LIBSUBDIRS= tkinter site-packages test test/output test/data \
email email/mime email/test email/test/data \ email email/mime email/test email/test/data \
html json json/tests http dbm xmlrpc \ html json json/tests http dbm xmlrpc \
sqlite3 sqlite3/test \ sqlite3 sqlite3/test \
logging bsddb bsddb/test csv wsgiref \ logging bsddb bsddb/test csv wsgiref urllib \
lib2to3 lib2to3/fixes lib2to3/pgen2 lib2to3/tests \ lib2to3 lib2to3/fixes lib2to3/pgen2 lib2to3/tests \
ctypes ctypes/test ctypes/macholib idlelib idlelib/Icons \ ctypes ctypes/test ctypes/macholib idlelib idlelib/Icons \
distutils distutils/command distutils/tests $(XMLLIBSUBDIRS) \ distutils distutils/command distutils/tests $(XMLLIBSUBDIRS) \

View file

@ -81,6 +81,15 @@ Extension Modules
Library Library
------- -------
- A new ``urllib`` package was created. It consists of code from
``urllib``, ``urllib2``, ``urlparse``, and ``robotparser``. The old
modules have all been removed. The new package has five submodules:
``urllib.parse``, ``urllib.request``, ``urllib.response``,
``urllib.error``, and ``urllib.robotparser``. The
``urllib.request.urlopen()`` function uses the URL opener from
``urllib2``. (Note that the unit tests have not been renamed for the
beta, but they will be renamed in the future.)
- rfc822 has been removed in favor of the email package. - rfc822 has been removed in favor of the email package.
- mimetools has been removed in favor of the email package. - mimetools has been removed in favor of the email package.