2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-09 23:38:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import stringprep, re, codecs
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-10 11:20:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from unicodedata import ucd_3_2_0 as unicodedata
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# IDNA section 3.1
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# IDNA section 5
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								ace_prefix = b"xn--"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								sace_prefix = "xn--"
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# This assumes query strings, so AllowUnassigned is true
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def nameprep(label):  # type: (str) -> str
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Map
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    newlabel = []
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for c in label:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if stringprep.in_table_b1(c):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # Map to nothing
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        newlabel.append(stringprep.map_table_b2(c))
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    label = "".join(newlabel)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Normalize
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    label = unicodedata.normalize("NFKC", label)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Prohibit
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    for i, c in enumerate(label):
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if stringprep.in_table_c12(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c22(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c3(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c4(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c5(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c6(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c7(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c8(c) or \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           stringprep.in_table_c9(c):
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, i, i+1, f"Invalid character {c!r}")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Check bidi
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												Merged revisions 56125-56153 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/p3yk
........
  r56127 | georg.brandl | 2007-06-30 09:32:49 +0200 (Sat, 30 Jun 2007) | 2 lines
  Fix a place where floor division would be in order.
........
  r56135 | guido.van.rossum | 2007-07-01 06:13:54 +0200 (Sun, 01 Jul 2007) | 28 lines
  Make map() and filter() identical to itertools.imap() and .ifilter(),
  respectively.
  I fixed two bootstrap issues, due to the dynamic import of itertools:
  1. Starting python requires that map() and filter() are not used until
     site.py has added build/lib.<arch> to sys.path.
  2. Building python requires that setup.py and distutils and everything
     they use is free of map() and filter() calls.
  Beyond this, I only fixed the tests in test_builtin.py.
  Others, please help fixing the remaining tests that are now broken!
  The fixes are usually simple:
  a. map(None, X) -> list(X)
  b. map(F, X) -> list(map(F, X))
  c. map(lambda x: F(x), X) -> [F(x) for x in X]
  d. filter(F, X) -> list(filter(F, X))
  e. filter(lambda x: P(x), X) -> [x for x in X if P(x)]
  Someone, please also contribute a fixer for 2to3 to do this.
  It can leave map()/filter() calls alone that are already
  inside a list() or sorted() call or for-loop.
  Only in rare cases have I seen code that depends on map() of lists
  of different lengths going to the end of the longest, or on filter()
  of a string or tuple returning an object of the same type; these
  will need more thought to fix.
........
  r56136 | guido.van.rossum | 2007-07-01 06:22:01 +0200 (Sun, 01 Jul 2007) | 3 lines
  Make it so that test_decimal fails instead of hangs, to help automated
  test runners.
........
  r56139 | georg.brandl | 2007-07-01 18:20:58 +0200 (Sun, 01 Jul 2007) | 2 lines
  Fix a few test cases after the map->imap change.
........
  r56142 | neal.norwitz | 2007-07-02 06:38:12 +0200 (Mon, 02 Jul 2007) | 1 line
  Get a bunch more tests passing after converting map/filter to return iterators.
........
  r56147 | guido.van.rossum | 2007-07-02 15:32:02 +0200 (Mon, 02 Jul 2007) | 4 lines
  Fix the remaining failing unit tests (at least on OSX).
  Also tweaked urllib2 so it doesn't raise socket.gaierror when
  all network interfaces are turned off.
........
											
										 
										
											2007-07-03 08:25:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    RandAL = [stringprep.in_table_d1(x) for x in label]
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-07 16:54:41 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if any(RandAL):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # There is a RandAL char in the string. Must perform further
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # tests:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # 1) The characters in section 5.8 MUST be prohibited.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # This is table C.8, which was already checked
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # 2) If a string contains any RandALCat character, the string
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # MUST NOT contain any LCat character.
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for i, x in enumerate(label):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if stringprep.in_table_d2(x):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                raise UnicodeEncodeError("idna", label, i, i+1,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                         "Violation of BIDI requirement 2")
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-07 16:54:41 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # 3) If a string contains any RandALCat character, a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # RandALCat character MUST be the first character of the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # string, and a RandALCat character MUST be the last
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # character of the string.
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if not RandAL[0]:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, 0, 1,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                     "Violation of BIDI requirement 3")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not RandAL[-1]:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, len(label)-1, len(label),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                     "Violation of BIDI requirement 3")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return label
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def ToASCII(label):  # type: (str) -> bytes
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Step 1: try ASCII
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        label_ascii = label.encode("ascii")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    except UnicodeEncodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Skip to step 3: UseSTD3ASCIIRules is false, so
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Skip to step 8.
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if 0 < len(label_ascii) < 64:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return label_ascii
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if len(label) == 0:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, 0, 1, "label empty")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, 0, len(label), "label too long")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 2: nameprep
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    label = nameprep(label)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 3: UseSTD3ASCIIRules is false
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 4: try ASCII
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        label_ascii = label.encode("ascii")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    except UnicodeEncodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Skip to step 8.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if 0 < len(label) < 64:
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return label_ascii
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if len(label) == 0:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, 0, 1, "label empty")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, 0, len(label), "label too long")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 5: Check ACE prefix
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if label.lower().startswith(sace_prefix):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise UnicodeEncodeError(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            "idna", label, 0, len(sace_prefix), "Label starts with ACE prefix")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 6: Encode with PUNYCODE
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    label_ascii = label.encode("punycode")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 7: Prepend ACE prefix
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    label_ascii = ace_prefix + label_ascii
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 8: Check size
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # do not check for empty as we prepend ace_prefix.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if len(label_ascii) < 64:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return label_ascii
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    raise UnicodeEncodeError("idna", label, 0, len(label), "label too long")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def ToUnicode(label):
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-07 16:54:41 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if len(label) > 1024:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Protection from https://github.com/python/cpython/issues/98433.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # https://datatracker.ietf.org/doc/html/rfc5894#section-6
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # doesn't specify a label size limit prior to NAMEPREP. But having
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # one makes practical sense.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # This leaves ample room for nameprep() to remove Nothing characters
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # preventing us from wasting time decoding a big thing that'll just
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # hit the actual <= 63 length limit in Step 6.
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if isinstance(label, str):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            label = label.encode("utf-8", errors="backslashreplace")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise UnicodeDecodeError("idna", label, 0, len(label), "label way too long")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 1: Check for ASCII
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if isinstance(label, bytes):
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pure_ascii = True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            label = label.encode("ascii")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            pure_ascii = True
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        except UnicodeEncodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            pure_ascii = False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not pure_ascii:
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        assert isinstance(label, str)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Step 2: Perform nameprep
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        label = nameprep(label)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # It doesn't say this, but apparently, it should be ASCII now
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            label = label.encode("ascii")
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        except UnicodeEncodeError as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeEncodeError("idna", label, exc.start, exc.end,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                     "Invalid character in IDN label")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 3: Check for ACE prefix
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    assert isinstance(label, bytes)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not label.lower().startswith(ace_prefix):
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return str(label, "ascii")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 4: Remove ACE prefix
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    label1 = label[len(ace_prefix):]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 5: Decode using PUNYCODE
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result = label1.decode("punycode")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    except UnicodeDecodeError as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        offset = len(ace_prefix)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise UnicodeDecodeError("idna", label, offset+exc.start, offset+exc.end, exc.reason)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 6: Apply ToASCII
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    label2 = ToASCII(result)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 7: Compare the result of step 6 with the one of step 3
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # label2 will already be in lower case.
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if str(label, "ascii").lower() != str(label2, "ascii"):
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        raise UnicodeDecodeError("idna", label, 0, len(label),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                 f"IDNA does not round-trip, '{label!r}' != '{label2!r}'")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # Step 8: return the result of step 5
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return result
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### Codec APIs
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class Codec(codecs.Codec):
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def encode(self, input, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if errors != 'strict':
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # IDNA is quite clear that implementations must be strict
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeError(f"Unsupported error handling: {errors}")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if not input:
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return b'', 0
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-10 22:49:20 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            result = input.encode('ascii')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        except UnicodeEncodeError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # ASCII name: fast path
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            labels = result.split(b'.')
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            for i, label in enumerate(labels[:-1]):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if len(label) == 0:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    offset = sum(len(l) for l in labels[:i]) + i
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    raise UnicodeEncodeError("idna", input, offset, offset+1,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                             "label empty")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            for i, label in enumerate(labels):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if len(label) >= 64:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    offset = sum(len(l) for l in labels[:i]) + i
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    raise UnicodeEncodeError("idna", input, offset, offset+len(label),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                             "label too long")
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-10 22:49:20 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return result, len(input)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        result = bytearray()
							 | 
						
					
						
							
								
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        labels = dots.split(input)
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if labels and not labels[-1]:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            trailing_dot = b'.'
							 | 
						
					
						
							
								
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            del labels[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            trailing_dot = b''
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for i, label in enumerate(labels):
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if result:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # Join with U+002E
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result.extend(b'.')
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result.extend(ToASCII(label))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except (UnicodeEncodeError, UnicodeDecodeError) as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                offset = sum(len(l) for l in labels[:i]) + i
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                raise UnicodeEncodeError(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    "idna",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    input,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    offset + exc.start,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    offset + exc.end,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    exc.reason,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                )
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return bytes(result+trailing_dot), len(input)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def decode(self, input, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if errors != 'strict':
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeError(f"Unsupported error handling: {errors}")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if not input:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return "", 0
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # IDNA allows decoding to operate on Unicode strings, too.
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-19 17:56:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if not isinstance(input, bytes):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # XXX obviously wrong, see #3232
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            input = bytes(input)
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-10 22:49:20 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-15 07:38:13 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if ace_prefix not in input.lower():
							 | 
						
					
						
							
								
									
										
										
										
											2011-11-10 22:49:20 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            # Fast path
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                return input.decode('ascii'), len(input)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except UnicodeDecodeError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-19 17:56:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        labels = input.split(b".")
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if labels and len(labels[-1]) == 0:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            trailing_dot = '.'
							 | 
						
					
						
							
								
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            del labels[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            trailing_dot = ''
							 | 
						
					
						
							
								
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result = []
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for i, label in enumerate(labels):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                u_label = ToUnicode(label)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except (UnicodeEncodeError, UnicodeDecodeError) as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                offset = sum(len(x) for x in labels[:i]) + len(labels[:i])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                raise UnicodeDecodeError(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    "idna", input, offset+exc.start, offset+exc.end, exc.reason)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result.append(u_label)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return ".".join(result)+trailing_dot, len(input)
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _buffer_encode(self, input, errors, final):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if errors != 'strict':
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # IDNA is quite clear that implementations must be strict
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeError(f"Unsupported error handling: {errors}")
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not input:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return (b'', 0)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        labels = dots.split(input)
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        trailing_dot = b''
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if labels:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not labels[-1]:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                trailing_dot = b'.'
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                del labels[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            elif not final:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # Keep potentially unfinished label until the next call
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                del labels[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if labels:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    trailing_dot = b'.'
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        result = bytearray()
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        size = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for label in labels:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if size:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                # Join with U+002E
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result.extend(b'.')
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                size += 1
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result.extend(ToASCII(label))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except (UnicodeEncodeError, UnicodeDecodeError) as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                raise UnicodeEncodeError(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    "idna",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    input,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    size + exc.start,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    size + exc.end,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    exc.reason,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                )
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            size += len(label)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        result += trailing_dot
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        size += len(trailing_dot)
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return (bytes(result), size)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _buffer_decode(self, input, errors, final):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if errors != 'strict':
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            raise UnicodeError("Unsupported error handling: {errors}")
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not input:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return ("", 0)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # IDNA allows decoding to operate on Unicode strings, too.
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if isinstance(input, str):
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            labels = dots.split(input)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # Must be ASCII string
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                input = str(input, "ascii")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except (UnicodeEncodeError, UnicodeDecodeError) as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                raise UnicodeDecodeError("idna", input,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                         exc.start, exc.end, exc.reason)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            labels = input.split(".")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        trailing_dot = ''
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if labels:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not labels[-1]:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                trailing_dot = '.'
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                del labels[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            elif not final:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # Keep potentially unfinished label until the next call
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                del labels[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if labels:
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    trailing_dot = '.'
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result = []
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        size = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for label in labels:
							 | 
						
					
						
							
								
									
										
										
										
											2024-03-17 00:58:42 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                u_label = ToUnicode(label)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except (UnicodeEncodeError, UnicodeDecodeError) as exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                raise UnicodeDecodeError(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    "idna",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    input.encode("ascii", errors="backslashreplace"),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    size + exc.start,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    size + exc.end,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    exc.reason,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result.append(u_label)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if size:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                size += 1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            size += len(label)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 09:43:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        result = ".".join(result) + trailing_dot
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        size += len(trailing_dot)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return (result, size)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 09:43:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class StreamWriter(Codec,codecs.StreamWriter):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class StreamReader(Codec,codecs.StreamReader):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### encodings module API
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getregentry():
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-21 09:43:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return codecs.CodecInfo(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        name='idna',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        encode=Codec().encode,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        decode=Codec().decode,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        incrementalencoder=IncrementalEncoder,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        incrementaldecoder=IncrementalDecoder,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        streamwriter=StreamWriter,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        streamreader=StreamReader,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    )
							 |