| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-09 23:38:20 +00:00
										 |  |  | import stringprep, re, codecs | 
					
						
							| 
									
										
										
										
											2006-03-10 11:20:04 +00:00
										 |  |  | from unicodedata import ucd_3_2_0 as unicodedata | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # IDNA section 3.1 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  | dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # IDNA section 5 | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  | ace_prefix = b"xn--" | 
					
						
							|  |  |  | sace_prefix = "xn--" | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # This assumes query strings, so AllowUnassigned is true | 
					
						
							|  |  |  | def nameprep(label): | 
					
						
							|  |  |  |     # Map | 
					
						
							|  |  |  |     newlabel = [] | 
					
						
							|  |  |  |     for c in label: | 
					
						
							|  |  |  |         if stringprep.in_table_b1(c): | 
					
						
							|  |  |  |             # Map to nothing | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         newlabel.append(stringprep.map_table_b2(c)) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |     label = "".join(newlabel) | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |     # Normalize | 
					
						
							|  |  |  |     label = unicodedata.normalize("NFKC", label) | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |     # Prohibit | 
					
						
							|  |  |  |     for c in label: | 
					
						
							|  |  |  |         if stringprep.in_table_c12(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c22(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c3(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c4(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c5(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c6(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c7(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c8(c) or \ | 
					
						
							|  |  |  |            stringprep.in_table_c9(c): | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             raise UnicodeError("Invalid character %r" % c) | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Check bidi | 
					
						
							| 
									
										
											  
											
												Merged revisions 56125-56153 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/p3yk
........
  r56127 | georg.brandl | 2007-06-30 09:32:49 +0200 (Sat, 30 Jun 2007) | 2 lines
  Fix a place where floor division would be in order.
........
  r56135 | guido.van.rossum | 2007-07-01 06:13:54 +0200 (Sun, 01 Jul 2007) | 28 lines
  Make map() and filter() identical to itertools.imap() and .ifilter(),
  respectively.
  I fixed two bootstrap issues, due to the dynamic import of itertools:
  1. Starting python requires that map() and filter() are not used until
     site.py has added build/lib.<arch> to sys.path.
  2. Building python requires that setup.py and distutils and everything
     they use is free of map() and filter() calls.
  Beyond this, I only fixed the tests in test_builtin.py.
  Others, please help fixing the remaining tests that are now broken!
  The fixes are usually simple:
  a. map(None, X) -> list(X)
  b. map(F, X) -> list(map(F, X))
  c. map(lambda x: F(x), X) -> [F(x) for x in X]
  d. filter(F, X) -> list(filter(F, X))
  e. filter(lambda x: P(x), X) -> [x for x in X if P(x)]
  Someone, please also contribute a fixer for 2to3 to do this.
  It can leave map()/filter() calls alone that are already
  inside a list() or sorted() call or for-loop.
  Only in rare cases have I seen code that depends on map() of lists
  of different lengths going to the end of the longest, or on filter()
  of a string or tuple returning an object of the same type; these
  will need more thought to fix.
........
  r56136 | guido.van.rossum | 2007-07-01 06:22:01 +0200 (Sun, 01 Jul 2007) | 3 lines
  Make it so that test_decimal fails instead of hangs, to help automated
  test runners.
........
  r56139 | georg.brandl | 2007-07-01 18:20:58 +0200 (Sun, 01 Jul 2007) | 2 lines
  Fix a few test cases after the map->imap change.
........
  r56142 | neal.norwitz | 2007-07-02 06:38:12 +0200 (Mon, 02 Jul 2007) | 1 line
  Get a bunch more tests passing after converting map/filter to return iterators.
........
  r56147 | guido.van.rossum | 2007-07-02 15:32:02 +0200 (Mon, 02 Jul 2007) | 4 lines
  Fix the remaining failing unit tests (at least on OSX).
  Also tweaked urllib2 so it doesn't raise socket.gaierror when
  all network interfaces are turned off.
........
											
										 
											2007-07-03 08:25:58 +00:00
										 |  |  |     RandAL = [stringprep.in_table_d1(x) for x in label] | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |     for c in RandAL: | 
					
						
							|  |  |  |         if c: | 
					
						
							|  |  |  |             # There is a RandAL char in the string. Must perform further | 
					
						
							|  |  |  |             # tests: | 
					
						
							|  |  |  |             # 1) The characters in section 5.8 MUST be prohibited. | 
					
						
							|  |  |  |             # This is table C.8, which was already checked | 
					
						
							|  |  |  |             # 2) If a string contains any RandALCat character, the string | 
					
						
							|  |  |  |             # MUST NOT contain any LCat character. | 
					
						
							| 
									
										
											  
											
												Merged revisions 56125-56153 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/p3yk
........
  r56127 | georg.brandl | 2007-06-30 09:32:49 +0200 (Sat, 30 Jun 2007) | 2 lines
  Fix a place where floor division would be in order.
........
  r56135 | guido.van.rossum | 2007-07-01 06:13:54 +0200 (Sun, 01 Jul 2007) | 28 lines
  Make map() and filter() identical to itertools.imap() and .ifilter(),
  respectively.
  I fixed two bootstrap issues, due to the dynamic import of itertools:
  1. Starting python requires that map() and filter() are not used until
     site.py has added build/lib.<arch> to sys.path.
  2. Building python requires that setup.py and distutils and everything
     they use is free of map() and filter() calls.
  Beyond this, I only fixed the tests in test_builtin.py.
  Others, please help fixing the remaining tests that are now broken!
  The fixes are usually simple:
  a. map(None, X) -> list(X)
  b. map(F, X) -> list(map(F, X))
  c. map(lambda x: F(x), X) -> [F(x) for x in X]
  d. filter(F, X) -> list(filter(F, X))
  e. filter(lambda x: P(x), X) -> [x for x in X if P(x)]
  Someone, please also contribute a fixer for 2to3 to do this.
  It can leave map()/filter() calls alone that are already
  inside a list() or sorted() call or for-loop.
  Only in rare cases have I seen code that depends on map() of lists
  of different lengths going to the end of the longest, or on filter()
  of a string or tuple returning an object of the same type; these
  will need more thought to fix.
........
  r56136 | guido.van.rossum | 2007-07-01 06:22:01 +0200 (Sun, 01 Jul 2007) | 3 lines
  Make it so that test_decimal fails instead of hangs, to help automated
  test runners.
........
  r56139 | georg.brandl | 2007-07-01 18:20:58 +0200 (Sun, 01 Jul 2007) | 2 lines
  Fix a few test cases after the map->imap change.
........
  r56142 | neal.norwitz | 2007-07-02 06:38:12 +0200 (Mon, 02 Jul 2007) | 1 line
  Get a bunch more tests passing after converting map/filter to return iterators.
........
  r56147 | guido.van.rossum | 2007-07-02 15:32:02 +0200 (Mon, 02 Jul 2007) | 4 lines
  Fix the remaining failing unit tests (at least on OSX).
  Also tweaked urllib2 so it doesn't raise socket.gaierror when
  all network interfaces are turned off.
........
											
										 
											2007-07-03 08:25:58 +00:00
										 |  |  |             if any(stringprep.in_table_d2(x) for x in label): | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |                 raise UnicodeError("Violation of BIDI requirement 2") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |             # 3) If a string contains any RandALCat character, a | 
					
						
							|  |  |  |             # RandALCat character MUST be the first character of the | 
					
						
							|  |  |  |             # string, and a RandALCat character MUST be the last | 
					
						
							|  |  |  |             # character of the string. | 
					
						
							|  |  |  |             if not RandAL[0] or not RandAL[-1]: | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |                 raise UnicodeError("Violation of BIDI requirement 3") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return label | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def ToASCII(label): | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         # Step 1: try ASCII | 
					
						
							|  |  |  |         label = label.encode("ascii") | 
					
						
							|  |  |  |     except UnicodeError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         # Skip to step 3: UseSTD3ASCIIRules is false, so | 
					
						
							|  |  |  |         # Skip to step 8. | 
					
						
							|  |  |  |         if 0 < len(label) < 64: | 
					
						
							|  |  |  |             return label | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         raise UnicodeError("label empty or too long") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Step 2: nameprep | 
					
						
							|  |  |  |     label = nameprep(label) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Step 3: UseSTD3ASCIIRules is false | 
					
						
							|  |  |  |     # Step 4: try ASCII | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         label = label.encode("ascii") | 
					
						
							|  |  |  |     except UnicodeError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         # Skip to step 8. | 
					
						
							|  |  |  |         if 0 < len(label) < 64: | 
					
						
							|  |  |  |             return label | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         raise UnicodeError("label empty or too long") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Step 5: Check ACE prefix | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |     if label.startswith(sace_prefix): | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         raise UnicodeError("Label starts with ACE prefix") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Step 6: Encode with PUNYCODE | 
					
						
							|  |  |  |     label = label.encode("punycode") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Step 7: Prepend ACE prefix | 
					
						
							|  |  |  |     label = ace_prefix + label | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Step 8: Check size | 
					
						
							|  |  |  |     if 0 < len(label) < 64: | 
					
						
							|  |  |  |         return label | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |     raise UnicodeError("label empty or too long") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def ToUnicode(label): | 
					
						
							|  |  |  |     # Step 1: Check for ASCII | 
					
						
							| 
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 |  |  |     if isinstance(label, bytes): | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |         pure_ascii = True | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             label = label.encode("ascii") | 
					
						
							|  |  |  |             pure_ascii = True | 
					
						
							|  |  |  |         except UnicodeError: | 
					
						
							|  |  |  |             pure_ascii = False | 
					
						
							|  |  |  |     if not pure_ascii: | 
					
						
							|  |  |  |         # Step 2: Perform nameprep | 
					
						
							|  |  |  |         label = nameprep(label) | 
					
						
							|  |  |  |         # It doesn't say this, but apparently, it should be ASCII now | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             label = label.encode("ascii") | 
					
						
							|  |  |  |         except UnicodeError: | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             raise UnicodeError("Invalid character in IDN label") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |     # Step 3: Check for ACE prefix | 
					
						
							|  |  |  |     if not label.startswith(ace_prefix): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         return str(label, "ascii") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Step 4: Remove ACE prefix | 
					
						
							|  |  |  |     label1 = label[len(ace_prefix):] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Step 5: Decode using PUNYCODE | 
					
						
							|  |  |  |     result = label1.decode("punycode") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Step 6: Apply ToASCII | 
					
						
							|  |  |  |     label2 = ToASCII(result) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Step 7: Compare the result of step 6 with the one of step 3 | 
					
						
							|  |  |  |     # label2 will already be in lower case. | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |     if str(label, "ascii").lower() != str(label2, "ascii"): | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         raise UnicodeError("IDNA does not round-trip", label, label2) | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Step 8: return the result of step 5 | 
					
						
							|  |  |  |     return result | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | ### Codec APIs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Codec(codecs.Codec): | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |     def encode(self, input, errors='strict'): | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if errors != 'strict': | 
					
						
							|  |  |  |             # IDNA is quite clear that implementations must be strict | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             raise UnicodeError("unsupported error handling "+errors) | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 |  |  |         if not input: | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |             return b'', 0 | 
					
						
							| 
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-10 22:49:20 +01:00
										 |  |  |         try: | 
					
						
							|  |  |  |             result = input.encode('ascii') | 
					
						
							|  |  |  |         except UnicodeEncodeError: | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # ASCII name: fast path | 
					
						
							|  |  |  |             labels = result.split(b'.') | 
					
						
							|  |  |  |             for label in labels[:-1]: | 
					
						
							|  |  |  |                 if not (0 < len(label) < 64): | 
					
						
							|  |  |  |                     raise UnicodeError("label empty or too long") | 
					
						
							|  |  |  |             if len(labels[-1]) >= 64: | 
					
						
							|  |  |  |                 raise UnicodeError("label too long") | 
					
						
							|  |  |  |             return result, len(input) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |         result = bytearray() | 
					
						
							| 
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 |  |  |         labels = dots.split(input) | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |         if labels and not labels[-1]: | 
					
						
							| 
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 |  |  |             trailing_dot = b'.' | 
					
						
							| 
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 |  |  |             del labels[-1] | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 |  |  |             trailing_dot = b'' | 
					
						
							| 
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 |  |  |         for label in labels: | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |             if result: | 
					
						
							|  |  |  |                 # Join with U+002E | 
					
						
							|  |  |  |                 result.extend(b'.') | 
					
						
							|  |  |  |             result.extend(ToASCII(label)) | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         return bytes(result+trailing_dot), len(input) | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |     def decode(self, input, errors='strict'): | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |         if errors != 'strict': | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             raise UnicodeError("Unsupported error handling "+errors) | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 |  |  |         if not input: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return "", 0 | 
					
						
							| 
									
										
										
										
											2005-08-25 11:03:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |         # IDNA allows decoding to operate on Unicode strings, too. | 
					
						
							| 
									
										
										
										
											2008-08-19 17:56:33 +00:00
										 |  |  |         if not isinstance(input, bytes): | 
					
						
							|  |  |  |             # XXX obviously wrong, see #3232 | 
					
						
							| 
									
										
										
										
											2007-05-09 23:40:37 +00:00
										 |  |  |             input = bytes(input) | 
					
						
							| 
									
										
										
										
											2011-11-10 22:49:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if ace_prefix not in input: | 
					
						
							|  |  |  |             # Fast path | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 return input.decode('ascii'), len(input) | 
					
						
							|  |  |  |             except UnicodeDecodeError: | 
					
						
							|  |  |  |                 pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-19 17:56:33 +00:00
										 |  |  |         labels = input.split(b".") | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 |  |  |         if labels and len(labels[-1]) == 0: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             trailing_dot = '.' | 
					
						
							| 
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 |  |  |             del labels[-1] | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             trailing_dot = '' | 
					
						
							| 
									
										
										
										
											2003-08-05 06:19:47 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  |         result = [] | 
					
						
							|  |  |  |         for label in labels: | 
					
						
							|  |  |  |             result.append(ToUnicode(label)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         return ".".join(result)+trailing_dot, len(input) | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  | class IncrementalEncoder(codecs.BufferedIncrementalEncoder): | 
					
						
							|  |  |  |     def _buffer_encode(self, input, errors, final): | 
					
						
							|  |  |  |         if errors != 'strict': | 
					
						
							|  |  |  |             # IDNA is quite clear that implementations must be strict | 
					
						
							|  |  |  |             raise UnicodeError("unsupported error handling "+errors) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not input: | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |             return (b'', 0) | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         labels = dots.split(input) | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |         trailing_dot = b'' | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         if labels: | 
					
						
							|  |  |  |             if not labels[-1]: | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |                 trailing_dot = b'.' | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |                 del labels[-1] | 
					
						
							|  |  |  |             elif not final: | 
					
						
							|  |  |  |                 # Keep potentially unfinished label until the next call | 
					
						
							|  |  |  |                 del labels[-1] | 
					
						
							|  |  |  |                 if labels: | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |                     trailing_dot = b'.' | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |         result = bytearray() | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         size = 0 | 
					
						
							|  |  |  |         for label in labels: | 
					
						
							|  |  |  |             if size: | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |                 # Join with U+002E | 
					
						
							|  |  |  |                 result.extend(b'.') | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |                 size += 1 | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |             result.extend(ToASCII(label)) | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             size += len(label) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |         result += trailing_dot | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         size += len(trailing_dot) | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         return (bytes(result), size) | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | class IncrementalDecoder(codecs.BufferedIncrementalDecoder): | 
					
						
							|  |  |  |     def _buffer_decode(self, input, errors, final): | 
					
						
							|  |  |  |         if errors != 'strict': | 
					
						
							|  |  |  |             raise UnicodeError("Unsupported error handling "+errors) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not input: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("", 0) | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # IDNA allows decoding to operate on Unicode strings, too. | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         if isinstance(input, str): | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             labels = dots.split(input) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # Must be ASCII string | 
					
						
							| 
									
										
										
										
											2007-05-11 10:32:57 +00:00
										 |  |  |             input = str(input, "ascii") | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             labels = input.split(".") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         trailing_dot = '' | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         if labels: | 
					
						
							|  |  |  |             if not labels[-1]: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 trailing_dot = '.' | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |                 del labels[-1] | 
					
						
							|  |  |  |             elif not final: | 
					
						
							|  |  |  |                 # Keep potentially unfinished label until the next call | 
					
						
							|  |  |  |                 del labels[-1] | 
					
						
							|  |  |  |                 if labels: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     trailing_dot = '.' | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         result = [] | 
					
						
							|  |  |  |         size = 0 | 
					
						
							|  |  |  |         for label in labels: | 
					
						
							|  |  |  |             result.append(ToUnicode(label)) | 
					
						
							|  |  |  |             if size: | 
					
						
							|  |  |  |                 size += 1 | 
					
						
							|  |  |  |             size += len(label) | 
					
						
							| 
									
										
										
										
											2006-04-21 09:43:23 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         result = ".".join(result) + trailing_dot | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |         size += len(trailing_dot) | 
					
						
							|  |  |  |         return (result, size) | 
					
						
							| 
									
										
										
										
											2006-04-21 09:43:23 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-18 10:39:54 +00:00
										 |  |  | class StreamWriter(Codec,codecs.StreamWriter): | 
					
						
							|  |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class StreamReader(Codec,codecs.StreamReader): | 
					
						
							|  |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ### encodings module API | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def getregentry(): | 
					
						
							| 
									
										
										
										
											2006-04-21 09:43:23 +00:00
										 |  |  |     return codecs.CodecInfo( | 
					
						
							|  |  |  |         name='idna', | 
					
						
							|  |  |  |         encode=Codec().encode, | 
					
						
							|  |  |  |         decode=Codec().decode, | 
					
						
							|  |  |  |         incrementalencoder=IncrementalEncoder, | 
					
						
							|  |  |  |         incrementaldecoder=IncrementalDecoder, | 
					
						
							|  |  |  |         streamwriter=StreamWriter, | 
					
						
							|  |  |  |         streamreader=StreamReader, | 
					
						
							|  |  |  |     ) |