Mirror of https://github.com/python/cpython.git (synced 2025-10-31 13:41:24 +00:00)
			
		
		
		
	bpo-36742: Fixes handling of pre-normalization characters in urlsplit() (GH-13017)
This commit is contained in:
		
Parent: b84cb70880
Commit: d537ab0ff9
					
				
					 3 changed files with 14 additions and 4 deletions
				
			
		|  | @ -1011,6 +1011,12 @@ def test_urlsplit_normalization(self): | ||||||
|         self.assertIn('\u2100', denorm_chars) |         self.assertIn('\u2100', denorm_chars) | ||||||
|         self.assertIn('\uFF03', denorm_chars) |         self.assertIn('\uFF03', denorm_chars) | ||||||
| 
 | 
 | ||||||
|  |         # bpo-36742: Verify port separators are ignored when they | ||||||
|  |         # existed prior to decomposition | ||||||
|  |         urllib.parse.urlsplit('http://\u30d5\u309a:80') | ||||||
|  |         with self.assertRaises(ValueError): | ||||||
|  |             urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380') | ||||||
|  | 
 | ||||||
|         for scheme in ["http", "https", "ftp"]: |         for scheme in ["http", "https", "ftp"]: | ||||||
|             for c in denorm_chars: |             for c in denorm_chars: | ||||||
|                 url = "{}://netloc{}false.netloc/path".format(scheme, c) |                 url = "{}://netloc{}false.netloc/path".format(scheme, c) | ||||||
|  |  | ||||||
|  | @ -402,13 +402,16 @@ def _checknetloc(netloc): | ||||||
|     # looking for characters like \u2100 that expand to 'a/c' |     # looking for characters like \u2100 that expand to 'a/c' | ||||||
|     # IDNA uses NFKC equivalence, so normalize for this check |     # IDNA uses NFKC equivalence, so normalize for this check | ||||||
|     import unicodedata |     import unicodedata | ||||||
|     netloc2 = unicodedata.normalize('NFKC', netloc) |     n = netloc.rpartition('@')[2] # ignore anything to the left of '@' | ||||||
|     if netloc == netloc2: |     n = n.replace(':', '')        # ignore characters already included | ||||||
|  |     n = n.replace('#', '')        # but not the surrounding text | ||||||
|  |     n = n.replace('?', '') | ||||||
|  |     netloc2 = unicodedata.normalize('NFKC', n) | ||||||
|  |     if n == netloc2: | ||||||
|         return |         return | ||||||
|     _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay |  | ||||||
|     for c in '/?#@:': |     for c in '/?#@:': | ||||||
|         if c in netloc2: |         if c in netloc2: | ||||||
|             raise ValueError("netloc '" + netloc2 + "' contains invalid " + |             raise ValueError("netloc '" + netloc + "' contains invalid " + | ||||||
|                              "characters under NFKC normalization") |                              "characters under NFKC normalization") | ||||||
| 
 | 
 | ||||||
| def urlsplit(url, scheme='', allow_fragments=True): | def urlsplit(url, scheme='', allow_fragments=True): | ||||||
|  |  | ||||||
|  | @ -0,0 +1 @@ | ||||||
|  | Fixes mishandling of pre-normalization characters in urlsplit(). | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Steve Dower
						Steve Dower