mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 11:14:33 +00:00 
			
		
		
		
	 972cf5c06a
			
		
	
	
		972cf5c06a
		
	
	
	
	
		
			
			* bpo-35168: Documentation about shlex.punctuation_chars now states that it should be set in __init__.py * bpo-35168: Convert shlex.punctuation_chars to read-only property * Add NEWS.d entry
		
			
				
	
	
		
			371 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			371 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import io
 | |
| import itertools
 | |
| import shlex
 | |
| import string
 | |
| import unittest
 | |
| 
 | |
| 
 | |
| 
 | |
# The original test data set was from shellwords, by Hartmut Goebel.
#
# Expectation table for a shlex.shlex() lexer created without the posix flag
# (see ShlexTest.oldSplit / testCompat).  Each line has the form
#
#     input|token|token|...|
#
# ShlexTest.setUp() splits every line on "|", drops the empty piece that
# follows the final "|", and turns the two-character sequence \n found in the
# input field into a real newline.  The literal is a raw string, so every
# backslash below is a literal backslash.

data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
 | |
| 
 | |
# Expectation table for shlex.split(..., comments=True), which is how
# ShlexTest.testSplitPosix consumes it.  Each line has the form
#
#     input|token|token|...|
#
# ShlexTest.setUp() splits every line on "|", drops the empty piece that
# follows the final "|", and turns the two-character sequence \n found in the
# input field into a real newline.  Two adjacent "|" therefore denote an
# expected empty-string token.  The literal is a raw string, so every
# backslash below is a literal backslash.
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
 | |
| 
 | |
class ShlexTest(unittest.TestCase):
    """Tests for shlex tokenizing, quoting and joining.

    The table-driven tests read the module-level ``data`` and ``posix_data``
    strings; the remaining tests build their inputs inline.
    """

    def setUp(self):
        """Parse the module-level tables into ``[input, token, ...]`` rows."""
        # Every table line ends with "|", so the last split piece is always
        # an empty string that must be dropped.
        self.data = [x.split("|")[:-1]
                     for x in data.splitlines()]
        self.posix_data = [x.split("|")[:-1]
                           for x in posix_data.splitlines()]
        # The tables are raw strings: a newline inside an input is spelled as
        # backslash + "n" and has to be converted to a real newline here.
        for item in self.data + self.posix_data:
            item[0] = item[0].replace(r"\n", "\n")

    def splitTest(self, data, comments):
        """Check ``shlex.split`` against every ``[input, token, ...]`` row.

        *data* is a list of rows as built by ``setUp``; *comments* is passed
        through to ``shlex.split``.
        """
        for source, *expected in data:
            with self.subTest(source=source):
                tokens = shlex.split(source, comments=comments)
                self.assertEqual(tokens, expected,
                                 "%s: %s != %s" %
                                 (source, tokens, expected))

    def oldSplit(self, s):
        """Tokenize *s* by calling ``get_token()`` until a false token."""
        ret = []
        lex = shlex.shlex(io.StringIO(s))
        tok = lex.get_token()
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    def _assertSyntaxSplit(self, sources, expected):
        """Lex each source with ``punctuation_chars=True`` in both modes.

        Every string in *sources* is tokenized once with
        ``whitespace_split`` False and once with it True; each run must
        produce exactly *expected*.
        """
        for ss, ws in itertools.product(sources, (False, True)):
            s = shlex.shlex(ss, punctuation_chars=True)
            s.whitespace_split = ws
            self.assertEqual(expected, list(s),
                             "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            with self.subTest(source=source):
                tokens = self.oldSplit(source)
                self.assertEqual(tokens, expected,
                                 "%s: %s != %s" %
                                 (source, tokens, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._assertSyntaxSplit(src, ref)

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._assertSyntaxSplit(src, ref)

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >, < and |"""
        # these should all parse to the same output
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            # The shared helper applies whitespace_split, so the ws=True
            # case reported in the failure message is really exercised.
            self._assertSyntaxSplit(src, ref)

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        self._assertSyntaxSplit(src, ref)

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            while True:
                t = s.get_token()
                if t == s.eof:
                    break
                # 'c' marks a token that starts with a punctuation char,
                # 'a' marks any other token.
                if t[0] in s.punctuation_chars:
                    tt = 'c'
                else:
                    tt = 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test shlex.quote on empty, safe, spaced and unsafe strings."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test shlex.join against hand-written expected command strings."""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that shlex.split(shlex.join(tokens)) gives back the tokens."""
        all_data = self.data + self.posix_data
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)

    def testPunctuationCharsReadOnly(self):
        """Test that punctuation_chars is readable but cannot be assigned."""
        punctuation_chars = "/|$%^"
        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
        with self.assertRaises(AttributeError):
            shlex_instance.punctuation_chars = False
 | |
| 
 | |
| 
 | |
# Keep this file usable against an old shlex.py: when the module offers no
# usable split(), every test except testCompat is removed from the class.
if not getattr(shlex, "split", None):
    for methname in [name for name in dir(ShlexTest)
                     if name.startswith("test") and name != "testCompat"]:
        delattr(ShlexTest, methname)

if __name__ == "__main__":
    unittest.main()
 |