| 
									
										
										
										
											2002-07-23 19:04:11 +00:00
										 |  |  | from test.test_support import verbose, sortdict | 
					
						
							| 
									
										
										
										
											2000-12-23 22:08:27 +00:00
										 |  |  | import warnings | 
					
						
							|  |  |  | warnings.filterwarnings("ignore", "the regex module is deprecated", | 
					
						
							| 
									
										
										
										
											2002-10-17 22:13:28 +00:00
										 |  |  |                         DeprecationWarning, __name__) | 
					
						
							| 
									
										
										
										
											1996-12-20 22:00:21 +00:00
										 |  |  | import regex | 
					
						
							|  |  |  | from regex_syntax import * | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | re = 'a+b+c+' | 
					
						
							|  |  |  | print 'no match:', regex.match(re, 'hello aaaabcccc world') | 
					
						
							|  |  |  | print 'successful search:', regex.search(re, 'hello aaaabcccc world') | 
					
						
							|  |  |  | try: | 
					
						
							|  |  |  |     cre = regex.compile('\(' + re) | 
					
						
							|  |  |  | except regex.error: | 
					
						
							|  |  |  |     print 'caught expected exception' | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     print 'expected regex.error not raised' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb') | 
					
						
							|  |  |  | prev = regex.set_syntax(RE_SYNTAX_AWK) | 
					
						
							|  |  |  | print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb') | 
					
						
							|  |  |  | regex.set_syntax(prev) | 
					
						
							|  |  |  | print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | re = '\(<one>[0-9]+\) *\(<two>[0-9]+\)' | 
					
						
							|  |  |  | print 'matching with group names and compile()' | 
					
						
							|  |  |  | cre = regex.compile(re) | 
					
						
							|  |  |  | print cre.match('801 999') | 
					
						
							|  |  |  | try: | 
					
						
							|  |  |  |     print cre.group('one') | 
					
						
							|  |  |  | except regex.error: | 
					
						
							|  |  |  |     print 'caught expected exception' | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     print 'expected regex.error not raised' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print 'matching with group names and symcomp()' | 
					
						
							|  |  |  | cre = regex.symcomp(re) | 
					
						
							|  |  |  | print cre.match('801 999') | 
					
						
							|  |  |  | print cre.group(0) | 
					
						
							|  |  |  | print cre.group('one') | 
					
						
							|  |  |  | print cre.group(1, 2) | 
					
						
							|  |  |  | print cre.group('one', 'two') | 
					
						
							|  |  |  | print 'realpat:', cre.realpat | 
					
						
							| 
									
										
											  
											
												Get rid of the superstitious "~" in dict hashing's "i = (~hash) & mask".
The comment following used to say:
	/* We use ~hash instead of hash, as degenerate hash functions, such
	   as for ints <sigh>, can have lots of leading zeros. It's not
	   really a performance risk, but better safe than sorry.
	   12-Dec-00 tim:  so ~hash produces lots of leading ones instead --
	   what's the gain? */
That is, there was never a good reason for doing it.  And to the contrary,
as explained on Python-Dev last December, it tended to make the *sum*
(i + incr) & mask (which is the first table index examined in case of
collision) the same "too often" across distinct hashes.
Changing to the simpler "i = hash & mask" reduced the number of string-dict
collisions (== number of times we go around the lookup for-loop) from about
6 million to 5 million during a full run of the test suite (these are
approximate because the test suite does some random stuff from run to run).
The number of collisions in non-string dicts also decreased, but not as
dramatically.
Note that this may, for a given dict, change the order (wrt previous
releases) of entries exposed by .keys(), .values() and .items().  A number
of std tests suffered bogus failures as a result.  For dicts keyed by
small ints, or (less so) by characters, the order is much more likely to be
in increasing order of key now; e.g.,
>>> d = {}
>>> for i in range(10):
...    d[i] = i
...
>>> d
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
>>>
Unfortunately, people may latch on to that in small examples and draw a
bogus conclusion.
test_support.py
    Moved test_extcall's sortdict() into test_support, made it stronger,
    and imported sortdict into other std tests that needed it.
test_unicode.py
    Excluded cp875 from the "roundtrip over range(128)" test, because
    cp875 doesn't have a well-defined inverse for unicode("?", "cp875").
    See Python-Dev for excruciating details.
Cookie.py
    Changed various output functions to sort dicts before building
    strings from them.
test_extcall
    Fiddled the expected-result file.  This remains sensitive to native
    dict ordering, because, e.g., if there are multiple errors in a
    keyword-arg dict (and test_extcall sets up many cases like that), the
    specific error Python complains about first depends on native dict
    ordering.
											
										 
											2001-05-13 00:19:31 +00:00
										 |  |  | print 'groupindex:', sortdict(cre.groupindex) | 
					
						
							| 
									
										
										
										
											1996-12-20 22:00:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | re = 'world' | 
					
						
							|  |  |  | cre = regex.compile(re) | 
					
						
							|  |  |  | print 'not case folded search:', cre.search('HELLO WORLD') | 
					
						
							|  |  |  | cre = regex.compile(re, regex.casefold) | 
					
						
							|  |  |  | print 'case folded search:', cre.search('HELLO WORLD') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print '__members__:', cre.__members__ | 
					
						
							|  |  |  | print 'regs:', cre.regs | 
					
						
							|  |  |  | print 'last:', cre.last | 
					
						
							| 
									
										
										
										
											1997-05-16 13:51:48 +00:00
										 |  |  | print 'translate:', len(cre.translate) | 
					
						
							| 
									
										
										
										
											1996-12-20 22:00:21 +00:00
										 |  |  | print 'givenpat:', cre.givenpat | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print 'match with pos:', cre.match('hello world', 7) | 
					
						
							|  |  |  | print 'search with pos:', cre.search('hello world there world', 7) | 
					
						
							|  |  |  | print 'bogus group:', cre.group(0, 1, 3) | 
					
						
							|  |  |  | try: | 
					
						
							|  |  |  |     print 'no name:', cre.group('one') | 
					
						
							|  |  |  | except regex.error: | 
					
						
							|  |  |  |     print 'caught expected exception' | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     print 'expected regex.error not raised' | 
					
						
							| 
									
										
										
										
											1997-06-03 18:07:49 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | from regex_tests import * | 
					
						
							|  |  |  | if verbose: print 'Running regex_tests test suite' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | for t in tests: | 
					
						
							|  |  |  |     pattern=s=outcome=repl=expected=None | 
					
						
							|  |  |  |     if len(t)==5: | 
					
						
							| 
									
										
										
										
											1998-03-26 19:42:58 +00:00
										 |  |  |         pattern, s, outcome, repl, expected = t | 
					
						
							| 
									
										
										
										
											1997-06-03 18:07:49 +00:00
										 |  |  |     elif len(t)==3: | 
					
						
							| 
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 |  |  |         pattern, s, outcome = t | 
					
						
							| 
									
										
										
										
											1997-06-03 18:07:49 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 19:42:58 +00:00
										 |  |  |         raise ValueError, ('Test tuples should have 3 or 5 fields',t) | 
					
						
							| 
									
										
										
										
											1997-06-03 18:07:49 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							| 
									
										
										
										
											1998-03-26 19:42:58 +00:00
										 |  |  |         obj=regex.compile(pattern) | 
					
						
							| 
									
										
										
										
											1997-06-03 18:07:49 +00:00
										 |  |  |     except regex.error: | 
					
						
							| 
									
										
										
										
											1998-03-26 19:42:58 +00:00
										 |  |  |         if outcome==SYNTAX_ERROR: pass    # Expected a syntax error | 
					
						
							| 
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 |  |  |         else: | 
					
						
							|  |  |  |             # Regex syntax errors aren't yet reported, so for | 
					
						
							| 
									
										
										
										
											1998-03-26 19:42:58 +00:00
										 |  |  |             # the official test suite they'll be quietly ignored. | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |             #print '=== Syntax error:', t | 
					
						
							| 
									
										
										
										
											1997-06-03 18:07:49 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											1998-03-26 19:42:58 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             result=obj.search(s) | 
					
						
							|  |  |  |         except regex.error, msg: | 
					
						
							|  |  |  |             print '=== Unexpected exception', t, repr(msg) | 
					
						
							|  |  |  |         if outcome==SYNTAX_ERROR: | 
					
						
							|  |  |  |             # This should have been a syntax error; forget it. | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |         elif outcome==FAIL: | 
					
						
							|  |  |  |             if result==-1: pass   # No match, as expected | 
					
						
							|  |  |  |             else: print '=== Succeeded incorrectly', t | 
					
						
							|  |  |  |         elif outcome==SUCCEED: | 
					
						
							|  |  |  |             if result!=-1: | 
					
						
							|  |  |  |                 # Matched, as expected, so now we compute the | 
					
						
							|  |  |  |                 # result string and compare it to our expected result. | 
					
						
							|  |  |  |                 start, end = obj.regs[0] | 
					
						
							|  |  |  |                 found=s[start:end] | 
					
						
							|  |  |  |                 groups=obj.group(1,2,3,4,5,6,7,8,9,10) | 
					
						
							|  |  |  |                 vardict=vars() | 
					
						
							|  |  |  |                 for i in range(len(groups)): | 
					
						
							|  |  |  |                     vardict['g'+str(i+1)]=str(groups[i]) | 
					
						
							|  |  |  |                 repl=eval(repl) | 
					
						
							|  |  |  |                 if repl!=expected: | 
					
						
							|  |  |  |                     print '=== grouping error', t, repr(repl)+' should be '+repr(expected) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 print '=== Failed incorrectly', t |