mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	gh-94808: improve comments and coverage of fastsearch.h (GH-96760)
This commit is contained in:
		
							parent
							
								
									4995f5f9a0
								
							
						
					
					
						commit
						69d9a08099
					
				
					 3 changed files with 54 additions and 5 deletions
				
			
		|  | @ -341,6 +341,42 @@ def reference_find(p, s): | ||||||
|                 self.checkequal(reference_find(p, text), |                 self.checkequal(reference_find(p, text), | ||||||
|                                 text, 'find', p) |                                 text, 'find', p) | ||||||
| 
 | 
 | ||||||
|  |     def test_find_many_lengths(self): | ||||||
|  |         haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)] | ||||||
|  |         haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats] | ||||||
|  | 
 | ||||||
|  |         needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)] | ||||||
|  |         needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats] | ||||||
|  | 
 | ||||||
|  |         for n, haystack1 in haystacks: | ||||||
|  |             haystack2 = haystack1[:-1] | ||||||
|  |             for m, needle in needles: | ||||||
|  |                 answer1 = 5 * (n - m) if m <= n else -1 | ||||||
|  |                 self.assertEqual(haystack1.find(needle), answer1, msg=(n,m)) | ||||||
|  |                 self.assertEqual(haystack2.find(needle), -1, msg=(n,m)) | ||||||
|  | 
 | ||||||
|  |     def test_adaptive_find(self): | ||||||
|  |         # This would be very slow for the naive algorithm, | ||||||
|  |         # but str.find() should be O(n + m). | ||||||
|  |         for N in 1000, 10_000, 100_000, 1_000_000: | ||||||
|  |             A, B = 'a' * N, 'b' * N | ||||||
|  |             haystack = A + A + B + A + A | ||||||
|  |             needle = A + B + B + A | ||||||
|  |             self.checkequal(-1, haystack, 'find', needle) | ||||||
|  |             self.checkequal(0, haystack, 'count', needle) | ||||||
|  |             self.checkequal(len(haystack), haystack + needle, 'find', needle) | ||||||
|  |             self.checkequal(1, haystack + needle, 'count', needle) | ||||||
|  | 
 | ||||||
|  |     def test_find_with_memory(self): | ||||||
|  |         # Test the "Skip with memory" path in the two-way algorithm. | ||||||
|  |         for N in 1000, 3000, 10_000, 30_000: | ||||||
|  |             needle = 'ab' * N | ||||||
|  |             haystack = ('ab'*(N-1) + 'b') * 2 | ||||||
|  |             self.checkequal(-1, haystack, 'find', needle) | ||||||
|  |             self.checkequal(0, haystack, 'count', needle) | ||||||
|  |             self.checkequal(len(haystack), haystack + needle, 'find', needle) | ||||||
|  |             self.checkequal(1, haystack + needle, 'count', needle) | ||||||
|  | 
 | ||||||
|     def test_find_shift_table_overflow(self): |     def test_find_shift_table_overflow(self): | ||||||
|         """When the table of 8-bit shifts overflows.""" |         """When the table of 8-bit shifts overflows.""" | ||||||
|         N = 2**8 + 100 |         N = 2**8 + 100 | ||||||
|  | @ -715,6 +751,18 @@ def test_replace(self): | ||||||
|         self.checkraises(TypeError, 'hello', 'replace', 42, 'h') |         self.checkraises(TypeError, 'hello', 'replace', 42, 'h') | ||||||
|         self.checkraises(TypeError, 'hello', 'replace', 'h', 42) |         self.checkraises(TypeError, 'hello', 'replace', 'h', 42) | ||||||
| 
 | 
 | ||||||
|  |     def test_replace_uses_two_way_maxcount(self): | ||||||
|  |         # Test that maxcount works in _two_way_count in fastsearch.h | ||||||
|  |         A, B = "A"*1000, "B"*1000 | ||||||
|  |         AABAA = A + A + B + A + A | ||||||
|  |         ABBA = A + B + B + A | ||||||
|  |         self.checkequal(AABAA + ABBA, | ||||||
|  |                         AABAA + ABBA, 'replace', ABBA, "ccc", 0) | ||||||
|  |         self.checkequal(AABAA + "ccc", | ||||||
|  |                         AABAA + ABBA, 'replace', ABBA, "ccc", 1) | ||||||
|  |         self.checkequal(AABAA + "ccc", | ||||||
|  |                         AABAA + ABBA, 'replace', ABBA, "ccc", 2) | ||||||
|  | 
 | ||||||
|     @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, |     @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, | ||||||
|                      'only applies to 32-bit platforms') |                      'only applies to 32-bit platforms') | ||||||
|     def test_replace_overflow(self): |     def test_replace_overflow(self): | ||||||
|  |  | ||||||
|  | @ -18,7 +18,8 @@ | ||||||
|    algorithm, which has worst-case O(n) runtime and best-case O(n/k). |    algorithm, which has worst-case O(n) runtime and best-case O(n/k). | ||||||
|    Also compute a table of shifts to achieve O(n/k) in more cases, |    Also compute a table of shifts to achieve O(n/k) in more cases, | ||||||
|    and often (data dependent) deduce larger shifts than pure C&P can |    and often (data dependent) deduce larger shifts than pure C&P can | ||||||
|    deduce. */ |    deduce. See stringlib_find_two_way_notes.txt in this folder for a | ||||||
|  |    detailed explanation. */ | ||||||
| 
 | 
 | ||||||
| #define FAST_COUNT 0 | #define FAST_COUNT 0 | ||||||
| #define FAST_SEARCH 1 | #define FAST_SEARCH 1 | ||||||
|  | @ -398,7 +399,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, | ||||||
|                 if (window_last >= haystack_end) { |                 if (window_last >= haystack_end) { | ||||||
|                     return -1; |                     return -1; | ||||||
|                 } |                 } | ||||||
|                 LOG("Horspool skip"); |                 LOG("Horspool skip\n"); | ||||||
|             } |             } | ||||||
|           no_shift: |           no_shift: | ||||||
|             window = window_last - len_needle + 1; |             window = window_last - len_needle + 1; | ||||||
|  | @ -457,7 +458,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, | ||||||
|                 if (window_last >= haystack_end) { |                 if (window_last >= haystack_end) { | ||||||
|                     return -1; |                     return -1; | ||||||
|                 } |                 } | ||||||
|                 LOG("Horspool skip"); |                 LOG("Horspool skip\n"); | ||||||
|             } |             } | ||||||
|             window = window_last - len_needle + 1; |             window = window_last - len_needle + 1; | ||||||
|             assert((window[len_needle - 1] & TABLE_MASK) == |             assert((window[len_needle - 1] & TABLE_MASK) == | ||||||
|  |  | ||||||
|  | @ -395,7 +395,7 @@ of their proof goes something like this (this is far from complete): | ||||||
|       needle == (a + w) + (w + b), meaning there's a bad equality |       needle == (a + w) + (w + b), meaning there's a bad equality | ||||||
|       w == w, it's impossible for w + b to be bigger than both |       w == w, it's impossible for w + b to be bigger than both | ||||||
|       b and w + w + b, so this can't happen. We thus have all of |       b and w + w + b, so this can't happen. We thus have all of | ||||||
|       the ineuqalities with no question marks. |       the inequalities with no question marks. | ||||||
|     * By maximality, the right part is not a substring of the left |     * By maximality, the right part is not a substring of the left | ||||||
|       part. Thus, we have all of the inequalities involving no |       part. Thus, we have all of the inequalities involving no | ||||||
|       left-side question marks. |       left-side question marks. | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dennis Sweeney
						Dennis Sweeney