| 
									
										
										
										
											2023-11-04 15:56:58 +01:00
										 |  |  | import unittest | 
					
						
							| 
									
										
										
										
											2023-11-06 09:32:35 -05:00
										 |  |  | from test.test_tools import skip_if_missing, imports_under_tool | 
					
						
							| 
									
										
										
										
											2023-11-04 15:56:58 +01:00
										 |  |  | from test import support | 
					
						
							|  |  |  | from test.support.hypothesis_helper import hypothesis | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | st = hypothesis.strategies | 
					
						
							|  |  |  | given = hypothesis.given | 
					
						
							|  |  |  | example = hypothesis.example | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-06 09:32:35 -05:00
										 |  |  | skip_if_missing("unicode") | 
					
						
							| 
									
										
										
										
											2023-11-04 15:56:58 +01:00
										 |  |  | with imports_under_tool("unicode"): | 
					
						
							|  |  |  |     from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @st.composite | 
					
						
							|  |  |  | def char_name_db(draw, min_length=1, max_length=30): | 
					
						
							|  |  |  |     m = draw(st.integers(min_value=min_length, max_value=max_length)) | 
					
						
							|  |  |  |     names = draw( | 
					
						
							|  |  |  |         st.sets(st.text("abcd", min_size=1, max_size=10), min_size=m, max_size=m) | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     characters = draw(st.sets(st.characters(), min_size=m, max_size=m)) | 
					
						
							|  |  |  |     return list(zip(names, characters)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class TestDawg(unittest.TestCase): | 
					
						
							|  |  |  |     """Tests for the directed acyclic word graph data structure that is used
 | 
					
						
							|  |  |  |     to store the unicode character names in unicodedata. Tests ported from PyPy | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_dawg_direct_simple(self): | 
					
						
							|  |  |  |         dawg = Dawg() | 
					
						
							|  |  |  |         dawg.insert("a", -4) | 
					
						
							|  |  |  |         dawg.insert("c", -2) | 
					
						
							|  |  |  |         dawg.insert("cat", -1) | 
					
						
							|  |  |  |         dawg.insert("catarr", 0) | 
					
						
							|  |  |  |         dawg.insert("catnip", 1) | 
					
						
							|  |  |  |         dawg.insert("zcatnip", 5) | 
					
						
							|  |  |  |         packed, data, inverse = dawg.finish() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual(lookup(packed, data, b"a"), -4) | 
					
						
							|  |  |  |         self.assertEqual(lookup(packed, data, b"c"), -2) | 
					
						
							|  |  |  |         self.assertEqual(lookup(packed, data, b"cat"), -1) | 
					
						
							|  |  |  |         self.assertEqual(lookup(packed, data, b"catarr"), 0) | 
					
						
							|  |  |  |         self.assertEqual(lookup(packed, data, b"catnip"), 1) | 
					
						
							|  |  |  |         self.assertEqual(lookup(packed, data, b"zcatnip"), 5) | 
					
						
							|  |  |  |         self.assertRaises(KeyError, lookup, packed, data, b"b") | 
					
						
							|  |  |  |         self.assertRaises(KeyError, lookup, packed, data, b"catni") | 
					
						
							|  |  |  |         self.assertRaises(KeyError, lookup, packed, data, b"catnipp") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual(inverse_lookup(packed, inverse, -4), b"a") | 
					
						
							|  |  |  |         self.assertEqual(inverse_lookup(packed, inverse, -2), b"c") | 
					
						
							|  |  |  |         self.assertEqual(inverse_lookup(packed, inverse, -1), b"cat") | 
					
						
							|  |  |  |         self.assertEqual(inverse_lookup(packed, inverse, 0), b"catarr") | 
					
						
							|  |  |  |         self.assertEqual(inverse_lookup(packed, inverse, 1), b"catnip") | 
					
						
							|  |  |  |         self.assertEqual(inverse_lookup(packed, inverse, 5), b"zcatnip") | 
					
						
							|  |  |  |         self.assertRaises(KeyError, inverse_lookup, packed, inverse, 12) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_forbid_empty_dawg(self): | 
					
						
							|  |  |  |         dawg = Dawg() | 
					
						
							|  |  |  |         self.assertRaises(ValueError, dawg.finish) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @given(char_name_db()) | 
					
						
							|  |  |  |     @example([("abc", "a"), ("abd", "b")]) | 
					
						
							|  |  |  |     @example( | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             ("bab", "1"), | 
					
						
							|  |  |  |             ("a", ":"), | 
					
						
							|  |  |  |             ("ad", "@"), | 
					
						
							|  |  |  |             ("b", "<"), | 
					
						
							|  |  |  |             ("aacc", "?"), | 
					
						
							|  |  |  |             ("dab", "D"), | 
					
						
							|  |  |  |             ("aa", "0"), | 
					
						
							|  |  |  |             ("ab", "F"), | 
					
						
							|  |  |  |             ("aaa", "7"), | 
					
						
							|  |  |  |             ("cbd", "="), | 
					
						
							|  |  |  |             ("abad", ";"), | 
					
						
							|  |  |  |             ("ac", "B"), | 
					
						
							|  |  |  |             ("abb", "4"), | 
					
						
							|  |  |  |             ("bb", "2"), | 
					
						
							|  |  |  |             ("aab", "9"), | 
					
						
							|  |  |  |             ("caaaaba", "E"), | 
					
						
							|  |  |  |             ("ca", ">"), | 
					
						
							|  |  |  |             ("bbaaa", "5"), | 
					
						
							|  |  |  |             ("d", "3"), | 
					
						
							|  |  |  |             ("baac", "8"), | 
					
						
							|  |  |  |             ("c", "6"), | 
					
						
							|  |  |  |             ("ba", "A"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     @example( | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             ("bcdac", "9"), | 
					
						
							|  |  |  |             ("acc", "g"), | 
					
						
							|  |  |  |             ("d", "d"), | 
					
						
							|  |  |  |             ("daabdda", "0"), | 
					
						
							|  |  |  |             ("aba", ";"), | 
					
						
							|  |  |  |             ("c", "6"), | 
					
						
							|  |  |  |             ("aa", "7"), | 
					
						
							|  |  |  |             ("abbd", "c"), | 
					
						
							|  |  |  |             ("badbd", "?"), | 
					
						
							|  |  |  |             ("bbd", "f"), | 
					
						
							|  |  |  |             ("cc", "@"), | 
					
						
							|  |  |  |             ("bb", "8"), | 
					
						
							|  |  |  |             ("daca", ">"), | 
					
						
							|  |  |  |             ("ba", ":"), | 
					
						
							|  |  |  |             ("baac", "3"), | 
					
						
							|  |  |  |             ("dbdddac", "a"), | 
					
						
							|  |  |  |             ("a", "2"), | 
					
						
							|  |  |  |             ("cabd", "b"), | 
					
						
							|  |  |  |             ("b", "="), | 
					
						
							|  |  |  |             ("abd", "4"), | 
					
						
							|  |  |  |             ("adcbd", "5"), | 
					
						
							|  |  |  |             ("abc", "e"), | 
					
						
							|  |  |  |             ("ab", "1"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     def test_dawg(self, data): | 
					
						
							|  |  |  |         # suppress debug prints | 
					
						
							|  |  |  |         with support.captured_stdout() as output: | 
					
						
							|  |  |  |             # it's enough to build it, building will also check the result | 
					
						
							|  |  |  |             build_compression_dawg(data) |