mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			122 lines
		
	
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			122 lines
		
	
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import unittest
 | 
						|
from test.test_tools import skip_if_missing, imports_under_tool
 | 
						|
from test import support
 | 
						|
from test.support.hypothesis_helper import hypothesis
 | 
						|
 | 
						|
# Short module-level aliases for the hypothesis entry points used below.
st = hypothesis.strategies
given, example = hypothesis.given, hypothesis.example

# NOTE(review): presumably skips this whole test module when the "unicode"
# tool is not available -- confirm against test.test_tools.
skip_if_missing("unicode")

# The dawg module is imported while the "unicode" tool context is active.
with imports_under_tool("unicode"):
    from dawg import (
        Dawg,
        build_compression_dawg,
        lookup,
        inverse_lookup,
    )
 | 
						|
 | 
						|
 | 
						|
@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Hypothesis strategy producing a small (name, character) database.

    The number of entries is drawn between *min_length* and *max_length*
    (inclusive).  Names are distinct strings of 1 to 10 letters taken from
    the alphabet "abcd"; characters are distinct values drawn from
    ``st.characters()``.  The result is a list of ``(name, character)``
    tuples with exactly that many entries.
    """
    # Draw order is kept as: size first, then names, then characters.
    size = draw(st.integers(min_value=min_length, max_value=max_length))

    # min_size == max_size forces each set to hold exactly `size` elements.
    name_strategy = st.sets(
        st.text("abcd", min_size=1, max_size=10),
        min_size=size,
        max_size=size,
    )
    char_strategy = st.sets(st.characters(), min_size=size, max_size=size)

    names = draw(name_strategy)
    characters = draw(char_strategy)

    # Pair the two sets element by element in their iteration order.
    pairs = zip(names, characters)
    return list(pairs)
 | 
						|
 | 
						|
 | 
						|
class TestDawg(unittest.TestCase):
    """Tests for the directed acyclic word graph data structure that is used
    to store the unicode character names in unicodedata. Tests ported from
    PyPy.
    """

    def test_dawg_direct_simple(self):
        # Build a tiny DAWG by hand and check lookups in both directions.
        # The table is the single source of truth for the word -> value
        # mapping; words are inserted in exactly this order.
        entries = [
            ("a", -4),
            ("c", -2),
            ("cat", -1),
            ("catarr", 0),
            ("catnip", 1),
            ("zcatnip", 5),
        ]

        dawg = Dawg()
        for word, value in entries:
            dawg.insert(word, value)
        packed, data, inverse = dawg.finish()

        # Forward direction: words are looked up as bytes.
        for word, value in entries:
            with self.subTest(direction="lookup", word=word):
                self.assertEqual(
                    lookup(packed, data, word.encode("ascii")), value
                )

        # A word never inserted, a proper prefix of a stored word, and a
        # stored word with an extra trailing letter must all be rejected.
        for missing in (b"b", b"catni", b"catnipp"):
            with self.subTest(direction="lookup", missing=missing):
                self.assertRaises(KeyError, lookup, packed, data, missing)

        # Reverse direction: map each value back to its word (as bytes).
        for word, value in entries:
            with self.subTest(direction="inverse_lookup", value=value):
                self.assertEqual(
                    inverse_lookup(packed, inverse, value),
                    word.encode("ascii"),
                )

        # A value that was never inserted has no inverse.
        self.assertRaises(KeyError, inverse_lookup, packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        # Finishing a DAWG that had nothing inserted must raise ValueError.
        dawg = Dawg()
        with self.assertRaises(ValueError):
            dawg.finish()

    # The explicit examples below are run in addition to the databases
    # generated by char_name_db().
    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        # Suppress debug prints; the captured text is not inspected, so the
        # context manager's result is deliberately left unbound.
        with support.captured_stdout():
            # It's enough to build it, building will also check the result.
            build_compression_dawg(data)
 |