mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			400 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			400 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Classes representing state-machine concepts"""
 | ||
| 
 | ||
class NFA:
    """A non-deterministic finite automaton.

    A non-deterministic automaton is a form of finite state machine whose
    rules are less restrictive than those of a DFA:

      * A transition can be non-deterministic and can result in
        nothing, one, or two or more states.

      * An epsilon transition consuming empty input is valid.
        Transitions consuming labeled symbols are also permitted.

    This class assumes that there is only one starting state and one
    accepting (ending) state.

    Attributes:
        name (str): The name of the rule the NFA is representing; taken
            from ``start.rule_name``.
        start (NFAState): The starting state.
        end (NFAState): The ending state.
    """

    def __init__(self, start, end):
        self.name = start.rule_name
        self.start = start
        self.end = end

    def __repr__(self):
        return "NFA(start={}, end={})".format(self.start, self.end)

    def _numbered_arcs(self):
        """Walk the states reachable from ``self.start`` in discovery order.

        States are numbered in the order they are first seen, starting with
        the start state at 0.

        Yields:
            Tuples ``(index, state, arcs)`` where *arcs* is a list of
            ``(label, target_index)`` pairs, one per outgoing arc of *state*,
            in the order the arcs are stored on the state.
        """
        order = [self.start]
        # Maps a state to its number so each lookup is O(1) instead of
        # scanning ``order`` twice (membership test + index) per arc.
        index_of = {self.start: 0}
        for index, state in enumerate(order):  # NB order grows while iterating
            numbered = []
            for arc in state.arcs:
                target_index = index_of.get(arc.target)
                if target_index is None:
                    target_index = index_of[arc.target] = len(order)
                    order.append(arc.target)
                numbered.append((arc.label, target_index))
            yield index, state, numbered

    def dump(self, writer=print):
        """Dump a textual representation of the NFA.

        Parameters:
            writer (callable): Called print-style, once per output line.
                Defaults to ``print``.
        """
        for index, state, arcs in self._numbered_arcs():
            writer("  State", index, "(final)" if state is self.end else "")
            for label, target_index in arcs:
                if label is None:
                    # Epsilon transition: nothing is consumed, so no label.
                    writer("    -> %d" % target_index)
                else:
                    writer("    %s -> %d" % (label, target_index))

    def dump_graph(self, writer):
        """Dump a DOT representation of the NFA.

        Parameters:
            writer (callable): Called with one string per emitted chunk
                (for example ``file.write``).
        """
        writer('digraph %s_nfa {\n' % self.name)
        for index, state, arcs in self._numbered_arcs():
            final_marker = "(final)" if state is self.end else ""
            writer(' %d [label="State %d %s"];\n' % (index, index, final_marker))
            for label, target_index in arcs:
                if label is None:
                    writer(" %d -> %d [style=dotted label=ε];\n" % (index, target_index))
                else:
                    # Labels quoted with ' are re-quoted with " for DOT.
                    writer(" %d -> %d [label=%s];\n" % (index, target_index, label.replace("'", '"')))
        writer('}\n')
 | ||
| 
 | ||
| 
 | ||
class NFAArc:
    """A directed transition leading to an NFA state.

    Two kinds of transition exist:

        * A label transition: an input equal to the label must be
          consumed to take the arc.
        * An epsilon transition: the arc can be taken without consuming
          any input symbol.

    Attributes:
        target (NFAState): The state the arc leads to.
        label (Optional[str]): The label that must be consumed to take the
            arc; `None` marks an epsilon transition.
    """

    def __init__(self, target, label):
        self.label = label
        self.target = target

    def __repr__(self):
        kind = self.__class__.__name__
        return "<{!s}: {!s}>".format(kind, self.label)
 | ||
| 
 | ||
| 
 | ||
class NFAState:
    """A single state of a non-deterministic finite automaton.

    Attributes:
        rule_name (str): The name of the rule this state belongs to.
        arcs (List[NFAArc]): The outgoing arcs of this state, in the order
            they were added.
    """

    def __init__(self, rule_name):
        self.arcs = []
        self.rule_name = rule_name

    def add_arc(self, target, label=None):
        """Connect this state to *target* with a new outgoing arc.

        The arc is appended to the list of transitions leaving this state.
        Passing a label creates a transition that consumes an input equal
        to that label; omitting it creates an epsilon transition.

        Parameters:
            target (NFAState): The state at the end of the new arc.
            label (Optional[str]): The label that must be consumed to take
                the arc, or `None` (the default) for an epsilon transition.
        """
        assert isinstance(label, str) or label is None
        assert isinstance(target, NFAState)
        new_arc = NFAArc(target, label)
        self.arcs.append(new_arc)

    def __repr__(self):
        kind = self.__class__.__name__
        return "<{!s}: from {!s}>".format(kind, self.rule_name)
 | ||
| 
 | ||
| 
 | ||
class DFA:
    """A deterministic finite automaton.

    A deterministic finite automaton is a form of a finite state machine
    that obeys the following rules:

       * Each of the transitions is uniquely determined by
         the source state and input symbol.
       * Reading an input symbol is required for each state
         transition (no epsilon transitions).

    The finite-state machine will accept or reject a string of symbols
    and only produces a unique computation of the automaton for each input
    string. The DFA must have a unique starting state (represented as the first
    element in the list of states) but can have multiple final states.

    Attributes:
        name (str): The name of the rule the DFA is representing.
        states (List[DFAState]): A collection of DFA states.
    """

    def __init__(self, name, states):
        self.name = name
        self.states = states

    @classmethod
    def from_nfa(cls, nfa):
        """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.

        To simulate the operation of a DFA on a given input string, it's
        necessary to keep track of a single state at any time, or more precisely,
        the state that the automaton will reach after seeing a prefix of the
        input. In contrast, to simulate an NFA, it's necessary to keep track of
        a set of states: all of the states that the automaton could reach after
        seeing the same prefix of the input, according to the nondeterministic
        choices made by the automaton. There are two possible sources of
        non-determinism:

        1) Multiple transitions with the same label

                         'A'     +-------+
                    +----------->+ State +----------->+
                    |            |   2   |
            +-------+            +-------+
            | State |
            |   1   |            +-------+
            +-------+            | State |
                    +----------->+   3   +----------->+
                         'A'     +-------+

        2) Epsilon transitions (transitions that can be taken without consuming any input)

            +-------+            +-------+
            | State |     ε      | State |
            |   1   +----------->+   2   +----------->+
            +-------+            +-------+

        In the first case we can't determine which transition should be
        followed when given an input A. In the second case the problem is that
        we can choose either to follow the transition or not to follow it. To
        solve this we imagine that we follow all possibilities at the same time
        and we construct new states from the set of all possible reachable
        states. For every case in the previous example:

        1) For multiple transitions with the same label we collapse all of the
           final states under the same one

            +-------+            +-------+
            | State |     'A'    | State |
            |   1   +----------->+  2-3  +----------->+
            +-------+            +-------+

        2) For epsilon transitions we collapse all epsilon-reachable states
           into the same one

            +-------+
            | State |
            |  1-2  +----------->
            +-------+

        Because the DFA states consist of sets of NFA states, an n-state NFA
        may be converted to a DFA with at most 2**n states. Notice that the
        constructed DFA is not minimal and can be simplified or reduced
        afterwards.

        Parameters:
            nfa (NFA): The NFA to transform to DFA.

        Returns:
            DFA: A new DFA named after *nfa* whose first state corresponds to
            the epsilon-closure of the NFA starting state.
        """
        assert isinstance(nfa, NFA)

        def add_closure(nfa_state, base_nfa_set):
            """Calculate the epsilon-closure of a given state

            Add to the *base_nfa_set* all the states that are
            reachable from *nfa_state* via epsilon-transitions.
            """
            assert isinstance(nfa_state, NFAState)
            if nfa_state in base_nfa_set:
                # Already visited: stops the recursion on epsilon cycles.
                return
            base_nfa_set.add(nfa_state)
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.label is None:
                    add_closure(nfa_arc.target, base_nfa_set)

        # Calculate the epsilon-closure of the starting state
        base_nfa_set = set()
        add_closure(nfa.start, base_nfa_set)

        # Start by visiting the NFA starting state (there is only one).
        states = [DFAState(nfa.name, base_nfa_set, nfa.end)]

        for state in states:  # NB states grow while we're iterating

            # Find transitions from the current state to other reachable states
            # and store them in a mapping that correlates the label to all the
            # possible reachable states that can be obtained by consuming a
            # token equal to the label. Each set of all the states that can
            # be reached after following a label will be a DFA state.
            arcs = {}
            for nfa_state in state.nfa_set:
                for nfa_arc in nfa_state.arcs:
                    if nfa_arc.label is not None:
                        nfa_set = arcs.setdefault(nfa_arc.label, set())
                        # All states that can be reached by epsilon-transitions
                        # are also included in the set of reachable states.
                        add_closure(nfa_arc.target, nfa_set)

            # Now create new DFA states by visiting all possible transitions
            # between the current DFA state and the new power-set states (each
            # nfa_set) via the different labels. As the nodes are appended to
            # *states* this is performing a breadth-first search traversal over
            # the power-set of the states of the original NFA.
            for label, nfa_set in sorted(arcs.items()):
                for existing_state in states:
                    if existing_state.nfa_set == nfa_set:
                        # The DFA state already exists for this rule.
                        next_state = existing_state
                        break
                else:
                    next_state = DFAState(nfa.name, nfa_set, nfa.end)
                    states.append(next_state)

                # Add a transition between the current DFA state and the new
                # DFA state (the power-set state) via the current label.
                state.add_arc(next_state, label)

        return cls(nfa.name, states)

    def __iter__(self):
        return iter(self.states)

    def simplify(self):
        """Attempt to reduce the number of states of the DFA

        Transform the DFA into an equivalent DFA that has fewer states. Two
        classes of states can be removed or merged from the original DFA without
        affecting the language it accepts to minimize it:

            * Unreachable states can not be reached from the initial
              state of the DFA, for any input string.
            * Nondistinguishable states are those that cannot be distinguished
              from one another for any input string.

        This algorithm does not achieve the optimal fully-reduced solution, but it
        works well enough for the particularities of the Python grammar. The
        algorithm repeatedly looks for two states that have the same set of
        arcs (same labels pointing to the same nodes) and unifies them, until
        things stop changing.
        """
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(self.states):
                for j in range(i + 1, len(self.states)):
                    state_j = self.states[j]
                    if state_i == state_j:
                        # state_j duplicates state_i: drop it and redirect
                        # every arc that pointed at it to state_i.
                        del self.states[j]
                        for state in self.states:
                            state.unifystate(state_j, state_i)
                        changes = True
                        # The list just shrank, so the range computed for j is
                        # stale; leave the inner loop and let the enclosing
                        # while loop trigger another full pass.
                        break

    def _index_of(self, target):
        """Return the position of *target* in ``self.states`` by identity.

        ``list.index`` is deliberately avoided: it compares with ``==``, and
        DFA states compare equal when they merely look alike (same finality
        and same arcs), which would report the wrong state number for
        distinct-but-similar states.

        Raises:
            ValueError: If *target* is not one of ``self.states``.
        """
        for index, state in enumerate(self.states):
            if state is target:
                return index
        raise ValueError("%r is not in list" % (target,))

    def dump(self, writer=print):
        """Dump a textual representation of the DFA.

        Parameters:
            writer (callable): Called print-style, once per output line.
                Defaults to ``print``.
        """
        for i, state in enumerate(self.states):
            writer("  State", i, "(final)" if state.is_final else "")
            for label, target in sorted(state.arcs.items()):
                writer("    %s -> %d" % (label, self._index_of(target)))

    def dump_graph(self, writer):
        """Dump a DOT representation of the DFA.

        Parameters:
            writer (callable): Called with one string per emitted chunk
                (for example ``file.write``).
        """
        writer('digraph %s_dfa {\n' % self.name)
        for i, state in enumerate(self.states):
            final_marker = "(final)" if state.is_final else ""
            writer(' %d [label="State %d %s"];\n' % (i, i, final_marker))
            for label, target in sorted(state.arcs.items()):
                # Labels quoted with ' are re-quoted with " for DOT.
                writer(" %d -> %d [label=%s];\n" % (i, self._index_of(target), label.replace("'", '"')))
        writer('}\n')
 | ||
| 
 | ||
| 
 | ||
class DFAState:
    """A state of a DFA

    Attributes:
        rule_name (str): The name of the DFA rule containing the represented state.
        nfa_set (Set[NFAState]): The set of NFA states used to create this state.
        is_final (bool): True if the state represents an accepting state of the
            DFA containing this state, i.e. the NFA's final state is a member
            of ``nfa_set``.
        arcs (Dict[str, DFAState]): A mapping representing transitions between
            the current DFA state and another DFA state via following a label.
    """

    def __init__(self, rule_name, nfa_set, final):
        """Create a DFA state from a set of NFA states.

        Parameters:
            rule_name (str): The name of the rule the state belongs to.
            nfa_set (Set[NFAState]): The non-empty set of NFA states that
                this DFA state stands for.
            final (NFAState): The final state of the originating NFA; the
                new state is accepting when *final* is in *nfa_set*.
        """
        assert isinstance(nfa_set, set)
        # Spot-check one (arbitrary) member rather than the whole set.
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.rule_name = rule_name
        self.nfa_set = nfa_set
        self.arcs = {}  # map from terminals/nonterminals to DFAState
        self.is_final = final in nfa_set

    def add_arc(self, target, label):
        """Add a new arc to the current state.

        Parameters:
            target (DFAState): The DFA state at the end of the arc.
            label (str): The label representing the token that must be consumed
                to perform this transition. Each label may be used only once
                per state.
        """
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(target, DFAState)
        self.arcs[label] = target

    def unifystate(self, old, new):
        """Replace all arcs from the current node to *old* with *new*.

        Parameters:
            old (DFAState): The DFA state to remove from all existing arcs.
            new (DFAState): The DFA state to replace in all existing arcs.
        """
        # Only values of existing keys are reassigned, so the dict never
        # changes size while it is being iterated.
        for label, next_ in self.arcs.items():
            if next_ is old:
                self.arcs[label] = new

    def __eq__(self, other):
        """Compare two DFA states structurally.

        Two states are equal when they agree on finality and have the same
        labels leading to the very same (identical) target states. The
        nfa_set does not matter for equality.
        """
        if not isinstance(other, DFAState):
            # Let Python try the reflected comparison / fall back to identity
            # instead of failing on comparisons with unrelated objects.
            return NotImplemented
        if self.is_final != other.is_final:
            return False
        # We cannot just return self.arcs == other.arcs because that
        # would invoke this method recursively if there are any cycles.
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next_ in self.arcs.items():
            if next_ is not other.arcs.get(label):
                return False
        return True

    # States are mutable and compared structurally, so they must not be
    # hashable. Defining __eq__ already implies this in Python 3; it is kept
    # explicit for clarity.
    __hash__ = None

    def __repr__(self):
        return "<%s: %s is_final=%s>" % (
            self.__class__.__name__,
            self.rule_name,
            self.is_final,
        )
 | 
