2026-1-6
This commit is contained in:
0
venv/Lib/site-packages/whoosh/automata/__init__.py
Normal file
0
venv/Lib/site-packages/whoosh/automata/__init__.py
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
715
venv/Lib/site-packages/whoosh/automata/fsa.py
Normal file
715
venv/Lib/site-packages/whoosh/automata/fsa.py
Normal file
@@ -0,0 +1,715 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import itertools
|
||||
import operator
|
||||
import sys
|
||||
from bisect import bisect_left
|
||||
from collections import defaultdict
|
||||
|
||||
from whoosh.compat import iteritems, next, text_type, unichr, xrange
|
||||
|
||||
|
||||
# NUL character: sorts before every other character, used as the minimal
# possible suffix when stepping to the "next" term (see find_all_matches).
unull = unichr(0)
|
||||
|
||||
|
||||
# Marker constants
|
||||
|
||||
class Marker(object):
    """A named sentinel object used to tag special transition labels
    (see the EPSILON and ANY singletons below)."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return "<{0}>".format(self.name)
|
||||
|
||||
|
||||
# Singleton markers used as special transition labels: EPSILON marks a
# free (no-input) transition; ANY matches any single input symbol.
EPSILON = Marker("EPSILON")
ANY = Marker("ANY")
|
||||
|
||||
|
||||
# Base class
|
||||
|
||||
class FSA(object):
    """Base class for finite state automata (shared by NFA and DFA).

    Stores the initial state, a transition table, and the set of final
    (accepting) states.  Subclasses define the transition-table layout and
    implement next_state()/is_final()/add_transition()/add_final_state().
    """

    def __init__(self, initial):
        self.initial = initial
        self.transitions = {}
        self.final_states = set()

    def __len__(self):
        # Number of distinct states (sources and destinations).
        return len(self.all_states())

    def __eq__(self, other):
        if self.initial != other.initial:
            return False
        if self.final_states != other.final_states:
            return False
        st = self.transitions
        ot = other.transitions
        # NOTE(review): list(dict) compares keys in *insertion* order, so two
        # otherwise-equal automata whose transitions were added in different
        # orders compare unequal -- confirm this strictness is intended.
        if list(st) != list(ot):
            return False
        for key in st:
            if st[key] != ot[key]:
                return False
        return True

    def all_states(self):
        """Return the set of every state mentioned in the transition table
        (both sources and destinations)."""
        stateset = set(self.transitions)
        for src, trans in iteritems(self.transitions):
            stateset.update(trans.values())
        return stateset

    def all_labels(self):
        """Return the set of all transition labels used by the automaton."""
        labels = set()
        for src, trans in iteritems(self.transitions):
            labels.update(trans)
        return labels

    def get_labels(self, src):
        """Return an iterator over the labels of transitions leaving src."""
        return iter(self.transitions.get(src, []))

    def generate_all(self, state=None, sofar=""):
        """Recursively yield every string accepted by the automaton, walking
        labels in sorted order.  Only terminates on acyclic automata."""
        state = self.start() if state is None else state
        if self.is_final(state):
            yield sofar
        for label in sorted(self.get_labels(state)):
            newstate = self.next_state(state, label)
            for string in self.generate_all(newstate, sofar + label):
                yield string

    def start(self):
        """Return the start state."""
        return self.initial

    def next_state(self, state, label):
        raise NotImplementedError

    def is_final(self, state):
        raise NotImplementedError

    def add_transition(self, src, label, dest):
        raise NotImplementedError

    def add_final_state(self, state):
        raise NotImplementedError

    def to_dfa(self):
        raise NotImplementedError

    def accept(self, string, debug=False):
        """Return True if the automaton accepts the given string."""
        state = self.start()

        for label in string:
            if debug:
                print(" ", state, "->", label, "->")

            state = self.next_state(state, label)
            # A falsy state (None / empty state set) means the walk died.
            if not state:
                break

        return self.is_final(state)

    def append(self, fsa):
        """Splice another automaton onto the end of this one: every current
        final state gets an EPSILON transition to fsa's initial state, and
        fsa's final states replace this automaton's final states."""
        self.transitions.update(fsa.transitions)
        for state in self.final_states:
            self.add_transition(state, EPSILON, fsa.initial)
        self.final_states = fsa.final_states
|
||||
|
||||
|
||||
# Implementations
|
||||
|
||||
class NFA(FSA):
    """Nondeterministic finite automaton.

    A "state" as seen by the public methods (start/next_state/is_final) is
    a frozenset of underlying state objects, closed under EPSILON
    transitions.  The transition table maps
    ``state -> {label: set(dest_states)}``.
    """

    def __init__(self, initial):
        self.transitions = {}
        self.final_states = set()
        self.initial = initial

    def dump(self, stream=sys.stdout):
        # Debugging aid: print the transition table.
        starts = self.start()
        for src in self.transitions:
            beg = "@" if src in starts else " "
            print(beg, src, file=stream)
            xs = self.transitions[src]
            for label in xs:
                dests = xs[label]
                end = "||" if self.is_final(dests) else ""
                # NOTE(review): `end` is computed but never printed -- the
                # line that printed `label`/`dests`/`end` appears to be
                # missing here; confirm against the original source.

    def start(self):
        """Return the start state: the epsilon closure of the initial
        state, as a frozenset."""
        return frozenset(self._expand(set([self.initial])))

    def add_transition(self, src, label, dest):
        """Add src --label--> dest.  Labels map to *sets* of destinations,
        since this is an NFA."""
        self.transitions.setdefault(src, {}).setdefault(label, set()).add(dest)

    def add_final_state(self, state):
        """Mark an underlying state object as accepting."""
        self.final_states.add(state)

    def triples(self):
        """Yield every (src, label, dest) edge in the automaton."""
        for src, trans in iteritems(self.transitions):
            for label, dests in iteritems(trans):
                for dest in dests:
                    yield src, label, dest

    def is_final(self, states):
        """A state *set* is accepting if it contains any final state."""
        return bool(self.final_states.intersection(states))

    def _expand(self, states):
        """Mutate and return `states`, adding every state reachable through
        EPSILON transitions (the epsilon closure)."""
        transitions = self.transitions
        frontier = set(states)
        while frontier:
            state = frontier.pop()
            if state in transitions and EPSILON in transitions[state]:
                new_states = transitions[state][EPSILON].difference(states)
                frontier.update(new_states)
                states.update(new_states)
        return states

    def next_state(self, states, label):
        """Return the epsilon-closed frozenset of states reachable from any
        state in `states` on `label`.  ANY transitions match every label."""
        transitions = self.transitions
        dest_states = set()
        for state in states:
            if state in transitions:
                xs = transitions[state]
                if label in xs:
                    dest_states.update(xs[label])
                if ANY in xs:
                    dest_states.update(xs[ANY])
        return frozenset(self._expand(dest_states))

    def get_labels(self, states):
        """Return the set of labels leaving any state in the given set."""
        transitions = self.transitions
        labels = set()
        for state in states:
            if state in transitions:
                labels.update(transitions[state])
        return labels

    def embed(self, other):
        # Copy all transitions from the other NFA into this one
        for s, othertrans in iteritems(other.transitions):
            trans = self.transitions.setdefault(s, {})
            for label, otherdests in iteritems(othertrans):
                dests = trans.setdefault(label, set())
                dests.update(otherdests)

    def insert(self, src, other, dest):
        """Insert the `other` NFA between states src and dest, connected by
        EPSILON transitions on both sides."""
        self.embed(other)

        # Connect src to the other NFA's initial state, and the other
        # NFA's final states to dest
        self.add_transition(src, EPSILON, other.initial)
        for finalstate in other.final_states:
            self.add_transition(finalstate, EPSILON, dest)

    def to_dfa(self):
        """Convert to a DFA via the subset construction.  Each DFA state is
        a frozenset of NFA states; ANY arcs become the DFA state's default
        transition."""
        dfa = DFA(self.start())
        frontier = [self.start()]
        seen = set()
        while frontier:
            current = frontier.pop()
            if self.is_final(current):
                dfa.add_final_state(current)
            labels = self.get_labels(current)
            for label in labels:
                if label is EPSILON:
                    # Epsilon moves are already folded into the state sets.
                    continue
                new_state = self.next_state(current, label)
                if new_state not in seen:
                    frontier.append(new_state)
                    seen.add(new_state)
                    if self.is_final(new_state):
                        dfa.add_final_state(new_state)
                if label is ANY:
                    dfa.set_default_transition(current, new_state)
                else:
                    dfa.add_transition(current, label, new_state)
        return dfa
|
||||
|
||||
|
||||
class DFA(FSA):
    """Deterministic finite automaton: each (state, label) pair has at most
    one destination.  `defaults` maps a state to the destination taken for
    any label without an explicit transition (produced from ANY arcs)."""

    def __init__(self, initial):
        self.initial = initial
        self.transitions = {}
        # state -> destination for otherwise-unmatched labels
        self.defaults = {}
        self.final_states = set()
        # Lazy cache of sorted outgoing labels per state, built by
        # find_next_edge().
        self.outlabels = {}

    def dump(self, stream=sys.stdout):
        # Debugging aid: print the transition table.
        for src in sorted(self.transitions):
            beg = "@" if src == self.initial else " "
            print(beg, src, file=stream)
            xs = self.transitions[src]
            for label in sorted(xs):
                dest = xs[label]
                end = "||" if self.is_final(dest) else ""
                # NOTE(review): `end` is computed but never printed -- the
                # line that printed `label`/`dest`/`end` appears to be
                # missing here; confirm against the original source.

    def start(self):
        """Return the start state."""
        return self.initial

    def add_transition(self, src, label, dest):
        """Set the (single) destination for src on `label`."""
        self.transitions.setdefault(src, {})[label] = dest

    def set_default_transition(self, src, dest):
        """Set the destination taken from src for any unmatched label."""
        self.defaults[src] = dest

    def add_final_state(self, state):
        """Mark a state as accepting."""
        self.final_states.add(state)

    def is_final(self, state):
        return state in self.final_states

    def next_state(self, src, label):
        """Return the destination of src on `label`, falling back to the
        state's default transition, or None when the walk dies."""
        trans = self.transitions.get(src, {})
        return trans.get(label, self.defaults.get(src, None))

    def next_valid_string(self, string, asbytes=False):
        """Return the lexicographically smallest string accepted by this
        DFA that is >= `string`, or None if no such string exists."""
        state = self.start()
        stack = []

        # Follow the DFA as far as possible
        i = 0  # keeps string[:i + 1] valid below when `string` is empty
        for i, label in enumerate(string):
            stack.append((string[:i], state, label))
            state = self.next_state(state, label)
            if not state:
                break
        else:
            # Consumed the whole string without dying
            stack.append((string[:i + 1], state, None))

        if self.is_final(state):
            # Word is already valid
            return string

        # Perform a 'wall following' search for the lexicographically smallest
        # accepting state.
        while stack:
            path, state, label = stack.pop()
            label = self.find_next_edge(state, label, asbytes=asbytes)
            if label:
                path += label
                state = self.next_state(state, label)
                if self.is_final(state):
                    return path
                stack.append((path, state, None))
        return None

    def find_next_edge(self, s, label, asbytes):
        """Return the smallest label leaving state `s` that is >= the
        successor of `label` (or the smallest label at all when `label` is
        None); None when there is no such edge."""
        if label is None:
            # NOTE(review): in asbytes mode this seeds a bytes object while
            # the other branch produces an int (byte value + 1) -- confirm
            # which representation labels use after u_to_utf8().
            label = b"\x00" if asbytes else u'\0'
        else:
            label = (label + 1) if asbytes else unichr(ord(label) + 1)
        trans = self.transitions.get(s, {})
        if label in trans or s in self.defaults:
            return label

        # Binary-search the (cached) sorted label list for the first label
        # >= the successor computed above.
        try:
            labels = self.outlabels[s]
        except KeyError:
            self.outlabels[s] = labels = sorted(trans)

        pos = bisect_left(labels, label)
        if pos < len(labels):
            return labels[pos]
        return None

    def reachable_from(self, src, inclusive=True):
        """Return the set of states reachable from src by following
        transitions (including src itself when `inclusive`)."""
        transitions = self.transitions

        reached = set()
        if inclusive:
            reached.add(src)

        stack = [src]
        seen = set()
        while stack:
            src = stack.pop()
            seen.add(src)
            # NOTE(review): raises KeyError if a visited state has no entry
            # in the transition table (e.g. a pure sink state) -- confirm
            # every reachable state has an entry.
            for _, dest in iteritems(transitions[src]):
                reached.add(dest)
                if dest not in seen:
                    stack.append(dest)
        return reached

    def minimize(self):
        """Minimize in place: delete unreachable states, merge equivalent
        states by partition refinement, then drop dead (non-final, no-exit)
        states."""
        transitions = self.transitions
        initial = self.initial

        # Step 1: Delete unreachable states
        reachable = self.reachable_from(initial)
        for src in list(transitions):
            if src not in reachable:
                del transitions[src]
        final_states = self.final_states.intersection(reachable)
        labels = self.all_labels()

        # Step 2: Partition the states into equivalence sets, starting from
        # the final/non-final split and refining until stable
        changed = True
        parts = [final_states, reachable - final_states]
        while changed:
            changed = False
            for i in xrange(len(parts)):
                part = parts[i]
                changed_part = False
                for label in labels:
                    next_part = None
                    new_part = set()
                    for state in part:
                        # NOTE(review): KeyError for a state with no
                        # transition-table entry -- confirm all partitioned
                        # states have one.
                        dest = transitions[state].get(label)
                        if dest is not None:
                            if next_part is None:
                                # First destination seen for this label
                                # defines the part the rest must agree with.
                                for p in parts:
                                    if dest in p:
                                        next_part = p
                            elif dest not in next_part:
                                # Disagreement: split this state off.
                                new_part.add(state)
                                changed = True
                                changed_part = True
                if changed_part:
                    old_part = part - new_part
                    parts.pop(i)
                    parts.append(old_part)
                    parts.append(new_part)
                    # Restart the scan over the new partition list.
                    break

        # Choose one state from each equivalence set and map all equivalent
        # states to it
        new_trans = {}

        # Create mapping
        mapping = {}
        new_initial = None
        for part in parts:
            representative = part.pop()
            if representative is initial:
                new_initial = representative
            mapping[representative] = representative
            new_trans[representative] = {}
            for state in part:
                if state is initial:
                    new_initial = representative
                mapping[state] = representative
        assert new_initial is not None

        # Apply mapping to existing transitions
        new_finals = set(mapping[s] for s in final_states)
        for state, d in iteritems(new_trans):
            trans = transitions[state]
            for label, dest in iteritems(trans):
                d[label] = mapping[dest]

        # Remove dead states - non-final states with no outgoing arcs except
        # to themselves
        non_final_srcs = [src for src in new_trans if src not in new_finals]
        removing = set()
        for src in non_final_srcs:
            dests = set(new_trans[src].values())
            dests.discard(src)
            if not dests:
                removing.add(src)
                del new_trans[src]
        # Delete transitions to removed dead states
        for t in new_trans.values():
            for label in list(t):
                if t[label] in removing:
                    del t[label]

        self.transitions = new_trans
        self.initial = new_initial
        self.final_states = new_finals

    def to_dfa(self):
        """Already deterministic; return self."""
        return self
|
||||
|
||||
|
||||
# Useful functions
|
||||
|
||||
def renumber_dfa(dfa, base=0):
    """Return a copy of `dfa` whose states are renamed to consecutive
    integers starting at `base` (useful after the subset construction,
    which produces frozenset states)."""
    counter = itertools.count(base)
    mapping = {}

    def remap(state):
        # Assign each distinct state a fresh number the first time we see it.
        try:
            return mapping[state]
        except KeyError:
            mapping[state] = number = next(counter)
            return number

    newdfa = DFA(remap(dfa.initial))
    for src, trans in iteritems(dfa.transitions):
        for label, dest in iteritems(trans):
            newdfa.add_transition(remap(src), label, remap(dest))
    for finalstate in dfa.final_states:
        newdfa.add_final_state(remap(finalstate))
    for src, dest in iteritems(dfa.defaults):
        newdfa.set_default_transition(remap(src), remap(dest))
    return newdfa
|
||||
|
||||
|
||||
def u_to_utf8(dfa, base=0):
    """Convert a DFA with single-character unicode labels, in place, into
    one with UTF-8 byte labels; multi-byte characters are expanded into
    chains of fresh intermediate states numbered from `base`.

    Raises an Exception on ANY labels (which cannot be byte-expanded);
    EPSILON labels are left untouched.
    """
    c = itertools.count(base)
    transitions = dfa.transitions

    for src, trans in iteritems(transitions):
        for label, dest in list(iteritems(trans)):
            if label is EPSILON:
                continue
            elif label is ANY:
                raise Exception
            else:
                assert isinstance(label, text_type)
                label8 = label.encode("utf8")
                # Walk the UTF-8 bytes with a separate cursor.  The original
                # rebound `src` itself here, which corrupted the anchor state
                # for every subsequent label of the same source state.
                current = src
                for i, byte in enumerate(label8):
                    if i < len(label8) - 1:
                        st = next(c)
                        dfa.add_transition(current, byte, st)
                        current = st
                    else:
                        dfa.add_transition(current, byte, dest)
                del trans[label]
|
||||
|
||||
|
||||
def find_all_matches(dfa, lookup_func, first=unull):
    """
    Yield every word that is both accepted by `dfa` and present in the
    database, by ping-ponging between dfa.next_valid_string() and
    lookup_func.

    Args:
        dfa: a DFA over the term alphabet (e.g. built from
            levenshtein_automaton(...).to_dfa()).
        lookup_func: a single-argument function that returns the first word
            in the database that is greater than or equal to the input
            argument, or None when the database is exhausted.
        first: the string to start the search from (defaults to NUL, i.e.
            before everything).

    Yields:
        Every database word accepted by the DFA.
    """

    match = dfa.next_valid_string(first)
    while match:
        key = lookup_func(match)
        if key is None:
            return
        if match == key:
            yield match
        # key + NUL is the immediate lexicographic successor of key, so the
        # next round cannot return the same word again.
        key += unull
        match = dfa.next_valid_string(key)
|
||||
|
||||
|
||||
# Construction functions
|
||||
|
||||
def reverse_nfa(n):
    """Return an NFA accepting the reversal of the given NFA's language:
    every edge is flipped, a fresh start state epsilon-connects to the old
    final states, and the old initial state becomes the only final state."""
    newstart = object()
    reversed_nfa = NFA(newstart)
    for src, trans in iteritems(n.transitions):
        for label, destset in iteritems(trans):
            for dest in destset:
                reversed_nfa.add_transition(dest, label, src)
    for oldfinal in n.final_states:
        reversed_nfa.add_transition(newstart, EPSILON, oldfinal)
    reversed_nfa.add_final_state(n.initial)
    return reversed_nfa
|
||||
|
||||
|
||||
def product(dfa1, op, dfa2):
    """Generic product construction: build a DFA over pairs of states from
    `dfa1` and `dfa2`, keeping a pair when `op` (e.g. operator.and_ for
    intersection, operator.or_ for union) approves of the two sides.

    Fixes over the original: the loop no longer rebinds state1/state2
    (which corrupted the source pair for the remaining labels), missing
    transition entries no longer raise KeyError, and a `seen` set prevents
    re-pushing pairs forever on cyclic DFAs.
    """
    dfa1 = dfa1.to_dfa()
    dfa2 = dfa2.to_dfa()
    start = (dfa1.start(), dfa2.start())
    dfa = DFA(start)
    stack = [start]
    seen = set([start])
    while stack:
        src = stack.pop()
        state1, state2 = src
        # .get() so a state with no outgoing transitions is a sink, not a
        # KeyError.
        trans1 = set(dfa1.transitions.get(state1, ()))
        trans2 = set(dfa2.transitions.get(state2, ()))
        for label in trans1.intersection(trans2):
            # Fresh names: the source pair must stay intact for the other
            # labels of this state.
            dest1 = dfa1.next_state(state1, label)
            dest2 = dfa2.next_state(state2, label)
            if op(dest1 is not None, dest2 is not None):
                dest = (dest1, dest2)
                dfa.add_transition(src, label, dest)
                if dest not in seen:
                    seen.add(dest)
                    stack.append(dest)
                if op(dfa1.is_final(dest1), dfa2.is_final(dest2)):
                    dfa.add_final_state(dest)
    return dfa
|
||||
|
||||
|
||||
def intersection(dfa1, dfa2):
    """Return a DFA accepting only strings accepted by *both* automata."""
    return product(dfa1, operator.and_, dfa2)
|
||||
|
||||
|
||||
def union(dfa1, dfa2):
    """Return a DFA accepting strings accepted by *either* automaton."""
    return product(dfa1, operator.or_, dfa2)
|
||||
|
||||
|
||||
def epsilon_nfa():
    """Return an NFA that accepts only the empty string."""
    return basic_nfa(EPSILON)
|
||||
|
||||
|
||||
def dot_nfa():
    """Return an NFA that accepts any single character (regex '.')."""
    return basic_nfa(ANY)
|
||||
|
||||
|
||||
def basic_nfa(label):
    """Return a two-state NFA with a single `label` transition from its
    start state to its (final) end state."""
    start, end = object(), object()
    nfa = NFA(start)
    nfa.add_transition(start, label, end)
    nfa.add_final_state(end)
    return nfa
|
||||
|
||||
|
||||
def charset_nfa(labels):
    """Return an NFA accepting any single character from `labels`
    (parallel transitions between one start and one end state)."""
    start, end = object(), object()
    nfa = NFA(start)
    for label in labels:
        nfa.add_transition(start, label, end)
    nfa.add_final_state(end)
    return nfa
|
||||
|
||||
|
||||
def string_nfa(string):
    """Return an NFA accepting exactly the given string: a simple chain of
    states, one transition per character."""
    current = object()
    tail = object()  # only survives when `string` is empty
    nfa = NFA(current)
    for label in string:
        tail = object()
        nfa.add_transition(current, label, tail)
        current = tail
    nfa.add_final_state(tail)
    return nfa
|
||||
|
||||
|
||||
def choice_nfa(n1, n2):
    """Return an NFA accepting the union of the two NFAs' languages
    (n1|n2), built by epsilon-inserting both between a fresh start and
    end state."""
    start, end = object(), object()
    nfa = NFA(start)
    #   -> nfa1 -
    #  /         \
    # s           e
    #  \         /
    #   -> nfa2 -
    nfa.insert(start, n1, end)
    nfa.insert(start, n2, end)
    nfa.add_final_state(end)
    return nfa
|
||||
|
||||
|
||||
def concat_nfa(n1, n2):
    """Return an NFA accepting the concatenation of the two NFAs'
    languages: n1 runs from start to a middle state, n2 from the middle
    state to the end."""
    start, middle, end = object(), object(), object()
    nfa = NFA(start)
    nfa.insert(start, n1, middle)
    nfa.insert(middle, n2, end)
    nfa.add_final_state(end)
    return nfa
|
||||
|
||||
|
||||
def star_nfa(n):
    """Return an NFA accepting zero or more repetitions of n's language
    (Kleene star)."""
    start, end = object(), object()
    nfa = NFA(start)
    #   -----<-----
    #  /           \
    # s ---> n ---> e
    #  \           /
    #   ----->-----
    nfa.insert(start, n, end)
    # Skip edge: accept zero repetitions.
    nfa.add_transition(start, EPSILON, end)
    # Loop edges: after finishing n, go around again.
    for finalstate in n.final_states:
        nfa.add_transition(finalstate, EPSILON, start)
    nfa.add_final_state(end)
    return nfa
|
||||
|
||||
|
||||
def plus_nfa(n):
    """One-or-more (n+): one mandatory copy of n followed by n*."""
    return concat_nfa(n, star_nfa(n))
|
||||
|
||||
|
||||
def optional_nfa(n):
    """Zero-or-one (n?): choose between n and the empty string."""
    return choice_nfa(n, epsilon_nfa())
|
||||
|
||||
|
||||
# Daciuk Mihov DFA construction algorithm
|
||||
|
||||
class DMNode(object):
    """A mutable node used while building a DFA with the Daciuk-Mihov
    algorithm (see strings_dfa); `n` is the node's state number, `arcs`
    maps labels to destination state numbers, `final` marks acceptance."""

    def __init__(self, n):
        self.n = n
        self.arcs = {}
        self.final = False

    def __repr__(self):
        return "<%s, %r>" % (self.n, self.tuple())

    def __hash__(self):
        # Hash by content so equivalent nodes can be pooled in a dict.
        return hash(self.tuple())

    def tuple(self):
        """Return a hashable (sorted arcs, final flag) fingerprint of this
        node, used as the key for suffix sharing."""
        return tuple(sorted(self.arcs.items())), self.final
|
||||
|
||||
|
||||
def strings_dfa(strings):
    """Build a minimal DFA accepting exactly the given strings, using the
    Daciuk-Mihov incremental construction.  States are integers; 0 is the
    initial state.  The strings must be unique, non-empty, and supplied in
    ascending order."""
    dfa = DFA(0)
    c = itertools.count(1)

    last = ""
    seen = {}  # node fingerprint tuple -> canonical state number
    nodes = [DMNode(0)]  # the unfrozen path for the most recent string

    for string in strings:
        if string <= last:
            raise Exception("Strings must be in order")
        if not string:
            raise Exception("Can't add empty string")

        # Find the common prefix with the previous string
        i = 0
        while i < len(last) and i < len(string) and last[i] == string[i]:
            i += 1
        prefixlen = i

        # Freeze the transitions after the prefix, since they're not shared
        add_suffix(dfa, nodes, last, prefixlen + 1, seen)

        # Create new nodes for the substring after the prefix
        for label in string[prefixlen:]:
            node = DMNode(next(c))
            # Create an arc from the previous node to this node
            nodes[-1].arcs[label] = node.n
            nodes.append(node)
        # Mark the last node as an accept state
        nodes[-1].final = True

        last = string

    # Freeze whatever remains of the last string's path
    if len(nodes) > 1:
        add_suffix(dfa, nodes, last, 0, seen)
    return dfa
|
||||
|
||||
|
||||
def add_suffix(dfa, nodes, last, downto, seen):
    """Freeze the tail of the in-progress node path `nodes` (every node at
    index >= downto) into the DFA, pooling nodes that are identical to
    already-frozen ones so the DFA stays minimal.  `last` is the string
    whose path `nodes` currently represents."""
    while len(nodes) > downto:
        node = nodes.pop()
        tup = node.tuple()

        # If a node just like this one (final/nonfinal, same arcs to same
        # destinations) is already seen, replace with it
        try:
            this = seen[tup]
        except KeyError:
            this = node.n
            if node.final:
                dfa.add_final_state(this)
            seen[tup] = this
        else:
            # If we replaced the node with an already seen one, fix the parent
            # node's pointer to this
            parent = nodes[-1]
            inlabel = last[len(nodes) - 1]
            parent.arcs[inlabel] = this

        # Add the node's transitions to the DFA
        for label, dest in iteritems(node.arcs):
            dfa.add_transition(this, label, dest)
|
||||
|
||||
|
||||
|
||||
|
||||
90
venv/Lib/site-packages/whoosh/automata/glob.py
Normal file
90
venv/Lib/site-packages/whoosh/automata/glob.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# Copyright 2012 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from whoosh.automata.fsa import ANY, EPSILON, NFA
|
||||
|
||||
|
||||
# Constants for glob
|
||||
# Opcodes yielded by parse_glob(): a literal character, "*", "+", "?", and
# a character range ("[...]") respectively.  (_PLUS is defined but not
# produced by parse_glob below.)
_LIT = 0
_STAR = 1
_PLUS = 2
_QUEST = 3
_RANGE = 4
|
||||
|
||||
|
||||
def parse_glob(pattern, _glob_multi="*", _glob_single="?",
               _glob_range1="[", _glob_range2="]"):
    """Parse a glob pattern and yield (opcode, argument) pairs, where the
    opcode is one of the _LIT/_STAR/_QUEST/_RANGE constants.  _RANGE's
    argument is a (chars, negate) tuple."""
    pos = 0
    last = None
    while pos < len(pattern):
        char = pattern[pos]
        pos += 1
        if char == _glob_multi:  # *
            # (Ignore more than one star in a row)
            if last is not _STAR:
                yield _STAR, None
                last = _STAR
        elif char == _glob_single:  # ?
            # (Ignore ? after a star)
            if last is not _STAR:
                yield _QUEST, None
                last = _QUEST
        elif char == _glob_range1:  # [
            chars = set()
            # NOTE(review): negate is never set to True anywhere -- "[!...]"
            # negation syntax is not actually parsed; confirm whether this
            # is intentional.
            negate = False
            # Take the char range specification until the ]
            while pos < len(pattern):
                char = pattern[pos]
                pos += 1
                if char == _glob_range2:
                    break
                chars.add(char)
            if chars:
                yield _RANGE, (chars, negate)
                last = _RANGE
        else:
            yield _LIT, char
            last = _LIT
|
||||
|
||||
|
||||
def glob_automaton(pattern):
    """Build an NFA matching the given glob pattern.  State numbers are the
    positions in the parsed opcode sequence."""
    nfa = NFA(0)
    i = -1  # so the final state is 0 (the initial state) for an empty pattern
    for i, (op, arg) in enumerate(parse_glob(pattern)):
        if op is _LIT:
            nfa.add_transition(i, arg, i + 1)
        elif op is _STAR:
            # Star: consume any character (ANY), or skip ahead (EPSILON),
            # with a back-edge so any number of characters can be consumed.
            nfa.add_transition(i, ANY, i + 1)
            nfa.add_transition(i, EPSILON, i + 1)
            nfa.add_transition(i + 1, EPSILON, i)
        elif op is _QUEST:
            nfa.add_transition(i, ANY, i + 1)
        elif op is _RANGE:
            # NOTE(review): the negate flag in arg[1] is ignored here.
            for char in arg[0]:
                nfa.add_transition(i, char, i + 1)
    nfa.add_final_state(i + 1)
    return nfa
|
||||
30
venv/Lib/site-packages/whoosh/automata/lev.py
Normal file
30
venv/Lib/site-packages/whoosh/automata/lev.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from __future__ import print_function
|
||||
|
||||
from whoosh.compat import unichr, xrange
|
||||
from whoosh.automata.fsa import ANY, EPSILON, NFA, unull
|
||||
|
||||
|
||||
def levenshtein_automaton(term, k, prefix=0):
    """Build an NFA accepting every string within Levenshtein distance `k`
    of `term`.  States are (position_in_term, errors_so_far) pairs; the
    first `prefix` characters must match exactly (no edits allowed)."""
    nfa = NFA((0, 0))
    if prefix:
        # Exact-match region: only the correct character advances, at
        # error level 0.
        for i in xrange(prefix):
            c = term[i]
            nfa.add_transition((i, 0), c, (i + 1, 0))

    for i in xrange(prefix, len(term)):
        c = term[i]
        for e in xrange(k + 1):
            # Correct character: advance in the term, same error count
            nfa.add_transition((i, e), c, (i + 1, e))
            if e < k:
                # Consume an input char without advancing in the term
                # (an extra character in the input)
                nfa.add_transition((i, e), ANY, (i, e + 1))
                # Skip a term char without consuming input
                # (a missing character in the input)
                nfa.add_transition((i, e), EPSILON, (i + 1, e + 1))
                # Substitution: consume one input char and advance
                nfa.add_transition((i, e), ANY, (i + 1, e + 1))
    for e in xrange(k + 1):
        if e < k:
            # Extra trailing characters in the input
            nfa.add_transition((len(term), e), ANY, (len(term), e + 1))
        # Accept at the end of the term with any error count <= k
        nfa.add_final_state((len(term), e))
    return nfa
|
||||
388
venv/Lib/site-packages/whoosh/automata/nfa.py
Normal file
388
venv/Lib/site-packages/whoosh/automata/nfa.py
Normal file
@@ -0,0 +1,388 @@
|
||||
# Copyright 2012 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from whoosh.automata.fst import Arc
|
||||
|
||||
|
||||
class Instruction(object):
    """Base class for regex virtual-machine instructions."""

    def __repr__(self):
        return "%s()" % (type(self).__name__,)
|
||||
|
||||
|
||||
class Char(Instruction):
    """
    Matches a literal character.
    """

    def __init__(self, c):
        # The single character this instruction consumes.
        self.c = c

    def __repr__(self):
        return "Char({0!r})".format(self.c)
|
||||
|
||||
class Lit(Instruction):
    """
    Matches a literal string.
    """

    def __init__(self, c):
        # The literal string this instruction consumes.
        self.c = c

    def __repr__(self):
        return "Lit({0!r})".format(self.c)
|
||||
|
||||
|
||||
class Any(Instruction):
    """
    Matches any character (consumes exactly one arc from the current
    graph address; see the Any branch in run()).
    """
|
||||
|
||||
|
||||
class Match(Instruction):
    """
    Stop this thread: the string matched.

    Note: fixup() appends the Match *class* itself (not an instance) to
    programs, and run()/regex_limit() test ``op is Match`` accordingly.
    """

    def __repr__(self):
        return "Match()"
|
||||
|
||||
|
||||
class Jmp(Instruction):
    """
    Jump to a specified instruction.
    """

    def __init__(self, x):
        # Target: a Label object before fixup(), an instruction index after.
        self.x = x

    def __repr__(self):
        return "Jmp({0})".format(self.x)
|
||||
|
||||
|
||||
class Split(Instruction):
    """
    Split execution: continue at two separate specified instructions.
    """

    def __init__(self, x, y):
        # Both targets: Label objects before fixup(), indexes after.
        self.x = x
        self.y = y

    def __repr__(self):
        return "Split({0}, {1})".format(self.x, self.y)
|
||||
|
||||
|
||||
class Label(Instruction):
    """
    Placeholder to act as a target for JMP instructions
    """

    def __hash__(self):
        # Identity hash: every Label object is a distinct target.
        return id(self)

    def __repr__(self):
        return "L(%s)" % hex(id(self))
|
||||
|
||||
|
||||
def concat(e1, e2):
    """Sequence: the program for e1 followed by the program for e2."""
    return e1 + e2
|
||||
|
||||
|
||||
def alt(e1, e2):
    """Alternation (e1|e2) in the standard Thompson-VM shape::

        split L1, L2
        L1: e1
            jmp L3
        L2: e2
        L3:

    Bug fix: the original emitted no Split instruction, so execution fell
    straight through e1 and the e2 branch was unreachable.
    """
    L1, L2, L3 = Label(), Label(), Label()
    return [Split(L1, L2), L1] + e1 + [Jmp(L3), L2] + e2 + [L3]
|
||||
|
||||
|
||||
def zero_or_one(e):
    """Optional (e?): split to either run e or skip past it."""
    run_e, after = Label(), Label()
    return [Split(run_e, after), run_e] + e + [after]
|
||||
|
||||
|
||||
def zero_or_more(e):
    """Kleene star (e*): loop over e any number of times, or exit."""
    head, body, out = Label(), Label(), Label()
    return [head, Split(body, out), body] + e + [Jmp(head), out]
|
||||
|
||||
|
||||
def one_or_more(e):
    """(e+): run e once, then optionally loop back for more."""
    head, out = Label(), Label()
    return [head] + e + [Split(head, out), out]
|
||||
|
||||
|
||||
def fixup(program):
    """Resolve Label placeholders in a regex program: remove every Label
    from the instruction list and rewrite Jmp/Split targets from Label
    objects to the numeric indexes those labels occupied.  Appends the
    Match instruction (the class itself -- run() tests ``op is Match``)."""
    refs = {}
    i = 0
    while i < len(program):
        op = program[i]
        if isinstance(op, Label):
            # Record the index this label marks, then delete it.  Do not
            # advance i: the next instruction slides into this slot.
            refs[op] = i
            program.pop(i)
        else:
            i += 1

    if refs:
        for op in program:
            if isinstance(op, (Jmp, Split)):
                op.x = refs[op.x]
                if isinstance(op, Split):
                    op.y = refs[op.y]

    return program + [Match]
|
||||
|
||||
|
||||
class ThreadList(object):
    """Holds the set of live Threads for a regex-program run.

    add() eagerly expands control-flow instructions (Jmp/Split), so the
    stored threads always point at a consuming instruction or Match.
    """

    def __init__(self, program, max=1000):
        self.program = program
        self.max = max
        self.threads = []

    def __nonzero__(self):  # Python 2 truth value
        return bool(self.threads)

    # Bug fix: Python 3 ignores __nonzero__, so without __bool__ a
    # ThreadList was always truthy and ``while threads:`` never terminated.
    __bool__ = __nonzero__

    def current(self):
        """Pop and return the most recently added thread."""
        return self.threads.pop()

    def add(self, thread):
        op = self.program[thread.pc]
        optype = type(op)
        if optype is Jmp:
            self.add(thread.at(op.x))
        elif optype is Split:
            # Explore both branches: a copy for x, the original for y.
            self.add(thread.copy_at(op.x))
            self.add(thread.at(op.y))
        else:
            self.threads.append(thread)
|
||||
|
||||
|
||||
class Thread(object):
    """A single execution thread of a regex program: an instruction
    pointer (pc) plus the graph address reached and the text matched so
    far."""

    def __init__(self, pc, address, sofar='', accept=False):
        self.pc = pc
        self.address = address
        self.sofar = sofar
        self.accept = accept

    def at(self, pc):
        """Move this thread to instruction `pc` and return it."""
        self.pc = pc
        return self

    def copy_at(self, pc):
        """Return a duplicate of this thread positioned at `pc`."""
        return Thread(pc, self.address, self.sofar, self.accept)

    def __repr__(self):
        pairs = ("%s=%r" % item for item in self.__dict__.items())
        return "Thread(%s)" % ",".join(pairs)
|
||||
|
||||
|
||||
def advance(thread, arc, c=None):
    """Move `thread` one instruction forward along `arc`.

    `c` is the text consumed by this step; when omitted, the arc's own
    label is used.  (The original required `c`, but the Char branches in
    run()/regex_limit() call ``advance(thread, arc)`` with two arguments,
    which raised TypeError -- the default makes those calls work.)
    """
    thread.pc += 1
    thread.address = arc.target
    thread.sofar += arc.label if c is None else c
    thread.accept = arc.accept
|
||||
|
||||
|
||||
def run(graph, program, address):
    """Execute a compiled regex `program` (see fixup()) against a graph
    (FST) starting at `address`, yielding every accepted string.

    Jmp/Split instructions are expanded inside ThreadList.add(), so only
    consuming instructions (Char/Lit/Any) and Match are handled here.
    """
    threads = ThreadList(program)
    threads.add(Thread(0, address))
    arc = Arc()
    while threads:
        thread = threads.current()
        address = thread.address
        op = program[thread.pc]
        optype = type(op)

        if optype is Char:
            if address:
                arc = graph.find_arc(address, op.c, arc)
                if arc:
                    # Bug fix: pass the consumed character explicitly --
                    # advance() requires it and the original two-argument
                    # call raised TypeError.
                    advance(thread, arc, op.c)
                    threads.add(thread)
        elif optype is Lit:
            if address:
                c = op.c
                arc = graph.find_path(c, arc, address)
                if arc:
                    advance(thread, arc, c)
                    threads.add(thread)
        elif optype is Any:
            if address:
                # Fan out: one new thread per outgoing arc.
                sofar = thread.sofar
                pc = thread.pc + 1
                for arc in graph.iter_arcs(address, arc):
                    t = Thread(pc, arc.target, sofar + arc.label, arc.accept)
                    threads.add(t)
        elif op is Match:
            if thread.accept:
                yield thread.sofar
        else:
            raise Exception("Don't know what to do with %r" % op)
|
||||
|
||||
|
||||
# Mode flags for regex_limit(): find the lexicographically LOwest or
# HIghest string matched by the program.
LO = 0
HI = 1
|
||||
|
||||
|
||||
def regex_limit(graph, mode, program, address):
    """Walk *graph* under the compiled regex *program* to find a
    lexicographic limit of the matching terms: the lowest term when
    *mode* is LO, the highest when *mode* is HI, starting at graph node
    *address*.

    NOTE(review): this function looks unfinished -- several branches
    reference attributes or call signatures that cannot work as written
    (flagged inline below); confirm intent before relying on it.
    """
    low = mode == LO
    output = []  # NOTE(review): never used -- leftover scaffolding?
    threads = ThreadList(program)
    threads.add(Thread(0, address))
    arc = Arc()
    while threads:
        thread = threads.current()
        address = thread.address
        op = program[thread.pc]
        optype = type(op)

        if optype is Char:
            if address:
                arc = graph.find_arc(address, op.c, arc)
                if arc:
                    if low and arc.accept:
                        # NOTE(review): Thread defines no 'label'
                        # attribute -- this would raise AttributeError;
                        # presumably op.c was intended. Confirm.
                        return thread.sofar + thread.label
                    # NOTE(review): advance() takes three arguments; the
                    # consumed character is missing here (TypeError).
                    advance(thread, arc)
                    threads.add(thread)
        elif optype is Lit:
            if address:
                labels = op.c
                # Verify each character of the literal has an arc; bail
                # out with the text matched so far if the path breaks.
                for label in labels:
                    arc = graph.find_arc(address, label)
                    if arc is None:
                        return thread.sofar
                # NOTE(review): the loop only probes arcs from the same
                # node -- the thread is never advanced along the path,
                # so this branch appears incomplete.
            elif thread.accept:
                return thread.sofar
        elif optype is Any:
            if address:
                if low:
                    # Lowest limit: take the first (smallest-label) arc.
                    arc = graph.arc_at(address, arc)
                else:
                    # Highest limit: exhaust the iterator so ``arc`` ends
                    # up as the last (largest-label) outgoing arc.
                    for arc in graph.iter_arcs(address):
                        pass
                advance(thread, arc, arc.label)
                threads.add(thread)
            elif thread.accept:
                return thread.sofar
        elif op is Match:
            return thread.sofar
        else:
            raise Exception("Don't know what to do with %r" % op)
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# from whoosh import index, query
|
||||
# from whoosh.filedb.filestore import RamStorage
|
||||
# from whoosh.automata import fst
|
||||
# from whoosh.util.testing import timing
|
||||
#
|
||||
# st = RamStorage()
|
||||
# gw = fst.GraphWriter(st.create_file("test"))
|
||||
# gw.start_field("test")
|
||||
# for key in ["aaaa", "aaab", "aabb", "abbb", "babb", "bbab", "bbba"]:
|
||||
# gw.insert(key)
|
||||
# gw.close()
|
||||
# gr = fst.GraphReader(st.open_file("test"))
|
||||
#
|
||||
# program = one_or_more([Lit("a")])
|
||||
# print program
|
||||
# program = fixup(program)
|
||||
# print program
|
||||
# print list(run(gr, program, gr.root("test")))
|
||||
#
|
||||
# ix = index.open_dir("e:/dev/src/houdini/help/index")
|
||||
# r = ix.reader()
|
||||
# gr = r._get_graph()
|
||||
#
|
||||
# # program = fixup([Any(), Any(), Any(), Any(), Any()])
|
||||
# # program = fixup(concat(zero_or_more([Any()]), [Char("/")]))
|
||||
# # with timing():
|
||||
# # x = list(run(gr, program, gr.root("path")))
|
||||
# # print len(x)
|
||||
#
|
||||
# q = query.Regex("path", "^.[abc].*/$")
|
||||
# with timing():
|
||||
# y = list(q._btexts(r))
|
||||
# print len(y)
|
||||
# print y[0], y[-1]
|
||||
#
|
||||
# pr = [Any()] + alt([Lit("c")], alt([Lit("b")], [Lit("a")])) + zero_or_more([Any()]) + [Lit("/")]
|
||||
# program = fixup(pr)
|
||||
# # with timing():
|
||||
# # x = list(run(gr, program, gr.root("path")))
|
||||
# # print len(x), x
|
||||
#
|
||||
# with timing():
|
||||
# print "lo=", regex_limit(gr, LO, program, gr.root("path"))
|
||||
# print "hi=", regex_limit(gr, HI, program, gr.root("path"))
|
||||
#
|
||||
#
|
||||
#
|
||||
# #int
|
||||
# #backtrackingvm(Inst *prog, char *input)
|
||||
# #{
|
||||
# # enum { MAXTHREAD = 1000 };
|
||||
# # Thread ready[MAXTHREAD];
|
||||
# # int nready;
|
||||
# # Inst *pc;
|
||||
# # char *sp;
|
||||
# #
|
||||
# # /* queue initial thread */
|
||||
# # ready[0] = thread(prog, input);
|
||||
# # nready = 1;
|
||||
# #
|
||||
# # /* run threads in stack order */
|
||||
# # while(nready > 0){
|
||||
# # --nready; /* pop state for next thread to run */
|
||||
# # pc = ready[nready].pc;
|
||||
# # sp = ready[nready].sp;
|
||||
# # for(;;){
|
||||
# # switch(pc->opcode){
|
||||
# # case Char:
|
||||
# # if(*sp != pc->c)
|
||||
# # goto Dead;
|
||||
# # pc++;
|
||||
# # sp++;
|
||||
# # continue;
|
||||
# # case Match:
|
||||
# # return 1;
|
||||
# # case Jmp:
|
||||
# # pc = pc->x;
|
||||
# # continue;
|
||||
# # case Split:
|
||||
# # if(nready >= MAXTHREAD){
|
||||
# # fprintf(stderr, "regexp overflow");
|
||||
# # return -1;
|
||||
# # }
|
||||
# # /* queue new thread */
|
||||
# # ready[nready++] = thread(pc->y, sp);
|
||||
# # pc = pc->x; /* continue current thread */
|
||||
# # continue;
|
||||
# # }
|
||||
# # }
|
||||
# # Dead:;
|
||||
# # }
|
||||
# # return 0;
|
||||
# #}
|
||||
#
|
||||
#
|
||||
135
venv/Lib/site-packages/whoosh/automata/reg.py
Normal file
135
venv/Lib/site-packages/whoosh/automata/reg.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# Copyright 2014 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import re
|
||||
from whoosh.automata.fsa import ANY, EPSILON, NFA
|
||||
|
||||
|
||||
# Operator precedence
# NOTE(review): looks like the start of an operator-precedence table for
# the unfinished parse() below -- only alternation is listed, and the
# module-level ``ops`` is immediately shadowed inside parse().
CHOICE = ("|", )
ops = ()
|
||||
|
||||
|
||||
def parse(pattern):
    # NOTE(review): unimplemented stub -- sets up the operand/operator
    # stacks for a shunting-yard style parse of *pattern*, but performs
    # no parsing and implicitly returns None.
    stack = []
    ops = []
|
||||
|
||||
|
||||
|
||||
|
||||
class RegexBuilder(object):
    """Builds NFA fragments for regex constructs via Thompson's
    construction: every method allocates fresh state numbers and returns
    a small NFA with a single start state and a single final state, so
    fragments compose cleanly with ``insert``/epsilon transitions.
    """

    def __init__(self):
        # Counter used to hand out unique state numbers; new_state()
        # pre-increments, so the first allocated state is 2.
        self.statenum = 1

    def new_state(self):
        """Allocate and return a fresh, unused state number."""
        self.statenum += 1
        return self.statenum

    def epsilon(self):
        """NFA matching the empty string: s --EPSILON--> e."""
        s = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        nfa.add_transition(s, EPSILON, e)
        nfa.add_final_state(e)
        return nfa

    def char(self, label):
        """NFA matching exactly the single symbol *label*."""
        s = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        nfa.add_transition(s, label, e)
        nfa.add_final_state(e)
        return nfa

    def charset(self, chars):
        """NFA matching any one symbol from *chars* (a ``[...]`` class)."""
        s = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        # One parallel arc per member of the class.
        for char in chars:
            nfa.add_transition(s, char, e)
        nfa.add_final_state(e)
        # BUG FIX: previously returned the final state number ``e``
        # instead of the built NFA, unlike every other builder method.
        return nfa

    def dot(self):
        """NFA matching any single symbol (the ``.`` wildcard)."""
        s = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        nfa.add_transition(s, ANY, e)
        nfa.add_final_state(e)
        return nfa

    def choice(self, n1, n2):
        """NFA matching *n1* or *n2* (alternation ``n1|n2``)."""
        s = self.new_state()
        s1 = self.new_state()
        s2 = self.new_state()
        e1 = self.new_state()
        e2 = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        # Epsilon-fork into each branch, then epsilon-join at ``e``.
        nfa.add_transition(s, EPSILON, s1)
        nfa.add_transition(s, EPSILON, s2)
        nfa.insert(s1, n1, e1)
        nfa.insert(s2, n2, e2)
        nfa.add_transition(e1, EPSILON, e)
        nfa.add_transition(e2, EPSILON, e)
        nfa.add_final_state(e)
        return nfa

    def concat(self, n1, n2):
        """NFA matching *n1* followed by *n2* (concatenation)."""
        s = self.new_state()
        m = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        nfa.insert(s, n1, m)
        nfa.insert(m, n2, e)
        nfa.add_final_state(e)
        return nfa

    def star(self, n):
        """NFA matching zero or more repetitions of *n* (``n*``)."""
        s = self.new_state()
        m1 = self.new_state()
        m2 = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        # Skip edge (zero matches) and loop-back edge (more matches).
        nfa.add_transition(s, EPSILON, m1)
        nfa.add_transition(s, EPSILON, e)
        nfa.insert(m1, n, m2)
        nfa.add_transition(m2, EPSILON, m1)
        nfa.add_transition(m2, EPSILON, e)
        nfa.add_final_state(e)
        return nfa

    def plus(self, n):
        """NFA matching one or more repetitions of *n*: ``n+`` == ``n n*``."""
        return self.concat(n, self.star(n))

    def question(self, n):
        """NFA matching *n* optionally: ``n?`` == ``n|epsilon``."""
        return self.choice(n, self.epsilon())
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user