This commit is contained in:
“shengyudong”
2026-01-06 14:18:39 +08:00
commit 5a384b694e
10345 changed files with 2050918 additions and 0 deletions

View File

@@ -0,0 +1,715 @@
from __future__ import print_function
import itertools
import operator
import sys
from bisect import bisect_left
from collections import defaultdict
from whoosh.compat import iteritems, next, text_type, unichr, xrange
unull = unichr(0)
# Marker constants
class Marker(object):
    """Named sentinel object used as a special transition label."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return "<%s>" % self.name


# The empty (epsilon) transition label and the "matches any character" label
EPSILON = Marker("EPSILON")
ANY = Marker("ANY")
# Base class
class FSA(object):
    """Base class for finite-state automata, shared by :class:`NFA` and
    :class:`DFA`.

    Holds the initial state, a ``transitions`` mapping and the set of
    final (accepting) states; the transition semantics are defined by the
    subclasses via ``next_state``/``is_final``/``add_transition``.
    """

    def __init__(self, initial):
        self.initial = initial
        self.transitions = {}
        self.final_states = set()

    def __len__(self):
        # Number of distinct states mentioned by any transition
        return len(self.all_states())

    def __eq__(self, other):
        if self.initial != other.initial:
            return False
        if self.final_states != other.final_states:
            return False
        st = self.transitions
        ot = other.transitions
        # NOTE(review): list(st) != list(ot) compares key ORDER as well as
        # content, so automata built in different orders may compare unequal
        # even when structurally identical -- confirm this is intended.
        if list(st) != list(ot):
            return False
        for key in st:
            if st[key] != ot[key]:
                return False
        return True

    def all_states(self):
        """Return the set of all states appearing in any transition.

        NOTE(review): for NFA subclasses, ``trans.values()`` yields SETS of
        destinations, which are unhashable -- this looks like it only works
        for DFA-shaped transition tables; confirm.
        """
        stateset = set(self.transitions)
        for src, trans in iteritems(self.transitions):
            stateset.update(trans.values())
        return stateset

    def all_labels(self):
        """Return the set of all transition labels in the automaton."""
        labels = set()
        for src, trans in iteritems(self.transitions):
            labels.update(trans)
        return labels

    def get_labels(self, src):
        """Return an iterator of labels leaving state ``src``."""
        return iter(self.transitions.get(src, []))

    def generate_all(self, state=None, sofar=""):
        """Recursively yield every string accepted starting from ``state``
        (defaults to the start state).  Only terminates on acyclic
        transition graphs.
        """
        state = self.start() if state is None else state
        if self.is_final(state):
            yield sofar
        for label in sorted(self.get_labels(state)):
            newstate = self.next_state(state, label)
            for string in self.generate_all(newstate, sofar + label):
                yield string

    def start(self):
        """Return the start state."""
        return self.initial

    # -- Abstract interface, implemented by subclasses

    def next_state(self, state, label):
        raise NotImplementedError

    def is_final(self, state):
        raise NotImplementedError

    def add_transition(self, src, label, dest):
        raise NotImplementedError

    def add_final_state(self, state):
        raise NotImplementedError

    def to_dfa(self):
        raise NotImplementedError

    def accept(self, string, debug=False):
        """Return True if this automaton accepts ``string``."""
        state = self.start()
        for label in string:
            if debug:
                print(" ", state, "->", label, "->")
            state = self.next_state(state, label)
            # A falsy state (None / empty frozenset) means the machine died
            if not state:
                break
        return self.is_final(state)

    def append(self, fsa):
        """Concatenate ``fsa`` onto this automaton in place: epsilon-link
        every current final state to ``fsa``'s initial state, then adopt
        ``fsa``'s final states as our own.
        """
        self.transitions.update(fsa.transitions)
        for state in self.final_states:
            self.add_transition(state, EPSILON, fsa.initial)
        self.final_states = fsa.final_states
# Implementations
class NFA(FSA):
    """Nondeterministic finite automaton.

    A "state" as seen through ``start()``/``next_state()`` is a frozenset of
    internal states, already expanded through EPSILON transitions.  The
    ``transitions`` table maps internal state -> {label -> set(dest states)}.
    """

    def __init__(self, initial):
        self.transitions = {}
        self.final_states = set()
        self.initial = initial

    def dump(self, stream=sys.stdout):
        # Debug helper: print each source state and compute its transitions.
        starts = self.start()
        for src in self.transitions:
            beg = "@" if src in starts else " "
            print(beg, src, file=stream)
            xs = self.transitions[src]
            for label in xs:
                dests = xs[label]
                end = "||" if self.is_final(dests) else ""
                # NOTE(review): `dests` and `end` are computed but never
                # printed; a final print line appears to have been lost --
                # confirm against the upstream source.

    def start(self):
        """Return the start "state": the epsilon-closure of the initial
        internal state, as a frozenset."""
        return frozenset(self._expand(set([self.initial])))

    def add_transition(self, src, label, dest):
        """Add src --label--> dest (each label maps to a SET of dests)."""
        self.transitions.setdefault(src, {}).setdefault(label, set()).add(dest)

    def add_final_state(self, state):
        self.final_states.add(state)

    def triples(self):
        """Yield every (src, label, dest) transition triple."""
        for src, trans in iteritems(self.transitions):
            for label, dests in iteritems(trans):
                for dest in dests:
                    yield src, label, dest

    def is_final(self, states):
        """True if any internal state in the set ``states`` is accepting."""
        return bool(self.final_states.intersection(states))

    def _expand(self, states):
        """Mutate and return ``states``, adding every state reachable via
        EPSILON transitions (the epsilon-closure)."""
        transitions = self.transitions
        frontier = set(states)
        while frontier:
            state = frontier.pop()
            if state in transitions and EPSILON in transitions[state]:
                new_states = transitions[state][EPSILON].difference(states)
                frontier.update(new_states)
                states.update(new_states)
        return states

    def next_state(self, states, label):
        """Return the frozenset of states reachable from ``states`` on
        ``label``; ANY transitions fire for every label."""
        transitions = self.transitions
        dest_states = set()
        for state in states:
            if state in transitions:
                xs = transitions[state]
                if label in xs:
                    dest_states.update(xs[label])
                if ANY in xs:
                    dest_states.update(xs[ANY])
        return frozenset(self._expand(dest_states))

    def get_labels(self, states):
        """Return the set of labels leaving any state in ``states``."""
        transitions = self.transitions
        labels = set()
        for state in states:
            if state in transitions:
                labels.update(transitions[state])
        return labels

    def embed(self, other):
        # Copy all transitions from the other NFA into this one
        for s, othertrans in iteritems(other.transitions):
            trans = self.transitions.setdefault(s, {})
            for label, otherdests in iteritems(othertrans):
                dests = trans.setdefault(label, set())
                dests.update(otherdests)

    def insert(self, src, other, dest):
        """Splice the NFA ``other`` between states ``src`` and ``dest``."""
        self.embed(other)
        # Connect src to the other NFA's initial state, and the other
        # NFA's final states to dest
        self.add_transition(src, EPSILON, other.initial)
        for finalstate in other.final_states:
            self.add_transition(finalstate, EPSILON, dest)

    def to_dfa(self):
        """Subset construction: return an equivalent DFA whose states are
        frozensets of this NFA's internal states.  ANY transitions become
        the DFA state's default transition."""
        dfa = DFA(self.start())
        frontier = [self.start()]
        seen = set()
        while frontier:
            current = frontier.pop()
            if self.is_final(current):
                dfa.add_final_state(current)
            labels = self.get_labels(current)
            for label in labels:
                if label is EPSILON:
                    # Epsilon moves are already folded in by _expand()
                    continue
                new_state = self.next_state(current, label)
                if new_state not in seen:
                    frontier.append(new_state)
                    seen.add(new_state)
                    if self.is_final(new_state):
                        dfa.add_final_state(new_state)
                if label is ANY:
                    dfa.set_default_transition(current, new_state)
                else:
                    dfa.add_transition(current, label, new_state)
        return dfa
class DFA(FSA):
    """Deterministic finite automaton: each (state, label) pair maps to at
    most one destination.  ``defaults`` holds a per-state fallback
    destination, taken on any label with no explicit transition (used to
    implement ANY)."""

    def __init__(self, initial):
        self.initial = initial
        self.transitions = {}
        self.defaults = {}
        self.final_states = set()
        # Lazy cache of sorted outgoing labels per state, filled in by
        # find_next_edge()
        self.outlabels = {}

    def dump(self, stream=sys.stdout):
        # Debug helper: print each source state and compute its transitions.
        for src in sorted(self.transitions):
            beg = "@" if src == self.initial else " "
            print(beg, src, file=stream)
            xs = self.transitions[src]
            for label in sorted(xs):
                dest = xs[label]
                end = "||" if self.is_final(dest) else ""
                # NOTE(review): `dest` and `end` are computed but never
                # printed; a final print line appears to have been lost --
                # confirm against the upstream source.

    def start(self):
        return self.initial

    def add_transition(self, src, label, dest):
        self.transitions.setdefault(src, {})[label] = dest

    def set_default_transition(self, src, dest):
        """Set the fallback destination taken from ``src`` on any label
        without an explicit transition."""
        self.defaults[src] = dest

    def add_final_state(self, state):
        self.final_states.add(state)

    def is_final(self, state):
        return state in self.final_states

    def next_state(self, src, label):
        """Return the destination for (src, label), else ``src``'s default
        destination, else None."""
        trans = self.transitions.get(src, {})
        return trans.get(label, self.defaults.get(src, None))

    def next_valid_string(self, string, asbytes=False):
        """Return the lexicographically smallest accepted string >=
        ``string``, or None if no such string exists."""
        state = self.start()
        stack = []

        # Follow the DFA as far as possible
        i = 0
        for i, label in enumerate(string):
            stack.append((string[:i], state, label))
            state = self.next_state(state, label)
            if not state:
                break
        else:
            stack.append((string[:i + 1], state, None))

        if self.is_final(state):
            # Word is already valid
            return string

        # Perform a 'wall following' search for the lexicographically smallest
        # accepting state.
        while stack:
            path, state, label = stack.pop()
            label = self.find_next_edge(state, label, asbytes=asbytes)
            if label:
                path += label
                state = self.next_state(state, label)
                if self.is_final(state):
                    return path
                stack.append((path, state, None))
        return None

    def find_next_edge(self, s, label, asbytes):
        """Return the smallest label leaving state ``s`` that is strictly
        greater than ``label`` (or the smallest of all when ``label`` is
        None); None if there is none."""
        if label is None:
            label = b"\x00" if asbytes else u'\0'
        else:
            # Successor value: int + 1 in bytes mode (iterating bytes yields
            # ints on py3), next code point in text mode
            label = (label + 1) if asbytes else unichr(ord(label) + 1)
        trans = self.transitions.get(s, {})
        if label in trans or s in self.defaults:
            return label
        try:
            labels = self.outlabels[s]
        except KeyError:
            self.outlabels[s] = labels = sorted(trans)
        pos = bisect_left(labels, label)
        if pos < len(labels):
            return labels[pos]
        return None

    def reachable_from(self, src, inclusive=True):
        """Return the set of states reachable from ``src`` by following
        transitions (including ``src`` itself when ``inclusive``)."""
        transitions = self.transitions
        reached = set()
        if inclusive:
            reached.add(src)
        stack = [src]
        seen = set()
        while stack:
            src = stack.pop()
            seen.add(src)
            # NOTE(review): raises KeyError if a reachable state has no entry
            # in ``transitions`` -- assumes every state appears as a key.
            for _, dest in iteritems(transitions[src]):
                reached.add(dest)
                if dest not in seen:
                    stack.append(dest)
        return reached

    def minimize(self):
        """Minimize in place: delete unreachable states, merge equivalent
        states by partition refinement, then drop dead states."""
        transitions = self.transitions
        initial = self.initial

        # Step 1: Delete unreachable states
        reachable = self.reachable_from(initial)
        for src in list(transitions):
            if src not in reachable:
                del transitions[src]
        final_states = self.final_states.intersection(reachable)
        labels = self.all_labels()

        # Step 2: Partition the states into equivalence sets
        changed = True
        parts = [final_states, reachable - final_states]
        while changed:
            changed = False
            for i in xrange(len(parts)):
                part = parts[i]
                changed_part = False
                for label in labels:
                    next_part = None
                    new_part = set()
                    for state in part:
                        dest = transitions[state].get(label)
                        if dest is not None:
                            # First destination seen fixes the "expected"
                            # partition; states whose dest falls elsewhere
                            # are split off into new_part
                            if next_part is None:
                                for p in parts:
                                    if dest in p:
                                        next_part = p
                            elif dest not in next_part:
                                new_part.add(state)
                                changed = True
                                changed_part = True
                    if changed_part:
                        # Split this part; the outer while re-runs until no
                        # part changes.
                        # NOTE(review): the break only exits the label loop;
                        # the enclosing index loop keeps iterating over the
                        # mutated ``parts`` list -- confirm this is safe.
                        old_part = part - new_part
                        parts.pop(i)
                        parts.append(old_part)
                        parts.append(new_part)
                        break
        # Choose one state from each equivalence set and map all equivalent
        # states to it
        new_trans = {}
        # Create mapping
        mapping = {}
        new_initial = None
        for part in parts:
            representative = part.pop()
            if representative is initial:
                new_initial = representative
            mapping[representative] = representative
            new_trans[representative] = {}
            for state in part:
                if state is initial:
                    new_initial = representative
                mapping[state] = representative
        assert new_initial is not None

        # Apply mapping to existing transitions
        new_finals = set(mapping[s] for s in final_states)
        for state, d in iteritems(new_trans):
            trans = transitions[state]
            for label, dest in iteritems(trans):
                d[label] = mapping[dest]

        # Remove dead states - non-final states with no outgoing arcs except
        # to themselves
        non_final_srcs = [src for src in new_trans if src not in new_finals]
        removing = set()
        for src in non_final_srcs:
            dests = set(new_trans[src].values())
            dests.discard(src)
            if not dests:
                removing.add(src)
                del new_trans[src]

        # Delete transitions to removed dead states
        for t in new_trans.values():
            for label in list(t):
                if t[label] in removing:
                    del t[label]

        self.transitions = new_trans
        self.initial = new_initial
        self.final_states = new_finals

    def to_dfa(self):
        # Already deterministic
        return self
# Useful functions
def renumber_dfa(dfa, base=0):
    """Return a copy of ``dfa`` whose states are renumbered as consecutive
    integers starting at ``base``."""
    counter = itertools.count(base)
    renumbered = {}

    def number_for(state):
        # Assign each distinct state the next integer, memoized
        if state not in renumbered:
            renumbered[state] = next(counter)
        return renumbered[state]

    result = DFA(number_for(dfa.initial))
    for src, trans in iteritems(dfa.transitions):
        for label, dest in iteritems(trans):
            result.add_transition(number_for(src), label, number_for(dest))
    for accepting in dfa.final_states:
        result.add_final_state(number_for(accepting))
    for src, dest in iteritems(dfa.defaults):
        result.set_default_transition(number_for(src), number_for(dest))
    return result
def u_to_utf8(dfa, base=0):
    """Convert a DFA over unicode labels IN PLACE into one over single UTF-8
    bytes, inserting intermediate chain states (numbered from ``base``) for
    multi-byte characters.

    Raises Exception if the DFA contains an ANY transition; EPSILON labels
    are left untouched.
    """
    c = itertools.count(base)
    transitions = dfa.transitions
    for src, trans in iteritems(transitions):
        trans = transitions[src]
        for label, dest in list(iteritems(trans)):
            if label is EPSILON:
                continue
            elif label is ANY:
                raise Exception
            else:
                assert isinstance(label, text_type)
                label8 = label.encode("utf8")
                for i, byte in enumerate(label8):
                    if i < len(label8) - 1:
                        # Intermediate byte: chain through a fresh state
                        st = next(c)
                        dfa.add_transition(src, byte, st)
                        # NOTE(review): rebinding ``src`` here means later
                        # labels of the ORIGINAL source state are attached to
                        # the chain state instead -- confirm against upstream
                        # whether this is intended.
                        src = st
                    else:
                        # Last byte goes to the original destination
                        dfa.add_transition(src, byte, dest)
                del trans[label]
def find_all_matches(dfa, lookup_func, first=unull):
    """
    Intersect ``dfa`` with an external ordered key collection, yielding
    every key the DFA accepts.

    Args:
        dfa: the automaton to match keys against.
        lookup_func: a single-argument function that returns the first key
            in the collection that is greater than or equal to the input
            argument, or None when the collection is exhausted.
        first: the key to start searching from (defaults to the lowest
            possible string, ``unull``).

    Yields:
        Every key of the collection accepted by the DFA, in order.
    """
    match = dfa.next_valid_string(first)
    while match:
        key = lookup_func(match)
        if key is None:
            return
        if match == key:
            yield match
        # Append NUL to ask for the key strictly after this one
        key += unull
        match = dfa.next_valid_string(key)
# Construction functions
def reverse_nfa(n):
    """Return an NFA accepting the reversal of the language of ``n``: every
    arc is flipped, a new start epsilon-links to the old finals, and the old
    initial state becomes the only final state."""
    new_start = object()
    reversed_nfa = NFA(new_start)
    for src, trans in iteritems(n.transitions):
        for label, destset in iteritems(trans):
            for dest in destset:
                reversed_nfa.add_transition(dest, label, src)
    for old_final in n.final_states:
        reversed_nfa.add_transition(new_start, EPSILON, old_final)
    reversed_nfa.add_final_state(n.initial)
    return reversed_nfa
def product(dfa1, op, dfa2):
    """Product construction: build a DFA over pairs of states of ``dfa1``
    and ``dfa2``, accepting where ``op`` applied to the members' acceptance
    is true (``operator.and_`` -> intersection, ``operator.or_`` -> union).

    Fixes vs. the original:
    * the (state1, state2) source pair was overwritten inside the label
      loop, so every label after the first was expanded from the wrong
      source states
    * reached pairs were pushed unconditionally, which never terminated on
      cyclic automata; a ``seen`` set now guards the stack
    * destination states with no outgoing transitions raised KeyError
    """
    dfa1 = dfa1.to_dfa()
    dfa2 = dfa2.to_dfa()
    start = (dfa1.start(), dfa2.start())
    dfa = DFA(start)
    seen = set([start])
    stack = [start]
    while stack:
        src = stack.pop()
        state1, state2 = src
        trans1 = set(dfa1.transitions.get(state1, {}))
        trans2 = set(dfa2.transitions.get(state2, {}))
        for label in trans1.intersection(trans2):
            # Fresh names so the source pair is not clobbered between labels
            dest1 = dfa1.next_state(state1, label)
            dest2 = dfa2.next_state(state2, label)
            if op(dest1 is not None, dest2 is not None):
                dest = (dest1, dest2)
                dfa.add_transition(src, label, dest)
                if dest not in seen:
                    seen.add(dest)
                    stack.append(dest)
                if op(dfa1.is_final(dest1), dfa2.is_final(dest2)):
                    dfa.add_final_state(dest)
    return dfa
def intersection(dfa1, dfa2):
    """Return a DFA accepting strings accepted by BOTH input automata."""
    return product(dfa1, operator.and_, dfa2)
def union(dfa1, dfa2):
    """Return a DFA accepting strings accepted by EITHER input automaton."""
    return product(dfa1, operator.or_, dfa2)
def epsilon_nfa():
    """Return an NFA that accepts only the empty string."""
    return basic_nfa(EPSILON)
def dot_nfa():
    """Return an NFA that accepts any single character (like regex '.')."""
    return basic_nfa(ANY)
def basic_nfa(label):
    """Return a two-state NFA with a single ``label`` transition from the
    start state to the (only) accepting state."""
    begin, finish = object(), object()
    automaton = NFA(begin)
    automaton.add_transition(begin, label, finish)
    automaton.add_final_state(finish)
    return automaton
def charset_nfa(labels):
    """Return an NFA accepting any single character in ``labels``."""
    begin, finish = object(), object()
    automaton = NFA(begin)
    for label in labels:
        automaton.add_transition(begin, label, finish)
    automaton.add_final_state(finish)
    return automaton
def string_nfa(string):
    """Return an NFA accepting exactly ``string``, one state per char."""
    current = object()
    tail = object()  # final state even when string is empty
    automaton = NFA(current)
    for char in string:
        tail = object()
        automaton.add_transition(current, char, tail)
        current = tail
    automaton.add_final_state(tail)
    return automaton
def choice_nfa(n1, n2):
    """Return an NFA accepting anything matched by ``n1`` OR ``n2``."""
    # -> nfa1 -
    # / \
    # s e
    # \ /
    # -> nfa2 -
    begin, finish = object(), object()
    automaton = NFA(begin)
    for branch in (n1, n2):
        automaton.insert(begin, branch, finish)
    automaton.add_final_state(finish)
    return automaton
def concat_nfa(n1, n2):
    """Return an NFA accepting ``n1``'s language followed by ``n2``'s."""
    begin, middle, finish = object(), object(), object()
    automaton = NFA(begin)
    automaton.insert(begin, n1, middle)
    automaton.insert(middle, n2, finish)
    automaton.add_final_state(finish)
    return automaton
def star_nfa(n):
    """Return an NFA accepting zero or more repetitions of ``n`` (n*)."""
    # -----<-----
    # / \
    # s ---> n ---> e
    # \ /
    # ----->-----
    begin, finish = object(), object()
    automaton = NFA(begin)
    automaton.insert(begin, n, finish)
    # Epsilon edge allowing zero repetitions...
    automaton.add_transition(begin, EPSILON, finish)
    # ...and epsilon edges looping back for further repetitions
    for accepting in n.final_states:
        automaton.add_transition(accepting, EPSILON, begin)
    automaton.add_final_state(finish)
    return automaton
def plus_nfa(n):
    """Return an NFA accepting one or more repetitions of ``n`` (n+)."""
    return concat_nfa(n, star_nfa(n))
def optional_nfa(n):
    """Return an NFA accepting ``n``'s language or the empty string (n?)."""
    return choice_nfa(n, epsilon_nfa())
# Daciuk Mihov DFA construction algorithm
class DMNode(object):
    """Mutable node used while building a DFA in strings_dfa()."""

    def __init__(self, n):
        self.n = n          # state number in the output DFA
        self.arcs = {}      # label -> destination state number
        self.final = False  # whether this node is an accept state

    def __repr__(self):
        return "<%s, %r>" % (self.n, self.tuple())

    def __hash__(self):
        return hash(self.tuple())

    def tuple(self):
        """Hashable signature used to deduplicate equivalent nodes:
        (sorted arc pairs, final flag)."""
        return tuple(sorted(self.arcs.items())), self.final
def strings_dfa(strings):
    """Build a minimal DFA from an iterable of strings using the
    Daciuk-Mihov incremental construction.

    The strings MUST be in strictly increasing order and non-empty;
    otherwise Exception is raised.  Returns a :class:`DFA` with integer
    states (0 is the start state).
    """
    dfa = DFA(0)
    c = itertools.count(1)
    last = ""
    seen = {}        # node signature tuple -> canonical state number
    nodes = [DMNode(0)]  # path of unfrozen nodes for the current string

    for string in strings:
        if string <= last:
            raise Exception("Strings must be in order")
        if not string:
            raise Exception("Can't add empty string")

        # Find the common prefix with the previous string
        i = 0
        while i < len(last) and i < len(string) and last[i] == string[i]:
            i += 1
        prefixlen = i

        # Freeze the transitions after the prefix, since they're not shared
        add_suffix(dfa, nodes, last, prefixlen + 1, seen)

        # Create new nodes for the substring after the prefix
        for label in string[prefixlen:]:
            node = DMNode(next(c))
            # Create an arc from the previous node to this node
            nodes[-1].arcs[label] = node.n
            nodes.append(node)

        # Mark the last node as an accept state
        nodes[-1].final = True

        last = string

    # Freeze whatever remains of the last string's path
    if len(nodes) > 1:
        add_suffix(dfa, nodes, last, 0, seen)
    return dfa
def add_suffix(dfa, nodes, last, downto, seen):
    """Freeze nodes above index ``downto`` on the under-construction path
    ``nodes``, deduplicating each against ``seen`` and emitting its arcs
    into ``dfa``.

    ``last`` is the most recently added string; it supplies the label of
    the arc entering each popped node (position len(nodes)-1 after the pop).
    """
    while len(nodes) > downto:
        node = nodes.pop()
        tup = node.tuple()

        # If a node just like this one (final/nonfinal, same arcs to same
        # destinations) is already seen, replace with it
        try:
            this = seen[tup]
        except KeyError:
            # First time this signature is seen: keep the node's own number
            this = node.n
            if node.final:
                dfa.add_final_state(this)
            seen[tup] = this
        else:
            # If we replaced the node with an already seen one, fix the parent
            # node's pointer to this
            parent = nodes[-1]
            inlabel = last[len(nodes) - 1]
            parent.arcs[inlabel] = this

        # Add the node's transitions to the DFA
        for label, dest in iteritems(node.arcs):
            dfa.add_transition(this, label, dest)

View File

@@ -0,0 +1,90 @@
# Copyright 2012 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from whoosh.automata.fsa import ANY, EPSILON, NFA
# Constants for glob: token opcodes yielded by parse_glob()
_LIT = 0    # literal character (arg is the char)
_STAR = 1   # "*": match any sequence of characters
_PLUS = 2   # declared for completeness; never emitted by parse_glob()
_QUEST = 3  # "?": match any single character
_RANGE = 4  # "[...]": match any char in a set (arg is (chars, negate))
def parse_glob(pattern, _glob_multi="*", _glob_single="?",
               _glob_range1="[", _glob_range2="]"):
    """Tokenize a glob ``pattern``, yielding (opcode, argument) pairs using
    the _LIT/_STAR/_QUEST/_RANGE constants.

    Runs of "*" collapse to one _STAR, and "?" directly after "*" is
    dropped.  An unterminated "[" consumes the rest of the pattern.
    """
    pos = 0
    last = None
    while pos < len(pattern):
        char = pattern[pos]
        pos += 1
        if char == _glob_multi:  # *
            # (Ignore more than one star in a row)
            if last is not _STAR:
                yield _STAR, None
                last = _STAR
        elif char == _glob_single:  # ?
            # (Ignore ? after a star)
            if last is not _STAR:
                yield _QUEST, None
                last = _QUEST
        elif char == _glob_range1:  # [
            chars = set()
            # NOTE(review): ``negate`` is declared but never set, so "[!...]"
            # negation is not actually supported even though the _RANGE
            # argument carries the flag -- confirm whether this is intended.
            negate = False
            # Take the char range specification until the ]
            while pos < len(pattern):
                char = pattern[pos]
                pos += 1
                if char == _glob_range2:
                    break
                chars.add(char)
            if chars:
                yield _RANGE, (chars, negate)
                last = _RANGE
        else:
            yield _LIT, char
            last = _LIT
def glob_automaton(pattern):
    """Compile a glob ``pattern`` into an NFA whose integer states are
    numbered by token position (0 is the start state)."""
    automaton = NFA(0)
    pos = -1
    for pos, (op, arg) in enumerate(parse_glob(pattern)):
        nxt = pos + 1
        if op is _LIT:
            automaton.add_transition(pos, arg, nxt)
        elif op is _STAR:
            # Consume any number of characters, including none
            automaton.add_transition(pos, ANY, nxt)
            automaton.add_transition(pos, EPSILON, nxt)
            automaton.add_transition(nxt, EPSILON, pos)
        elif op is _QUEST:
            automaton.add_transition(pos, ANY, nxt)
        elif op is _RANGE:
            for char in arg[0]:
                automaton.add_transition(pos, char, nxt)
    automaton.add_final_state(pos + 1)
    return automaton

View File

@@ -0,0 +1,30 @@
from __future__ import print_function
from whoosh.compat import unichr, xrange
from whoosh.automata.fsa import ANY, EPSILON, NFA, unull
def levenshtein_automaton(term, k, prefix=0):
    """Build an NFA accepting every string within Levenshtein distance
    ``k`` of ``term``.

    States are (position, errors) tuples.  The first ``prefix`` characters
    must match exactly (no edits permitted in that region).
    """
    nfa = NFA((0, 0))
    if prefix:
        # Exact-match region: only the correct character advances
        for i in xrange(prefix):
            c = term[i]
            nfa.add_transition((i, 0), c, (i + 1, 0))

    for i in xrange(prefix, len(term)):
        c = term[i]
        for e in xrange(k + 1):
            # Correct character
            nfa.add_transition((i, e), c, (i + 1, e))
            if e < k:
                # Deletion
                nfa.add_transition((i, e), ANY, (i, e + 1))
                # Insertion
                nfa.add_transition((i, e), EPSILON, (i + 1, e + 1))
                # Substitution
                nfa.add_transition((i, e), ANY, (i + 1, e + 1))
    # End of term: extra trailing characters consume remaining error budget
    for e in xrange(k + 1):
        if e < k:
            nfa.add_transition((len(term), e), ANY, (len(term), e + 1))
        nfa.add_final_state((len(term), e))
    return nfa

View File

@@ -0,0 +1,388 @@
# Copyright 2012 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from whoosh.automata.fst import Arc
class Instruction(object):
    """Base class for the instructions of the regex virtual machine."""

    def __repr__(self):
        return "%s()" % (self.__class__.__name__, )


class Char(Instruction):
    """Instruction matching one literal character."""

    def __init__(self, c):
        self.c = c

    def __repr__(self):
        return "Char(%r)" % self.c


class Lit(Instruction):
    """Instruction matching a literal string."""

    def __init__(self, c):
        self.c = c

    def __repr__(self):
        return "Lit(%r)" % self.c


class Any(Instruction):
    """Instruction matching any single character."""


class Match(Instruction):
    """Instruction stopping the thread: the string matched."""

    def __repr__(self):
        return "Match()"


class Jmp(Instruction):
    """Unconditional jump to instruction ``x``."""

    def __init__(self, x):
        self.x = x

    def __repr__(self):
        return "Jmp(%s)" % self.x


class Split(Instruction):
    """Fork execution: continue at both instructions ``x`` and ``y``."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return "Split(%s, %s)" % (self.x, self.y)


class Label(Instruction):
    """Placeholder acting as a jump target until fixup() resolves it."""

    def __hash__(self):
        return id(self)

    def __repr__(self):
        return "L(%s)" % hex(id(self))
def concat(e1, e2):
    """Concatenate two instruction lists into one program fragment."""
    return e1 + e2
def alt(e1, e2):
    """Fragment for e1 | e2, with Label placeholders as branch targets."""
    left, right, done = Label(), Label(), Label()
    return [left] + e1 + [Jmp(done), right] + e2 + [done]
def zero_or_one(e):
    """Fragment for e? -- optionally execute ``e`` once."""
    body, done = Label(), Label()
    return [Split(body, done), body] + e + [done]
def zero_or_more(e):
    """Fragment for e* -- execute ``e`` any number of times."""
    top, body, done = Label(), Label(), Label()
    return [top, Split(body, done), body] + e + [Jmp(top), done]
def one_or_more(e):
    """Fragment for e+ -- execute ``e`` at least once."""
    top, done = Label(), Label()
    return [top] + e + [Split(top, done), done]
def fixup(program):
    """Resolve Label placeholders in ``program`` (mutated in place) to
    instruction indexes, then return the program with the Match class
    appended as the terminator."""
    targets = {}
    pos = 0
    while pos < len(program):
        instr = program[pos]
        if not isinstance(instr, Label):
            pos += 1
            continue
        # Record where the label points and remove the placeholder
        targets[instr] = pos
        program.pop(pos)
    if targets:
        for instr in program:
            if isinstance(instr, (Jmp, Split)):
                instr.x = targets[instr.x]
                if isinstance(instr, Split):
                    instr.y = targets[instr.y]
    return program + [Match]
class ThreadList(object):
    """LIFO collection of pending Thread objects for the regex VM.

    ``add()`` follows Jmp/Split instructions eagerly so only threads parked
    on "real" instructions are queued.
    """

    def __init__(self, program, max=1000):
        self.program = program
        self.max = max
        self.threads = []

    def __nonzero__(self):
        return bool(self.threads)

    # Py3 ignores __nonzero__; without this alias bool(self) was always
    # True, so ``while threads:`` loops never saw the list empty out.
    __bool__ = __nonzero__

    def current(self):
        """Pop and return the most recently added thread."""
        return self.threads.pop()

    def add(self, thread):
        op = self.program[thread.pc]
        optype = type(op)
        if optype is Jmp:
            self.add(thread.at(op.x))
        elif optype is Split:
            # Queue a copy for one branch, continue this thread on the other
            self.add(thread.copy_at(op.x))
            self.add(thread.at(op.y))
        else:
            self.threads.append(thread)
class Thread(object):
    """One execution thread of the regex VM: a program counter, the current
    graph address, the characters matched so far, and an accept flag."""

    def __init__(self, pc, address, sofar='', accept=False):
        self.pc = pc
        self.address = address
        self.sofar = sofar
        self.accept = accept

    def at(self, pc):
        """Move this thread to instruction ``pc`` and return it."""
        self.pc = pc
        return self

    def copy_at(self, pc):
        """Return a clone of this thread positioned at instruction ``pc``."""
        return Thread(pc, self.address, self.sofar, self.accept)

    def __repr__(self):
        fields = ("%s=%r" % item for item in self.__dict__.items())
        return "Thread(%s)" % ",".join(fields)
def advance(thread, arc, c):
    """Step ``thread`` to the next instruction after consuming character
    ``c`` along ``arc``: updates the program counter, graph address,
    matched-so-far string and accept flag."""
    thread.pc += 1
    thread.address = arc.target
    thread.sofar += c
    thread.accept = arc.accept
def run(graph, program, address):
    """Execute the compiled regex ``program`` against ``graph`` starting at
    node ``address``, yielding every matching string.

    Fix vs. the original: ``advance()`` takes the matched character as its
    third argument; the Char branch called it with only two arguments and
    raised TypeError on any literal-character match.
    """
    threads = ThreadList(program)
    threads.add(Thread(0, address))
    arc = Arc()
    while threads:
        thread = threads.current()
        address = thread.address
        op = program[thread.pc]
        optype = type(op)
        if optype is Char:
            if address:
                arc = graph.find_arc(address, op.c, arc)
                if arc:
                    # The matched character is the instruction's literal
                    advance(thread, arc, op.c)
                    threads.add(thread)
        elif optype is Lit:
            if address:
                c = op.c
                arc = graph.find_path(c, arc, address)
                if arc:
                    advance(thread, arc, c)
                    threads.add(thread)
        elif optype is Any:
            if address:
                # Fork one new thread per outgoing arc
                sofar = thread.sofar
                pc = thread.pc + 1
                for arc in graph.iter_arcs(address, arc):
                    t = Thread(pc, arc.target, sofar + arc.label, arc.accept)
                    threads.add(t)
        elif op is Match:
            if thread.accept:
                yield thread.sofar
        else:
            raise Exception("Don't know what to do with %r" % op)
# Mode selectors for regex_limit(): LO finds the lowest (first) matching
# string in the graph, HI the highest (last).
LO = 0
HI = 1
def regex_limit(graph, mode, program, address):
    """Find a boundary string of the regex ``program``'s matches in
    ``graph``: the lowest match when ``mode == LO``, the highest when
    ``mode == HI``.

    Fixes vs. the original:
    * ``advance()`` takes the matched character as its third argument; the
      Char branch called it with two and raised TypeError
    * the low/accept early return referenced ``thread.label``, an attribute
      Thread does not have; the character just matched is ``op.c``
    * removed the unused ``output`` local
    """
    low = mode == LO
    threads = ThreadList(program)
    threads.add(Thread(0, address))
    arc = Arc()
    while threads:
        thread = threads.current()
        address = thread.address
        op = program[thread.pc]
        optype = type(op)
        if optype is Char:
            if address:
                arc = graph.find_arc(address, op.c, arc)
                if arc:
                    if low and arc.accept:
                        return thread.sofar + op.c
                    advance(thread, arc, op.c)
                    threads.add(thread)
        elif optype is Lit:
            if address:
                # NOTE(review): this branch probes each character of the
                # literal but never advances the thread -- looks unfinished;
                # confirm against upstream before relying on Lit here.
                labels = op.c
                for label in labels:
                    arc = graph.find_arc(address, label)
                    if arc is None:
                        return thread.sofar
            elif thread.accept:
                return thread.sofar
        elif optype is Any:
            if address:
                if low:
                    # Lowest match follows the first (smallest) arc
                    arc = graph.arc_at(address, arc)
                else:
                    # Highest match follows the last arc
                    for arc in graph.iter_arcs(address):
                        pass
                advance(thread, arc, arc.label)
                threads.add(thread)
            elif thread.accept:
                return thread.sofar
        elif op is Match:
            return thread.sofar
        else:
            raise Exception("Don't know what to do with %r" % op)
# if __name__ == "__main__":
# from whoosh import index, query
# from whoosh.filedb.filestore import RamStorage
# from whoosh.automata import fst
# from whoosh.util.testing import timing
#
# st = RamStorage()
# gw = fst.GraphWriter(st.create_file("test"))
# gw.start_field("test")
# for key in ["aaaa", "aaab", "aabb", "abbb", "babb", "bbab", "bbba"]:
# gw.insert(key)
# gw.close()
# gr = fst.GraphReader(st.open_file("test"))
#
# program = one_or_more([Lit("a")])
# print program
# program = fixup(program)
# print program
# print list(run(gr, program, gr.root("test")))
#
# ix = index.open_dir("e:/dev/src/houdini/help/index")
# r = ix.reader()
# gr = r._get_graph()
#
# # program = fixup([Any(), Any(), Any(), Any(), Any()])
# # program = fixup(concat(zero_or_more([Any()]), [Char("/")]))
# # with timing():
# # x = list(run(gr, program, gr.root("path")))
# # print len(x)
#
# q = query.Regex("path", "^.[abc].*/$")
# with timing():
# y = list(q._btexts(r))
# print len(y)
# print y[0], y[-1]
#
# pr = [Any()] + alt([Lit("c")], alt([Lit("b")], [Lit("a")])) + zero_or_more([Any()]) + [Lit("/")]
# program = fixup(pr)
# # with timing():
# # x = list(run(gr, program, gr.root("path")))
# # print len(x), x
#
# with timing():
# print "lo=", regex_limit(gr, LO, program, gr.root("path"))
# print "hi=", regex_limit(gr, HI, program, gr.root("path"))
#
#
#
# #int
# #backtrackingvm(Inst *prog, char *input)
# #{
# # enum { MAXTHREAD = 1000 };
# # Thread ready[MAXTHREAD];
# # int nready;
# # Inst *pc;
# # char *sp;
# #
# # /* queue initial thread */
# # ready[0] = thread(prog, input);
# # nready = 1;
# #
# # /* run threads in stack order */
# # while(nready > 0){
# # --nready; /* pop state for next thread to run */
# # pc = ready[nready].pc;
# # sp = ready[nready].sp;
# # for(;;){
# # switch(pc->opcode){
# # case Char:
# # if(*sp != pc->c)
# # goto Dead;
# # pc++;
# # sp++;
# # continue;
# # case Match:
# # return 1;
# # case Jmp:
# # pc = pc->x;
# # continue;
# # case Split:
# # if(nready >= MAXTHREAD){
# # fprintf(stderr, "regexp overflow");
# # return -1;
# # }
# # /* queue new thread */
# # ready[nready++] = thread(pc->y, sp);
# # pc = pc->x; /* continue current thread */
# # continue;
# # }
# # }
# # Dead:;
# # }
# # return 0;
# #}
#
#

View File

@@ -0,0 +1,135 @@
# Copyright 2014 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
import re
from whoosh.automata.fsa import ANY, EPSILON, NFA
# Operator precedence
# NOTE(review): these appear to be scaffolding for the unfinished parse()
# below -- CHOICE lists the alternation operator, ``ops`` is empty.
CHOICE = ("|", )
ops = ()
def parse(pattern):
    # NOTE(review): unfinished stub -- allocates operand/operator stacks but
    # parses nothing and implicitly returns None.  Confirm whether this is
    # dead code before calling it.
    stack = []
    ops = []
class RegexBuilder(object):
def __init__(self):
self.statenum = 1
def new_state(self):
self.statenum += 1
return self.statenum
def epsilon(self):
s = self.new_state()
e = self.new_state()
nfa = NFA(s)
nfa.add_transition(s, EPSILON, e)
nfa.add_final_state(e)
return nfa
def char(self, label):
s = self.new_state()
e = self.new_state()
nfa = NFA(s)
nfa.add_transition(s, label, e)
nfa.add_final_state(e)
return nfa
def charset(self, chars):
s = self.new_state()
e = self.new_state()
nfa = NFA(s)
for char in chars:
nfa.add_transition(s, char, e)
nfa.add_final_state(e)
return e
def dot(self):
s = self.new_state()
e = self.new_state()
nfa = NFA(s)
nfa.add_transition(s, ANY, e)
nfa.add_final_state(e)
return nfa
    def choice(self, n1, n2):
        """Return an NFA matching n1 | n2: a new start state forks via
        epsilon into both sub-NFAs, whose ends epsilon-join at a new
        accept state."""
        s = self.new_state()
        s1 = self.new_state()
        s2 = self.new_state()
        e1 = self.new_state()
        e2 = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        nfa.add_transition(s, EPSILON, s1)
        nfa.add_transition(s, EPSILON, s2)
        nfa.insert(s1, n1, e1)
        nfa.insert(s2, n2, e2)
        nfa.add_transition(e1, EPSILON, e)
        nfa.add_transition(e2, EPSILON, e)
        nfa.add_final_state(e)
        return nfa
def concat(self, n1, n2):
s = self.new_state()
m = self.new_state()
e = self.new_state()
nfa = NFA(s)
nfa.insert(s, n1, m)
nfa.insert(m, n2, e)
nfa.add_final_state(e)
return nfa
    def star(self, n):
        """Return an NFA matching zero or more repetitions of ``n``:
        epsilon edges allow skipping ``n`` entirely (s -> e) or looping
        back (m2 -> m1) for another repetition."""
        s = self.new_state()
        m1 = self.new_state()
        m2 = self.new_state()
        e = self.new_state()
        nfa = NFA(s)
        nfa.add_transition(s, EPSILON, m1)
        nfa.add_transition(s, EPSILON, e)
        nfa.insert(m1, n, m2)
        nfa.add_transition(m2, EPSILON, m1)
        nfa.add_transition(m2, EPSILON, e)
        nfa.add_final_state(e)
        return nfa
def plus(self, n):
return self.concat(n, self.star(n))
def question(self, n):
return self.choice(n, self.epsilon())