2026-1-6
This commit is contained in:
90
venv/Lib/site-packages/whoosh/automata/glob.py
Normal file
90
venv/Lib/site-packages/whoosh/automata/glob.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# Copyright 2012 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from whoosh.automata.fsa import ANY, EPSILON, NFA
|
||||
|
||||
|
||||
# Constants for glob
|
||||
_LIT = 0
|
||||
_STAR = 1
|
||||
_PLUS = 2
|
||||
_QUEST = 3
|
||||
_RANGE = 4
|
||||
|
||||
|
||||
def parse_glob(pattern, _glob_multi="*", _glob_single="?",
|
||||
_glob_range1="[", _glob_range2="]"):
|
||||
pos = 0
|
||||
last = None
|
||||
while pos < len(pattern):
|
||||
char = pattern[pos]
|
||||
pos += 1
|
||||
if char == _glob_multi: # *
|
||||
# (Ignore more than one star in a row)
|
||||
if last is not _STAR:
|
||||
yield _STAR, None
|
||||
last = _STAR
|
||||
elif char == _glob_single: # ?
|
||||
# (Ignore ? after a star)
|
||||
if last is not _STAR:
|
||||
yield _QUEST, None
|
||||
last = _QUEST
|
||||
elif char == _glob_range1: # [
|
||||
chars = set()
|
||||
negate = False
|
||||
# Take the char range specification until the ]
|
||||
while pos < len(pattern):
|
||||
char = pattern[pos]
|
||||
pos += 1
|
||||
if char == _glob_range2:
|
||||
break
|
||||
chars.add(char)
|
||||
if chars:
|
||||
yield _RANGE, (chars, negate)
|
||||
last = _RANGE
|
||||
else:
|
||||
yield _LIT, char
|
||||
last = _LIT
|
||||
|
||||
|
||||
def glob_automaton(pattern):
|
||||
nfa = NFA(0)
|
||||
i = -1
|
||||
for i, (op, arg) in enumerate(parse_glob(pattern)):
|
||||
if op is _LIT:
|
||||
nfa.add_transition(i, arg, i + 1)
|
||||
elif op is _STAR:
|
||||
nfa.add_transition(i, ANY, i + 1)
|
||||
nfa.add_transition(i, EPSILON, i + 1)
|
||||
nfa.add_transition(i + 1, EPSILON, i)
|
||||
elif op is _QUEST:
|
||||
nfa.add_transition(i, ANY, i + 1)
|
||||
elif op is _RANGE:
|
||||
for char in arg[0]:
|
||||
nfa.add_transition(i, char, i + 1)
|
||||
nfa.add_final_state(i + 1)
|
||||
return nfa
|
||||
Reference in New Issue
Block a user