2026-1-6
This commit is contained in:
30
venv/Lib/site-packages/whoosh/qparser/__init__.py
Normal file
30
venv/Lib/site-packages/whoosh/qparser/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# Copyright 2010 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from whoosh.qparser.default import *
|
||||
from whoosh.qparser.plugins import *
|
||||
from whoosh.qparser.syntax import *
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
65
venv/Lib/site-packages/whoosh/qparser/common.py
Normal file
65
venv/Lib/site-packages/whoosh/qparser/common.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright 2010 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
"""
|
||||
This module contains common utility objects/functions for the other query
|
||||
parser modules.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
from whoosh.compat import string_type
|
||||
|
||||
|
||||
class QueryParserError(Exception):
|
||||
def __init__(self, cause, msg=None):
|
||||
super(QueryParserError, self).__init__(str(cause))
|
||||
self.cause = cause
|
||||
|
||||
|
||||
def get_single_text(field, text, **kwargs):
|
||||
"""Returns the first token from an analyzer's output.
|
||||
"""
|
||||
|
||||
for t in field.process_text(text, mode="query", **kwargs):
|
||||
return t
|
||||
|
||||
|
||||
def attach(q, stxnode):
|
||||
if q:
|
||||
try:
|
||||
q.startchar = stxnode.startchar
|
||||
q.endchar = stxnode.endchar
|
||||
except AttributeError:
|
||||
raise AttributeError("Can't set attribute on %s"
|
||||
% q.__class__.__name__)
|
||||
return q
|
||||
|
||||
|
||||
def print_debug(level, msg, out=sys.stderr):
|
||||
if level:
|
||||
out.write("%s%s\n" % (" " * (level - 1), msg))
|
||||
922
venv/Lib/site-packages/whoosh/qparser/dateparse.py
Normal file
922
venv/Lib/site-packages/whoosh/qparser/dateparse.py
Normal file
@@ -0,0 +1,922 @@
|
||||
# Copyright 2010 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from whoosh.compat import string_type, iteritems
|
||||
from whoosh.qparser import plugins, syntax
|
||||
from whoosh.qparser.taggers import Tagger
|
||||
from whoosh.support.relativedelta import relativedelta
|
||||
from whoosh.util.text import rcompile
|
||||
from whoosh.util.times import adatetime, timespan
|
||||
from whoosh.util.times import fill_in, is_void, relative_days
|
||||
from whoosh.util.times import TimeError
|
||||
|
||||
|
||||
class DateParseError(Exception):
|
||||
"Represents an error in parsing date text."
|
||||
|
||||
|
||||
# Utility functions
|
||||
|
||||
def print_debug(level, msg, *args):
|
||||
if level > 0:
|
||||
print((" " * (level - 1)) + (msg % args))
|
||||
|
||||
|
||||
# Parser element objects
|
||||
|
||||
class Props(object):
|
||||
"""A dumb little object that just puts copies a dictionary into attibutes
|
||||
so I can use dot syntax instead of square bracket string item lookup and
|
||||
save a little bit of typing. Used by :class:`Regex`.
|
||||
"""
|
||||
|
||||
def __init__(self, **args):
|
||||
self.__dict__ = args
|
||||
|
||||
def __repr__(self):
|
||||
return repr(self.__dict__)
|
||||
|
||||
def get(self, key, default=None):
|
||||
return self.__dict__.get(key, default)
|
||||
|
||||
|
||||
class ParserBase(object):
|
||||
"""Base class for date parser elements.
|
||||
"""
|
||||
|
||||
def to_parser(self, e):
|
||||
if isinstance(e, string_type):
|
||||
return Regex(e)
|
||||
else:
|
||||
return e
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
raise NotImplementedError
|
||||
|
||||
def date_from(self, text, dt=None, pos=0, debug=-9999):
|
||||
if dt is None:
|
||||
dt = datetime.now()
|
||||
|
||||
d, pos = self.parse(text, dt, pos, debug + 1)
|
||||
return d
|
||||
|
||||
|
||||
class MultiBase(ParserBase):
|
||||
"""Base class for date parser elements such as Sequence and Bag that
|
||||
have sub-elements.
|
||||
"""
|
||||
|
||||
def __init__(self, elements, name=None):
|
||||
"""
|
||||
:param elements: the sub-elements to match.
|
||||
:param name: a name for this element (for debugging purposes only).
|
||||
"""
|
||||
|
||||
self.elements = [self.to_parser(e) for e in elements]
|
||||
self.name = name
|
||||
|
||||
def __repr__(self):
|
||||
return "%s<%s>%r" % (self.__class__.__name__, self.name or '',
|
||||
self.elements)
|
||||
|
||||
|
||||
class Sequence(MultiBase):
|
||||
"""Merges the dates parsed by a sequence of sub-elements.
|
||||
"""
|
||||
|
||||
def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", name=None,
|
||||
progressive=False):
|
||||
"""
|
||||
:param elements: the sequence of sub-elements to parse.
|
||||
:param sep: a separator regular expression to match between elements,
|
||||
or None to not have separators.
|
||||
:param name: a name for this element (for debugging purposes only).
|
||||
:param progressive: if True, elements after the first do not need to
|
||||
match. That is, for elements (a, b, c) and progressive=True, the
|
||||
sequence matches like ``a[b[c]]``.
|
||||
"""
|
||||
|
||||
super(Sequence, self).__init__(elements, name)
|
||||
self.sep_pattern = sep
|
||||
if sep:
|
||||
self.sep_expr = rcompile(sep, re.IGNORECASE)
|
||||
else:
|
||||
self.sep_expr = None
|
||||
self.progressive = progressive
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
d = adatetime()
|
||||
first = True
|
||||
foundall = False
|
||||
failed = False
|
||||
|
||||
print_debug(debug, "Seq %s sep=%r text=%r", self.name,
|
||||
self.sep_pattern, text[pos:])
|
||||
for e in self.elements:
|
||||
print_debug(debug, "Seq %s text=%r", self.name, text[pos:])
|
||||
if self.sep_expr and not first:
|
||||
print_debug(debug, "Seq %s looking for sep", self.name)
|
||||
m = self.sep_expr.match(text, pos)
|
||||
if m:
|
||||
pos = m.end()
|
||||
else:
|
||||
print_debug(debug, "Seq %s didn't find sep", self.name)
|
||||
break
|
||||
|
||||
print_debug(debug, "Seq %s trying=%r at=%s", self.name, e, pos)
|
||||
|
||||
try:
|
||||
at, newpos = e.parse(text, dt, pos=pos, debug=debug + 1)
|
||||
except TimeError:
|
||||
failed = True
|
||||
break
|
||||
|
||||
print_debug(debug, "Seq %s result=%r", self.name, at)
|
||||
if not at:
|
||||
break
|
||||
pos = newpos
|
||||
|
||||
print_debug(debug, "Seq %s adding=%r to=%r", self.name, at, d)
|
||||
try:
|
||||
d = fill_in(d, at)
|
||||
except TimeError:
|
||||
print_debug(debug, "Seq %s Error in fill_in", self.name)
|
||||
failed = True
|
||||
break
|
||||
print_debug(debug, "Seq %s filled date=%r", self.name, d)
|
||||
|
||||
first = False
|
||||
else:
|
||||
foundall = True
|
||||
|
||||
if not failed and (foundall or (not first and self.progressive)):
|
||||
print_debug(debug, "Seq %s final=%r", self.name, d)
|
||||
return (d, pos)
|
||||
else:
|
||||
print_debug(debug, "Seq %s failed", self.name)
|
||||
return (None, None)
|
||||
|
||||
|
||||
class Combo(Sequence):
|
||||
"""Parses a sequence of elements in order and combines the dates parsed
|
||||
by the sub-elements somehow. The default behavior is to accept two dates
|
||||
from the sub-elements and turn them into a range.
|
||||
"""
|
||||
|
||||
def __init__(self, elements, fn=None, sep="(\\s+|\\s*,\\s*)", min=2, max=2,
|
||||
name=None):
|
||||
"""
|
||||
:param elements: the sequence of sub-elements to parse.
|
||||
:param fn: a function to run on all dates found. It should return a
|
||||
datetime, adatetime, or timespan object. If this argument is None,
|
||||
the default behavior accepts two dates and returns a timespan.
|
||||
:param sep: a separator regular expression to match between elements,
|
||||
or None to not have separators.
|
||||
:param min: the minimum number of dates required from the sub-elements.
|
||||
:param max: the maximum number of dates allowed from the sub-elements.
|
||||
:param name: a name for this element (for debugging purposes only).
|
||||
"""
|
||||
|
||||
super(Combo, self).__init__(elements, sep=sep, name=name)
|
||||
self.fn = fn
|
||||
self.min = min
|
||||
self.max = max
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
dates = []
|
||||
first = True
|
||||
|
||||
print_debug(debug, "Combo %s sep=%r text=%r", self.name,
|
||||
self.sep_pattern, text[pos:])
|
||||
for e in self.elements:
|
||||
if self.sep_expr and not first:
|
||||
print_debug(debug, "Combo %s looking for sep at %r",
|
||||
self.name, text[pos:])
|
||||
m = self.sep_expr.match(text, pos)
|
||||
if m:
|
||||
pos = m.end()
|
||||
else:
|
||||
print_debug(debug, "Combo %s didn't find sep", self.name)
|
||||
return (None, None)
|
||||
|
||||
print_debug(debug, "Combo %s trying=%r", self.name, e)
|
||||
try:
|
||||
at, pos = e.parse(text, dt, pos, debug + 1)
|
||||
except TimeError:
|
||||
at, pos = None, None
|
||||
|
||||
print_debug(debug, "Combo %s result=%r", self.name, at)
|
||||
if at is None:
|
||||
return (None, None)
|
||||
|
||||
first = False
|
||||
if is_void(at):
|
||||
continue
|
||||
if len(dates) == self.max:
|
||||
print_debug(debug, "Combo %s length > %s", self.name, self.max)
|
||||
return (None, None)
|
||||
dates.append(at)
|
||||
|
||||
print_debug(debug, "Combo %s dates=%r", self.name, dates)
|
||||
if len(dates) < self.min:
|
||||
print_debug(debug, "Combo %s length < %s", self.name, self.min)
|
||||
return (None, None)
|
||||
|
||||
return (self.dates_to_timespan(dates), pos)
|
||||
|
||||
def dates_to_timespan(self, dates):
|
||||
if self.fn:
|
||||
return self.fn(dates)
|
||||
elif len(dates) == 2:
|
||||
return timespan(dates[0], dates[1])
|
||||
else:
|
||||
raise DateParseError("Don't know what to do with %r" % (dates,))
|
||||
|
||||
|
||||
class Choice(MultiBase):
|
||||
"""Returns the date from the first of its sub-elements that matches.
|
||||
"""
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
print_debug(debug, "Choice %s text=%r", self.name, text[pos:])
|
||||
for e in self.elements:
|
||||
print_debug(debug, "Choice %s trying=%r", self.name, e)
|
||||
|
||||
try:
|
||||
d, newpos = e.parse(text, dt, pos, debug + 1)
|
||||
except TimeError:
|
||||
d, newpos = None, None
|
||||
if d:
|
||||
print_debug(debug, "Choice %s matched", self.name)
|
||||
return (d, newpos)
|
||||
print_debug(debug, "Choice %s no match", self.name)
|
||||
return (None, None)
|
||||
|
||||
|
||||
class Bag(MultiBase):
|
||||
"""Parses its sub-elements in any order and merges the dates.
|
||||
"""
|
||||
|
||||
def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", onceper=True,
|
||||
requireall=False, allof=None, anyof=None, name=None):
|
||||
"""
|
||||
:param elements: the sub-elements to parse.
|
||||
:param sep: a separator regular expression to match between elements,
|
||||
or None to not have separators.
|
||||
:param onceper: only allow each element to match once.
|
||||
:param requireall: if True, the sub-elements can match in any order,
|
||||
but they must all match.
|
||||
:param allof: a list of indexes into the list of elements. When this
|
||||
argument is not None, this element matches only if all the
|
||||
indicated sub-elements match.
|
||||
:param allof: a list of indexes into the list of elements. When this
|
||||
argument is not None, this element matches only if any of the
|
||||
indicated sub-elements match.
|
||||
:param name: a name for this element (for debugging purposes only).
|
||||
"""
|
||||
|
||||
super(Bag, self).__init__(elements, name)
|
||||
self.sep_expr = rcompile(sep, re.IGNORECASE)
|
||||
self.onceper = onceper
|
||||
self.requireall = requireall
|
||||
self.allof = allof
|
||||
self.anyof = anyof
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
first = True
|
||||
d = adatetime()
|
||||
seen = [False] * len(self.elements)
|
||||
|
||||
while True:
|
||||
newpos = pos
|
||||
print_debug(debug, "Bag %s text=%r", self.name, text[pos:])
|
||||
if not first:
|
||||
print_debug(debug, "Bag %s looking for sep", self.name)
|
||||
m = self.sep_expr.match(text, pos)
|
||||
if m:
|
||||
newpos = m.end()
|
||||
else:
|
||||
print_debug(debug, "Bag %s didn't find sep", self.name)
|
||||
break
|
||||
|
||||
for i, e in enumerate(self.elements):
|
||||
print_debug(debug, "Bag %s trying=%r", self.name, e)
|
||||
|
||||
try:
|
||||
at, xpos = e.parse(text, dt, newpos, debug + 1)
|
||||
except TimeError:
|
||||
at, xpos = None, None
|
||||
|
||||
print_debug(debug, "Bag %s result=%r", self.name, at)
|
||||
if at:
|
||||
if self.onceper and seen[i]:
|
||||
return (None, None)
|
||||
|
||||
d = fill_in(d, at)
|
||||
newpos = xpos
|
||||
seen[i] = True
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
pos = newpos
|
||||
if self.onceper and all(seen):
|
||||
break
|
||||
|
||||
first = False
|
||||
|
||||
if (not any(seen)
|
||||
or (self.allof and not all(seen[pos] for pos in self.allof))
|
||||
or (self.anyof and not any(seen[pos] for pos in self.anyof))
|
||||
or (self.requireall and not all(seen))):
|
||||
return (None, None)
|
||||
|
||||
print_debug(debug, "Bag %s final=%r", self.name, d)
|
||||
return (d, pos)
|
||||
|
||||
|
||||
class Optional(ParserBase):
|
||||
"""Wraps a sub-element to indicate that the sub-element is optional.
|
||||
"""
|
||||
|
||||
def __init__(self, element):
|
||||
self.element = self.to_parser(element)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%r)" % (self.__class__.__name__, self.element)
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
try:
|
||||
d, pos = self.element.parse(text, dt, pos, debug + 1)
|
||||
except TimeError:
|
||||
d, pos = None, None
|
||||
|
||||
if d:
|
||||
return (d, pos)
|
||||
else:
|
||||
return (adatetime(), pos)
|
||||
|
||||
|
||||
class ToEnd(ParserBase):
|
||||
"""Wraps a sub-element and requires that the end of the sub-element's match
|
||||
be the end of the text.
|
||||
"""
|
||||
|
||||
def __init__(self, element):
|
||||
self.element = element
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%r)" % (self.__class__.__name__, self.element)
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
try:
|
||||
d, pos = self.element.parse(text, dt, pos, debug + 1)
|
||||
except TimeError:
|
||||
d, pos = None, None
|
||||
|
||||
if d and pos == len(text):
|
||||
return (d, pos)
|
||||
else:
|
||||
return (None, None)
|
||||
|
||||
|
||||
class Regex(ParserBase):
|
||||
"""Matches a regular expression and maps named groups in the pattern to
|
||||
datetime attributes using a function or overridden method.
|
||||
|
||||
There are two points at which you can customize the behavior of this class,
|
||||
either by supplying functions to the initializer or overriding methods.
|
||||
|
||||
* The ``modify`` function or ``modify_props`` method takes a ``Props``
|
||||
object containing the named groups and modifies its values (in place).
|
||||
* The ``fn`` function or ``props_to_date`` method takes a ``Props`` object
|
||||
and the base datetime and returns an adatetime/datetime.
|
||||
"""
|
||||
|
||||
fn = None
|
||||
modify = None
|
||||
|
||||
def __init__(self, pattern, fn=None, modify=None):
|
||||
self.pattern = pattern
|
||||
self.expr = rcompile(pattern, re.IGNORECASE)
|
||||
self.fn = fn
|
||||
self.modify = modify
|
||||
|
||||
def __repr__(self):
|
||||
return "<%r>" % (self.pattern,)
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
m = self.expr.match(text, pos)
|
||||
if not m:
|
||||
return (None, None)
|
||||
|
||||
props = self.extract(m)
|
||||
self.modify_props(props)
|
||||
|
||||
try:
|
||||
d = self.props_to_date(props, dt)
|
||||
except TimeError:
|
||||
d = None
|
||||
|
||||
if d:
|
||||
return (d, m.end())
|
||||
else:
|
||||
return (None, None)
|
||||
|
||||
def extract(self, match):
|
||||
d = match.groupdict()
|
||||
for key, value in iteritems(d):
|
||||
try:
|
||||
value = int(value)
|
||||
d[key] = value
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return Props(**d)
|
||||
|
||||
def modify_props(self, props):
|
||||
if self.modify:
|
||||
self.modify(props)
|
||||
|
||||
def props_to_date(self, props, dt):
|
||||
if self.fn:
|
||||
return self.fn(props, dt)
|
||||
else:
|
||||
args = {}
|
||||
for key in adatetime.units:
|
||||
args[key] = props.get(key)
|
||||
return adatetime(**args)
|
||||
|
||||
|
||||
class Month(Regex):
|
||||
def __init__(self, *patterns):
|
||||
self.patterns = patterns
|
||||
self.exprs = [rcompile(pat, re.IGNORECASE) for pat in self.patterns]
|
||||
|
||||
self.pattern = ("(?P<month>"
|
||||
+ "|".join("(%s)" % pat for pat in self.patterns)
|
||||
+ ")")
|
||||
self.expr = rcompile(self.pattern, re.IGNORECASE)
|
||||
|
||||
def modify_props(self, p):
|
||||
text = p.month
|
||||
for i, expr in enumerate(self.exprs):
|
||||
m = expr.match(text)
|
||||
if m:
|
||||
p.month = i + 1
|
||||
break
|
||||
|
||||
|
||||
class PlusMinus(Regex):
|
||||
def __init__(self, years, months, weeks, days, hours, minutes, seconds):
|
||||
rel_years = "((?P<years>[0-9]+) *(%s))?" % years
|
||||
rel_months = "((?P<months>[0-9]+) *(%s))?" % months
|
||||
rel_weeks = "((?P<weeks>[0-9]+) *(%s))?" % weeks
|
||||
rel_days = "((?P<days>[0-9]+) *(%s))?" % days
|
||||
rel_hours = "((?P<hours>[0-9]+) *(%s))?" % hours
|
||||
rel_mins = "((?P<mins>[0-9]+) *(%s))?" % minutes
|
||||
rel_secs = "((?P<secs>[0-9]+) *(%s))?" % seconds
|
||||
|
||||
self.pattern = ("(?P<dir>[+-]) *%s *%s *%s *%s *%s *%s *%s(?=(\\W|$))"
|
||||
% (rel_years, rel_months, rel_weeks, rel_days,
|
||||
rel_hours, rel_mins, rel_secs))
|
||||
self.expr = rcompile(self.pattern, re.IGNORECASE)
|
||||
|
||||
def props_to_date(self, p, dt):
|
||||
if p.dir == "-":
|
||||
dir = -1
|
||||
else:
|
||||
dir = 1
|
||||
|
||||
delta = relativedelta(years=(p.get("years") or 0) * dir,
|
||||
months=(p.get("months") or 0) * dir,
|
||||
weeks=(p.get("weeks") or 0) * dir,
|
||||
days=(p.get("days") or 0) * dir,
|
||||
hours=(p.get("hours") or 0) * dir,
|
||||
minutes=(p.get("mins") or 0) * dir,
|
||||
seconds=(p.get("secs") or 0) * dir)
|
||||
return dt + delta
|
||||
|
||||
|
||||
class Daynames(Regex):
|
||||
def __init__(self, next, last, daynames):
|
||||
self.next_pattern = next
|
||||
self.last_pattern = last
|
||||
self._dayname_exprs = tuple(rcompile(pat, re.IGNORECASE)
|
||||
for pat in daynames)
|
||||
dn_pattern = "|".join(daynames)
|
||||
self.pattern = ("(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))"
|
||||
% (next, last, dn_pattern))
|
||||
self.expr = rcompile(self.pattern, re.IGNORECASE)
|
||||
|
||||
def props_to_date(self, p, dt):
|
||||
if re.match(p.dir, self.last_pattern):
|
||||
dir = -1
|
||||
else:
|
||||
dir = 1
|
||||
|
||||
for daynum, expr in enumerate(self._dayname_exprs):
|
||||
m = expr.match(p.day)
|
||||
if m:
|
||||
break
|
||||
current_daynum = dt.weekday()
|
||||
days_delta = relative_days(current_daynum, daynum, dir)
|
||||
|
||||
d = dt.date() + timedelta(days=days_delta)
|
||||
return adatetime(year=d.year, month=d.month, day=d.day)
|
||||
|
||||
|
||||
class Time12(Regex):
|
||||
def __init__(self):
|
||||
self.pattern = ("(?P<hour>[1-9]|10|11|12)(:(?P<mins>[0-5][0-9])"
|
||||
"(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?)?"
|
||||
"\\s*(?P<ampm>am|pm)(?=(\\W|$))")
|
||||
self.expr = rcompile(self.pattern, re.IGNORECASE)
|
||||
|
||||
def props_to_date(self, p, dt):
|
||||
isam = p.ampm.lower().startswith("a")
|
||||
|
||||
if p.hour == 12:
|
||||
if isam:
|
||||
hr = 0
|
||||
else:
|
||||
hr = 12
|
||||
else:
|
||||
hr = p.hour
|
||||
if not isam:
|
||||
hr += 12
|
||||
|
||||
return adatetime(hour=hr, minute=p.mins, second=p.secs, microsecond=p.usecs)
|
||||
|
||||
|
||||
# Top-level parser classes
|
||||
|
||||
class DateParser(object):
|
||||
"""Base class for locale-specific parser classes.
|
||||
"""
|
||||
|
||||
day = Regex("(?P<day>([123][0-9])|[1-9])(?=(\\W|$))(?!=:)",
|
||||
lambda p, dt: adatetime(day=p.day))
|
||||
year = Regex("(?P<year>[0-9]{4})(?=(\\W|$))",
|
||||
lambda p, dt: adatetime(year=p.year))
|
||||
time24 = Regex("(?P<hour>([0-1][0-9])|(2[0-3])):(?P<mins>[0-5][0-9])"
|
||||
"(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?"
|
||||
"(?=(\\W|$))",
|
||||
lambda p, dt: adatetime(hour=p.hour, minute=p.mins,
|
||||
second=p.secs, microsecond=p.usecs))
|
||||
time12 = Time12()
|
||||
|
||||
def __init__(self):
|
||||
simple_year = "(?P<year>[0-9]{4})"
|
||||
simple_month = "(?P<month>[0-1][0-9])"
|
||||
simple_day = "(?P<day>[0-3][0-9])"
|
||||
simple_hour = "(?P<hour>([0-1][0-9])|(2[0-3]))"
|
||||
simple_minute = "(?P<minute>[0-5][0-9])"
|
||||
simple_second = "(?P<second>[0-5][0-9])"
|
||||
simple_usec = "(?P<microsecond>[0-9]{6})"
|
||||
|
||||
tup = (simple_year, simple_month, simple_day, simple_hour,
|
||||
simple_minute, simple_second, simple_usec)
|
||||
simple_seq = Sequence(tup, sep="[- .:/]*", name="simple",
|
||||
progressive=True)
|
||||
self.simple = Sequence((simple_seq, "(?=(\\s|$))"), sep='')
|
||||
|
||||
self.setup()
|
||||
|
||||
def setup(self):
|
||||
raise NotImplementedError
|
||||
|
||||
#
|
||||
|
||||
def get_parser(self):
|
||||
return self.all
|
||||
|
||||
def parse(self, text, dt, pos=0, debug=-9999):
|
||||
parser = self.get_parser()
|
||||
|
||||
d, newpos = parser.parse(text, dt, pos=pos, debug=debug)
|
||||
if isinstance(d, (adatetime, timespan)):
|
||||
d = d.disambiguated(dt)
|
||||
|
||||
return (d, newpos)
|
||||
|
||||
def date_from(self, text, basedate=None, pos=0, debug=-9999, toend=True):
|
||||
if basedate is None:
|
||||
basedate = datetime.utcnow()
|
||||
|
||||
parser = self.get_parser()
|
||||
if toend:
|
||||
parser = ToEnd(parser)
|
||||
|
||||
d = parser.date_from(text, basedate, pos=pos, debug=debug)
|
||||
if isinstance(d, (adatetime, timespan)):
|
||||
d = d.disambiguated(basedate)
|
||||
return d
|
||||
|
||||
|
||||
class English(DateParser):
|
||||
day = Regex("(?P<day>([123][0-9])|[1-9])(st|nd|rd|th)?(?=(\\W|$))",
|
||||
lambda p, dt: adatetime(day=p.day))
|
||||
|
||||
def setup(self):
|
||||
self.plusdate = PlusMinus("years|year|yrs|yr|ys|y",
|
||||
"months|month|mons|mon|mos|mo",
|
||||
"weeks|week|wks|wk|ws|w",
|
||||
"days|day|dys|dy|ds|d",
|
||||
"hours|hour|hrs|hr|hs|h",
|
||||
"minutes|minute|mins|min|ms|m",
|
||||
"seconds|second|secs|sec|s")
|
||||
|
||||
self.dayname = Daynames("next", "last",
|
||||
("monday|mon|mo", "tuesday|tues|tue|tu",
|
||||
"wednesday|wed|we", "thursday|thur|thu|th",
|
||||
"friday|fri|fr", "saturday|sat|sa",
|
||||
"sunday|sun|su"))
|
||||
|
||||
midnight_l = lambda p, dt: adatetime(hour=0, minute=0, second=0,
|
||||
microsecond=0)
|
||||
midnight = Regex("midnight", midnight_l)
|
||||
|
||||
noon_l = lambda p, dt: adatetime(hour=12, minute=0, second=0,
|
||||
microsecond=0)
|
||||
noon = Regex("noon", noon_l)
|
||||
|
||||
now = Regex("now", lambda p, dt: dt)
|
||||
|
||||
self.time = Choice((self.time12, self.time24, midnight, noon, now),
|
||||
name="time")
|
||||
|
||||
def tomorrow_to_date(p, dt):
|
||||
d = dt.date() + timedelta(days=+1)
|
||||
return adatetime(year=d.year, month=d.month, day=d.day)
|
||||
tomorrow = Regex("tomorrow", tomorrow_to_date)
|
||||
|
||||
def yesterday_to_date(p, dt):
|
||||
d = dt.date() + timedelta(days=-1)
|
||||
return adatetime(year=d.year, month=d.month, day=d.day)
|
||||
yesterday = Regex("yesterday", yesterday_to_date)
|
||||
|
||||
thisyear = Regex("this year", lambda p, dt: adatetime(year=dt.year))
|
||||
thismonth = Regex("this month",
|
||||
lambda p, dt: adatetime(year=dt.year,
|
||||
month=dt.month))
|
||||
today = Regex("today",
|
||||
lambda p, dt: adatetime(year=dt.year, month=dt.month,
|
||||
day=dt.day))
|
||||
|
||||
self.month = Month("january|jan", "february|febuary|feb", "march|mar",
|
||||
"april|apr", "may", "june|jun", "july|jul",
|
||||
"august|aug", "september|sept|sep", "october|oct",
|
||||
"november|nov", "december|dec")
|
||||
|
||||
# If you specify a day number you must also specify a month... this
|
||||
# Choice captures that constraint
|
||||
|
||||
self.dmy = Choice((Sequence((self.day, self.month, self.year),
|
||||
name="dmy"),
|
||||
Sequence((self.month, self.day, self.year),
|
||||
name="mdy"),
|
||||
Sequence((self.year, self.month, self.day),
|
||||
name="ymd"),
|
||||
Sequence((self.year, self.day, self.month),
|
||||
name="ydm"),
|
||||
Sequence((self.day, self.month), name="dm"),
|
||||
Sequence((self.month, self.day), name="md"),
|
||||
Sequence((self.month, self.year), name="my"),
|
||||
self.month, self.year, self.dayname, tomorrow,
|
||||
yesterday, thisyear, thismonth, today, now,
|
||||
), name="date")
|
||||
|
||||
self.datetime = Bag((self.time, self.dmy), name="datetime")
|
||||
self.bundle = Choice((self.plusdate, self.datetime, self.simple),
|
||||
name="bundle")
|
||||
self.torange = Combo((self.bundle, "to", self.bundle), name="torange")
|
||||
|
||||
self.all = Choice((self.torange, self.bundle), name="all")
|
||||
|
||||
|
||||
# QueryParser plugin
|
||||
|
||||
class DateParserPlugin(plugins.Plugin):
|
||||
"""Adds more powerful parsing of DATETIME fields.
|
||||
|
||||
>>> parser.add_plugin(DateParserPlugin())
|
||||
>>> parser.parse(u"date:'last tuesday'")
|
||||
"""
|
||||
|
||||
def __init__(self, basedate=None, dateparser=None, callback=None,
|
||||
free=False, free_expr="([A-Za-z][A-Za-z_0-9]*):([^^]+)"):
|
||||
"""
|
||||
:param basedate: a datetime object representing the current time
|
||||
against which to measure relative dates. If you do not supply this
|
||||
argument, the plugin uses ``datetime.utcnow()``.
|
||||
:param dateparser: an instance of
|
||||
:class:`whoosh.qparser.dateparse.DateParser`. If you do not supply
|
||||
this argument, the plugin automatically uses
|
||||
:class:`whoosh.qparser.dateparse.English`.
|
||||
:param callback: a callback function for parsing errors. This allows
|
||||
you to provide feedback to the user about problems parsing dates.
|
||||
:param remove: if True, unparseable dates are removed from the token
|
||||
stream instead of being replaced with ErrorToken.
|
||||
:param free: if True, this plugin will install a filter early in the
|
||||
parsing process and try to find undelimited dates such as
|
||||
``date:last tuesday``. Note that allowing this could result in
|
||||
normal query words accidentally being parsed as dates sometimes.
|
||||
"""
|
||||
|
||||
self.basedate = basedate
|
||||
if dateparser is None:
|
||||
dateparser = English()
|
||||
self.dateparser = dateparser
|
||||
self.callback = callback
|
||||
self.free = free
|
||||
self.freeexpr = free_expr
|
||||
|
||||
def taggers(self, parser):
|
||||
if self.free:
|
||||
# If we're tokenizing, we have to go before the FieldsPlugin
|
||||
return [(DateTagger(self, self.freeexpr), -1)]
|
||||
else:
|
||||
return ()
|
||||
|
||||
def filters(self, parser):
|
||||
# Run the filter after the FieldsPlugin assigns field names
|
||||
return [(self.do_dates, 110)]
|
||||
|
||||
def errorize(self, message, node):
|
||||
if self.callback:
|
||||
self.callback(message)
|
||||
return syntax.ErrorNode(message, node)
|
||||
|
||||
def text_to_dt(self, node):
    """Converts a text node into a :class:`DateTimeNode` by parsing its
    text as a date, or into an error node if the text cannot be parsed.
    """

    text = node.text
    try:
        dt = self.dateparser.date_from(text, self.basedate)
        if dt is None:
            # Parser found nothing date-like: report and bail out
            # (ErrorNode proxies the original node's char range)
            return self.errorize(text, node)
        else:
            n = DateTimeNode(node.fieldname, dt, node.boost)
    except DateParseError:
        # sys.exc_info() instead of "except ... as e" keeps the source
        # compatible with both Python 2 and 3
        e = sys.exc_info()[1]
        n = self.errorize(e, node)
    # Preserve the original node's position in the query string
    n.startchar = node.startchar
    n.endchar = node.endchar
    return n
|
||||
|
||||
def range_to_dt(self, node):
    """Converts a range node into a :class:`DateRangeNode` by parsing its
    start and end strings as dates, or returns an error node if either
    endpoint fails to parse.
    """

    start = end = None
    dp = self.dateparser.get_parser()

    if node.start:
        start = dp.date_from(node.start, self.basedate)
        if start is None:
            return self.errorize(node.start, node)
    if node.end:
        end = dp.date_from(node.end, self.basedate)
        if end is None:
            return self.errorize(node.end, node)

    if start and end:
        # Both endpoints given: resolve ambiguity over the whole span
        ts = timespan(start, end).disambiguated(self.basedate)
        start, end = ts.start, ts.end
    elif start:
        # Open-ended range: an ambiguous start may resolve to a span;
        # use its beginning as the range start
        start = start.disambiguated(self.basedate)
        if isinstance(start, timespan):
            start = start.start
    elif end:
        # Symmetrically, use the end of an ambiguous end span
        end = end.disambiguated(self.basedate)
        if isinstance(end, timespan):
            end = end.end
    drn = DateRangeNode(node.fieldname, start, end, boost=node.boost)
    drn.startchar = node.startchar
    drn.endchar = node.endchar
    return drn
|
||||
|
||||
def do_dates(self, parser, group):
    """Filter that walks the syntax tree and replaces text/range nodes
    belonging to DATETIME fields with date-specific nodes.
    """

    schema = parser.schema
    if not schema:
        # Without a schema we can't tell which fields are dates
        return group

    # Set of field names whose type is DATETIME
    from whoosh.fields import DATETIME
    datefields = frozenset(fieldname for fieldname, field
                           in parser.schema.items()
                           if isinstance(field, DATETIME))

    for i, node in enumerate(group):
        if node.has_fieldname:
            fname = node.fieldname or parser.fieldname
        else:
            fname = None

        if isinstance(node, syntax.GroupNode):
            # Recurse into sub-groups
            group[i] = self.do_dates(parser, node)
        elif fname in datefields:
            if node.has_text:
                group[i] = self.text_to_dt(node)
            elif isinstance(node, syntax.RangeNode):
                group[i] = self.range_to_dt(node)
    return group
|
||||
|
||||
|
||||
class DateTimeNode(syntax.SyntaxNode):
    """Syntax tree node holding a parsed date value: either a single
    ``datetime`` (producing a term query) or a ``timespan`` (producing a
    date-range query).
    """

    has_fieldname = True
    has_boost = True

    def __init__(self, fieldname, dt, boost=1.0):
        """
        :param fieldname: name of the (DATETIME) field this date applies to.
        :param dt: a ``datetime`` or ``timespan`` object.
        :param boost: boost to apply to the generated query.
        """

        self.fieldname = fieldname
        self.dt = dt
        # BUG FIX: was hard-coded to 1.0, silently discarding the caller's
        # boost argument
        self.boost = boost

    def r(self):
        return repr(self.dt)

    def query(self, parser):
        """Returns a Term query for a single datetime or a DateRange query
        for a timespan.
        """

        from whoosh import query

        fieldname = self.fieldname or parser.fieldname
        field = parser.schema[fieldname]
        dt = self.dt
        if isinstance(self.dt, datetime):
            btext = field.to_bytes(dt)
            return query.Term(fieldname, btext, boost=self.boost)
        elif isinstance(self.dt, timespan):
            return query.DateRange(fieldname, dt.start, dt.end,
                                   boost=self.boost)
        else:
            raise Exception("Unknown time object: %r" % dt)
|
||||
|
||||
|
||||
class DateRangeNode(syntax.SyntaxNode):
    """Syntax tree node holding a parsed date range (start and/or end may
    be None for open-ended ranges); produces a DateRange query.
    """

    has_fieldname = True
    has_boost = True

    def __init__(self, fieldname, start, end, boost=1.0):
        """
        :param fieldname: name of the (DATETIME) field this range applies to.
        :param start: range start (datetime or None for open start).
        :param end: range end (datetime or None for open end).
        :param boost: boost to apply to the generated query.
        """

        self.fieldname = fieldname
        self.start = start
        self.end = end
        # BUG FIX: was hard-coded to 1.0, silently discarding the caller's
        # boost argument
        self.boost = boost

    def r(self):
        return "%r-%r" % (self.start, self.end)

    def query(self, parser):
        """Returns a DateRange query over this node's start/end."""

        from whoosh import query

        fieldname = self.fieldname or parser.fieldname
        return query.DateRange(fieldname, self.start, self.end,
                               boost=self.boost)
|
||||
|
||||
|
||||
class DateTagger(Tagger):
    """Tagger installed by :class:`DateParserPlugin` (when ``free=True``)
    that recognizes undelimited ``field:date text`` expressions in the
    query string.
    """

    def __init__(self, plugin, expr):
        # The owning plugin supplies the dateparser and basedate
        self.plugin = plugin
        # expr has two groups: (1) field name, (2) the text after the colon
        self.expr = rcompile(expr, re.IGNORECASE)

    def match(self, parser, text, pos):
        """Returns a :class:`DateTimeNode` if ``text`` at ``pos`` looks
        like ``field:date`` for a DATETIME field, otherwise None.
        """

        from whoosh.fields import DATETIME

        match = self.expr.match(text, pos)
        if match:
            fieldname = match.group(1)
            dtext = match.group(2)

            # Only treat this as a date if the named field is a DATETIME
            if parser.schema and fieldname in parser.schema:
                field = parser.schema[fieldname]
                if isinstance(field, DATETIME):
                    plugin = self.plugin
                    dateparser = plugin.dateparser
                    basedate = plugin.basedate

                    # parse() returns the date object (or None) and the end
                    # position of the parsed text within dtext
                    d, newpos = dateparser.parse(dtext, basedate)
                    if d:
                        node = DateTimeNode(fieldname, d)
                        node.startchar = match.start()
                        # End of the parsed date, offset to the position of
                        # group 2 in the overall text
                        node.endchar = newpos + match.start(2)
                        return node
|
||||
439
venv/Lib/site-packages/whoosh/qparser/default.py
Normal file
439
venv/Lib/site-packages/whoosh/qparser/default.py
Normal file
@@ -0,0 +1,439 @@
|
||||
# Copyright 2011 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import sys
|
||||
|
||||
from whoosh import query
|
||||
from whoosh.compat import text_type
|
||||
from whoosh.qparser import syntax
|
||||
from whoosh.qparser.common import print_debug, QueryParserError
|
||||
|
||||
|
||||
# Query parser object
|
||||
|
||||
class QueryParser(object):
    """A hand-written query parser built on modular plug-ins. The default
    configuration implements a powerful fielded query language similar to
    Lucene's.

    You can use the ``plugins`` argument when creating the object to override
    the default list of plug-ins, and/or use ``add_plugin()`` and/or
    ``remove_plugin_class()`` to change the plug-ins included in the parser.

    >>> from whoosh import qparser
    >>> parser = qparser.QueryParser("content", schema)
    >>> parser.remove_plugin_class(qparser.WildcardPlugin)
    >>> parser.add_plugin(qparser.PrefixPlugin())
    >>> parser.parse(u"hello there")
    And([Term("content", u"hello"), Term("content", u"there")])
    """

    def __init__(self, fieldname, schema, plugins=None, termclass=query.Term,
                 phraseclass=query.Phrase, group=syntax.AndGroup):
        """
        :param fieldname: the default field -- the parser uses this as the
            field for any terms without an explicit field.
        :param schema: a :class:`whoosh.fields.Schema` object to use when
            parsing. The appropriate fields in the schema will be used to
            tokenize terms/phrases before they are turned into query objects.
            You can specify None for the schema to create a parser that does
            not analyze the text of the query, usually for testing purposes.
        :param plugins: a list of plugins to use. WhitespacePlugin is
            automatically included, do not put it in this list. This overrides
            the default list of plugins. Classes in the list will be
            automatically instantiated.
        :param termclass: the query class to use for individual search terms.
            The default is :class:`whoosh.query.Term`.
        :param phraseclass: the query class to use for phrases. The default
            is :class:`whoosh.query.Phrase`.
        :param group: the default grouping. ``AndGroup`` makes terms required
            by default. ``OrGroup`` makes terms optional by default.
        """

        self.fieldname = fieldname
        self.schema = schema
        self.termclass = termclass
        self.phraseclass = phraseclass
        self.group = group
        self.plugins = []

        # Fall back to the standard plugin set when the caller doesn't
        # provide one; WhitespacePlugin is always added regardless
        if plugins is None:
            plugins = self.default_set()
        self._add_ws_plugin()
        self.add_plugins(plugins)

    def default_set(self):
        """Returns the default list of plugins to use.
        """

        from whoosh.qparser import plugins

        return [plugins.WhitespacePlugin(),
                plugins.SingleQuotePlugin(),
                plugins.FieldsPlugin(),
                plugins.WildcardPlugin(),
                plugins.PhrasePlugin(),
                plugins.RangePlugin(),
                plugins.GroupPlugin(),
                plugins.OperatorsPlugin(),
                plugins.BoostPlugin(),
                plugins.EveryPlugin(),
                ]

    def add_plugins(self, pins):
        """Adds the given list of plugins to the list of plugins in this
        parser.
        """

        for pin in pins:
            self.add_plugin(pin)

    def add_plugin(self, pin):
        """Adds the given plugin to the list of plugins in this parser.
        A plugin class (rather than an instance) is instantiated with no
        arguments.
        """

        if isinstance(pin, type):
            pin = pin()
        self.plugins.append(pin)

    def _add_ws_plugin(self):
        # WhitespacePlugin is mandatory for the parser to work, so it is
        # always installed automatically
        from whoosh.qparser.plugins import WhitespacePlugin
        self.add_plugin(WhitespacePlugin())

    def remove_plugin(self, pi):
        """Removes the given plugin object from the list of plugins in this
        parser.
        """

        self.plugins.remove(pi)

    def remove_plugin_class(self, cls):
        """Removes any plugins of the given class from this parser.
        """

        self.plugins = [pi for pi in self.plugins if not isinstance(pi, cls)]

    def replace_plugin(self, plugin):
        """Removes any plugins of the class of the given plugin and then adds
        it. This is a convenience method to keep from having to call
        ``remove_plugin_class`` followed by ``add_plugin`` each time you want
        to reconfigure a default plugin.

        >>> qp = qparser.QueryParser("content", schema)
        >>> qp.replace_plugin(qparser.NotPlugin("(^| )-"))
        """

        self.remove_plugin_class(plugin.__class__)
        self.add_plugin(plugin)

    def _priorized(self, methodname):
        # methodname is "taggers" or "filters". Returns a priorized list of
        # tagger objects or filter functions.
        items_and_priorities = []
        for plugin in self.plugins:
            # Call either .taggers() or .filters() on the plugin
            method = getattr(plugin, methodname)
            for item in method(self):
                items_and_priorities.append(item)
        # Sort the list by priority (lower priority runs first)
        items_and_priorities.sort(key=lambda x: x[1])
        # Return the sorted list without the priorities
        return [item for item, _ in items_and_priorities]

    def multitoken_query(self, spec, texts, fieldname, termclass, boost):
        """Returns a query for multiple texts. This method implements the
        intention specified in the field's ``multitoken_query`` attribute,
        which specifies what to do when strings that look like single terms
        to the parser turn out to yield multiple tokens when analyzed.

        :param spec: a string describing how to join the text strings into a
            query. This is usually the value of the field's
            ``multitoken_query`` attribute.
        :param texts: a list of token strings.
        :param fieldname: the name of the field.
        :param termclass: the query class to use for single terms.
        :param boost: the original term's boost in the query string, should be
            applied to the returned query object.
        """

        spec = spec.lower()
        if spec == "first":
            # Throw away all but the first token
            return termclass(fieldname, texts[0], boost=boost)
        elif spec == "phrase":
            # Turn the token into a phrase
            return self.phraseclass(fieldname, texts, boost=boost)
        else:
            if spec == "default":
                # Use the query class of the parser's default group
                qclass = self.group.qclass
            elif spec == "and":
                qclass = query.And
            elif spec == "or":
                qclass = query.Or
            else:
                raise QueryParserError("Unknown multitoken_query value %r"
                                       % spec)
            return qclass([termclass(fieldname, t, boost=boost)
                           for t in texts])

    def term_query(self, fieldname, text, termclass, boost=1.0, tokenize=True,
                   removestops=True):
        """Returns the appropriate query object for a single term in the query
        string.
        """

        if self.schema and fieldname in self.schema:
            field = self.schema[fieldname]

            # If this field type wants to parse queries itself, let it do so
            # and return early
            if field.self_parsing():
                try:
                    q = field.parse_query(fieldname, text, boost=boost)
                    return q
                # NOTE(review): bare except also swallows SystemExit and
                # KeyboardInterrupt; consider narrowing to Exception
                except:
                    # sys.exc_info() is used for Python 2/3 compatibility
                    e = sys.exc_info()[1]
                    return query.error_query(e)

            # Otherwise, ask the field to process the text into a list of
            # tokenized strings
            texts = list(field.process_text(text, mode="query",
                                            tokenize=tokenize,
                                            removestops=removestops))

            # If the analyzer returned more than one token, use the field's
            # multitoken_query attribute to decide what query class, if any, to
            # use to put the tokens together
            if len(texts) > 1:
                return self.multitoken_query(field.multitoken_query, texts,
                                             fieldname, termclass, boost)

            # It's possible field.process_text() will return an empty list (for
            # example, on a stop word)
            if not texts:
                return None
            text = texts[0]

        return termclass(fieldname, text, boost=boost)

    def taggers(self):
        """Returns a priorized list of tagger objects provided by the parser's
        currently configured plugins.
        """

        return self._priorized("taggers")

    def filters(self):
        """Returns a priorized list of filter functions provided by the
        parser's currently configured plugins.
        """

        return self._priorized("filters")

    def tag(self, text, pos=0, debug=False):
        """Returns a group of syntax nodes corresponding to the given text,
        created by matching the Taggers provided by the parser's plugins.

        :param text: the text to tag.
        :param pos: the position in the text to start tagging at.
        """

        # The list of output tags
        stack = []
        # End position of the previous match
        prev = pos
        # Priorized list of taggers provided by the parser's plugins
        taggers = self.taggers()
        if debug:
            print_debug(debug, "Taggers: %r" % taggers)

        # Define a function that will make a WordNode from the "interstitial"
        # text between matches
        def inter(startchar, endchar):
            n = syntax.WordNode(text[startchar:endchar])
            n.startchar = startchar
            n.endchar = endchar
            return n

        while pos < len(text):
            node = None
            # Try each tagger to see if it matches at the current position
            for tagger in taggers:
                node = tagger.match(self, text, pos)
                if node is not None:
                    # Guard against taggers that would cause an infinite loop
                    if node.endchar <= pos:
                        raise Exception("Token %r did not move cursor forward."
                                        " (%r, %s)" % (tagger, text, pos))
                    if prev < pos:
                        # Capture the unmatched text before this match
                        tween = inter(prev, pos)
                        if debug:
                            print_debug(debug, "Tween: %r" % tween)
                        stack.append(tween)

                    if debug:
                        print_debug(debug, "Tagger: %r at %s: %r"
                                    % (tagger, pos, node))
                    stack.append(node)
                    prev = pos = node.endchar
                    break

            if not node:
                # No taggers matched, move forward
                pos += 1

        # If there's unmatched text left over on the end, put it in a WordNode
        if prev < len(text):
            stack.append(inter(prev, len(text)))

        # Wrap the list of nodes in a group node
        group = self.group(stack)
        if debug:
            print_debug(debug, "Tagged group: %r" % group)
        return group

    def filterize(self, nodes, debug=False):
        """Takes a group of nodes and runs the filters provided by the parser's
        plugins.
        """

        # Call each filter in the priorized list of plugin filters
        if debug:
            print_debug(debug, "Pre-filtered group: %r" % nodes)
        for f in self.filters():
            if debug:
                print_debug(debug, "..Applying: %r" % f)
            nodes = f(self, nodes)
            if debug:
                print_debug(debug, "..Result: %r" % nodes)
            if nodes is None:
                raise Exception("Filter %r did not return anything" % f)
        return nodes

    def process(self, text, pos=0, debug=False):
        """Returns a group of syntax nodes corresponding to the given text,
        tagged by the plugin Taggers and filtered by the plugin filters.

        :param text: the text to tag.
        :param pos: the position in the text to start tagging at.
        """

        nodes = self.tag(text, pos=pos, debug=debug)
        nodes = self.filterize(nodes, debug=debug)
        return nodes

    def parse(self, text, normalize=True, debug=False):
        """Parses the input string and returns a :class:`whoosh.query.Query`
        object/tree.

        :param text: the unicode string to parse.
        :param normalize: whether to call normalize() on the query object/tree
            before returning it. This should be left on unless you're trying to
            debug the parser output.
        :rtype: :class:`whoosh.query.Query`
        """

        # Byte strings are decoded as latin1 (a lossless byte-to-char
        # mapping) rather than raising on non-ASCII input
        if not isinstance(text, text_type):
            text = text.decode("latin1")

        nodes = self.process(text, debug=debug)
        if debug:
            print_debug(debug, "Syntax tree: %r" % nodes)

        q = nodes.query(self)
        if not q:
            q = query.NullQuery
        if debug:
            print_debug(debug, "Pre-normalized query: %r" % q)

        if normalize:
            q = q.normalize()
            if debug:
                print_debug(debug, "Normalized query: %r" % q)
        return q

    def parse_(self, text, normalize=True):
        # NOTE(review): dead stub -- never implemented and never called here;
        # kept only to avoid changing the public interface
        pass
|
||||
|
||||
|
||||
# Premade parser configurations
|
||||
|
||||
def MultifieldParser(fieldnames, schema, fieldboosts=None, **kwargs):
    """Returns a QueryParser configured to search in multiple fields.

    Instead of assigning unfielded clauses to a default field, this parser
    transforms them into an OR clause that searches a list of fields. For
    example, if the list of multi-fields is "f1", "f2" and the query string is
    "hello there", the class will parse "(f1:hello OR f2:hello) (f1:there OR
    f2:there)". This is very useful when you have two textual fields (e.g.
    "title" and "content") you want to search by default.

    :param fieldnames: a list of field names to search.
    :param fieldboosts: an optional dictionary mapping field names to boosts.
    """

    from whoosh.qparser.plugins import MultifieldPlugin

    # No default field: the MultifieldPlugin redistributes unfielded terms
    parser = QueryParser(None, schema, **kwargs)
    parser.add_plugin(MultifieldPlugin(fieldnames, fieldboosts=fieldboosts))
    return parser
|
||||
|
||||
|
||||
def SimpleParser(fieldname, schema, **kwargs):
    """Returns a QueryParser configured to support only +, -, and phrase
    syntax.
    """

    from whoosh.qparser import plugins, syntax

    # Deliberately minimal plugin set: whitespace, +/- markers, and phrases
    chosen = [plugins.WhitespacePlugin,
              plugins.PlusMinusPlugin,
              plugins.PhrasePlugin]
    return QueryParser(fieldname, schema, plugins=chosen,
                       group=syntax.OrGroup, **kwargs)
|
||||
|
||||
|
||||
def DisMaxParser(fieldboosts, schema, tiebreak=0.0, **kwargs):
    """Returns a QueryParser configured to support only +, -, and phrase
    syntax, and which converts individual terms into DisjunctionMax queries
    across a set of fields.

    :param fieldboosts: a dictionary mapping field names to boosts.
    """

    from whoosh.qparser import plugins, syntax

    # NOTE(review): the ``tiebreak`` argument is accepted but never used here
    multifield = plugins.MultifieldPlugin(list(fieldboosts.keys()),
                                          fieldboosts=fieldboosts,
                                          group=syntax.DisMaxGroup)
    chosen = [plugins.WhitespacePlugin,
              plugins.PlusMinusPlugin,
              plugins.PhrasePlugin,
              multifield]
    return QueryParser(None, schema, plugins=chosen, group=syntax.OrGroup,
                       **kwargs)
|
||||
1413
venv/Lib/site-packages/whoosh/qparser/plugins.py
Normal file
1413
venv/Lib/site-packages/whoosh/qparser/plugins.py
Normal file
File diff suppressed because it is too large
Load Diff
645
venv/Lib/site-packages/whoosh/qparser/syntax.py
Normal file
645
venv/Lib/site-packages/whoosh/qparser/syntax.py
Normal file
@@ -0,0 +1,645 @@
|
||||
# Copyright 2011 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import sys, weakref
|
||||
|
||||
from whoosh import query
|
||||
from whoosh.qparser.common import get_single_text, QueryParserError, attach
|
||||
|
||||
|
||||
class SyntaxNode(object):
    """Base class for nodes that make up the abstract syntax tree (AST) of a
    parsed user query string. The AST is an intermediate step, generated
    from the query string, then converted into a :class:`whoosh.query.Query`
    tree by calling the ``query()`` method on the nodes.

    Instances have the following required attributes:

    ``has_fieldname``
        True if this node has a ``fieldname`` attribute.
    ``has_text``
        True if this node has a ``text`` attribute
    ``has_boost``
        True if this node has a ``boost`` attribute.
    ``startchar``
        The character position in the original text at which this node started.
    ``endchar``
        The character position in the original text at which this node ended.
    """

    has_fieldname = False
    has_text = False
    has_boost = False
    # Weak reference to the enclosing GroupNode, set by bake()
    _parent = None

    def __repr__(self):
        r = "<"
        if self.has_fieldname:
            r += "%r:" % self.fieldname
        r += self.r()
        if self.has_boost and self.boost != 1.0:
            r += " ^%s" % self.boost
        r += ">"
        return r

    def r(self):
        """Returns a basic representation of this node. The base class's
        ``__repr__`` method calls this, then does the extra busy work of adding
        fieldname and boost where appropriate.
        """

        return "%s %r" % (self.__class__.__name__, self.__dict__)

    def apply(self, fn):
        # Leaf nodes have no children to map fn over, so return unchanged
        return self

    def accept(self, fn):
        """Applies ``fn`` bottom-up to this node and (via ``apply``) to all
        its sub-nodes, returning the transformed tree.
        """

        def fn_wrapper(n):
            return fn(n.apply(fn_wrapper))

        return fn_wrapper(self)

    def query(self, parser):
        """Returns a :class:`whoosh.query.Query` instance corresponding to this
        syntax tree node.
        """

        raise NotImplementedError(self.__class__.__name__)

    def is_ws(self):
        """Returns True if this node is ignorable whitespace.
        """

        return False

    def is_text(self):
        # Overridden by text-bearing node types
        return False

    def set_fieldname(self, name, override=False):
        """Sets the fieldname associated with this node. If ``override`` is
        False (the default), the fieldname will only be replaced if this node
        does not already have a fieldname set.

        For nodes that don't have a fieldname, this is a no-op.
        """

        if not self.has_fieldname:
            return

        if self.fieldname is None or override:
            self.fieldname = name
        return self

    def set_boost(self, boost):
        """Sets the boost associated with this node.

        For nodes that don't have a boost, this is a no-op.
        """

        if not self.has_boost:
            return
        self.boost = boost
        return self

    def set_range(self, startchar, endchar):
        """Sets the character range associated with this node.
        """

        self.startchar = startchar
        self.endchar = endchar
        return self

    # Navigation methods

    def parent(self):
        # Dereference the weakref set by bake(); None if never baked or if
        # the parent has been garbage collected
        if self._parent:
            return self._parent()

    def next_sibling(self):
        # The node following this one in the parent group, if any
        p = self.parent()
        if p:
            return p.node_after(self)

    def prev_sibling(self):
        # The node preceding this one in the parent group, if any
        p = self.parent()
        if p:
            return p.node_before(self)

    def bake(self, parent):
        # Record the parent with a weak reference to avoid reference cycles
        self._parent = weakref.ref(parent)
|
||||
|
||||
|
||||
class MarkerNode(SyntaxNode):
    """Base class for placeholder nodes whose only purpose is to mark a
    position in the syntax tree.
    """

    def r(self):
        # The class name alone is a sufficient representation for markers
        return type(self).__name__
|
||||
|
||||
|
||||
class Whitespace(MarkerNode):
    """Syntax tree node standing in for ignorable whitespace in the query
    string.
    """

    def is_ws(self):
        # Unlike other nodes, whitespace is always ignorable
        return True

    def r(self):
        return " "
|
||||
|
||||
|
||||
class FieldnameNode(SyntaxNode):
    """Syntax tree node recording an explicit field-name assignment
    (``fieldname:``) found in the query string.
    """

    has_fieldname = True

    def __init__(self, fieldname, original):
        # original preserves the exact source text that produced this node
        self.fieldname = fieldname
        self.original = original

    def __repr__(self):
        return "<%r:>" % self.fieldname
|
||||
|
||||
|
||||
class GroupNode(SyntaxNode):
    """Base class for abstract syntax tree node types that group together
    sub-nodes.

    Instances have the following attributes:

    ``merging``
        True if side-by-side instances of this group can be merged into a
        single group.
    ``qclass``
        If a subclass doesn't override ``query()``, the base class will simply
        wrap this class around the queries returned by the subnodes.

    This class implements a number of list methods for operating on the
    subnodes.
    """

    has_boost = True
    merging = True
    qclass = None

    def __init__(self, nodes=None, boost=1.0, **kwargs):
        # kwargs are stored and passed through to qclass by query()
        self.nodes = nodes or []
        self.boost = boost
        self.kwargs = kwargs

    def r(self):
        return "%s %s" % (self.__class__.__name__,
                          ", ".join(repr(n) for n in self.nodes))

    @property
    def startchar(self):
        # A group's char range is derived from its first/last sub-nodes
        if not self.nodes:
            return None
        return self.nodes[0].startchar

    @property
    def endchar(self):
        if not self.nodes:
            return None
        return self.nodes[-1].endchar

    def apply(self, fn):
        """Returns a new group of the same class with ``fn`` applied to each
        sub-node.
        """

        # BUG FIX: previously passed a nonexistent ``self.type`` attribute as
        # the first constructor argument, which raised AttributeError
        return self.__class__([fn(node) for node in self.nodes],
                              boost=self.boost, **self.kwargs)

    def query(self, parser):
        """Builds queries for the sub-nodes and wraps them in ``qclass``.
        """

        subs = []
        for node in self.nodes:
            subq = node.query(parser)
            if subq is not None:
                subs.append(subq)

        q = self.qclass(subs, boost=self.boost, **self.kwargs)
        return attach(q, self)

    def empty_copy(self):
        """Returns an empty copy of this group.

        This is used in the common pattern where a filter creates an new
        group and then adds nodes from the input group to it if they meet
        certain criteria, then returns the new group::

            def remove_whitespace(parser, group):
                newgroup = group.empty_copy()
                for node in group:
                    if not node.is_ws():
                        newgroup.append(node)
                return newgroup
        """

        c = self.__class__(**self.kwargs)
        if self.has_boost:
            c.boost = self.boost
        if self.has_fieldname:
            c.fieldname = self.fieldname
        if self.has_text:
            c.text = self.text
        return c

    def set_fieldname(self, name, override=False):
        # Propagate the field name to all sub-nodes as well
        SyntaxNode.set_fieldname(self, name, override=override)
        for node in self.nodes:
            node.set_fieldname(name, override=override)

    def set_range(self, startchar, endchar):
        # Groups derive their range from sub-nodes, so set it on them
        for node in self.nodes:
            node.set_range(startchar, endchar)
        return self

    # List-like methods

    def __nonzero__(self):
        return bool(self.nodes)

    __bool__ = __nonzero__

    def __iter__(self):
        return iter(self.nodes)

    def __len__(self):
        return len(self.nodes)

    def __getitem__(self, n):
        return self.nodes.__getitem__(n)

    def __setitem__(self, n, v):
        self.nodes.__setitem__(n, v)

    def __delitem__(self, n):
        self.nodes.__delitem__(n)

    def insert(self, n, v):
        self.nodes.insert(n, v)

    def append(self, v):
        self.nodes.append(v)

    def extend(self, vs):
        self.nodes.extend(vs)

    def pop(self, *args, **kwargs):
        return self.nodes.pop(*args, **kwargs)

    def reverse(self):
        self.nodes.reverse()

    def index(self, v):
        return self.nodes.index(v)

    # Navigation methods

    def bake(self, parent):
        # Bake this node, then recursively bake children with self as parent
        SyntaxNode.bake(self, parent)
        for node in self.nodes:
            node.bake(self)

    def node_before(self, n):
        """Returns the sub-node immediately before ``n``, or None."""

        try:
            i = self.nodes.index(n)
        except ValueError:
            return
        if i > 0:
            return self.nodes[i - 1]

    def node_after(self, n):
        """Returns the sub-node immediately after ``n``, or None."""

        try:
            i = self.nodes.index(n)
        except ValueError:
            return
        # BUG FIX: off-by-one -- ``- 2`` made the next-to-last node appear
        # to have no following sibling
        if i < len(self.nodes) - 1:
            return self.nodes[i + 1]
|
||||
|
||||
|
||||
class BinaryGroup(GroupNode):
    """Intermediate base class for group nodes that have two subnodes and
    whose ``qclass`` initializer takes two arguments instead of a list.
    """

    merging = False
    has_boost = False

    def query(self, parser):
        """Build the queries for the two subnodes and combine them with
        ``self.qclass``. If one side yields no query, the other side's query
        is returned alone; if both are empty, a NullQuery is returned.

        :param parser: the QueryParser driving the conversion.
        """

        assert len(self.nodes) == 2

        qa = self.nodes[0].query(parser)
        qb = self.nodes[1].query(parser)
        if qa is None and qb is None:
            q = query.NullQuery
        elif qa is None:
            q = qb
        elif qb is None:
            q = qa
        else:
            # FIX: reuse the sub-queries already built above. The original
            # called self.nodes[0].query(parser) and self.nodes[1].query(parser)
            # a second time here, doing the conversion work twice (and
            # repeating any side effects of query construction).
            q = self.qclass(qa, qb)

        return attach(q, self)
|
||||
|
||||
|
||||
class Wrapper(GroupNode):
    """Intermediate base class for nodes that wrap a single sub-node.
    """

    merging = False

    def query(self, parser):
        """Return ``self.qclass`` wrapped around the sub-node's query, or
        None when the sub-node produces no (truthy) query.
        """

        inner = self.nodes[0].query(parser)
        if not inner:
            return None
        return attach(self.qclass(inner), self)
|
||||
|
||||
|
||||
class ErrorNode(SyntaxNode):
    """Syntax node recording a parse error, optionally wrapping the node
    that triggered it. Converts to an error query carrying the message.
    """

    def __init__(self, message, node=None):
        self.message = message
        self.node = node

    def r(self):
        return "ERR %r %r" % (self.node, self.message)

    @property
    def startchar(self):
        # Delegate the character range to the wrapped node.
        return self.node.startchar

    @property
    def endchar(self):
        return self.node.endchar

    def query(self, parser):
        """Build the wrapped node's query (or a NullQuery if there is no
        wrapped node) and tag it with this node's error message.
        """

        base = self.node.query(parser) if self.node else query.NullQuery
        return attach(query.error_query(self.message, base), self)
|
||||
|
||||
|
||||
class AndGroup(GroupNode):
    """Syntax group that combines its children with ``query.And``."""

    qclass = query.And
|
||||
|
||||
|
||||
class OrGroup(GroupNode):
    """Syntax group that combines its children with ``query.Or``."""

    qclass = query.Or

    @classmethod
    def factory(cls, scale=1.0):
        """Return an ``OrGroup`` subclass that always passes the given
        ``scale`` value through to its superclass initializer.

        :param scale: the scale value baked into the returned subclass.
        """

        class ScaledOrGroup(OrGroup):
            def __init__(self, nodes=None, **kwargs):
                # Discard any caller-supplied scale; the factory's value wins.
                kwargs.pop("scale", None)
                super(ScaledOrGroup, self).__init__(nodes=nodes, scale=scale,
                                                    **kwargs)

        return ScaledOrGroup
|
||||
|
||||
|
||||
class DisMaxGroup(GroupNode):
    """Syntax group that combines its children with ``query.DisjunctionMax``."""

    qclass = query.DisjunctionMax
|
||||
|
||||
|
||||
class OrderedGroup(GroupNode):
    """Syntax group that combines its children with ``query.Ordered``."""

    qclass = query.Ordered
|
||||
|
||||
|
||||
class AndNotGroup(BinaryGroup):
    """Binary group that builds a ``query.AndNot`` from its two subnodes."""

    qclass = query.AndNot
|
||||
|
||||
|
||||
class AndMaybeGroup(BinaryGroup):
    """Binary group that builds a ``query.AndMaybe`` from its two subnodes."""

    qclass = query.AndMaybe
|
||||
|
||||
|
||||
class RequireGroup(BinaryGroup):
    """Binary group that builds a ``query.Require`` from its two subnodes."""

    qclass = query.Require
|
||||
|
||||
|
||||
class NotGroup(Wrapper):
    """Wrapper that negates its single subnode with ``query.Not``."""

    qclass = query.Not
|
||||
|
||||
|
||||
class RangeNode(SyntaxNode):
    """Syntax node for range queries.
    """

    has_fieldname = True

    def __init__(self, start, end, startexcl, endexcl):
        """
        :param start: text of the lower bound; a falsy value leaves the
            range open at the start.
        :param end: text of the upper bound; a falsy value leaves the
            range open at the end.
        :param startexcl: whether the lower bound is exclusive.
        :param endexcl: whether the upper bound is exclusive.
        """

        self.start = start
        self.end = end
        self.startexcl = startexcl
        self.endexcl = endexcl
        self.boost = 1.0
        self.fieldname = None
        self.kwargs = {}

    def r(self):
        # Render the bounds with {}/[] brackets to show exclusive/inclusive.
        b1 = "{" if self.startexcl else "["
        b2 = "}" if self.endexcl else "]"
        return "%s%r %r%s" % (b1, self.start, self.end, b2)

    def query(self, parser):
        """Convert this node into a range query.

        If the target field is in the schema and is "self-parsing"
        (presumably a field type that knows how to parse its own ranges --
        confirm in whoosh.fields), the field gets the first chance to build
        the query; a QueryParserError from it becomes an error query.
        Otherwise the bound texts are normalized through the field and a
        ``query.TermRange`` is built.
        """

        fieldname = self.fieldname or parser.fieldname
        start = self.start
        end = self.end

        if parser.schema and fieldname in parser.schema:
            field = parser.schema[fieldname]
            if field.self_parsing():
                try:
                    q = field.parse_range(fieldname, start, end,
                                          self.startexcl, self.endexcl,
                                          boost=self.boost)
                    if q is not None:
                        return attach(q, self)
                except QueryParserError:
                    # sys.exc_info()[1] fetches the active exception in a
                    # form that works on both Python 2 and Python 3.
                    e = sys.exc_info()[1]
                    return attach(query.error_query(e), self)

            # Normalize the bound texts without tokenizing or removing
            # stop words, so each bound stays a single term.
            if start:
                start = get_single_text(field, start, tokenize=False,
                                        removestops=False)
            if end:
                end = get_single_text(field, end, tokenize=False,
                                      removestops=False)

        q = query.TermRange(fieldname, start, end, self.startexcl,
                            self.endexcl, boost=self.boost)
        return attach(q, self)
|
||||
|
||||
|
||||
class TextNode(SyntaxNode):
    """Intermediate base class for basic nodes that search for text -- term
    queries, wildcards, prefixes, and so on.

    Subclasses can tune the default ``query()`` machinery through these
    attributes:

    ``qclass``
        the query class used to build the query when ``query()`` is not
        overridden (falls back to the parser's term class).
    ``tokenize``
        whether the node's text is tokenized before the query is built.
    ``removestops``
        whether stop words are stripped from the text before the query is
        built (when the field's analyzer has a stop-word filter).
    """

    has_fieldname = True
    has_text = True
    has_boost = True
    qclass = None
    tokenize = False
    removestops = False

    def __init__(self, text):
        self.fieldname = None
        self.text = text
        self.boost = 1.0

    def r(self):
        return "%s %r" % (self.__class__.__name__, self.text)

    def is_text(self):
        return True

    def query(self, parser):
        """Build a term-style query for this node's text via the parser's
        ``term_query`` helper.
        """

        return attach(
            parser.term_query(self.fieldname or parser.fieldname,
                              self.text,
                              self.qclass or parser.termclass,
                              boost=self.boost,
                              tokenize=self.tokenize,
                              removestops=self.removestops),
            self)
|
||||
|
||||
|
||||
class WordNode(TextNode):
    """Syntax node for plain term queries. Unlike the TextNode base, the
    text is tokenized and stop words are removed when building the query.
    """

    tokenize = True
    removestops = True

    def r(self):
        # Render as just the repr of the text.
        return "%r" % (self.text,)
|
||||
|
||||
|
||||
# Operators
|
||||
|
||||
class Operator(SyntaxNode):
    """Base class for PrefixOperator, PostfixOperator, and InfixOperator.

    Operators work by moving the nodes they apply to (e.g. for prefix operator,
    the previous node, for infix operator, the nodes on either side, etc.) into
    a group node. The group provides the code for what to do with the nodes.
    """

    def __init__(self, text, grouptype, leftassoc=True):
        """
        :param text: the text of the operator in the query string.
        :param grouptype: the type of group to create in place of the operator
            and the node(s) it operates on.
        :param leftassoc: for infix operators, whether the operator is left
            associative. use ``leftassoc=False`` for right-associative infix
            operators.
        """

        self.text = text
        self.grouptype = grouptype
        self.leftassoc = leftassoc

    def r(self):
        return "OP %r" % self.text

    def replace_self(self, parser, group, position):
        """Called with the parser, a group, and the position at which the
        operator occurs in that group. Should return a group with the operator
        replaced by whatever effect the operator has (e.g. for an infix op,
        replace the op and the nodes on either side with a sub-group).
        """

        raise NotImplementedError
|
||||
|
||||
|
||||
class PrefixOperator(Operator):
    """Operator that wraps the node FOLLOWING it in ``self.grouptype``."""

    def replace_self(self, parser, group, position):
        # Remove the operator itself; the node it applied to (if any) now
        # sits at the same index. Wrapping only happens when such a node
        # exists (equivalent to the pre-deletion ``position < length - 1``).
        del group[position]
        if position < len(group):
            group[position] = self.grouptype([group[position]])
        return position
|
||||
|
||||
|
||||
class PostfixOperator(Operator):
    """Operator that wraps the node PRECEDING it in ``self.grouptype``."""

    def replace_self(self, parser, group, position):
        target = position - 1
        del group[position]
        # Wrap the preceding node, if the operator wasn't first in the group.
        if target >= 0:
            group[target] = self.grouptype([group[target]])
        return position
|
||||
|
||||
|
||||
class InfixOperator(Operator):
    """Operator that groups the nodes on either side of it."""

    def replace_self(self, parser, group, position):
        # Replace this operator and its left/right neighbors with a single
        # group node, merging into an existing group of the same type when
        # possible (for "merging" group types) to avoid deep nesting.
        la = self.leftassoc
        gtype = self.grouptype
        merging = gtype.merging

        if position > 0 and position < len(group) - 1:
            left = group[position - 1]
            right = group[position + 1]

            # The first two clauses check whether the "strong" side is already
            # a group of the type we are going to create. If it is, we just
            # append the "weak" side to the "strong" side instead of creating
            # a new group inside the existing one. This is necessary because
            # we can quickly run into Python's recursion limit otherwise.
            if merging and la and isinstance(left, gtype):
                left.append(right)
                del group[position:position + 2]
            elif merging and not la and isinstance(right, gtype):
                right.insert(0, left)
                del group[position - 1:position + 1]
                # Deleting the left neighbor shifted everything down by one.
                return position - 1
            else:
                # Replace the operator and the two surrounding objects
                group[position - 1:position + 2] = [gtype([left, right])]
        else:
            # An infix operator at the edge of the group has a missing
            # operand; just drop the operator.
            del group[position]

        return position
|
||||
|
||||
|
||||
# Functions
|
||||
|
||||
def to_word(n):
    """Return a WordNode built from ``n.original``, carrying over the
    source node's character range.
    """

    wnode = WordNode(n.original)
    wnode.startchar = n.startchar
    wnode.endchar = n.endchar
    return wnode
|
||||
93
venv/Lib/site-packages/whoosh/qparser/taggers.py
Normal file
93
venv/Lib/site-packages/whoosh/qparser/taggers.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# Copyright 2011 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from whoosh.util.text import rcompile
|
||||
|
||||
|
||||
# Tagger objects
|
||||
|
||||
class Tagger(object):
    """Base class for taggers, objects which match syntax in the query string
    and translate it into a :class:`whoosh.qparser.syntax.SyntaxNode` object.
    """

    def match(self, parser, text, pos):
        """This method should see if this tagger matches the query string at
        the given position. If it matches, it should return a syntax node
        for the matched text; if it does not match, it should return None.

        :param parser: the :class:`whoosh.qparser.default.QueryParser` object.
        :param text: the text being parsed.
        :param pos: the position in the text at which the tagger should try to
            match.
        """

        raise NotImplementedError
|
||||
|
||||
|
||||
class RegexTagger(Tagger):
    """Tagger class that uses regular expressions to match the query string.
    Subclasses should override ``create()`` instead of ``match()``.
    """

    def __init__(self, expr):
        self.expr = rcompile(expr)

    def match(self, parser, text, pos):
        """Try ``self.expr`` at ``pos``; on a match, delegate to ``create()``
        and stamp the resulting node (if any) with the matched character
        range. Returns None when the expression or ``create()`` produces
        nothing.
        """

        found = self.expr.match(text, pos)
        if not found:
            return None
        node = self.create(parser, found)
        if node is None:
            return None
        return node.set_range(found.start(), found.end())

    def create(self, parser, match):
        """When the regular expression matches, this method is called to
        translate the regex match object into a syntax node.

        :param parser: the :class:`whoosh.qparser.default.QueryParser` object.
        :param match: the regex match object.
        """

        raise NotImplementedError
|
||||
|
||||
|
||||
class FnTagger(RegexTagger):
    """Tagger that takes a regular expression and a class or function; when
    the expression matches, the class/function is called with the match's
    named groups as keyword arguments.
    """

    def __init__(self, expr, fn, memo=""):
        RegexTagger.__init__(self, expr)
        self.fn = fn
        # Free-form label used only in the repr, for debugging.
        self.memo = memo

    def __repr__(self):
        return "<%s %r (%s)>" % (self.__class__.__name__, self.expr, self.memo)

    def create(self, parser, match):
        kwargs = match.groupdict()
        return self.fn(**kwargs)
|
||||
Reference in New Issue
Block a user