2026-1-6
This commit is contained in:
347
venv/Lib/site-packages/whoosh/query/ranges.py
Normal file
347
venv/Lib/site-packages/whoosh/query/ranges.py
Normal file
@@ -0,0 +1,347 @@
|
||||
# Copyright 2007 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from __future__ import division
|
||||
|
||||
from whoosh.compat import b, u
|
||||
from whoosh.query import qcore, terms, compound, wrappers
|
||||
from whoosh.util.times import datetime_to_long
|
||||
|
||||
|
||||
class RangeMixin(object):
|
||||
# Contains methods shared by TermRange and NumericRange
|
||||
|
||||
def __repr__(self):
|
||||
return ('%s(%r, %r, %r, %s, %s, boost=%s, constantscore=%s)'
|
||||
% (self.__class__.__name__, self.fieldname, self.start,
|
||||
self.end, self.startexcl, self.endexcl, self.boost,
|
||||
self.constantscore))
|
||||
|
||||
def __unicode__(self):
|
||||
startchar = "{" if self.startexcl else "["
|
||||
endchar = "}" if self.endexcl else "]"
|
||||
start = '' if self.start is None else self.start
|
||||
end = '' if self.end is None else self.end
|
||||
return u("%s:%s%s TO %s%s") % (self.fieldname, startchar, start, end,
|
||||
endchar)
|
||||
|
||||
__str__ = __unicode__
|
||||
|
||||
def __eq__(self, other):
|
||||
return (other and self.__class__ is other.__class__
|
||||
and self.fieldname == other.fieldname
|
||||
and self.start == other.start and self.end == other.end
|
||||
and self.startexcl == other.startexcl
|
||||
and self.endexcl == other.endexcl
|
||||
and self.boost == other.boost
|
||||
and self.constantscore == other.constantscore)
|
||||
|
||||
def __hash__(self):
|
||||
return (hash(self.fieldname) ^ hash(self.start) ^ hash(self.startexcl)
|
||||
^ hash(self.end) ^ hash(self.endexcl) ^ hash(self.boost))
|
||||
|
||||
def is_range(self):
|
||||
return True
|
||||
|
||||
def _comparable_start(self):
|
||||
if self.start is None:
|
||||
return (qcore.Lowest, 0)
|
||||
else:
|
||||
second = 1 if self.startexcl else 0
|
||||
return (self.start, second)
|
||||
|
||||
def _comparable_end(self):
|
||||
if self.end is None:
|
||||
return (qcore.Highest, 0)
|
||||
else:
|
||||
second = -1 if self.endexcl else 0
|
||||
return (self.end, second)
|
||||
|
||||
def overlaps(self, other):
|
||||
if not isinstance(other, TermRange):
|
||||
return False
|
||||
if self.fieldname != other.fieldname:
|
||||
return False
|
||||
|
||||
start1 = self._comparable_start()
|
||||
start2 = other._comparable_start()
|
||||
end1 = self._comparable_end()
|
||||
end2 = other._comparable_end()
|
||||
|
||||
return ((start1 >= start2 and start1 <= end2)
|
||||
or (end1 >= start2 and end1 <= end2)
|
||||
or (start2 >= start1 and start2 <= end1)
|
||||
or (end2 >= start1 and end2 <= end1))
|
||||
|
||||
def merge(self, other, intersect=True):
|
||||
assert self.fieldname == other.fieldname
|
||||
|
||||
start1 = self._comparable_start()
|
||||
start2 = other._comparable_start()
|
||||
end1 = self._comparable_end()
|
||||
end2 = other._comparable_end()
|
||||
|
||||
if start1 >= start2 and end1 <= end2:
|
||||
start = start2
|
||||
end = end2
|
||||
elif start2 >= start1 and end2 <= end1:
|
||||
start = start1
|
||||
end = end1
|
||||
elif intersect:
|
||||
start = max(start1, start2)
|
||||
end = min(end1, end2)
|
||||
else:
|
||||
start = min(start1, start2)
|
||||
end = max(end1, end2)
|
||||
|
||||
startval = None if start[0] is qcore.Lowest else start[0]
|
||||
startexcl = start[1] == 1
|
||||
endval = None if end[0] is qcore.Highest else end[0]
|
||||
endexcl = end[1] == -1
|
||||
|
||||
boost = max(self.boost, other.boost)
|
||||
constantscore = self.constantscore or other.constantscore
|
||||
|
||||
return self.__class__(self.fieldname, startval, endval, startexcl,
|
||||
endexcl, boost=boost,
|
||||
constantscore=constantscore)
|
||||
|
||||
|
||||
class TermRange(RangeMixin, terms.MultiTerm):
|
||||
"""Matches documents containing any terms in a given range.
|
||||
|
||||
>>> # Match documents where the indexed "id" field is greater than or equal
|
||||
>>> # to 'apple' and less than or equal to 'pear'.
|
||||
>>> TermRange("id", u"apple", u"pear")
|
||||
"""
|
||||
|
||||
def __init__(self, fieldname, start, end, startexcl=False, endexcl=False,
|
||||
boost=1.0, constantscore=True):
|
||||
"""
|
||||
:param fieldname: The name of the field to search.
|
||||
:param start: Match terms equal to or greater than this.
|
||||
:param end: Match terms equal to or less than this.
|
||||
:param startexcl: If True, the range start is exclusive. If False, the
|
||||
range start is inclusive.
|
||||
:param endexcl: If True, the range end is exclusive. If False, the
|
||||
range end is inclusive.
|
||||
:param boost: Boost factor that should be applied to the raw score of
|
||||
results matched by this query.
|
||||
"""
|
||||
|
||||
self.fieldname = fieldname
|
||||
self.start = start
|
||||
self.end = end
|
||||
self.startexcl = startexcl
|
||||
self.endexcl = endexcl
|
||||
self.boost = boost
|
||||
self.constantscore = constantscore
|
||||
|
||||
def normalize(self):
|
||||
if self.start in ('', None) and self.end in (u('\uffff'), None):
|
||||
from whoosh.query import Every
|
||||
return Every(self.fieldname, boost=self.boost)
|
||||
elif self.start == self.end:
|
||||
if self.startexcl or self.endexcl:
|
||||
return qcore.NullQuery
|
||||
return terms.Term(self.fieldname, self.start, boost=self.boost)
|
||||
else:
|
||||
return TermRange(self.fieldname, self.start, self.end,
|
||||
self.startexcl, self.endexcl,
|
||||
boost=self.boost)
|
||||
|
||||
#def replace(self, fieldname, oldtext, newtext):
|
||||
# q = self.copy()
|
||||
# if q.fieldname == fieldname:
|
||||
# if q.start == oldtext:
|
||||
# q.start = newtext
|
||||
# if q.end == oldtext:
|
||||
# q.end = newtext
|
||||
# return q
|
||||
|
||||
def _btexts(self, ixreader):
|
||||
fieldname = self.fieldname
|
||||
field = ixreader.schema[fieldname]
|
||||
startexcl = self.startexcl
|
||||
endexcl = self.endexcl
|
||||
|
||||
if self.start is None:
|
||||
start = b("")
|
||||
else:
|
||||
try:
|
||||
start = field.to_bytes(self.start)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
if self.end is None:
|
||||
end = b("\xFF\xFF\xFF\xFF")
|
||||
else:
|
||||
try:
|
||||
end = field.to_bytes(self.end)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
for fname, t in ixreader.terms_from(fieldname, start):
|
||||
if fname != fieldname:
|
||||
break
|
||||
if t == start and startexcl:
|
||||
continue
|
||||
if t == end and endexcl:
|
||||
break
|
||||
if t > end:
|
||||
break
|
||||
yield t
|
||||
|
||||
|
||||
class NumericRange(RangeMixin, qcore.Query):
|
||||
"""A range query for NUMERIC fields. Takes advantage of tiered indexing
|
||||
to speed up large ranges by matching at a high resolution at the edges of
|
||||
the range and a low resolution in the middle.
|
||||
|
||||
>>> # Match numbers from 10 to 5925 in the "number" field.
|
||||
>>> nr = NumericRange("number", 10, 5925)
|
||||
"""
|
||||
|
||||
def __init__(self, fieldname, start, end, startexcl=False, endexcl=False,
|
||||
boost=1.0, constantscore=True):
|
||||
"""
|
||||
:param fieldname: The name of the field to search.
|
||||
:param start: Match terms equal to or greater than this number. This
|
||||
should be a number type, not a string.
|
||||
:param end: Match terms equal to or less than this number. This should
|
||||
be a number type, not a string.
|
||||
:param startexcl: If True, the range start is exclusive. If False, the
|
||||
range start is inclusive.
|
||||
:param endexcl: If True, the range end is exclusive. If False, the
|
||||
range end is inclusive.
|
||||
:param boost: Boost factor that should be applied to the raw score of
|
||||
results matched by this query.
|
||||
:param constantscore: If True, the compiled query returns a constant
|
||||
score (the value of the ``boost`` keyword argument) instead of
|
||||
actually scoring the matched terms. This gives a nice speed boost
|
||||
and won't affect the results in most cases since numeric ranges
|
||||
will almost always be used as a filter.
|
||||
"""
|
||||
|
||||
self.fieldname = fieldname
|
||||
self.start = start
|
||||
self.end = end
|
||||
self.startexcl = startexcl
|
||||
self.endexcl = endexcl
|
||||
self.boost = boost
|
||||
self.constantscore = constantscore
|
||||
|
||||
def simplify(self, ixreader):
|
||||
return self._compile_query(ixreader).simplify(ixreader)
|
||||
|
||||
def estimate_size(self, ixreader):
|
||||
return self._compile_query(ixreader).estimate_size(ixreader)
|
||||
|
||||
def estimate_min_size(self, ixreader):
|
||||
return self._compile_query(ixreader).estimate_min_size(ixreader)
|
||||
|
||||
def docs(self, searcher):
|
||||
q = self._compile_query(searcher.reader())
|
||||
return q.docs(searcher)
|
||||
|
||||
def _compile_query(self, ixreader):
|
||||
from whoosh.fields import NUMERIC
|
||||
from whoosh.util.numeric import tiered_ranges
|
||||
|
||||
field = ixreader.schema[self.fieldname]
|
||||
if not isinstance(field, NUMERIC):
|
||||
raise Exception("NumericRange: field %r is not numeric"
|
||||
% self.fieldname)
|
||||
|
||||
start = self.start
|
||||
if start is not None:
|
||||
start = field.prepare_number(start)
|
||||
end = self.end
|
||||
if end is not None:
|
||||
end = field.prepare_number(end)
|
||||
|
||||
subqueries = []
|
||||
stb = field.sortable_to_bytes
|
||||
# Get the term ranges for the different resolutions
|
||||
ranges = tiered_ranges(field.numtype, field.bits, field.signed,
|
||||
start, end, field.shift_step,
|
||||
self.startexcl, self.endexcl)
|
||||
for startnum, endnum, shift in ranges:
|
||||
if startnum == endnum:
|
||||
subq = terms.Term(self.fieldname, stb(startnum, shift))
|
||||
else:
|
||||
startbytes = stb(startnum, shift)
|
||||
endbytes = stb(endnum, shift)
|
||||
subq = TermRange(self.fieldname, startbytes, endbytes)
|
||||
subqueries.append(subq)
|
||||
|
||||
if len(subqueries) == 1:
|
||||
q = subqueries[0]
|
||||
elif subqueries:
|
||||
q = compound.Or(subqueries, boost=self.boost)
|
||||
else:
|
||||
return qcore.NullQuery
|
||||
|
||||
if self.constantscore:
|
||||
q = wrappers.ConstantScoreQuery(q, self.boost)
|
||||
return q
|
||||
|
||||
def matcher(self, searcher, context=None):
|
||||
q = self._compile_query(searcher.reader())
|
||||
return q.matcher(searcher, context)
|
||||
|
||||
|
||||
class DateRange(NumericRange):
|
||||
"""This is a very thin subclass of :class:`NumericRange` that only
|
||||
overrides the initializer and ``__repr__()`` methods to work with datetime
|
||||
objects instead of numbers. Internally this object converts the datetime
|
||||
objects it's created with to numbers and otherwise acts like a
|
||||
``NumericRange`` query.
|
||||
|
||||
>>> DateRange("date", datetime(2010, 11, 3, 3, 0),
|
||||
... datetime(2010, 11, 3, 17, 59))
|
||||
"""
|
||||
|
||||
def __init__(self, fieldname, start, end, startexcl=False, endexcl=False,
|
||||
boost=1.0, constantscore=True):
|
||||
self.startdate = start
|
||||
self.enddate = end
|
||||
if start:
|
||||
start = datetime_to_long(start)
|
||||
if end:
|
||||
end = datetime_to_long(end)
|
||||
super(DateRange, self).__init__(fieldname, start, end,
|
||||
startexcl=startexcl, endexcl=endexcl,
|
||||
boost=boost,
|
||||
constantscore=constantscore)
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%r, %r, %r, %s, %s, boost=%s)' % (self.__class__.__name__,
|
||||
self.fieldname,
|
||||
self.startdate, self.enddate,
|
||||
self.startexcl, self.endexcl,
|
||||
self.boost)
|
||||
Reference in New Issue
Block a user