2026-1-6
This commit is contained in:
142
venv/Lib/site-packages/whoosh/util/__init__.py
Normal file
142
venv/Lib/site-packages/whoosh/util/__init__.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# Copyright 2007 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from __future__ import with_statement
|
||||
import random, sys, time
|
||||
from bisect import insort, bisect_left
|
||||
from functools import wraps
|
||||
|
||||
from whoosh.compat import xrange
|
||||
|
||||
|
||||
# These must be valid separate characters in case-insensitive filenames
# (digits plus lowercase ASCII letters).
IDCHARS = "0123456789abcdefghijklmnopqrstuvwxyz"


# Pick the best timer available: time.perf_counter (Python 3.3+), else
# time.clock on Windows, else plain time.time.
now = getattr(time, "perf_counter", None)
if now is None:
    now = time.clock if sys.platform == 'win32' else time.time


def random_name(size=28):
    """Return a random name of *size* characters drawn from IDCHARS."""

    return "".join(random.choice(IDCHARS) for _ in range(size))
def random_bytes(size=28):
    """Return *size* random bytes as a ``bytes`` object.

    :param size: number of bytes to generate.
    """

    # The original Python 2 branch called ``array(...)`` without importing
    # it from the ``array`` module, raising NameError. Building through a
    # bytearray works identically on both Python 2 and 3.
    return bytes(bytearray(random.randint(0, 255) for _ in range(size)))
def make_binary_tree(fn, args, **kwargs):
    """Takes a function/class that takes two positional arguments and a list of
    arguments and returns a binary tree of results/instances.

    >>> make_binary_tree(UnionMatcher, [matcher1, matcher2, matcher3])
    UnionMatcher(matcher1, UnionMatcher(matcher2, matcher3))

    Any keyword arguments given to this function are passed to the class
    initializer.
    """

    if not args:
        raise ValueError("Called make_binary_tree with empty list")
    if len(args) == 1:
        # A single argument is returned as-is, not wrapped in fn
        return args[0]

    # Split in the middle and recurse on both halves
    mid = len(args) // 2
    left = make_binary_tree(fn, args[:mid], **kwargs)
    right = make_binary_tree(fn, args[mid:], **kwargs)
    return fn(left, right, **kwargs)
def make_weighted_tree(fn, ls, **kwargs):
    """Takes a function/class that takes two positional arguments and a list of
    (weight, argument) tuples and returns a huffman-like weighted tree of
    results/instances.
    """

    if not ls:
        raise ValueError("Called make_weighted_tree with empty list")

    # NOTE: **kwargs is accepted but not forwarded to fn, matching the
    # original behavior.
    # Repeatedly combine the two lightest entries, keeping the list sorted
    # (this mutates the caller's list, as the original did).
    ls.sort()
    while len(ls) > 1:
        w1, obj1 = ls.pop(0)
        w2, obj2 = ls.pop(0)
        insort(ls, (w1 + w2, fn(obj1, obj2)))
    return ls[0][1]
# Fibonacci function

# Memo table shared by all calls to fib()
_fib_cache = {}


def fib(n):
    """Returns the nth value in the Fibonacci sequence.
    """

    # Note: fib(n) == n for n <= 2, so this is a shifted Fibonacci
    # sequence (1, 2, 3, 5, 8, ...); preserved as-is.
    if n <= 2:
        return n
    cached = _fib_cache.get(n)
    if cached is None:
        cached = fib(n - 1) + fib(n - 2)
        _fib_cache[n] = cached
    return cached
# Decorators

def synchronized(func):
    """Decorator for storage-access methods, which synchronizes on a threading
    lock. The parent object must have 'is_closed' and '_sync_lock' attributes.
    """

    @wraps(func)
    def locked_call(self, *args, **kwargs):
        # Hold the object's lock for the duration of the wrapped call
        with self._sync_lock:
            return func(self, *args, **kwargs)

    return locked_call
def unclosed(method):
    """
    Decorator to check if the object is closed.
    """

    @wraps(method)
    def guarded(self, *args, **kwargs):
        # Refuse to run the method once the object reports itself closed
        if self.closed:
            raise ValueError("Operation on a closed object")
        return method(self, *args, **kwargs)
    return guarded
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
375
venv/Lib/site-packages/whoosh/util/cache.py
Normal file
375
venv/Lib/site-packages/whoosh/util/cache.py
Normal file
@@ -0,0 +1,375 @@
|
||||
# Copyright 2007 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from __future__ import with_statement
|
||||
import functools, random
|
||||
from array import array
|
||||
from heapq import nsmallest
|
||||
from operator import itemgetter
|
||||
from threading import Lock
|
||||
from time import time
|
||||
|
||||
from whoosh.compat import iteritems, xrange
|
||||
|
||||
|
||||
# ``collections.Counter`` was added in Python 2.7/3.1; fall back to a
# minimal stand-in on older interpreters.
try:
    from collections import Counter
except ImportError:
    class Counter(dict):
        # Only the behavior lfu_cache needs: reading an absent key yields
        # 0 instead of raising KeyError (no most_common(), arithmetic, etc.).
        def __missing__(self, key):
            return 0
def unbound_cache(func):
    """Caching decorator with an unbounded cache size.
    """

    # Results keyed by the positional-argument tuple; never evicted
    memo = {}

    @functools.wraps(func)
    def caching_wrapper(*args):
        if args not in memo:
            memo[args] = func(*args)
        return memo[args]

    return caching_wrapper
def lru_cache(maxsize=100):
    """A simple cache that, when the cache is full, deletes the least recently
    used 10% of the cached values.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0]  # Hits, misses
        data = {}  # Cached results keyed by the positional-argument tuple
        lastused = {}  # Timestamp of the most recent access for each key

        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) >= maxsize:
                    # Evict the least-recently-used ~10% of entries
                    # (.items() works on both Py2 (list) and Py3 (view))
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          lastused.items(),
                                          key=itemgetter(1)):
                        del data[k]
                        del lastused[k]
                # BUGFIX: the original stamped ``lastused`` in a ``finally``
                # block, so a raising user_function left a phantom key in
                # lastused with no data entry; eviction then crashed with
                # KeyError. Now nothing is recorded if the call raises.
                result = user_function(*args)
                data[args] = result
            lastused[args] = time()
            return result

        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            # Clear the cache and cache statistics
            data.clear()
            lastused.clear()
            stats[0] = stats[1] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def lfu_cache(maxsize=100):
    """A simple cache that, when the cache is full, deletes the least frequently
    used 10% of the cached values.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0]  # Hits, misses
        data = {}  # Cached results keyed by the positional-argument tuple
        usecount = Counter()  # Access count per cached key

        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) >= maxsize:
                    # Evict the least-frequently-used ~10% of entries
                    # (.items() works on both Py2 (list) and Py3 (view))
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          usecount.items(),
                                          key=itemgetter(1)):
                        del data[k]
                        del usecount[k]
                # BUGFIX: the original bumped ``usecount`` in a ``finally``
                # block, so a raising user_function left a phantom key in
                # usecount with no data entry; eviction then crashed with
                # KeyError. Now nothing is recorded if the call raises.
                result = user_function(*args)
                data[args] = result
            usecount[args] += 1
            return result

        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            # Clear the cache and cache statistics. BUGFIX: the original
            # never reset the hit/miss counters here, although the
            # docstring promises cache_clear() clears statistics.
            data.clear()
            usecount.clear()
            stats[0] = stats[1] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def random_cache(maxsize=100):
    """A very simple cache that, when the cache is filled, deletes 10% of the
    cached values AT RANDOM.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0]  # hits, misses
        data = {}  # Cached results keyed by the positional-argument tuple

        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) >= maxsize:
                    # BUGFIX: the original did ``keys = data.keys()`` and
                    # then ``keys.pop(n)``, which crashes on Python 3
                    # because dict views have no pop(); materialize a list.
                    keys = list(data)
                    for _ in range(maxsize // 10 or 1):
                        n = random.randint(0, len(keys) - 1)
                        del data[keys.pop(n)]
                result = user_function(*args)
                data[args] = result
            return result

        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            # Clear the cache and cache statistics. BUGFIX: the original
            # never reset the hit/miss counters here, although the
            # docstring promises cache_clear() clears statistics.
            data.clear()
            stats[0] = stats[1] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def db_lru_cache(maxsize=100):
    """Double-barrel least-recently-used cache decorator. This is a simple
    LRU algorithm that keeps a primary and secondary dict. Keys are checked
    in the primary dict, and then the secondary. Once the primary dict fills
    up, the secondary dict is cleared and the two dicts are swapped.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        # state[0], state[1]: the two generation dicts
        # state[2]: index of the current primary dict (0 or 1)
        # state[3], state[4]: hit and miss counters
        state = [{}, {}, 0, 0, 0]

        @functools.wraps(user_function)
        def wrapper(*args):
            which = state[2]
            primary = state[which]
            secondary = state[not which]

            if args in primary:
                state[3] += 1  # Hit in the current generation
                return primary[args]
            if args in secondary:
                state[3] += 1  # Hit in the previous generation
                return secondary[args]

            state[4] += 1  # Miss
            value = user_function(*args)
            primary[args] = value
            if len(primary) >= maxsize:
                # Primary generation is full: it becomes the secondary,
                # and the (cleared) other dict becomes the new primary
                state[2] = not which
                secondary.clear()
            return value

        def cache_info():
            # (hits, misses, maxsize, currsize)
            return state[3], state[4], maxsize, len(state[0]) + len(state[1])

        def cache_clear():
            """Clear the cache and cache statistics"""
            state[0].clear()
            state[1].clear()
            state[3] = state[4] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear

        return wrapper
    return decorating_function
def clockface_lru_cache(maxsize=100):
    """Least-recently-used cache decorator.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library, but
    uses the clock face LRU algorithm instead of an ordered dictionary.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize)
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """

    def decorating_function(user_function):
        stats = [0, 0, 0]  # hits, misses, hand
        data = {}  # key -> (clock position, result)

        if maxsize:
            # The keys at each point on the clock face
            clock_keys = [None] * maxsize
            # The "referenced" bits at each point on the clock face
            clock_refs = array("B", (0 for _ in range(maxsize)))
            lock = Lock()

            @functools.wraps(user_function)
            def wrapper(*args):
                key = args
                try:
                    with lock:
                        pos, result = data[key]
                        # The key is in the cache. Set the key's reference bit
                        clock_refs[pos] = 1
                        # Record a cache hit
                        stats[0] += 1
                except KeyError:
                    # Compute the value
                    result = user_function(*args)
                    with lock:
                        # Current position of the clock hand
                        hand = stats[2]
                        # Remember to stop here after a full revolution
                        end = hand
                        # Sweep around the clock looking for a position with
                        # the reference bit off
                        while True:
                            hand = (hand + 1) % maxsize
                            current_ref = clock_refs[hand]
                            if current_ref:
                                # This position's "referenced" bit is set. Turn
                                # the bit off and move on.
                                clock_refs[hand] = 0
                            elif not current_ref or hand == end:
                                # We've either found a position with the
                                # "reference" bit off or reached the end of the
                                # circular cache. So we'll replace this
                                # position with the new key
                                current_key = clock_keys[hand]
                                if current_key in data:
                                    del data[current_key]
                                clock_keys[hand] = key
                                clock_refs[hand] = 1
                                break
                        # Put the key and result in the cache
                        data[key] = (hand, result)
                        # Save the new hand position
                        stats[2] = hand
                        # Record a cache miss
                        stats[1] += 1
                return result

        else:
            # Unbounded variant: a plain dict with no eviction
            @functools.wraps(user_function)
            def wrapper(*args):
                key = args
                try:
                    result = data[key]
                    stats[0] += 1
                except KeyError:
                    result = user_function(*args)
                    data[key] = result
                    stats[1] += 1
                return result

        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)

        def cache_clear():
            """Clear the cache and cache statistics"""
            data.clear()
            stats[0] = stats[1] = stats[2] = 0
            # BUGFIX: the original looped over clock_keys/clock_refs
            # unconditionally, raising NameError when maxsize is falsy
            # (the unbounded variant never defines them).
            if maxsize:
                for i in range(maxsize):
                    clock_keys[i] = None
                    clock_refs[i] = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
163
venv/Lib/site-packages/whoosh/util/filelock.py
Normal file
163
venv/Lib/site-packages/whoosh/util/filelock.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# Copyright 2010 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
"""
|
||||
This module contains classes implementing exclusive locks for platforms with
|
||||
fcntl (UNIX and Mac OS X) and Windows. Whoosh originally used directory
|
||||
creation as a locking method, but it had the problem that if the program
|
||||
crashed the lock directory was left behind and would keep the index locked
|
||||
until it was cleaned up. Using OS-level file locks fixes this.
|
||||
"""
|
||||
|
||||
import errno
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def try_for(fn, timeout=5.0, delay=0.1):
    """Calls ``fn`` every ``delay`` seconds until it returns True or
    ``timeout`` seconds elapse. Returns True if the lock was acquired, or False
    if the timeout was reached.

    :param timeout: Length of time (in seconds) to keep retrying to acquire the
        lock. 0 means return immediately. Only used when blocking is False.
    :param delay: How often (in seconds) to retry acquiring the lock during
        the timeout period. Only used when blocking is False and timeout > 0.
    """

    deadline = time.time() + timeout
    while True:
        acquired = fn()
        # Stop on success, or once the deadline has passed
        if acquired or time.time() >= deadline:
            return acquired
        time.sleep(delay)
class LockBase(object):
    """Base class for file locks.
    """

    def __init__(self, filename):
        self.fd = None  # OS-level file descriptor while the lock is held
        self.filename = filename
        self.locked = False

    def __del__(self):
        # Best-effort cleanup: release the lock if the object is collected
        # while still holding a descriptor; ignore any failure (interpreter
        # shutdown can make release() fail harmlessly).
        if getattr(self, "fd", None):
            try:
                self.release()
            except:
                pass

    def acquire(self, blocking=False):
        """Acquire the lock. Returns True if the lock was acquired.

        :param blocking: if True, call blocks until the lock is acquired.
            This may not be available on all platforms. On Windows, this is
            actually just a delay of 10 seconds, rechecking every second.
        """
        pass

    def release(self):
        pass
class FcntlLock(LockBase):
    """File lock based on UNIX-only fcntl module.
    """

    def acquire(self, blocking=False):
        import fcntl  # @UnresolvedImport

        self.fd = os.open(self.filename, os.O_CREAT | os.O_WRONLY)

        op = fcntl.LOCK_EX
        if not blocking:
            op |= fcntl.LOCK_NB

        try:
            fcntl.flock(self.fd, op)
        except IOError:
            err = sys.exc_info()[1]
            # EAGAIN/EACCES just mean someone else holds the lock;
            # anything else is a real error
            if err.errno not in (errno.EAGAIN, errno.EACCES):
                raise
            os.close(self.fd)
            self.fd = None
            return False
        else:
            self.locked = True
            return True

    def release(self):
        if self.fd is None:
            raise Exception("Lock was not acquired")

        import fcntl  # @UnresolvedImport
        fcntl.flock(self.fd, fcntl.LOCK_UN)
        os.close(self.fd)
        self.fd = None
class MsvcrtLock(LockBase):
    """File lock based on Windows-only msvcrt module.
    """

    def acquire(self, blocking=False):
        """Acquire the lock. Returns True on success, False if another
        process holds the lock (non-fatal errno values).
        """
        import msvcrt  # @UnresolvedImport

        flags = os.O_CREAT | os.O_WRONLY
        mode = msvcrt.LK_NBLCK
        if blocking:
            mode = msvcrt.LK_LOCK

        self.fd = os.open(self.filename, flags)
        try:
            msvcrt.locking(self.fd, mode, 1)
            # CONSISTENCY FIX: track lock state like FcntlLock.acquire()
            # does; the original never set ``self.locked`` on this platform.
            self.locked = True
            return True
        except IOError:
            e = sys.exc_info()[1]
            # These errno values just mean the lock is already held
            if e.errno not in (errno.EAGAIN, errno.EACCES, errno.EDEADLK):
                raise
            os.close(self.fd)
            self.fd = None
            return False

    def release(self):
        import msvcrt  # @UnresolvedImport

        if self.fd is None:
            raise Exception("Lock was not acquired")
        msvcrt.locking(self.fd, msvcrt.LK_UNLCK, 1)
        os.close(self.fd)
        self.fd = None
|
||||
# Pick the platform-appropriate lock implementation
FileLock = MsvcrtLock if os.name == "nt" else FcntlLock
84
venv/Lib/site-packages/whoosh/util/loading.py
Normal file
84
venv/Lib/site-packages/whoosh/util/loading.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# Copyright 2012 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import pickle
|
||||
|
||||
|
||||
class RenamingUnpickler(pickle.Unpickler):
    """Subclasses ``pickle.Unpickler`` to allow remapping of class names before
    loading them.
    """

    def __init__(self, f, objmap, shortcuts=None):
        pickle.Unpickler.__init__(self, f)

        if shortcuts:
            # Expand %-style placeholders in both sides of the mapping
            objmap = dict((key % shortcuts, value % shortcuts)
                          for key, value in objmap.items())
        self._objmap = objmap

    def find_class(self, modulename, objname):
        fqname = "%s.%s" % (modulename, objname)
        # Remap the fully qualified name if it appears in the map
        fqname = self._objmap.get(fqname, fqname)
        try:
            return find_object(fqname)
        except ImportError:
            raise ImportError("Couldn't find %r" % fqname)
def find_object(name, blacklist=None, whitelist=None):
    """Imports and returns an object given a fully qualified name.

    >>> find_object("whoosh.analysis.StopFilter")
    <class 'whoosh.analysis.StopFilter'>
    """

    # Reject names starting with any blacklisted prefix
    if blacklist:
        for prefix in blacklist:
            if name.startswith(prefix):
                raise TypeError("%r: can't instantiate names starting with %r"
                                % (name, prefix))
    # When a whitelist is given, the name must match one of its prefixes
    if whitelist:
        if not any(name.startswith(prefix) for prefix in whitelist):
            raise TypeError("Can't instantiate %r" % name)

    lastdot = name.rfind(".")
    assert lastdot > -1, "Name %r must be fully qualified" % name
    modname = name[:lastdot]
    clsname = name[lastdot + 1:]

    module = __import__(modname, fromlist=[clsname])
    return getattr(module, clsname)
317
venv/Lib/site-packages/whoosh/util/numeric.py
Normal file
317
venv/Lib/site-packages/whoosh/util/numeric.py
Normal file
@@ -0,0 +1,317 @@
|
||||
# Copyright 2010 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import math, struct
|
||||
from array import array
|
||||
from bisect import bisect_left
|
||||
from struct import pack, unpack
|
||||
|
||||
from whoosh.compat import b, long_type
|
||||
from whoosh.system import pack_byte, unpack_byte, pack_ushort, unpack_ushort
|
||||
from whoosh.system import pack_int, unpack_int, pack_uint, unpack_uint
|
||||
from whoosh.system import pack_long, unpack_long, pack_ulong, unpack_ulong
|
||||
from whoosh.system import pack_float, unpack_float, pack_double, unpack_double
|
||||
|
||||
|
||||
# An IEEE 754 NaN value, built by reinterpreting an all-ones byte string as
# a little-endian double (``b`` comes from whoosh.compat).
NaN = struct.unpack("<d", b('\xff\xff\xff\xff\xff\xff\xff\xff'))[0]

# Maximum representable value for each array/struct typecode
typecode_max = {"b": 127, "B": 255, "h": 2 ** 15 - 1, "H": 2 ** 16 - 1,
                "i": 2 ** 31 - 1, "I": 2 ** 32 - 1,
                "q": 2 ** 63 - 1, "Q": 2 ** 64 - 1}
# Minimum representable value for each typecode (0 for the unsigned codes)
typecode_min = {"b": 0 - 128, "B": 0, "h": 0 - 2 ** 15, "H": 0,
                "i": 0 - 2 ** 31, "I": 0,
                "q": 0 - 2 ** 63, "Q": 0}
# Pack callables (from whoosh.system) keyed by typecode.
# NOTE(review): signed "b" and "h" have min/max entries above but no
# pack/unpack entries here -- confirm callers never look those codes up.
typecode_pack = {"B": pack_byte, "H": pack_ushort, "i": pack_int,
                 "I": pack_uint, "q": pack_long, "Q": pack_ulong,
                 "f": pack_float, "d": pack_double}
# Matching unpack callables keyed by typecode
typecode_unpack = {"B": unpack_byte, "H": unpack_ushort, "i": unpack_int,
                   "I": unpack_uint, "q": unpack_long, "Q": unpack_ulong,
                   "f": unpack_float, "d": unpack_double}
# Functions related to binary representations

def bits_required(maxnum):
    """Returns the number of bits required to represent the given (unsigned)
    integer.
    """

    # NOTE(review): for exact powers of two this returns log2(maxnum)
    # (e.g. 8 -> 3), one less than maxnum.bit_length(); callers may depend
    # on this ceil-of-log definition, so it is preserved as-is.
    bitcount = math.ceil(math.log(maxnum, 2))
    return max(1, bitcount)
def typecode_required(maxnum):
    """Return the smallest array/struct typecode able to hold *maxnum*."""

    # Scan the size thresholds from smallest to largest code.
    # (The 2**31 - 1 and 2**63 - 1 limits are preserved from the original,
    # which pushed those exact values up to the next wider code.)
    thresholds = ((256, "B"),
                  (2 ** 16, "H"),
                  (2 ** 31 - 1, "i"),
                  (2 ** 32, "I"),
                  (2 ** 63 - 1, "q"))
    for limit, code in thresholds:
        if maxnum < limit:
            return code
    return "Q"
def max_value(bitcount):
    """Returns the maximum (unsigned) integer representable in the given number
    of bits.
    """

    # Equivalent to setting the low `bitcount` bits
    return (1 << bitcount) - 1
def bytes_for_bits(bitcount):
    """Return the number of whole bytes needed to hold ``bitcount + 1`` bits.

    The +1 is inherited from the original formula; presumably it reserves
    one extra flag/sentinel bit -- confirm against callers.
    """

    nbytes = int(math.ceil((bitcount + 1) / 8.0))
    return nbytes
# Functions for converting numbers to and from sortable representations
|
||||
|
||||
_istruct = struct.Struct(">i")
|
||||
_qstruct = struct.Struct(">q")
|
||||
_dstruct = struct.Struct(">d")
|
||||
_ipack, _iunpack = _istruct.pack, _istruct.unpack
|
||||
_qpack, _qunpack = _qstruct.pack, _qstruct.unpack
|
||||
_dpack, _dunpack = _dstruct.pack, _dstruct.unpack
|
||||
|
||||
|
||||
def to_sortable(numtype, intsize, signed, x):
|
||||
if numtype is int or numtype is long_type:
|
||||
if signed:
|
||||
x += (1 << intsize - 1)
|
||||
return x
|
||||
else:
|
||||
return float_to_sortable_long(x, signed)
|
||||
|
||||
|
||||
def from_sortable(numtype, intsize, signed, x):
    """Inverse of to_sortable(): recovers the original number from its
    sortable unsigned integer representation.
    """

    if numtype is not int and numtype is not long_type:
        return sortable_long_to_float(x, signed)
    if signed:
        # Remove the bias added by to_sortable().
        x -= 1 << (intsize - 1)
    return x
|
||||
|
||||
|
||||
def float_to_sortable_long(x, signed):
    """Maps a float onto a non-negative integer whose ordering matches the
    numeric ordering of the floats.
    """

    # Reinterpret the IEEE 754 double bits as a signed 64-bit integer.
    bits = _qunpack(_dpack(x))[0]
    # Negative floats compare in reverse bit order; flipping the low 63 bits
    # restores a monotonic mapping.
    if bits < 0:
        bits ^= 0x7fffffffffffffff
    if signed:
        # Bias into the non-negative range.
        bits += 1 << 63
    assert bits >= 0
    return bits
|
||||
|
||||
|
||||
def sortable_long_to_float(x, signed):
    """Inverse of float_to_sortable_long(): recovers the float from its
    sortable integer representation.
    """

    if signed:
        # Undo the sign bias.
        x -= 1 << 63
    # Undo the bit flip applied to negative values.
    if x < 0:
        x ^= 0x7fffffffffffffff
    # Reinterpret the 64-bit integer bits as an IEEE 754 double.
    return _dunpack(_qpack(x))[0]
|
||||
|
||||
|
||||
# Functions for generating tiered ranges
|
||||
|
||||
def split_ranges(intsize, step, start, end):
    """Splits a range of numbers (from ``start`` to ``end``, inclusive)
    into a sequence of trie ranges of the form ``(start, end, shift)``. The
    consumer of these tuples is expected to shift the ``start`` and ``end``
    right by ``shift``.

    This is used for generating term ranges for a numeric field. The queries
    for the edges of the range are generated at high precision and large blocks
    in the middle are generated at low precision.
    """

    shift = 0
    while True:
        # Span of one block at the next (coarser) level
        span = 1 << (shift + step)
        # Mask selecting the ``step`` bits handled at this level
        level_mask = ((1 << step) - 1) << shift

        def fill_low(x):
            # Set all bits below the current level
            return x | ((1 << shift) - 1)

        # True if the range edge does not land on a block boundary and
        # therefore needs a high-precision edge range at this level
        has_lower_edge = (start & level_mask) != 0
        has_upper_edge = (end & level_mask) != level_mask

        keep_mask = ~level_mask & ((1 << intsize + 1) - 1)
        next_start = (start + span if has_lower_edge else start) & keep_mask
        next_end = (end - span if has_upper_edge else end) & keep_mask

        # Stop when precision is exhausted or the remaining middle collapses
        if shift + step >= intsize or next_start > next_end:
            yield (start, fill_low(end), shift)
            break

        if has_lower_edge:
            yield (start, fill_low(start | level_mask), shift)
        if has_upper_edge:
            yield (end & keep_mask, fill_low(end), shift)

        start = next_start
        end = next_end
        shift += step
|
||||
|
||||
|
||||
def tiered_ranges(numtype, intsize, signed, start, end, shift_step,
                  startexcl, endexcl):
    """Returns an iterable of ``(start, end, shift)`` tuples covering the
    given numeric range, for building tiered numeric term queries.

    :param numtype: ``int`` or ``float``.
    :param intsize: bits in the sortable representation (8, 16, 32 or 64).
    :param signed: whether negative values are representable.
    :param start: low end of the range, or None for open-ended.
    :param end: high end of the range, or None for open-ended.
    :param shift_step: bits per precision tier; 0 disables tiering.
    :param startexcl: True if ``start`` itself should be excluded.
    :param endexcl: True if ``end`` itself should be excluded.
    """

    assert numtype in (int, float)
    assert intsize in (8, 16, 32, 64)

    # Convert start and end values to sortable ints, defaulting open ends to
    # the extremes of the unsigned representation
    if start is None:
        start = 0
    else:
        start = to_sortable(numtype, intsize, signed, start)
        if startexcl:
            start += 1

    if end is None:
        end = 2 ** intsize - 1
    else:
        end = to_sortable(numtype, intsize, signed, end)
        if endexcl:
            end -= 1

    if not shift_step:
        # Tiering disabled: a single full-precision range
        return ((start, end, 0),)

    # Yield (rstart, rend, shift) ranges for the different resolutions
    return split_ranges(intsize, shift_step, start, end)
|
||||
|
||||
|
||||
# Float-to-byte encoding/decoding
|
||||
|
||||
def float_to_byte(value, mantissabits=5, zeroexp=2):
    """Encodes a floating point number in a single byte.
    """

    # Assume int size == float size

    # Smallest truncated float that maps above byte 0
    fzero = (63 - zeroexp) << mantissabits
    # Raw IEEE 754 bits of the value, truncated to a "small float"
    bits = unpack("i", pack("f", value))[0]
    smallfloat = bits >> (24 - mantissabits)

    if smallfloat < fzero:
        # Negative numbers and zero map to byte 0; positive underflow maps
        # to the smallest non-zero byte
        result = chr(0) if bits <= 0 else chr(1)
    elif smallfloat >= fzero + 0x100:
        # Clamp overflow to the largest byte
        result = chr(255)
    else:
        result = chr(smallfloat - fzero)
    return b(result)
|
||||
|
||||
|
||||
def byte_to_float(b, mantissabits=5, zeroexp=2):
    """Decodes a floating point number stored in a single byte.
    """

    # Accept either an int byte value or a single-character string/bytes
    if type(b) is not int:
        b = ord(b)
    if b == 0:
        return 0.0

    # Rebuild the IEEE 754 bit pattern from the truncated byte and
    # reinterpret it as a float
    bits = ((b & 0xff) << (24 - mantissabits)) + ((63 - zeroexp) << 24)
    return unpack("f", pack("i", bits))[0]
|
||||
|
||||
|
||||
# Length-to-byte approximation functions
|
||||
|
||||
# Old implementation:
|
||||
|
||||
#def length_to_byte(length):
|
||||
# """Returns a logarithmic approximation of the given number, in the range
|
||||
# 0-255. The approximation has high precision at the low end (e.g.
|
||||
# 1 -> 0, 2 -> 1, 3 -> 2 ...) and low precision at the high end. Numbers
|
||||
# equal to or greater than 108116 all approximate to 255.
|
||||
#
|
||||
# This is useful for storing field lengths, where the general case is small
|
||||
# documents and very large documents are more rare.
|
||||
# """
|
||||
#
|
||||
# # This encoding formula works up to 108116 -> 255, so if the length is
|
||||
# # equal to or greater than that limit, just return 255.
|
||||
# if length >= 108116:
|
||||
# return 255
|
||||
#
|
||||
# # The parameters of this formula were chosen heuristically so that low
|
||||
# # numbers would approximate closely, and the byte range 0-255 would cover
|
||||
# # a decent range of document lengths (i.e. 1 to ~100000).
|
||||
# return int(round(log((length / 27.0) + 1, 1.033)))
|
||||
#def _byte_to_length(n):
|
||||
# return int(round((pow(1.033, n) - 1) * 27))
|
||||
#_b2l_cache = array("i", (_byte_to_length(i) for i in xrange(256)))
|
||||
#byte_to_length = _b2l_cache.__getitem__
|
||||
|
||||
# New implementation
|
||||
|
||||
# Instead of computing the actual formula to get the byte for any given length,
|
||||
# precompute the length associated with each byte, and use bisect to find the
|
||||
# nearest value. This gives quite a large speed-up.
|
||||
#
|
||||
# Note that this does not give all the same answers as the old, "real"
|
||||
# implementation since this implementation always "rounds down" (thanks to the
|
||||
# bisect_left) while the old implementation would "round up" or "round down"
|
||||
# depending on the input. Since this is a fairly gross approximation anyway,
|
||||
# I don't think it matters much.
|
||||
|
||||
# Values generated using the formula from the "old" implementation above
|
||||
# Values generated using the formula from the "old" implementation above.
# length_to_byte() bisects into this (strictly increasing) table; byte N
# decodes back to the approximate length _length_byte_cache[N].
_length_byte_cache = array('i', [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20, 21, 23,
    25, 26, 28, 30, 32, 34, 36, 38, 40, 42, 45, 47, 49, 52, 54, 57, 60, 63,
    66, 69, 72, 75, 79, 82, 86, 89, 93, 97, 101, 106, 110, 114, 119, 124,
    129, 134, 139, 145, 150, 156, 162, 169, 175, 182, 189, 196, 203, 211,
    219, 227, 235, 244, 253, 262, 271, 281, 291, 302, 313, 324, 336, 348,
    360, 373, 386, 399, 414, 428, 443, 459, 475, 491, 508, 526, 544, 563,
    583, 603, 623, 645, 667, 690, 714, 738, 763, 789, 816, 844, 873, 903,
    933, 965, 998, 1032, 1066, 1103, 1140, 1178, 1218, 1259, 1302, 1345,
    1391, 1438, 1486, 1536, 1587, 1641, 1696, 1753, 1811, 1872, 1935, 1999,
    2066, 2135, 2207, 2280, 2356, 2435, 2516, 2600, 2687, 2777, 2869, 2965,
    3063, 3165, 3271, 3380, 3492, 3608, 3728, 3852, 3980, 4112, 4249, 4390,
    4536, 4686, 4842, 5002, 5168, 5340, 5517, 5700, 5889, 6084, 6286, 6494,
    6709, 6932, 7161, 7398, 7643, 7897, 8158, 8428, 8707, 8995, 9293, 9601,
    9918, 10247, 10586, 10936, 11298, 11671, 12057, 12456, 12868, 13294,
    13733, 14187, 14656, 15141, 15641, 16159, 16693, 17244, 17814, 18403,
    19011, 19640, 20289, 20959, 21652, 22367, 23106, 23869, 24658, 25472,
    26314, 27183, 28081, 29009, 29967, 30957, 31979, 33035, 34126, 35254,
    36418, 37620, 38863, 40146, 41472, 42841, 44256, 45717, 47227, 48786,
    50397, 52061, 53780, 55556, 57390, 59285, 61242, 63264, 65352, 67510,
    69739, 72041, 74419, 76876, 79414, 82035, 84743, 87541, 90430, 93416,
    96499, 99684, 102975, 106374])
|
||||
|
||||
|
||||
def length_to_byte(length):
    """Returns a logarithmic approximation of the given length in the range
    0-255, by bisecting into the precomputed table above. ``None`` maps to 0.
    """

    if length is None:
        return 0
    if length >= 106374:
        # At or beyond the largest table entry everything maps to 255
        return 255
    return bisect_left(_length_byte_cache, length)


# Decoding is a plain table lookup: byte N -> approximate length
byte_to_length = _length_byte_cache.__getitem__
|
||||
373
venv/Lib/site-packages/whoosh/util/numlists.py
Normal file
373
venv/Lib/site-packages/whoosh/util/numlists.py
Normal file
@@ -0,0 +1,373 @@
|
||||
from array import array
|
||||
|
||||
from whoosh.compat import xrange
|
||||
from whoosh.system import emptybytes
|
||||
from whoosh.system import pack_byte, unpack_byte
|
||||
from whoosh.system import pack_ushort_le, unpack_ushort_le
|
||||
from whoosh.system import pack_uint_le, unpack_uint_le
|
||||
|
||||
|
||||
def delta_encode(nums):
    """Yields the difference between each number in ``nums`` and its
    predecessor (the first number is yielded relative to zero).
    """

    previous = 0
    for value in nums:
        yield value - previous
        previous = value
|
||||
|
||||
|
||||
def delta_decode(nums):
    """Inverse of delta_encode(): yields the running totals of ``nums``.
    """

    total = 0
    for delta in nums:
        total += delta
        yield total
|
||||
|
||||
|
||||
class GrowableArray(object):
    """An ``array.array`` wrapper that transparently widens its typecode
    (and finally falls back to a plain list) as larger values are appended.
    """

    def __init__(self, inittype="B", allow_longs=True):
        # Backing store: an array until a value no longer fits any
        # available array type, then (if allow_longs) a plain list
        self.array = array(inittype)
        self._allow_longs = allow_longs

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.array)

    def __len__(self):
        return len(self.array)

    def __iter__(self):
        return iter(self.array)

    def _retype(self, maxnum):
        # Replace the backing store with one wide enough for ``maxnum``
        if maxnum < 2 ** 16:
            code = "H"
        elif maxnum < 2 ** 31:
            code = "i"
        elif maxnum < 2 ** 32:
            code = "I"
        elif self._allow_longs:
            code = "q"
        else:
            raise OverflowError("%r is too big to fit in an array" % maxnum)

        try:
            self.array = array(code, iter(self.array))
        except ValueError:
            # "q" not available on this platform: degrade to a plain list
            self.array = list(self.array)

    def append(self, n):
        # EAFP: try the current store and widen only when it overflows
        try:
            self.array.append(n)
        except OverflowError:
            self._retype(n)
            self.array.append(n)

    def extend(self, ns):
        for n in ns:
            self.append(n)

    @property
    def typecode(self):
        # A list fallback behaves like the widest integer code
        if isinstance(self.array, array):
            return self.array.typecode
        return "q"

    def to_file(self, dbfile):
        if isinstance(self.array, array):
            dbfile.write_array(self.array)
        else:
            # List fallback: write each value as an individual long
            writer = dbfile.write_long
            for n in self.array:
                writer(n)
|
||||
|
||||
|
||||
# Number list encoding base class
|
||||
|
||||
class NumberEncoding(object):
    """Base class for schemes that encode/decode sequences of non-negative
    integers to and from a file-like object.
    """

    # Largest integer the scheme can store, or None for unlimited
    maxint = None

    def write_nums(self, f, numbers):
        raise NotImplementedError

    def read_nums(self, f, n):
        raise NotImplementedError

    def write_deltas(self, f, numbers):
        # Sorted numbers compress better stored as successive differences
        return self.write_nums(f, list(delta_encode(numbers)))

    def read_deltas(self, f, n):
        return delta_decode(self.read_nums(f, n))

    def get(self, f, pos, i):
        # Generic (slow) random access: decode i + 1 numbers and keep the
        # last one; subclasses may override with a faster seek-based version
        f.seek(pos)
        result = None
        for result in self.read_nums(f, i + 1):
            pass
        return result
|
||||
|
||||
|
||||
# Fixed width encodings
|
||||
|
||||
class FixedEncoding(NumberEncoding):
    """Base class for encodings where every number occupies exactly
    ``size`` bytes.
    """

    _encode = None  # callable: int -> bytes, set by subclasses
    _decode = None  # callable: bytes -> decoded value, set by subclasses
    size = None  # bytes per number, set by subclasses

    def write_nums(self, f, numbers):
        encode = self._encode
        for num in numbers:
            f.write(encode(num))

    def read_nums(self, f, n):
        decode = self._decode
        for _ in xrange(n):
            yield decode(f.read(self.size))

    def get(self, f, pos, i):
        # The fixed width makes random access a direct seek
        f.seek(pos + i * self.size)
        return self._decode(f.read(self.size))
|
||||
|
||||
|
||||
class ByteEncoding(FixedEncoding):
    """Fixed-width encoding storing each number as one unsigned byte."""

    size = 1
    maxint = 255
    _encode = pack_byte
    _decode = unpack_byte
|
||||
|
||||
|
||||
class UShortEncoding(FixedEncoding):
    """Fixed-width encoding storing each number as a little-endian
    unsigned 16-bit short."""

    size = 2
    maxint = 2 ** 16 - 1
    _encode = pack_ushort_le
    _decode = unpack_ushort_le
|
||||
|
||||
|
||||
class UIntEncoding(FixedEncoding):
    """Fixed-width encoding storing each number as a little-endian
    unsigned 32-bit int."""

    size = 4
    maxint = 2 ** 32 - 1
    _encode = pack_uint_le
    _decode = unpack_uint_le
|
||||
|
||||
|
||||
# High-bit encoded variable-length integer
|
||||
|
||||
class Varints(NumberEncoding):
    """Encodes each number as a high-bit-terminated variable-length integer
    using the file object's own varint support.
    """

    maxint = None

    def write_nums(self, f, numbers):
        for num in numbers:
            f.write_varint(num)

    def read_nums(self, f, n):
        for _ in xrange(n):
            yield f.read_varint()
|
||||
|
||||
|
||||
# Simple16 algorithm for storing arrays of positive integers (usually delta
|
||||
# encoded lists of sorted integers)
|
||||
#
|
||||
# 1. http://www2008.org/papers/pdf/p387-zhangA.pdf
|
||||
# 2. http://www2009.org/proceedings/pdf/p401.pdf
|
||||
|
||||
class Simple16(NumberEncoding):
    """Simple16 algorithm for storing arrays of positive integers (usually
    delta encoded lists of sorted integers).

    1. http://www2008.org/papers/pdf/p387-zhangA.pdf
    2. http://www2009.org/proceedings/pdf/p401.pdf
    """

    # The maximum possible integer value Simple16 can encode is < 2^28.
    # Therefore, in order to use Simple16, the application must have its own
    # code to encode numbers in the range of [2^28, 2^32). A simple way is just
    # write those numbers as 32-bit integers (that is, no compression for very
    # big numbers).
    _numsize = 16
    _bitsize = 28
    maxint = 2 ** _bitsize - 1

    # Number of stored numbers per code
    _num = [28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1]
    # Number of bits for each number per code
    _bits = [
        (1,) * 28,
        (2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
        (1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1),
        (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2),
        (2,) * 14,
        (4, 3, 3, 3, 3, 3, 3, 3, 3),
        (3, 4, 4, 4, 4, 3, 3, 3),
        (4, 4, 4, 4, 4, 4, 4),
        (5, 5, 5, 5, 4, 4),
        (4, 4, 5, 5, 5, 5),
        (6, 6, 6, 5, 5),
        (5, 5, 6, 6, 6),
        (7, 7, 7, 7),
        (10, 9, 9),
        (14, 14),
        (28,),
    ]

    def write_nums(self, f, numbers):
        """Writes the numbers to file ``f`` as a sequence of 32-bit
        little-endian code words."""

        _compress = self._compress

        i = 0
        while i < len(numbers):
            value, taken = _compress(numbers, i, len(numbers) - i)
            f.write_uint_le(value)
            i += taken

    def _compress(self, inarray, inoffset, n):
        """Packs as many numbers from ``inarray[inoffset:]`` as possible
        into a single 32-bit code word; returns ``(word, count_packed)``."""

        _numsize = self._numsize
        _bitsize = self._bitsize
        _num = self._num
        _bits = self._bits

        # Try each code from densest (most, smallest numbers) to sparsest
        for key in xrange(_numsize):
            value = key << _bitsize
            num = _num[key] if _num[key] < n else n
            bits = 0

            j = 0
            while j < num and inarray[inoffset + j] < (1 << _bits[key][j]):
                x = inarray[inoffset + j]
                value |= x << bits
                bits += _bits[key][j]
                j += 1

            if j == num:
                return value, num

        # Unreachable for values <= maxint: code 15 stores any 28-bit number
        raise Exception

    def read_nums(self, f, n):
        """Yields ``n`` numbers decoded from 32-bit code words read from
        file ``f``."""

        _decompress = self._decompress

        i = 0
        while i < n:
            value = unpack_uint_le(f.read(4))[0]
            for v in _decompress(value, n - i):
                yield v
                i += 1

    def _decompress(self, value, n):
        """Yields (at most ``n``) numbers packed in the code word
        ``value``."""

        _numsize = self._numsize
        _bitsize = self._bitsize
        _num = self._num
        _bits = self._bits

        key = value >> _bitsize
        num = _num[key] if _num[key] < n else n
        bits = 0
        for j in xrange(num):
            v = value >> bits
            yield v & (0xffffffff >> (32 - _bits[key][j]))
            bits += _bits[key][j]

    def get(self, f, pos, i):
        """Returns the i'th number of the encoded stream starting at file
        position ``pos``, decoding only the code word that contains it."""

        f.seek(pos)
        base = 0
        # BUG FIX: unpack_uint_le() returns a tuple; take element [0],
        # exactly as read_nums() does above. The original used the tuple
        # directly, making ``value >> ...`` a TypeError.
        value = unpack_uint_le(f.read(4))[0]
        key = value >> self._bitsize
        num = self._num[key]
        # The current code word holds indices [base, base + num).
        # BUG FIX: advance while ``i`` is at or beyond the end of this
        # word; the original ``i > base + num`` was off by one and let
        # ``offset`` reach ``num``, indexing past the end of
        # self._bits[key] below.
        while i >= base + num:
            base += num
            value = unpack_uint_le(f.read(4))[0]
            key = value >> self._bitsize
            num = self._num[key]

        offset = i - base
        if offset:
            # Skip over the bits of the preceding numbers in this word
            value = value >> sum(self._bits[key][:offset])
        return value & (2 ** self._bits[key][offset] - 1)
|
||||
|
||||
|
||||
# Google Packed Ints algorithm: a set of four numbers is preceded by a "key"
|
||||
# byte, which encodes how many bytes each of the next four integers use
|
||||
# (stored in the byte as four 2-bit numbers)
|
||||
|
||||
class GInts(NumberEncoding):
    """Google Packed Ints algorithm: a set of four numbers is preceded by a
    "key" byte, which encodes how many bytes each of the next four integers
    use (stored in the byte as four 2-bit numbers).
    """

    maxint = 2 ** 32 - 1

    # Number of future bytes to expect after a "key" byte value of N -- used
    # to skip ahead from one key byte to the next. Each 2-bit field of the
    # key holds (size_in_bytes - 1), so the total is 4 plus the sum of the
    # four fields. This comprehension generates exactly the 256 values the
    # table was previously written out as a literal.
    _lens = array("B", [4 + sum((key >> (j * 2)) & 3 for j in range(4))
                        for key in range(256)])

    def key_to_sizes(self, key):
        """Returns a list of the sizes of the next four numbers given a key
        byte.
        """

        return [(key >> (i * 2) & 3) + 1 for i in xrange(4)]

    def write_nums(self, f, numbers):
        """Writes the numbers to file ``f`` in groups of (up to) four, each
        group preceded by a key byte giving the byte widths."""

        buf = emptybytes
        count = 0
        key = 0
        for v in numbers:
            shift = count * 2
            if v < 256:
                buf += pack_byte(v)
            elif v < 65536:
                key |= 1 << shift
                buf += pack_ushort_le(v)
            elif v < 16777216:
                key |= 2 << shift
                buf += pack_uint_le(v)[:3]
            else:
                key |= 3 << shift
                buf += pack_uint_le(v)

            count += 1
            if count == 4:
                f.write_byte(key)
                f.write(buf)
                count = 0
                key = 0
                buf = emptybytes  # Clear the buffer

        # Write out leftovers in the buffer
        if count:
            f.write_byte(key)
            f.write(buf)

    def read_nums(self, f, n):
        """Read N integers from the bytes stream dbfile. Expects that the file
        is positioned at a key byte.
        """

        count = 0
        key = None
        for _ in xrange(n):
            if count == 0:
                key = f.read_byte()
            code = key >> (count * 2) & 3
            if code == 0:
                yield f.read_byte()
            elif code == 1:
                yield f.read_ushort_le()
            elif code == 2:
                # BUG FIX: pad with a *bytes* literal. The original padded
                # with the str "\x00", which raises TypeError on Python 3
                # where f.read() returns bytes (b"\x00" is still str on
                # Python 2, so both interpreters work).
                yield unpack_uint_le(f.read(3) + b"\x00")[0]
            else:
                yield f.read_uint_le()

            count = (count + 1) % 4

    # def get(self, f, pos, i):
    #     f.seek(pos)
    #     base = 0
    #     key = f.read_byte()
    #     while i > base + 4:
    #         base += 4
    #         f.seek(self._lens[key], 1)
    #         key = f.read_byte()
    #
    #     for n in self.read_nums(f, (i + 1) - base):
    #         pass
    #     return n
|
||||
130
venv/Lib/site-packages/whoosh/util/testing.py
Normal file
130
venv/Lib/site-packages/whoosh/util/testing.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# Copyright 2007 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import os.path
|
||||
import random
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
from contextlib import contextmanager
|
||||
|
||||
from whoosh.filedb.filestore import FileStorage
|
||||
from whoosh.util import now, random_name
|
||||
|
||||
|
||||
class TempDir(object):
    """Context manager that creates a temporary directory and (unless
    ``keepdir``) deletes it on exit.

    :param basename: prefix for the generated directory name; a random name
        is used if empty.
    :param parentdir: use this path instead of creating one with
        ``tempfile.mkdtemp()``.
    :param ext: suffix for the generated directory name.
    :param suppress: exception types that should not propagate out of the
        ``with`` block.
    :param keepdir: if True, the directory is not deleted on exit.
    """

    def __init__(self, basename="", parentdir=None, ext=".whoosh",
                 suppress=frozenset(), keepdir=False):
        self.basename = basename or random_name(8)
        self.parentdir = parentdir

        # mkdtemp(suffix, prefix) actually creates the directory
        dirname = parentdir or tempfile.mkdtemp(ext, self.basename)
        self.dir = os.path.abspath(dirname)
        self.suppress = suppress
        self.keepdir = keepdir

    def __enter__(self):
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        return self.dir

    def cleanup(self):
        # Hook for subclasses to release resources before the directory is
        # removed
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
        if not self.keepdir:
            try:
                shutil.rmtree(self.dir)
            except OSError:
                # Best-effort removal: this can fail e.g. on Windows with
                # open handles; deliberately ignored. (The original kept a
                # dead ``e = sys.exc_info()[1]`` assignment here, left over
                # from commented-out debugging output.)
                pass

        if exc_type is not None:
            if self.keepdir:
                sys.stderr.write("Temp dir=" + self.dir + "\n")
            # Returning False propagates exceptions not in ``suppress``
            if exc_type not in self.suppress:
                return False
|
||||
|
||||
|
||||
class TempStorage(TempDir):
    """TempDir variant that opens a whoosh FileStorage in the temporary
    directory and yields the storage object instead of the path.
    """

    def __init__(self, debug=False, **kwargs):
        TempDir.__init__(self, **kwargs)
        self._debug = debug

    def __enter__(self):
        path = TempDir.__enter__(self)
        self.store = FileStorage(path, debug=self._debug)
        return self.store

    def cleanup(self):
        # Close the storage before TempDir removes the directory
        self.store.close()
|
||||
|
||||
|
||||
class TempIndex(TempStorage):
    """TempStorage variant that creates an index with the given schema in
    the temporary storage and yields the Index object.
    """

    def __init__(self, schema, ixname='', storage_debug=False, **kwargs):
        TempStorage.__init__(self, basename=ixname, debug=storage_debug,
                             **kwargs)
        self.schema = schema

    def __enter__(self):
        store = TempStorage.__enter__(self)
        return store.create_index(self.schema, indexname=self.basename)
|
||||
|
||||
|
||||
def is_abstract_method(attr):
    """Returns True if the given object has __isabstractmethod__ == True.
    """

    # getattr with a default collapses the original hasattr/getattr pair
    return getattr(attr, "__isabstractmethod__", False)
|
||||
|
||||
|
||||
def check_abstract_methods(base, subclass):
    """Raises Exception if ``subclass`` does not override a method on
    ``base`` that is marked as an abstract method.
    """

    for name in dir(base):
        # Only public attributes are checked
        if name.startswith("_"):
            continue
        if not is_abstract_method(getattr(base, name)):
            continue
        # The attribute is abstract on the base; it must not still be
        # abstract on the subclass
        if is_abstract_method(getattr(subclass, name)):
            raise Exception("%s.%s not overridden"
                            % (subclass.__name__, name))
|
||||
|
||||
|
||||
@contextmanager
def timing(name=None):
    """Context manager that prints how long the enclosed block took, e.g.::

        with timing("indexing"):
            do_work()
    """

    start = now()
    yield
    elapsed = now() - start
    print("%s: %0.06f s" % (name or '', elapsed))
|
||||
132
venv/Lib/site-packages/whoosh/util/text.py
Normal file
132
venv/Lib/site-packages/whoosh/util/text.py
Normal file
@@ -0,0 +1,132 @@
|
||||
# Copyright 2007 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import codecs, re
|
||||
|
||||
from whoosh.compat import string_type, u, byte
|
||||
|
||||
|
||||
# Note: these functions return a tuple of (text, length), so when you call
# them, you have to add [0] on the end, e.g. str = utf8encode(unicode)[0]

utf8encode = codecs.getencoder("utf-8")
utf8decode = codecs.getdecoder("utf-8")
|
||||
|
||||
|
||||
# Prefix encoding functions
|
||||
|
||||
def first_diff(a, b):
    """
    Returns the position of the first differing character in the sequences a
    and b. For example, first_diff('render', 'rending') == 4. This function
    limits the return value to 255 so the difference can be encoded in a single
    byte.
    """

    i = 0
    # BUG FIX: the bound must be ``i < 255`` -- with the original
    # ``i <= 255`` the loop could return 256, which does not fit in the
    # single byte promised above (and would overflow byte() in
    # prefix_encode()).
    while i < 255 and i < len(a) and i < len(b) and a[i] == b[i]:
        i += 1
    return i
|
||||
|
||||
|
||||
def prefix_encode(a, b):
    """
    Compresses bytestring b as a byte representing the prefix it shares with a,
    followed by the suffix bytes.
    """

    prefixlen = first_diff(a, b)
    return byte(prefixlen) + b[prefixlen:]
|
||||
|
||||
|
||||
def prefix_encode_all(ls):
    """Compresses the given list of (unicode) strings by storing each string
    (except the first one) as an integer (encoded in a byte) representing
    the prefix it shares with its predecessor, followed by the suffix encoded
    as UTF-8.
    """

    prev = u('')
    for word in ls:
        common = first_diff(prev, word)
        # NOTE(review): chr(common) + <encoded bytes> only concatenates
        # cleanly on Python 2 (where both are str) -- confirm this helper
        # is not used on Python 3.
        yield chr(common) + word[common:].encode("utf-8")
        prev = word
|
||||
|
||||
|
||||
def prefix_decode_all(ls):
    """Decompresses a list of strings compressed by prefix_encode().
    """

    prev = u('')
    for item in ls:
        # First character is the shared-prefix length, the rest is the
        # UTF-8 encoded suffix
        prefixlen = ord(item[0])
        word = prev[:prefixlen] + item[1:].decode("utf-8")
        yield word
        prev = word
|
||||
|
||||
|
||||
# Natural key sorting function
|
||||
|
||||
# Matches alternating runs of non-digit and digit characters
_nkre = re.compile(r"\D+|\d+", re.UNICODE)


def _nkconv(i):
    """Converts a digit run to an int and any other run to lowercase."""

    try:
        return int(i)
    except ValueError:
        return i.lower()


def natural_key(s):
    """Converts string ``s`` into a tuple that will sort "naturally" (i.e.,
    ``name5`` will come before ``name10`` and ``1`` will come before ``A``).
    This function is designed to be used as the ``key`` argument to sorting
    functions.

    :param s: the str/unicode string to convert.
    :rtype: tuple
    """

    # Split the input into digit and non-digit runs, then convert each run
    # with _nkconv() so numeric runs compare numerically
    return tuple(_nkconv(run) for run in _nkre.findall(s))
|
||||
|
||||
|
||||
# Regular expression functions
|
||||
|
||||
def rcompile(pattern, flags=0, verbose=False):
    """A wrapper for re.compile that checks whether "pattern" is a regex object
    or a string to be compiled, and automatically adds the re.UNICODE flag.

    :param pattern: a pattern string or an already-compiled pattern object.
    :param flags: extra ``re`` flags to combine with re.UNICODE.
    :param verbose: if True, also add the re.VERBOSE flag.
    """

    if not isinstance(pattern, string_type):
        # If it's not a string, assume it's already a compiled pattern
        return pattern
    if verbose:
        flags |= re.VERBOSE
    return re.compile(pattern, re.UNICODE | flags)
|
||||
467
venv/Lib/site-packages/whoosh/util/times.py
Normal file
467
venv/Lib/site-packages/whoosh/util/times.py
Normal file
@@ -0,0 +1,467 @@
|
||||
# Copyright 2010 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
import calendar
|
||||
import copy
|
||||
from datetime import date, datetime, timedelta
|
||||
|
||||
from whoosh.compat import iteritems
|
||||
|
||||
|
||||
class TimeError(Exception):
    """Raised for invalid date/time values (e.g. out-of-range fields)."""
    pass
|
||||
|
||||
|
||||
def relative_days(current_wday, wday, dir):
    """Returns the number of days (positive or negative) to the "next" or
    "last" of a certain weekday. ``current_wday`` and ``wday`` are numbers,
    i.e. 0 = monday, 1 = tuesday, 2 = wednesday, etc.

    >>> # Get the number of days to the next tuesday, if today is Sunday
    >>> relative_days(6, 1, 1)
    2

    :param current_wday: the number of the current weekday.
    :param wday: the target weekday.
    :param dir: -1 for the "last" (past) weekday, 1 for the "next" (future)
        weekday.
    """

    # The same weekday is always a full week away, in either direction
    if current_wday == wday:
        return 7 * dir

    if dir == 1:
        return (wday + 7 - current_wday) % 7
    return -((current_wday + 7 - wday) % 7)
|
||||
|
||||
|
||||
def timedelta_to_usecs(td):
    """Converts a ``datetime.timedelta`` to a total number of microseconds."""

    # days -> seconds -> microseconds, folded into one expression
    return (td.days * 86400 + td.seconds) * 1000000 + td.microseconds
|
||||
|
||||
|
||||
def datetime_to_long(dt):
    """Converts a datetime object to a long integer representing the number
    of microseconds since ``datetime.min``.
    """

    # Drop any tzinfo so the subtraction against the naive datetime.min works
    naive = dt.replace(tzinfo=None)
    return timedelta_to_usecs(naive - dt.min)
|
||||
|
||||
|
||||
def long_to_datetime(x):
    """Converts a long integer representing the number of microseconds since
    ``datetime.min`` to a datetime object.
    """

    # Split into whole days, whole seconds, and leftover microseconds
    days, remainder = divmod(x, 86400000000)  # Microseconds in a day
    seconds, usecs = divmod(remainder, 1000000)  # Microseconds in a second

    return datetime.min + timedelta(days=days, seconds=seconds,
                                    microseconds=usecs)
|
||||
|
||||
|
||||
# Ambiguous datetime object
|
||||
|
||||
class adatetime(object):
    """An "ambiguous" datetime object. This object acts like a
    ``datetime.datetime`` object but can have any of its attributes set to
    None, meaning unspecified.
    """

    # The attribute names an instance carries; None in any of them means
    # "unspecified"
    units = frozenset(("year", "month", "day", "hour", "minute", "second",
                       "microsecond"))

    def __init__(self, year=None, month=None, day=None, hour=None, minute=None,
                 second=None, microsecond=None):
        # Copy-constructor style: passing a datetime as the first argument
        # copies all of its attributes (the result is fully specified)
        if isinstance(year, datetime):
            dt = year
            self.year, self.month, self.day = dt.year, dt.month, dt.day
            self.hour, self.minute, self.second = dt.hour, dt.minute, dt.second
            self.microsecond = dt.microsecond
        else:
            # Validate each given (non-None) component; None is always allowed
            if month is not None and (month < 1 or month > 12):
                raise TimeError("month must be in 1..12")

            if day is not None and day < 1:
                raise TimeError("day must be greater than 1")
            # The upper bound on day can only be checked when year and month
            # are both known (month lengths and leap years vary)
            if (year is not None and month is not None and day is not None
                and day > calendar.monthrange(year, month)[1]):
                raise TimeError("day is out of range for month")

            if hour is not None and (hour < 0 or hour > 23):
                raise TimeError("hour must be in 0..23")
            if minute is not None and (minute < 0 or minute > 59):
                raise TimeError("minute must be in 0..59")
            if second is not None and (second < 0 or second > 59):
                raise TimeError("second must be in 0..59")
            if microsecond is not None and (microsecond < 0
                                            or microsecond > 999999):
                raise TimeError("microsecond must be in 0..999999")

            self.year, self.month, self.day = year, month, day
            self.hour, self.minute, self.second = hour, minute, second
            self.microsecond = microsecond

    def __eq__(self, other):
        if not other.__class__ is self.__class__:
            # An unambiguous adatetime may compare equal to the equivalent
            # datetime; any other type is simply unequal
            if not is_ambiguous(self) and isinstance(other, datetime):
                return fix(self) == other
            else:
                return False
        return all(getattr(self, unit) == getattr(other, unit)
                   for unit in self.units)

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, self.tuple())

    def tuple(self):
        """Returns the attributes of the ``adatetime`` object as a tuple of
        ``(year, month, day, hour, minute, second, microsecond)``.
        """

        return (self.year, self.month, self.day, self.hour, self.minute,
                self.second, self.microsecond)

    def date(self):
        # Returns a datetime.date; raises TypeError if year, month, or day
        # is None (date() requires all three)
        return date(self.year, self.month, self.day)

    def copy(self):
        # Returns an independent adatetime with the same attribute values
        return adatetime(year=self.year, month=self.month, day=self.day,
                         hour=self.hour, minute=self.minute, second=self.second,
                         microsecond=self.microsecond)

    def replace(self, **kwargs):
        """Returns a copy of this object with the attributes given as keyword
        arguments replaced.

        >>> adt = adatetime(year=2009, month=10, day=31)
        >>> adt.replace(year=2010)
        adatetime(2010, 10, 31, None, None, None, None)

        :raises KeyError: if a keyword is not one of the names in ``units``.
        """

        newadatetime = self.copy()
        for key, value in iteritems(kwargs):
            if key in self.units:
                setattr(newadatetime, key, value)
            else:
                raise KeyError("Unknown argument %r" % key)
        return newadatetime

    def floor(self):
        """Returns a ``datetime`` version of this object with all unspecified
        (None) attributes replaced by their lowest values.

        This method raises an error if the ``adatetime`` object has no year.

        >>> adt = adatetime(year=2009, month=5)
        >>> adt.floor()
        datetime.datetime(2009, 5, 1, 0, 0)
        """

        y, m, d, h, mn, s, ms = (self.year, self.month, self.day, self.hour,
                                 self.minute, self.second, self.microsecond)

        if y is None:
            raise ValueError("Date has no year")

        if m is None:
            m = 1
        if d is None:
            d = 1
        if h is None:
            h = 0
        if mn is None:
            mn = 0
        if s is None:
            s = 0
        if ms is None:
            ms = 0
        return datetime(y, m, d, h, mn, s, ms)

    def ceil(self):
        """Returns a ``datetime`` version of this object with all unspecified
        (None) attributes replaced by their highest values.

        This method raises an error if the ``adatetime`` object has no year.

        >>> adt = adatetime(year=2009, month=5)
        >>> adt.ceil()
        datetime.datetime(2009, 5, 31, 23, 59, 59, 999999)
        """

        y, m, d, h, mn, s, ms = (self.year, self.month, self.day, self.hour,
                                 self.minute, self.second, self.microsecond)

        if y is None:
            raise ValueError("Date has no year")

        if m is None:
            m = 12
        if d is None:
            # Last day of the (possibly just-defaulted) month
            d = calendar.monthrange(y, m)[1]
        if h is None:
            h = 23
        if mn is None:
            mn = 59
        if s is None:
            s = 59
        if ms is None:
            ms = 999999
        return datetime(y, m, d, h, mn, s, ms)

    def disambiguated(self, basedate):
        """Returns either a ``datetime`` or unambiguous ``timespan`` version
        of this object.

        Unless this ``adatetime`` object is fully specified down to the
        microsecond, this method will return a timespan built from the "floor"
        and "ceil" of this object.

        This method raises an error if the ``adatetime`` object has no year.

        >>> adt = adatetime(year=2009, month=10, day=31)
        >>> adt.disambiguated(datetime.now())
        timespan(datetime(2009, 10, 31, 0, 0, 0, 0), datetime(2009, 10, 31, 23, 59, 59, 999999))
        """

        dt = self
        if not is_ambiguous(dt):
            return fix(dt)
        # Delegate to timespan, which knows how to fill in missing pieces
        # from the basedate
        return timespan(dt, dt).disambiguated(basedate)
|
||||
|
||||
|
||||
# Time span class
|
||||
|
||||
class timespan(object):
    """A span of time between two ``datetime`` or ``adatetime`` objects.
    """

    def __init__(self, start, end):
        """
        :param start: a ``datetime`` or ``adatetime`` object representing the
            start of the time span.
        :param end: a ``datetime`` or ``adatetime`` object representing the
            end of the time span.
        :raises TimeError: if either argument is not a (a)datetime object.
        """

        if not isinstance(start, (datetime, adatetime)):
            raise TimeError("%r is not a datetime object" % start)
        if not isinstance(end, (datetime, adatetime)):
            raise TimeError("%r is not a datetime object" % end)

        # Copy so later mutation of the arguments can't change this span
        self.start = copy.copy(start)
        self.end = copy.copy(end)

    def __eq__(self, other):
        if not other.__class__ is self.__class__:
            return False
        return self.start == other.start and self.end == other.end

    def __repr__(self):
        return "%s(%r, %r)" % (self.__class__.__name__, self.start, self.end)

    def disambiguated(self, basedate, debug=0):
        """Returns an unambiguous version of this object.

        >>> start = adatetime(year=2009, month=2)
        >>> end = adatetime(year=2009, month=10)
        >>> ts = timespan(start, end)
        >>> ts
        timespan(adatetime(2009, 2, None, None, None, None, None), adatetime(2009, 10, None, None, None, None, None))
        >>> ts.disambiguated(datetime.now())
        timespan(datetime(2009, 2, 1, 0, 0, 0, 0), datetime(2009, 10, 31, 23, 59, 59, 999999))
        """

        # TODO notes from the original author:
        #- If year is in start but not end, use basedate.year for end
        #-- If year is in start but not end, but startdate is > basedate,
        #   use "next <monthname>" to get end month/year
        #- If year is in end but not start, copy year from end to start
        #- Support "next february", "last april", etc.

        # Work on copies; self.start/self.end must not be mutated
        start, end = copy.copy(self.start), copy.copy(self.end)
        # Remember which years were unspecified so out-of-order results can
        # be repaired below by adjusting the guessed year
        start_year_was_amb = start.year is None
        end_year_was_amb = end.year is None

        if has_no_date(start) and has_no_date(end):
            # The start and end points are just times, so use the basedate
            # for the date information.
            by, bm, bd = basedate.year, basedate.month, basedate.day
            start = start.replace(year=by, month=bm, day=bd)
            end = end.replace(year=by, month=bm, day=bd)
        else:
            # If one side has a year and the other doesn't, the decision
            # of what year to assign to the ambiguous side is kind of
            # arbitrary. I've used a heuristic here based on how the range
            # "reads", but it may only be reasonable in English. And maybe
            # even just to me.

            if start.year is None and end.year is None:
                # No year on either side, use the basedate
                start.year = end.year = basedate.year
            elif start.year is None:
                # No year in the start, use the year from the end
                start.year = end.year
            elif end.year is None:
                end.year = max(start.year, basedate.year)

        if start.year == end.year:
            # Once again, if one side has a month and day but the other side
            # doesn't, the disambiguation is arbitrary. Does "3 am to 5 am
            # tomorrow" mean 3 AM today to 5 AM tomorrow, or 3am tomorrow to
            # 5 am tomorrow? What I picked is similar to the year: if the
            # end has a month+day and the start doesn't, copy the month+day
            # from the end to the start UNLESS that would make the end come
            # before the start on that day, in which case use the basedate
            # instead. If the start has a month+day and the end doesn't, use
            # the basedate.
            start_dm = not (start.month is None and start.day is None)
            end_dm = not (end.month is None and end.day is None)
            if end_dm and not start_dm:
                if start.floor().time() > end.ceil().time():
                    start.month = basedate.month
                    start.day = basedate.day
                else:
                    start.month = end.month
                    start.day = end.day
            elif start_dm and not end_dm:
                end.month = basedate.month
                end.day = basedate.day

        if floor(start).date() > ceil(end).date():
            # If the disambiguated dates are out of order:
            # - If no start year was given, reduce the start year to put the
            #   start before the end
            # - If no end year was given, increase the end year to put the end
            #   after the start
            # - If a year was specified for both, just swap the start and end
            if start_year_was_amb:
                start.year = end.year - 1
            elif end_year_was_amb:
                end.year = start.year + 1
            else:
                start, end = end, start

        # Resolve any remaining ambiguity: start rounds down, end rounds up
        start = floor(start)
        end = ceil(end)

        if start.date() == end.date() and start.time() > end.time():
            # If the start and end are on the same day, but the start time
            # is after the end time, move the end time to the next day
            end += timedelta(days=1)

        return timespan(start, end)
|
||||
|
||||
|
||||
# Functions for working with datetime/adatetime objects
|
||||
|
||||
def floor(at):
    """Returns ``at`` unchanged if it is already a ``datetime``; otherwise
    returns ``at.floor()`` (the lowest datetime the adatetime could mean).
    """

    return at if isinstance(at, datetime) else at.floor()
|
||||
|
||||
|
||||
def ceil(at):
    """Returns ``at`` unchanged if it is already a ``datetime``; otherwise
    returns ``at.ceil()`` (the highest datetime the adatetime could mean).
    """

    return at if isinstance(at, datetime) else at.ceil()
|
||||
|
||||
|
||||
def fill_in(at, basedate, units=adatetime.units):
    """Returns a copy of ``at`` with any unspecified (None) units filled in
    with values from ``basedate``.

    :param at: a ``datetime`` (returned unchanged) or ``adatetime`` object.
    :param basedate: the object supplying values for unspecified units.
    :param units: the attribute names to fill in; defaults to all of them.
    """

    # A real datetime is already fully specified
    if isinstance(at, datetime):
        return at

    args = {}
    for unit in units:
        value = getattr(at, unit)
        args[unit] = getattr(basedate, unit) if value is None else value
    # fix() converts the result to a plain datetime if it is now unambiguous
    return fix(adatetime(**args))
|
||||
|
||||
|
||||
def has_no_date(at):
    """Returns True if the given object is an ``adatetime`` where ``year``,
    ``month``, and ``day`` are all None.
    """

    # A real datetime always has a complete date
    if isinstance(at, datetime):
        return False
    return all(value is None for value in (at.year, at.month, at.day))
|
||||
|
||||
|
||||
def has_no_time(at):
    """Returns True if the given object is an ``adatetime`` where ``hour``,
    ``minute``, ``second`` and ``microsecond`` are all None.
    """

    # A real datetime always has a complete time
    if isinstance(at, datetime):
        return False
    return all(value is None
               for value in (at.hour, at.minute, at.second, at.microsecond))
|
||||
|
||||
|
||||
def is_ambiguous(at):
    """Returns True if the given object is an ``adatetime`` with any of its
    attributes equal to None.
    """

    # Real datetimes are never ambiguous
    if isinstance(at, datetime):
        return False
    return not all(getattr(at, attr) is not None for attr in adatetime.units)
|
||||
|
||||
|
||||
def is_void(at):
    """Returns True if the given object is an ``adatetime`` with all of its
    attributes equal to None.
    """

    # A real datetime has every attribute specified, so it is never void
    if isinstance(at, datetime):
        return False
    return not any(getattr(at, attr) is not None for attr in adatetime.units)
|
||||
|
||||
|
||||
def fix(at):
    """If the given object is an ``adatetime`` that is unambiguous (because
    all its attributes are specified, that is, not equal to None), returns a
    ``datetime`` version of it. Otherwise returns the ``adatetime`` object
    unchanged.
    """

    # datetimes pass through; ambiguous adatetimes can't be converted
    if isinstance(at, datetime) or is_ambiguous(at):
        return at
    values = (at.year, at.month, at.day, at.hour, at.minute, at.second,
              at.microsecond)
    return datetime(*values)
|
||||
110
venv/Lib/site-packages/whoosh/util/varints.py
Normal file
110
venv/Lib/site-packages/whoosh/util/varints.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright 2007 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from array import array
|
||||
|
||||
from whoosh.compat import array_tobytes, xrange
|
||||
|
||||
|
||||
# Varint cache
|
||||
|
||||
# Build a cache of the varint byte sequences for the first N integers, so we
|
||||
# don't have to constantly recalculate them on the fly. This makes a small but
|
||||
# noticeable difference.
|
||||
|
||||
def _varint(i):
    """Encodes the non-negative integer ``i`` as a variable-length byte
    string: 7 value bits per byte, least-significant group first, with the
    high bit set on every byte except the last.
    """

    out = array("B")
    # While more than 7 bits remain, emit a continuation byte
    while i & ~0x7F:
        out.append(0x80 | (i & 0x7F))
        i >>= 7
    # Final byte has the high bit clear
    out.append(i)
    return array_tobytes(out)
|
||||
|
||||
|
||||
_varint_cache_size = 512
|
||||
_varint_cache = []
|
||||
for i in xrange(0, _varint_cache_size):
|
||||
_varint_cache.append(_varint(i))
|
||||
_varint_cache = tuple(_varint_cache)
|
||||
|
||||
|
||||
def varint(i):
    """Encodes the given integer into a string of the minimum number of bytes.
    """

    # Small values come straight from the precomputed cache
    cache = _varint_cache
    if i < len(cache):
        return cache[i]
    return _varint(i)
|
||||
|
||||
|
||||
def varint_to_int(vi):
    """Decodes a varint byte string (as produced by :func:`varint`) back
    into an integer.
    """

    pos = 0
    byte = ord(vi[pos])
    pos += 1
    value = byte & 0x7F
    shift = 7
    # The high bit marks a continuation byte; keep accumulating 7-bit groups
    while byte & 0x80:
        byte = ord(vi[pos])
        pos += 1
        value |= (byte & 0x7F) << shift
        shift += 7
    return value
|
||||
|
||||
|
||||
def signed_varint(i):
    """Zig-zag encodes a signed integer into a varint.

    Non-negative n maps to 2n; negative n maps to -2n - 1, so small
    magnitudes of either sign encode into few bytes.
    """

    zigzag = (i << 1) if i >= 0 else ((i << 1) ^ (~0))
    return varint(zigzag)
|
||||
|
||||
|
||||
def decode_signed_varint(i):
    """Zig-zag decodes an integer value.

    Even inputs decode to non-negative integers, odd inputs to negative ones.
    """

    magnitude = i >> 1
    return magnitude if not (i & 1) else magnitude ^ (~0)
|
||||
|
||||
|
||||
def read_varint(readfn):
    """
    Reads a variable-length encoded integer.

    :param readfn: a callable that reads a given number of bytes,
        like file.read().
    """

    result = 0
    shift = 0
    while True:
        b = ord(readfn(1))
        # Low 7 bits are data; accumulate least-significant group first
        result |= (b & 0x7F) << shift
        shift += 7
        # High bit clear means this was the final byte
        if not (b & 0x80):
            break
    return result
|
||||
165
venv/Lib/site-packages/whoosh/util/versions.py
Normal file
165
venv/Lib/site-packages/whoosh/util/versions.py
Normal file
@@ -0,0 +1,165 @@
|
||||
# Copyright 2012 Matt Chaput. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# The views and conclusions contained in the software and documentation are
|
||||
# those of the authors and should not be interpreted as representing official
|
||||
# policies, either expressed or implied, of Matt Chaput.
|
||||
|
||||
from whoosh.util.text import rcompile
|
||||
|
||||
|
||||
class BaseVersion(object):
    # Base class for parseable, comparable version-number objects.
    #
    # Subclasses are expected to provide:
    #   __slots__    -- the ordered attribute names making up the version
    #   _version_exp -- a compiled regex with named groups for the slots
    #   _parts       -- a sequence of (groupname, converter) pairs for parse()

    @classmethod
    def parse(cls, text):
        """Parses ``text`` and returns an instance of this class.

        Groups that do not match keep the instance defaults; if the text
        does not match the expression at all, a default instance is returned.
        """
        obj = cls()
        match = cls._version_exp.match(text)
        if match:
            groupdict = match.groupdict()
            for groupname, typ in cls._parts:
                v = groupdict.get(groupname)
                if v is not None:
                    # Convert the captured string using the declared type
                    setattr(obj, groupname, typ(v))
        return obj

    def __repr__(self):
        vs = ", ".join(repr(getattr(self, slot)) for slot in self.__slots__)
        return "%s(%s)" % (self.__class__.__name__, vs)

    def tuple(self):
        """Returns the version components as a tuple in ``__slots__`` order,
        suitable for lexicographic comparison.
        """
        return tuple(getattr(self, slot) for slot in self.__slots__)

    def __eq__(self, other):
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple() == other.tuple()

    def __lt__(self, other):
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple() < other.tuple()

    # Python does not derive the remaining rich comparisons automatically,
    # so each one is defined explicitly (this predates use of
    # functools.total_ordering)

    def __gt__(self, other):
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple() > other.tuple()

    def __ge__(self, other):
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple() >= other.tuple()

    def __le__(self, other):
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple() <= other.tuple()

    def __ne__(self, other):
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple() != other.tuple()
|
||||
|
||||
|
||||
class SimpleVersion(BaseVersion):
    """An object that parses version numbers such as::

        12.2.5b

    The filter supports a limited subset of PEP 386 versions including::

        1
        1.2
        1.2c
        1.2c3
        1.2.3
        1.2.3a
        1.2.3b4
        10.7.5rc1
        999.999.999c999
    """

    _version_exp = rcompile(r"""
    ^
    (?P<major>\d{1,4})
    (
        [.](?P<minor>\d{1,4})
        (
            [.](?P<release>\d{1,4})
        )?
        (
            (?P<ex>[abc]|rc)
            (?P<exnum>\d{1,4})?
        )?
    )?
    $
    """, verbose=True)

    # (groupid, converter) pairs used by BaseVersion.parse()
    _parts = [("major", int),
              ("minor", int),
              ("release", int),
              ("ex", str),
              ("exnum", int),
              ]

    # Maps the "extension" string to the 4-bit value used in the packed
    # integer form (and back). "z" (no extension, i.e. a final release)
    # sorts above a/b/c/rc.
    _ex_bits = {"a": 0, "b": 1, "c": 2, "rc": 10, "z": 15}
    _bits_ex = dict((v, k) for k, v in _ex_bits.items())

    __slots__ = ("major", "minor", "release", "ex", "exnum")

    def __init__(self, major=1, minor=0, release=0, ex="z", exnum=0):
        self.major = major
        self.minor = minor
        self.release = release
        self.ex = ex
        self.exnum = exnum

    def to_int(self):
        """Packs this version into a single integer with the layout
        ``major(10 bits) | minor(10) | release(10) | ex(4) | exnum(10)``,
        so that numeric order matches version order.
        """
        assert self.major < 1024
        n = self.major << 34

        assert self.minor < 1024
        n |= self.minor << 24

        assert self.release < 1024
        n |= self.release << 14

        # Unknown extensions pack as 15 ("z"), the highest value
        exbits = self._ex_bits.get(self.ex, 15)
        n |= exbits << 10

        assert self.exnum < 1024
        n |= self.exnum

        return n

    @classmethod
    def from_int(cls, n):
        """Unpacks an integer produced by :meth:`to_int` back into a
        ``SimpleVersion``.
        """
        major = (n & (1023 << 34)) >> 34
        minor = (n & (1023 << 24)) >> 24
        release = (n & (1023 << 14)) >> 14
        # BUG FIX: the "ex" field occupies 4 bits (to_int() stores values up
        # to 15, e.g. "rc" = 10, "z" = 15), so it must be masked with
        # 15 << 10. The previous 3-bit mask (7 << 10) dropped the top bit,
        # decoding "rc" (0b1010) as "c" (0b010).
        exbits = (n & (15 << 10)) >> 10
        ex = cls._bits_ex.get(exbits, "z")
        exnum = n & 1023

        return cls(major, minor, release, ex, exnum)
|
||||
Reference in New Issue
Block a user