This commit is contained in:
“shengyudong”
2026-01-06 14:18:39 +08:00
commit 5a384b694e
10345 changed files with 2050918 additions and 0 deletions

View File

@@ -0,0 +1,142 @@
# Copyright 2007 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from __future__ import with_statement
import random, sys, time
from bisect import insort, bisect_left
from functools import wraps
from whoosh.compat import xrange
# These must be valid separate characters in CASE-INSENSITIVE filenames
IDCHARS = "0123456789abcdefghijklmnopqrstuvwxyz"
# Pick the best timer available on this platform: time.perf_counter where
# it exists (Python >= 3.3), time.clock on older Windows, time.time elsewhere
if hasattr(time, "perf_counter"):
    now = time.perf_counter
elif sys.platform == 'win32':
    now = time.clock
else:
    now = time.time
def random_name(size=28):
    """Return a random identifier of ``size`` characters drawn from IDCHARS."""
    picked = (random.choice(IDCHARS) for _ in xrange(size))
    return "".join(picked)
def random_bytes(size=28):
    """Return ``size`` random byte values as a ``bytes`` object (``str`` on
    Python 2).

    :param size: number of random bytes to generate.
    """
    gen = (random.randint(0, 255) for _ in range(size))
    if sys.version_info[0] >= 3:
        return bytes(gen)
    else:
        # ``array`` is not imported at this file's top level; import it here
        # so the Python 2 branch doesn't raise NameError
        from array import array
        return array("B", gen).tostring()
def make_binary_tree(fn, args, **kwargs):
    """Takes a function/class that takes two positional arguments and a list of
    arguments and returns a binary tree of results/instances.

    >>> make_binary_tree(UnionMatcher, [matcher1, matcher2, matcher3])
    UnionMatcher(matcher1, UnionMatcher(matcher2, matcher3))

    Any keyword arguments given to this function are passed to the class
    initializer.
    """
    if not args:
        raise ValueError("Called make_binary_tree with empty list")
    if len(args) == 1:
        return args[0]
    # Split the argument list in half and combine the two subtrees
    mid = len(args) // 2
    left = make_binary_tree(fn, args[:mid], **kwargs)
    right = make_binary_tree(fn, args[mid:], **kwargs)
    return fn(left, right, **kwargs)
def make_weighted_tree(fn, ls, **kwargs):
    """Takes a function/class that takes two positional arguments and a list of
    (weight, argument) tuples and returns a huffman-like weighted tree of
    results/instances.
    """
    if not ls:
        raise ValueError("Called make_weighted_tree with empty list")
    ls.sort()
    # Repeatedly merge the two lightest subtrees until only one remains
    while len(ls) > 1:
        w1, t1 = ls.pop(0)
        w2, t2 = ls.pop(0)
        insort(ls, (w1 + w2, fn(t1, t2)))
    return ls[0][1]
# Fibonacci function
# Memo table shared by all calls to fib()
_fib_cache = {}
def fib(n):
    """Return the nth value of this module's Fibonacci-like sequence.

    Note: ``fib(n)`` returns ``n`` itself for ``n <= 2``, so the sequence runs
    0, 1, 2, 3, 5, 8, ... (shifted by one relative to the textbook series).
    Results for n > 2 are memoized in the module-level ``_fib_cache``.
    """
    if n <= 2:
        return n
    cached = _fib_cache.get(n)
    if cached is not None:
        return cached
    value = fib(n - 1) + fib(n - 2)
    _fib_cache[n] = value
    return value
# Decorators
def synchronized(func):
"""Decorator for storage-access methods, which synchronizes on a threading
lock. The parent object must have 'is_closed' and '_sync_lock' attributes.
"""
@wraps(func)
def synchronized_wrapper(self, *args, **kwargs):
with self._sync_lock:
return func(self, *args, **kwargs)
return synchronized_wrapper
def unclosed(method):
    """
    Decorator to check if the object is closed.
    """
    @wraps(method)
    def guarded(self, *args, **kwargs):
        # Refuse to operate on an object whose ``closed`` flag is set
        if self.closed:
            raise ValueError("Operation on a closed object")
        return method(self, *args, **kwargs)
    return guarded

View File

@@ -0,0 +1,375 @@
# Copyright 2007 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from __future__ import with_statement
import functools, random
from array import array
from heapq import nsmallest
from operator import itemgetter
from threading import Lock
from time import time
from whoosh.compat import iteritems, xrange
try:
    from collections import Counter
except ImportError:
    # Python < 2.7 has no collections.Counter; this minimal stand-in only
    # needs to behave like a dict whose missing keys count as zero
    class Counter(dict):
        def __missing__(self, key):
            return 0
def unbound_cache(func):
    """Caching decorator with an unbounded cache size.
    """
    memo = {}
    @functools.wraps(func)
    def caching_wrapper(*args):
        # Positional arguments (which must be hashable) are the cache key
        if args in memo:
            return memo[args]
        value = func(*args)
        memo[args] = value
        return value
    return caching_wrapper
def lru_cache(maxsize=100):
    """A simple cache that, when the cache is full, deletes the least recently
    used 10% of the cached values.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """
    def decorating_function(user_function):
        stats = [0, 0]  # Hits, misses
        data = {}  # Maps argument tuples to computed results
        lastused = {}  # Maps argument tuples to their last-access timestamp
        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) == maxsize:
                    # Cache is full: evict the ~10% least recently used keys
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          iteritems(lastused),
                                          key=itemgetter(1)):
                        del data[k]
                        del lastused[k]
                data[args] = user_function(*args)
                result = data[args]
            finally:
                # Record the access time for both hits and misses
                lastused[args] = time()
            return result
        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)
        def cache_clear():
            # Drop all cached values, access times, and statistics
            data.clear()
            lastused.clear()
            stats[0] = stats[1] = 0
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def lfu_cache(maxsize=100):
    """A simple cache that, when the cache is full, deletes the least frequently
    used 10% of the cached values.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """
    def decorating_function(user_function):
        stats = [0, 0]  # Hits, misses
        data = {}  # Maps argument tuples to computed results
        usecount = Counter()  # Maps argument tuples to access counts
        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) == maxsize:
                    # Cache is full: evict the ~10% least frequently used keys
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          iteritems(usecount),
                                          key=itemgetter(1)):
                        del data[k]
                        del usecount[k]
                data[args] = user_function(*args)
                result = data[args]
            finally:
                # Count this access whether it was a hit or a miss
                usecount[args] += 1
            return result
        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)
        def cache_clear():
            """Clear the cache and cache statistics."""
            data.clear()
            usecount.clear()
            # Also reset the hit/miss counters, matching the documented
            # behavior and the other cache decorators in this module
            stats[0] = stats[1] = 0
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def random_cache(maxsize=100):
    """A very simple cache that, when the cache is filled, deletes 10% of the
    cached values AT RANDOM.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """
    def decorating_function(user_function):
        stats = [0, 0]  # hits, misses
        data = {}  # Maps argument tuples to computed results
        @functools.wraps(user_function)
        def wrapper(*args):
            try:
                result = data[args]
                stats[0] += 1  # Hit
            except KeyError:
                stats[1] += 1  # Miss
                if len(data) == maxsize:
                    # Snapshot the keys in a real list: on Python 3,
                    # dict.keys() returns a view with no pop() method
                    keys = list(data)
                    for _ in range(maxsize // 10 or 1):
                        n = random.randint(0, len(keys) - 1)
                        k = keys.pop(n)
                        del data[k]
                data[args] = user_function(*args)
                result = data[args]
            return result
        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)
        def cache_clear():
            """Clear the cached values."""
            data.clear()
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def db_lru_cache(maxsize=100):
    """Double-barrel least-recently-used cache decorator. This is a simple
    LRU algorithm that keeps a primary and secondary dict. Keys are checked
    in the primary dict, and then the secondary. Once the primary dict fills
    up, the secondary dict is cleared and the two dicts are swapped.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library.

    Arguments to the cached function must be hashable.

    View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """
    def decorating_function(user_function):
        # state[0], state[1]: the two barrel dicts
        # state[2]: index (0 or 1) of the current primary barrel
        # state[3], state[4]: hit and miss counters
        state = [{}, {}, 0, 0, 0]
        @functools.wraps(user_function)
        def wrapper(*args):
            primary_ix = state[2]
            primary = state[primary_ix]
            secondary = state[not primary_ix]
            if args in primary:
                state[3] += 1  # Hit
                return primary[args]
            if args in secondary:
                state[3] += 1  # Hit
                return secondary[args]
            state[4] += 1  # Miss
            value = user_function(*args)
            primary[args] = value
            if len(primary) >= maxsize:
                # Primary barrel is full: demote it and start a fresh primary
                state[2] = not primary_ix
                secondary.clear()
            return value
        def cache_info():
            # (hits, misses, maxsize, currsize)
            return state[3], state[4], maxsize, len(state[0]) + len(state[1])
        def cache_clear():
            """Clear the cache and cache statistics"""
            state[0].clear()
            state[1].clear()
            state[3] = state[4] = 0
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function
def clockface_lru_cache(maxsize=100):
    """Least-recently-used cache decorator.

    This function duplicates (more-or-less) the protocol of the
    ``functools.lru_cache`` decorator in the Python 3.2 standard library, but
    uses the clock face LRU algorithm instead of an ordered dictionary.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize)
    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    """
    def decorating_function(user_function):
        stats = [0, 0, 0]  # hits, misses, hand
        data = {}  # Bounded variant maps key -> (clock position, result)
        if maxsize:
            # The keys at each point on the clock face
            clock_keys = [None] * maxsize
            # The "referenced" bits at each point on the clock face
            clock_refs = array("B", (0 for _ in range(maxsize)))
            lock = Lock()
            @functools.wraps(user_function)
            def wrapper(*args):
                key = args
                try:
                    with lock:
                        pos, result = data[key]
                        # The key is in the cache. Set the key's reference bit
                        clock_refs[pos] = 1
                        # Record a cache hit
                        stats[0] += 1
                except KeyError:
                    # Compute the value
                    result = user_function(*args)
                    with lock:
                        # Current position of the clock hand
                        hand = stats[2]
                        # Remember to stop here after a full revolution
                        end = hand
                        # Sweep around the clock looking for a position with
                        # the reference bit off
                        while True:
                            hand = (hand + 1) % maxsize
                            current_ref = clock_refs[hand]
                            if current_ref:
                                # This position's "referenced" bit is set. Turn
                                # the bit off and move on.
                                clock_refs[hand] = 0
                            elif not current_ref or hand == end:
                                # We've either found a position with the
                                # "reference" bit off or reached the end of the
                                # circular cache. So we'll replace this
                                # position with the new key
                                current_key = clock_keys[hand]
                                if current_key in data:
                                    del data[current_key]
                                clock_keys[hand] = key
                                clock_refs[hand] = 1
                                break
                        # Put the key and result in the cache
                        data[key] = (hand, result)
                        # Save the new hand position
                        stats[2] = hand
                        # Record a cache miss
                        stats[1] += 1
                return result
        else:
            # Unbounded variant: plain dict cache, no clock face, no locking
            @functools.wraps(user_function)
            def wrapper(*args):
                key = args
                try:
                    result = data[key]
                    stats[0] += 1
                except KeyError:
                    result = user_function(*args)
                    data[key] = result
                    stats[1] += 1
                return result
        def cache_info():
            # (hits, misses, maxsize, currsize)
            return stats[0], stats[1], maxsize, len(data)
        def cache_clear():
            """Clear the cache and cache statistics"""
            data.clear()
            stats[0] = stats[1] = stats[2] = 0
            # The clock face only exists in the bounded variant; referencing
            # clock_keys/clock_refs when maxsize is None (or 0) would raise
            # an error here, so guard the reset
            if maxsize:
                for i in range(maxsize):
                    clock_keys[i] = None
                    clock_refs[i] = 0
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorating_function

View File

@@ -0,0 +1,163 @@
# Copyright 2010 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
"""
This module contains classes implementing exclusive locks for platforms with
fcntl (UNIX and Mac OS X) and Windows. Whoosh originally used directory
creation as a locking method, but it had the problem that if the program
crashed the lock directory was left behind and would keep the index locked
until it was cleaned up. Using OS-level file locks fixes this.
"""
import errno
import os
import sys
import time
def try_for(fn, timeout=5.0, delay=0.1):
    """Calls ``fn`` every ``delay`` seconds until it returns True or
    ``timeout`` seconds elapse. Returns True if the lock was acquired, or False
    if the timeout was reached.

    :param timeout: Length of time (in seconds) to keep retrying to acquire the
        lock. 0 means return immediately. Only used when blocking is False.
    :param delay: How often (in seconds) to retry acquiring the lock during
        the timeout period. Only used when blocking is False and timeout > 0.
    """
    deadline = time.time() + timeout
    result = fn()
    # Keep retrying until fn succeeds or the deadline passes
    while not result and time.time() < deadline:
        time.sleep(delay)
        result = fn()
    return result
class LockBase(object):
    """Base class for file locks.
    """
    def __init__(self, filename):
        # File descriptor of the lock file; None until acquired
        self.fd = None
        self.filename = filename
        self.locked = False
    def __del__(self):
        # Best-effort cleanup: release the lock if an fd is still open.
        # Errors are swallowed because interpreter shutdown may already have
        # torn down modules that release() needs.
        if getattr(self, "fd", None):
            try:
                self.release()
            except:
                pass
    def acquire(self, blocking=False):
        """Acquire the lock. Returns True if the lock was acquired.

        :param blocking: if True, call blocks until the lock is acquired.
            This may not be available on all platforms. On Windows, this is
            actually just a delay of 10 seconds, rechecking every second.
        """
        pass
    def release(self):
        pass
class FcntlLock(LockBase):
    """File lock based on UNIX-only fcntl module.
    """
    def acquire(self, blocking=False):
        """Try to take an exclusive flock on self.filename.

        Returns True on success; on contention (EAGAIN/EACCES) closes the fd
        and returns False. Other IOErrors propagate.
        """
        import fcntl  # @UnresolvedImport
        flags = os.O_CREAT | os.O_WRONLY
        self.fd = os.open(self.filename, flags)
        mode = fcntl.LOCK_EX
        if not blocking:
            mode |= fcntl.LOCK_NB
        try:
            fcntl.flock(self.fd, mode)
            self.locked = True
            return True
        except IOError:
            e = sys.exc_info()[1]
            if e.errno not in (errno.EAGAIN, errno.EACCES):
                raise
            os.close(self.fd)
            self.fd = None
            return False
    def release(self):
        if self.fd is None:
            raise Exception("Lock was not acquired")
        import fcntl  # @UnresolvedImport
        fcntl.flock(self.fd, fcntl.LOCK_UN)
        os.close(self.fd)
        self.fd = None
        # Keep the ``locked`` attribute in sync with acquire(), which sets it
        # to True but previously nothing ever reset it
        self.locked = False
class MsvcrtLock(LockBase):
    """File lock based on Windows-only msvcrt module.
    """
    def acquire(self, blocking=False):
        """Try to lock one byte of self.filename with msvcrt.locking.

        Returns True on success; on contention (EAGAIN/EACCES/EDEADLK) closes
        the fd and returns False. Other IOErrors propagate.
        """
        import msvcrt  # @UnresolvedImport
        flags = os.O_CREAT | os.O_WRONLY
        mode = msvcrt.LK_NBLCK
        if blocking:
            mode = msvcrt.LK_LOCK
        self.fd = os.open(self.filename, flags)
        try:
            msvcrt.locking(self.fd, mode, 1)
            # Record that we hold the lock, consistent with FcntlLock
            # (this class previously never set ``locked`` at all)
            self.locked = True
            return True
        except IOError:
            e = sys.exc_info()[1]
            if e.errno not in (errno.EAGAIN, errno.EACCES, errno.EDEADLK):
                raise
            os.close(self.fd)
            self.fd = None
            return False
    def release(self):
        import msvcrt  # @UnresolvedImport
        if self.fd is None:
            raise Exception("Lock was not acquired")
        msvcrt.locking(self.fd, msvcrt.LK_UNLCK, 1)
        os.close(self.fd)
        self.fd = None
        self.locked = False
# Select the platform-appropriate lock implementation as the default FileLock
if os.name == "nt":
    FileLock = MsvcrtLock
else:
    FileLock = FcntlLock

View File

@@ -0,0 +1,84 @@
# Copyright 2012 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
import pickle
class RenamingUnpickler(pickle.Unpickler):
    """Subclasses ``pickle.Unpickler`` to allow remapping of class names before
    loading them.
    """
    def __init__(self, f, objmap, shortcuts=None):
        pickle.Unpickler.__init__(self, f)
        if shortcuts:
            # Expand %-style placeholders in both the old and new names
            objmap = dict((key % shortcuts, value % shortcuts)
                          for key, value in objmap.items())
        self._objmap = objmap
    def find_class(self, modulename, objname):
        fqname = "%s.%s" % (modulename, objname)
        # Substitute the remapped name, if one was registered
        fqname = self._objmap.get(fqname, fqname)
        try:
            obj = find_object(fqname)
        except ImportError:
            raise ImportError("Couldn't find %r" % fqname)
        return obj
def find_object(name, blacklist=None, whitelist=None):
    """Imports and returns an object given a fully qualified name.

    >>> find_object("whoosh.analysis.StopFilter")
    <class 'whoosh.analysis.StopFilter'>
    """
    # Reject names that match a forbidden prefix
    for pre in (blacklist or ()):
        if name.startswith(pre):
            raise TypeError("%r: can't instantiate names starting with %r"
                            % (name, pre))
    # When a whitelist is given, the name must match one of its prefixes
    if whitelist:
        if not any(name.startswith(pre) for pre in whitelist):
            raise TypeError("Can't instantiate %r" % name)
    dot = name.rfind(".")
    assert dot > -1, "Name %r must be fully qualified" % name
    modname = name[:dot]
    objname = name[dot + 1:]
    module = __import__(modname, fromlist=[objname])
    return getattr(module, objname)

View File

@@ -0,0 +1,317 @@
# Copyright 2010 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
import math, struct
from array import array
from bisect import bisect_left
from struct import pack, unpack
from whoosh.compat import b, long_type
from whoosh.system import pack_byte, unpack_byte, pack_ushort, unpack_ushort
from whoosh.system import pack_int, unpack_int, pack_uint, unpack_uint
from whoosh.system import pack_long, unpack_long, pack_ulong, unpack_ulong
from whoosh.system import pack_float, unpack_float, pack_double, unpack_double
# A NaN double decoded from an all-ones bit pattern
NaN = struct.unpack("<d", b('\xff\xff\xff\xff\xff\xff\xff\xff'))[0]
# Largest value storable in each array/struct typecode
typecode_max = {"b": 127, "B": 255, "h": 2 ** 15 - 1, "H": 2 ** 16 - 1,
                "i": 2 ** 31 - 1, "I": 2 ** 32 - 1,
                "q": 2 ** 63 - 1, "Q": 2 ** 64 - 1}
# Smallest value storable in each array/struct typecode
typecode_min = {"b": 0 - 128, "B": 0, "h": 0 - 2 ** 15, "H": 0,
                "i": 0 - 2 ** 31, "I": 0,
                "q": 0 - 2 ** 63, "Q": 0}
# Pack/unpack helpers (from whoosh.system) keyed by typecode
typecode_pack = {"B": pack_byte, "H": pack_ushort, "i": pack_int,
                 "I": pack_uint, "q": pack_long, "Q": pack_ulong,
                 "f": pack_float, "d": pack_double}
typecode_unpack = {"B": unpack_byte, "H": unpack_ushort, "i": unpack_int,
                   "I": unpack_uint, "q": unpack_long, "Q": unpack_ulong,
                   "f": unpack_float, "d": unpack_double}
# Functions related to binary representations
def bits_required(maxnum):
    """Returns the number of bits required to represent the given (unsigned)
    integer.
    """
    # log2 of the number, rounded up, but never fewer than one bit
    needed = math.ceil(math.log(maxnum, 2))
    return max(1, needed)
def typecode_required(maxnum):
    """Return the smallest array/struct typecode able to hold ``maxnum``."""
    if maxnum < 256:
        return "B"
    if maxnum < 2 ** 16:
        return "H"
    if maxnum < 2 ** 31 - 1:
        return "i"
    if maxnum < 2 ** 32:
        return "I"
    if maxnum < 2 ** 63 - 1:
        return "q"
    return "Q"
def max_value(bitcount):
    """Returns the maximum (unsigned) integer representable in the given number
    of bits.
    """
    # All-ones value of width ``bitcount``
    return (1 << bitcount) - 1
def bytes_for_bits(bitcount):
    """Return the number of whole bytes needed to hold ``bitcount + 1`` bits.

    Note the formula uses ``bitcount + 1``: it sizes a buffer for bit
    positions 0 through ``bitcount`` inclusive.
    """
    return int(math.ceil((bitcount + 1) / 8.0))
# Functions for converting numbers to and from sortable representations
# Pre-compiled big-endian Struct objects for 32-bit int, 64-bit int, and
# 64-bit float conversions, with their pack/unpack methods bound to locals
_istruct = struct.Struct(">i")
_qstruct = struct.Struct(">q")
_dstruct = struct.Struct(">d")
_ipack, _iunpack = _istruct.pack, _istruct.unpack
_qpack, _qunpack = _qstruct.pack, _qstruct.unpack
_dpack, _dunpack = _dstruct.pack, _dstruct.unpack
def to_sortable(numtype, intsize, signed, x):
    """Convert number ``x`` to an unsigned integer that sorts in numeric
    order.
    """
    if numtype is not int and numtype is not long_type:
        # Non-int types are remapped through their float bit pattern
        return float_to_sortable_long(x, signed)
    if signed:
        # Bias signed ints so the most negative value maps to 0
        x += (1 << intsize - 1)
    return x
def from_sortable(numtype, intsize, signed, x):
    """Inverse of ``to_sortable``: recover the original number from ``x``."""
    if numtype is not int and numtype is not long_type:
        return sortable_long_to_float(x, signed)
    if signed:
        # Remove the bias applied by to_sortable()
        x -= (1 << intsize - 1)
    return x
def float_to_sortable_long(x, signed):
    """Map a double to a long whose integer ordering matches float ordering."""
    # Reinterpret the float's IEEE 754 bits as a big-endian signed 64-bit int
    bits = struct.unpack(">q", struct.pack(">d", x))[0]
    if bits < 0:
        # Flip the non-sign bits of negatives so they sort ascending
        bits ^= 0x7fffffffffffffff
    if signed:
        bits += 1 << 63
    assert bits >= 0
    return bits
def sortable_long_to_float(x, signed):
    """Inverse of ``float_to_sortable_long``."""
    if signed:
        x -= 1 << 63
    if x < 0:
        # Undo the bit flip applied to negative floats
        x ^= 0x7fffffffffffffff
    return struct.unpack(">d", struct.pack(">q", x))[0]
# Functions for generating tiered ranges
def split_ranges(intsize, step, start, end):
"""Splits a range of numbers (from ``start`` to ``end``, inclusive)
into a sequence of trie ranges of the form ``(start, end, shift)``. The
consumer of these tuples is expected to shift the ``start`` and ``end``
right by ``shift``.
This is used for generating term ranges for a numeric field. The queries
for the edges of the range are generated at high precision and large blocks
in the middle are generated at low precision.
"""
shift = 0
while True:
diff = 1 << (shift + step)
mask = ((1 << step) - 1) << shift
setbits = lambda x: x | ((1 << shift) - 1)
haslower = (start & mask) != 0
hasupper = (end & mask) != mask
not_mask = ~mask & ((1 << intsize + 1) - 1)
nextstart = (start + diff if haslower else start) & not_mask
nextend = (end - diff if hasupper else end) & not_mask
if shift + step >= intsize or nextstart > nextend:
yield (start, setbits(end), shift)
break
if haslower:
yield (start, setbits(start | mask), shift)
if hasupper:
yield (end & not_mask, setbits(end), shift)
start = nextstart
end = nextend
shift += step
def tiered_ranges(numtype, intsize, signed, start, end, shift_step,
                  startexcl, endexcl):
    """Return trie ranges covering [start, end] at tiered precisions.

    Endpoints are first converted to sortable unsigned ints; a ``None``
    start/end means the minimum/maximum value for ``intsize``. If
    ``shift_step`` is falsy, a single full-precision range is returned.
    """
    assert numtype in (int, float)
    assert intsize in (8, 16, 32, 64)
    # Convert the lower endpoint to a sortable int (None = minimum)
    if start is None:
        start = 0
    else:
        start = to_sortable(numtype, intsize, signed, start)
        if startexcl:
            start += 1
    # Convert the upper endpoint (None = maximum for this int size)
    if end is None:
        end = 2 ** intsize - 1
    else:
        end = to_sortable(numtype, intsize, signed, end)
        if endexcl:
            end -= 1
    if not shift_step:
        # Single range at full precision
        return ((start, end, 0),)
    # Yield (rstart, rend, shift) ranges for the different resolutions
    return split_ranges(intsize, shift_step, start, end)
# Float-to-byte encoding/decoding
def float_to_byte(value, mantissabits=5, zeroexp=2):
    """Encodes a floating point number in a single byte.
    """
    # Assume int size == float size
    # Smallest representable "small float" code (the zero point)
    fzero = (63 - zeroexp) << mantissabits
    bits = unpack("i", pack("f", value))[0]
    smallfloat = bits >> (24 - mantissabits)
    if smallfloat < fzero:
        # Zero and negatives encode as 0; positive underflow encodes as the
        # next smallest non-zero code
        code = 0 if bits <= 0 else 1
    elif smallfloat >= fzero + 0x100:
        # Overflow clamps to the largest code
        code = 255
    else:
        code = smallfloat - fzero
    return b(chr(code))
def byte_to_float(b, mantissabits=5, zeroexp=2):
    """Decodes a floating point number stored in a single byte.
    """
    # Accept either an int code or a one-character byte/string
    code = b if type(b) is int else ord(b)
    if not code:
        return 0.0
    # Rebuild the float's bit pattern from the truncated mantissa/exponent
    bits = ((code & 0xff) << (24 - mantissabits)) + ((63 - zeroexp) << 24)
    return unpack("f", pack("i", bits))[0]
# Length-to-byte approximation functions
# Old implementation:
#def length_to_byte(length):
# """Returns a logarithmic approximation of the given number, in the range
# 0-255. The approximation has high precision at the low end (e.g.
# 1 -> 0, 2 -> 1, 3 -> 2 ...) and low precision at the high end. Numbers
# equal to or greater than 108116 all approximate to 255.
#
# This is useful for storing field lengths, where the general case is small
# documents and very large documents are more rare.
# """
#
# # This encoding formula works up to 108116 -> 255, so if the length is
# # equal to or greater than that limit, just return 255.
# if length >= 108116:
# return 255
#
# # The parameters of this formula were chosen heuristically so that low
# # numbers would approximate closely, and the byte range 0-255 would cover
# # a decent range of document lengths (i.e. 1 to ~100000).
# return int(round(log((length / 27.0) + 1, 1.033)))
#def _byte_to_length(n):
# return int(round((pow(1.033, n) - 1) * 27))
#_b2l_cache = array("i", (_byte_to_length(i) for i in xrange(256)))
#byte_to_length = _b2l_cache.__getitem__
# New implementation
# Instead of computing the actual formula to get the byte for any given length,
# precompute the length associated with each byte, and use bisect to find the
# nearest value. This gives quite a large speed-up.
#
# Note that this does not give all the same answers as the old, "real"
# implementation since this implementation always "rounds down" (thanks to the
# bisect_left) while the old implementation would "round up" or "round down"
# depending on the input. Since this is a fairly gross approximation anyway,
# I don't think it matters much.
# Values generated using the formula from the "old" implementation above
# Values generated using the formula from the "old" implementation above
_length_byte_cache = array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14,
16, 17, 18, 20, 21, 23, 25, 26, 28, 30, 32, 34, 36, 38, 40, 42, 45, 47, 49, 52,
54, 57, 60, 63, 66, 69, 72, 75, 79, 82, 86, 89, 93, 97, 101, 106, 110, 114,
119, 124, 129, 134, 139, 145, 150, 156, 162, 169, 175, 182, 189, 196, 203, 211,
219, 227, 235, 244, 253, 262, 271, 281, 291, 302, 313, 324, 336, 348, 360, 373,
386, 399, 414, 428, 443, 459, 475, 491, 508, 526, 544, 563, 583, 603, 623, 645,
667, 690, 714, 738, 763, 789, 816, 844, 873, 903, 933, 965, 998, 1032, 1066,
1103, 1140, 1178, 1218, 1259, 1302, 1345, 1391, 1438, 1486, 1536, 1587, 1641,
1696, 1753, 1811, 1872, 1935, 1999, 2066, 2135, 2207, 2280, 2356, 2435, 2516,
2600, 2687, 2777, 2869, 2965, 3063, 3165, 3271, 3380, 3492, 3608, 3728, 3852,
3980, 4112, 4249, 4390, 4536, 4686, 4842, 5002, 5168, 5340, 5517, 5700, 5889,
6084, 6286, 6494, 6709, 6932, 7161, 7398, 7643, 7897, 8158, 8428, 8707, 8995,
9293, 9601, 9918, 10247, 10586, 10936, 11298, 11671, 12057, 12456, 12868,
13294, 13733, 14187, 14656, 15141, 15641, 16159, 16693, 17244, 17814, 18403,
19011, 19640, 20289, 20959, 21652, 22367, 23106, 23869, 24658, 25472, 26314,
27183, 28081, 29009, 29967, 30957, 31979, 33035, 34126, 35254, 36418, 37620,
38863, 40146, 41472, 42841, 44256, 45717, 47227, 48786, 50397, 52061, 53780,
55556, 57390, 59285, 61242, 63264, 65352, 67510, 69739, 72041, 74419, 76876,
79414, 82035, 84743, 87541, 90430, 93416, 96499, 99684, 102975, 106374])
def length_to_byte(length):
    """Approximate ``length`` as a byte code 0-255 via the precomputed table.

    ``None`` encodes as 0; any length at or above the table's largest entry
    saturates at 255.
    """
    if length is None:
        return 0
    if length >= 106374:
        return 255
    # bisect_left finds the first table entry >= length
    return bisect_left(_length_byte_cache, length)
# Decoding is a direct table lookup
byte_to_length = _length_byte_cache.__getitem__

View File

@@ -0,0 +1,373 @@
from array import array
from whoosh.compat import xrange
from whoosh.system import emptybytes
from whoosh.system import pack_byte, unpack_byte
from whoosh.system import pack_ushort_le, unpack_ushort_le
from whoosh.system import pack_uint_le, unpack_uint_le
def delta_encode(nums):
    """Yield each number's difference from its predecessor.

    The first value is yielded relative to an implicit base of 0.
    """
    previous = 0
    for num in nums:
        yield num - previous
        previous = num
def delta_decode(nums):
base = 0
for n in nums:
base += n
yield base
class GrowableArray(object):
    """An ``array.array`` wrapper that transparently widens its typecode
    when a value too large for the current code is appended. If even "q"
    is unavailable (or disallowed), it degrades to a plain list.
    """

    def __init__(self, inittype="B", allow_longs=True):
        self.array = array(inittype)
        self._allow_longs = allow_longs

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.array)

    def __len__(self):
        return len(self.array)

    def __iter__(self):
        return iter(self.array)

    def _retype(self, maxnum):
        # Pick the smallest typecode that can represent maxnum
        for limit, code in ((2 ** 16, "H"), (2 ** 31, "i"), (2 ** 32, "I")):
            if maxnum < limit:
                break
        else:
            if not self._allow_longs:
                raise OverflowError("%r is too big to fit in an array"
                                    % maxnum)
            code = "q"
        try:
            self.array = array(code, iter(self.array))
        except ValueError:
            # Platform lacks the wide typecode -- fall back to a list
            self.array = list(self.array)

    def append(self, n):
        try:
            self.array.append(n)
        except OverflowError:
            # n doesn't fit the current typecode; widen and retry
            self._retype(n)
            self.array.append(n)

    def extend(self, ns):
        add = self.append
        for n in ns:
            add(n)

    @property
    def typecode(self):
        # A list fallback behaves as if it held 64-bit longs
        return self.array.typecode if isinstance(self.array, array) else "q"

    def to_file(self, dbfile):
        if isinstance(self.array, array):
            dbfile.write_array(self.array)
        else:
            putlong = dbfile.write_long
            for n in self.array:
                putlong(n)
# Number list encoding base class
class NumberEncoding(object):
    """Base class for schemes that store sequences of positive integers.

    Subclasses must implement ``write_nums`` and ``read_nums``; the delta
    and random-access helpers are defined in terms of those two.
    """

    # Largest value the encoding can store, or None for no limit
    maxint = None

    def write_nums(self, f, numbers):
        raise NotImplementedError

    def read_nums(self, f, n):
        raise NotImplementedError

    def write_deltas(self, f, numbers):
        # Store differences between successive values instead of the values
        return self.write_nums(f, list(delta_encode(numbers)))

    def read_deltas(self, f, n):
        return delta_decode(self.read_nums(f, n))

    def get(self, f, pos, i):
        # Naive random access: decode numbers 0..i and keep the last one
        f.seek(pos)
        result = None
        for result in self.read_nums(f, i + 1):
            pass
        return result
# Fixed width encodings
class FixedEncoding(NumberEncoding):
    """Encoding in which every number occupies exactly ``size`` bytes."""

    _encode = None  # function packing an int into bytes
    _decode = None  # function unpacking bytes into an int
    size = None  # byte width of each stored number

    def write_nums(self, f, numbers):
        pack = self._encode
        for num in numbers:
            f.write(pack(num))

    def read_nums(self, f, n):
        unpack = self._decode
        width = self.size
        for _ in xrange(n):
            yield unpack(f.read(width))

    def get(self, f, pos, i):
        # Fixed width makes random access a simple offset computation
        f.seek(pos + i * self.size)
        return self._decode(f.read(self.size))
class ByteEncoding(FixedEncoding):
    # One unsigned byte per number (values 0..255)
    size = 1
    maxint = 255
    _encode = pack_byte
    _decode = unpack_byte
class UShortEncoding(FixedEncoding):
    # Two bytes (little-endian unsigned short) per number
    size = 2
    maxint = 2 ** 16 - 1
    _encode = pack_ushort_le
    _decode = unpack_ushort_le
class UIntEncoding(FixedEncoding):
    # Four bytes (little-endian unsigned int) per number
    size = 4
    maxint = 2 ** 32 - 1
    _encode = pack_uint_le
    _decode = unpack_uint_le
# High-bit encoded variable-length integer
class Varints(NumberEncoding):
    """Stores each number as a high-bit-terminated variable-length integer,
    delegating to the file object's varint methods.
    """

    maxint = None

    def write_nums(self, f, numbers):
        put = f.write_varint
        for num in numbers:
            put(num)

    def read_nums(self, f, n):
        for _ in xrange(n):
            yield f.read_varint()
# Simple16 algorithm for storing arrays of positive integers (usually delta
# encoded lists of sorted integers)
#
# 1. http://www2008.org/papers/pdf/p387-zhangA.pdf
# 2. http://www2009.org/proceedings/pdf/p401.pdf
class Simple16(NumberEncoding):
    """Simple16 encoding for lists of positive integers.

    Each 32-bit word stores a 4-bit "key" selecting one of 16 packing
    layouts plus 28 data bits holding between 1 and 28 numbers.

    The maximum possible integer value Simple16 can encode is < 2^28.
    Therefore, in order to use Simple16, the application must have its own
    code to encode numbers in the range of [2^28, 2^32). A simple way is just
    write those numbers as 32-bit integers (that is, no compression for very
    big numbers).

    1. http://www2008.org/papers/pdf/p387-zhangA.pdf
    2. http://www2009.org/proceedings/pdf/p401.pdf
    """

    _numsize = 16
    _bitsize = 28
    maxint = 2 ** _bitsize - 1

    # Number of stored numbers per code
    _num = [28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1]
    # Number of bits for each number per code
    _bits = [
        (1,) * 28,
        (2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
        (1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1),
        (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2),
        (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
        (4, 3, 3, 3, 3, 3, 3, 3, 3),
        (3, 4, 4, 4, 4, 3, 3, 3),
        (4, 4, 4, 4, 4, 4, 4),
        (5, 5, 5, 5, 4, 4),
        (4, 4, 5, 5, 5, 5),
        (6, 6, 6, 5, 5),
        (5, 5, 6, 6, 6),
        (7, 7, 7, 7),
        (10, 9, 9),
        (14, 14),
        (28,),
    ]

    def write_nums(self, f, numbers):
        """Compress ``numbers`` and write them to ``f`` as a sequence of
        32-bit little-endian words.
        """
        _compress = self._compress
        i = 0
        while i < len(numbers):
            value, taken = _compress(numbers, i, len(numbers) - i)
            f.write_uint_le(value)
            i += taken

    def _compress(self, inarray, inoffset, n):
        # Try layouts from densest to sparsest and use the first one whose
        # per-slot bit widths can hold the next values.
        _numsize = self._numsize
        _bitsize = self._bitsize
        _num = self._num
        _bits = self._bits
        for key in xrange(_numsize):
            value = key << _bitsize
            num = _num[key] if _num[key] < n else n
            bits = 0
            j = 0
            while j < num and inarray[inoffset + j] < (1 << _bits[key][j]):
                x = inarray[inoffset + j]
                value |= x << bits
                bits += _bits[key][j]
                j += 1
            if j == num:
                return value, num
        # Unreachable for values < 2**28: the last layout stores a single
        # 28-bit number
        raise Exception

    def read_nums(self, f, n):
        """Read and decompress ``n`` numbers from ``f``."""
        _decompress = self._decompress
        i = 0
        while i < n:
            value = unpack_uint_le(f.read(4))[0]
            for v in _decompress(value, n - i):
                yield v
                i += 1

    def _decompress(self, value, n):
        # Yield up to n numbers packed in the 32-bit word ``value``
        _numsize = self._numsize
        _bitsize = self._bitsize
        _num = self._num
        _bits = self._bits
        key = value >> _bitsize
        num = _num[key] if _num[key] < n else n
        bits = 0
        for j in xrange(num):
            v = value >> bits
            yield v & (0xffffffff >> (32 - _bits[key][j]))
            bits += _bits[key][j]

    def get(self, f, pos, i):
        """Return the i'th number of the sequence starting at ``pos``.

        Bug fixes versus the previous version: ``unpack_uint_le`` returns a
        tuple, so its result must be indexed with ``[0]`` (as ``read_nums``
        already does), and the word-skipping test must use ``>=`` because a
        word holding ``num`` values covers indices ``base .. base + num - 1``.
        """
        f.seek(pos)
        base = 0
        value = unpack_uint_le(f.read(4))[0]
        key = value >> self._bitsize
        num = self._num[key]
        while i >= base + num:
            base += num
            value = unpack_uint_le(f.read(4))[0]
            key = value >> self._bitsize
            num = self._num[key]
        offset = i - base
        if offset:
            value = value >> sum(self._bits[key][:offset])
        return value & (2 ** self._bits[key][offset] - 1)
# Google Packed Ints algorithm: a set of four numbers is preceded by a "key"
# byte, which encodes how many bytes each of the next four integers use
# (stored in the byte as four 2-bit numbers)
class GInts(NumberEncoding):
    """Google "packed ints" encoding: each group of four numbers is preceded
    by a key byte that records the byte width (1-4) of each of the four
    numbers as four 2-bit fields.
    """

    maxint = 2 ** 32 - 1

    # Number of data bytes following a key byte of value N -- used to
    # skip ahead over a whole group from its key byte
    _lens = array("B", [4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 5, 6,
        7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9,
        10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11,
        12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7,
        8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
        11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11,
        12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9,
        10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
        12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
        13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11,
        12, 13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
        11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11,
        12, 13, 14, 12, 13, 14, 15, 13, 14, 15, 16])

    def key_to_sizes(self, key):
        """Returns a list of the sizes of the next four numbers given a key
        byte.
        """
        return [(key >> (i * 2) & 3) + 1 for i in xrange(4)]

    def write_nums(self, f, numbers):
        """Write ``numbers`` to ``f`` in groups of four, each group preceded
        by its key byte.
        """
        buf = emptybytes
        count = 0
        key = 0
        for v in numbers:
            shift = count * 2
            if v < 256:
                buf += pack_byte(v)
            elif v < 65536:
                key |= 1 << shift
                buf += pack_ushort_le(v)
            elif v < 16777216:
                key |= 2 << shift
                # Only the low three bytes are significant
                buf += pack_uint_le(v)[:3]
            else:
                key |= 3 << shift
                buf += pack_uint_le(v)
            count += 1
            if count == 4:
                f.write_byte(key)
                f.write(buf)
                count = 0
                key = 0
                buf = emptybytes  # Clear the buffer
        # Write out leftovers in the buffer
        if count:
            f.write_byte(key)
            f.write(buf)

    def read_nums(self, f, n):
        """Read N integers from the byte stream ``f``. Expects that the file
        is positioned at a key byte.
        """
        count = 0
        key = None
        for _ in xrange(n):
            if count == 0:
                key = f.read_byte()
            code = key >> (count * 2) & 3
            if code == 0:
                yield f.read_byte()
            elif code == 1:
                yield f.read_ushort_le()
            elif code == 2:
                # Three stored bytes padded to four for unpacking. The pad
                # must be a *bytes* literal -- concatenating str to bytes
                # raises TypeError on Python 3.
                yield unpack_uint_le(f.read(3) + b"\x00")[0]
            else:
                yield f.read_uint_le()
            count = (count + 1) % 4

View File

@@ -0,0 +1,130 @@
# Copyright 2007 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
import os.path
import random
import shutil
import sys
import tempfile
from contextlib import contextmanager
from whoosh.filedb.filestore import FileStorage
from whoosh.util import now, random_name
class TempDir(object):
    """Context manager that creates (and on exit removes) a temporary
    directory.

    :param basename: base name for the directory; a random name is used if
        this is empty.
    :param parentdir: use this existing directory instead of creating one
        with ``tempfile.mkdtemp``.
    :param ext: suffix for the created temporary directory.
    :param suppress: a set of exception types to suppress on exit.
    :param keepdir: if True, do not delete the directory on exit.
    """

    def __init__(self, basename="", parentdir=None, ext=".whoosh",
                 suppress=frozenset(), keepdir=False):
        self.basename = basename or random_name(8)
        self.parentdir = parentdir
        dirname = parentdir or tempfile.mkdtemp(ext, self.basename)
        self.dir = os.path.abspath(dirname)
        self.suppress = suppress
        self.keepdir = keepdir

    def __enter__(self):
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        return self.dir

    def cleanup(self):
        # Hook for subclasses to release resources before the directory
        # is removed
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
        if not self.keepdir:
            try:
                shutil.rmtree(self.dir)
            except OSError:
                # Best-effort removal -- e.g. on Windows a file may still
                # be open; don't mask the original exception (if any)
                pass
        if exc_type is not None:
            if self.keepdir:
                sys.stderr.write("Temp dir=" + self.dir + "\n")
            # BUG FIX: the previous version returned None (falsy) when
            # exc_type was in self.suppress, so suppression never worked.
            # Returning True here tells the context protocol to swallow it.
            return exc_type in self.suppress
class TempStorage(TempDir):
    """Context manager yielding a ``FileStorage`` rooted in a temporary
    directory; the storage is closed and the directory removed on exit.
    """

    def __init__(self, debug=False, **kwargs):
        TempDir.__init__(self, **kwargs)
        self._debug = debug

    def cleanup(self):
        # Close the storage before TempDir deletes the directory
        self.store.close()

    def __enter__(self):
        self.store = FileStorage(TempDir.__enter__(self), debug=self._debug)
        return self.store
class TempIndex(TempStorage):
    """Context manager yielding a fresh index built from ``schema`` inside a
    temporary storage.
    """

    def __init__(self, schema, ixname='', storage_debug=False, **kwargs):
        TempStorage.__init__(self, basename=ixname, debug=storage_debug,
                             **kwargs)
        self.schema = schema

    def __enter__(self):
        storage = TempStorage.__enter__(self)
        return storage.create_index(self.schema, indexname=self.basename)
def is_abstract_method(attr):
    """Returns True if the given object has __isabstractmethod__ == True.
    """
    # getattr with a False default collapses the hasattr/getattr pair
    return getattr(attr, "__isabstractmethod__", False)
def check_abstract_methods(base, subclass):
    """Raises AssertionError if ``subclass`` does not override a method on
    ``base`` that is marked as an abstract method.
    """
    for attrname in dir(base):
        # Only check the public interface
        if attrname.startswith("_"):
            continue
        attr = getattr(base, attrname)
        if is_abstract_method(attr):
            oattr = getattr(subclass, attrname)
            if is_abstract_method(oattr):
                # Raise AssertionError as the docstring promises (the old
                # code raised a bare Exception)
                raise AssertionError("%s.%s not overridden"
                                     % (subclass.__name__, attrname))
@contextmanager
def timing(name=None):
    """Context manager that prints the wall-clock time its body took,
    optionally labelled with ``name``.
    """
    started = now()
    yield
    elapsed = now() - started
    print("%s: %0.06f s" % (name or '', elapsed))

View File

@@ -0,0 +1,132 @@
# Copyright 2007 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
import codecs, re
from whoosh.compat import string_type, u, byte
# Note: these functions return a tuple of (text, length), so when you call
# them, you have to add [0] on the end, e.g. str = utf8encode(unicode)[0]
# Bound UTF-8 codec functions used for term text throughout the package.
utf8encode = codecs.getencoder("utf-8")
utf8decode = codecs.getdecoder("utf-8")
# Prefix encoding functions
def first_diff(a, b):
    """
    Returns the position of the first differing character in the sequences a
    and b. For example, first_diff('render', 'rending') == 4. This function
    limits the return value to 255 so the difference can be encoded in a single
    byte.
    """
    # Cap at 255 so the result always fits in one byte: the previous
    # ``i <= 255`` loop condition could return 256, which overflows byte().
    limit = min(len(a), len(b), 255)
    i = 0
    while i < limit and a[i] == b[i]:
        i += 1
    return i
def prefix_encode(a, b):
    """
    Compresses bytestring b as a byte representing the prefix it shares with a,
    followed by the suffix bytes.
    """
    prefixlen = first_diff(a, b)
    return byte(prefixlen) + b[prefixlen:]
def prefix_encode_all(ls):
    """Compresses the given list of (unicode) strings by storing each string
    (except the first one) as an integer (encoded in a byte) representing
    the prefix it shares with its predecessor, followed by the suffix encoded
    as UTF-8.
    """
    last = u('')
    for w in ls:
        i = first_diff(last, w)
        # Use byte() instead of chr() so the one-byte prefix length
        # concatenates with the UTF-8 (bytes) suffix on Python 3 as well
        # as Python 2 (chr() + bytes raises TypeError on Python 3).
        yield byte(i) + w[i:].encode("utf-8")
        last = w
def prefix_decode_all(ls):
    """Decompresses a list of strings compressed by prefix_encode().
    """
    last = u('')
    for w in ls:
        # Indexing a bytes object yields an int on Python 3 but a one-char
        # string on Python 2; normalize to an int prefix length (the old
        # ord()-only code raised TypeError on Python 3).
        head = w[0]
        prefixlen = head if isinstance(head, int) else ord(head)
        decoded = last[:prefixlen] + w[1:].decode("utf-8")
        yield decoded
        last = decoded
# Natural key sorting function
_nkre = re.compile(r"\D+|\d+", re.UNICODE)
def _nkconv(i):
try:
return int(i)
except ValueError:
return i.lower()
def natural_key(s):
"""Converts string ``s`` into a tuple that will sort "naturally" (i.e.,
``name5`` will come before ``name10`` and ``1`` will come before ``A``).
This function is designed to be used as the ``key`` argument to sorting
functions.
:param s: the str/unicode string to convert.
:rtype: tuple
"""
# Use _nkre to split the input string into a sequence of
# digit runs and non-digit runs. Then use _nkconv() to convert
# the digit runs into ints and the non-digit runs to lowercase.
return tuple(_nkconv(m) for m in _nkre.findall(s))
# Regular expression functions
def rcompile(pattern, flags=0, verbose=False):
"""A wrapper for re.compile that checks whether "pattern" is a regex object
or a string to be compiled, and automatically adds the re.UNICODE flag.
"""
if not isinstance(pattern, string_type):
# If it's not a string, assume it's already a compiled pattern
return pattern
if verbose:
flags |= re.VERBOSE
return re.compile(pattern, re.UNICODE | flags)

View File

@@ -0,0 +1,467 @@
# Copyright 2010 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
import calendar
import copy
from datetime import date, datetime, timedelta
from whoosh.compat import iteritems
class TimeError(Exception):
    """Raised when a date/time component is outside its valid range or an
    argument is not a datetime-like object."""
    pass
def relative_days(current_wday, wday, dir):
    """Returns the number of days (positive or negative) to the "next" or
    "last" of a certain weekday. ``current_wday`` and ``wday`` are numbers,
    i.e. 0 = monday, 1 = tuesday, 2 = wednesday, etc.

    >>> # Get the number of days to the next tuesday, if today is Sunday
    >>> relative_days(6, 1, 1)
    2

    :param current_wday: the number of the current weekday.
    :param wday: the target weekday.
    :param dir: -1 for the "last" (past) weekday, 1 for the "next" (future)
        weekday.
    """
    # The same weekday always means a full week away in the given direction
    if current_wday == wday:
        return 7 * dir
    # Python's % always yields a non-negative result, so these are the
    # forward and backward distances around the 7-day cycle
    if dir == 1:
        return (wday - current_wday) % 7
    return -((current_wday - wday) % 7)
def timedelta_to_usecs(td):
    """Return the total number of microseconds in the given timedelta."""
    return (td.days * 86400000000  # Microseconds in a day
            + td.seconds * 1000000  # Microseconds in a second
            + td.microseconds)
def datetime_to_long(dt):
    """Converts a datetime object to a long integer representing the number
    of microseconds since ``datetime.min``.
    """
    # Drop any tzinfo so the subtraction against the naive datetime.min works
    span = dt.replace(tzinfo=None) - dt.min
    return (span.days * 86400000000 + span.seconds * 1000000
            + span.microseconds)
def long_to_datetime(x):
    """Converts a long integer representing the number of microseconds since
    ``datetime.min`` to a datetime object.
    """
    days, remainder = divmod(x, 86400000000)  # Microseconds in a day
    seconds, microseconds = divmod(remainder, 1000000)  # ... in a second
    return datetime.min + timedelta(days=days, seconds=seconds,
                                    microseconds=microseconds)
# Ambiguous datetime object
class adatetime(object):
    """An "ambiguous" datetime object. This object acts like a
    ``datetime.datetime`` object but can have any of its attributes set to
    None, meaning unspecified.
    """

    # Names of the attributes that may be specified or left as None
    units = frozenset(("year", "month", "day", "hour", "minute", "second",
                       "microsecond"))

    def __init__(self, year=None, month=None, day=None, hour=None, minute=None,
                 second=None, microsecond=None):
        if isinstance(year, datetime):
            # Copy constructor: adatetime(dt) clones a datetime's fields
            dt = year
            self.year, self.month, self.day = dt.year, dt.month, dt.day
            self.hour, self.minute, self.second = dt.hour, dt.minute, dt.second
            self.microsecond = dt.microsecond
        else:
            if month is not None and (month < 1 or month > 12):
                raise TimeError("month must be in 1..12")
            if day is not None and day < 1:
                # FIX: message previously said "greater than 1", which
                # contradicted the ``day < 1`` check (day 1 is valid)
                raise TimeError("day must be greater than 0")
            if (year is not None and month is not None and day is not None
                and day > calendar.monthrange(year, month)[1]):
                raise TimeError("day is out of range for month")
            if hour is not None and (hour < 0 or hour > 23):
                raise TimeError("hour must be in 0..23")
            if minute is not None and (minute < 0 or minute > 59):
                raise TimeError("minute must be in 0..59")
            if second is not None and (second < 0 or second > 59):
                raise TimeError("second must be in 0..59")
            if microsecond is not None and (microsecond < 0
                                            or microsecond > 999999):
                raise TimeError("microsecond must be in 0..999999")
            self.year, self.month, self.day = year, month, day
            self.hour, self.minute, self.second = hour, minute, second
            self.microsecond = microsecond

    def __eq__(self, other):
        if not other.__class__ is self.__class__:
            # Compare equal to a plain datetime only when fully specified
            if not is_ambiguous(self) and isinstance(other, datetime):
                return fix(self) == other
            else:
                return False
        return all(getattr(self, unit) == getattr(other, unit)
                   for unit in self.units)

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, self.tuple())

    def tuple(self):
        """Returns the attributes of the ``adatetime`` object as a tuple of
        ``(year, month, day, hour, minute, second, microsecond)``.
        """
        return (self.year, self.month, self.day, self.hour, self.minute,
                self.second, self.microsecond)

    def date(self):
        # NOTE: raises TypeError if year, month, or day is None
        return date(self.year, self.month, self.day)

    def copy(self):
        return adatetime(year=self.year, month=self.month, day=self.day,
                         hour=self.hour, minute=self.minute, second=self.second,
                         microsecond=self.microsecond)

    def replace(self, **kwargs):
        """Returns a copy of this object with the attributes given as keyword
        arguments replaced.

        >>> adt = adatetime(year=2009, month=10, day=31)
        >>> adt.replace(year=2010)
        (2010, 10, 31, None, None, None, None)
        """
        newadatetime = self.copy()
        for key, value in iteritems(kwargs):
            if key in self.units:
                setattr(newadatetime, key, value)
            else:
                raise KeyError("Unknown argument %r" % key)
        return newadatetime

    def floor(self):
        """Returns a ``datetime`` version of this object with all unspecified
        (None) attributes replaced by their lowest values.

        This method raises an error if the ``adatetime`` object has no year.

        >>> adt = adatetime(year=2009, month=5)
        >>> adt.floor()
        datetime.datetime(2009, 5, 1, 0, 0)
        """
        y, m, d, h, mn, s, ms = (self.year, self.month, self.day, self.hour,
                                 self.minute, self.second, self.microsecond)
        if y is None:
            raise ValueError("Date has no year")
        if m is None:
            m = 1
        if d is None:
            d = 1
        if h is None:
            h = 0
        if mn is None:
            mn = 0
        if s is None:
            s = 0
        if ms is None:
            ms = 0
        return datetime(y, m, d, h, mn, s, ms)

    def ceil(self):
        """Returns a ``datetime`` version of this object with all unspecified
        (None) attributes replaced by their highest values.

        This method raises an error if the ``adatetime`` object has no year.

        >>> adt = adatetime(year=2009, month=5)
        >>> adt.ceil()
        datetime.datetime(2009, 5, 31, 23, 59, 59, 999999)
        """
        y, m, d, h, mn, s, ms = (self.year, self.month, self.day, self.hour,
                                 self.minute, self.second, self.microsecond)
        if y is None:
            raise ValueError("Date has no year")
        if m is None:
            m = 12
        if d is None:
            # Last day of month m in year y
            d = calendar.monthrange(y, m)[1]
        if h is None:
            h = 23
        if mn is None:
            mn = 59
        if s is None:
            s = 59
        if ms is None:
            ms = 999999
        return datetime(y, m, d, h, mn, s, ms)

    def disambiguated(self, basedate):
        """Returns either a ``datetime`` or unambiguous ``timespan`` version
        of this object.

        Unless this ``adatetime`` object is fully specified down to the
        microsecond, this method will return a timespan built from the "floor"
        and "ceil" of this object.

        This method raises an error if the ``adatetime`` object has no year.

        >>> adt = adatetime(year=2009, month=10, day=31)
        >>> adt.disambiguated(datetime.now())
        timespan(datetime(2009, 10, 31, 0, 0, 0, 0), datetime(2009, 10, 31, 23, 59, 59, 999999))
        """
        dt = self
        if not is_ambiguous(dt):
            return fix(dt)
        return timespan(dt, dt).disambiguated(basedate)
# Time span class
class timespan(object):
    """A span of time between two ``datetime`` or ``adatetime`` objects.
    """
    def __init__(self, start, end):
        """
        :param start: a ``datetime`` or ``adatetime`` object representing the
            start of the time span.
        :param end: a ``datetime`` or ``adatetime`` object representing the
            end of the time span.
        :raises TimeError: if either argument is not a datetime-like object.
        """
        if not isinstance(start, (datetime, adatetime)):
            raise TimeError("%r is not a datetime object" % start)
        if not isinstance(end, (datetime, adatetime)):
            raise TimeError("%r is not a datetime object" % end)
        # Copy so later in-place disambiguation can't mutate caller objects
        self.start = copy.copy(start)
        self.end = copy.copy(end)
    def __eq__(self, other):
        # Equal only to another timespan with equal endpoints
        if not other.__class__ is self.__class__:
            return False
        return self.start == other.start and self.end == other.end
    def __repr__(self):
        return "%s(%r, %r)" % (self.__class__.__name__, self.start, self.end)
    def disambiguated(self, basedate, debug=0):
        """Returns an unambiguous version of this object, i.e. a timespan
        whose endpoints are concrete ``datetime`` objects, using ``basedate``
        to fill in unspecified parts.

        >>> start = adatetime(year=2009, month=2)
        >>> end = adatetime(year=2009, month=10)
        >>> ts = timespan(start, end)
        >>> ts
        timespan(adatetime(2009, 2, None, None, None, None, None), adatetime(2009, 10, None, None, None, None, None))
        >>> ts.disambiguated(datetime.now())
        timespan(datetime(2009, 2, 1, 0, 0, 0, 0), datetime(2009, 10, 31, 23, 59, 59, 999999))
        """
        #- If year is in start but not end, use basedate.year for end
        #-- If year is in start but not end, but startdate is > basedate,
        #   use "next <monthname>" to get end month/year
        #- If year is in end but not start, copy year from end to start
        #- Support "next february", "last april", etc.
        start, end = copy.copy(self.start), copy.copy(self.end)
        # Remember which years were unspecified; used below to decide how
        # to repair an out-of-order range
        start_year_was_amb = start.year is None
        end_year_was_amb = end.year is None
        if has_no_date(start) and has_no_date(end):
            # The start and end points are just times, so use the basedate
            # for the date information.
            by, bm, bd = basedate.year, basedate.month, basedate.day
            start = start.replace(year=by, month=bm, day=bd)
            end = end.replace(year=by, month=bm, day=bd)
        else:
            # If one side has a year and the other doesn't, the decision
            # of what year to assign to the ambiguous side is kind of
            # arbitrary. I've used a heuristic here based on how the range
            # "reads", but it may only be reasonable in English. And maybe
            # even just to me.
            if start.year is None and end.year is None:
                # No year on either side, use the basedate
                start.year = end.year = basedate.year
            elif start.year is None:
                # No year in the start, use the year from the end
                start.year = end.year
            elif end.year is None:
                end.year = max(start.year, basedate.year)
        if start.year == end.year:
            # Once again, if one side has a month and day but the other side
            # doesn't, the disambiguation is arbitrary. Does "3 am to 5 am
            # tomorrow" mean 3 AM today to 5 AM tomorrow, or 3am tomorrow to
            # 5 am tomorrow? What I picked is similar to the year: if the
            # end has a month+day and the start doesn't, copy the month+day
            # from the end to the start UNLESS that would make the end come
            # before the start on that day, in which case use the basedate
            # instead. If the start has a month+day and the end doesn't, use
            # the basedate.
            start_dm = not (start.month is None and start.day is None)
            end_dm = not (end.month is None and end.day is None)
            if end_dm and not start_dm:
                if start.floor().time() > end.ceil().time():
                    start.month = basedate.month
                    start.day = basedate.day
                else:
                    start.month = end.month
                    start.day = end.day
            elif start_dm and not end_dm:
                end.month = basedate.month
                end.day = basedate.day
        if floor(start).date() > ceil(end).date():
            # If the disambiguated dates are out of order:
            # - If no start year was given, reduce the start year to put the
            #   start before the end
            # - If no end year was given, increase the end year to put the end
            #   after the start
            # - If a year was specified for both, just swap the start and end
            if start_year_was_amb:
                start.year = end.year - 1
            elif end_year_was_amb:
                end.year = start.year + 1
            else:
                start, end = end, start
        # Collapse remaining ambiguity to concrete datetimes
        start = floor(start)
        end = ceil(end)
        if start.date() == end.date() and start.time() > end.time():
            # If the start and end are on the same day, but the start time
            # is after the end time, move the end time to the next day
            end += timedelta(days=1)
        return timespan(start, end)
# Functions for working with datetime/adatetime objects
def floor(at):
    """Lower bound helper: datetimes pass through unchanged, ambiguous
    adatetimes collapse to their earliest possible datetime.
    """
    return at if isinstance(at, datetime) else at.floor()
def ceil(at):
    """Upper bound helper: datetimes pass through unchanged, ambiguous
    adatetimes collapse to their latest possible datetime.
    """
    return at if isinstance(at, datetime) else at.ceil()
def fill_in(at, basedate, units=adatetime.units):
    """Returns a copy of ``at`` with any unspecified (None) units filled in
    with values from ``basedate``.
    """
    if isinstance(at, datetime):
        # Already fully specified
        return at
    args = {}
    for unit in units:
        value = getattr(at, unit)
        args[unit] = getattr(basedate, unit) if value is None else value
    return fix(adatetime(**args))
def has_no_date(at):
    """Returns True if the given object is an ``adatetime`` where ``year``,
    ``month``, and ``day`` are all None.
    """
    if isinstance(at, datetime):
        return False
    return all(getattr(at, unit) is None for unit in ("year", "month", "day"))
def has_no_time(at):
    """Returns True if the given object is an ``adatetime`` where ``hour``,
    ``minute``, ``second`` and ``microsecond`` are all None.
    """
    if isinstance(at, datetime):
        return False
    return all(getattr(at, unit) is None
               for unit in ("hour", "minute", "second", "microsecond"))
def is_ambiguous(at):
    """Returns True if the given object is an ``adatetime`` with any of its
    attributes equal to None.
    """
    if isinstance(at, datetime):
        return False
    for unit in adatetime.units:
        if getattr(at, unit) is None:
            return True
    return False
def is_void(at):
    """Returns True if the given object is an ``adatetime`` with all of its
    attributes equal to None.
    """
    if isinstance(at, datetime):
        return False
    for unit in adatetime.units:
        if getattr(at, unit) is not None:
            return False
    return True
def fix(at):
    """If the given object is an ``adatetime`` that is unambiguous (because
    all its attributes are specified, that is, not equal to None), returns a
    ``datetime`` version of it. Otherwise returns the ``adatetime`` object
    unchanged.
    """
    if isinstance(at, datetime) or is_ambiguous(at):
        return at
    return datetime(year=at.year, month=at.month, day=at.day, hour=at.hour,
                    minute=at.minute, second=at.second,
                    microsecond=at.microsecond)

View File

@@ -0,0 +1,110 @@
# Copyright 2007 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from array import array
from whoosh.compat import array_tobytes, xrange
# Varint cache
# Build a cache of the varint byte sequences for the first N integers, so we
# don't have to constantly recalculate them on the fly. This makes a small but
# noticeable difference.
def _varint(i):
    """Encode integer ``i`` as a variable-length byte string: 7 data bits
    per byte, with the high bit set on every byte except the last.
    """
    out = array("B")
    while (i & ~0x7F) != 0:
        out.append((i & 0x7F) | 0x80)
        i >>= 7
    out.append(i)
    return array_tobytes(out)
# Precompute the encodings of 0..511; varint() serves these from the cache
# instead of re-encoding on every call.
_varint_cache_size = 512
_varint_cache = []
for i in xrange(0, _varint_cache_size):
    _varint_cache.append(_varint(i))
# Freeze as a tuple for immutable, slightly faster lookups
_varint_cache = tuple(_varint_cache)
def varint(i):
    """Encodes the given integer into a string of the minimum number of bytes,
    serving small values (0..511) from a precomputed cache.
    """
    if i < len(_varint_cache):
        return _varint_cache[i]
    return _varint(i)
def varint_to_int(vi):
    """Decode the variable-length integer at the start of the byte string
    ``vi`` and return its value.
    """
    # Indexing bytes yields ints on Python 3 but one-char strings on
    # Python 2; normalize so both work (the previous ord()-only code
    # raised TypeError on Python 3 bytes input).
    def _byte_at(pos):
        b = vi[pos]
        return b if isinstance(b, int) else ord(b)

    b = _byte_at(0)
    p = 1
    i = b & 0x7f
    shift = 7
    while b & 0x80 != 0:
        b = _byte_at(p)
        p += 1
        i |= (b & 0x7F) << shift
        shift += 7
    return i
def signed_varint(i):
    """Zig-zag encodes a signed integer into a varint.
    """
    # Zig-zag maps 0, -1, 1, -2, 2, ... to 0, 1, 2, 3, 4, ... so small
    # magnitudes of either sign stay small in the encoding.
    zigzag = i << 1
    if i < 0:
        zigzag ^= ~0
    return varint(zigzag)
def decode_signed_varint(i):
    """Zig-zag decodes an integer value.
    """
    # The low bit carries the sign: odd values are negative.
    magnitude = i >> 1
    return magnitude ^ (~0) if i & 1 else magnitude
def read_varint(readfn):
    """
    Reads a variable-length encoded integer.

    :param readfn: a callable that reads a given number of bytes,
        like file.read().
    """
    value = 0
    shift = 0
    while True:
        byte = ord(readfn(1))
        value |= (byte & 0x7F) << shift
        # A clear high bit marks the final byte of the varint.
        if not byte & 0x80:
            break
        shift += 7
    return value

View File

@@ -0,0 +1,165 @@
# Copyright 2012 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
from whoosh.util.text import rcompile
class BaseVersion(object):
    """Common machinery for comparable version-number objects.

    Subclasses provide ``_version_exp`` (a compiled regex with named groups),
    ``_parts`` (``(groupname, converter)`` pairs) and ``__slots__`` naming the
    version's attributes in comparison order.
    """

    @classmethod
    def parse(cls, text):
        """Build an instance by matching ``text`` against ``cls._version_exp``.

        Groups that did not participate in the match keep the instance's
        default attribute values; text that does not match at all yields a
        default-constructed instance.
        """
        version = cls()
        match = cls._version_exp.match(text)
        if match:
            captured = match.groupdict()
            for name, convert in cls._parts:
                raw = captured.get(name)
                if raw is not None:
                    setattr(version, name, convert(raw))
        return version

    def __repr__(self):
        slot_reprs = ", ".join(repr(getattr(self, name))
                               for name in self.__slots__)
        return "%s(%s)" % (self.__class__.__name__, slot_reprs)

    def tuple(self):
        """Return this version's attributes as a tuple, in ``__slots__`` order."""
        return tuple(getattr(self, name) for name in self.__slots__)

    def _comparable_tuples(self, other):
        # Every rich comparison refuses operands that don't expose a tuple()
        # method, raising rather than returning NotImplemented.
        if not hasattr(other, "tuple"):
            raise ValueError("Can't compare %r with %r" % (self, other))
        return self.tuple(), other.tuple()

    def __eq__(self, other):
        mine, theirs = self._comparable_tuples(other)
        return mine == theirs

    def __ne__(self, other):
        mine, theirs = self._comparable_tuples(other)
        return mine != theirs

    def __lt__(self, other):
        mine, theirs = self._comparable_tuples(other)
        return mine < theirs

    def __le__(self, other):
        mine, theirs = self._comparable_tuples(other)
        return mine <= theirs

    def __gt__(self, other):
        mine, theirs = self._comparable_tuples(other)
        return mine > theirs

    def __ge__(self, other):
        mine, theirs = self._comparable_tuples(other)
        return mine >= theirs
class SimpleVersion(BaseVersion):
    """An object that parses version numbers such as::

        12.2.5b

    The filter supports a limited subset of PEP 386 versions including::

        1
        1.2
        1.2c
        1.2c3
        1.2.3
        1.2.3a
        1.2.3b4
        10.7.5rc1
        999.999.999c999
    """

    _version_exp = rcompile(r"""
    ^
    (?P<major>\d{1,4})
    (
        [.](?P<minor>\d{1,4})
        (
            [.](?P<release>\d{1,4})
        )?
        (
            (?P<ex>[abc]|rc)
            (?P<exnum>\d{1,4})?
        )?
    )?
    $
    """, verbose=True)

    # (groupid, method, skippable, default)
    _parts = [("major", int),
              ("minor", int),
              ("release", int),
              ("ex", str),
              ("exnum", int),
              ]

    # 4-bit codes for the "extension" tag used by to_int()/from_int().
    # Values go up to 15 ("z" = no extension), so the field needs a 4-bit
    # mask when unpacking.
    _ex_bits = {"a": 0, "b": 1, "c": 2, "rc": 10, "z": 15}
    _bits_ex = dict((v, k) for k, v in _ex_bits.items())

    __slots__ = ("major", "minor", "release", "ex", "exnum")

    def __init__(self, major=1, minor=0, release=0, ex="z", exnum=0):
        # ex defaults to "z", which sorts lexicographically after the
        # pre-release tags a/b/c/rc in tuple() comparisons.
        self.major = major
        self.minor = minor
        self.release = release
        self.ex = ex
        self.exnum = exnum

    def to_int(self):
        """Pack this version into a single integer.

        Bit layout, low to high: exnum (10 bits), ex code (4 bits),
        release (10 bits), minor (10 bits), major (10 bits).
        """
        assert self.major < 1024
        n = self.major << 34
        assert self.minor < 1024
        n |= self.minor << 24
        assert self.release < 1024
        n |= self.release << 14
        exbits = self._ex_bits.get(self.ex, 15)
        n |= exbits << 10
        assert self.exnum < 1024
        n |= self.exnum
        return n

    @classmethod
    def from_int(cls, n):
        """Unpack an integer produced by :meth:`to_int` back into a version.
        """
        major = (n & (1023 << 34)) >> 34
        minor = (n & (1023 << 24)) >> 24
        release = (n & (1023 << 14)) >> 14
        # BUGFIX: the ex field occupies 4 bits (to_int packs values up to
        # 15, e.g. "rc" == 10 == 0b1010), so it must be extracted with a
        # 4-bit mask. The previous 3-bit mask (7 << 10) decoded "rc" as
        # 0b010 == "c", breaking the to_int/from_int round trip.
        exbits = (n & (15 << 10)) >> 10
        ex = cls._bits_ex.get(exbits, "z")
        exnum = n & 1023
        return cls(major, minor, release, ex, exnum)