added catalog package with a simple variation of hurry.query and a keyword index
git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2550 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
3acc628b80
commit
56a889540b
6 changed files with 535 additions and 0 deletions
198
catalog/README.txt
Normal file
198
catalog/README.txt
Normal file
|
@ -0,0 +1,198 @@
|
|||
=========================
|
||||
Catalog, Indexes, Queries
|
||||
=========================
|
||||
|
||||
($Id$)
|
||||
|
||||
|
||||
Set up Working Environment
|
||||
==========================
|
||||
|
||||
We first have to set up an IntIds utility (we use a dummy implementation
|
||||
for testing purposes here) and a catalog with a few indexes.
|
||||
|
||||
>>> from zope import component
|
||||
>>> from cybertools.relation.tests import IntIdsStub
|
||||
>>> intid = IntIdsStub()
|
||||
>>> component.provideUtility(intid)
|
||||
|
||||
>>> from zope.app.catalog.interfaces import ICatalog
|
||||
>>> from zope.app.catalog.catalog import Catalog
|
||||
>>> catalog = Catalog()
|
||||
>>> component.provideUtility(catalog, ICatalog)
|
||||
|
||||
>>> from zope.interface import Interface, Attribute, implements
|
||||
>>> class IContent(Interface):
|
||||
... f1 = Attribute('f1')
|
||||
... f2 = Attribute('f2')
|
||||
... f3 = Attribute('f3')
|
||||
... t1 = Attribute('t1')
|
||||
... t2 = Attribute('t2')
|
||||
... k1 = Attribute('k1')
|
||||
|
||||
>>> from zope.app.catalog.field import FieldIndex
|
||||
>>> from zope.app.catalog.text import TextIndex
|
||||
>>> from cybertools.catalog.keyword import KeywordIndex
|
||||
>>> catalog['f1'] = FieldIndex('f1', IContent)
|
||||
>>> catalog['f2'] = FieldIndex('f2', IContent)
|
||||
>>> catalog['f3'] = FieldIndex('f3', IContent)
|
||||
>>> catalog['t1'] = TextIndex('t1', IContent)
|
||||
>>> catalog['t2'] = TextIndex('t2', IContent)
|
||||
>>> catalog['k1'] = KeywordIndex('k1', IContent)
|
||||
|
||||
In addition we need a class for the content objects that we want
|
||||
to index and query.
|
||||
|
||||
>>> from zope.app.container.contained import Contained
|
||||
>>> class Content(Contained):
|
||||
... implements(IContent)
|
||||
... def __init__(self, id, f1='', f2='', f3='', t1='', t2='', k1=[]):
|
||||
... self.id = id
|
||||
... self.f1 = f1
|
||||
... self.f2 = f2
|
||||
... self.f3 = f3
|
||||
... self.t1 = t1
|
||||
... self.t2 = t2
|
||||
... self.k1 = k1
|
||||
... def __cmp__(self, other):
|
||||
... return cmp(self.id, other.id)
|
||||
|
||||
The id attribute is just so we can identify objects we find again
|
||||
easily. By including the __cmp__ method we make sure search results
|
||||
can be stably sorted.
|
||||
|
||||
We are now ready to create a few content objects.
|
||||
|
||||
Let's create them now so that they can be cataloged afterwards.
|
||||
|
||||
>>> content = [
|
||||
... Content(1, 'a', 'b', 'd'),
|
||||
... Content(2, 'a', 'c'),
|
||||
... Content(3, 'X', 'c'),
|
||||
... Content(4, 'a', 'b', 'e'),
|
||||
... Content(5, 'X', 'b', 'e', k1=('zope', 'plone')),
|
||||
... Content(6, 'Y', 'Z', t1='some interesting text')]
|
||||
|
||||
And catalog them now.
|
||||
|
||||
>>> for entry in content:
|
||||
... catalog.index_doc(intid.register(entry), entry)
|
||||
|
||||
Let's provide a simple function for displaying query results.
|
||||
|
||||
>>> def displayQuery(q):
|
||||
... return [intid.getObject(uid).id for uid in q.apply()]
|
||||
|
||||
|
||||
Field Index Queries
|
||||
===================
|
||||
|
||||
Now for a query where f1 equals 'a'.
|
||||
|
||||
>>> from cybertools.catalog.query import Eq
|
||||
>>> f1 = ('', 'f1')
|
||||
>>> displayQuery(Eq(f1, 'a'))
|
||||
[1, 2, 4]
|
||||
|
||||
Not equals (this is more efficient than the generic ~ operator).
|
||||
|
||||
>>> from cybertools.catalog.query import NotEq
|
||||
>>> displayQuery(NotEq(f1, 'a'))
|
||||
[3, 5, 6]
|
||||
|
||||
Testing whether a field is in a set.
|
||||
|
||||
>>> from cybertools.catalog.query import In
|
||||
>>> displayQuery(In(f1, ['a', 'X']))
|
||||
[1, 2, 3, 4, 5]
|
||||
|
||||
Testing whether a field value lies within a specified range (both bounds are inclusive).
|
||||
|
||||
>>> from cybertools.catalog.query import Between
|
||||
>>> displayQuery(Between(f1, 'X', 'Y'))
|
||||
[3, 5, 6]
|
||||
|
||||
You can leave out one end of the range.
|
||||
|
||||
>>> displayQuery(Between(f1, 'X', None)) # 'X' < 'a'
|
||||
[1, 2, 3, 4, 5, 6]
|
||||
>>> displayQuery(Between(f1, None, 'X'))
|
||||
[3, 5]
|
||||
|
||||
You can also use greater-equals and less-equals for the same purpose.
|
||||
|
||||
>>> from cybertools.catalog.query import Ge, Le
|
||||
>>> displayQuery(Ge(f1, 'X'))
|
||||
[1, 2, 3, 4, 5, 6]
|
||||
>>> displayQuery(Le(f1, 'X'))
|
||||
[3, 5]
|
||||
|
||||
It's also possible to negate any query with the generic ~ operator.
|
||||
|
||||
>>> displayQuery(~Eq(f1, 'a'))
|
||||
[3, 5, 6]
|
||||
|
||||
Using and (&).
|
||||
|
||||
>>> f2 = ('', 'f2')
|
||||
>>> displayQuery(Eq(f1, 'a') & Eq(f2, 'b'))
|
||||
[1, 4]
|
||||
|
||||
Using or (|).
|
||||
|
||||
>>> displayQuery(Eq(f1, 'a') | Eq(f2, 'b'))
|
||||
[1, 2, 4, 5]
|
||||
|
||||
These can be chained.
|
||||
|
||||
>>> displayQuery(Eq(f1, 'a') & Eq(f2, 'b') & Between(f1, 'a', 'b'))
|
||||
[1, 4]
|
||||
>>> displayQuery(Eq(f1, 'a') | Eq(f1, 'X') | Eq(f2, 'b'))
|
||||
[1, 2, 3, 4, 5]
|
||||
|
||||
And nested.
|
||||
|
||||
>>> displayQuery((Eq(f1, 'a') | Eq(f1, 'X')) & (Eq(f2, 'b') | Eq(f2, 'c')))
|
||||
[1, 2, 3, 4, 5]
|
||||
|
||||
"and" and "or" can also be spelled out using the And and Or classes.
|
||||
|
||||
>>> from cybertools.catalog.query import And, Or
|
||||
>>> displayQuery(And(Eq(f1, 'a'), Eq(f2, 'b')))
|
||||
[1, 4]
|
||||
>>> displayQuery(Or(Eq(f1, 'a'), Eq(f2, 'b')))
|
||||
[1, 2, 4, 5]
|
||||
|
||||
Combination of In and &
|
||||
-----------------------
|
||||
|
||||
When 'In' terms are combined with '&', a single term without matches makes the whole result empty.
|
||||
|
||||
>>> displayQuery(In(f1, ['a', 'X', 'Y', 'Z']))
|
||||
[1, 2, 3, 4, 5, 6]
|
||||
>>> displayQuery(In(f1, ['Z']))
|
||||
[]
|
||||
>>> displayQuery(In(f1, ['a', 'X', 'Y', 'Z']) & In(f1, ['Z']))
|
||||
[]
|
||||
|
||||
|
||||
Text Index Queries
|
||||
==================
|
||||
|
||||
>>> from cybertools.catalog.query import Text
|
||||
>>> t1 = ('', 't1')
|
||||
>>> displayQuery(Text(t1, 'interesting'))
|
||||
[6]
|
||||
|
||||
|
||||
Keyword Index Queries
|
||||
=====================
|
||||
|
||||
>>> from cybertools.catalog.query import AllOf, AnyOf
|
||||
>>> k1 = ('', 'k1')
|
||||
>>> displayQuery(AnyOf(k1, 'plone'))
|
||||
[5]
|
||||
>>> displayQuery(AllOf(k1, ['plone', 'zope', 'zms']))
|
||||
[]
|
||||
>>> displayQuery(AllOf(k1, ['plone', 'zope']))
|
||||
[5]
|
3
catalog/__init__.py
Normal file
3
catalog/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
"""
|
||||
$Id$
|
||||
"""
|
43
catalog/keyword.py
Normal file
43
catalog/keyword.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
#
|
||||
# Copyright (c) 2008 Helmut Merz helmutm@cy55.de
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
"""Keyword catalog index.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
import zope.index.keyword
|
||||
import zope.interface
|
||||
|
||||
import zope.app.container.contained
|
||||
import zope.app.catalog.attribute
|
||||
import zope.app.catalog.interfaces
|
||||
|
||||
|
||||
class IKeywordIndex(
        zope.app.catalog.interfaces.IAttributeIndex,
        zope.app.catalog.interfaces.ICatalogIndex):
    """Interface of a catalog keyword index that indexes an attribute.

    Combines ``IAttributeIndex`` and ``ICatalogIndex`` without adding
    any declarations of its own.
    """
|
||||
|
||||
|
||||
class KeywordIndex(
        zope.app.catalog.attribute.AttributeIndex,
        zope.index.keyword.KeywordIndex,
        zope.app.container.contained.Contained):
    """Keyword index that can be stored in a catalog.

    Purely a combination of its three base classes: the attribute
    index wrapper, the plain ``zope.index`` keyword index and
    ``Contained``; it declares that it provides ``IKeywordIndex``.
    """

    zope.interface.implements(IKeywordIndex)
|
||||
|
260
catalog/query.py
Normal file
260
catalog/query.py
Normal file
|
@ -0,0 +1,260 @@
|
|||
#
|
||||
# Copyright (c) 2008 Helmut Merz helmutm@cy55.de
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
"""
|
||||
Catalog query terms and their logical combinations.
|
||||
|
||||
This is mainly a simplified version of Martijn Faassen's hurry.query
|
||||
(http://cheeseshop.python.org/pypi/hurry.query).
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
from BTrees.IFBTree import weightedIntersection, weightedUnion
|
||||
from BTrees.IFBTree import difference, IFBTree, IFBucket
|
||||
from zope.app.intid.interfaces import IIntIds
|
||||
from zope.app.catalog.catalog import ResultSet
|
||||
from zope.app.catalog.field import IFieldIndex
|
||||
from zope.app.catalog.text import ITextIndex
|
||||
from zope.app.catalog.interfaces import ICatalog
|
||||
from zope import component
|
||||
|
||||
from cybertools.catalog.keyword import IKeywordIndex
|
||||
|
||||
|
||||
class Term(object):
    """Base class of all query terms.

    Supplies the operator overloads used to combine terms:
    ``a & b`` builds an ``And``, ``a | b`` builds an ``Or`` and
    ``~a`` builds a ``Not``.
    """

    # -- negation ---------------------------------------------------------

    def __invert__(self):
        """Return ``Not(self)`` for the expression ``~self``."""
        return Not(self)

    # -- conjunction ------------------------------------------------------

    def __and__(self, other):
        """Return ``And(self, other)`` for the expression ``self & other``."""
        return And(self, other)

    def __rand__(self, other):
        """Return ``And(other, self)`` for the reflected ``other & self``."""
        return And(other, self)

    # -- disjunction ------------------------------------------------------

    def __or__(self, other):
        """Return ``Or(self, other)`` for the expression ``self | other``."""
        return Or(self, other)

    def __ror__(self, other):
        """Return ``Or(other, self)`` for the reflected ``other | self``."""
        return Or(other, self)
|
||||
|
||||
|
||||
class And(Term):
    """Logical "and": the intersection of the results of all sub-terms."""

    def __init__(self, *terms):
        """Remember the sub-terms (objects providing ``apply()``)."""
        self.terms = terms

    def apply(self):
        """Apply every sub-term and intersect their results.

        Returns the (empty) result of the first sub-term that yields
        nothing, an empty ``IFBucket`` if there are no sub-terms at all,
        and otherwise the combined ``weightedIntersection`` of all
        results.
        """
        results = []
        for term in self.terms:
            r = term.apply()
            if not r:
                # one empty operand makes the whole intersection empty
                return r
            results.append(r)
        if not results:
            # no applicable terms at all
            return IFBucket()
        # Intersect starting with the smallest result.  Sort on the length
        # only: sorting (length, result) tuples would fall back to comparing
        # the result objects themselves whenever two lengths are equal,
        # which is arbitrary on Python 2 and a TypeError on Python 3.
        results.sort(key=len)
        result = results[0]
        for r in results[1:]:
            _, result = weightedIntersection(result, r)
        return result
|
||||
|
||||
|
||||
class Or(Term):
    """Logical "or": the union of the results of all sub-terms."""

    def __init__(self, *terms):
        """Remember the sub-terms (objects providing ``apply()``)."""
        self.terms = terms

    def apply(self):
        """Apply every sub-term and unite the non-empty results.

        Returns an empty ``IFBucket`` when no sub-term yields anything.
        """
        # evaluate the terms in order, dropping empty results
        found = [r for r in (term.apply() for term in self.terms) if r]
        if not found:
            # no applicable terms at all
            return IFBucket()
        combined = found[0]
        for other in found[1:]:
            _weight, combined = weightedUnion(combined, other)
        return combined
|
||||
|
||||
|
||||
class Not(Term):
    """Logical "not": everything known to the IntIds utility minus the
    result of the wrapped term."""

    def __init__(self, term):
        """Remember the term to be negated."""
        self.term = term

    def apply(self):
        """Return all registered ids that the wrapped term does not match."""
        everything = self._all()
        excluded = self.term.apply()
        return difference(everything, excluded)

    def _all(self):
        """Return an ``IFBucket`` mapping every id of the ``IIntIds``
        utility to the score 0."""
        # XXX may not work well/be efficient with extentcatalog
        # XXX not very efficient in general, better to use internal
        # IntIds datastructure but that would break abstraction..
        universe = IFBucket()
        for uid in component.getUtility(IIntIds):
            universe[uid] = 0
        return universe
|
||||
|
||||
|
||||
class IndexTerm(Term):
    """Base class of the terms that query a single catalog index."""

    def __init__(self, index_id):
        """Store the location of the index to query.

        ``index_id`` is a ``(catalog_name, index_name)`` pair: the name
        under which the catalog is registered as an ``ICatalog`` utility
        and the key of the index inside that catalog.
        """
        # Unpack in the body instead of in the signature: tuple parameters
        # were removed from the language by PEP 3113.  Callers still pass
        # the very same 2-tuple as the single positional argument.
        catalog_name, index_name = index_id
        self.catalog_name = catalog_name
        self.index_name = index_name

    def getIndex(self):
        """Look up the catalog utility and return the addressed index."""
        catalog = component.getUtility(ICatalog, self.catalog_name)
        return catalog[self.index_name]
|
||||
|
||||
|
||||
# field index
|
||||
|
||||
class FieldTerm(IndexTerm):
    """Base class of the terms that query a field index."""

    def getIndex(self):
        """Return the addressed index, asserting it provides
        ``IFieldIndex``."""
        found = super(FieldTerm, self).getIndex()
        assert IFieldIndex.providedBy(found)
        return found
|
||||
|
||||
|
||||
class Eq(FieldTerm):
    """Matches the documents whose field equals a given value."""

    def __init__(self, index_id, value):
        """Store the index location and the value to look for.

        ``value`` must not be ``None`` (checked with an assertion).
        """
        assert value is not None
        super(Eq, self).__init__(index_id)
        self.value = value

    def apply(self):
        """Query the index with a range whose two bounds are the value."""
        index = self.getIndex()
        exact_range = (self.value, self.value)
        return index.apply(exact_range)
|
||||
|
||||
|
||||
class NotEq(FieldTerm):
    """Matches the indexed documents whose field differs from a value."""

    def __init__(self, index_id, not_value):
        """Store the index location and the value to exclude."""
        super(NotEq, self).__init__(index_id)
        self.not_value = not_value

    def apply(self):
        """Return the unbounded range query on the index minus the
        entries equal to ``not_value``."""
        index = self.getIndex()
        # a range without bounds
        everything = index.apply((None, None))
        unwanted = index.apply((self.not_value, self.not_value))
        return difference(everything, unwanted)
|
||||
|
||||
|
||||
class Between(FieldTerm):
    """Matches the documents whose field lies within a range."""

    def __init__(self, index_id, min_value, max_value):
        """Store the index location and the two range bounds.

        The bounds are handed to the index unchanged; the subclasses
        ``Ge`` and ``Le`` pass ``None`` for the bound they leave open.
        """
        super(Between, self).__init__(index_id)
        self.min_value, self.max_value = min_value, max_value

    def apply(self):
        """Query the index with the ``(min_value, max_value)`` range."""
        index = self.getIndex()
        bounds = (self.min_value, self.max_value)
        return index.apply(bounds)
|
||||
|
||||
|
||||
class Ge(Between):
    """Greater-or-equal: a ``Between`` without an upper bound."""

    def __init__(self, index_id, min_value):
        """Store the index location and the lower bound."""
        super(Ge, self).__init__(
            index_id, min_value=min_value, max_value=None)
|
||||
|
||||
|
||||
class Le(Between):
    """Less-or-equal: a ``Between`` without a lower bound."""

    def __init__(self, index_id, max_value):
        """Store the index location and the upper bound."""
        super(Le, self).__init__(
            index_id, min_value=None, max_value=max_value)
|
||||
|
||||
|
||||
class In(FieldTerm):
    """Matches the documents whose field equals any of several values."""

    def __init__(self, index_id, values):
        """Store the index location and the candidate values.

        ``values`` is any iterable of values, none of which may be
        ``None`` (checked with an assertion).
        """
        if iter(values) is values:
            # A one-shot iterator (e.g. a generator) would be used up by
            # the membership test below and leave nothing for apply(), so
            # materialize it first.  Real containers are stored unchanged.
            values = list(values)
        assert None not in values
        super(In, self).__init__(index_id)
        self.values = values

    def apply(self):
        """Query the index once per value and unite the non-empty results.

        Returns an empty ``IFBucket`` when none of the values is found.
        """
        index = self.getIndex()
        # skip the values that have no matches at all
        results = [r for r in (index.apply((value, value))
                               for value in self.values) if r]
        if not results:
            # no applicable terms at all
            return IFBucket()
        result = results[0]
        for r in results[1:]:
            _, result = weightedUnion(result, r)
        return result
|
||||
|
||||
|
||||
# text index
|
||||
|
||||
class Text(IndexTerm):
    """Term that queries a text index."""

    def __init__(self, index_id, text):
        """Store the index location and the query text."""
        super(Text, self).__init__(index_id)
        self.text = text

    def getIndex(self):
        """Return the addressed index, asserting it provides
        ``ITextIndex``."""
        found = super(Text, self).getIndex()
        assert ITextIndex.providedBy(found)
        return found

    def apply(self):
        """Hand the query text to the index and return its result."""
        return self.getIndex().apply(self.text)
|
||||
|
||||
|
||||
# keyword index
|
||||
|
||||
class KeywordTerm(IndexTerm):
    """Base class of the terms that query a keyword index."""

    def __init__(self, index_id, values):
        """Store the index location and the keyword(s) to search for."""
        super(KeywordTerm, self).__init__(index_id)
        self.values = values

    def getIndex(self):
        """Return the addressed index, asserting it provides
        ``IKeywordIndex``."""
        found = super(KeywordTerm, self).getIndex()
        assert IKeywordIndex.providedBy(found)
        return found
|
||||
|
||||
|
||||
class AnyOf(KeywordTerm):
    """Keyword query using the index's 'or' operator."""

    def apply(self):
        """Search the keyword index for ``self.values`` with 'or'."""
        return self.getIndex().search(self.values, 'or')
|
||||
|
||||
|
||||
class AllOf(KeywordTerm):
    """Keyword query using the index's 'and' operator."""

    def apply(self):
        """Search the keyword index for ``self.values`` with 'and'."""
        return self.getIndex().search(self.values, 'and')
|
||||
|
28
catalog/tests.py
Executable file
28
catalog/tests.py
Executable file
|
@ -0,0 +1,28 @@
|
|||
#! /usr/bin/python
|
||||
|
||||
"""
|
||||
Tests for the 'cybertools.catalog' package.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
import unittest, doctest
|
||||
from zope.testing.doctestunit import DocFileSuite
|
||||
|
||||
|
||||
class Test(unittest.TestCase):
    """Basic tests for the cybertools.catalog package."""

    def testBasicStuff(self):
        """Placeholder; the package is exercised by the README doctest."""
|
||||
|
||||
|
||||
def test_suite():
    """Return the suite made of the ``Test`` case and the README doctest.

    The doctest runs with NORMALIZE_WHITESPACE and ELLIPSIS enabled.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test))
    suite.addTest(DocFileSuite(
        'README.txt',
        optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS))
    return suite
|
||||
|
||||
# Run the suite returned by test_suite() when executed as a script.
if '__main__' == __name__:
    unittest.main(defaultTest='test_suite')
|
|
@ -37,6 +37,9 @@ class IntIdsStub(object):
|
|||
id = self.getId(ob)
|
||||
self.objs[id] = None
|
||||
|
||||
def __iter__(self):
    """Iterate over the integers 0 .. len(self.objs) - 1."""
    count = len(self.objs)
    return iter(xrange(count))
|
||||
|
||||
|
||||
class TestRelation(unittest.TestCase):
|
||||
"Basic tests for the relation package."
|
||||
|
|
Loading…
Add table
Reference in a new issue