Minor improvements, especially for the classifier

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2174 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-11-12 16:21:36 +00:00
parent 368dd0c022
commit bee421bece
5 changed files with 65 additions and 22 deletions

View file

@ -12,6 +12,9 @@
<h1 tal:attributes="ondblclick item/openEditWindow">
<span tal:content="item/title">Title</span>
</h1>
<p tal:define="description description|item/description"
tal:condition="description">
<i tal:content="description">Description</i></p>
</metal:title>

View file

@ -15,6 +15,10 @@ pre {
max-height: 35em;
}
table.listing td {
white-space: normal;
}
.box div.body div.even {
background-color: #f4f4f4;
}

View file

@ -22,17 +22,20 @@ Adapters and others classes for analyzing resources.
$Id$
"""
from itertools import tee
from zope.cachedescriptors.property import Lazy
from zope import component
from zope.component import adapts
from zope.event import notify
from zope.interface import implements
from zope.traversing.api import getName, getParent
from cybertools.typology.interfaces import IType
from loops.classifier.interfaces import IClassifier, IExtractor, IAnalyzer
from loops.classifier.interfaces import IInformationSet, IStatement
from loops.common import AdapterBase, adapted
from loops.interfaces import IResource, IConcept
from loops.query import ConceptQuery
from loops.resource import Resource
from loops.setup import addAndConfigureObject
from loops.type import TypeInterfaceSourceList
@ -50,6 +53,24 @@ class Classifier(AdapterBase):
_contextAttributes = list(IClassifier) + list(IConcept)
logLevel = 5
@Lazy
def conceptManager(self):
    """The concept manager, as returned by the context's
    ``getConceptManager()``."""
    context = self.context
    return context.getConceptManager()
@Lazy
def defaultPredicate(self):
    """The default predicate, obtained from the concept manager."""
    manager = self.conceptManager
    return manager.getDefaultPredicate()
@Lazy
def predicateType(self):
    """The predicate type, obtained from the concept manager."""
    manager = self.conceptManager
    return manager.getPredicateType()
@Lazy
def typeConcept(self):
    """The type concept, obtained from the concept manager."""
    manager = self.conceptManager
    return manager.getTypeConcept()
def getOptions(self):
    """Return the context's ``_options`` attribute, or a new empty list
    if the context has no such attribute."""
    # Fetch the context outside the try block so that only a missing
    # ``_options`` attribute falls back to the default.
    context = self.context
    try:
        return context._options
    except AttributeError:
        return []
def setOptions(self, value):
@ -57,19 +78,25 @@ class Classifier(AdapterBase):
options = property(getOptions, setOptions)
# Classify a resource: collect an information set from the configured
# extractors, let the configured analyzer turn it into statements, and
# assign the concept of each statement to the statement's subject.
# NOTE(review): indentation and diff markers are missing in this view; the
# lines below appear to interleave the previous and the new version of the
# method (two assignments to statement.predicate, two assignConcept calls,
# one of them unterminated) -- confirm against the repository before editing.
def process(self, resource):
self.log('Processing %s' % resource.title, 3)
infoSet = InformationSet()
# self.extractors is split on whitespace; each name selects a named
# IExtractor adapter for the adapted resource.
for name in self.extractors.split():
extractor = component.getAdapter(adapted(resource), IExtractor, name=name)
infoSet.update(extractor.extractInformationSet())
# The analyzer is a named IAnalyzer adapter on this classifier.
analyzer = component.getAdapter(self, IAnalyzer, name=self.analyzer)
statements = analyzer.extractStatements(infoSet)
defaultPredicate = self.context.getConceptManager().getDefaultPredicate()
for statement in statements:
object = statement.object
# Skip objects whose type has the 'system' qualifier.
qualifiers = IType(object).qualifiers
if 'system' in qualifiers:
continue
# A missing subject defaults to the resource being processed, a missing
# predicate to the default predicate.
if statement.subject is None:
statement.subject = resource
if statement.predicate is None:
statement.predicate = defaultPredicate
self.assignConcept(statement.subject, statement.object,
statement.predicate = self.defaultPredicate
self.log('Assigning: %s %s %s' % (statement.subject.title,
statement.predicate.title, object.title), 5)
self.assignConcept(statement.subject, object,
statement.predicate)
def assignConcept(self, resource, concept, predicate):
@ -77,6 +104,10 @@ class Classifier(AdapterBase):
if resource not in resources:
concept.assignResource(resource, predicate)
def log(self, message, level=5):
if level >= self.logLevel:
print 'Classifier %s:' % getName(self.context), message
class Extractor(object):
@ -101,6 +132,17 @@ class Analyzer(object):
def extractStatements(self, informationSet):
    """Base implementation: return an empty list of statements,
    whatever the information set contains."""
    statements = []
    return statements
@Lazy
def query(self):
    """A ``ConceptQuery`` created for this analyzer's context."""
    context = self.context
    return ConceptQuery(context)
def findConcepts(self, word):
    """Query for concepts matching ``word`` and log the titles found.

    The query result is duplicated with ``tee``: one iterator is used up
    for building the log message, the other one is returned to the caller.
    """
    found = self.query.query(word, 'loops:concept:*')
    concepts, forLogging = tee(found)
    titles = [concept.title for concept in forLogging]
    message = ('Searching for concept using "%s", result: %s'
               % (word, ', '.join(titles)))
    self.context.log(message, 2)
    return concepts
class InformationSet(dict):

View file

@ -26,13 +26,13 @@ from zope import component
from zope.app.catalog.interfaces import ICatalog
from zope.cachedescriptors.property import Lazy
from zope.component import adapts
from zope.traversing.api import getName
from cybertools.organize.interfaces import IPerson
from cybertools.typology.interfaces import IType
from loops.classifier.base import Analyzer
from loops.classifier.base import Statement
from loops.common import adapted
from loops.query import ConceptQuery
class SampleAnalyzer(Analyzer):
@ -47,10 +47,6 @@ class SampleAnalyzer(Analyzer):
resource.
"""
@Lazy
def query(self):
return ConceptQuery(self.context)
def handleCustomer(self, name):
custTypes = self.getTypes(('institution', 'customer',))
for c in self.findConcepts(name):
@ -94,9 +90,6 @@ class SampleAnalyzer(Analyzer):
result.extend(self.handleOwner(parts.pop(0)))
return result
def findConcepts(self, name):
return self.query.query(name, 'loops:concept:*')
@Lazy
def conceptManager(self):
    """The concept manager, as returned by ``getConceptManager()`` of the
    object that this analyzer's context refers to as its own context."""
    outer = self.context
    return outer.context.getConceptManager()

View file

@ -22,16 +22,16 @@ Standard implementations of classifier components.
$Id$
"""
import os
import re
from zope.cachedescriptors.property import Lazy
from zope.component import adapts
from zope.traversing.api import getName
from loops.classifier.base import Analyzer, Extractor
from loops.classifier.base import InformationSet
from loops.classifier.base import Statement
from loops.interfaces import IExternalFile
from loops.query import ConceptQuery
class FilenameExtractor(Extractor):
@ -42,7 +42,7 @@ class FilenameExtractor(Extractor):
self.context = context
def extractInformationSet(self):
    """Return an InformationSet whose ``filename`` entry is the context's
    external address without its extension.

    The external address is split with ``os.path.splitext``; only the
    root part is kept.
    """
    # The extension is not needed, so only the root is taken from the split;
    # this replaces an assignment that was overwritten right away.
    filename = os.path.splitext(self.context.externalAddress)[0]
    return InformationSet(filename=filename)
@ -63,21 +63,22 @@ class PathExtractor(Extractor):
class WordBasedAnalyzer(Analyzer):
@Lazy
def query(self):
return ConceptQuery(self.context)
stopWords = [u'and', u'und']
def extractStatements(self, informationSet):
    """Create a statement for each concept found for the words in the
    values of the information set.

    Every value is split into words; stop words and words shorter than
    two characters are ignored.
    """
    result = []
    for _key, text in informationSet.items():
        for word in self.split(text):
            if word in self.stopWords or len(word) <= 1:
                continue
            concepts = self.findConcepts(word)
            result.extend([Statement(concept) for concept in concepts])
    return result
wordPattern = '\\'.join(list(' .,+*%&-!?/:_[](){}'))
def split(self, text):
return re.split('\W+', text)
def findConcepts(self, word):
return self.query.query(word, 'loops:concept:*')
return re.split('[%s]+' % self.wordPattern, text)
#return re.split(r'[\W_]+', text)