minor improvements, esp for classifier
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2174 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
368dd0c022
commit
bee421bece
5 changed files with 65 additions and 22 deletions
|
@ -12,6 +12,9 @@
|
||||||
<h1 tal:attributes="ondblclick item/openEditWindow">
|
<h1 tal:attributes="ondblclick item/openEditWindow">
|
||||||
<span tal:content="item/title">Title</span>
|
<span tal:content="item/title">Title</span>
|
||||||
</h1>
|
</h1>
|
||||||
|
<p tal:define="description description|item/description"
|
||||||
|
tal:condition="description">
|
||||||
|
<i tal:content="description">Description</i></p>
|
||||||
</metal:title>
|
</metal:title>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,10 @@ pre {
|
||||||
max-height: 35em;
|
max-height: 35em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
table.listing td {
|
||||||
|
white-space: normal;
|
||||||
|
}
|
||||||
|
|
||||||
.box div.body div.even {
|
.box div.body div.even {
|
||||||
background-color: #f4f4f4;
|
background-color: #f4f4f4;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,17 +22,20 @@ Adapters and others classes for analyzing resources.
|
||||||
$Id$
|
$Id$
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from itertools import tee
|
||||||
from zope.cachedescriptors.property import Lazy
|
from zope.cachedescriptors.property import Lazy
|
||||||
from zope import component
|
from zope import component
|
||||||
from zope.component import adapts
|
from zope.component import adapts
|
||||||
from zope.event import notify
|
from zope.event import notify
|
||||||
from zope.interface import implements
|
from zope.interface import implements
|
||||||
from zope.traversing.api import getName, getParent
|
from zope.traversing.api import getName, getParent
|
||||||
|
from cybertools.typology.interfaces import IType
|
||||||
|
|
||||||
from loops.classifier.interfaces import IClassifier, IExtractor, IAnalyzer
|
from loops.classifier.interfaces import IClassifier, IExtractor, IAnalyzer
|
||||||
from loops.classifier.interfaces import IInformationSet, IStatement
|
from loops.classifier.interfaces import IInformationSet, IStatement
|
||||||
from loops.common import AdapterBase, adapted
|
from loops.common import AdapterBase, adapted
|
||||||
from loops.interfaces import IResource, IConcept
|
from loops.interfaces import IResource, IConcept
|
||||||
|
from loops.query import ConceptQuery
|
||||||
from loops.resource import Resource
|
from loops.resource import Resource
|
||||||
from loops.setup import addAndConfigureObject
|
from loops.setup import addAndConfigureObject
|
||||||
from loops.type import TypeInterfaceSourceList
|
from loops.type import TypeInterfaceSourceList
|
||||||
|
@ -50,6 +53,24 @@ class Classifier(AdapterBase):
|
||||||
|
|
||||||
_contextAttributes = list(IClassifier) + list(IConcept)
|
_contextAttributes = list(IClassifier) + list(IConcept)
|
||||||
|
|
||||||
|
logLevel = 5
|
||||||
|
|
||||||
|
@Lazy
|
||||||
|
def conceptManager(self):
|
||||||
|
return self.context.getConceptManager()
|
||||||
|
|
||||||
|
@Lazy
|
||||||
|
def defaultPredicate(self):
|
||||||
|
return self.conceptManager.getDefaultPredicate()
|
||||||
|
|
||||||
|
@Lazy
|
||||||
|
def predicateType(self):
|
||||||
|
return self.conceptManager.getPredicateType()
|
||||||
|
|
||||||
|
@Lazy
|
||||||
|
def typeConcept(self):
|
||||||
|
return self.conceptManager.getTypeConcept()
|
||||||
|
|
||||||
def getOptions(self):
|
def getOptions(self):
|
||||||
return getattr(self.context, '_options', [])
|
return getattr(self.context, '_options', [])
|
||||||
def setOptions(self, value):
|
def setOptions(self, value):
|
||||||
|
@ -57,19 +78,25 @@ class Classifier(AdapterBase):
|
||||||
options = property(getOptions, setOptions)
|
options = property(getOptions, setOptions)
|
||||||
|
|
||||||
def process(self, resource):
|
def process(self, resource):
|
||||||
|
self.log('Processing %s' % resource.title, 3)
|
||||||
infoSet = InformationSet()
|
infoSet = InformationSet()
|
||||||
for name in self.extractors.split():
|
for name in self.extractors.split():
|
||||||
extractor = component.getAdapter(adapted(resource), IExtractor, name=name)
|
extractor = component.getAdapter(adapted(resource), IExtractor, name=name)
|
||||||
infoSet.update(extractor.extractInformationSet())
|
infoSet.update(extractor.extractInformationSet())
|
||||||
analyzer = component.getAdapter(self, IAnalyzer, name=self.analyzer)
|
analyzer = component.getAdapter(self, IAnalyzer, name=self.analyzer)
|
||||||
statements = analyzer.extractStatements(infoSet)
|
statements = analyzer.extractStatements(infoSet)
|
||||||
defaultPredicate = self.context.getConceptManager().getDefaultPredicate()
|
|
||||||
for statement in statements:
|
for statement in statements:
|
||||||
|
object = statement.object
|
||||||
|
qualifiers = IType(object).qualifiers
|
||||||
|
if 'system' in qualifiers:
|
||||||
|
continue
|
||||||
if statement.subject is None:
|
if statement.subject is None:
|
||||||
statement.subject = resource
|
statement.subject = resource
|
||||||
if statement.predicate is None:
|
if statement.predicate is None:
|
||||||
statement.predicate = defaultPredicate
|
statement.predicate = self.defaultPredicate
|
||||||
self.assignConcept(statement.subject, statement.object,
|
self.log('Assigning: %s %s %s' % (statement.subject.title,
|
||||||
|
statement.predicate.title, object.title), 5)
|
||||||
|
self.assignConcept(statement.subject, object,
|
||||||
statement.predicate)
|
statement.predicate)
|
||||||
|
|
||||||
def assignConcept(self, resource, concept, predicate):
|
def assignConcept(self, resource, concept, predicate):
|
||||||
|
@ -77,6 +104,10 @@ class Classifier(AdapterBase):
|
||||||
if resource not in resources:
|
if resource not in resources:
|
||||||
concept.assignResource(resource, predicate)
|
concept.assignResource(resource, predicate)
|
||||||
|
|
||||||
|
def log(self, message, level=5):
|
||||||
|
if level >= self.logLevel:
|
||||||
|
print 'Classifier %s:' % getName(self.context), message
|
||||||
|
|
||||||
|
|
||||||
class Extractor(object):
|
class Extractor(object):
|
||||||
|
|
||||||
|
@ -101,6 +132,17 @@ class Analyzer(object):
|
||||||
def extractStatements(self, informationSet):
|
def extractStatements(self, informationSet):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@Lazy
|
||||||
|
def query(self):
|
||||||
|
return ConceptQuery(self.context)
|
||||||
|
|
||||||
|
def findConcepts(self, word):
|
||||||
|
r1, r2 = tee(self.query.query(word, 'loops:concept:*'))
|
||||||
|
names = ', '.join(c.title for c in r2)
|
||||||
|
self.context.log('Searching for concept using "%s", result: %s'
|
||||||
|
% (word, names), 2)
|
||||||
|
return r1
|
||||||
|
|
||||||
|
|
||||||
class InformationSet(dict):
|
class InformationSet(dict):
|
||||||
|
|
||||||
|
|
|
@ -26,13 +26,13 @@ from zope import component
|
||||||
from zope.app.catalog.interfaces import ICatalog
|
from zope.app.catalog.interfaces import ICatalog
|
||||||
from zope.cachedescriptors.property import Lazy
|
from zope.cachedescriptors.property import Lazy
|
||||||
from zope.component import adapts
|
from zope.component import adapts
|
||||||
|
from zope.traversing.api import getName
|
||||||
|
|
||||||
from cybertools.organize.interfaces import IPerson
|
from cybertools.organize.interfaces import IPerson
|
||||||
from cybertools.typology.interfaces import IType
|
from cybertools.typology.interfaces import IType
|
||||||
from loops.classifier.base import Analyzer
|
from loops.classifier.base import Analyzer
|
||||||
from loops.classifier.base import Statement
|
from loops.classifier.base import Statement
|
||||||
from loops.common import adapted
|
from loops.common import adapted
|
||||||
from loops.query import ConceptQuery
|
|
||||||
|
|
||||||
|
|
||||||
class SampleAnalyzer(Analyzer):
|
class SampleAnalyzer(Analyzer):
|
||||||
|
@ -47,10 +47,6 @@ class SampleAnalyzer(Analyzer):
|
||||||
resource.
|
resource.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@Lazy
|
|
||||||
def query(self):
|
|
||||||
return ConceptQuery(self.context)
|
|
||||||
|
|
||||||
def handleCustomer(self, name):
|
def handleCustomer(self, name):
|
||||||
custTypes = self.getTypes(('institution', 'customer',))
|
custTypes = self.getTypes(('institution', 'customer',))
|
||||||
for c in self.findConcepts(name):
|
for c in self.findConcepts(name):
|
||||||
|
@ -94,9 +90,6 @@ class SampleAnalyzer(Analyzer):
|
||||||
result.extend(self.handleOwner(parts.pop(0)))
|
result.extend(self.handleOwner(parts.pop(0)))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def findConcepts(self, name):
|
|
||||||
return self.query.query(name, 'loops:concept:*')
|
|
||||||
|
|
||||||
@Lazy
|
@Lazy
|
||||||
def conceptManager(self):
|
def conceptManager(self):
|
||||||
return self.context.context.getConceptManager()
|
return self.context.context.getConceptManager()
|
||||||
|
|
|
@ -22,16 +22,16 @@ Standard implementations of classifier components.
|
||||||
$Id$
|
$Id$
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from zope.cachedescriptors.property import Lazy
|
from zope.cachedescriptors.property import Lazy
|
||||||
from zope.component import adapts
|
from zope.component import adapts
|
||||||
|
from zope.traversing.api import getName
|
||||||
|
|
||||||
from loops.classifier.base import Analyzer, Extractor
|
from loops.classifier.base import Analyzer, Extractor
|
||||||
from loops.classifier.base import InformationSet
|
from loops.classifier.base import InformationSet
|
||||||
from loops.classifier.base import Statement
|
from loops.classifier.base import Statement
|
||||||
from loops.interfaces import IExternalFile
|
from loops.interfaces import IExternalFile
|
||||||
from loops.query import ConceptQuery
|
|
||||||
|
|
||||||
|
|
||||||
class FilenameExtractor(Extractor):
|
class FilenameExtractor(Extractor):
|
||||||
|
@ -42,7 +42,7 @@ class FilenameExtractor(Extractor):
|
||||||
self.context = context
|
self.context = context
|
||||||
|
|
||||||
def extractInformationSet(self):
|
def extractInformationSet(self):
|
||||||
filename = self.context.externalAddress
|
filename, ext = os.path.splitext(self.context.externalAddress)
|
||||||
return InformationSet(filename=filename)
|
return InformationSet(filename=filename)
|
||||||
|
|
||||||
|
|
||||||
|
@ -63,21 +63,22 @@ class PathExtractor(Extractor):
|
||||||
|
|
||||||
class WordBasedAnalyzer(Analyzer):
|
class WordBasedAnalyzer(Analyzer):
|
||||||
|
|
||||||
@Lazy
|
stopWords = [u'and', u'und']
|
||||||
def query(self):
|
|
||||||
return ConceptQuery(self.context)
|
|
||||||
|
|
||||||
def extractStatements(self, informationSet):
|
def extractStatements(self, informationSet):
|
||||||
result = []
|
result = []
|
||||||
for key, value in informationSet.items():
|
for key, value in informationSet.items():
|
||||||
words = self.split(value)
|
words = self.split(value)
|
||||||
for w in words:
|
for w in words:
|
||||||
result.extend([Statement(c) for c in self.findConcepts(w)])
|
if w in self.stopWords:
|
||||||
|
continue
|
||||||
|
if len(w) > 1:
|
||||||
|
result.extend([Statement(c) for c in self.findConcepts(w)])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
wordPattern = '\\'.join(list(' .,+*%&-!?/:_[](){}'))
|
||||||
|
|
||||||
def split(self, text):
|
def split(self, text):
|
||||||
return re.split('\W+', text)
|
return re.split('[%s]+' % self.wordPattern, text)
|
||||||
|
#return re.split(r'[\W_]+', text)
|
||||||
def findConcepts(self, word):
|
|
||||||
return self.query.query(word, 'loops:concept:*')
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue