more extractors and analyzers

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2108 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-10-10 11:53:00 +00:00
parent 96d0462b18
commit 907dda565a
6 changed files with 88 additions and 16 deletions

View file

@ -84,6 +84,9 @@ and follow the classifier step by step.
>>> infoSet
{'filename': 'cust_im_contract_webbg_20071015.txt'}
Let's now use the sample analyzer - an example that interprets very carefully
the underscore-separated parts of the filename.
>>> analyzer = component.getAdapter(classifier, name=classifier.analyzer)
>>> statements = analyzer.extractStatements(infoSet)
>>> statements
@ -138,6 +141,13 @@ So we are now ready to have the whole stuff run in one call.
>>> len(webbg.getResources((concepts['ownedby'],)))
3
We can repeat the process without getting additional assignments.
>>> for name in rnames[1:]:
... classifier.process(resources[name])
>>> len(webbg.getResources())
4
Fin de partie
=============

View file

@ -69,10 +69,13 @@ class Classifier(AdapterBase):
statement.subject = resource
if statement.predicate is None:
statement.predicate = defaultPredicate
self.assignConcept(statement)
self.assignConcept(statement.subject, statement.object,
statement.predicate)
def assignConcept(self, statement):
statement.object.assignResource(statement.subject, statement.predicate)
def assignConcept(self, resource, concept, predicate):
    """Assign ``resource`` to ``concept`` unless it is already assigned.

    The resources currently returned by ``concept.getResources()`` for
    ``predicate`` are checked first, so that calling this method again
    with the same arguments does not trigger a second
    ``assignResource()`` call.
    """
    alreadyAssigned = resource in concept.getResources([predicate])
    if alreadyAssigned:
        return
    concept.assignResource(resource, predicate)
class Extractor(object):

View file

@ -43,7 +43,6 @@ class ClassifierView(ConceptView):
cta = adapted(self.context)
if cta is not None:
for r in collectResources(self.context):
print '***', r.title
cta.process(r)
return True

View file

@ -3,13 +3,11 @@
<configure
xmlns:zope="http://namespaces.zope.org/zope"
xmlns:browser="http://namespaces.zope.org/browser"
i18n_domain="zope"
>
i18n_domain="zope">
<zope:adapter
factory="loops.classifier.base.Classifier"
trusted="True" />
<zope:class class="loops.classifier.base.Classifier">
<require permission="zope.View"
interface="loops.classifier.interfaces.IClassifier" />
@ -19,9 +17,7 @@
<zope:adapter
factory="loops.classifier.standard.FilenameExtractor"
name="filename"
trusted="True" />
name="filename" trusted="True" />
<zope:class class="loops.classifier.standard.FilenameExtractor">
<require permission="zope.View"
interface="loops.classifier.interfaces.IExtractor" />
@ -30,10 +26,28 @@
</zope:class>
<zope:adapter
factory="loops.classifier.sample.SampleAnalyzer"
name="sample"
trusted="True" />
factory="loops.classifier.standard.PathExtractor"
name="path" trusted="True" />
<zope:class class="loops.classifier.standard.PathExtractor">
<require permission="zope.View"
interface="loops.classifier.interfaces.IExtractor" />
<require permission="zope.ManageContent"
set_schema="loops.classifier.interfaces.IExtractor" />
</zope:class>
<zope:adapter
factory="loops.classifier.standard.WordBasedAnalyzer"
name="word-based" trusted="True" />
<zope:class class="loops.classifier.standard.WordBasedAnalyzer">
<require permission="zope.View"
interface="loops.classifier.interfaces.IAnalyzer" />
<require permission="zope.ManageContent"
set_schema="loops.classifier.interfaces.IAnalyzer" />
</zope:class>
<zope:adapter
factory="loops.classifier.sample.SampleAnalyzer"
name="sample" trusted="True" />
<zope:class class="loops.classifier.sample.SampleAnalyzer">
<require permission="zope.View"
interface="loops.classifier.interfaces.IAnalyzer" />

View file

@ -32,6 +32,7 @@ from cybertools.typology.interfaces import IType
from loops.classifier.base import Analyzer
from loops.classifier.base import Statement
from loops.common import adapted
from loops.query import ConceptQuery
class SampleAnalyzer(Analyzer):
@ -46,6 +47,10 @@ class SampleAnalyzer(Analyzer):
resource.
"""
@Lazy
def query(self):
    """Return a ``ConceptQuery`` built on this analyzer's context."""
    conceptQuery = ConceptQuery(self.context)
    return conceptQuery
def handleCustomer(self, name):
custTypes = self.getTypes(('institution', 'customer',))
for c in self.findConcepts(name):
@ -90,8 +95,7 @@ class SampleAnalyzer(Analyzer):
return result
def findConcepts(self, name):
cat = component.getUtility(ICatalog)
return cat.searchResults(loops_text=name)
return self.query.query(name, 'loops:concept:*')
@Lazy
def conceptManager(self):

View file

@ -22,11 +22,16 @@ Standard implementations of classifier components.
$Id$
"""
import re
from zope.cachedescriptors.property import Lazy
from zope.component import adapts
from loops.classifier.base import Extractor
from loops.classifier.base import Analyzer, Extractor
from loops.classifier.base import InformationSet
from loops.classifier.base import Statement
from loops.interfaces import IExternalFile
from loops.query import ConceptQuery
class FilenameExtractor(Extractor):
@ -39,3 +44,40 @@ class FilenameExtractor(Extractor):
def extractInformationSet(self):
    """Return an information set holding the context's external address.

    The value of ``self.context.externalAddress`` is stored under the
    ``filename`` key.
    """
    return InformationSet(filename=self.context.externalAddress)
class PathExtractor(Extractor):
    """Extractor that reports the ``subdir`` storage parameter as ``path``."""

    adapts(IExternalFile)

    def __init__(self, context):
        self.context = context

    def extractInformationSet(self):
        """Return an information set with the context's ``subdir`` setting.

        If the context's ``storageParams`` has no ``subdir`` entry an
        empty information set is returned; otherwise the entry is stored
        under the ``path`` key.
        """
        storageParams = self.context.storageParams
        if 'subdir' not in storageParams:
            return InformationSet()
        return InformationSet(path=storageParams['subdir'])
class WordBasedAnalyzer(Analyzer):
    """Analyzer that looks up concepts for every word of an information set.

    Each value of the information set is split into words; every word is
    passed to a concept query and each concept found yields a statement.
    """

    @Lazy
    def query(self):
        """Return a ``ConceptQuery`` built on this analyzer's context."""
        return ConceptQuery(self.context)

    def extractStatements(self, informationSet):
        """Return a list of statements, one per concept found for any word.

        The keys of ``informationSet`` are ignored; only the values are
        split into words (see ``split()``) and looked up via
        ``findConcepts()``.
        """
        result = []
        for key, value in informationSet.items():
            for word in self.split(value):
                result.extend(Statement(c) for c in self.findConcepts(word))
        return result

    def split(self, text):
        """Split ``text`` on runs of non-word characters.

        ``re.split()`` yields empty strings when the text starts or ends
        with a separator (e.g. a path like ``/a/b/``); these are dropped
        so that no empty search term reaches the concept query.
        """
        return [word for word in re.split(r'\W+', text) if word]

    def findConcepts(self, word):
        """Query for ``word``, restricted to the ``loops:concept:*`` types."""
        return self.query.query(word, 'loops:concept:*')