more extractors and analyzers

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2108 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-10-10 11:53:00 +00:00
parent 96d0462b18
commit 907dda565a
6 changed files with 88 additions and 16 deletions

View file

@ -84,6 +84,9 @@ and follow the classifier step by step.
>>> infoSet >>> infoSet
{'filename': 'cust_im_contract_webbg_20071015.txt'} {'filename': 'cust_im_contract_webbg_20071015.txt'}
Let's now use the sample analyzer - an example that very carefully
interprets the underscore-separated parts of the filename.
>>> analyzer = component.getAdapter(classifier, name=classifier.analyzer) >>> analyzer = component.getAdapter(classifier, name=classifier.analyzer)
>>> statements = analyzer.extractStatements(infoSet) >>> statements = analyzer.extractStatements(infoSet)
>>> statements >>> statements
@ -138,6 +141,13 @@ So we are now ready to have the whole stuff run in one call.
>>> len(webbg.getResources((concepts['ownedby'],))) >>> len(webbg.getResources((concepts['ownedby'],)))
3 3
We can repeat the process without getting additional assignments.
>>> for name in rnames[1:]:
... classifier.process(resources[name])
>>> len(webbg.getResources())
4
Fin de partie Fin de partie
============= =============

View file

@ -69,10 +69,13 @@ class Classifier(AdapterBase):
statement.subject = resource statement.subject = resource
if statement.predicate is None: if statement.predicate is None:
statement.predicate = defaultPredicate statement.predicate = defaultPredicate
self.assignConcept(statement) self.assignConcept(statement.subject, statement.object,
statement.predicate)
def assignConcept(self, statement): def assignConcept(self, resource, concept, predicate):
statement.object.assignResource(statement.subject, statement.predicate) resources = concept.getResources([predicate])
if resource not in resources:
concept.assignResource(resource, predicate)
class Extractor(object): class Extractor(object):

View file

@ -43,7 +43,6 @@ class ClassifierView(ConceptView):
cta = adapted(self.context) cta = adapted(self.context)
if cta is not None: if cta is not None:
for r in collectResources(self.context): for r in collectResources(self.context):
print '***', r.title
cta.process(r) cta.process(r)
return True return True

View file

@ -3,13 +3,11 @@
<configure <configure
xmlns:zope="http://namespaces.zope.org/zope" xmlns:zope="http://namespaces.zope.org/zope"
xmlns:browser="http://namespaces.zope.org/browser" xmlns:browser="http://namespaces.zope.org/browser"
i18n_domain="zope" i18n_domain="zope">
>
<zope:adapter <zope:adapter
factory="loops.classifier.base.Classifier" factory="loops.classifier.base.Classifier"
trusted="True" /> trusted="True" />
<zope:class class="loops.classifier.base.Classifier"> <zope:class class="loops.classifier.base.Classifier">
<require permission="zope.View" <require permission="zope.View"
interface="loops.classifier.interfaces.IClassifier" /> interface="loops.classifier.interfaces.IClassifier" />
@ -19,9 +17,7 @@
<zope:adapter <zope:adapter
factory="loops.classifier.standard.FilenameExtractor" factory="loops.classifier.standard.FilenameExtractor"
name="filename" name="filename" trusted="True" />
trusted="True" />
<zope:class class="loops.classifier.standard.FilenameExtractor"> <zope:class class="loops.classifier.standard.FilenameExtractor">
<require permission="zope.View" <require permission="zope.View"
interface="loops.classifier.interfaces.IExtractor" /> interface="loops.classifier.interfaces.IExtractor" />
@ -30,10 +26,28 @@
</zope:class> </zope:class>
<zope:adapter <zope:adapter
factory="loops.classifier.sample.SampleAnalyzer" factory="loops.classifier.standard.PathExtractor"
name="sample" name="path" trusted="True" />
trusted="True" /> <zope:class class="loops.classifier.standard.PathExtractor">
<require permission="zope.View"
interface="loops.classifier.interfaces.IExtractor" />
<require permission="zope.ManageContent"
set_schema="loops.classifier.interfaces.IExtractor" />
</zope:class>
<zope:adapter
factory="loops.classifier.standard.WordBasedAnalyzer"
name="word-based" trusted="True" />
<zope:class class="loops.classifier.standard.WordBasedAnalyzer">
<require permission="zope.View"
interface="loops.classifier.interfaces.IAnalyzer" />
<require permission="zope.ManageContent"
set_schema="loops.classifier.interfaces.IAnalyzer" />
</zope:class>
<zope:adapter
factory="loops.classifier.sample.SampleAnalyzer"
name="sample" trusted="True" />
<zope:class class="loops.classifier.sample.SampleAnalyzer"> <zope:class class="loops.classifier.sample.SampleAnalyzer">
<require permission="zope.View" <require permission="zope.View"
interface="loops.classifier.interfaces.IAnalyzer" /> interface="loops.classifier.interfaces.IAnalyzer" />

View file

@ -32,6 +32,7 @@ from cybertools.typology.interfaces import IType
from loops.classifier.base import Analyzer from loops.classifier.base import Analyzer
from loops.classifier.base import Statement from loops.classifier.base import Statement
from loops.common import adapted from loops.common import adapted
from loops.query import ConceptQuery
class SampleAnalyzer(Analyzer): class SampleAnalyzer(Analyzer):
@ -46,6 +47,10 @@ class SampleAnalyzer(Analyzer):
resource. resource.
""" """
@Lazy
def query(self):
return ConceptQuery(self.context)
def handleCustomer(self, name): def handleCustomer(self, name):
custTypes = self.getTypes(('institution', 'customer',)) custTypes = self.getTypes(('institution', 'customer',))
for c in self.findConcepts(name): for c in self.findConcepts(name):
@ -90,8 +95,7 @@ class SampleAnalyzer(Analyzer):
return result return result
def findConcepts(self, name): def findConcepts(self, name):
cat = component.getUtility(ICatalog) return self.query.query(name, 'loops:concept:*')
return cat.searchResults(loops_text=name)
@Lazy @Lazy
def conceptManager(self): def conceptManager(self):

View file

@ -22,11 +22,16 @@ Standard implementations of classifier components.
$Id$ $Id$
""" """
import re
from zope.cachedescriptors.property import Lazy
from zope.component import adapts from zope.component import adapts
from loops.classifier.base import Extractor from loops.classifier.base import Analyzer, Extractor
from loops.classifier.base import InformationSet from loops.classifier.base import InformationSet
from loops.classifier.base import Statement
from loops.interfaces import IExternalFile from loops.interfaces import IExternalFile
from loops.query import ConceptQuery
class FilenameExtractor(Extractor): class FilenameExtractor(Extractor):
@ -39,3 +44,40 @@ class FilenameExtractor(Extractor):
def extractInformationSet(self): def extractInformationSet(self):
filename = self.context.externalAddress filename = self.context.externalAddress
return InformationSet(filename=filename) return InformationSet(filename=filename)
class PathExtractor(Extractor):
    """Extractor that exposes the ``subdir`` storage parameter of its
    context as the ``path`` entry of an information set.
    """

    adapts(IExternalFile)

    def __init__(self, context):
        self.context = context

    def extractInformationSet(self):
        """Return an InformationSet holding the context's ``subdir``
        storage parameter under the key ``path``; the set is empty when
        the storage parameters have no ``subdir`` entry.
        """
        storage = self.context.storageParams
        if 'subdir' not in storage:
            # Nothing to report for this context.
            return InformationSet()
        return InformationSet(path=storage['subdir'])
class WordBasedAnalyzer(Analyzer):
    """Analyzer that breaks every value of an information set into words
    and creates one statement for each concept found for a word.
    """

    @Lazy
    def query(self):
        """ConceptQuery for this analyzer's context, created on first use."""
        return ConceptQuery(self.context)

    def extractStatements(self, informationSet):
        """Return a list of statements, one per concept found for any word
        in any value of ``informationSet``.

        The keys of the information set are not evaluated; only the values
        are split into words.
        """
        result = []
        for key, value in informationSet.items():
            for word in self.split(value):
                result.extend([Statement(c) for c in self.findConcepts(word)])
        return result

    def split(self, text):
        """Split ``text`` at runs of non-word characters and return the
        list of non-empty words.
        """
        # re.split() yields empty strings when the text starts or ends with
        # a separator (e.g. a path like '/a/b/'); an empty word must not be
        # passed on as a search term, so such entries are dropped here.
        return [word for word in re.split(r'\W+', text) if word]

    def findConcepts(self, word):
        """Return the result of querying for ``word`` with the type
        pattern 'loops:concept:*'.
        """
        return self.query.query(word, 'loops:concept:*')