more extractors and analyzers
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2108 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
96d0462b18
commit
907dda565a
6 changed files with 88 additions and 16 deletions
|
@ -84,6 +84,9 @@ and follow the classifier step by step.
|
||||||
>>> infoSet
|
>>> infoSet
|
||||||
{'filename': 'cust_im_contract_webbg_20071015.txt'}
|
{'filename': 'cust_im_contract_webbg_20071015.txt'}
|
||||||
|
|
||||||
|
Let's now use the sample analyzer - an example that interprets very carefully
|
||||||
|
the underscore-separated parts of the filename.
|
||||||
|
|
||||||
>>> analyzer = component.getAdapter(classifier, name=classifier.analyzer)
|
>>> analyzer = component.getAdapter(classifier, name=classifier.analyzer)
|
||||||
>>> statements = analyzer.extractStatements(infoSet)
|
>>> statements = analyzer.extractStatements(infoSet)
|
||||||
>>> statements
|
>>> statements
|
||||||
|
@ -138,6 +141,13 @@ So we are now ready to have the whole stuff run in one call.
|
||||||
>>> len(webbg.getResources((concepts['ownedby'],)))
|
>>> len(webbg.getResources((concepts['ownedby'],)))
|
||||||
3
|
3
|
||||||
|
|
||||||
|
We can repeat the process without getting additional assignments.
|
||||||
|
|
||||||
|
>>> for name in rnames[1:]:
|
||||||
|
... classifier.process(resources[name])
|
||||||
|
>>> len(webbg.getResources())
|
||||||
|
4
|
||||||
|
|
||||||
|
|
||||||
Fin de partie
|
Fin de partie
|
||||||
=============
|
=============
|
||||||
|
|
|
@ -69,10 +69,13 @@ class Classifier(AdapterBase):
|
||||||
statement.subject = resource
|
statement.subject = resource
|
||||||
if statement.predicate is None:
|
if statement.predicate is None:
|
||||||
statement.predicate = defaultPredicate
|
statement.predicate = defaultPredicate
|
||||||
self.assignConcept(statement)
|
self.assignConcept(statement.subject, statement.object,
|
||||||
|
statement.predicate)
|
||||||
|
|
||||||
def assignConcept(self, statement):
|
def assignConcept(self, resource, concept, predicate):
|
||||||
statement.object.assignResource(statement.subject, statement.predicate)
|
resources = concept.getResources([predicate])
|
||||||
|
if resource not in resources:
|
||||||
|
concept.assignResource(resource, predicate)
|
||||||
|
|
||||||
|
|
||||||
class Extractor(object):
|
class Extractor(object):
|
||||||
|
|
|
@ -43,7 +43,6 @@ class ClassifierView(ConceptView):
|
||||||
cta = adapted(self.context)
|
cta = adapted(self.context)
|
||||||
if cta is not None:
|
if cta is not None:
|
||||||
for r in collectResources(self.context):
|
for r in collectResources(self.context):
|
||||||
print '***', r.title
|
|
||||||
cta.process(r)
|
cta.process(r)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -3,13 +3,11 @@
|
||||||
<configure
|
<configure
|
||||||
xmlns:zope="http://namespaces.zope.org/zope"
|
xmlns:zope="http://namespaces.zope.org/zope"
|
||||||
xmlns:browser="http://namespaces.zope.org/browser"
|
xmlns:browser="http://namespaces.zope.org/browser"
|
||||||
i18n_domain="zope"
|
i18n_domain="zope">
|
||||||
>
|
|
||||||
|
|
||||||
<zope:adapter
|
<zope:adapter
|
||||||
factory="loops.classifier.base.Classifier"
|
factory="loops.classifier.base.Classifier"
|
||||||
trusted="True" />
|
trusted="True" />
|
||||||
|
|
||||||
<zope:class class="loops.classifier.base.Classifier">
|
<zope:class class="loops.classifier.base.Classifier">
|
||||||
<require permission="zope.View"
|
<require permission="zope.View"
|
||||||
interface="loops.classifier.interfaces.IClassifier" />
|
interface="loops.classifier.interfaces.IClassifier" />
|
||||||
|
@ -19,9 +17,7 @@
|
||||||
|
|
||||||
<zope:adapter
|
<zope:adapter
|
||||||
factory="loops.classifier.standard.FilenameExtractor"
|
factory="loops.classifier.standard.FilenameExtractor"
|
||||||
name="filename"
|
name="filename" trusted="True" />
|
||||||
trusted="True" />
|
|
||||||
|
|
||||||
<zope:class class="loops.classifier.standard.FilenameExtractor">
|
<zope:class class="loops.classifier.standard.FilenameExtractor">
|
||||||
<require permission="zope.View"
|
<require permission="zope.View"
|
||||||
interface="loops.classifier.interfaces.IExtractor" />
|
interface="loops.classifier.interfaces.IExtractor" />
|
||||||
|
@ -30,10 +26,28 @@
|
||||||
</zope:class>
|
</zope:class>
|
||||||
|
|
||||||
<zope:adapter
|
<zope:adapter
|
||||||
factory="loops.classifier.sample.SampleAnalyzer"
|
factory="loops.classifier.standard.PathExtractor"
|
||||||
name="sample"
|
name="path" trusted="True" />
|
||||||
trusted="True" />
|
<zope:class class="loops.classifier.standard.PathExtractor">
|
||||||
|
<require permission="zope.View"
|
||||||
|
interface="loops.classifier.interfaces.IExtractor" />
|
||||||
|
<require permission="zope.ManageContent"
|
||||||
|
set_schema="loops.classifier.interfaces.IExtractor" />
|
||||||
|
</zope:class>
|
||||||
|
|
||||||
|
<zope:adapter
|
||||||
|
factory="loops.classifier.standard.WordBasedAnalyzer"
|
||||||
|
name="word-based" trusted="True" />
|
||||||
|
<zope:class class="loops.classifier.standard.WordBasedAnalyzer">
|
||||||
|
<require permission="zope.View"
|
||||||
|
interface="loops.classifier.interfaces.IAnalyzer" />
|
||||||
|
<require permission="zope.ManageContent"
|
||||||
|
set_schema="loops.classifier.interfaces.IAnalyzer" />
|
||||||
|
</zope:class>
|
||||||
|
|
||||||
|
<zope:adapter
|
||||||
|
factory="loops.classifier.sample.SampleAnalyzer"
|
||||||
|
name="sample" trusted="True" />
|
||||||
<zope:class class="loops.classifier.sample.SampleAnalyzer">
|
<zope:class class="loops.classifier.sample.SampleAnalyzer">
|
||||||
<require permission="zope.View"
|
<require permission="zope.View"
|
||||||
interface="loops.classifier.interfaces.IAnalyzer" />
|
interface="loops.classifier.interfaces.IAnalyzer" />
|
||||||
|
|
|
@ -32,6 +32,7 @@ from cybertools.typology.interfaces import IType
|
||||||
from loops.classifier.base import Analyzer
|
from loops.classifier.base import Analyzer
|
||||||
from loops.classifier.base import Statement
|
from loops.classifier.base import Statement
|
||||||
from loops.common import adapted
|
from loops.common import adapted
|
||||||
|
from loops.query import ConceptQuery
|
||||||
|
|
||||||
|
|
||||||
class SampleAnalyzer(Analyzer):
|
class SampleAnalyzer(Analyzer):
|
||||||
|
@ -46,6 +47,10 @@ class SampleAnalyzer(Analyzer):
|
||||||
resource.
|
resource.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@Lazy
|
||||||
|
def query(self):
|
||||||
|
return ConceptQuery(self.context)
|
||||||
|
|
||||||
def handleCustomer(self, name):
|
def handleCustomer(self, name):
|
||||||
custTypes = self.getTypes(('institution', 'customer',))
|
custTypes = self.getTypes(('institution', 'customer',))
|
||||||
for c in self.findConcepts(name):
|
for c in self.findConcepts(name):
|
||||||
|
@ -90,8 +95,7 @@ class SampleAnalyzer(Analyzer):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def findConcepts(self, name):
|
def findConcepts(self, name):
|
||||||
cat = component.getUtility(ICatalog)
|
return self.query.query(name, 'loops:concept:*')
|
||||||
return cat.searchResults(loops_text=name)
|
|
||||||
|
|
||||||
@Lazy
|
@Lazy
|
||||||
def conceptManager(self):
|
def conceptManager(self):
|
||||||
|
|
|
@ -22,11 +22,16 @@ Standard implementations of classifier components.
|
||||||
$Id$
|
$Id$
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from zope.cachedescriptors.property import Lazy
|
||||||
from zope.component import adapts
|
from zope.component import adapts
|
||||||
|
|
||||||
from loops.classifier.base import Extractor
|
from loops.classifier.base import Analyzer, Extractor
|
||||||
from loops.classifier.base import InformationSet
|
from loops.classifier.base import InformationSet
|
||||||
|
from loops.classifier.base import Statement
|
||||||
from loops.interfaces import IExternalFile
|
from loops.interfaces import IExternalFile
|
||||||
|
from loops.query import ConceptQuery
|
||||||
|
|
||||||
|
|
||||||
class FilenameExtractor(Extractor):
|
class FilenameExtractor(Extractor):
|
||||||
|
@ -39,3 +44,40 @@ class FilenameExtractor(Extractor):
|
||||||
def extractInformationSet(self):
|
def extractInformationSet(self):
|
||||||
filename = self.context.externalAddress
|
filename = self.context.externalAddress
|
||||||
return InformationSet(filename=filename)
|
return InformationSet(filename=filename)
|
||||||
|
|
||||||
|
|
||||||
|
class PathExtractor(Extractor):
    """Extractor that reports the sub-directory of an external file.

    The information set it produces has a single ``path`` entry taken
    from the ``subdir`` storage parameter of the adapted object; when
    that parameter is missing the information set is empty.
    """

    adapts(IExternalFile)

    def __init__(self, context):
        # Keep the adapted external file for later extraction.
        self.context = context

    def extractInformationSet(self):
        """Return an InformationSet built from the ``subdir`` parameter."""
        storageParams = self.context.storageParams
        if 'subdir' not in storageParams:
            # Nothing to report for files stored without a sub-directory.
            return InformationSet()
        return InformationSet(path=storageParams['subdir'])
|
||||||
|
|
||||||
|
|
||||||
|
class WordBasedAnalyzer(Analyzer):
    """Analyzer that looks up concepts for the words of an information set.

    Every value of the information set is split into words; each word is
    handed to a concept query and every concept found yields one statement.
    """

    @Lazy
    def query(self):
        """Return the ConceptQuery for this analyzer's context.

        Wrapped in ``Lazy`` so the query object is built on first access
        only.
        """
        return ConceptQuery(self.context)

    def extractStatements(self, informationSet):
        """Return a list with one statement per concept matching any word.

        ``informationSet`` must provide ``items()``; only its values are
        analyzed, the keys are ignored.
        """
        result = []
        for _key, value in informationSet.items():
            for word in self.split(value):
                if not word:
                    # re.split() yields '' when the text starts or ends
                    # with a separator (e.g. a path like '/a/b'); an empty
                    # search term cannot identify a concept, so skip it.
                    continue
                # NOTE(review): the concept is passed as the only argument
                # of Statement - presumably its object; confirm against
                # loops.classifier.base.Statement.
                result.extend(Statement(c) for c in self.findConcepts(word))
        return result

    def split(self, text):
        """Split ``text`` at runs of non-word characters.

        Underscores count as word characters for ``\\W`` and therefore do
        not separate words.
        """
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        return re.split(r'\W+', text)

    def findConcepts(self, word):
        """Return the result of querying for ``word``.

        NOTE(review): the second argument 'loops:concept:*' presumably
        restricts the search to concepts of any type - confirm against
        ConceptQuery.query.
        """
        return self.query.query(word, 'loops:concept:*')
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue