more extractors and analyzers
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2108 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
96d0462b18
commit
907dda565a
6 changed files with 88 additions and 16 deletions
|
@ -84,6 +84,9 @@ and follow the classifier step by step.
|
|||
>>> infoSet
|
||||
{'filename': 'cust_im_contract_webbg_20071015.txt'}
|
||||
|
||||
Let's now use the sample analyzer - an example that interprets very carefully
|
||||
the underscore-separated parts of the filename.
|
||||
|
||||
>>> analyzer = component.getAdapter(classifier, name=classifier.analyzer)
|
||||
>>> statements = analyzer.extractStatements(infoSet)
|
||||
>>> statements
|
||||
|
@ -138,6 +141,13 @@ So we are now ready to have the whole stuff run in one call.
|
|||
>>> len(webbg.getResources((concepts['ownedby'],)))
|
||||
3
|
||||
|
||||
We can repeat the process without getting additional assignments.
|
||||
|
||||
>>> for name in rnames[1:]:
|
||||
... classifier.process(resources[name])
|
||||
>>> len(webbg.getResources())
|
||||
4
|
||||
|
||||
|
||||
Fin de partie
|
||||
=============
|
||||
|
|
|
@ -69,10 +69,13 @@ class Classifier(AdapterBase):
|
|||
statement.subject = resource
|
||||
if statement.predicate is None:
|
||||
statement.predicate = defaultPredicate
|
||||
self.assignConcept(statement)
|
||||
self.assignConcept(statement.subject, statement.object,
|
||||
statement.predicate)
|
||||
|
||||
def assignConcept(self, statement):
|
||||
statement.object.assignResource(statement.subject, statement.predicate)
|
||||
def assignConcept(self, resource, concept, predicate):
|
||||
resources = concept.getResources([predicate])
|
||||
if resource not in resources:
|
||||
concept.assignResource(resource, predicate)
|
||||
|
||||
|
||||
class Extractor(object):
|
||||
|
|
|
@ -43,7 +43,6 @@ class ClassifierView(ConceptView):
|
|||
cta = adapted(self.context)
|
||||
if cta is not None:
|
||||
for r in collectResources(self.context):
|
||||
print '***', r.title
|
||||
cta.process(r)
|
||||
return True
|
||||
|
||||
|
|
|
@ -3,13 +3,11 @@
|
|||
<configure
|
||||
xmlns:zope="http://namespaces.zope.org/zope"
|
||||
xmlns:browser="http://namespaces.zope.org/browser"
|
||||
i18n_domain="zope"
|
||||
>
|
||||
i18n_domain="zope">
|
||||
|
||||
<zope:adapter
|
||||
factory="loops.classifier.base.Classifier"
|
||||
trusted="True" />
|
||||
|
||||
<zope:class class="loops.classifier.base.Classifier">
|
||||
<require permission="zope.View"
|
||||
interface="loops.classifier.interfaces.IClassifier" />
|
||||
|
@ -19,9 +17,7 @@
|
|||
|
||||
<zope:adapter
|
||||
factory="loops.classifier.standard.FilenameExtractor"
|
||||
name="filename"
|
||||
trusted="True" />
|
||||
|
||||
name="filename" trusted="True" />
|
||||
<zope:class class="loops.classifier.standard.FilenameExtractor">
|
||||
<require permission="zope.View"
|
||||
interface="loops.classifier.interfaces.IExtractor" />
|
||||
|
@ -30,10 +26,28 @@
|
|||
</zope:class>
|
||||
|
||||
<zope:adapter
|
||||
factory="loops.classifier.sample.SampleAnalyzer"
|
||||
name="sample"
|
||||
trusted="True" />
|
||||
factory="loops.classifier.standard.PathExtractor"
|
||||
name="path" trusted="True" />
|
||||
<zope:class class="loops.classifier.standard.PathExtractor">
|
||||
<require permission="zope.View"
|
||||
interface="loops.classifier.interfaces.IExtractor" />
|
||||
<require permission="zope.ManageContent"
|
||||
set_schema="loops.classifier.interfaces.IExtractor" />
|
||||
</zope:class>
|
||||
|
||||
<zope:adapter
|
||||
factory="loops.classifier.standard.WordBasedAnalyzer"
|
||||
name="word-based" trusted="True" />
|
||||
<zope:class class="loops.classifier.standard.WordBasedAnalyzer">
|
||||
<require permission="zope.View"
|
||||
interface="loops.classifier.interfaces.IAnalyzer" />
|
||||
<require permission="zope.ManageContent"
|
||||
set_schema="loops.classifier.interfaces.IAnalyzer" />
|
||||
</zope:class>
|
||||
|
||||
<zope:adapter
|
||||
factory="loops.classifier.sample.SampleAnalyzer"
|
||||
name="sample" trusted="True" />
|
||||
<zope:class class="loops.classifier.sample.SampleAnalyzer">
|
||||
<require permission="zope.View"
|
||||
interface="loops.classifier.interfaces.IAnalyzer" />
|
||||
|
|
|
@ -32,6 +32,7 @@ from cybertools.typology.interfaces import IType
|
|||
from loops.classifier.base import Analyzer
|
||||
from loops.classifier.base import Statement
|
||||
from loops.common import adapted
|
||||
from loops.query import ConceptQuery
|
||||
|
||||
|
||||
class SampleAnalyzer(Analyzer):
|
||||
|
@ -46,6 +47,10 @@ class SampleAnalyzer(Analyzer):
|
|||
resource.
|
||||
"""
|
||||
|
||||
@Lazy
|
||||
def query(self):
|
||||
return ConceptQuery(self.context)
|
||||
|
||||
def handleCustomer(self, name):
|
||||
custTypes = self.getTypes(('institution', 'customer',))
|
||||
for c in self.findConcepts(name):
|
||||
|
@ -90,8 +95,7 @@ class SampleAnalyzer(Analyzer):
|
|||
return result
|
||||
|
||||
def findConcepts(self, name):
|
||||
cat = component.getUtility(ICatalog)
|
||||
return cat.searchResults(loops_text=name)
|
||||
return self.query.query(name, 'loops:concept:*')
|
||||
|
||||
@Lazy
|
||||
def conceptManager(self):
|
||||
|
|
|
@ -22,11 +22,16 @@ Standard implementations of classifier components.
|
|||
$Id$
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from zope.cachedescriptors.property import Lazy
|
||||
from zope.component import adapts
|
||||
|
||||
from loops.classifier.base import Extractor
|
||||
from loops.classifier.base import Analyzer, Extractor
|
||||
from loops.classifier.base import InformationSet
|
||||
from loops.classifier.base import Statement
|
||||
from loops.interfaces import IExternalFile
|
||||
from loops.query import ConceptQuery
|
||||
|
||||
|
||||
class FilenameExtractor(Extractor):
|
||||
|
@ -39,3 +44,40 @@ class FilenameExtractor(Extractor):
|
|||
def extractInformationSet(self):
|
||||
filename = self.context.externalAddress
|
||||
return InformationSet(filename=filename)
|
||||
|
||||
|
||||
class PathExtractor(Extractor):
    """Extractor that exposes the ``subdir`` storage parameter of the
    adapted external file as the ``path`` entry of the information set.
    """

    adapts(IExternalFile)

    def __init__(self, context):
        self.context = context

    def extractInformationSet(self):
        """Return an information set with the context's ``subdir`` storage
        parameter stored under ``path``; the set is empty if the storage
        parameters do not contain ``subdir``.
        """
        storageParams = self.context.storageParams
        if 'subdir' not in storageParams:
            return InformationSet()
        return InformationSet(path=storageParams['subdir'])
|
||||
|
||||
|
||||
class WordBasedAnalyzer(Analyzer):
    """Analyzer that derives statements from the single words found in the
    values of an information set.

    Each value is split into words, every word is handed to a concept
    query, and each concept found yields one statement.
    """

    @Lazy
    def query(self):
        # Query object used by findConcepts(); created on first access.
        return ConceptQuery(self.context)

    def extractStatements(self, informationSet):
        """Return a list with one ``Statement`` for every concept found
        for any word occurring in the values of ``informationSet``.
        """
        result = []
        for key, value in informationSet.items():
            for word in self.split(value):
                result.extend(Statement(c) for c in self.findConcepts(word))
        return result

    def split(self, text):
        """Return the list of non-empty words contained in ``text``.

        Words are runs of word characters separated by one or more
        non-word characters.
        """
        # re.split() yields '' when the text starts or ends with a
        # separator (e.g. a path like '/a/b'); such empty strings are not
        # words and must not be passed on to the concept query.
        return [word for word in re.split(r'\W+', text) if word]

    def findConcepts(self, word):
        """Return the result of querying for ``word``, restricted by the
        type pattern ``'loops:concept:*'``.
        """
        return self.query.query(word, 'loops:concept:*')
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue