From bee421bece87f2a5c89952b5ddacb20cb4b47f32 Mon Sep 17 00:00:00 2001 From: helmutm Date: Mon, 12 Nov 2007 16:21:36 +0000 Subject: [PATCH] minor improvements, esp for classifier git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2174 fd906abe-77d9-0310-91a1-e0d9ade77398 --- browser/concept_macros.pt | 3 +++ browser/loops.css | 4 ++++ classifier/base.py | 48 ++++++++++++++++++++++++++++++++++++--- classifier/sample.py | 9 +------- classifier/standard.py | 23 ++++++++++--------- 5 files changed, 65 insertions(+), 22 deletions(-) diff --git a/browser/concept_macros.pt b/browser/concept_macros.pt index ca92748..9d386c8 100644 --- a/browser/concept_macros.pt +++ b/browser/concept_macros.pt @@ -12,6 +12,9 @@

Title

+

+ Description

diff --git a/browser/loops.css b/browser/loops.css index c02ba75..ba29dc1 100644 --- a/browser/loops.css +++ b/browser/loops.css @@ -15,6 +15,10 @@ pre { max-height: 35em; } +table.listing td { + white-space: normal; +} + .box div.body div.even { background-color: #f4f4f4; } diff --git a/classifier/base.py b/classifier/base.py index 622883b..0c9c4ed 100644 --- a/classifier/base.py +++ b/classifier/base.py @@ -22,17 +22,20 @@ Adapters and others classes for analyzing resources. $Id$ """ +from itertools import tee from zope.cachedescriptors.property import Lazy from zope import component from zope.component import adapts from zope.event import notify from zope.interface import implements from zope.traversing.api import getName, getParent +from cybertools.typology.interfaces import IType from loops.classifier.interfaces import IClassifier, IExtractor, IAnalyzer from loops.classifier.interfaces import IInformationSet, IStatement from loops.common import AdapterBase, adapted from loops.interfaces import IResource, IConcept +from loops.query import ConceptQuery from loops.resource import Resource from loops.setup import addAndConfigureObject from loops.type import TypeInterfaceSourceList @@ -50,6 +53,24 @@ class Classifier(AdapterBase): _contextAttributes = list(IClassifier) + list(IConcept) + logLevel = 5 + + @Lazy + def conceptManager(self): + return self.context.getConceptManager() + + @Lazy + def defaultPredicate(self): + return self.conceptManager.getDefaultPredicate() + + @Lazy + def predicateType(self): + return self.conceptManager.getPredicateType() + + @Lazy + def typeConcept(self): + return self.conceptManager.getTypeConcept() + def getOptions(self): return getattr(self.context, '_options', []) def setOptions(self, value): @@ -57,19 +78,25 @@ class Classifier(AdapterBase): options = property(getOptions, setOptions) def process(self, resource): + self.log('Processing %s' % resource.title, 3) infoSet = InformationSet() for name in self.extractors.split(): extractor = component.getAdapter(adapted(resource), IExtractor, name=name) infoSet.update(extractor.extractInformationSet()) analyzer = component.getAdapter(self, IAnalyzer, name=self.analyzer) statements = analyzer.extractStatements(infoSet) - defaultPredicate = self.context.getConceptManager().getDefaultPredicate() for statement in statements: + object = statement.object + qualifiers = IType(object).qualifiers + if 'system' in qualifiers: + continue if statement.subject is None: statement.subject = resource if statement.predicate is None: - statement.predicate = defaultPredicate - self.assignConcept(statement.subject, statement.object, + statement.predicate = self.defaultPredicate + self.log('Assigning: %s %s %s' % (statement.subject.title, + statement.predicate.title, object.title), 5) + self.assignConcept(statement.subject, object, statement.predicate) def assignConcept(self, resource, concept, predicate): @@ -77,6 +104,10 @@ class Classifier(AdapterBase): if resource not in resources: concept.assignResource(resource, predicate) + def log(self, message, level=5): + if level >= self.logLevel: + print 'Classifier %s:' % getName(self.context), message + class Extractor(object): @@ -101,6 +132,17 @@ class Analyzer(object): def extractStatements(self, informationSet): return [] + @Lazy + def query(self): + return ConceptQuery(self.context) + + def findConcepts(self, word): + r1, r2 = tee(self.query.query(word, 'loops:concept:*')) + names = ', '.join(c.title for c in r2) + self.context.log('Searching for concept using "%s", result: %s' + % (word, names), 2) + return r1 + class InformationSet(dict): diff --git a/classifier/sample.py b/classifier/sample.py index 89c1fa4..5e7c5f9 100644 --- a/classifier/sample.py +++ b/classifier/sample.py @@ -26,13 +26,13 @@ from zope import component from zope.app.catalog.interfaces import ICatalog from zope.cachedescriptors.property import Lazy from zope.component import adapts +from zope.traversing.api import getName from cybertools.organize.interfaces import IPerson from cybertools.typology.interfaces import IType from loops.classifier.base import Analyzer from loops.classifier.base import Statement from loops.common import adapted -from loops.query import ConceptQuery class SampleAnalyzer(Analyzer): @@ -47,10 +47,6 @@ class SampleAnalyzer(Analyzer): resource. """ - @Lazy - def query(self): - return ConceptQuery(self.context) - def handleCustomer(self, name): custTypes = self.getTypes(('institution', 'customer',)) for c in self.findConcepts(name): @@ -94,9 +90,6 @@ class SampleAnalyzer(Analyzer): result.extend(self.handleOwner(parts.pop(0))) return result - def findConcepts(self, name): - return self.query.query(name, 'loops:concept:*') - @Lazy def conceptManager(self): return self.context.context.getConceptManager() diff --git a/classifier/standard.py b/classifier/standard.py index dc5c795..f15d027 100644 --- a/classifier/standard.py +++ b/classifier/standard.py @@ -22,16 +22,16 @@ Standard implementations of classifier components. $Id$ """ +import os import re - from zope.cachedescriptors.property import Lazy from zope.component import adapts +from zope.traversing.api import getName from loops.classifier.base import Analyzer, Extractor from loops.classifier.base import InformationSet from loops.classifier.base import Statement from loops.interfaces import IExternalFile -from loops.query import ConceptQuery class FilenameExtractor(Extractor): @@ -42,7 +42,7 @@ class FilenameExtractor(Extractor): self.context = context def extractInformationSet(self): - filename = self.context.externalAddress + filename, ext = os.path.splitext(self.context.externalAddress) return InformationSet(filename=filename) @@ -63,21 +63,22 @@ class PathExtractor(Extractor): class WordBasedAnalyzer(Analyzer): - @Lazy - def query(self): - return ConceptQuery(self.context) + stopWords = [u'and', u'und'] def extractStatements(self, informationSet): result = [] for key, value in informationSet.items(): words = self.split(value) for w in words: - result.extend([Statement(c) for c in self.findConcepts(w)]) + if w in self.stopWords: + continue + if len(w) > 1: + result.extend([Statement(c) for c in self.findConcepts(w)]) return result + wordPattern = '\\'.join(list(' .,+*%&-!?/:_[](){}')) + def split(self, text): - return re.split('\W+', text) - - def findConcepts(self, word): - return self.query.query(word, 'loops:concept:*') + return re.split('[%s]+' % self.wordPattern, text) + #return re.split(r'[\W_]+', text)