minor improvements, esp for classifier
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@2174 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
		
							parent
							
								
									368dd0c022
								
							
						
					
					
						commit
						bee421bece
					
				
					 5 changed files with 65 additions and 22 deletions
				
			
		| 
						 | 
				
			
			@ -12,6 +12,9 @@
 | 
			
		|||
    <h1 tal:attributes="ondblclick item/openEditWindow">
 | 
			
		||||
      <span tal:content="item/title">Title</span>
 | 
			
		||||
    </h1>
 | 
			
		||||
      <p tal:define="description description|item/description"
 | 
			
		||||
         tal:condition="description">
 | 
			
		||||
        <i tal:content="description">Description</i></p>
 | 
			
		||||
</metal:title>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,6 +15,10 @@ pre {
 | 
			
		|||
    max-height: 35em;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
table.listing td {
 | 
			
		||||
    white-space: normal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.box div.body div.even {
 | 
			
		||||
    background-color: #f4f4f4;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,17 +22,20 @@ Adapters and others classes for analyzing resources.
 | 
			
		|||
$Id$
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from itertools import tee
 | 
			
		||||
from zope.cachedescriptors.property import Lazy
 | 
			
		||||
from zope import component
 | 
			
		||||
from zope.component import adapts
 | 
			
		||||
from zope.event import notify
 | 
			
		||||
from zope.interface import implements
 | 
			
		||||
from zope.traversing.api import getName, getParent
 | 
			
		||||
from cybertools.typology.interfaces import IType
 | 
			
		||||
 | 
			
		||||
from loops.classifier.interfaces import IClassifier, IExtractor, IAnalyzer
 | 
			
		||||
from loops.classifier.interfaces import IInformationSet, IStatement
 | 
			
		||||
from loops.common import AdapterBase, adapted
 | 
			
		||||
from loops.interfaces import IResource, IConcept
 | 
			
		||||
from loops.query import ConceptQuery
 | 
			
		||||
from loops.resource import Resource
 | 
			
		||||
from loops.setup import addAndConfigureObject
 | 
			
		||||
from loops.type import TypeInterfaceSourceList
 | 
			
		||||
| 
						 | 
				
			
			@ -50,6 +53,24 @@ class Classifier(AdapterBase):
 | 
			
		|||
 | 
			
		||||
    _contextAttributes = list(IClassifier) + list(IConcept)
 | 
			
		||||
 | 
			
		||||
    logLevel = 5
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def conceptManager(self):
 | 
			
		||||
        return self.context.getConceptManager()
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def defaultPredicate(self):
 | 
			
		||||
        return self.conceptManager.getDefaultPredicate()
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def predicateType(self):
 | 
			
		||||
        return self.conceptManager.getPredicateType()
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def typeConcept(self):
 | 
			
		||||
        return self.conceptManager.getTypeConcept()
 | 
			
		||||
 | 
			
		||||
    def getOptions(self):
 | 
			
		||||
        return getattr(self.context, '_options', [])
 | 
			
		||||
    def setOptions(self, value):
 | 
			
		||||
| 
						 | 
				
			
			@ -57,19 +78,25 @@ class Classifier(AdapterBase):
 | 
			
		|||
    options = property(getOptions, setOptions)
 | 
			
		||||
 | 
			
		||||
    def process(self, resource):
 | 
			
		||||
        self.log('Processing %s' % resource.title, 3)
 | 
			
		||||
        infoSet = InformationSet()
 | 
			
		||||
        for name in self.extractors.split():
 | 
			
		||||
            extractor = component.getAdapter(adapted(resource), IExtractor, name=name)
 | 
			
		||||
            infoSet.update(extractor.extractInformationSet())
 | 
			
		||||
        analyzer = component.getAdapter(self, IAnalyzer, name=self.analyzer)
 | 
			
		||||
        statements = analyzer.extractStatements(infoSet)
 | 
			
		||||
        defaultPredicate = self.context.getConceptManager().getDefaultPredicate()
 | 
			
		||||
        for statement in statements:
 | 
			
		||||
            object = statement.object
 | 
			
		||||
            qualifiers = IType(object).qualifiers
 | 
			
		||||
            if 'system' in qualifiers:
 | 
			
		||||
                continue
 | 
			
		||||
            if statement.subject is None:
 | 
			
		||||
                statement.subject = resource
 | 
			
		||||
            if statement.predicate is None:
 | 
			
		||||
                statement.predicate = defaultPredicate
 | 
			
		||||
            self.assignConcept(statement.subject, statement.object,
 | 
			
		||||
                statement.predicate = self.defaultPredicate
 | 
			
		||||
            self.log('Assigning: %s %s %s' % (statement.subject.title,
 | 
			
		||||
                     statement.predicate.title, object.title), 5)
 | 
			
		||||
            self.assignConcept(statement.subject, object,
 | 
			
		||||
                               statement.predicate)
 | 
			
		||||
 | 
			
		||||
    def assignConcept(self, resource, concept, predicate):
 | 
			
		||||
| 
						 | 
				
			
			@ -77,6 +104,10 @@ class Classifier(AdapterBase):
 | 
			
		|||
        if resource not in resources:
 | 
			
		||||
            concept.assignResource(resource, predicate)
 | 
			
		||||
 | 
			
		||||
    def log(self, message, level=5):
 | 
			
		||||
        if level >= self.logLevel:
 | 
			
		||||
            print 'Classifier %s:' % getName(self.context), message
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Extractor(object):
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -101,6 +132,17 @@ class Analyzer(object):
 | 
			
		|||
    def extractStatements(self, informationSet):
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def query(self):
 | 
			
		||||
        return ConceptQuery(self.context)
 | 
			
		||||
 | 
			
		||||
    def findConcepts(self, word):
 | 
			
		||||
        r1, r2 = tee(self.query.query(word, 'loops:concept:*'))
 | 
			
		||||
        names = ', '.join(c.title for c in r2)
 | 
			
		||||
        self.context.log('Searching for concept using "%s", result: %s'
 | 
			
		||||
                       % (word, names), 2)
 | 
			
		||||
        return r1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class InformationSet(dict):
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -26,13 +26,13 @@ from zope import component
 | 
			
		|||
from zope.app.catalog.interfaces import ICatalog
 | 
			
		||||
from zope.cachedescriptors.property import Lazy
 | 
			
		||||
from zope.component import adapts
 | 
			
		||||
from zope.traversing.api import getName
 | 
			
		||||
 | 
			
		||||
from cybertools.organize.interfaces import IPerson
 | 
			
		||||
from cybertools.typology.interfaces import IType
 | 
			
		||||
from loops.classifier.base import Analyzer
 | 
			
		||||
from loops.classifier.base import Statement
 | 
			
		||||
from loops.common import adapted
 | 
			
		||||
from loops.query import ConceptQuery
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SampleAnalyzer(Analyzer):
 | 
			
		||||
| 
						 | 
				
			
			@ -47,10 +47,6 @@ class SampleAnalyzer(Analyzer):
 | 
			
		|||
        resource.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def query(self):
 | 
			
		||||
        return ConceptQuery(self.context)
 | 
			
		||||
 | 
			
		||||
    def handleCustomer(self, name):
 | 
			
		||||
        custTypes = self.getTypes(('institution', 'customer',))
 | 
			
		||||
        for c in self.findConcepts(name):
 | 
			
		||||
| 
						 | 
				
			
			@ -94,9 +90,6 @@ class SampleAnalyzer(Analyzer):
 | 
			
		|||
            result.extend(self.handleOwner(parts.pop(0)))
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    def findConcepts(self, name):
 | 
			
		||||
        return self.query.query(name, 'loops:concept:*')
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def conceptManager(self):
 | 
			
		||||
        return self.context.context.getConceptManager()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,16 +22,16 @@ Standard implementations of classifier components.
 | 
			
		|||
$Id$
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from zope.cachedescriptors.property import Lazy
 | 
			
		||||
from zope.component import adapts
 | 
			
		||||
from zope.traversing.api import getName
 | 
			
		||||
 | 
			
		||||
from loops.classifier.base import Analyzer, Extractor
 | 
			
		||||
from loops.classifier.base import InformationSet
 | 
			
		||||
from loops.classifier.base import Statement
 | 
			
		||||
from loops.interfaces import IExternalFile
 | 
			
		||||
from loops.query import ConceptQuery
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FilenameExtractor(Extractor):
 | 
			
		||||
| 
						 | 
				
			
			@ -42,7 +42,7 @@ class FilenameExtractor(Extractor):
 | 
			
		|||
        self.context = context
 | 
			
		||||
 | 
			
		||||
    def extractInformationSet(self):
 | 
			
		||||
        filename = self.context.externalAddress
 | 
			
		||||
        filename, ext = os.path.splitext(self.context.externalAddress)
 | 
			
		||||
        return InformationSet(filename=filename)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -63,21 +63,22 @@ class PathExtractor(Extractor):
 | 
			
		|||
 | 
			
		||||
class WordBasedAnalyzer(Analyzer):
 | 
			
		||||
 | 
			
		||||
    @Lazy
 | 
			
		||||
    def query(self):
 | 
			
		||||
        return ConceptQuery(self.context)
 | 
			
		||||
    stopWords = [u'and', u'und']
 | 
			
		||||
 | 
			
		||||
    def extractStatements(self, informationSet):
 | 
			
		||||
        result = []
 | 
			
		||||
        for key, value in informationSet.items():
 | 
			
		||||
            words = self.split(value)
 | 
			
		||||
            for w in words:
 | 
			
		||||
                result.extend([Statement(c) for c in self.findConcepts(w)])
 | 
			
		||||
                if w in self.stopWords:
 | 
			
		||||
                    continue
 | 
			
		||||
                if len(w) > 1:
 | 
			
		||||
                    result.extend([Statement(c) for c in self.findConcepts(w)])
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    wordPattern = '\\'.join(list(' .,+*%&-!?/:_[](){}'))
 | 
			
		||||
 | 
			
		||||
    def split(self, text):
 | 
			
		||||
        return re.split('\W+', text)
 | 
			
		||||
 | 
			
		||||
    def findConcepts(self, word):
 | 
			
		||||
        return self.query.query(word, 'loops:concept:*')
 | 
			
		||||
        return re.split('[%s]+' % self.wordPattern, text)
 | 
			
		||||
        #return re.split(r'[\W_]+', text)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue