classifier: Python3 fixes

This commit is contained in:
Helmut Merz 2024-09-26 22:30:52 +02:00
parent bf1fda008c
commit 992b5c012d
3 changed files with 27 additions and 65 deletions

View file

@ -4,8 +4,6 @@ loops - Linked Objects for Organization and Processing Services
Automatic classification of resources. Automatic classification of resources.
($Id$)
Setting up a loops Site and Utilities Setting up a loops Site and Utilities
===================================== =====================================
@ -39,7 +37,7 @@ from external files so we have something to work with.
>>> tExternalCollection = concepts['extcollection'] >>> tExternalCollection = concepts['extcollection']
>>> coll01 = addObject(concepts, Concept, 'coll01', >>> coll01 = addObject(concepts, Concept, 'coll01',
... title=u'Collection One', conceptType=tExternalCollection) ... title='Collection One', conceptType=tExternalCollection)
>>> aColl01 = adapted(coll01) >>> aColl01 = adapted(coll01)
>>> aColl01.baseAddress = dataDir >>> aColl01.baseAddress = dataDir
>>> aColl01.address = '' >>> aColl01.address = ''
@ -49,7 +47,7 @@ from external files so we have something to work with.
7 7
>>> rnames = list(sorted(resources.keys())) >>> rnames = list(sorted(resources.keys()))
>>> rnames[0] >>> rnames[0]
u'cust_im_contract_webbg_20071015.txt' 'cust_im_contract_webbg_20071015.txt'
Filename-based Classification Filename-based Classification
@ -76,7 +74,7 @@ and follow the classifier step by step.
>>> from loops.classifier.interfaces import IExtractor, IAnalyzer >>> from loops.classifier.interfaces import IExtractor, IAnalyzer
>>> infoSet = InformationSet() >>> infoSet = InformationSet()
>>> for name in classifier.extractors.split(): >>> for name in classifier.extractors.split():
... print 'extractor:', name ... print('extractor:', name)
... extractor = component.getAdapter(adapted(r1), IExtractor, name=name) ... extractor = component.getAdapter(adapted(r1), IExtractor, name=name)
... infoSet.update(extractor.extractInformationSet()) ... infoSet.update(extractor.extractInformationSet())
extractor: filename extractor: filename
@ -96,32 +94,32 @@ So there seems to be something missing - we have to create concepts
that may be identified as being candidates for classification. that may be identified as being candidates for classification.
>>> tInstitution = addObject(concepts, Concept, 'institution', >>> tInstitution = addObject(concepts, Concept, 'institution',
... title=u'Institution', conceptType=concepts['type']) ... title='Institution', conceptType=concepts['type'])
>>> cust_im = addObject(concepts, Concept, 'im_editors', >>> cust_im = addObject(concepts, Concept, 'im_editors',
... title=u'im Editors', conceptType=tInstitution) ... title='im Editors', conceptType=tInstitution)
>>> cust_mc = addObject(concepts, Concept, 'mc_consulting', >>> cust_mc = addObject(concepts, Concept, 'mc_consulting',
... title=u'MC Management Consulting', conceptType=tInstitution) ... title='MC Management Consulting', conceptType=tInstitution)
>>> tDoctype = addObject(concepts, Concept, 'doctype', >>> tDoctype = addObject(concepts, Concept, 'doctype',
... title=u'Document Type', conceptType=concepts['type']) ... title='Document Type', conceptType=concepts['type'])
>>> dt_note = addObject(concepts, Concept, 'dt_note', >>> dt_note = addObject(concepts, Concept, 'dt_note',
... title=u'Note', conceptType=tDoctype) ... title='Note', conceptType=tDoctype)
>>> dt_contract = addObject(concepts, Concept, 'dt_contract', >>> dt_contract = addObject(concepts, Concept, 'dt_contract',
... title=u'Contract', conceptType=tDoctype) ... title='Contract', conceptType=tDoctype)
>>> tPerson = concepts['person'] >>> tPerson = concepts['person']
>>> webbg = addObject(concepts, Concept, 'webbg', >>> webbg = addObject(concepts, Concept, 'webbg',
... title=u'Gerald Webb', conceptType=tPerson) ... title='Gerald Webb', conceptType=tPerson)
>>> smitha = addObject(concepts, Concept, 'smitha', >>> smitha = addObject(concepts, Concept, 'smitha',
... title=u'Angelina Smith', conceptType=tPerson) ... title='Angelina Smith', conceptType=tPerson)
>>> watersj = addObject(concepts, Concept, 'watersj', >>> watersj = addObject(concepts, Concept, 'watersj',
... title=u'Jerry Waters', conceptType=tPerson) ... title='Jerry Waters', conceptType=tPerson)
>>> millerj = addObject(concepts, Concept, 'millerj', >>> millerj = addObject(concepts, Concept, 'millerj',
... title=u'Jeannie Miller', conceptType=tPerson) ... title='Jeannie Miller', conceptType=tPerson)
>>> t.indexAll(concepts, resources) >>> t.indexAll(concepts, resources)
>>> from zope.app.catalog.interfaces import ICatalog >>> from zope.catalog.interfaces import ICatalog
>>> cat = component.getUtility(ICatalog) >>> cat = component.getUtility(ICatalog)
>>> statements = analyzer.extractStatements(infoSet) >>> statements = analyzer.extractStatements(infoSet)
@ -135,7 +133,7 @@ So we are now ready to have the whole stuff run in one call.
Classifier fileclassifier: Assigning: ... Classifier fileclassifier: Assigning: ...
>>> list(sorted([c.title for c in r1.getConcepts()])) >>> list(sorted([c.title for c in r1.getConcepts()]))
[u'Collection One', u'Contract', u'External File', u'Gerald Webb', u'im Editors'] ['Collection One', 'Contract', 'External File', 'Gerald Webb', 'im Editors']
>>> for name in rnames[1:]: >>> for name in rnames[1:]:
... classifier.process(resources[name]) ... classifier.process(resources[name])

View file

@ -1,23 +1,6 @@
# # loops.classifier.base
# Copyright (c) 2015 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
""" """ Adapters and others classes for analyzing resources.
Adapters and others classes for analyzing resources.
""" """
from itertools import tee from itertools import tee
@ -26,7 +9,7 @@ from zope.cachedescriptors.property import Lazy
from zope import component from zope import component
from zope.component import adapts from zope.component import adapts
from zope.event import notify from zope.event import notify
from zope.interface import implements from zope.interface import implementer
from zope.traversing.api import getName, getParent from zope.traversing.api import getName, getParent
from cybertools.typology.interfaces import IType from cybertools.typology.interfaces import IType
@ -44,11 +27,11 @@ logger = getLogger('Classifier')
TypeInterfaceSourceList.typeInterfaces += (IClassifier,) TypeInterfaceSourceList.typeInterfaces += (IClassifier,)
@implementer(IClassifier)
class Classifier(AdapterBase): class Classifier(AdapterBase):
""" A concept adapter for analyzing resources. """ A concept adapter for analyzing resources.
""" """
implements(IClassifier)
adapts(IConcept) adapts(IConcept)
_contextAttributes = list(IClassifier) + list(IConcept) _contextAttributes = list(IClassifier) + list(IConcept)
@ -112,9 +95,9 @@ class Classifier(AdapterBase):
logger.info(u'%s: %s' % (getName(self.context), message)) logger.info(u'%s: %s' % (getName(self.context), message))
@implementer(IExtractor)
class Extractor(object): class Extractor(object):
implements(IExtractor)
adapts(IResource) adapts(IResource)
def __init__(self, context): def __init__(self, context):
@ -124,9 +107,9 @@ class Extractor(object):
return InformationSet() return InformationSet()
@implementer(IAnalyzer)
class Analyzer(object): class Analyzer(object):
implements(IAnalyzer)
adapts(IClassifier) adapts(IClassifier)
def __init__(self, context): def __init__(self, context):
@ -147,15 +130,15 @@ class Analyzer(object):
return r1 return r1
@implementer(IInformationSet)
class InformationSet(dict): class InformationSet(dict):
implements(IInformationSet) pass
@implementer(IStatement)
class Statement(object): class Statement(object):
implements(IStatement)
def __init__(self, object=None, predicate=None, subject=None, relevance=100): def __init__(self, object=None, predicate=None, subject=None, relevance=100):
self.subject = subject self.subject = subject
self.predicate = predicate self.predicate = predicate

View file

@ -1,30 +1,11 @@
# # loops.classifier.sample
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
""" """ Sample classifier implementation.
Sample classifier implementation.
$Id$
""" """
from zope import component from zope import component
from zope.app.catalog.interfaces import ICatalog
from zope.cachedescriptors.property import Lazy from zope.cachedescriptors.property import Lazy
from zope.catalog.interfaces import ICatalog
from zope.component import adapts from zope.component import adapts
from zope.traversing.api import getName from zope.traversing.api import getName