From 1f4582f7a0d5ea76379fbb0126803ae37e567261 Mon Sep 17 00:00:00 2001 From: helmutm Date: Thu, 8 Jul 2010 17:26:08 +0000 Subject: [PATCH] work in progress: office file - processing document properties git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@3917 fd906abe-77d9-0310-91a1-e0d9ade77398 --- integrator/README.txt | 19 ++++++++- integrator/collection.py | 2 +- integrator/interfaces.py | 7 +++- integrator/office/__init__.py | 4 ++ integrator/office/base.py | 72 +++++++++++++++++++++++++++++++++++ integrator/tests.py | 2 +- integrator/testsetup.py | 7 ++++ 7 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 integrator/office/__init__.py create mode 100644 integrator/office/base.py diff --git a/integrator/README.txt b/integrator/README.txt index d166874..109aedd 100644 --- a/integrator/README.txt +++ b/integrator/README.txt @@ -27,7 +27,7 @@ configuration): >>> concepts, resources, views = t.setup() >>> len(concepts) + len(resources) - 17 + 18 External Collections @@ -211,6 +211,23 @@ Uploading Resources with HTTP PUT Requests u'file1' +Extracting Document Properties from MS Office Files +=================================================== + + >>> from loops.resource import Resource + >>> tOfficeFile = concepts['officefile'] + >>> path = os.path.join(dataDir, 'office') + >>> officeFile = addAndConfigureObject(resources, Resource, 'test.docx', + ... title=u'Example Word File', resourceType=tOfficeFile, + ... 
storageParams=dict(subdirectory=path)) + >>> aOfficeFile = adapted(officeFile) + >>> aOfficeFile.externalAddress = 'example.docx' + + >>> content = aOfficeFile.data + >>> len(content) + 195808 + + Fin de partie ============= diff --git a/integrator/collection.py b/integrator/collection.py index 5c85228..2a04185 100644 --- a/integrator/collection.py +++ b/integrator/collection.py @@ -167,11 +167,11 @@ class DirectoryCollectionProvider(object): container, Resource, name, title=title, resourceType=extFileType, - externalAddress=addr, storageName='fullpath', storageParams=dict(subdirectory=directory), contentType=contentType, ) + adapted(obj).externalAddress = addr # must be set last yield obj def getDirectory(self, client): diff --git a/integrator/interfaces.py b/integrator/interfaces.py index 25c6b93..219312c 100644 --- a/integrator/interfaces.py +++ b/integrator/interfaces.py @@ -25,7 +25,7 @@ $Id$ from zope.interface import Interface, Attribute from zope import interface, component, schema -from loops.interfaces import IConceptSchema, ILoopsAdapter +from loops.interfaces import IConceptSchema, ILoopsAdapter, IExternalFile from loops.util import _ @@ -114,3 +114,8 @@ class IExternalCollectionProvider(Interface): e.g. 'image/*', '*/*'. """ +class IOfficeFile(IExternalFile): + """ An external file that references a MS Office (2007/2010) file. + It provides access to the document content and properties. 
+ """ + diff --git a/integrator/office/__init__.py b/integrator/office/__init__.py new file mode 100644 index 0000000..4bc90fb --- /dev/null +++ b/integrator/office/__init__.py @@ -0,0 +1,4 @@ +""" +$Id$ +""" + diff --git a/integrator/office/base.py b/integrator/office/base.py new file mode 100644 index 0000000..b3ba4d5 --- /dev/null +++ b/integrator/office/base.py @@ -0,0 +1,72 @@ +# +# Copyright (c) 2010 Helmut Merz helmutm@cy55.de +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Resource adapter(s) for MS Office files. + +$Id$ +""" + +from xml.dom.minidom import parseString +from zipfile import ZipFile +from zope.cachedescriptors.property import Lazy +from zope import component +from zope.component import adapts +from zope.interface import implements +from zope.traversing.api import getName, getParent + +from cybertools.storage.interfaces import IExternalStorage +from loops.common import AdapterBase, adapted +from loops.integrator.interfaces import IOfficeFile +from loops.interfaces import IResource, IExternalFile +from loops.resource import ExternalFileAdapter +from loops.type import TypeInterfaceSourceList + + +TypeInterfaceSourceList.typeInterfaces += (IOfficeFile,) + + +class OfficeFile(ExternalFileAdapter): + """ An external file that references a MS Office (2007/2010) file. 
+ It provides access to the document content and properties. + """ + + implements(IOfficeFile) + + propertyMap = dict(version=u'Revision:') + + def setExternalAddress(self, addr): + super(OfficeFile, self).setExternalAddress(addr) + self.processDocument() + externalAddress = property(ExternalFileAdapter.getExternalAddress, + setExternalAddress) + + def processDocument(self): + storage = component.getUtility(IExternalStorage, name=self.storageName) + subDir = self.storageParams.get('subdirectory') + fn = storage.getDir(self.externalAddress, subDir) + # open ZIP file, process properties, set version property in file + zf = ZipFile(fn, 'a') + #print '***', zf.namelist() + propsXml = zf.read('docProps/custom.xml') + dom = parseString(propsXml) + props = dom.getElementsByTagName('property') + for p in props: + pass + #print '***', p.getAttribute('name'), p.childNodes[0].childNodes[0].data + zf.close() diff --git a/integrator/tests.py b/integrator/tests.py index 7ff96d6..1087054 100755 --- a/integrator/tests.py +++ b/integrator/tests.py @@ -6,7 +6,7 @@ from zope.interface.verify import verifyClass #from loops.versioning import versionable class Test(unittest.TestCase): - "Basic tests for the expert sub-package." + "Basic tests for the integrator sub-package." 
def testSomething(self): pass diff --git a/integrator/testsetup.py b/integrator/testsetup.py index ed453b3..e09b67d 100644 --- a/integrator/testsetup.py +++ b/integrator/testsetup.py @@ -16,6 +16,8 @@ from loops.interfaces import IFile, IExternalFile from loops.concept import Concept from loops.resource import Resource, FileAdapter, ExternalFileAdapter from loops.integrator.interfaces import IExternalSourceInfo, IExternalCollection +from loops.integrator.interfaces import IOfficeFile +from loops.integrator.office.base import OfficeFile from loops.knowledge.setup import SetupManager as KnowledgeSetupManager from loops.setup import SetupManager, addAndConfigureObject from loops.tests.setup import TestSite as BaseTestSite @@ -34,6 +36,7 @@ class TestSite(BaseTestSite): component.provideAdapter(FileAdapter, provides=IFile) component.provideAdapter(ExternalFileAdapter, provides=IExternalFile) + component.provideAdapter(OfficeFile, provides=IOfficeFile) component.provideUtility(fullPathStorage(), IExternalStorage, name='fullpath') @@ -48,6 +51,10 @@ class TestSite(BaseTestSite): tExtCollection = addAndConfigureObject(concepts, Concept, 'extcollection', title=u'External Collection', conceptType=tType, typeInterface=IExternalCollection) + tOfficeFile = addAndConfigureObject(concepts, Concept, 'officefile', + title=u'MS Office File', conceptType=tType, + typeInterface=IOfficeFile, + options=['storage:fullpath']) self.indexAll(concepts, resources) return concepts, resources, views