diff --git a/integrator/README.txt b/integrator/README.txt index cba0381..62b6532 100644 --- a/integrator/README.txt +++ b/integrator/README.txt @@ -231,7 +231,7 @@ Extracting Document Properties from MS Office Files >>> path = os.path.join(dataDir, 'office') >>> fn = os.path.join(path, 'example.docx') >>> os.path.getsize(fn) - 20337... + 23561... >>> officeFile = addAndConfigureObject(resources, Resource, 'test.docx', ... title=u'Example Word File', resourceType=tOfficeFile, @@ -241,7 +241,7 @@ Extracting Document Properties from MS Office Files >>> content = aOfficeFile.data >>> len(content) - 17409 + 20327 Clean up: >>> shutil.copy(fn + '.sav', fn) diff --git a/integrator/browser.py b/integrator/browser.py index ad103ad..aeafa87 100644 --- a/integrator/browser.py +++ b/integrator/browser.py @@ -40,8 +40,8 @@ class ExternalCollectionView(ConceptView): def update(self): if 'update' in self.request.form: cta = adapted(self.context) - if cta is not None: - cta.update() + cta.request = self.request + cta.update() if cta.updateMessage is not None: self.request.form['message'] = cta.updateMessage return True diff --git a/integrator/collection.py b/integrator/collection.py index 33c1912..35d4a9c 100644 --- a/integrator/collection.py +++ b/integrator/collection.py @@ -101,6 +101,7 @@ class ExternalCollectionAdapter(AdapterBase): adobj = adapted(obj) directory = provider.getDirectory(self) adobj.storageParams=dict(subdirectory=directory) + adobj.request = self.request adobj.externalAddress = addr # collect error information if adobj.processingErrors: @@ -209,6 +210,7 @@ class DirectoryCollectionProvider(object): contentType=contentType, ) adobj = adapted(obj) + adobj.request = client.request adobj.externalAddress = addr # must be set last # collect error information if adobj.processingErrors: diff --git a/integrator/office/base.py b/integrator/office/base.py index d4d86e5..67fe4bc 100644 --- a/integrator/office/base.py +++ b/integrator/office/base.py @@ -52,16 +52,12 @@ class OfficeFile(ExternalFileAdapter): implements(IOfficeFile) - _adapterAttributes = ExternalFileAdapter._adapterAttributes + ( - 'processingErrors',) - propertyMap = {u'Revision:': 'version'} propFileName = 'docProps/custom.xml' + corePropFileName = 'docProps/core.xml' fileExtensions = ('.docm', '.docx', 'dotm', 'dotx', 'pptx', 'potx', 'ppsx', '.xlsm', '.xlsx', '.xltm', '.xltx') - processingErrors = [] - @Lazy def logger(self): return getLogger('loops.integrator.office.base.OfficeFile') @@ -91,20 +87,32 @@ class OfficeFile(ExternalFileAdapter): from logging import getLogger self.logger.warn(e) return [] + if self.corePropFileName not in zf.namelist(): + self.logger.warn('Core properties not found in file %s.' % + self.externalAddress) if self.propFileName not in zf.namelist(): self.logger.warn('Custom properties not found in file %s.' % self.externalAddress) propsXml = zf.read(self.propFileName) + corePropsXml = zf.read(self.corePropFileName) + # TODO: read core.xml, return both trees in dictionary zf.close() - return etree.fromstring(propsXml) + return {'custom': etree.fromstring(propsXml), + 'core': etree.fromstring(corePropsXml)} def getDocProperty(self, pname): - for p in self.docPropertyDom: + for p in self.docPropertyDom['custom']: name = p.attrib.get('name') if name == pname: return p[0].text return None + def getCoreProperty(self, pname): + for p in self.docPropertyDom['core']: + if p.tag.endswith(pname): + return p.text + return None + def processDocument(self): changed = False docVersion = None @@ -112,11 +120,14 @@ class OfficeFile(ExternalFileAdapter): strType = ('{http://schemas.openxmlformats.org/' 'officeDocument/2006/docPropsVTypes}lpwstr') attributes = {} - dom = self.docPropertyDom + # get dc:description from core.xml + desc = self.getCoreProperty('description') + if desc is not None: + attributes['comments'] = desc + dom = self.docPropertyDom['custom'] for p in dom: name = p.attrib.get('name') value = p[0].text - #print '***', self.externalAddress, name, value, p[0].tag attr = self.propertyMap.get(name) if attr == 'version': docVersion = value diff --git a/integrator/testdata/office/example.docx b/integrator/testdata/office/example.docx index 0ab00cb..7f17545 100644 Binary files a/integrator/testdata/office/example.docx and b/integrator/testdata/office/example.docx differ diff --git a/integrator/testdata/office/example.docx.sav b/integrator/testdata/office/example.docx.sav index 0ab00cb..7f17545 100644 Binary files a/integrator/testdata/office/example.docx.sav and b/integrator/testdata/office/example.docx.sav differ diff --git a/resource.py b/resource.py index 274fa5e..9035c1e 100644 --- a/resource.py +++ b/resource.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2011 Helmut Merz helmutm@cy55.de +# Copyright (c) 2012 Helmut Merz helmutm@cy55.de # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,8 +18,6 @@ """ Definition of the Concept class. - -$Id$ """ from cStringIO import StringIO @@ -352,7 +350,10 @@ class ExternalFileAdapter(FileAdapter): implements(IExternalFile) _adapterAttributes = (FileAdapter._adapterAttributes - + ('storageParams', 'externalAddress', 'uniqueAddress')) + + ('storageParams', 'externalAddress', 'uniqueAddress', + 'processingErrors')) + + processingErrors = [] def getStorageParams(self): params = getattr(self.context, '_storageParams', None)