extend processing of office files: error handling, handle description property as comments

This commit is contained in:
Helmut Merz 2012-10-08 11:50:51 +02:00
parent 027b661110
commit 19b50d9e8f
7 changed files with 31 additions and 17 deletions

View file

@ -231,7 +231,7 @@ Extracting Document Properties from MS Office Files
>>> path = os.path.join(dataDir, 'office')
>>> fn = os.path.join(path, 'example.docx')
>>> os.path.getsize(fn)
20337...
23561...
>>> officeFile = addAndConfigureObject(resources, Resource, 'test.docx',
... title=u'Example Word File', resourceType=tOfficeFile,
@ -241,7 +241,7 @@ Extracting Document Properties from MS Office Files
>>> content = aOfficeFile.data
>>> len(content)
17409
20327
Clean up:
>>> shutil.copy(fn + '.sav', fn)

View file

@ -40,7 +40,7 @@ class ExternalCollectionView(ConceptView):
def update(self):
if 'update' in self.request.form:
cta = adapted(self.context)
if cta is not None:
cta.request = self.request
cta.update()
if cta.updateMessage is not None:
self.request.form['message'] = cta.updateMessage

View file

@ -101,6 +101,7 @@ class ExternalCollectionAdapter(AdapterBase):
adobj = adapted(obj)
directory = provider.getDirectory(self)
adobj.storageParams=dict(subdirectory=directory)
adobj.request = self.request
adobj.externalAddress = addr
# collect error information
if adobj.processingErrors:
@ -209,6 +210,7 @@ class DirectoryCollectionProvider(object):
contentType=contentType,
)
adobj = adapted(obj)
adobj.request = client.request
adobj.externalAddress = addr # must be set last
# collect error information
if adobj.processingErrors:

View file

@ -52,16 +52,12 @@ class OfficeFile(ExternalFileAdapter):
implements(IOfficeFile)
_adapterAttributes = ExternalFileAdapter._adapterAttributes + (
'processingErrors',)
propertyMap = {u'Revision:': 'version'}
propFileName = 'docProps/custom.xml'
corePropFileName = 'docProps/core.xml'
fileExtensions = ('.docm', '.docx', 'dotm', 'dotx', 'pptx', 'potx', 'ppsx',
'.xlsm', '.xlsx', '.xltm', '.xltx')
processingErrors = []
@Lazy
def logger(self):
return getLogger('loops.integrator.office.base.OfficeFile')
@ -91,20 +87,32 @@ class OfficeFile(ExternalFileAdapter):
from logging import getLogger
self.logger.warn(e)
return []
if self.corePropFileName not in zf.namelist():
self.logger.warn('Core properties not found in file %s.' %
self.externalAddress)
if self.propFileName not in zf.namelist():
self.logger.warn('Custom properties not found in file %s.' %
self.externalAddress)
propsXml = zf.read(self.propFileName)
corePropsXml = zf.read(self.corePropFileName)
# TODO: read core.xml, return both trees in dictionary
zf.close()
return etree.fromstring(propsXml)
return {'custom': etree.fromstring(propsXml),
'core': etree.fromstring(corePropsXml)}
def getDocProperty(self, pname):
for p in self.docPropertyDom:
for p in self.docPropertyDom['custom']:
name = p.attrib.get('name')
if name == pname:
return p[0].text
return None
def getCoreProperty(self, pname):
    """Return the text of the first core property whose tag ends with pname.

    Iterates over the direct children of the 'core' tree stored in
    ``self.docPropertyDom`` and compares each child's tag by suffix, so
    a bare name such as ``'description'`` also matches a tag that
    carries a namespace prefix in front of it.

    Returns the matching element's text, or None if no child matches.
    """
    for prop in self.docPropertyDom['core']:
        tag = prop.tag
        # Comment and processing-instruction nodes carry a callable
        # instead of a string as their tag; skip them instead of
        # failing with an AttributeError on ``endswith``.
        if not hasattr(tag, 'endswith'):
            continue
        if tag.endswith(pname):
            return prop.text
    return None
def processDocument(self):
changed = False
docVersion = None
@ -112,11 +120,14 @@ class OfficeFile(ExternalFileAdapter):
strType = ('{http://schemas.openxmlformats.org/'
'officeDocument/2006/docPropsVTypes}lpwstr')
attributes = {}
dom = self.docPropertyDom
# get dc:description from core.xml
desc = self.getCoreProperty('description')
if desc is not None:
attributes['comments'] = desc
dom = self.docPropertyDom['custom']
for p in dom:
name = p.attrib.get('name')
value = p[0].text
#print '***', self.externalAddress, name, value, p[0].tag
attr = self.propertyMap.get(name)
if attr == 'version':
docVersion = value

Binary file not shown.

Binary file not shown.

View file

@ -1,5 +1,5 @@
#
# Copyright (c) 2011 Helmut Merz helmutm@cy55.de
# Copyright (c) 2012 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -18,8 +18,6 @@
"""
Definition of the Concept class.
$Id$
"""
from cStringIO import StringIO
@ -352,7 +350,10 @@ class ExternalFileAdapter(FileAdapter):
implements(IExternalFile)
_adapterAttributes = (FileAdapter._adapterAttributes
+ ('storageParams', 'externalAddress', 'uniqueAddress'))
+ ('storageParams', 'externalAddress', 'uniqueAddress',
'processingErrors'))
processingErrors = []
def getStorageParams(self):
params = getattr(self.context, '_storageParams', None)