Extend processing of office files: improve error handling and store the document's description property as comments
This commit is contained in:
parent
027b661110
commit
19b50d9e8f
7 changed files with 31 additions and 17 deletions
|
@ -231,7 +231,7 @@ Extracting Document Properties from MS Office Files
|
|||
>>> path = os.path.join(dataDir, 'office')
|
||||
>>> fn = os.path.join(path, 'example.docx')
|
||||
>>> os.path.getsize(fn)
|
||||
20337...
|
||||
23561...
|
||||
|
||||
>>> officeFile = addAndConfigureObject(resources, Resource, 'test.docx',
|
||||
... title=u'Example Word File', resourceType=tOfficeFile,
|
||||
|
@ -241,7 +241,7 @@ Extracting Document Properties from MS Office Files
|
|||
|
||||
>>> content = aOfficeFile.data
|
||||
>>> len(content)
|
||||
17409
|
||||
20327
|
||||
|
||||
Clean up:
|
||||
>>> shutil.copy(fn + '.sav', fn)
|
||||
|
|
|
@ -40,7 +40,7 @@ class ExternalCollectionView(ConceptView):
|
|||
def update(self):
|
||||
if 'update' in self.request.form:
|
||||
cta = adapted(self.context)
|
||||
if cta is not None:
|
||||
cta.request = self.request
|
||||
cta.update()
|
||||
if cta.updateMessage is not None:
|
||||
self.request.form['message'] = cta.updateMessage
|
||||
|
|
|
@ -101,6 +101,7 @@ class ExternalCollectionAdapter(AdapterBase):
|
|||
adobj = adapted(obj)
|
||||
directory = provider.getDirectory(self)
|
||||
adobj.storageParams=dict(subdirectory=directory)
|
||||
adobj.request = self.request
|
||||
adobj.externalAddress = addr
|
||||
# collect error information
|
||||
if adobj.processingErrors:
|
||||
|
@ -209,6 +210,7 @@ class DirectoryCollectionProvider(object):
|
|||
contentType=contentType,
|
||||
)
|
||||
adobj = adapted(obj)
|
||||
adobj.request = client.request
|
||||
adobj.externalAddress = addr # must be set last
|
||||
# collect error information
|
||||
if adobj.processingErrors:
|
||||
|
|
|
@ -52,16 +52,12 @@ class OfficeFile(ExternalFileAdapter):
|
|||
|
||||
implements(IOfficeFile)
|
||||
|
||||
_adapterAttributes = ExternalFileAdapter._adapterAttributes + (
|
||||
'processingErrors',)
|
||||
|
||||
propertyMap = {u'Revision:': 'version'}
|
||||
propFileName = 'docProps/custom.xml'
|
||||
corePropFileName = 'docProps/core.xml'
|
||||
fileExtensions = ('.docm', '.docx', 'dotm', 'dotx', 'pptx', 'potx', 'ppsx',
|
||||
'.xlsm', '.xlsx', '.xltm', '.xltx')
|
||||
|
||||
processingErrors = []
|
||||
|
||||
@Lazy
|
||||
def logger(self):
|
||||
return getLogger('loops.integrator.office.base.OfficeFile')
|
||||
|
@ -91,20 +87,32 @@ class OfficeFile(ExternalFileAdapter):
|
|||
from logging import getLogger
|
||||
self.logger.warn(e)
|
||||
return []
|
||||
if self.corePropFileName not in zf.namelist():
|
||||
self.logger.warn('Core properties not found in file %s.' %
|
||||
self.externalAddress)
|
||||
if self.propFileName not in zf.namelist():
|
||||
self.logger.warn('Custom properties not found in file %s.' %
|
||||
self.externalAddress)
|
||||
propsXml = zf.read(self.propFileName)
|
||||
corePropsXml = zf.read(self.corePropFileName)
|
||||
# TODO: read core.xml, return both trees in dictionary
|
||||
zf.close()
|
||||
return etree.fromstring(propsXml)
|
||||
return {'custom': etree.fromstring(propsXml),
|
||||
'core': etree.fromstring(corePropsXml)}
|
||||
|
||||
def getDocProperty(self, pname):
|
||||
for p in self.docPropertyDom:
|
||||
for p in self.docPropertyDom['custom']:
|
||||
name = p.attrib.get('name')
|
||||
if name == pname:
|
||||
return p[0].text
|
||||
return None
|
||||
|
||||
def getCoreProperty(self, pname):
|
||||
for p in self.docPropertyDom['core']:
|
||||
if p.tag.endswith(pname):
|
||||
return p.text
|
||||
return None
|
||||
|
||||
def processDocument(self):
|
||||
changed = False
|
||||
docVersion = None
|
||||
|
@ -112,11 +120,14 @@ class OfficeFile(ExternalFileAdapter):
|
|||
strType = ('{http://schemas.openxmlformats.org/'
|
||||
'officeDocument/2006/docPropsVTypes}lpwstr')
|
||||
attributes = {}
|
||||
dom = self.docPropertyDom
|
||||
# get dc:description from core.xml
|
||||
desc = self.getCoreProperty('description')
|
||||
if desc is not None:
|
||||
attributes['comments'] = desc
|
||||
dom = self.docPropertyDom['custom']
|
||||
for p in dom:
|
||||
name = p.attrib.get('name')
|
||||
value = p[0].text
|
||||
#print '***', self.externalAddress, name, value, p[0].tag
|
||||
attr = self.propertyMap.get(name)
|
||||
if attr == 'version':
|
||||
docVersion = value
|
||||
|
|
BIN
integrator/testdata/office/example.docx
vendored
BIN
integrator/testdata/office/example.docx
vendored
Binary file not shown.
BIN
integrator/testdata/office/example.docx.sav
vendored
BIN
integrator/testdata/office/example.docx.sav
vendored
Binary file not shown.
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2011 Helmut Merz helmutm@cy55.de
|
||||
# Copyright (c) 2012 Helmut Merz helmutm@cy55.de
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
@ -18,8 +18,6 @@
|
|||
|
||||
"""
|
||||
Definition of the Concept class.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
from cStringIO import StringIO
|
||||
|
@ -352,7 +350,10 @@ class ExternalFileAdapter(FileAdapter):
|
|||
implements(IExternalFile)
|
||||
|
||||
_adapterAttributes = (FileAdapter._adapterAttributes
|
||||
+ ('storageParams', 'externalAddress', 'uniqueAddress'))
|
||||
+ ('storageParams', 'externalAddress', 'uniqueAddress',
|
||||
'processingErrors'))
|
||||
|
||||
processingErrors = []
|
||||
|
||||
def getStorageParams(self):
|
||||
params = getattr(self.context, '_storageParams', None)
|
||||
|
|
Loading…
Add table
Reference in a new issue