194 lines
6.8 KiB
Python
194 lines
6.8 KiB
Python
#
|
|
# Copyright (c) 2014 Helmut Merz helmutm@cy55.de
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
#
|
|
|
|
"""
|
|
Resource adapter(s) for MS Office files.
|
|
"""
|
|
|
|
from datetime import date, datetime, timedelta
|
|
from logging import getLogger
|
|
from lxml import etree
|
|
import os
|
|
import shutil
|
|
from time import strptime
|
|
from zipfile import ZipFile, BadZipfile
|
|
from zope.cachedescriptors.property import Lazy
|
|
from zope import component
|
|
from zope.component import adapts
|
|
from zope.interface import implements
|
|
from zope.traversing.api import getName, getParent
|
|
|
|
from cybertools.storage.interfaces import IExternalStorage
|
|
from loops.common import AdapterBase, adapted
|
|
from loops.integrator.interfaces import IOfficeFile
|
|
from loops.interfaces import IResource, IExternalFile
|
|
from loops.resource import ExternalFileAdapter
|
|
from loops.type import TypeInterfaceSourceList
|
|
from loops.versioning.interfaces import IVersionable
|
|
|
|
|
|
TypeInterfaceSourceList.typeInterfaces += (IOfficeFile,)
|
|
|
|
|
|
class OfficeFile(ExternalFileAdapter):
|
|
""" An external file that references a MS Office (2007/2010) file.
|
|
It provides access to the document content and properties.
|
|
"""
|
|
|
|
implements(IOfficeFile)
|
|
|
|
_adapterAttributes = (ExternalFileAdapter._adapterAttributes +
|
|
('documentPropertiesAccessible',))
|
|
|
|
propertyMap = {u'Revision:': 'version'}
|
|
propFileName = 'docProps/custom.xml'
|
|
corePropFileName = 'docProps/core.xml'
|
|
fileExtensions = ('.docm', '.docx', 'dotm', 'dotx', 'pptx', 'potx', 'ppsx',
|
|
'.xlsm', '.xlsx', '.xltm', '.xltx')
|
|
|
|
def getDocumentPropertiesAccessible(self):
|
|
return getattr(self.context, '_documentPropertiesAccessible', True)
|
|
def setDocumentPropertiesAccessible(self, value):
|
|
self.context._documentPropertiesAccessible = value
|
|
documentPropertiesAccessible = property(
|
|
getDocumentPropertiesAccessible, setDocumentPropertiesAccessible)
|
|
|
|
@Lazy
|
|
def logger(self):
|
|
return getLogger('loops.integrator.office.base.OfficeFile')
|
|
|
|
def setExternalAddress(self, addr):
|
|
super(OfficeFile, self).setExternalAddress(addr)
|
|
root, ext = os.path.splitext(self.externalAddress)
|
|
if ext.lower() in self.fileExtensions:
|
|
self.processDocument()
|
|
externalAddress = property(ExternalFileAdapter.getExternalAddress,
|
|
setExternalAddress)
|
|
|
|
@Lazy
|
|
def docFilename(self):
|
|
subDir = self.storageParams.get('subdirectory')
|
|
return self.storage.getDir(self.externalAddress, subDir)
|
|
|
|
@Lazy
|
|
def docPropertyDom(self):
|
|
fn = self.docFilename
|
|
result = dict(core=[], custom=[])
|
|
if not os.path.exists(fn):
|
|
# may happen before file has been created
|
|
return result
|
|
root, ext = os.path.splitext(fn)
|
|
if not ext.lower() in self.fileExtensions:
|
|
return result
|
|
try:
|
|
zf = ZipFile(fn, 'r')
|
|
self.documentPropertiesAccessible = True
|
|
except (IOError, BadZipfile), e:
|
|
from logging import getLogger
|
|
self.logger.warn(e)
|
|
self.documentPropertiesAccessible = False
|
|
return result
|
|
if self.corePropFileName not in zf.namelist():
|
|
self.logger.warn('Core properties not found in file %s.' %
|
|
self.externalAddress)
|
|
else:
|
|
result['core'] = etree.fromstring(zf.read(self.corePropFileName))
|
|
if self.propFileName not in zf.namelist():
|
|
self.logger.warn('Custom properties not found in file %s.' %
|
|
self.externalAddress)
|
|
else:
|
|
result['custom'] = etree.fromstring(zf.read(self.propFileName))
|
|
zf.close()
|
|
return result
|
|
|
|
def getDocProperty(self, pname):
|
|
for p in self.docPropertyDom['custom']:
|
|
name = p.attrib.get('name')
|
|
if name == pname:
|
|
return p[0].text
|
|
return None
|
|
|
|
def getCoreProperty(self, pname):
|
|
for p in self.docPropertyDom['core']:
|
|
if p.tag.endswith(pname):
|
|
return p.text
|
|
return None
|
|
|
|
def processDocument(self):
|
|
changed = False
|
|
docVersion = None
|
|
version = IVersionable(self.context).versionId
|
|
strType = ('{http://schemas.openxmlformats.org/'
|
|
'officeDocument/2006/docPropsVTypes}lpwstr')
|
|
attributes = {}
|
|
# get dc:description from core.xml
|
|
desc = self.getCoreProperty('description')
|
|
if not self.documentPropertiesAccessible:
|
|
return
|
|
if desc is not None:
|
|
attributes['comments'] = desc
|
|
dom = self.docPropertyDom['custom']
|
|
for p in dom:
|
|
name = p.attrib.get('name')
|
|
value = p[0].text
|
|
attr = self.propertyMap.get(name)
|
|
if attr == 'version':
|
|
docVersion = value
|
|
if docVersion and docVersion != version:
|
|
# update XML
|
|
p[0] = etree.Element(strType)
|
|
p[0].text = version
|
|
changed = True
|
|
elif attr is not None:
|
|
attributes[attr] = value
|
|
fn = self.docFilename
|
|
if changed:
|
|
newFn = fn + '.new'
|
|
zf = ZipFile(fn, 'r')
|
|
newZf = ZipFile(newFn, 'w')
|
|
for info in zf.infolist():
|
|
name = info.filename
|
|
if name != self.propFileName:
|
|
newZf.writestr(info, zf.read(name))
|
|
newZf.writestr(self.propFileName, etree.tostring(dom))
|
|
newZf.close()
|
|
shutil.move(newFn, fn)
|
|
errors = self.update(attributes)
|
|
if errors:
|
|
self.processingErrors = errors
|
|
|
|
def update(self, attributes):
|
|
# to be implemented by subclass
|
|
pass
|
|
|
|
|
|
def parseDate(s):
|
|
if not s:
|
|
return None
|
|
try:
|
|
tt = strptime(s, '%Y-%m-%dT%H:%M:%SZ')
|
|
except ValueError:
|
|
return None
|
|
# try:
|
|
# tt = strptime(s, '%d.%m.%y')
|
|
# except ValueError:
|
|
# tt = strptime(s, '%d.%m.%Y')
|
|
dt = datetime(*tt[:6]) + timedelta(hours=2)
|
|
return date(dt.year, dt.month, dt.day)
|
|
#return date(*strptime(s, '%Y-%m-%dT%H:%M:%SZ')[:3])
|
|
|