#
#  Copyright (c) 2012 Helmut Merz helmutm@cy55.de
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

"""
Resource adapter(s) for MS Office files.
"""

from datetime import date, datetime, timedelta
from logging import getLogger
from lxml import etree
import os
import shutil
from time import strptime
from zipfile import ZipFile
from zope.cachedescriptors.property import Lazy
from zope import component
from zope.component import adapts
from zope.interface import implements
from zope.traversing.api import getName, getParent

from cybertools.storage.interfaces import IExternalStorage
from loops.common import AdapterBase, adapted
from loops.integrator.interfaces import IOfficeFile
from loops.interfaces import IResource, IExternalFile
from loops.resource import ExternalFileAdapter
from loops.type import TypeInterfaceSourceList
from loops.versioning.interfaces import IVersionable


TypeInterfaceSourceList.typeInterfaces += (IOfficeFile,)


class OfficeFile(ExternalFileAdapter):
    """ An external file that references a MS Office (2007/2010) file.
        It provides access to the document content and properties.

        The document is opened as a zip archive; custom properties are
        read from ``propFileName`` and core properties from
        ``corePropFileName``.
    """

    implements(IOfficeFile)

    # maps the name of a custom document property to the attribute
    # name it is stored under when ``update()`` is called
    propertyMap = {u'Revision:': 'version'}
    propFileName = 'docProps/custom.xml'
    corePropFileName = 'docProps/core.xml'
    # every entry needs its leading dot because ``os.path.splitext()``
    # returns the extension including the dot
    fileExtensions = ('.docm', '.docx', '.dotm', '.dotx',
                      '.pptx', '.potx', '.ppsx',
                      '.xlsm', '.xlsx', '.xltm', '.xltx')

    @Lazy
    def logger(self):
        """ The logger used for warnings issued by this adapter.
        """
        return getLogger('loops.integrator.office.base.OfficeFile')

    def _hasOfficeExtension(self, path):
        """ Return True if the extension of ``path`` (compared in lower
            case) is one of ``fileExtensions``.
        """
        ext = os.path.splitext(path)[1]
        return ext.lower() in self.fileExtensions

    def setExternalAddress(self, addr):
        """ Store the external address via the base class and process
            the document if the address has a known Office extension.
        """
        super(OfficeFile, self).setExternalAddress(addr)
        if self._hasOfficeExtension(self.externalAddress):
            self.processDocument()
    externalAddress = property(ExternalFileAdapter.getExternalAddress,
                               setExternalAddress)

    @Lazy
    def docFilename(self):
        """ The value the storage's ``getDir()`` returns for the external
            address and the configured ``subdirectory`` storage parameter;
            it is used as the path of the document file.
        """
        subDir = self.storageParams.get('subdirectory')
        return self.storage.getDir(self.externalAddress, subDir)

    @Lazy
    def docPropertyDom(self):
        """ The parsed property files of the document as a dictionary.

            The key 'custom' holds the root element of the custom
            properties, the key 'core' that of the core properties.
            A key is left out (and a warning logged) if the corresponding
            file is not part of the archive. An empty dictionary is
            returned if the file does not have a known Office extension
            or cannot be opened.
        """
        fn = self.docFilename
        if not self._hasOfficeExtension(fn):
            return {}
        try:
            zf = ZipFile(fn, 'r')
        except IOError as e:
            self.logger.warning(e)
            return {}
        result = {}
        try:
            names = zf.namelist()
            # only read members that exist - ZipFile.read() raises a
            # KeyError for a name that is not in the archive
            if self.corePropFileName in names:
                result['core'] = etree.fromstring(
                                        zf.read(self.corePropFileName))
            else:
                self.logger.warning('Core properties not found in file %s.'
                                    % self.externalAddress)
            if self.propFileName in names:
                result['custom'] = etree.fromstring(
                                        zf.read(self.propFileName))
            else:
                self.logger.warning('Custom properties not found in file %s.'
                                    % self.externalAddress)
        finally:
            # release the file handle even if reading or parsing fails
            zf.close()
        return result

    def getDocProperty(self, pname):
        """ Return the text of the first child of the custom property
            element whose ``name`` attribute equals ``pname``, or None if
            there is no such property or no custom properties at all.
        """
        root = self.docPropertyDom.get('custom')
        if root is None:
            return None
        for p in root:
            # a property element without a value child is skipped
            if p.attrib.get('name') == pname and len(p):
                return p[0].text
        return None

    def getCoreProperty(self, pname):
        """ Return the text of the first core property element whose tag
            ends with ``pname``, or None if there is no such element or
            no core properties at all.
        """
        root = self.docPropertyDom.get('core')
        if root is None:
            return None
        for p in root:
            if p.tag.endswith(pname):
                return p.text
        return None

    def processDocument(self):
        """ Synchronize document properties and object attributes.

            The core property 'description' is collected as attribute
            'comments'; custom properties are collected according to
            ``propertyMap``. If the version stored in the document is
            non-empty and differs from the version id of the context
            object, the document's custom properties file is rewritten
            with the object's version id. Finally ``update()`` is called
            with the collected attributes; anything truthy it returns is
            stored in ``processingErrors``.
        """
        changed = False
        version = IVersionable(self.context).versionId
        strType = ('{http://schemas.openxmlformats.org/'
                   'officeDocument/2006/docPropsVTypes}lpwstr')
        attributes = {}
        # get dc:description from core.xml
        desc = self.getCoreProperty('description')
        if desc is not None:
            attributes['comments'] = desc
        dom = self.docPropertyDom.get('custom')
        if dom is not None:
            for p in dom:
                if not len(p):
                    # property element without a value child
                    continue
                attr = self.propertyMap.get(p.attrib.get('name'))
                value = p[0].text
                if attr == 'version':
                    if value and value != version:
                        # update XML
                        p[0] = etree.Element(strType)
                        p[0].text = version
                        changed = True
                elif attr is not None:
                    attributes[attr] = value
        if changed:
            self._writeCustomProperties(dom)
        errors = self.update(attributes)
        if errors:
            self.processingErrors = errors

    def _writeCustomProperties(self, dom):
        """ Replace the custom properties file of the document with the
            serialized ``dom``, copying all other archive members as is.

            The archive is written to a temporary '<name>.new' file that
            is moved over the original when complete; it is removed again
            if writing fails.
        """
        fn = self.docFilename
        newFn = fn + '.new'
        zf = ZipFile(fn, 'r')
        try:
            newZf = ZipFile(newFn, 'w')
            try:
                for info in zf.infolist():
                    name = info.filename
                    if name != self.propFileName:
                        newZf.writestr(info, zf.read(name))
                newZf.writestr(self.propFileName, etree.tostring(dom))
            finally:
                newZf.close()
        except Exception:
            # do not leave a partially written archive behind
            if os.path.exists(newFn):
                os.remove(newFn)
            raise
        finally:
            # the source has to be closed before it gets replaced
            zf.close()
        shutil.move(newFn, fn)

    def update(self, attributes):
        """ Apply the collected ``attributes``; the value returned is
            treated as processing errors by ``processDocument()``.
        """
        # to be implemented by subclass
        pass


def parseDate(s, utcOffsetHours=2):
    """ Parse a timestamp of the form 'YYYY-MM-DDTHH:MM:SSZ' and return
        the date of that point in time shifted by ``utcOffsetHours``.

        Return None if ``s`` is empty or does not match the format.
        NOTE(review): the default offset of 2 hours presumably converts
        UTC to the local (summer) time of the original deployment -
        confirm before relying on it around midnight.
    """
    if not s:
        return None
    try:
        tt = strptime(s, '%Y-%m-%dT%H:%M:%SZ')
    except ValueError:
        return None
    shifted = datetime(*tt[:6]) + timedelta(hours=utcOffsetHours)
    return shifted.date()