document generation with images: embedding of image OK

This commit is contained in:
Helmut Merz 2012-12-11 12:27:06 +01:00
parent 77eb301edd
commit b0a01bae1a
3 changed files with 36 additions and 17 deletions

View file

@@ -21,19 +21,27 @@ Working with MHT Files
>>> f.close() >>> f.close()
>>> imagePath = os.path.join(basePath, 'test_image.jpg') >>> imagePath = os.path.join(basePath, 'test_image.jpg')
>>> f = open(imagePath, 'rt')
>>> imageData = f.read()
>>> f.close()
>>> body = '''<img src="test_image.jpg" /> >>> body = '''<div class="WordSection1">
... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75"
... style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square">
... <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/>
... </v:shape>
... </div>
... ''' ... '''
>>> from cybertools.docgen.mht import MHTFile >>> from cybertools.docgen.mht import MHTFile
>>> document = MHTFile(data, body) >>> document = MHTFile(data, body)
>>> document.addImage(imagePath) # TODO: provide imageData, path >>> document.addImage(imageData, 'files/test_image.jpg')
>>> document.insertBody() >>> document.insertBody()
>>> output = document.asString() >>> output = document.asString()
>>> len(data), len(output) >>> len(data), len(output)
(294996, 295346) (294996, 336268)
>>> outPath = os.path.join(basePath, 'out_doc.mht') >>> outPath = os.path.join(basePath, 'out_doc.mht')
>>> #f = open(outPath, 'wt') >>> #f = open(outPath, 'wt')

View file

@@ -22,6 +22,7 @@ Working with MHT Files.
import base64 import base64
import email import email
import mimetypes
import os import os
@@ -31,15 +32,18 @@ class MHTFile(object):
#encoding = 'ISO8859-15' #encoding = 'ISO8859-15'
encoding = 'Windows-1252' encoding = 'Windows-1252'
bodyMarker = 'lxdoc_body' bodyMarker = 'lxdoc_body'
foldernameSuffix = 'Dateien'
indexes = dict(body=2, filelist=-2) indexes = dict(body=2, filelist=-2)
path = documentName = None
imageTemplate = ('\n' imageTemplate = ('\n'
'Content-Location: file:///C:/AF2749EC/%(docname)s-Dateien/$(imgname)s\n' 'Content-Location: %(path)s/%(docname)s-%(suffix)s/%(imgname)s\n'
'Content-Transfer-Encoding: base64\n' 'Content-Transfer-Encoding: base64\n'
'Content-Type: %(ctype)s\n\n%(imgdata)s\n\n') 'Content-Type: %(ctype)s\n\n%(imgdata)s\n\n')
filelistItemTemplate = ' <o:File HRef=3D"%s"/>\n' filelistItemTemplate = ' <o:File HRef=3D"%s"/>\n'
filelistPattern ='<o:File HRef=3D"filelist.xml"/>' filelistPattern =' <o:File HRef=3D"filelist.xml"/>'
def __init__(self, data, body): def __init__(self, data, body):
self.data = data self.data = data
@@ -50,23 +54,30 @@ class MHTFile(object):
self.htmlDoc = HTMLDoc(body) self.htmlDoc = HTMLDoc(body)
self.lastImageNum = 0 self.lastImageNum = 0
self.imageMappings = [] self.imageMappings = []
#print '***', len(self.parts)
for idx, part in enumerate(self.msg.walk()): for idx, part in enumerate(self.msg.walk()):
# print '***', idx, , part.get_content_type() docPath = part['Content-Location']
if idx == 1: contentType = part.get_content_type()
docPath = part['Content-Location'] #print '***', idx, docPath, contentType
self.documentName = docPath if idx == self.indexes['body'] - 1:
# TODO: collect existing images to provide consistent naming self.path, docname = os.path.split(docPath)
self.documentName, ext = os.path.splitext(docname)
if contentType.startswith('image/'):
self.lastImageNum += 1
#print '###', self.path, self.documentName, self.lastImageNum
def getImageRefs(self): def getImageRefs(self):
return self.htmlDoc.getImageRefs() return self.htmlDoc.getImageRefs()
def addImage(self, imageData, path='image001.jpg', contentType='image/jpeg'): def addImage(self, imageData, path, contentType='image/jpeg'):
contentType, enc = mimetypes.guess_type(path)
bp, ext = os.path.splitext(path)
self.lastImageNum += 1
name = 'image%03i%s' % (self.lastImageNum, ext)
self.imageMappings.append((path, name))
flpos = self.indexes['filelist'] flpos = self.indexes['filelist']
# TODO: get contentType from path vars = dict(path=self.path, docname=self.documentName,
# TODO: generate name, update self.imageMappings suffix=self.foldernameSuffix,
name = path imgname=name, ctype=contentType,
vars = dict(docname=self.documentName, imgname=name, ctype=contentType,
imgdata=base64.encodestring(imageData)) imgdata=base64.encodestring(imageData))
content = self. imageTemplate % vars content = self. imageTemplate % vars
self.parts.insert(flpos, content) self.parts.insert(flpos, content)
@@ -76,7 +87,7 @@ class MHTFile(object):
def insertBody(self): def insertBody(self):
# self.htmlDoc.updateImageRefs(self.imageMappings) self.htmlDoc.updateImageRefs(self.imageMappings)
# TODO: convert changed self.htmlDoc to new body # TODO: convert changed self.htmlDoc to new body
content = self.body.encode(self.encoding) content = self.body.encode(self.encoding)
bodyIndex = self.indexes['body'] bodyIndex = self.indexes['body']

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB