document generation with images: embedding of image OK

This commit is contained in:
Helmut Merz 2012-12-11 12:27:06 +01:00
parent 77eb301edd
commit b0a01bae1a
3 changed files with 36 additions and 17 deletions

View file

@ -21,19 +21,27 @@ Working with MHT Files
>>> f.close()
>>> imagePath = os.path.join(basePath, 'test_image.jpg')
>>> f = open(imagePath, 'rt')
>>> imageData = f.read()
>>> f.close()
>>> body = '''<img src="test_image.jpg" />
>>> body = '''<div class="WordSection1">
... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75"
... style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square">
... <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/>
... </v:shape>
... </div>
... '''
>>> from cybertools.docgen.mht import MHTFile
>>> document = MHTFile(data, body)
>>> document.addImage(imagePath) # TODO: provide imageData, path
>>> document.addImage(imageData, 'files/test_image.jpg')
>>> document.insertBody()
>>> output = document.asString()
>>> len(data), len(output)
(294996, 295346)
(294996, 336268)
>>> outPath = os.path.join(basePath, 'out_doc.mht')
>>> #f = open(outPath, 'wt')

View file

@ -22,6 +22,7 @@ Working with MHT Files.
import base64
import email
import mimetypes
import os
@ -31,10 +32,13 @@ class MHTFile(object):
#encoding = 'ISO8859-15'
encoding = 'Windows-1252'
bodyMarker = 'lxdoc_body'
foldernameSuffix = 'Dateien'
indexes = dict(body=2, filelist=-2)
path = documentName = None
imageTemplate = ('\n'
'Content-Location: file:///C:/AF2749EC/%(docname)s-Dateien/$(imgname)s\n'
'Content-Location: %(path)s/%(docname)s-%(suffix)s/%(imgname)s\n'
'Content-Transfer-Encoding: base64\n'
'Content-Type: %(ctype)s\n\n%(imgdata)s\n\n')
@ -50,23 +54,30 @@ class MHTFile(object):
self.htmlDoc = HTMLDoc(body)
self.lastImageNum = 0
self.imageMappings = []
#print '***', len(self.parts)
for idx, part in enumerate(self.msg.walk()):
# print '***', idx, , part.get_content_type()
if idx == 1:
docPath = part['Content-Location']
self.documentName = docPath
# TODO: collect existing images to provide consistent naming
contentType = part.get_content_type()
#print '***', idx, docPath, contentType
if idx == self.indexes['body'] - 1:
self.path, docname = os.path.split(docPath)
self.documentName, ext = os.path.splitext(docname)
if contentType.startswith('image/'):
self.lastImageNum += 1
#print '###', self.path, self.documentName, self.lastImageNum
# Thin pass-through: hands back, unmodified, the result of calling
# getImageRefs() on self.htmlDoc (assigned as HTMLDoc(body) earlier in
# this class).
def getImageRefs(self):
return self.htmlDoc.getImageRefs()
def addImage(self, imageData, path='image001.jpg', contentType='image/jpeg'):
def addImage(self, imageData, path, contentType='image/jpeg'):
contentType, enc = mimetypes.guess_type(path)
bp, ext = os.path.splitext(path)
self.lastImageNum += 1
name = 'image%03i%s' % (self.lastImageNum, ext)
self.imageMappings.append((path, name))
flpos = self.indexes['filelist']
# TODO: get contentType from path
# TODO: generate name, update self.imageMappings
name = path
vars = dict(docname=self.documentName, imgname=name, ctype=contentType,
vars = dict(path=self.path, docname=self.documentName,
suffix=self.foldernameSuffix,
imgname=name, ctype=contentType,
imgdata=base64.encodestring(imageData))
content = self. imageTemplate % vars
self.parts.insert(flpos, content)
@ -76,7 +87,7 @@ class MHTFile(object):
def insertBody(self):
# self.htmlDoc.updateImageRefs(self.imageMappings)
self.htmlDoc.updateImageRefs(self.imageMappings)
# TODO: convert changed self.htmlDoc to new body
content = self.body.encode(self.encoding)
bodyIndex = self.indexes['body']

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB