document (.mht file) generation with images OK

This commit is contained in:
Helmut Merz 2012-12-19 11:09:39 +01:00
parent 87d323a550
commit c62122dcd9
2 changed files with 42 additions and 20 deletions

View file

@ -20,12 +20,7 @@ Working with MHT Files
>>> data = f.read()
>>> f.close()
>>> imagePath = os.path.join(basePath, 'test_image.jpg')
>>> f = open(imagePath, 'rt')
>>> imageData = f.read()
>>> f.close()
>>> body = '''<div class="WordSection1">
>>> xbody = '''<div class="WordSection1">
... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75"
... style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square">
... <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/>
@ -33,15 +28,27 @@ Working with MHT Files
... </div>
... '''
>>> body = '''<div class="WordSection1">
... <img src="files/test_image.jpg" />
... </div>
... '''
>>> from cybertools.docgen.mht import MHTFile
>>> document = MHTFile(data, body)
>>> document.addImage(imageData, 'files/test_image.jpg')
>>> imageRefs = document.htmlDoc.getImageRefs()
>>> for path in imageRefs:
... imagePath = os.path.join(basePath, os.path.basename(path))
... f = open(imagePath, 'rt')
... imageData = f.read()
... f.close()
... document.addImage(imageData, path)
>>> document.insertBody()
>>> output = document.asString()
>>> len(data), len(output)
(294996, 336268)
(294996, 336140)
>>> outPath = os.path.join(basePath, 'out_doc.mht')
>>> #f = open(outPath, 'wt')

View file

@ -21,10 +21,14 @@ Working with MHT Files.
"""
import base64
from cStringIO import StringIO
import email
import Image
import mimetypes
import os
from cybertools.text.lib.BeautifulSoup import BeautifulSoup, Tag
class MHTFile(object):
@ -53,7 +57,7 @@ class MHTFile(object):
self.body = body
self.htmlDoc = HTMLDoc(body)
self.lastImageNum = 0
self.imageMappings = []
self.imageMappings = {}
for idx, part in enumerate(self.msg.walk()):
docPath = part['Content-Location']
contentType = part.get_content_type()
@ -68,28 +72,30 @@ class MHTFile(object):
def getImageRefs(self):
    """Return the image references of the HTML body.

    Delegates to the ``getImageRefs()`` method of ``self.htmlDoc``.
    """
    htmlDoc = self.htmlDoc
    return htmlDoc.getImageRefs()
def addImage(self, imageData, path, contentType=None):
    """Embed an image in the MHT document and register it for the body.

    ``imageData`` is the raw content of the image file, ``path`` the
    reference under which the image is known to the HTML body.  The
    image gets a generated name (``image001.jpg``, ``image002.png``, ...)
    and is recorded in ``self.imageMappings`` as
    ``path -> (name, width, height)``.

    ``contentType`` is optional.  When omitted it is guessed from the
    extension of ``path``; the previous signature accepted it as a
    keyword (default ``'image/jpeg'``), so it is still honoured when given.
    """
    # The image is opened only to find out its dimensions.
    image = Image.open(StringIO(imageData))
    width, height = image.size
    if contentType is None:
        contentType, enc = mimetypes.guess_type(path)
    if contentType is None:
        # guess_type() yields None for unknown extensions; use a generic
        # type instead of handing None to the template.
        contentType = 'application/octet-stream'
    bp, ext = os.path.splitext(path)
    self.lastImageNum += 1
    name = 'image%03i%s' % (self.lastImageNum, ext)
    self.imageMappings[path] = (name, width, height)
    flpos = self.indexes['filelist']
    params = dict(path=self.path, docname=self.documentName,
                  suffix=self.foldernameSuffix,
                  imgname=name, ctype=contentType,
                  imgdata=base64.encodestring(imageData))
    content = self.imageTemplate % params
    self.parts.insert(flpos, str(content))
    filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern
    # NOTE(review): this reads self.parts[flpos] right after inserting at
    # flpos.  If self.parts is a plain list, that is the part just inserted
    # rather than the former entry at that index -- confirm against how
    # self.indexes['filelist'] is maintained.
    filelist = self.parts[flpos]
    self.parts[flpos] = str(filelist.replace(self.filelistPattern, filelistRep))
def insertBody(self):
self.htmlDoc.updateImageRefs(self.imageMappings)
# TODO: convert changed self.htmlDoc to new body
content = self.body.encode(self.encoding)
path = '-'.join((self.documentName, self.foldernameSuffix))
self.htmlDoc.updateImageRefs(self.imageMappings, path)
content = self.htmlDoc.doc.renderContents(self.encoding)
bodyIndex = self.indexes['body']
baseDocument = self.parts[bodyIndex]
self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker,
@ -106,11 +112,20 @@ class HTMLDoc(object):
def __init__(self, data):
    """Keep the raw HTML text and a BeautifulSoup tree built from it."""
    self.doc = BeautifulSoup(data)
    self.data = data
def getImageRefs(self):
    """Return the ``src`` values of the ``img`` elements in the document.

    Elements without a ``src`` attribute are skipped instead of raising
    a ``KeyError``.
    """
    return [img['src'] for img in self.doc('img')
            if img.get('src') is not None]
def updateImageRefs(self, mappings, path=''):
    """Turn ``img`` elements into ``v:shape`` elements.

    ``mappings`` maps the original ``src`` value of an image to a
    ``(name, width, height)`` tuple.  ``path`` and the name, joined with
    ``'/'``, form the ``src`` of a new ``v:imagedata`` child element.
    Images whose ``src`` is missing or has no entry in ``mappings`` are
    left untouched instead of raising a ``KeyError``.
    """
    for img in self.doc('img'):
        mapping = mappings.get(img.get('src'))
        if mapping is None:
            # Not a registered image: keep it as a plain img element.
            continue
        name, width, height = mapping
        imgdata = Tag(self.doc, 'v:imagedata')
        imgdata['src'] = '/'.join((path, name))
        imgdata.isSelfClosing = True
        img.append(imgdata)
        del img['src']
        # Width and height from the mapping are written unchanged with a
        # 'pt' unit.
        img['style'] = 'width:%spt;height:%spt' % (width, height)
        # The element has a child now, so it is no longer self-closing.
        img.isSelfClosing = False
        img.name = 'v:shape'