document (.mht file) generation with images OK
This commit is contained in:
parent
87d323a550
commit
c62122dcd9
2 changed files with 42 additions and 20 deletions
|
@ -20,12 +20,7 @@ Working with MHT Files
|
||||||
>>> data = f.read()
|
>>> data = f.read()
|
||||||
>>> f.close()
|
>>> f.close()
|
||||||
|
|
||||||
>>> imagePath = os.path.join(basePath, 'test_image.jpg')
|
>>> xbody = '''<div class="WordSection1">
|
||||||
>>> f = open(imagePath, 'rt')
|
|
||||||
>>> imageData = f.read()
|
|
||||||
>>> f.close()
|
|
||||||
|
|
||||||
>>> body = '''<div class="WordSection1">
|
|
||||||
... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75"
|
... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75"
|
||||||
... style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square">
|
... style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square">
|
||||||
... <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/>
|
... <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/>
|
||||||
|
@ -33,15 +28,27 @@ Working with MHT Files
|
||||||
... </div>
|
... </div>
|
||||||
... '''
|
... '''
|
||||||
|
|
||||||
|
>>> body = '''<div class="WordSection1">
|
||||||
|
... <img src="files/test_image.jpg" />
|
||||||
|
... </div>
|
||||||
|
... '''
|
||||||
|
|
||||||
>>> from cybertools.docgen.mht import MHTFile
|
>>> from cybertools.docgen.mht import MHTFile
|
||||||
>>> document = MHTFile(data, body)
|
>>> document = MHTFile(data, body)
|
||||||
>>> document.addImage(imageData, 'files/test_image.jpg')
|
|
||||||
|
>>> imageRefs = document.htmlDoc.getImageRefs()
|
||||||
|
>>> for path in imageRefs:
|
||||||
|
... imagePath = os.path.join(basePath, os.path.basename(path))
|
||||||
|
... f = open(imagePath, 'rt')
|
||||||
|
... imageData = f.read()
|
||||||
|
... f.close()
|
||||||
|
... document.addImage(imageData, path)
|
||||||
|
|
||||||
>>> document.insertBody()
|
>>> document.insertBody()
|
||||||
|
|
||||||
>>> output = document.asString()
|
>>> output = document.asString()
|
||||||
>>> len(data), len(output)
|
>>> len(data), len(output)
|
||||||
(294996, 336268)
|
(294996, 336140)
|
||||||
|
|
||||||
>>> outPath = os.path.join(basePath, 'out_doc.mht')
|
>>> outPath = os.path.join(basePath, 'out_doc.mht')
|
||||||
>>> #f = open(outPath, 'wt')
|
>>> #f = open(outPath, 'wt')
|
||||||
|
|
|
@ -21,10 +21,14 @@ Working with MHT Files.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
from cStringIO import StringIO
|
||||||
import email
|
import email
|
||||||
|
import Image
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from cybertools.text.lib.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class MHTFile(object):
|
class MHTFile(object):
|
||||||
|
|
||||||
|
@ -53,7 +57,7 @@ class MHTFile(object):
|
||||||
self.body = body
|
self.body = body
|
||||||
self.htmlDoc = HTMLDoc(body)
|
self.htmlDoc = HTMLDoc(body)
|
||||||
self.lastImageNum = 0
|
self.lastImageNum = 0
|
||||||
self.imageMappings = []
|
self.imageMappings = {}
|
||||||
for idx, part in enumerate(self.msg.walk()):
|
for idx, part in enumerate(self.msg.walk()):
|
||||||
docPath = part['Content-Location']
|
docPath = part['Content-Location']
|
||||||
contentType = part.get_content_type()
|
contentType = part.get_content_type()
|
||||||
|
@ -68,28 +72,30 @@ class MHTFile(object):
|
||||||
def getImageRefs(self):
|
def getImageRefs(self):
|
||||||
return self.htmlDoc.getImageRefs()
|
return self.htmlDoc.getImageRefs()
|
||||||
|
|
||||||
def addImage(self, imageData, path, contentType='image/jpeg'):
|
def addImage(self, imageData, path):
|
||||||
|
image = Image.open(StringIO(imageData))
|
||||||
|
width, height = image.size
|
||||||
contentType, enc = mimetypes.guess_type(path)
|
contentType, enc = mimetypes.guess_type(path)
|
||||||
bp, ext = os.path.splitext(path)
|
bp, ext = os.path.splitext(path)
|
||||||
self.lastImageNum += 1
|
self.lastImageNum += 1
|
||||||
name = 'image%03i%s' % (self.lastImageNum, ext)
|
name = 'image%03i%s' % (self.lastImageNum, ext)
|
||||||
self.imageMappings.append((path, name))
|
self.imageMappings[path] = (name, width, height)
|
||||||
flpos = self.indexes['filelist']
|
flpos = self.indexes['filelist']
|
||||||
vars = dict(path=self.path, docname=self.documentName,
|
vars = dict(path=self.path, docname=self.documentName,
|
||||||
suffix=self.foldernameSuffix,
|
suffix=self.foldernameSuffix,
|
||||||
imgname=name, ctype=contentType,
|
imgname=name, ctype=contentType,
|
||||||
imgdata=base64.encodestring(imageData))
|
imgdata=base64.encodestring(imageData))
|
||||||
content = self. imageTemplate % vars
|
content = self. imageTemplate % vars
|
||||||
self.parts.insert(flpos, content)
|
self.parts.insert(flpos, str(content))
|
||||||
filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern
|
filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern
|
||||||
filelist = self.parts[flpos]
|
filelist = self.parts[flpos]
|
||||||
self.parts[flpos] = filelist.replace(self.filelistPattern, filelistRep)
|
self.parts[flpos] = str(filelist.replace(self.filelistPattern, filelistRep))
|
||||||
|
|
||||||
|
|
||||||
def insertBody(self):
|
def insertBody(self):
|
||||||
self.htmlDoc.updateImageRefs(self.imageMappings)
|
path = '-'.join((self.documentName, self.foldernameSuffix))
|
||||||
# TODO: convert changed self.htmlDoc to new body
|
self.htmlDoc.updateImageRefs(self.imageMappings, path)
|
||||||
content = self.body.encode(self.encoding)
|
content = self.htmlDoc.doc.renderContents(self.encoding)
|
||||||
bodyIndex = self.indexes['body']
|
bodyIndex = self.indexes['body']
|
||||||
baseDocument = self.parts[bodyIndex]
|
baseDocument = self.parts[bodyIndex]
|
||||||
self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker,
|
self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker,
|
||||||
|
@ -106,11 +112,20 @@ class HTMLDoc(object):
|
||||||
|
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
self.data = data
|
self.data = data
|
||||||
|
self.doc = BeautifulSoup(data)
|
||||||
|
|
||||||
def getImageRefs(self):
|
def getImageRefs(self):
|
||||||
return []
|
return [img['src'] for img in self.doc('img')]
|
||||||
|
|
||||||
def updateImageRefs(self, mappings):
|
def updateImageRefs(self, mappings, path=''):
|
||||||
for old, new in mappings:
|
for img in self.doc('img'):
|
||||||
pass
|
name, width, height = mappings[img['src']]
|
||||||
|
imgdata = Tag(self.doc, 'v:imagedata')
|
||||||
|
imgdata['src'] = '/'.join((path, name))
|
||||||
|
imgdata.isSelfClosing = True
|
||||||
|
img.append(imgdata)
|
||||||
|
del img['src']
|
||||||
|
img['style'] = 'width:%spt;height:%spt' % (width, height)
|
||||||
|
img.isSelfClosing = False
|
||||||
|
img.name='v:shape'
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue