work in progress: document generation with embedded images via MHT file
This commit is contained in:
parent
efe5ff20da
commit
77eb301edd
2 changed files with 61 additions and 17 deletions
|
@ -22,18 +22,18 @@ Working with MHT Files
|
||||||
|
|
||||||
>>> imagePath = os.path.join(basePath, 'test_image.jpg')
|
>>> imagePath = os.path.join(basePath, 'test_image.jpg')
|
||||||
|
|
||||||
>>> from cybertools.docgen.mht import MHTFile
|
|
||||||
>>> document = MHTFile(data)
|
|
||||||
>>> document.addImage(imagePath)
|
|
||||||
|
|
||||||
>>> body = '''<img src="test_image.jpg" />
|
>>> body = '''<img src="test_image.jpg" />
|
||||||
... '''
|
... '''
|
||||||
|
|
||||||
>>> document.setBody(body)
|
>>> from cybertools.docgen.mht import MHTFile
|
||||||
|
>>> document = MHTFile(data, body)
|
||||||
|
>>> document.addImage(imagePath) # TODO: provide imageData, path
|
||||||
|
|
||||||
|
>>> document.insertBody()
|
||||||
|
|
||||||
>>> output = document.asString()
|
>>> output = document.asString()
|
||||||
>>> len(data), len(output)
|
>>> len(data), len(output)
|
||||||
(294996, 295017)
|
(294996, 295346)
|
||||||
|
|
||||||
>>> outPath = os.path.join(basePath, 'out_doc.mht')
|
>>> outPath = os.path.join(basePath, 'out_doc.mht')
|
||||||
>>> #f = open(outPath, 'wt')
|
>>> #f = open(outPath, 'wt')
|
||||||
|
|
|
@ -20,8 +20,9 @@
|
||||||
Working with MHT Files.
|
Working with MHT Files.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from email import message_from_string
|
import base64
|
||||||
#from email.multipart import MIMEMultipart
|
import email
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
class MHTFile(object):
|
class MHTFile(object):
|
||||||
|
@ -32,30 +33,73 @@ class MHTFile(object):
|
||||||
bodyMarker = 'lxdoc_body'
|
bodyMarker = 'lxdoc_body'
|
||||||
indexes = dict(body=2, filelist=-2)
|
indexes = dict(body=2, filelist=-2)
|
||||||
|
|
||||||
|
# MIME part template for an embedded image. It is filled via %-formatting
# with the keys docname, imgname, ctype and imgdata (see addImage).
# Fix: the image name placeholder was written as '$(imgname)s', which is not
# a %-format key, so the name was never substituted into Content-Location.
imageTemplate = ('\n'
    'Content-Location: file:///C:/AF2749EC/%(docname)s-Dateien/%(imgname)s\n'
    'Content-Transfer-Encoding: base64\n'
    'Content-Type: %(ctype)s\n\n%(imgdata)s\n\n')

# Entry added to the file list part for every newly embedded file.
filelistItemTemplate = ' <o:File HRef=3D"%s"/>\n'
# Existing file list entry used as anchor: new entries are put in front of it.
filelistPattern = '<o:File HRef=3D"filelist.xml"/>'
|
||||||
|
|
||||||
|
def __init__(self, data, body):
    """Set up the MHT document from its raw text and the body to insert.

    `data` is the complete MHT file content; it is parsed as an email
    message to find the MIME boundary and is also split on that boundary
    into `self.parts`. `body` is kept as-is and additionally wrapped in
    an HTMLDoc.
    """
    self.data = data
    self.body = body
    self.msg = email.message_from_string(data)
    self.boundary = self.msg.get_boundary()
    self.parts = data.split(self.boundary)
    self.htmlDoc = HTMLDoc(body)
    self.lastImageNum = 0
    self.imageMappings = []
    for position, part in enumerate(self.msg.walk()):
        if position != 1:
            continue
        # The second walked part supplies the document name; note that
        # the full Content-Location value is stored unchanged.
        self.documentName = part['Content-Location']
        # TODO: collect existing images to provide consistent naming
|
|
||||||
def addImage(self, imagePath):
|
def getImageRefs(self):
    """Return the image references reported by the wrapped HTML document."""
    document = self.htmlDoc
    return document.getImageRefs()
|
||||||
|
|
||||||
def setBody(self, body):
|
def addImage(self, imageData, path='image001.jpg', contentType='image/jpeg'):
    """Embed an image as a new MIME part and register it in the file list.

    `imageData` is the raw image content; it is base64-encoded and put
    into `imageTemplate` together with the document name, `path` (used
    as the image name) and `contentType`. The resulting part is inserted
    in front of the file list part, and a matching entry is added to the
    file list ahead of `filelistPattern`.
    """
    flpos = self.indexes['filelist']
    # TODO: get contentType from path
    # TODO: generate name, update self.imageMappings
    name = path
    # base64.encodestring() was removed in Python 3.9; use encodebytes()
    # where it exists and fall back to the old name otherwise.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    encoded = encode(imageData)
    if not isinstance(encoded, str):
        # a bytes result would be rendered as "b'...'" by %-formatting
        encoded = encoded.decode('ascii')
    values = dict(docname=self.documentName, imgname=name, ctype=contentType,
                  imgdata=encoded)
    content = self.imageTemplate % values
    self.parts.insert(flpos, content)
    filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern
    # With the negative file list index configured in `indexes`, inserting
    # in front of it leaves the file list part at that same index.
    filelist = self.parts[flpos]
    self.parts[flpos] = filelist.replace(self.filelistPattern, filelistRep)
|
||||||
|
|
||||||
|
|
||||||
|
def insertBody(self):
    """Put the encoded, escaped body into the base document part.

    The body is encoded with `self.encoding`, passed through `quopri`,
    and substituted for `bodyMarker` in the part at the body index.
    """
    # self.htmlDoc.updateImageRefs(self.imageMappings)
    # TODO: convert changed self.htmlDoc to new body
    encoded = self.body.encode(self.encoding)
    slot = self.indexes['body']
    template = self.parts[slot]
    escaped = self.quopri(encoded)
    self.parts[slot] = template.replace(self.bodyMarker, escaped)
|
||||||
|
|
||||||
def asString(self):
    """Return the document text: all parts joined again by the boundary."""
    separator = self.boundary
    return separator.join(self.parts)
|
||||||
|
|
||||||
def quopri(self, s):
    """Return `s` with every '="' sequence written as '=3D"'.

    Only an equals sign directly followed by a double quote is escaped;
    all other characters are passed through unchanged.
    """
    pieces = s.split('="')
    return '=3D"'.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLDoc(object):
    """Wrapper around the HTML body text (work in progress, mostly stubs)."""

    def __init__(self, data):
        """Keep the HTML text in `self.data`."""
        self.data = data

    def getImageRefs(self):
        """Return the image references of the document; currently always empty."""
        return list()

    def updateImageRefs(self, mappings):
        """Walk over (old, new) pairs; no replacement is performed yet."""
        for pair in mappings:
            # unpacking is kept so that malformed entries still raise
            old, new = pair
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue