work in progress: document generation with embedded images via MHT file

This commit is contained in:
Helmut Merz 2012-12-04 11:41:30 +01:00
parent 34359f53d0
commit efe5ff20da
2 changed files with 38 additions and 6 deletions

View file

@ -26,15 +26,19 @@ Working with MHT Files
>>> document = MHTFile(data)
>>> document.addImage(imagePath)
>>> body = '''
>>> body = '''<img src="test_image.jpg" />
... '''
>>> document.setBody(body)
>>> output = document.asString()
>>> len(data), len(output)
(294996, 295017)
>>> outPath = os.path.join(basePath, 'out_doc.mht')
>>> f = open(outPath, 'wt')
>>> f.write(document.data)
>>> f.close()
>>> #f = open(outPath, 'wt')
>>> #f.write(document.asString())
>>> #f.close()
>>> os.unlink(outPath)
>>> #os.unlink(outPath)

View file

@ -20,14 +20,42 @@
Working with MHT Files.
"""
from email import message_from_string
#from email.multipart import MIMEMultipart
class MHTFile(object):
    """Editable view of the text of an MHT (multipart MIME/HTML) document.

    The raw document text is cut into chunks at every occurrence of the
    multipart boundary string. Individual chunks can be modified and the
    document re-assembled with ``asString()``.
    """

    # Charset used to encode the body text before it is inserted into the
    # document. 'UTF-8' and 'ISO8859-15' were tried as alternatives.
    encoding = 'Windows-1252'
    # Placeholder text in the base document that setBody() replaces.
    bodyMarker = 'lxdoc_body'
    # Positions in ``self.parts`` of the chunks of interest. The boundary
    # string also occurs inside the Content-Type header, so the header itself
    # accounts for the first chunks of the split.
    # NOTE(review): 'filelist' is not used by the code visible here.
    indexes = dict(body=2, filelist=-2)

    def __init__(self, data):
        """Parse ``data`` and split it at the multipart boundary.

        :param data: complete text of the MHT document.
        :raises ValueError: if ``data`` declares no multipart boundary.
        """
        self.data = data
        self.msg = message_from_string(data)
        self.boundary = self.msg.get_boundary()
        if self.boundary is None:
            # Without this check ``data.split(None)`` would silently split
            # on whitespace and asString() would fail much later.
            raise ValueError('MHT data does not declare a multipart boundary')
        self.parts = data.split(self.boundary)

    def addImage(self, imagePath):
        """Attach the image at ``imagePath`` - not implemented yet (no-op)."""
        pass

    def setBody(self, body):
        """Replace the body marker in the base document with ``body``.

        ``body`` is encoded with ``self.encoding`` and then escaped by
        ``quopri()`` before it is substituted for ``bodyMarker`` in the chunk
        at ``indexes['body']``.

        :param body: (unicode) text to insert into the document.
        """
        content = body.encode(self.encoding)
        bodyIndex = self.indexes['body']
        baseDocument = self.parts[bodyIndex]
        self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker,
                                                     self.quopri(content))

    def asString(self):
        """Return the document text re-assembled from the current chunks."""
        return self.boundary.join(self.parts)

    def quopri(self, s):
        """Return ``s`` in quoted-printable form as a native string.

        Every ``=`` and every non-ASCII byte is escaped as ``=XX``; overlong
        lines get soft line breaks. The previous implementation only handled
        ``="`` and passed everything else through unescaped.
        NOTE(review): assumes the body part of the base document uses
        ``Content-Transfer-Encoding: quoted-printable`` - confirm.

        :param s: encoded bytes; text is accepted too and is encoded with
            ``self.encoding`` first.
        """
        # Imported locally: the stdlib module shares its name with this method.
        from quopri import encodestring
        if not isinstance(s, bytes):
            s = s.encode(self.encoding)
        encoded = encodestring(s)
        if not isinstance(encoded, str):
            # Python 3: the result is pure ASCII bytes, while the document
            # chunks are text.
            encoded = encoded.decode('ascii')
        return encoded