work in progress: document generation with embedded images via MHT file
This commit is contained in:
		
							parent
							
								
									34359f53d0
								
							
						
					
					
						commit
						efe5ff20da
					
				
					 2 changed files with 38 additions and 6 deletions
				
			
		| 
						 | 
				
			
			@ -26,15 +26,19 @@ Working with MHT Files
 | 
			
		|||
  >>> document = MHTFile(data)
 | 
			
		||||
  >>> document.addImage(imagePath)
 | 
			
		||||
 | 
			
		||||
  >>> body = '''
 | 
			
		||||
  >>> body = '''<img src="test_image.jpg" />
 | 
			
		||||
  ... '''
 | 
			
		||||
 | 
			
		||||
  >>> document.setBody(body)
 | 
			
		||||
 | 
			
		||||
  >>> output = document.asString()
 | 
			
		||||
  >>> len(data), len(output)
 | 
			
		||||
  (294996, 295017)
 | 
			
		||||
 | 
			
		||||
  >>> outPath = os.path.join(basePath, 'out_doc.mht')
 | 
			
		||||
  >>> f = open(outPath, 'wt')
 | 
			
		||||
  >>> f.write(document.data)
 | 
			
		||||
  >>> f.close()
 | 
			
		||||
  >>> #f = open(outPath, 'wt')
 | 
			
		||||
  >>> #f.write(document.asString())
 | 
			
		||||
  >>> #f.close()
 | 
			
		||||
 | 
			
		||||
  >>> os.unlink(outPath)
 | 
			
		||||
  >>> #os.unlink(outPath)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -20,14 +20,42 @@
 | 
			
		|||
Working with MHT Files.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from email import message_from_string
 | 
			
		||||
#from email.multipart import MIMEMultipart
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MHTFile(object):
    """Template-style access to the text of an MHT file.

    The raw text is parsed once with the ``email`` package to find the
    multipart boundary, then split on that boundary string.  The resulting
    pieces are kept in ``self.parts`` and joined back together (using the
    same boundary string) by ``asString()``.
    """

    # Alternatives that were tried before settling on Windows-1252:
    #encoding = 'UTF-8'
    #encoding = 'ISO8859-15'
    encoding = 'Windows-1252'
    # Placeholder text in the body part that ``setBody()`` replaces.
    bodyMarker = 'lxdoc_body'
    # Positions within ``self.parts``.  ``body`` is used by ``setBody()``;
    # ``filelist`` is not used by any code in this class yet.
    indexes = dict(body=2, filelist=-2)

    def __init__(self, data):
        """Parse ``data`` (the full text of an MHT file) and split it.

        Raises ``ValueError`` if the message declares no multipart
        boundary: splitting on ``None`` would silently split on whitespace
        and ``asString()`` would fail later with an unrelated error.
        """
        self.data = data
        self.msg = message_from_string(data)
        self.boundary = self.msg.get_boundary()
        if self.boundary is None:
            raise ValueError('MHT data does not declare a multipart boundary')
        self.parts = data.split(self.boundary)

    def addImage(self, imagePath):
        """Placeholder: embedding images is not implemented yet (no-op)."""
        pass

    def setBody(self, body):
        """Replace the body marker in the body part with ``body``.

        ``body`` is encoded with ``self.encoding`` first, so text that the
        target encoding cannot represent raises ``UnicodeEncodeError``.
        The ``="`` sequences of the result are escaped via ``quopri()``
        before insertion.
        """
        content = body.encode(self.encoding)
        if not isinstance(content, str):
            # Python 3: ``encode`` yields bytes, but the parts are text, so
            # go back to text after having validated the encoding.
            content = content.decode(self.encoding)
        bodyIndex = self.indexes['body']
        baseDocument = self.parts[bodyIndex]
        self.parts[bodyIndex] = baseDocument.replace(
            self.bodyMarker, self.quopri(content))

    def asString(self):
        """Return the document text rebuilt from the (possibly edited) parts."""
        return self.boundary.join(self.parts)

    def quopri(self, s):
        """Escape ``="`` as ``=3D"`` in ``s``.

        This is only a minimal subset of quoted-printable encoding: no
        other characters are escaped and lines are not wrapped.
        """
        return s.replace('="', '=3D"')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue