work in progress: document generation with embedded images via MHT file
This commit is contained in:
		
							parent
							
								
									34359f53d0
								
							
						
					
					
						commit
						efe5ff20da
					
				
					 2 changed files with 38 additions and 6 deletions
				
			
		| 
						 | 
					@ -26,15 +26,19 @@ Working with MHT Files
 | 
				
			||||||
  >>> document = MHTFile(data)
 | 
					  >>> document = MHTFile(data)
 | 
				
			||||||
  >>> document.addImage(imagePath)
 | 
					  >>> document.addImage(imagePath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  >>> body = '''
 | 
					  >>> body = '''<img src="test_image.jpg" />
 | 
				
			||||||
  ... '''
 | 
					  ... '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  >>> document.setBody(body)
 | 
					  >>> document.setBody(body)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  >>> output = document.asString()
 | 
				
			||||||
 | 
					  >>> len(data), len(output)
 | 
				
			||||||
 | 
					  (294996, 295017)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  >>> outPath = os.path.join(basePath, 'out_doc.mht')
 | 
					  >>> outPath = os.path.join(basePath, 'out_doc.mht')
 | 
				
			||||||
  >>> f = open(outPath, 'wt')
 | 
					  >>> #f = open(outPath, 'wt')
 | 
				
			||||||
  >>> f.write(document.data)
 | 
					  >>> #f.write(document.asString())
 | 
				
			||||||
  >>> f.close()
 | 
					  >>> #f.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  >>> os.unlink(outPath)
 | 
					  >>> #os.unlink(outPath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -20,14 +20,42 @@
 | 
				
			||||||
Working with MHT Files.
 | 
					Working with MHT Files.
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from email import message_from_string
 | 
				
			||||||
 | 
					#from email.multipart import MIMEMultipart
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class MHTFile(object):
 | 
					class MHTFile(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #encoding = 'UTF-8'
 | 
				
			||||||
 | 
					    #encoding = 'ISO8859-15'
 | 
				
			||||||
 | 
					    encoding = 'Windows-1252'
 | 
				
			||||||
 | 
					    bodyMarker = 'lxdoc_body'
 | 
				
			||||||
 | 
					    indexes = dict(body=2, filelist=-2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, data):
 | 
					    def __init__(self, data):
 | 
				
			||||||
        self.data = data
 | 
					        self.data = data
 | 
				
			||||||
 | 
					        self.msg = message_from_string(data)
 | 
				
			||||||
 | 
					        self.boundary = self.msg.get_boundary()
 | 
				
			||||||
 | 
					        self.parts = data.split(self.boundary)
 | 
				
			||||||
 | 
					        #print '***', len(self.parts)
 | 
				
			||||||
 | 
					        #for idx, part in enumerate(self.msg.walk()):
 | 
				
			||||||
 | 
					        #    print '***', idx, part['Content-Location'], part.get_content_type()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def addImage(self, imagePath):
 | 
					    def addImage(self, imagePath):
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def setBody(self, body):
 | 
					    def setBody(self, body):
 | 
				
			||||||
        pass
 | 
					        content = body.encode(self.encoding)
 | 
				
			||||||
 | 
					        bodyIndex = self.indexes['body']
 | 
				
			||||||
 | 
					        baseDocument = self.parts[bodyIndex]
 | 
				
			||||||
 | 
					        self.parts[bodyIndex] =  baseDocument.replace(self.bodyMarker, 
 | 
				
			||||||
 | 
					                                        self.quopri(content))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def asString(self):
 | 
				
			||||||
 | 
					        #msg = MIMEMultipart('related')
 | 
				
			||||||
 | 
					        return self.boundary.join(self.parts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def quopri(self, s):
 | 
				
			||||||
 | 
					        return s.replace('="', '=3D"')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue