document (.mht file) generation with images OK
This commit is contained in:
		
							parent
							
								
									87d323a550
								
							
						
					
					
						commit
						c62122dcd9
					
				
					 2 changed files with 42 additions and 20 deletions
				
			
		|  | @ -20,12 +20,7 @@ Working with MHT Files | ||||||
|   >>> data = f.read() |   >>> data = f.read() | ||||||
|   >>> f.close() |   >>> f.close() | ||||||
| 
 | 
 | ||||||
|   >>> imagePath = os.path.join(basePath, 'test_image.jpg') |   >>> xbody = '''<div class="WordSection1"> | ||||||
|   >>> f = open(imagePath, 'rt') |  | ||||||
|   >>> imageData = f.read() |  | ||||||
|   >>> f.close() |  | ||||||
| 
 |  | ||||||
|   >>> body = '''<div class="WordSection1"> |  | ||||||
|   ... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75" |   ... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75" | ||||||
|   ...     style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square"> |   ...     style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square"> | ||||||
|   ...   <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/> |   ...   <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/> | ||||||
|  | @ -33,15 +28,27 @@ Working with MHT Files | ||||||
|   ... </div> |   ... </div> | ||||||
|   ... ''' |   ... ''' | ||||||
| 
 | 
 | ||||||
|  |   >>> body = '''<div class="WordSection1"> | ||||||
|  |   ... <img src="files/test_image.jpg" /> | ||||||
|  |   ... </div> | ||||||
|  |   ... ''' | ||||||
|  | 
 | ||||||
|   >>> from cybertools.docgen.mht import MHTFile |   >>> from cybertools.docgen.mht import MHTFile | ||||||
|   >>> document = MHTFile(data, body) |   >>> document = MHTFile(data, body) | ||||||
|   >>> document.addImage(imageData, 'files/test_image.jpg') | 
 | ||||||
|  |   >>> imageRefs = document.htmlDoc.getImageRefs() | ||||||
|  |   >>> for path in imageRefs: | ||||||
|  |   ...     imagePath = os.path.join(basePath, os.path.basename(path)) | ||||||
|  |   ...     f = open(imagePath, 'rt') | ||||||
|  |   ...     imageData = f.read() | ||||||
|  |   ...     f.close() | ||||||
|  |   ...     document.addImage(imageData, path) | ||||||
| 
 | 
 | ||||||
|   >>> document.insertBody() |   >>> document.insertBody() | ||||||
| 
 | 
 | ||||||
|   >>> output = document.asString() |   >>> output = document.asString() | ||||||
|   >>> len(data), len(output) |   >>> len(data), len(output) | ||||||
|   (294996, 336268) |   (294996, 336140) | ||||||
| 
 | 
 | ||||||
|   >>> outPath = os.path.join(basePath, 'out_doc.mht') |   >>> outPath = os.path.join(basePath, 'out_doc.mht') | ||||||
|   >>> #f = open(outPath, 'wt') |   >>> #f = open(outPath, 'wt') | ||||||
|  |  | ||||||
|  | @ -21,10 +21,14 @@ Working with MHT Files. | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| import base64 | import base64 | ||||||
|  | from cStringIO import StringIO | ||||||
| import email | import email | ||||||
|  | import Image | ||||||
| import mimetypes | import mimetypes | ||||||
| import os | import os | ||||||
| 
 | 
 | ||||||
|  | from cybertools.text.lib.BeautifulSoup import BeautifulSoup, Tag | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class MHTFile(object): | class MHTFile(object): | ||||||
| 
 | 
 | ||||||
|  | @ -53,7 +57,7 @@ class MHTFile(object): | ||||||
|         self.body = body |         self.body = body | ||||||
|         self.htmlDoc = HTMLDoc(body) |         self.htmlDoc = HTMLDoc(body) | ||||||
|         self.lastImageNum = 0 |         self.lastImageNum = 0 | ||||||
|         self.imageMappings = [] |         self.imageMappings = {} | ||||||
|         for idx, part in enumerate(self.msg.walk()): |         for idx, part in enumerate(self.msg.walk()): | ||||||
|             docPath = part['Content-Location'] |             docPath = part['Content-Location'] | ||||||
|             contentType = part.get_content_type() |             contentType = part.get_content_type() | ||||||
|  | @ -68,28 +72,30 @@ class MHTFile(object): | ||||||
|     def getImageRefs(self): |     def getImageRefs(self): | ||||||
|         return self.htmlDoc.getImageRefs() |         return self.htmlDoc.getImageRefs() | ||||||
| 
 | 
 | ||||||
|     def addImage(self, imageData, path, contentType='image/jpeg'): |     def addImage(self, imageData, path): | ||||||
|  |         image = Image.open(StringIO(imageData)) | ||||||
|  |         width, height = image.size | ||||||
|         contentType, enc = mimetypes.guess_type(path) |         contentType, enc = mimetypes.guess_type(path) | ||||||
|         bp, ext = os.path.splitext(path) |         bp, ext = os.path.splitext(path) | ||||||
|         self.lastImageNum += 1 |         self.lastImageNum += 1 | ||||||
|         name = 'image%03i%s' % (self.lastImageNum, ext) |         name = 'image%03i%s' % (self.lastImageNum, ext) | ||||||
|         self.imageMappings.append((path, name)) |         self.imageMappings[path] = (name, width, height) | ||||||
|         flpos = self.indexes['filelist'] |         flpos = self.indexes['filelist'] | ||||||
|         vars = dict(path=self.path, docname=self.documentName,   |         vars = dict(path=self.path, docname=self.documentName,   | ||||||
|                     suffix=self.foldernameSuffix, |                     suffix=self.foldernameSuffix, | ||||||
|                     imgname=name, ctype=contentType, |                     imgname=name, ctype=contentType, | ||||||
|                     imgdata=base64.encodestring(imageData)) |                     imgdata=base64.encodestring(imageData)) | ||||||
|         content = self. imageTemplate % vars |         content = self. imageTemplate % vars | ||||||
|         self.parts.insert(flpos, content) |         self.parts.insert(flpos, str(content)) | ||||||
|         filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern |         filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern | ||||||
|         filelist = self.parts[flpos] |         filelist = self.parts[flpos] | ||||||
|         self.parts[flpos] = filelist.replace(self.filelistPattern, filelistRep) |         self.parts[flpos] = str(filelist.replace(self.filelistPattern, filelistRep)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|     def insertBody(self): |     def insertBody(self): | ||||||
|         self.htmlDoc.updateImageRefs(self.imageMappings) |         path = '-'.join((self.documentName, self.foldernameSuffix)) | ||||||
|         # TODO: convert changed self.htmlDoc to new body |         self.htmlDoc.updateImageRefs(self.imageMappings, path) | ||||||
|         content = self.body.encode(self.encoding) |         content = self.htmlDoc.doc.renderContents(self.encoding) | ||||||
|         bodyIndex = self.indexes['body'] |         bodyIndex = self.indexes['body'] | ||||||
|         baseDocument = self.parts[bodyIndex] |         baseDocument = self.parts[bodyIndex] | ||||||
|         self.parts[bodyIndex] =  baseDocument.replace(self.bodyMarker,  |         self.parts[bodyIndex] =  baseDocument.replace(self.bodyMarker,  | ||||||
|  | @ -106,11 +112,20 @@ class HTMLDoc(object): | ||||||
| 
 | 
 | ||||||
|     def __init__(self, data): |     def __init__(self, data): | ||||||
|         self.data = data |         self.data = data | ||||||
|  |         self.doc = BeautifulSoup(data) | ||||||
| 
 | 
 | ||||||
|     def getImageRefs(self): |     def getImageRefs(self): | ||||||
|         return [] |         return [img['src'] for img in self.doc('img')] | ||||||
| 
 | 
 | ||||||
|     def updateImageRefs(self, mappings): |     def updateImageRefs(self, mappings, path=''): | ||||||
|         for old, new in mappings: |         for img in self.doc('img'): | ||||||
|             pass |             name, width, height = mappings[img['src']] | ||||||
|  |             imgdata = Tag(self.doc, 'v:imagedata') | ||||||
|  |             imgdata['src'] = '/'.join((path, name)) | ||||||
|  |             imgdata.isSelfClosing = True | ||||||
|  |             img.append(imgdata) | ||||||
|  |             del img['src'] | ||||||
|  |             img['style'] = 'width:%spt;height:%spt' % (width, height) | ||||||
|  |             img.isSelfClosing = False | ||||||
|  |             img.name='v:shape' | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue