document generation with images: embedding of image OK

2012-12-11 12:27:06 +01:00 · 2012-12-11 12:27:06 +01:00 · b0a01bae1a
commit b0a01bae1a
parent 77eb301edd
3 changed files with 36 additions and 17 deletions
--- a/docgen/README.txt
+++ b/docgen/README.txt
@@ -21,19 +21,27 @@ Working with MHT Files
  >>> f.close()
  >>> imagePath = os.path.join(basePath, 'test_image.jpg')
  >>> f = open(imagePath, 'rt')
  >>> imageData = f.read()
  >>> f.close()
-  >>> body = '''<img src="test_image.jpg" />
+  >>> body = '''<div class="WordSection1">
  ... <v:shape id="Grafik_x0020_2" o:spid="_x0000_i1025" type="#_x0000_t75"
  ...     style="width:320pt;height:240pt;visibility:visible;mso-wrap-style:square">
  ...   <v:imagedata src="FB-Besprechungsprotokoll-Dateien/image002.jpg" o:title=""/>
  ... </v:shape>
  ... </div>
  ... '''
  >>> from cybertools.docgen.mht import MHTFile
  >>> document = MHTFile(data, body)
-  >>> document.addImage(imagePath)  # TODO: provide imageData, path
+  >>> document.addImage(imageData, 'files/test_image.jpg')
  >>> document.insertBody()
  >>> output = document.asString()
  >>> len(data), len(output)
-  (294996, 295346)
+  (294996, 336268)
  >>> outPath = os.path.join(basePath, 'out_doc.mht')
  >>> #f = open(outPath, 'wt')
--- a/docgen/mht.py
+++ b/docgen/mht.py
@@ -22,6 +22,7 @@ Working with MHT Files.
 import base64
 import email
 import mimetypes
 import os
@@ -31,10 +32,13 @@ class MHTFile(object):
    #encoding = 'ISO8859-15'
    encoding = 'Windows-1252'
    bodyMarker = 'lxdoc_body'
    foldernameSuffix = 'Dateien'
    indexes = dict(body=2, filelist=-2)
    path = documentName = None
    imageTemplate = ('\n'
-        'Content-Location: file:///C:/AF2749EC/%(docname)s-Dateien/$(imgname)s\n'
+        'Content-Location: %(path)s/%(docname)s-%(suffix)s/%(imgname)s\n'
        'Content-Transfer-Encoding: base64\n'
        'Content-Type: %(ctype)s\n\n%(imgdata)s\n\n')
@@ -50,23 +54,30 @@ class MHTFile(object):
        self.htmlDoc = HTMLDoc(body)
        self.lastImageNum = 0
        self.imageMappings = []
        #print '***', len(self.parts)
        for idx, part in enumerate(self.msg.walk()):
        #    print '***', idx, , part.get_content_type()
            if idx == 1:
            docPath = part['Content-Location']
-        self.documentName = docPath
+            contentType = part.get_content_type()
-        # TODO: collect existing images to provide consistent naming
+            #print '***', idx, docPath, contentType 
            if idx == self.indexes['body'] - 1:
                self.path, docname = os.path.split(docPath)
                self.documentName, ext = os.path.splitext(docname)
            if contentType.startswith('image/'):
                self.lastImageNum += 1
        #print '###', self.path, self.documentName, self.lastImageNum
    def getImageRefs(self):
        return self.htmlDoc.getImageRefs()
-    def addImage(self, imageData, path='image001.jpg', contentType='image/jpeg'):
+    def addImage(self, imageData, path, contentType='image/jpeg'):
        contentType, enc = mimetypes.guess_type(path)
        bp, ext = os.path.splitext(path)
        self.lastImageNum += 1
        name = 'image%03i%s' % (self.lastImageNum, ext)
        self.imageMappings.append((path, name))
        flpos = self.indexes['filelist']
-        # TODO: get contentType from path
+        vars = dict(path=self.path, docname=self.documentName,  
-        # TODO: generate name, update self.imageMappings
+                    suffix=self.foldernameSuffix,
-        name = path
+                    imgname=name, ctype=contentType,
        vars = dict(docname=self.documentName, imgname=name, ctype=contentType,
                    imgdata=base64.encodestring(imageData))
        content = self. imageTemplate % vars
        self.parts.insert(flpos, content)
@@ -76,7 +87,7 @@ class MHTFile(object):
    def insertBody(self):
-        # self.htmlDoc.updateImageRefs(self.imageMappings)
+        self.htmlDoc.updateImageRefs(self.imageMappings)
        # TODO: convert changed self.htmlDoc to new body
        content = self.body.encode(self.encoding)
        bodyIndex = self.indexes['body']
--- a/docgen/testing/test_image.jpg
+++ b/docgen/testing/test_image.jpg