docgen: Python3 fixes

This commit is contained in:
Helmut Merz 2024-09-22 11:16:47 +02:00
parent 4b84e816b4
commit ee9d062833
4 changed files with 19 additions and 29 deletions

View file

@@ -39,7 +39,7 @@ Working with MHT Files
>>> imageRefs = document.htmlDoc.getImageRefs() >>> imageRefs = document.htmlDoc.getImageRefs()
>>> for path in imageRefs: >>> for path in imageRefs:
... imagePath = os.path.join(basePath, os.path.basename(path)) ... imagePath = os.path.join(basePath, os.path.basename(path))
... f = open(imagePath, 'rt') ... f = open(imagePath, 'rb')
... imageData = f.read() ... imageData = f.read()
... f.close() ... f.close()
... document.addImage(imageData, path) ... document.addImage(imageData, path)
@@ -48,7 +48,9 @@ Working with MHT Files
>>> output = document.asString() >>> output = document.asString()
>>> len(data), len(output) >>> len(data), len(output)
(294996, 336142) (290577, 331234)
was (Py2) (294996, 336142)
>>> outPath = os.path.join(basePath, 'out_doc.mht') >>> outPath = os.path.join(basePath, 'out_doc.mht')
>>> #f = open(outPath, 'wt') >>> #f = open(outPath, 'wt')

View file

@@ -1,33 +1,17 @@
# # cybertools.docgen.mht
# Copyright (c) 2012 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
""" """
Working with MHT Files. Working with MHT Files.
""" """
import base64 import base64
from cStringIO import StringIO from io import BytesIO, StringIO
import email import email
from PIL import Image from PIL import Image
import mimetypes import mimetypes
import os import os
from cybertools.text.lib.BeautifulSoup import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
class MHTFile(object): class MHTFile(object):
@@ -72,7 +56,7 @@ class MHTFile(object):
return self.htmlDoc.getImageRefs() return self.htmlDoc.getImageRefs()
def addImage(self, imageData, path): def addImage(self, imageData, path):
image = Image.open(StringIO(imageData)) image = Image.open(BytesIO(imageData))
width, height = image.size width, height = image.size
contentType, enc = mimetypes.guess_type(path) contentType, enc = mimetypes.guess_type(path)
bp, ext = os.path.splitext(path) bp, ext = os.path.splitext(path)
@@ -83,7 +67,7 @@ class MHTFile(object):
vars = dict(path=self.path, docname=self.documentName, vars = dict(path=self.path, docname=self.documentName,
suffix=self.foldernameSuffix, suffix=self.foldernameSuffix,
imgname=name, ctype=contentType, imgname=name, ctype=contentType,
imgdata=base64.encodestring(imageData)) imgdata=base64.b64encode(imageData))
content = self. imageTemplate % vars content = self. imageTemplate % vars
self.parts.insert(flpos, str(content)) self.parts.insert(flpos, str(content))
filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern
@@ -94,7 +78,7 @@ class MHTFile(object):
def insertBody(self): def insertBody(self):
path = '-'.join((self.documentName, self.foldernameSuffix)) path = '-'.join((self.documentName, self.foldernameSuffix))
self.htmlDoc.updateImageRefs(self.imageMappings, path) self.htmlDoc.updateImageRefs(self.imageMappings, path)
content = self.htmlDoc.doc.renderContents(self.encoding) content = self.htmlDoc.doc.renderContents(self.encoding).decode(self.encoding)
bodyIndex = self.indexes['body'] bodyIndex = self.indexes['body']
baseDocument = self.parts[bodyIndex] baseDocument = self.parts[bodyIndex]
self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker, self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker,
@@ -111,7 +95,7 @@ class HTMLDoc(object):
def __init__(self, data): def __init__(self, data):
self.data = data self.data = data
self.doc = BeautifulSoup(data) self.doc = BeautifulSoup(data, features='lxml')
def getImageRefs(self): def getImageRefs(self):
return [img['src'] for img in self.doc('img')] return [img['src'] for img in self.doc('img')]
@@ -119,12 +103,12 @@ class HTMLDoc(object):
def updateImageRefs(self, mappings, path=''): def updateImageRefs(self, mappings, path=''):
for img in self.doc('img'): for img in self.doc('img'):
name, width, height = mappings[img['src']] name, width, height = mappings[img['src']]
imgdata = Tag(self.doc, 'v:imagedata') imgdata = Tag(self.doc, name='v:imagedata')
imgdata['src'] = '/'.join((path, name)) imgdata['src'] = '/'.join((path, name))
imgdata.isSelfClosing = True #imgdata.isSelfClosing = True
img.append(imgdata) img.append(imgdata)
del img['src'] del img['src']
img['style'] = 'width:%spt;height:%spt' % (width, height) img['style'] = 'width:%spt;height:%spt' % (width, height)
img.isSelfClosing = False #img.isSelfClosing = False
img.name='v:shape' img.name='v:shape'

View file

@@ -1,16 +1,19 @@
# cybertools.util.docgen.tests
""" """
unit tests, doc tests unit tests, doc tests
""" """
import unittest, doctest import unittest, doctest
import warnings
from zope.interface.verify import verifyClass from zope.interface.verify import verifyClass
from zope.interface import implements
class Test(unittest.TestCase): class Test(unittest.TestCase):
"Basic tests for the docgen package." "Basic tests for the docgen package."
def testInterfaces(self): def testInterfaces(self):
warnings.filterwarnings('ignore', category=DeprecationWarning)
pass pass

View file

@@ -16,6 +16,7 @@ dependencies = [
"BTrees", "BTrees",
"lxml", "lxml",
"persistent", "persistent",
"pillow",
"zope.app.container", "zope.app.container",
"zope.app.rotterdam", "zope.app.rotterdam",
"zope.app.testing", "zope.app.testing",