From ee9d062833cc7b5dc8292fa5e995ec312d0d6aea Mon Sep 17 00:00:00 2001 From: Helmut Merz Date: Sun, 22 Sep 2024 11:16:47 +0200 Subject: [PATCH] docgen: Python3 fixes --- cybertools/docgen/README.txt | 6 ++++-- cybertools/docgen/mht.py | 36 ++++++++++-------------------------- cybertools/docgen/tests.py | 5 ++++- pyproject.toml | 1 + 4 files changed, 19 insertions(+), 29 deletions(-) diff --git a/cybertools/docgen/README.txt b/cybertools/docgen/README.txt index 2888b29..a72969e 100644 --- a/cybertools/docgen/README.txt +++ b/cybertools/docgen/README.txt @@ -39,7 +39,7 @@ Working with MHT Files >>> imageRefs = document.htmlDoc.getImageRefs() >>> for path in imageRefs: ... imagePath = os.path.join(basePath, os.path.basename(path)) - ... f = open(imagePath, 'rt') + ... f = open(imagePath, 'rb') ... imageData = f.read() ... f.close() ... document.addImage(imageData, path) @@ -48,7 +48,9 @@ Working with MHT Files >>> output = document.asString() >>> len(data), len(output) - (294996, 336142) + (290577, 331234) + +was (Py2) (294996, 336142) >>> outPath = os.path.join(basePath, 'out_doc.mht') >>> #f = open(outPath, 'wt') diff --git a/cybertools/docgen/mht.py b/cybertools/docgen/mht.py index 9064e38..e0c0366 100644 --- a/cybertools/docgen/mht.py +++ b/cybertools/docgen/mht.py @@ -1,33 +1,17 @@ -# -# Copyright (c) 2012 Helmut Merz helmutm@cy55.de -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# +# cybertools.docgen.mht """ Working with MHT Files. """ import base64 -from cStringIO import StringIO +from io import BytesIO, StringIO import email from PIL import Image import mimetypes import os -from cybertools.text.lib.BeautifulSoup import BeautifulSoup, Tag +from bs4 import BeautifulSoup, Tag class MHTFile(object): @@ -72,7 +56,7 @@ class MHTFile(object): return self.htmlDoc.getImageRefs() def addImage(self, imageData, path): - image = Image.open(StringIO(imageData)) + image = Image.open(BytesIO(imageData)) width, height = image.size contentType, enc = mimetypes.guess_type(path) bp, ext = os.path.splitext(path) @@ -83,7 +67,7 @@ class MHTFile(object): vars = dict(path=self.path, docname=self.documentName, suffix=self.foldernameSuffix, imgname=name, ctype=contentType, - imgdata=base64.encodestring(imageData)) + imgdata=base64.b64encode(imageData)) content = self. 
imageTemplate % vars self.parts.insert(flpos, str(content)) filelistRep = (self.filelistItemTemplate % name) + self.filelistPattern @@ -94,7 +78,7 @@ class MHTFile(object): def insertBody(self): path = '-'.join((self.documentName, self.foldernameSuffix)) self.htmlDoc.updateImageRefs(self.imageMappings, path) - content = self.htmlDoc.doc.renderContents(self.encoding) + content = self.htmlDoc.doc.renderContents(self.encoding).decode(self.encoding) bodyIndex = self.indexes['body'] baseDocument = self.parts[bodyIndex] self.parts[bodyIndex] = baseDocument.replace(self.bodyMarker, @@ -111,7 +95,7 @@ class HTMLDoc(object): def __init__(self, data): self.data = data - self.doc = BeautifulSoup(data) + self.doc = BeautifulSoup(data, features='lxml') def getImageRefs(self): return [img['src'] for img in self.doc('img')] @@ -119,12 +103,12 @@ class HTMLDoc(object): def updateImageRefs(self, mappings, path=''): for img in self.doc('img'): name, width, height = mappings[img['src']] - imgdata = Tag(self.doc, 'v:imagedata') + imgdata = Tag(self.doc, name='v:imagedata') imgdata['src'] = '/'.join((path, name)) - imgdata.isSelfClosing = True + #imgdata.isSelfClosing = True img.append(imgdata) del img['src'] img['style'] = 'width:%spt;height:%spt' % (width, height) - img.isSelfClosing = False + #img.isSelfClosing = False img.name='v:shape' diff --git a/cybertools/docgen/tests.py b/cybertools/docgen/tests.py index e74753b..16f621b 100755 --- a/cybertools/docgen/tests.py +++ b/cybertools/docgen/tests.py @@ -1,16 +1,19 @@ +# cybertools.docgen.tests + """ unit tests, doc tests """ import unittest, doctest +import warnings from zope.interface.verify import verifyClass -from zope.interface import implements class Test(unittest.TestCase): "Basic tests for the docgen package." 
def testInterfaces(self): + warnings.filterwarnings('ignore', category=DeprecationWarning) pass diff --git a/pyproject.toml b/pyproject.toml index 2ddbefa..f331949 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "BTrees", "lxml", "persistent", + "pillow", "zope.app.container", "zope.app.rotterdam", "zope.app.testing",