From 0e060bc34e87b397b4f7b1fb275f15867fe506d2 Mon Sep 17 00:00:00 2001
From: Helmut Merz
Date: Thu, 10 May 2012 10:19:12 +0200
Subject: [PATCH] sanitize HTML before stripping tags in order to avoid empty results with certain Word formattings

---
 util/html.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/html.py b/util/html.py
index 51b15df..69aade7 100644
--- a/util/html.py
+++ b/util/html.py
@@ -79,6 +79,7 @@ def checkStyle(k):
 
 
 def stripAll(value):
+    value = sanitize(value)
     def collectText(tags):
         for tag in tags:
             if type(tag) is NavigableString: