diff --git a/util/html.py b/util/html.py
index f3dba46..56abd5a 100644
--- a/util/html.py
+++ b/util/html.py
@@ -20,6 +20,8 @@
Strip HTML tags and other HTML-related utilities.
"""
+import re
+
from cybertools.text.lib.BeautifulSoup import BeautifulSoup, Comment
from cybertools.text.lib.BeautifulSoup import Declaration, NavigableString
@@ -32,9 +34,11 @@ validAttrs = ('align alt border cellpadding cellspacing class colspan '
validStyles = 'font-style font-weight'.split()
validStyleParts = 'border padding'.split()
+escCommPattern = re.compile(r'<\!--\[if .*?\!\[endif\]-->', re.DOTALL)
+
def sanitize(value, validTags=validTags, validAttrs=validAttrs,
- validStyles=validStyles):
+ validStyles=validStyles, stripEscapedComments=True):
soup = BeautifulSoup(value)
for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
comment.extract()
@@ -51,7 +55,10 @@ def sanitize(value, validTags=validTags, validAttrs=validAttrs,
if val:
attrs.append((attr, val))
tag.attrs = attrs
- return soup.renderContents().decode('utf8')
+ result = soup.renderContents()
+ if stripEscapedComments:
+ result = escCommPattern.sub(u'', result)
+ return result.decode('utf8')
def sanitizeStyle(value, validStyles=validStyles):