provide stripping of HTML comments

This commit is contained in:
Helmut Merz 2013-01-18 16:36:04 +01:00
parent da946ff560
commit ea0999a5c0
2 changed files with 14 additions and 4 deletions

View file

@ -1,5 +1,5 @@
#
# Copyright (c) 2012 Helmut Merz helmutm@cy55.de
# Copyright (c) 2013 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -76,6 +76,13 @@ def checkStyle(k):
return False
def stripComments(value):
    """ Remove the HTML comments from ``value`` and return the remaining
        markup.

        The input is parsed with BeautifulSoup, every text node that is
        a ``Comment`` instance is taken out of the parse tree, and the
        tree is then rendered again and decoded from UTF-8.
    """
    def isComment(text):
        # Comments show up in the tree as text nodes of type Comment.
        return isinstance(text, Comment)

    soup = BeautifulSoup(value)
    # Collect the comment nodes first, then detach them one by one.
    commentNodes = soup.findAll(text=isComment)
    for node in commentNodes:
        node.extract()
    rendered = soup.renderContents()
    return rendered.decode('utf8')
def stripAll(value):
value = sanitize(value)
def collectText(tags):

View file

@ -2,9 +2,7 @@
Tweaking HTML text
==================
$Id$
>>> from cybertools.util.html import sanitize
>>> from cybertools.util.html import sanitize, stripComments
>>> input = """<html>
... <p class="standard" style="font-size: 200%; font-weight: bold">
@ -28,6 +26,11 @@ All comments are stripped from the HTML input.
>>> sanitize(input2)
u'\n<p>text</p>\n\n<p>text</p>'
It's also possible to remove only the comments from the HTML input.
>>> stripComments(input2)
u'<html>\n<p>text</p>\n\n<p>text</p></html>'
It is also possible to strip all HTML tags from the input string.
>>> from cybertools.util.html import stripAll