provide stripping of HTML comments
This commit is contained in:
parent
da946ff560
commit
ea0999a5c0
2 changed files with 14 additions and 4 deletions
|
@ -1,5 +1,5 @@
|
||||||
#
|
#
|
||||||
# Copyright (c) 2012 Helmut Merz helmutm@cy55.de
|
# Copyright (c) 2013 Helmut Merz helmutm@cy55.de
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
@ -76,6 +76,13 @@ def checkStyle(k):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# Return the markup in `value` with all HTML comment nodes removed.
# Only Comment nodes are extracted here; no tags or other text nodes are dropped.
def stripComments(value):
|
||||||
|
# Parse the input string into a BeautifulSoup tree.
soup = BeautifulSoup(value)
|
||||||
|
# Select every text node that is a Comment instance.
for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
|
||||||
|
# Detach the comment node from the tree.
comment.extract()
|
||||||
|
# Serialize what is left and decode it as UTF-8.
# NOTE(review): presumably renderContents() returns a UTF-8 encoded byte
# string in the BeautifulSoup version used here -- confirm.
return soup.renderContents().decode('utf8')
|
||||||
|
|
||||||
|
|
||||||
def stripAll(value):
|
def stripAll(value):
|
||||||
value = sanitize(value)
|
value = sanitize(value)
|
||||||
def collectText(tags):
|
def collectText(tags):
|
||||||
|
|
|
@ -2,9 +2,7 @@
|
||||||
Tweaking HTML text
|
Tweaking HTML text
|
||||||
==================
|
==================
|
||||||
|
|
||||||
$Id$
|
>>> from cybertools.util.html import sanitize, stripComments
|
||||||
|
|
||||||
>>> from cybertools.util.html import sanitize
|
|
||||||
|
|
||||||
>>> input = """<html>
|
>>> input = """<html>
|
||||||
... <p class="standard" style="font-size: 200%; font-weight: bold">
|
... <p class="standard" style="font-size: 200%; font-weight: bold">
|
||||||
|
@ -28,6 +26,11 @@ All comments are stripped from the HTML input.
|
||||||
>>> sanitize(input2)
|
>>> sanitize(input2)
|
||||||
u'\n<p>text</p>\n\n<p>text</p>'
|
u'\n<p>text</p>\n\n<p>text</p>'
|
||||||
|
|
||||||
|
It's also possible to remove only the comments from the HTML input.
|
||||||
|
|
||||||
|
>>> stripComments(input2)
|
||||||
|
u'<html>\n<p>text</p>\n\n<p>text</p></html>'
|
||||||
|
|
||||||
It is also possible to strip all HTML tags from the input string.
|
It is also possible to strip all HTML tags from the input string.
|
||||||
|
|
||||||
>>> from cybertools.util.html import stripAll
|
>>> from cybertools.util.html import stripAll
|
||||||
|
|
Loading…
Add table
Reference in a new issue