52 lines
1.3 KiB
Text
52 lines
1.3 KiB
Text
==================
|
|
Tweaking HTML text
|
|
==================
|
|
|
|
>>> from cybertools.util.html import sanitize, stripComments
|
|
|
|
>>> input = """<html>
|
|
... <p class="standard" style="font-size: 200%; font-weight: bold">
|
|
... <a href="blubb"><b>Text</b>, and more</a>
|
|
... </p>
|
|
... </html>"""
|
|
|
|
Sanitize HTML
|
|
-------------
|
|
|
|
>>> sanitize(input, validAttrs=['style'])
|
|
u'\n<p style="font-weight: bold">\n<a><b>Text</b>, and more</a>\n</p>\n'
|
|
|
|
>>> sanitize(input, ['p', 'b'], ['class'])
|
|
u'\n<p class="standard">\n<b>Text</b>, and more\n</p>\n'
|
|
|
|
``sanitize()`` also strips all comments from the HTML input.
|
|
|
|
>>> input2 = """<html>
|
|
... <p>text</p>
|
|
... <!-- comment -->
|
|
... <p>text</p>"""
|
|
|
|
>>> sanitize(input2)
|
|
u'\n<p>text</p>\n\n<p>text</p>'
|
|
|
|
Use ``stripComments()`` to remove only the comments while keeping the HTML tags.
|
|
|
|
>>> stripComments(input2)
|
|
u'<html>\n<p>text</p>\n\n<p>text</p></html>'
|
|
|
|
``stripAll()`` removes all HTML tags from the input string, leaving only the text.
|
|
|
|
>>> from cybertools.util.html import stripAll
|
|
>>> stripAll(input)
|
|
u'Text, and more'
|
|
|
|
Extract first part of an HTML text
|
|
----------------------------------
|
|
|
|
>>> from cybertools.util.html import extractFirstPart
|
|
|
|
>>> extractFirstPart(input)
|
|
u'<p>\n<a href="blubb"><b>Text</b>, and more</a>\n</p>'
|
|
|
|
>>> extractFirstPart(input2)
|
|
u'<p>text</p>'
|