52 lines
		
	
	
	
		
			1.3 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			52 lines
		
	
	
	
		
			1.3 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| ==================
 | |
| Tweaking HTML text
 | |
| ==================
 | |
| 
 | |
|   >>> from cybertools.util.html import sanitize, stripComments
 | |
| 
 | |
|   >>> input = """<html>
 | |
|   ... <p class="standard" style="font-size: 200%; font-weight: bold">
 | |
|   ...   <a href="blubb"><b>Text</b>, and more</a>
 | |
|   ... </p>
 | |
|   ... </html>"""
 | |
| 
 | |
| Sanitize HTML
 | |
| -------------
 | |
| 
 | |
|   >>> sanitize(input, validAttrs=['style'])
 | |
|   u'\n<p style="font-weight: bold">\n<a><b>Text</b>, and more</a>\n</p>\n'
 | |
| 
 | |
|   >>> sanitize(input, ['p', 'b'], ['class'])
 | |
|   u'\n<p class="standard">\n<b>Text</b>, and more\n</p>\n'
 | |
| 
 | |
| When sanitizing, all comments are stripped from the HTML input as well.
 | |
| 
 | |
|   >>> input2 = """<html>
 | |
|   ... <p>text</p>
 | |
|   ... <!-- comment -->
 | |
|   ... <p>text</p>"""
 | |
| 
 | |
|   >>> sanitize(input2)
 | |
|   u'\n<p>text</p>\n\n<p>text</p>'
 | |
| 
 | |
| It's also possible to remove only the comments from the HTML input, keeping the tags, by using ``stripComments()``.
 | |
| 
 | |
|   >>> stripComments(input2)
 | |
|   u'<html>\n<p>text</p>\n\n<p>text</p></html>'
 | |
| 
 | |
| It is also possible to strip all HTML tags from the input string with ``stripAll()``, so that only the text content remains.
 | |
| 
 | |
|   >>> from cybertools.util.html import stripAll
 | |
|   >>> stripAll(input)
 | |
|   u'Text, and more'
 | |
| 
 | |
| Extract first part of an HTML text
 | |
| ----------------------------------
 | |
| 
 | |
|   >>> from cybertools.util.html import extractFirstPart
 | |
| 
 | |
|   >>> extractFirstPart(input)
 | |
|   u'<p>\n<a href="blubb"><b>Text</b>, and more</a>\n</p>'
 | |
| 
 | |
|   >>> extractFirstPart(input2)
 | |
|   u'<p>text</p>'
 |