From 191484c2e5037554a49aac605d345d8ce1f25534 Mon Sep 17 00:00:00 2001
From: Helmut Merz <helmutm@cy55.de>
Date: Sun, 31 Mar 2013 11:04:55 +0200
Subject: [PATCH 1/2] remove empty line; fix doctest

---
 util/format.py  | 1 -
 util/format.txt | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/util/format.py b/util/format.py
index 2a0f911..1299778 100644
--- a/util/format.py
+++ b/util/format.py
@@ -56,7 +56,6 @@ def formatNumber(num, type='decimal', lang='de', pattern=u'#,##0.00;-#,##0.00'):
     return fmt.format(num, pattern=pattern)
 
 
-
 def toStr(value, encoding='UTF-8'):
     if isinstance(value, unicode):
         return value.encode(encoding)
diff --git a/util/format.txt b/util/format.txt
index 32c0f9b..b96450a 100644
--- a/util/format.txt
+++ b/util/format.txt
@@ -14,4 +14,4 @@ Basic Formatting Functions
   u'21.08.2006 17:37:13'
 
   >>> format.formatNumber(17.2)
-  u'17,2'
+  u'17,20'

From 0bb012a9c6513b49d225cca5c9be1f2a830e2d84 Mon Sep 17 00:00:00 2001
From: Helmut Merz <helmutm@cy55.de>
Date: Mon, 1 Apr 2013 10:38:17 +0200
Subject: [PATCH 2/2] add utility function for extracting first part of a text

---
 util/html.py  | 12 ++++++++++++
 util/html.txt | 14 ++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/util/html.py b/util/html.py
index 56abd5a..25cf8f2 100644
--- a/util/html.py
+++ b/util/html.py
@@ -36,6 +36,7 @@ validStyleParts = 'border padding'.split()
 
 escCommPattern = re.compile(r'&lt;\!--\[if .*?\!\[endif\]--&gt;', re.DOTALL)
 
+sentencePattern = re.compile(r'[:.\?\!]')
 
 def sanitize(value, validTags=validTags, validAttrs=validAttrs,
                     validStyles=validStyles, stripEscapedComments=True):
@@ -104,3 +105,14 @@ def stripAll(value):
     text = u''.join(data).replace(u'\n', u'').replace(u'&nbsp;', u' ')
     return text
 
+
+def extractFirstPart(value):
+    """Return the first <p> of value, or else its first sentence, as unicode."""
+    for tag in BeautifulSoup(value).findAll(True):
+        if tag.name == 'p':
+            part = tag.renderContents().decode('utf8')  # bytes -> unicode
+            break
+    else:
+        # stripAll() returns unicode: no decode here (non-ASCII would raise).
+        part = sentencePattern.split(stripAll(value))[0]
+    return u'<p>%s</p>' % part
diff --git a/util/html.txt b/util/html.txt
index b94cd9f..fd1980c 100644
--- a/util/html.txt
+++ b/util/html.txt
@@ -10,6 +10,9 @@ Tweaking HTML text
   ... </p>
   ... </html>"""
 
+Sanitize HTML
+-------------
+
   >>> sanitize(input, validAttrs=['style'])
   u'\n<p style="font-weight: bold">\n<a><b>Text</b>, and more</a>\n</p>\n'
 
@@ -36,3 +39,14 @@ It is also possible to strip all HTML tags from the input string.
   >>> from cybertools.util.html import stripAll
   >>> stripAll(input)
   u'Text, and more'
+
+Extract first part of an HTML text
+----------------------------------
+
+  >>> from cybertools.util.html import extractFirstPart
+
+  >>> extractFirstPart(input)
+  u'<p>\n<a href="blubb"><b>Text</b>, and more</a>\n</p>'
+
+  >>> extractFirstPart(input2)
+  u'<p>text</p>'