class BatchIterator(object):
    """Iterate over a base iterable in batches of at most ``limit`` items.

    Iteration stops (``StopIteration``) as soon as the current batch is
    used up; calling ``advance()`` unlocks the next batch so iteration
    can be resumed on the same object.

    Parameters:
        data:  any iterable; it is wrapped with ``iter()`` once.
        limit: batch size, i.e. the number of items per batch.
        start: number of whole batches (``start * limit`` items) to skip
               before the first item is delivered.

    Attributes:
        count:     number of items delivered so far (skipped items are
                   not counted).
        batch:     number of batches unlocked via ``advance()``.
        exhausted: True once the base iterator has signalled its end.
    """

    def __init__(self, data, limit=20, start=0):
        self.data = iter(data)
        self.limit = limit
        self.start = start
        self.count = 0
        self.batch = 0
        self.exhausted = False

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item of the current batch.

        Raises ``StopIteration`` when the current batch is complete or
        when the base iterator has no more items.
        """
        # The current batch ends when ``count`` reaches the total number
        # of items unlocked so far.
        if self.count >= (self.batch + 1) * self.limit:
            raise StopIteration
        # Do not poll the base iterator again once it reported its end.
        if self.exhausted:
            raise StopIteration
        if self.start:
            # Skip the leading batches once; resetting ``start``
            # afterwards makes sure this happens only on the first call.
            for _ in islice(self.data, 0, self.start * self.limit):
                pass
            self.start = 0
        try:
            # Builtin next() works on Python 2.6+ and Python 3 alike.
            item = next(self.data)
        except StopIteration:
            self.exhausted = True
            raise
        # Count only items that were actually delivered.
        self.count += 1
        return item

    # Python 2 iterator protocol (and explicit ``it.next()`` callers).
    next = __next__

    def advance(self, batches=1):
        """Unlock ``batches`` more batches for iteration.

        Returns False if the base iterator is already known to be
        exhausted, True otherwise. Exhaustion is only detected when a
        fetch from the base iterator fails, so True may be returned even
        though no further items will follow (e.g. when the number of
        items is an exact multiple of ``limit``).
        """
        self.batch += batches
        return not self.exhausted
+ + >>> from cybertools.util.iterate import BatchIterator + +We create a BatchIterator upon a base iterator. The BatchIterator +only gives us a limited portion of the values provided by the base +iterator. + + >>> it = BatchIterator(xrange(30)) + >>> list(it) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + >>> list(it) + [] + +Now we advance to the next batch. The return value tells us that the +base iterator is not exhausted yet. + + >>> it.advance() + True + >>> list(it) + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + +Advancing would not help if the base iterator is exhausted. + + >>> it.advance() + False + >>> list(it) + [] + +We can also immediately start at the second batch by providing the ``start`` +argument to the BatchIterator constructor. + + >>> it = BatchIterator(xrange(30), start=1) + >>> list(it) + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + +We can use another limit (i.e. the batch size) via the BatchIterator constructor. + + >>> it = BatchIterator(xrange(30), start=1, limit=8) + >>> list(it) + [8, 9, 10, 11, 12, 13, 14, 15] + >>> it.advance() + True + >>> list(it) + [16, 17, 18, 19, 20, 21, 22, 23] diff --git a/util/tests.py b/util/tests.py index a5ef08e..dcf2605 100755 --- a/util/tests.py +++ b/util/tests.py @@ -25,6 +25,7 @@ def test_suite(): doctest.DocFileSuite('defer.txt', optionflags=flags), doctest.DocFileSuite('format.txt', optionflags=flags), doctest.DocFileSuite('html.txt', optionflags=flags), + doctest.DocFileSuite('iterate.txt', optionflags=flags), doctest.DocFileSuite('multikey.txt', optionflags=flags), doctest.DocFileSuite('property.txt', optionflags=flags), doctest.DocFileSuite('json.txt', optionflags=flags),