From c8b3e250d718ee650701d7b6d36eb8bd7ad1a0f6 Mon Sep 17 00:00:00 2001
From: helmutm
Date: Sun, 24 Jun 2007 08:37:55 +0000
Subject: [PATCH] provide a base class for crawling jobs

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1804 fd906abe-77d9-0310-91a1-e0d9ade77398
---
 agent/README.txt       | 13 +++++++------
 agent/crawl/base.py    | 41 +++++++++++++++++++++++++++++++++++++++++
 agent/interfaces.py    |  9 +++++++++
 agent/testing/crawl.py | 17 ++++++-----------
 4 files changed, 63 insertions(+), 17 deletions(-)
 create mode 100644 agent/crawl/base.py

diff --git a/agent/README.txt b/agent/README.txt
index 77d4431..b6dfa6a 100644
--- a/agent/README.txt
+++ b/agent/README.txt
@@ -38,27 +38,28 @@ Configuration (per job)
 - schedule, repeating pattern, conditions
 - following job(s), e.g. to start a transfer immediately after a crawl
 
-  >>> scheduler = agent.scheduler
+How does this work?
+-------------------
 
-  >>> from time import time
   >>> from loops.agent.schedule import Job
-
   >>> class TestJob(Job):
   ...     def execute(self, **kw):
   ...         d = super(TestJob, self).execute(**kw)
   ...         print 'executing'
   ...         return d
 
+  >>> from time import time
+  >>> scheduler = agent.scheduler
   >>> scheduler.schedule(TestJob(), int(time()))
 
   >>> tester.iterate()
   executing
 
 We can set up a more realistic example using the dummy crawler and transporter
-classes from testing.
+classes from the testing package.
 
-  >>> from testing.crawl import CrawlingJob
-  >>> from testing.transport import Transporter, TransportJob
+  >>> from loops.agent.testing.crawl import CrawlingJob
+  >>> from loops.agent.testing.transport import Transporter, TransportJob
 
   >>> crawl = CrawlingJob()
   >>> transporter = Transporter()
diff --git a/agent/crawl/base.py b/agent/crawl/base.py
new file mode 100644
index 0000000..1f1ec0b
--- /dev/null
+++ b/agent/crawl/base.py
@@ -0,0 +1,41 @@
+#
+#  Copyright (c) 2007 Helmut Merz helmutm@cy55.de
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+"""
+Filesystem crawler.
+
+$Id$
+"""
+
+from zope.interface import implements
+
+from loops.agent.interfaces import ICrawlingJob
+from loops.agent.schedule import Job
+
+
+class CrawlingJob(Job):
+
+    implements(ICrawlingJob)
+
+    def __init__(self):
+        self.predefinedMetadata = {}
+        super(CrawlingJob, self).__init__()
+
+    def execute(self, **kw):
+        return self.collect(**kw)
+
diff --git a/agent/interfaces.py b/agent/interfaces.py
index e10c462..3f14d0c 100644
--- a/agent/interfaces.py
+++ b/agent/interfaces.py
@@ -104,6 +104,15 @@ class ICrawlingJob(IScheduledJob):
         """
 
 
+class IResource(Interface):
+    """ Represents a data object that is collected by a crawler and
+        will be transferred to the server.
+    """
+
+    data = Attribute("A string, file, or similar representation of the "
+                     "resource's content")
+
+
 class IMetadataSet(Interface):
     """ Metadata associated with a resource.
     """
diff --git a/agent/testing/crawl.py b/agent/testing/crawl.py
index a71bcc3..0ffead7 100644
--- a/agent/testing/crawl.py
+++ b/agent/testing/crawl.py
@@ -26,23 +26,16 @@
 from twisted.internet import reactor
 from twisted.internet.defer import Deferred
 from zope.interface import implements
 
-from loops.agent.interfaces import ICrawlingJob, IMetadataSet
+from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
 from loops.agent.schedule import Job
+from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
 
 
-class CrawlingJob(Job):
-
-    implements(ICrawlingJob)
-
-    def __init__(self):
-        self.predefinedMetadata = {}
-        super(CrawlingJob, self).__init__()
-
-    def execute(self, **kw):
-        return self.collect(**kw)
+class CrawlingJob(BaseCrawlingJob):
 
     def collect(self, **criteria):
         deferred = self.deferred = Deferred()
+        # replace this with the real stuff:
         reactor.callLater(0, self.dataAvailable)
         return deferred
@@ -57,4 +50,6 @@ class Metadata(object):
 
 class DummyResource(object):
 
+    implements(IResource)
+
     data = 'Dummy resource data for testing purposes.'