provide a base class for crawling jobs

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1804 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-06-24 08:37:55 +00:00
parent e05e3bd6e4
commit c8b3e250d7
4 changed files with 63 additions and 17 deletions

View file

@@ -38,27 +38,28 @@ Configuration (per job)
- schedule, repeating pattern, conditions - schedule, repeating pattern, conditions
- following job(s), e.g. to start a transfer immediately after a crawl - following job(s), e.g. to start a transfer immediately after a crawl
>>> scheduler = agent.scheduler
How does this work?
-------------------
>>> from time import time
>>> from loops.agent.schedule import Job >>> from loops.agent.schedule import Job
>>> class TestJob(Job): >>> class TestJob(Job):
... def execute(self, **kw): ... def execute(self, **kw):
... d = super(TestJob, self).execute(**kw) ... d = super(TestJob, self).execute(**kw)
... print 'executing' ... print 'executing'
... return d ... return d
>>> from time import time
>>> scheduler = agent.scheduler
>>> scheduler.schedule(TestJob(), int(time())) >>> scheduler.schedule(TestJob(), int(time()))
>>> tester.iterate() >>> tester.iterate()
executing executing
We can set up a more realistic example using the dummy crawler and transporter We can set up a more realistic example using the dummy crawler and transporter
classes from testing. classes from the testing package.
>>> from testing.crawl import CrawlingJob >>> from loops.agent.testing.crawl import CrawlingJob
>>> from testing.transport import Transporter, TransportJob >>> from loops.agent.testing.transport import Transporter, TransportJob
>>> crawl = CrawlingJob() >>> crawl = CrawlingJob()
>>> transporter = Transporter() >>> transporter = Transporter()

41
agent/crawl/base.py Normal file
View file

@@ -0,0 +1,41 @@
#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
Base class for crawling jobs.
$Id$
"""
from zope.interface import implements
from loops.agent.interfaces import ICrawlingJob
from loops.agent.schedule import Job
class CrawlingJob(Job):
    """ Base class for crawling jobs; concrete crawlers provide a
        ``collect()`` method that does the real work.
    """

    implements(ICrawlingJob)

    def __init__(self):
        # Metadata shared by all resources this job collects;
        # presumably filled in by subclasses or configuration — TODO confirm.
        self.predefinedMetadata = {}
        super(CrawlingJob, self).__init__()

    def execute(self, **params):
        # Scheduler entry point: delegate to the crawler-specific
        # ``collect()`` implementation (defined by subclasses, e.g.
        # the testing crawler).
        return self.collect(**params)

View file

@@ -104,6 +104,15 @@ class ICrawlingJob(IScheduledJob):
""" """
class IResource(Interface):
    """ Represents a data object that is collected by a crawler and
        will be transferred to the server.
    """

    data = Attribute("A string, file, or similar representation of the "
                     "resource's content")
class IMetadataSet(Interface): class IMetadataSet(Interface):
""" Metadata associated with a resource. """ Metadata associated with a resource.
""" """

View file

@@ -26,23 +26,16 @@ from twisted.internet import reactor
from twisted.internet.defer import Deferred from twisted.internet.defer import Deferred
from zope.interface import implements from zope.interface import implements
from loops.agent.interfaces import ICrawlingJob, IMetadataSet from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
from loops.agent.schedule import Job from loops.agent.schedule import Job
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
class CrawlingJob(Job): class CrawlingJob(BaseCrawlingJob):
implements(ICrawlingJob)
def __init__(self):
self.predefinedMetadata = {}
super(CrawlingJob, self).__init__()
def execute(self, **kw):
return self.collect(**kw)
def collect(self, **criteria): def collect(self, **criteria):
deferred = self.deferred = Deferred() deferred = self.deferred = Deferred()
# replace this with the real stuff:
reactor.callLater(0, self.dataAvailable) reactor.callLater(0, self.dataAvailable)
return deferred return deferred
@@ -57,4 +50,6 @@ class Metadata(object):
class DummyResource(object): class DummyResource(object):
implements(IResource)
data = 'Dummy resource data for testing purposes.' data = 'Dummy resource data for testing purposes.'