provide a base class for crawling jobs

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1804 fd906abe-77d9-0310-91a1-e0d9ade77398

parent e05e3bd6e4
commit c8b3e250d7

4 changed files with 63 additions and 17 deletions
@@ -38,27 +38,28 @@ Configuration (per job)
 - schedule, repeating pattern, conditions
 - following job(s), e.g. to start a transfer immediately after a crawl
 
->>> scheduler = agent.scheduler
-
 How does this work?
 -------------------
 
->>> from time import time
 >>> from loops.agent.schedule import Job
 
 >>> class TestJob(Job):
 ...     def execute(self, **kw):
 ...         d = super(TestJob, self).execute(**kw)
 ...         print 'executing'
 ...         return d
 
+>>> from time import time
+>>> scheduler = agent.scheduler
+
 >>> scheduler.schedule(TestJob(), int(time()))
 
 >>> tester.iterate()
 executing
 
 We can set up a more realistic example using the dummy crawler and transporter
-classes from testing.
+classes from the testing package.
 
->>> from testing.crawl import CrawlingJob
->>> from testing.transport import Transporter, TransportJob
+>>> from loops.agent.testing.crawl import CrawlingJob
+>>> from loops.agent.testing.transport import Transporter, TransportJob
 
 >>> crawl = CrawlingJob()
 >>> transporter = Transporter()
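For context: tester.iterate() is part of the doctest fixture and drives the
Twisted reactor one step so that the scheduled job fires. Outside the test
harness the same round trip would need a running reactor. A minimal standalone
sketch, assuming only the API shown above (Job.execute() returning a Deferred,
a scheduler reachable as agent.scheduler on the fixture's agent object); the
reactor.stop() call just ends the demo run and is not part of the committed
code:

    from time import time
    from twisted.internet import reactor
    from loops.agent.schedule import Job

    class PrintingJob(Job):
        # illustrative subclass, analogous to TestJob in the doctest above
        def execute(self, **kw):
            d = super(PrintingJob, self).execute(**kw)
            print 'executing'
            reactor.callLater(0, reactor.stop)  # end the demo run
            return d

    # 'agent' is assumed to be set up as in the doctest fixture
    agent.scheduler.schedule(PrintingJob(), int(time()))
    reactor.run()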
agent/crawl/base.py (new file, 41 lines)

@@ -0,0 +1,41 @@
+#
+#  Copyright (c) 2007 Helmut Merz helmutm@cy55.de
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+"""
+Filesystem crawler.
+
+$Id$
+"""
+
+from zope.interface import implements
+
+from loops.agent.interfaces import ICrawlingJob
+from loops.agent.schedule import Job
+
+
+class CrawlingJob(Job):
+
+    implements(ICrawlingJob)
+
+    def __init__(self):
+        self.predefinedMetadata = {}
+        super(CrawlingJob, self).__init__()
+
+    def execute(self, **kw):
+        return self.collect(**kw)
+
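The division of labour the new base class establishes: execute() is the
scheduler-facing entry point and simply delegates to collect(), so a concrete
crawler only has to implement collect() and return a Deferred that fires when
its resources are available. A minimal sketch of such a subclass, modelled on
the dummy crawler refactored later in this commit; the empty result list and
the immediate callLater() firing are placeholders, not committed behaviour:

    from twisted.internet import reactor
    from twisted.internet.defer import Deferred
    from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob

    class MinimalCrawlingJob(BaseCrawlingJob):

        def collect(self, **criteria):
            # remember the Deferred so dataAvailable() can fire it later
            deferred = self.deferred = Deferred()
            # a real crawler would start collecting here (e.g. walk a
            # directory tree) instead of firing immediately
            reactor.callLater(0, self.dataAvailable)
            return deferred

        def dataAvailable(self):
            # hand the collected resources to whoever waits on the Deferred
            self.deferred.callback([])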
agent/interfaces.py

@@ -104,6 +104,15 @@ class ICrawlingJob(IScheduledJob):
         """
 
 
+class IResource(Interface):
+    """ Represents a data object that is collected by a crawler and
+        will be transferred to the server.
+    """
+
+    data = Attribute("A string, file, or similar representation of the "
+                     "resource's content")
+
+
 class IMetadataSet(Interface):
     """ Metadata associated with a resource.
     """
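IResource keeps the contract deliberately small: a single data attribute that
may hold a string, a file, or something similar. A provider can therefore be
trivial, along the lines of the DummyResource added further down in this
commit; in this sketch the StringResource name and constructor are
illustrative, not committed code:

    from zope.interface import implements
    from loops.agent.interfaces import IResource

    class StringResource(object):

        implements(IResource)

        def __init__(self, data):
            self.data = data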
agent/testing/crawl.py

@@ -26,23 +26,16 @@ from twisted.internet import reactor
 from twisted.internet.defer import Deferred
 from zope.interface import implements
 
-from loops.agent.interfaces import ICrawlingJob, IMetadataSet
+from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
 from loops.agent.schedule import Job
+from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
 
 
-class CrawlingJob(Job):
-
-    implements(ICrawlingJob)
-
-    def __init__(self):
-        self.predefinedMetadata = {}
-        super(CrawlingJob, self).__init__()
-
-    def execute(self, **kw):
-        return self.collect(**kw)
+class CrawlingJob(BaseCrawlingJob):
 
     def collect(self, **criteria):
         deferred = self.deferred = Deferred()
+        # replace this with the real stuff:
         reactor.callLater(0, self.dataAvailable)
         return deferred

@@ -57,4 +50,6 @@ class Metadata(object):
 
 class DummyResource(object):
 
+    implements(IResource)
+
     data = 'Dummy resource data for testing purposes.'
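After the refactoring the dummy CrawlingJob inherits __init__() and execute()
from the base class and only supplies collect(). A sketch of how a caller
consumes it; addCallback() is standard Twisted Deferred API, and the printing
callback is illustrative:

    from loops.agent.testing.crawl import CrawlingJob

    def handleResult(result):
        print 'collected:', result

    crawl = CrawlingJob()
    # the Deferred fires once the reactor runs (cf. callLater above)
    crawl.collect().addCallback(handleResult)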