provide a base class for crawling jobs
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1804 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
e05e3bd6e4
commit
c8b3e250d7
4 changed files with 63 additions and 17 deletions
|
@ -38,27 +38,28 @@ Configuration (per job)
|
|||
- schedule, repeating pattern, conditions
|
||||
- following job(s), e.g. to start a transfer immediately after a crawl
|
||||
|
||||
>>> scheduler = agent.scheduler
|
||||
How does this work?
|
||||
-------------------
|
||||
|
||||
>>> from time import time
|
||||
>>> from loops.agent.schedule import Job
|
||||
|
||||
>>> class TestJob(Job):
|
||||
... def execute(self, **kw):
|
||||
... d = super(TestJob, self).execute(**kw)
|
||||
... print 'executing'
|
||||
... return d
|
||||
|
||||
>>> from time import time
|
||||
>>> scheduler = agent.scheduler
|
||||
>>> scheduler.schedule(TestJob(), int(time()))
|
||||
|
||||
>>> tester.iterate()
|
||||
executing
|
||||
|
||||
We can set up a more realistic example using the dummy crawler and transporter
|
||||
classes from testing.
|
||||
classes from the testing package.
|
||||
|
||||
>>> from testing.crawl import CrawlingJob
|
||||
>>> from testing.transport import Transporter, TransportJob
|
||||
>>> from loops.agent.testing.crawl import CrawlingJob
|
||||
>>> from loops.agent.testing.transport import Transporter, TransportJob
|
||||
|
||||
>>> crawl = CrawlingJob()
|
||||
>>> transporter = Transporter()
|
||||
|
|
41
agent/crawl/base.py
Normal file
41
agent/crawl/base.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
#
|
||||
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
"""
|
||||
Filesystem crawler.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
from zope.interface import implements
|
||||
|
||||
from loops.agent.interfaces import ICrawlingJob
|
||||
from loops.agent.schedule import Job
|
||||
|
||||
|
||||
class CrawlingJob(Job):
    """Common base class for all crawling jobs.

    Concrete crawlers subclass this and implement ``collect()``;
    ``execute()`` simply delegates to it.
    """

    implements(ICrawlingJob)

    def __init__(self):
        # Start with no predefined metadata; subclasses or callers may
        # populate this mapping before the job is run.
        self.predefinedMetadata = {}
        super(CrawlingJob, self).__init__()

    def execute(self, **params):
        # The real crawling work happens in the subclass's collect();
        # its result (typically a Twisted Deferred) is passed through.
        return self.collect(**params)
|
|
@ -104,6 +104,15 @@ class ICrawlingJob(IScheduledJob):
|
|||
"""
|
||||
|
||||
|
||||
class IResource(Interface):
    """ Represents a data object that is collected by a crawler and
        will be transferred to the server.
    """

    # NOTE(review): the exact accepted types ("string, file, or similar")
    # are deliberately loose here; implementations decide the concrete form.
    data = Attribute("A string, file, or similar representation of the "
                     "resource's content")
|
||||
|
||||
|
||||
class IMetadataSet(Interface):
|
||||
""" Metadata associated with a resource.
|
||||
"""
|
||||
|
|
|
@ -26,23 +26,16 @@ from twisted.internet import reactor
|
|||
from twisted.internet.defer import Deferred
|
||||
from zope.interface import implements
|
||||
|
||||
from loops.agent.interfaces import ICrawlingJob, IMetadataSet
|
||||
from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
|
||||
from loops.agent.schedule import Job
|
||||
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
|
||||
|
||||
|
||||
class CrawlingJob(Job):
|
||||
|
||||
implements(ICrawlingJob)
|
||||
|
||||
def __init__(self):
|
||||
self.predefinedMetadata = {}
|
||||
super(CrawlingJob, self).__init__()
|
||||
|
||||
def execute(self, **kw):
|
||||
return self.collect(**kw)
|
||||
class CrawlingJob(BaseCrawlingJob):
    """ Dummy crawling job used for testing the scheduler/transport chain.
    """

    def collect(self, **criteria):
        # Create the Deferred that the caller will wait on, and keep it
        # on the instance as well -- presumably dataAvailable() (defined
        # elsewhere in this module) fires it later; confirm against that
        # method before relying on this.
        deferred = self.deferred = Deferred()
        # replace this with the real stuff:
        reactor.callLater(0, self.dataAvailable)
        return deferred
|
||||
|
||||
|
@ -57,4 +50,6 @@ class Metadata(object):
|
|||
|
||||
class DummyResource(object):
    """ Minimal IResource implementation with fixed content, for tests.
    """

    implements(IResource)

    # Static stand-in for real crawled content (class-level, shared by
    # all instances).
    data = 'Dummy resource data for testing purposes.'
|
||||
|
|
Loading…
Add table
Reference in a new issue