set up dummy crawler and transport for testing; implement crawl-transfer sequence via job scheduler

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1800 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-06-22 15:15:25 +00:00
parent 9d89ab30ec
commit 621efb6081
7 changed files with 205 additions and 27 deletions

View file

@ -10,6 +10,12 @@ collecting information and transferring it to the loops server.
This package does not depend on zope or the other loops packages This package does not depend on zope or the other loops packages
but represents a standalone application. but represents a standalone application.
But we need a reactor for working with Twisted; in order not to block
testing when running the reactor we use reactor.iterate() calls
wrapped in a ``tester`` object.
>>> from loops.agent.tests import tester
Basic Implementation, Agent Core Basic Implementation, Agent Core
================================ ================================
@ -24,23 +30,6 @@ most cases also an errback method).
>>> agent = Agent() >>> agent = Agent()
Browser-based User Interface
============================
The user interface is provided via a browser-based application
based on Twisted and Nevow.
Configuration Management
========================
Functionality
- Storage of configuration parameters
- Interface to the browser-based user interface that allows the
editing of configuration parameters
Scheduling Scheduling
========== ==========
@ -60,11 +49,9 @@ Configuration (per job)
... print 'executing' ... print 'executing'
... return d ... return d
>>> scheduler.schedule(TestJob(), int(time())+1) >>> scheduler.schedule(TestJob(), int(time()))
>>> from twisted.internet import reactor >>> tester.iterate()
>>> ignore = reactor.callLater(2, reactor.stop)
>>> reactor.run()
executing executing
@ -85,6 +72,21 @@ Configuration (per crawl job)
- predefined metadata - predefined metadata
The Dummy Crawler
-----------------
>>> from testing.crawl import CrawlingJob
>>> from testing.transport import Transporter, TransportJob
>>> crawl = CrawlingJob()
>>> transporter = Transporter()
>>> transport = TransportJob(transporter)
>>> crawl.successors.append(transport)
>>> scheduler.schedule(crawl, int(time()))
>>> tester.iterate()
Transferring: Dummy resource data for testing purposes.
Local File System Local File System
----------------- -----------------
@ -181,3 +183,20 @@ Configuration (per install/update job)
- command: install, update, remove - command: install, update, remove
- package names - package names
Configuration Management
========================
Functionality
- Storage of configuration parameters
- Interface to the browser-based user interface that allows the
editing of configuration parameters
Browser-based User Interface
============================
The user interface is provided via a browser-based application
based on Twisted and Nevow.

View file

@ -43,7 +43,7 @@ class IScheduler(Interface):
def schedule(job, startTime): def schedule(job, startTime):
""" Register the job given for execution at the intended start """ Register the job given for execution at the intended start
date/time. date/time and return the job.
""" """
def getJobsToExecute(startTime=None): def getJobsToExecute(startTime=None):
@ -95,8 +95,12 @@ class ICrawlingJob(IScheduledJob):
'for all resources found.') 'for all resources found.')
def collect(**criteria): def collect(**criteria):
""" Return a collection of resource/metadata pairs that should be transferred """ Return a deferred that upon callback will provide a
to the server using the selection criteria given. collection of resource/metadata pairs that should be transferred
to the server.
Use the selection criteria given to filter the resources that
should be collected.
""" """
@ -140,6 +144,13 @@ class ITransporter(Interface):
""" """
class ITransportJob(IScheduledJob):
    """ A job managing the transfer of a resource to the server.
    """

    transporter = Attribute('The transporter object to use for transfer.')
class IConfigurator(Interface): class IConfigurator(Interface):
""" Manages (stores and receives) configuration information. """ Manages (stores and receives) configuration information.
""" """

View file

@ -72,7 +72,8 @@ class Job(object):
def finishRun(self, result): def finishRun(self, result):
for job in self.successors: for job in self.successors:
job.run(job, **job.params) job.params['result'] = result
job.run(**job.params)
# TODO: remove from queue # TODO: remove from queue
# TODO: logging # TODO: logging
# TODO: reschedule if told by configuration # TODO: reschedule if told by configuration

View file

@ -0,0 +1,4 @@
"""
$Id$
"""

60
agent/testing/crawl.py Normal file
View file

@ -0,0 +1,60 @@
#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
A dummy crawler for testing purposes.
$Id$
"""
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from zope.interface import implements
from loops.agent.interfaces import ICrawlingJob, IMetadataSet
from loops.agent.schedule import Job
class CrawlingJob(Job):
    """Dummy crawling job for testing.

    On execution it schedules a ``reactor.callLater(0, ...)`` call that
    fires its deferred with a single (resource, metadata) pair, so that
    successor jobs (e.g. a transport job) receive dummy data.
    """

    implements(ICrawlingJob)

    def __init__(self):
        # metadata predefined by configuration; empty for the dummy crawler
        self.predefinedMetadata = {}
        super(CrawlingJob, self).__init__()

    def execute(self, **kw):
        # run the job: delegate to collect() and return its deferred
        return self.collect(**kw)

    def collect(self, **criteria):
        """Return a deferred that will fire with the collected
        (resource, metadata) pairs; ``criteria`` are ignored by this
        dummy implementation.
        """
        deferred = self.deferred = Deferred()
        # fire on the next reactor iteration so the deferred is
        # returned to the caller before its callback runs
        reactor.callLater(0, self.dataAvailable)
        return deferred

    def dataAvailable(self):
        # fire the deferred with one dummy resource/metadata pair
        self.deferred.callback([(DummyResource(), Metadata())])
class Metadata(object):
    """Minimal stand-in for a metadata set; carries no data."""

    implements(IMetadataSet)
class DummyResource(object):
    """Trivial resource whose payload is a fixed test string."""

    # payload delivered by the dummy crawler and printed by the
    # dummy transporter
    data = 'Dummy resource data for testing purposes.'

View file

@ -0,0 +1,68 @@
#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
A dummy transport for testing purposes.
$Id$
"""
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from zope.interface import implements
from loops.agent.interfaces import ITransportJob, ITransporter
from loops.agent.schedule import Job
class Transporter(object):
implements(ITransporter)
serverURL = None
method = None
machineName = None
userName = None
password = None
def transfer(self, resource, metadata=None, resourceType=file):
if resourceType is file:
data = resource.read()
resource.close()
elif resourceType is str:
data = resource
print 'Transferring:', data
return Deferred()
class TransportJob(Job):
implements(ITransportJob)
def __init__(self, transporter):
super(TransportJob, self).__init__()
self.transporter = transporter
def execute(self, **kw):
result = kw.get('result')
if result is None:
print 'No data available.'
else:
for r in result:
d = self.transporter.transfer(r[0].data, r[1], str)
return Deferred()

View file

@ -1,3 +1,8 @@
#
# Run with ``trial2.4 tests.py`` to execute the twisted unit tests.
# Run with ``python tests.py`` to execute the doctests.
#
# $Id$ # $Id$
import unittest as standard_unittest import unittest as standard_unittest
@ -11,6 +16,16 @@ from loops.agent.core import Agent
from loops.agent.schedule import Job from loops.agent.schedule import Job
class Tester(object):
    """Runs the Twisted reactor step-wise so doctests do not block."""

    def iterate(self, n=10, delays=None):
        """ Perform ``n`` reactor iterations.

            ``delays`` may map an iteration index to the delay (in
            seconds) passed to that ``reactor.iterate()`` call;
            unlisted iterations use a delay of 0.
        """
        # avoid a mutable default argument; None acts as the sentinel
        if delays is None:
            delays = {}
        for i in range(n):
            delay = delays.get(i, 0)
            reactor.iterate(delay)

tester = Tester()
class TestJob(Job): class TestJob(Job):
def execute(self, deferred, **kw): def execute(self, deferred, **kw):
@ -40,9 +55,9 @@ class Test(unittest.TestCase):
def test_suite(): def test_suite():
flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
return standard_unittest.TestSuite(( return standard_unittest.TestSuite((
standard_unittest.makeSuite(Test), #standard_unittest.makeSuite(Test),
doctest.DocFileSuite('README.txt', optionflags=flags), doctest.DocFileSuite('README.txt', optionflags=flags),
)) ))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main(defaultTest='test_suite') standard_unittest.main(defaultTest='test_suite')