set up dummy crawler and transport for testing; implement crawl-transfer sequence via job scheduler

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1800 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-06-22 15:15:25 +00:00
parent 9d89ab30ec
commit 621efb6081
7 changed files with 205 additions and 27 deletions

View file

@ -10,6 +10,12 @@ collecting information and transferring it to the loops server.
This package does not depend on zope or the other loops packages
but represents a standalone application.
However, we need a reactor for working with Twisted; in order not to block
testing when running the reactor, we use reactor.iterate() calls
wrapped in a ``tester`` object.
>>> from loops.agent.tests import tester
Basic Implementation, Agent Core
================================
@ -24,23 +30,6 @@ most cases also an errback method).
>>> agent = Agent()
Browser-based User Interface
============================
The user interface is provided via a browser-based application
based on Twisted and Nevow.
Configuration Management
========================
Functionality
- Storage of configuration parameters
- Interface to the browser-based user interface that allows the
editing of configuration parameters
Scheduling
==========
@ -60,11 +49,9 @@ Configuration (per job)
... print 'executing'
... return d
>>> scheduler.schedule(TestJob(), int(time())+1)
>>> scheduler.schedule(TestJob(), int(time()))
>>> from twisted.internet import reactor
>>> ignore = reactor.callLater(2, reactor.stop)
>>> reactor.run()
>>> tester.iterate()
executing
@ -85,6 +72,21 @@ Configuration (per crawl job)
- predefined metadata
The Dummy Crawler
-----------------
>>> from testing.crawl import CrawlingJob
>>> from testing.transport import Transporter, TransportJob
>>> crawl = CrawlingJob()
>>> transporter = Transporter()
>>> transport = TransportJob(transporter)
>>> crawl.successors.append(transport)
>>> scheduler.schedule(crawl, int(time()))
>>> tester.iterate()
Transferring: Dummy resource data for testing purposes.
Local File System
-----------------
@ -181,3 +183,20 @@ Configuration (per install/update job)
- command: install, update, remove
- package names
Configuration Management
========================
Functionality
- Storage of configuration parameters
- Interface to the browser-based user interface that allows the
editing of configuration parameters
Browser-based User Interface
============================
The user interface is provided via a browser-based application
based on Twisted and Nevow.

View file

@ -43,7 +43,7 @@ class IScheduler(Interface):
def schedule(job, startTime):
""" Register the job given for execution at the intended start
date/time.
date/time and return the job.
"""
def getJobsToExecute(startTime=None):
@ -95,8 +95,12 @@ class ICrawlingJob(IScheduledJob):
'for all resources found.')
def collect(**criteria):
""" Return a collection of resource/metadata pairs that should be transferred
to the server using the selection criteria given.
""" Return a deferred that upon callback will provide a
collection of resource/metadata pairs that should be transferred
to the server.
Use the selection criteria given to filter the resources that
should be collected.
"""
@ -140,6 +144,13 @@ class ITransporter(Interface):
"""
class ITransportJob(IScheduledJob):
    """ A job managing the transfer of a resource to the server.
    """

    transporter = Attribute('The transporter object to use for transfer.')
class IConfigurator(Interface):
""" Manages (stores and receives) configuration information.
"""

View file

@ -72,7 +72,8 @@ class Job(object):
def finishRun(self, result):
for job in self.successors:
job.run(job, **job.params)
job.params['result'] = result
job.run(**job.params)
# TODO: remove from queue
# TODO: logging
# TODO: reschedule if told by configuration

View file

@ -0,0 +1,4 @@
"""
$Id$
"""

60
agent/testing/crawl.py Normal file
View file

@ -0,0 +1,60 @@
#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
A dummy crawler for testing purposes.
$Id$
"""
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from zope.interface import implements
from loops.agent.interfaces import ICrawlingJob, IMetadataSet
from loops.agent.schedule import Job
class CrawlingJob(Job):
    """ Dummy crawling job for testing: delivers one canned
        resource/metadata pair via a deferred.
    """

    implements(ICrawlingJob)

    def __init__(self):
        # The attribute is set before the base class constructor runs.
        self.predefinedMetadata = {}
        super(CrawlingJob, self).__init__()

    def execute(self, **kw):
        """ Run the job by delegating to ``collect()``; all keyword
            arguments are passed on as selection criteria.
        """
        return self.collect(**kw)

    def collect(self, **criteria):
        """ Return a new deferred and schedule ``dataAvailable()`` on the
            reactor with a delay of 0.

            The criteria given are not evaluated by this dummy.
        """
        pending = Deferred()
        # dataAvailable() looks the deferred up on the instance.
        self.deferred = pending
        reactor.callLater(0, self.dataAvailable)
        return pending

    def dataAvailable(self):
        """ Fire the stored deferred with a one-element list holding a
            (DummyResource, Metadata) pair.
        """
        pairs = [(DummyResource(), Metadata())]
        self.deferred.callback(pairs)
class Metadata(object):
    """ Empty metadata object for testing; it only declares that it
        implements IMetadataSet.
    """
    implements(IMetadataSet)
class DummyResource(object):
    """ Resource stand-in for testing that carries a fixed ``data`` string.
    """
    # Fixed payload; the same for every instance.
    data = 'Dummy resource data for testing purposes.'

View file

@ -0,0 +1,68 @@
#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
A dummy transport for testing purposes.
$Id$
"""
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from zope.interface import implements
from loops.agent.interfaces import ITransportJob, ITransporter
from loops.agent.schedule import Job
class Transporter(object):
implements(ITransporter)
serverURL = None
method = None
machineName = None
userName = None
password = None
def transfer(self, resource, metadata=None, resourceType=file):
if resourceType is file:
data = resource.read()
resource.close()
elif resourceType is str:
data = resource
print 'Transferring:', data
return Deferred()
class TransportJob(Job):
implements(ITransportJob)
def __init__(self, transporter):
super(TransportJob, self).__init__()
self.transporter = transporter
def execute(self, **kw):
result = kw.get('result')
if result is None:
print 'No data available.'
else:
for r in result:
d = self.transporter.transfer(r[0].data, r[1], str)
return Deferred()

View file

@ -1,3 +1,8 @@
#
# Run with ``trial2.4 tests.py`` to execute the twisted unit tests.
# Run with ``python tests.py`` to execute the doctests.
#
# $Id$
import unittest as standard_unittest
@ -11,6 +16,16 @@ from loops.agent.core import Agent
from loops.agent.schedule import Job
class Tester(object):
    """ Helper that steps the Twisted reactor via ``reactor.iterate()``
        calls.
    """

    def iterate(self, n=10, delays=None):
        """ Call ``reactor.iterate()`` ``n`` times.

            ``delays`` may map an iteration index (0 .. n-1) to the delay
            passed to ``reactor.iterate()`` for that iteration; indexes
            not present use a delay of 0.
        """
        # Use None instead of a shared mutable default dictionary.
        if delays is None:
            delays = {}
        for i in range(n):
            reactor.iterate(delays.get(i, 0))
tester = Tester()
class TestJob(Job):
def execute(self, deferred, **kw):
@ -40,9 +55,9 @@ class Test(unittest.TestCase):
def test_suite():
flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
return standard_unittest.TestSuite((
standard_unittest.makeSuite(Test),
#standard_unittest.makeSuite(Test),
doctest.DocFileSuite('README.txt', optionflags=flags),
))
if __name__ == '__main__':
unittest.main(defaultTest='test_suite')
standard_unittest.main(defaultTest='test_suite')