set up dummy crawler and transport for testing; implement crawl-transfer sequence via job scheduler
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1800 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
9d89ab30ec
commit
621efb6081
7 changed files with 205 additions and 27 deletions
|
@ -10,6 +10,12 @@ collecting information and transferring it to the loops server.
|
|||
This package does not depend on zope or the other loops packages
|
||||
but represents a standalone application.
|
||||
|
||||
But we need a reactor for working with Twisted; in order not to block
|
||||
testing when running the reactor we use reactor.iterate() calls
|
||||
wrapped in a ``tester`` object.
|
||||
|
||||
>>> from loops.agent.tests import tester
|
||||
|
||||
|
||||
Basic Implementation, Agent Core
|
||||
================================
|
||||
|
@ -24,23 +30,6 @@ most cases also an errback method).
|
|||
>>> agent = Agent()
|
||||
|
||||
|
||||
Browser-based User Interface
|
||||
============================
|
||||
|
||||
The user interface is provided via a browser-based application
|
||||
based on Twisted and Nevow.
|
||||
|
||||
|
||||
Configuration Management
|
||||
========================
|
||||
|
||||
Functionality
|
||||
|
||||
- Storage of configuration parameters
|
||||
- Interface to the browser-based user interface that allows the
|
||||
editing of configuration parameters
|
||||
|
||||
|
||||
Scheduling
|
||||
==========
|
||||
|
||||
|
@ -60,11 +49,9 @@ Configuration (per job)
|
|||
... print 'executing'
|
||||
... return d
|
||||
|
||||
>>> scheduler.schedule(TestJob(), int(time())+1)
|
||||
>>> scheduler.schedule(TestJob(), int(time()))
|
||||
|
||||
>>> from twisted.internet import reactor
|
||||
>>> ignore = reactor.callLater(2, reactor.stop)
|
||||
>>> reactor.run()
|
||||
>>> tester.iterate()
|
||||
executing
|
||||
|
||||
|
||||
|
@ -85,6 +72,21 @@ Configuration (per crawl job)
|
|||
|
||||
- predefined metadata
|
||||
|
||||
The Dummy Crawler
|
||||
-----------------
|
||||
|
||||
>>> from testing.crawl import CrawlingJob
|
||||
>>> from testing.transport import Transporter, TransportJob
|
||||
|
||||
>>> crawl = CrawlingJob()
|
||||
>>> transporter = Transporter()
|
||||
>>> transport = TransportJob(transporter)
|
||||
>>> crawl.successors.append(transport)
|
||||
>>> scheduler.schedule(crawl, int(time()))
|
||||
|
||||
>>> tester.iterate()
|
||||
Transferring: Dummy resource data for testing purposes.
|
||||
|
||||
Local File System
|
||||
-----------------
|
||||
|
||||
|
@ -181,3 +183,20 @@ Configuration (per install/update job)
|
|||
- command: install, update, remove
|
||||
- package names
|
||||
|
||||
|
||||
Configuration Management
|
||||
========================
|
||||
|
||||
Functionality
|
||||
|
||||
- Storage of configuration parameters
|
||||
- Interface to the browser-based user interface that allows the
|
||||
editing of configuration parameters
|
||||
|
||||
|
||||
Browser-based User Interface
|
||||
============================
|
||||
|
||||
The user interface is provided via a browser-based application
|
||||
based on Twisted and Nevow.
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ class IScheduler(Interface):
|
|||
|
||||
def schedule(job, startTime):
|
||||
""" Register the job given for execution at the intended start
|
||||
date/time.
|
||||
date/time and return the job.
|
||||
"""
|
||||
|
||||
def getJobsToExecute(startTime=None):
|
||||
|
@ -95,8 +95,12 @@ class ICrawlingJob(IScheduledJob):
|
|||
'for all resources found.')
|
||||
|
||||
def collect(**criteria):
|
||||
""" Return a collection of resource/metadata pairs that should be transferred
|
||||
to the server using the selection criteria given.
|
||||
""" Return a deferred that upon callback will provide a
|
||||
collection of resource/metadata pairs that should be transferred
|
||||
to the server.
|
||||
|
||||
Use the selection criteria given to filter the resources that
|
||||
should be collected.
|
||||
"""
|
||||
|
||||
|
||||
|
@ -140,6 +144,13 @@ class ITransporter(Interface):
|
|||
"""
|
||||
|
||||
|
||||
class ITransportJob(IScheduledJob):
    """ A job managing the transfer of a resource to the server.
    """

    # The transporter does the actual transfer on behalf of the job.
    transporter = Attribute('The transporter object to use for transfer.')
|
||||
|
||||
|
||||
class IConfigurator(Interface):
|
||||
""" Manages (stores and receives) configuration information.
|
||||
"""
|
||||
|
|
|
@ -72,7 +72,8 @@ class Job(object):
|
|||
|
||||
def finishRun(self, result):
|
||||
for job in self.successors:
|
||||
job.run(job, **job.params)
|
||||
job.params['result'] = result
|
||||
job.run(**job.params)
|
||||
# TODO: remove from queue
|
||||
# TODO: logging
|
||||
# TODO: reschedule if told by configuration
|
||||
|
|
4
agent/testing/__init__.py
Normal file
4
agent/testing/__init__.py
Normal file
|
@ -0,0 +1,4 @@
|
|||
"""
|
||||
$Id$
|
||||
"""
|
||||
|
60
agent/testing/crawl.py
Normal file
60
agent/testing/crawl.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
#
|
||||
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
"""
|
||||
A dummy crawler for testing purposes.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
from twisted.internet import reactor
|
||||
from twisted.internet.defer import Deferred
|
||||
from zope.interface import implements
|
||||
|
||||
from loops.agent.interfaces import ICrawlingJob, IMetadataSet
|
||||
from loops.agent.schedule import Job
|
||||
|
||||
|
||||
class CrawlingJob(Job):
    """ Dummy crawling job that delivers one fixed resource/metadata pair.
    """

    implements(ICrawlingJob)

    def __init__(self):
        # predefinedMetadata is set up before Job.__init__() is called.
        self.predefinedMetadata = {}
        super(CrawlingJob, self).__init__()

    def execute(self, **kw):
        """ Start the crawl; the keyword arguments are passed on to
            ``collect()`` as selection criteria.
        """
        return self.collect(**kw)

    def collect(self, **criteria):
        """ Return a deferred that gets fired by ``dataAvailable()``.

            The criteria given are not evaluated by this dummy crawler.
        """
        self.deferred = Deferred()
        # Deliver the data from a separate reactor call, not from here.
        reactor.callLater(0, self.dataAvailable)
        return self.deferred

    def dataAvailable(self):
        """ Fire the pending deferred with a one-element list holding a
            (resource, metadata) pair.
        """
        pairs = [(DummyResource(), Metadata())]
        self.deferred.callback(pairs)
|
||||
|
||||
|
||||
class Metadata(object):
    """ Empty metadata set handed out by the dummy crawler.
    """

    implements(IMetadataSet)
|
||||
|
||||
|
||||
class DummyResource(object):
    """ Stand-in resource providing fixed text via its ``data`` attribute.
    """

    data = 'Dummy resource data for testing purposes.'
|
68
agent/testing/transport.py
Normal file
68
agent/testing/transport.py
Normal file
|
@ -0,0 +1,68 @@
|
|||
#
|
||||
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
"""
|
||||
A dummy transport for testing purposes.
|
||||
|
||||
$Id$
|
||||
"""
|
||||
|
||||
from twisted.internet import reactor
|
||||
from twisted.internet.defer import Deferred
|
||||
from zope.interface import implements
|
||||
|
||||
from loops.agent.interfaces import ITransportJob, ITransporter
|
||||
from loops.agent.schedule import Job
|
||||
|
||||
|
||||
class Transporter(object):
|
||||
|
||||
implements(ITransporter)
|
||||
|
||||
serverURL = None
|
||||
method = None
|
||||
machineName = None
|
||||
userName = None
|
||||
password = None
|
||||
|
||||
def transfer(self, resource, metadata=None, resourceType=file):
|
||||
if resourceType is file:
|
||||
data = resource.read()
|
||||
resource.close()
|
||||
elif resourceType is str:
|
||||
data = resource
|
||||
print 'Transferring:', data
|
||||
return Deferred()
|
||||
|
||||
|
||||
class TransportJob(Job):
|
||||
|
||||
implements(ITransportJob)
|
||||
|
||||
def __init__(self, transporter):
|
||||
super(TransportJob, self).__init__()
|
||||
self.transporter = transporter
|
||||
|
||||
def execute(self, **kw):
|
||||
result = kw.get('result')
|
||||
if result is None:
|
||||
print 'No data available.'
|
||||
else:
|
||||
for r in result:
|
||||
d = self.transporter.transfer(r[0].data, r[1], str)
|
||||
return Deferred()
|
|
@ -1,3 +1,8 @@
|
|||
#
|
||||
# Run with ``trial2.4 tests.py`` to execute the twisted unit tests.
|
||||
# Run with ``python tests.py`` to execute the doctests.
|
||||
#
|
||||
|
||||
# $Id$
|
||||
|
||||
import unittest as standard_unittest
|
||||
|
@ -11,6 +16,16 @@ from loops.agent.core import Agent
|
|||
from loops.agent.schedule import Job
|
||||
|
||||
|
||||
class Tester(object):
    """ Runs the Twisted reactor stepwise via ``reactor.iterate()``.
    """

    def iterate(self, n=10, delays=None):
        """ Call ``reactor.iterate()`` ``n`` times.

            ``delays`` optionally maps an iteration index to the delay
            passed to that ``reactor.iterate()`` call; iterations not
            listed use a delay of 0.
        """
        if delays is None:
            # Avoid a mutable default argument shared between calls.
            delays = {}
        for i in range(n):
            reactor.iterate(delays.get(i, 0))

tester = Tester()
|
||||
|
||||
|
||||
class TestJob(Job):
|
||||
|
||||
def execute(self, deferred, **kw):
|
||||
|
@ -40,9 +55,9 @@ class Test(unittest.TestCase):
|
|||
def test_suite():
|
||||
flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
|
||||
return standard_unittest.TestSuite((
|
||||
standard_unittest.makeSuite(Test),
|
||||
#standard_unittest.makeSuite(Test),
|
||||
doctest.DocFileSuite('README.txt', optionflags=flags),
|
||||
))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(defaultTest='test_suite')
|
||||
standard_unittest.main(defaultTest='test_suite')
|
||||
|
|
Loading…
Add table
Reference in a new issue