set up dummy crawler and transport for testing; implement crawl-transfer sequence via job scheduler
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1800 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
9d89ab30ec
commit
621efb6081
7 changed files with 205 additions and 27 deletions
|
@ -10,6 +10,12 @@ collecting information and transferring it to the loops server.
|
||||||
This package does not depend on zope or the other loops packages
|
This package does not depend on zope or the other loops packages
|
||||||
but represents a standalone application.
|
but represents a standalone application.
|
||||||
|
|
||||||
|
But we need a reactor for working with Twisted; in order not to block
|
||||||
|
testing when running the reactor we use reactor.iterate() calls
|
||||||
|
wrapped in a ``tester`` object.
|
||||||
|
|
||||||
|
>>> from loops.agent.tests import tester
|
||||||
|
|
||||||
|
|
||||||
Basic Implementation, Agent Core
|
Basic Implementation, Agent Core
|
||||||
================================
|
================================
|
||||||
|
@ -24,23 +30,6 @@ most cases also an errback method).
|
||||||
>>> agent = Agent()
|
>>> agent = Agent()
|
||||||
|
|
||||||
|
|
||||||
Browser-based User Interface
|
|
||||||
============================
|
|
||||||
|
|
||||||
The user interface is provided via a browser-based application
|
|
||||||
based on Twisted and Nevow.
|
|
||||||
|
|
||||||
|
|
||||||
Configuration Management
|
|
||||||
========================
|
|
||||||
|
|
||||||
Functionality
|
|
||||||
|
|
||||||
- Storage of configuration parameters
|
|
||||||
- Interface to the browser-based user interface that allows the
|
|
||||||
editing of configuration parameters
|
|
||||||
|
|
||||||
|
|
||||||
Scheduling
|
Scheduling
|
||||||
==========
|
==========
|
||||||
|
|
||||||
|
@ -60,11 +49,9 @@ Configuration (per job)
|
||||||
... print 'executing'
|
... print 'executing'
|
||||||
... return d
|
... return d
|
||||||
|
|
||||||
>>> scheduler.schedule(TestJob(), int(time())+1)
|
>>> scheduler.schedule(TestJob(), int(time()))
|
||||||
|
|
||||||
>>> from twisted.internet import reactor
|
>>> tester.iterate()
|
||||||
>>> ignore = reactor.callLater(2, reactor.stop)
|
|
||||||
>>> reactor.run()
|
|
||||||
executing
|
executing
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,6 +72,21 @@ Configuration (per crawl job)
|
||||||
|
|
||||||
- predefined metadata
|
- predefined metadata
|
||||||
|
|
||||||
|
The Dummy Crawler
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
>>> from testing.crawl import CrawlingJob
|
||||||
|
>>> from testing.transport import Transporter, TransportJob
|
||||||
|
|
||||||
|
>>> crawl = CrawlingJob()
|
||||||
|
>>> transporter = Transporter()
|
||||||
|
>>> transport = TransportJob(transporter)
|
||||||
|
>>> crawl.successors.append(transport)
|
||||||
|
>>> scheduler.schedule(crawl, int(time()))
|
||||||
|
|
||||||
|
>>> tester.iterate()
|
||||||
|
Transferring: Dummy resource data for testing purposes.
|
||||||
|
|
||||||
Local File System
|
Local File System
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
@ -181,3 +183,20 @@ Configuration (per install/update job)
|
||||||
- command: install, update, remove
|
- command: install, update, remove
|
||||||
- package names
|
- package names
|
||||||
|
|
||||||
|
|
||||||
|
Configuration Management
|
||||||
|
========================
|
||||||
|
|
||||||
|
Functionality
|
||||||
|
|
||||||
|
- Storage of configuration parameters
|
||||||
|
- Interface to the browser-based user interface that allows the
|
||||||
|
editing of configuration parameters
|
||||||
|
|
||||||
|
|
||||||
|
Browser-based User Interface
|
||||||
|
============================
|
||||||
|
|
||||||
|
The user interface is provided via a browser-based application
|
||||||
|
based on Twisted and Nevow.
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ class IScheduler(Interface):
|
||||||
|
|
||||||
def schedule(job, startTime):
|
def schedule(job, startTime):
|
||||||
""" Register the job given for execution at the intended start
|
""" Register the job given for execution at the intended start
|
||||||
date/time.
|
date/time and return the job.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def getJobsToExecute(startTime=None):
|
def getJobsToExecute(startTime=None):
|
||||||
|
@ -95,8 +95,12 @@ class ICrawlingJob(IScheduledJob):
|
||||||
'for all resources found.')
|
'for all resources found.')
|
||||||
|
|
||||||
def collect(**criteria):
|
def collect(**criteria):
|
||||||
""" Return a collection of resource/metadata pairs that should be transferred
|
""" Return a deferred that upon callback will provide a
|
||||||
to the server using the selection criteria given.
|
collection of resource/metadata pairs that should be transferred
|
||||||
|
to the server.
|
||||||
|
|
||||||
|
Use the selection criteria given to filter the resources that
|
||||||
|
should be collected.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@ -140,6 +144,13 @@ class ITransporter(Interface):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class ITransportJob(IScheduledJob):
    """ A job managing the transfer of a resource to the server.
    """

    transporter = Attribute('The transporter object to use for transfer.')
|
||||||
|
|
||||||
|
|
||||||
class IConfigurator(Interface):
|
class IConfigurator(Interface):
|
||||||
""" Manages (stores and receives) configuration information.
|
""" Manages (stores and receives) configuration information.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -72,7 +72,8 @@ class Job(object):
|
||||||
|
|
||||||
def finishRun(self, result):
|
def finishRun(self, result):
|
||||||
for job in self.successors:
|
for job in self.successors:
|
||||||
job.run(job, **job.params)
|
job.params['result'] = result
|
||||||
|
job.run(**job.params)
|
||||||
# TODO: remove from queue
|
# TODO: remove from queue
|
||||||
# TODO: logging
|
# TODO: logging
|
||||||
# TODO: reschedule if told by configuration
|
# TODO: reschedule if told by configuration
|
||||||
|
|
4
agent/testing/__init__.py
Normal file
4
agent/testing/__init__.py
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
"""
|
||||||
|
$Id$
|
||||||
|
"""
|
||||||
|
|
60
agent/testing/crawl.py
Normal file
60
agent/testing/crawl.py
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
#
|
||||||
|
|
||||||
|
"""
|
||||||
|
A dummy crawler for testing purposes.
|
||||||
|
|
||||||
|
$Id$
|
||||||
|
"""
|
||||||
|
|
||||||
|
from twisted.internet import reactor
|
||||||
|
from twisted.internet.defer import Deferred
|
||||||
|
from zope.interface import implements
|
||||||
|
|
||||||
|
from loops.agent.interfaces import ICrawlingJob, IMetadataSet
|
||||||
|
from loops.agent.schedule import Job
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlingJob(Job):
    """ Dummy crawling job that always delivers one fixed
        resource/metadata pair.

        The selection criteria passed in are accepted but not evaluated.
    """

    implements(ICrawlingJob)

    def __init__(self):
        self.predefinedMetadata = {}
        super(CrawlingJob, self).__init__()

    def execute(self, **kw):
        """ Run the job; simply delegates to ``collect()``.
        """
        return self.collect(**kw)

    def collect(self, **criteria):
        """ Return a deferred that is fired from ``dataAvailable()``.

            The deferred is kept on the instance and ``dataAvailable()``
            is scheduled via ``reactor.callLater()`` with a delay of 0.
        """
        self.deferred = Deferred()
        reactor.callLater(0, self.dataAvailable)
        return self.deferred

    def dataAvailable(self):
        """ Fire the stored deferred with a one-element list holding a
            (DummyResource, Metadata) pair.
        """
        found = [(DummyResource(), Metadata())]
        self.deferred.callback(found)
|
||||||
|
|
||||||
|
|
||||||
|
class Metadata(object):
    """ Empty metadata set handed out by the dummy crawler.
    """

    implements(IMetadataSet)
|
||||||
|
|
||||||
|
|
||||||
|
class DummyResource(object):
    """ Resource stub carrying a fixed text payload in ``data``.
    """

    data = 'Dummy resource data for testing purposes.'
|
68
agent/testing/transport.py
Normal file
68
agent/testing/transport.py
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
#
|
||||||
|
|
||||||
|
"""
|
||||||
|
A dummy transport for testing purposes.
|
||||||
|
|
||||||
|
$Id$
|
||||||
|
"""
|
||||||
|
|
||||||
|
from twisted.internet import reactor
|
||||||
|
from twisted.internet.defer import Deferred
|
||||||
|
from zope.interface import implements
|
||||||
|
|
||||||
|
from loops.agent.interfaces import ITransportJob, ITransporter
|
||||||
|
from loops.agent.schedule import Job
|
||||||
|
|
||||||
|
|
||||||
|
class Transporter(object):
    """ Dummy transporter: prints the data to stdout instead of sending
        it to a server.
    """

    implements(ITransporter)

    # Connection settings; none of them is read by this class.
    serverURL = None
    method = None
    machineName = None
    userName = None
    password = None

    def transfer(self, resource, metadata=None, resourceType=file):
        """ Print the data of the resource given and return a deferred.

            If ``resourceType`` is ``file`` the resource is read and then
            closed (also when reading fails); if it is ``str`` the
            resource itself is used as the data. Any other
            ``resourceType`` raises a ValueError. ``metadata`` is not
            used.

            NOTE(review): the deferred returned is never fired in this
            method - confirm whether callers are expected to wait on it.
        """
        if resourceType is file:
            try:
                data = resource.read()
            finally:
                # release the resource even if read() raised
                resource.close()
        elif resourceType is str:
            data = resource
        else:
            # previously this fell through and failed on the unbound
            # ``data`` variable below
            raise ValueError('Unsupported resource type: %r'
                             % (resourceType,))
        # single-argument form gives the same output under the Python 2
        # print statement and the Python 3 print function
        print('Transferring: %s' % (data,))
        return Deferred()
|
||||||
|
|
||||||
|
|
||||||
|
class TransportJob(Job):
    """ Job that hands the resources it receives as ``result`` over to
        a transporter.
    """

    implements(ITransportJob)

    def __init__(self, transporter):
        super(TransportJob, self).__init__()
        self.transporter = transporter

    def execute(self, **kw):
        """ Transfer every resource/metadata pair found in ``kw['result']``.

            Each pair's resource must provide a ``data`` attribute that
            is passed to the transporter as a string resource. If there
            is no ``result`` a notice is printed instead.

            NOTE(review): the deferreds returned by ``transfer()`` are
            discarded and the deferred returned here is never fired in
            this method - confirm whether that is intended.
        """
        result = kw.get('result')
        if result is None:
            print('No data available.')
        else:
            for pair in result:
                self.transporter.transfer(pair[0].data, pair[1], str)
        return Deferred()
|
|
@ -1,3 +1,8 @@
|
||||||
|
#
|
||||||
|
# Run with ``trial2.4 tests.py`` to execute the twisted unit tests.
|
||||||
|
# Run with ``python tests.py`` to execute the doctests.
|
||||||
|
#
|
||||||
|
|
||||||
# $Id$
|
# $Id$
|
||||||
|
|
||||||
import unittest as standard_unittest
|
import unittest as standard_unittest
|
||||||
|
@ -11,6 +16,16 @@ from loops.agent.core import Agent
|
||||||
from loops.agent.schedule import Job
|
from loops.agent.schedule import Job
|
||||||
|
|
||||||
|
|
||||||
|
class Tester(object):
    """ Helper that drives the Twisted reactor step by step so that
        tests need not call the blocking ``reactor.run()``.
    """

    def iterate(self, n=10, delays=None):
        """ Call ``reactor.iterate()`` ``n`` times.

            ``delays`` optionally maps an iteration index to the delay
            passed to ``reactor.iterate()`` for that step; steps without
            an entry use a delay of 0.
        """
        if delays is None:
            # avoid a mutable default argument shared between calls
            delays = {}
        for i in range(n):
            reactor.iterate(delays.get(i, 0))


tester = Tester()
|
||||||
|
|
||||||
|
|
||||||
class TestJob(Job):
|
class TestJob(Job):
|
||||||
|
|
||||||
def execute(self, deferred, **kw):
|
def execute(self, deferred, **kw):
|
||||||
|
@ -40,9 +55,9 @@ class Test(unittest.TestCase):
|
||||||
def test_suite():
|
def test_suite():
|
||||||
flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
|
flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
|
||||||
return standard_unittest.TestSuite((
|
return standard_unittest.TestSuite((
|
||||||
standard_unittest.makeSuite(Test),
|
#standard_unittest.makeSuite(Test),
|
||||||
doctest.DocFileSuite('README.txt', optionflags=flags),
|
doctest.DocFileSuite('README.txt', optionflags=flags),
|
||||||
))
|
))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main(defaultTest='test_suite')
|
standard_unittest.main(defaultTest='test_suite')
|
||||||
|
|
Loading…
Add table
Reference in a new issue