work in progress: crawling: let the doctest use the full procedure via master and controller

git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2489 fd906abe-77d9-0310-91a1-e0d9ade77398
helmutm 2008-03-31 17:10:08 +00:00
parent c3e078ca31
commit 922ede26b3
8 changed files with 153 additions and 169 deletions

View file

@@ -6,3 +6,4 @@ $Id$
 
 from cybertools.agent.base import agent, control, job, log, schedule
 from cybertools.agent.core import agent, control, schedule
+from cybertools.agent.crawl import base
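The added import is not cosmetic: cybertools.agent.crawl.base registers its SampleCrawler with the agents factory at module level (see the base.py hunk below), so importing it from the package setup is what makes the 'crawl.sample' name resolvable later. A toy illustration of that import-time side effect, with an invented register/registry pair standing in for the real components machinery:

    # minimal name-keyed registry, standing in for the agents factory
    registry = {}

    def register(factory, name):
        registry[name] = factory

    class SampleCrawler(object):
        pass

    # runs as a side effect of importing the defining module
    register(SampleCrawler, name='crawl.sample')

    # afterwards, lookups by name succeed
    assert registry['crawl.sample'] is SampleCrawler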

View file

@@ -75,6 +75,9 @@ class Master(Agent):
     def setupAgents(self, agentSpecs):
         for spec in agentSpecs:
             agent = agents(self, spec.type)
+            if agent is None:
+                print spec.type
+                return
             agent.name = spec.name
             self.children[spec.name] = agent
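The lookup above goes through the agents adapter factory, and the new guard reports the requested type and stops when nothing is registered under it. A self-contained sketch of the same lookup-and-guard flow, with a plain dict in place of the real factory (AgentSpecification is reduced to the two fields setupAgents reads, and DummyAgent is invented for the example):

    class AgentSpecification(object):
        def __init__(self, name, type):
            self.name = name
            self.type = type

    class DummyAgent(object):
        pass

    class Master(object):
        # toy mapping standing in for cybertools.agent.components.agents
        factories = {'crawl.sample': DummyAgent}

        def __init__(self):
            self.children = {}

        def setupAgents(self, agentSpecs):
            for spec in agentSpecs:
                factory = self.factories.get(spec.type)
                if factory is None:
                    # unknown agent type: report it and give up
                    print(spec.type)
                    return
                agent = factory()
                agent.name = spec.name
                self.children[spec.name] = agent

    master = Master()
    master.setupAgents([AgentSpecification('crawler01', 'crawl.sample')])
    master.setupAgents([AgentSpecification('x01', 'no.such.type')])  # prints 'no.such.type'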

View file

@@ -56,6 +56,10 @@ class SampleController(Controller):
     def _getAgents(self):
         return [AgentSpecification(name, type) for name, type in self.agents]
 
+    def createAgent(self, agentType, name):
+        spec = AgentSpecification(name, agentType)
+        self.agent.setupAgents([spec])
+
     def enterJob(self, jobType, agent):
         self.jobNumber += 1
         spec = JobSpecification(jobType, '%05i' % self.jobNumber, agent=agent)
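createAgent is the piece that lets a doctest drive agent creation through the controller alone: it wraps the name/type pair in an AgentSpecification and hands it to setupAgents on the controller's agent (the master it is attached to). A condensed sketch of that delegation, with the specification classes cut down to what the flow needs (not the actual cybertools definitions):

    class AgentSpecification(object):
        def __init__(self, name, type):
            self.name, self.type = name, type

    class JobSpecification(object):
        def __init__(self, type, identifier, agent=None):
            self.type, self.identifier, self.agent = type, identifier, agent

    class SampleController(object):

        def __init__(self, agent):
            self.agent = agent      # the master this controller belongs to
            self.jobNumber = 0

        def createAgent(self, agentType, name):
            # delegate agent creation to the master, as in the commit
            spec = AgentSpecification(name, agentType)
            self.agent.setupAgents([spec])

        def enterJob(self, jobType, agent):
            # jobs are numbered sequentially: 00001, 00002, ...
            self.jobNumber += 1
            return JobSpecification(jobType, '%05i' % self.jobNumber, agent=agent)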

View file

@@ -1,31 +0,0 @@
-================================================
-Agents for Job Execution and Communication Tasks
-================================================
-
-Agents collect informations and transfer them e.g. to a loops server.
-
-($Id$)
-
-This package does not depend on zope or the other loops packages
-but represents a standalone application.
-
-But we need a reactor for working with Twisted; in order not to block
-testing when running the reactor we use reactor.iterate() calls
-wrapped in a ``tester`` object.
-
-  >>> from cybertools.agent.tests import tester
-
-Master Agent
-============
-
-The agent uses Twisted's cooperative multitasking model.
-This means that all calls to services (like crawler, transporter, ...)
-return a deferred that must be supplied with a callback method (and in
-most cases also an errback method).
-
-  >>> #from cybertools.agent.core.agent import Master
-  >>> #master = Master()

View file

@@ -1,36 +1,46 @@
 ================================================
 Agents for Job Execution and Communication Tasks
 ================================================
 
-Agents collect informations and transfer them e.g. to a loops server.
-
-($Id: README.txt 2413 2008-02-23 14:07:15Z helmutm $)
-
-This package does not depend on zope or the other loops packages
-but represents a standalone application.
-
-But we need a reactor for working with Twisted; in order not to block
-testing when running the reactor we use reactor.iterate() calls
-wrapped in a ``tester`` object.
-
-  >>> from cybertools.agent.tests import tester
+($Id$)
+
+  >>> from cybertools.agent.base.agent import Master
+  >>> config = '''
+  ... controller(name='core.sample')
+  ... scheduler(name='core')
+  ... logger(name='default', standard=30)
+  ... '''
+  >>> master = Master(config)
+  >>> master.setup()
 
 Crawler
-============
+=======
 
 The agent uses Twisted's cooperative multitasking model.
 
-Crawler is the base class for all derived Crawlers like the filesystem crawler
+Crawler is the base class for all derived crawlers like the filesystem crawler
 and the mailcrawler. The SampleCrawler returns a deferred that already had a
 callback being called, so it will return at once.
 
 Returns a deferred that must be supplied with a callback method (and in
 most cases also an errback method).
 
-  >>> from cybertools.agent.crawl.base import SampleCrawler
-  >>> from twisted.internet import defer
-  >>> crawler = SampleCrawler()
-  >>> deferred = crawler.collect()
-  SampleCrawler is collecting.
+We create the sample crawler via the master's controller. The sample
+controller provides a simple method for this purpose.
+
+  >>> controller = master.controllers[0]
+  >>> controller.createAgent('crawl.sample', 'crawler01')
+
+In the next step we request the start of a job, again via the controller.
+
+  >>> controller.enterJob('sample', 'crawler01')
+
+The job is not executed immediately - we have to hand over control to
+the twisted reactor first.
+
+  >>> from cybertools.agent.tests import tester
+  >>> tester.iterate()
+  SampleCrawler is collecting.
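The last doctest step prints only after tester.iterate() because the job is run by the Twisted reactor, not at the moment enterJob is called. The underlying pattern, a deferred whose callback fires once the reactor gets a turn, can be reproduced with plain Twisted (this uses the legacy reactor.iterate() call the tests rely on; it is not the cybertools tester object itself):

    from twisted.internet import reactor
    from twisted.internet.defer import Deferred

    def collect():
        # hand back a deferred and let a later reactor iteration fill it in
        d = Deferred()
        reactor.callLater(0, d.callback, ['item1', 'item2'])
        return d

    def report(result):
        print('collected: %r' % (result,))
        return result

    d = collect()
    d.addCallback(report)

    # nothing has been printed yet; give the reactor one turn,
    # the way the doctest does with tester.iterate()
    reactor.iterate()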

View file

@@ -1,54 +1,54 @@
 #
 # Copyright (c) 2008 Helmut Merz helmutm@cy55.de
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 #
 
 """
 Crawl base and sample classes.
 
 $Id: base.py
 """
 
 from zope.interface import implements
-from cybertools.agent.base.agent import Agent
+from cybertools.agent.base.agent import Master
+from cybertools.agent.core.agent import QueueableAgent
 from cybertools.agent.interfaces import ICrawler
 from cybertools.agent.components import agents
 from twisted.internet.defer import succeed
 
 
-class Crawler(object):
+class Crawler(QueueableAgent):
 
     implements(ICrawler)
 
-    def __init__(self):
-        pass
+    def process(self):
+        return self.collect()
 
     def collect(self, filter=None):
         d = defer.succeed([])
         return d
 
 
 class SampleCrawler(Crawler):
 
     def collect(self, filter=None):
         print 'SampleCrawler is collecting.'
         d = succeed([])
         return d
 
 
-agents.register(SampleCrawler, Agent, name='crawl.sample')
+agents.register(SampleCrawler, Master, name='crawl.sample')
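Rebasing Crawler on QueueableAgent means the crawler no longer runs by itself: the queueing machinery is expected to call process(), which simply forwards to collect(), so concrete crawlers only override collect(). A condensed sketch of that template-method arrangement, with an invented stand-in for QueueableAgent (the real class lives in cybertools.agent.core.agent and will differ in detail):

    from twisted.internet.defer import succeed

    class QueueableAgent(object):
        # stand-in: assume the queue eventually calls execute()/process()
        def execute(self):
            d = self.process()
            d.addCallback(self.completed)
            return d

        def completed(self, result):
            print('job completed with %d result(s)' % len(result))
            return result

    class Crawler(QueueableAgent):

        def process(self):
            # entry point used by the queue; delegates to the crawler hook
            return self.collect()

        def collect(self, filter=None):
            return succeed([])

    class SampleCrawler(Crawler):

        def collect(self, filter=None):
            print('SampleCrawler is collecting.')
            return succeed([])

    SampleCrawler().execute()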

View file

@@ -1,47 +1,46 @@
 #
 # Copyright (c) 2008 Helmut Merz helmutm@cy55.de
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 #
 
 """
 Crawl base and sample classes.
 
 $Id: base.py
 """
 
 from zope.interface import implements
 from cybertools.agent.agent import Agent
-from cybertools.agent.interfaces import ICrawler
-from cybertools.agent.crawl.base import Crawler as BaseCrawler
+from cybertools.agent.crawl.base import Crawler
 from cybertools.agent.components import agents
 from twisted.internet.defer import succeed
 
 
 class MailCrawler(Crawler):
 
     def __init__(self, params):
         self.params = params
 
     def collect(self, filter=None):
         print 'MailCrawler is collecting.'
         d = succeed([])
         return d
 
 
 #would it make sense to register this one at the AdapterFactory?
 #or should it also just serve as base class for OutlookCrawler
 #KMailCrawler etc. ?
 
 #agents.register(MailCrawler, Agent, name='crawl.mail')

View file

@@ -42,9 +42,7 @@ def test_suite():
     testSuite = unittest.TestSuite((
         unittest.makeSuite(Test),
         DocFileSuite('README.txt', optionflags=flags),
-        DocFileSuite('core/README.txt', optionflags=flags),
         DocFileSuite('crawl/README.txt', optionflags=flags),
-        #DocFileSuite('transport/httpput.txt', optionflags=flags),
         ))
     return testSuite
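With core/README.txt removed, only the top-level and crawl doctests are wired into the suite. The stdlib doctest module provides the same DocFileSuite mechanism, so a minimal standalone layout looks roughly like this (paths are examples; the real tests module may use a zope.testing variant and also registers a plain Test case via makeSuite):

    import unittest
    from doctest import DocFileSuite, ELLIPSIS, NORMALIZE_WHITESPACE

    flags = ELLIPSIS | NORMALIZE_WHITESPACE

    def test_suite():
        # each README is run as a doctest file, resolved relative to this module
        return unittest.TestSuite((
            DocFileSuite('README.txt', optionflags=flags),
            DocFileSuite('crawl/README.txt', optionflags=flags),
        ))

    if __name__ == '__main__':
        unittest.main(defaultTest='test_suite')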