loops/agent/core.py
scrat c3ffee3437 core.py
* agent now creates an empty temporary directory in
  which all jobs should create their job directories

ui/web.py
* outlook crawler has changed, adapted ui methods
  accordingly to access OutlookResource objects
  instead of email.MIME lists
* using agent object as an attribute in the
  AgentHome class which is now passed through when
  necessary
* changed form for creating OutlookCrawlJobs to provide
  possibility to select whether inbox, subfolders or
  both foldertypes should be crawled and to specify
  a regular expression for subfolder selection
  todo: add scheduler options
* changed the way job details are displayed
* changed display of crawling resources:
  now it is also possible to display the job folders
  and enter them to view the mail objects

* added first sketch of a filesystem crawler form,
  without functionality at the moment

known bugs: currently having problems with character encodings

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1950 fd906abe-77d9-0310-91a1-e0d9ade77398
2007-08-22 13:57:22 +00:00

92 lines
3.2 KiB
Python

#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
The real agent stuff.
$Id$
"""
from time import time
import tempfile
from zope.interface import implements
from loops.agent.interfaces import IAgent
from loops.agent.config import Configurator
from loops.agent.crawl import filesystem
from loops.agent.log import Logger
from loops.agent.schedule import Scheduler, Stopper
from loops.agent.transport import base
# Registry of crawl job factories, keyed by crawl type name.
crawlTypes = {
    'filesystem': filesystem.CrawlingJob,
}

# Registry of transporter factories, keyed by transport type name.
transportTypes = {
    'httpput': base.Transporter,
}
class Agent(object):
    """Central agent object.

    Holds the configuration, the scheduler, a stopper job, the logger and
    a temporary working directory, and turns the crawl entries of the
    configuration into scheduled jobs.
    """

    implements(IAgent)

    # Factories for crawl jobs and transporters, looked up by the type
    # names given in the configuration.
    crawlTypes = crawlTypes
    transportTypes = transportTypes

    def __init__(self, conf=None):
        """Load the configuration and set up the agent's collaborators.

        ``conf`` is passed on unchanged to ``Configurator.load()``.
        """
        config = self.config = Configurator('ui', 'crawl', 'transport',
                                            'logging')
        config.load(conf)
        self.scheduler = Scheduler(self)
        self.stopper = Stopper()
        self.stopper.scheduler = self.scheduler
        self.logger = Logger(self)
        # Fresh, empty directory; jobs are expected to create their
        # job directories inside it.
        self.tempdir = tempfile.mkdtemp(prefix='loops_')

    def scheduleJobsFromConfig(self, stop=False):
        """Create and schedule a crawl job for each entry in ``config.crawl``.

        Entries whose ``type`` is not registered in ``crawlTypes`` are
        skipped. If the entry's transport type (default ``'httpput'``) is
        registered in ``transportTypes``, a transport job is appended to
        the crawl job's successors.

        If ``stop`` is true, the stopper is chained after the last job that
        was created; if no job was created at all, the stopper is
        scheduled directly.
        """
        config = self.config
        lastJob = None
        for info in config.crawl:
            crawlFactory = self.crawlTypes.get(info.type)
            if crawlFactory is None:
                continue
            job = lastJob = crawlFactory()
            job.params = dict((name, value)
                              for name, value in info.items()
                              if name not in job.baseProperties)
            transportType = info.transport or 'httpput'
            transportFactory = self.transportTypes.get(transportType)
            if transportFactory is not None:
                params = dict(config.transport.items())
                transporter = transportFactory(self, **params)
                # TODO: configure transporter or - better -
                #       set up transporter(s) just once
                job.successors.append(transporter.createJob())
            # NOTE(review): the job is scheduled whether or not a transporter
            # could be attached - confirm this matches the intended nesting.
            job.repeat = info.repeat or 0
            self.scheduler.schedule(job, info.starttime or int(time()))
            # TODO: remove job from config
            # TODO: put repeating info in config
            # TODO: remember last run for repeating job
        if not stop:
            return
        if lastJob is None:
            # Nothing to wait for: schedule the stopper directly.
            self.scheduler.schedule(self.stopper)
        elif lastJob.successors:
            # Stop after the last job's final successor (the transport job).
            lastTrJob = lastJob.successors[-1]
            lastTrJob.successors.append(self.stopper)
        else:
            # The last job has no successor (e.g. unknown transport type);
            # chain the stopper directly after it instead of failing with
            # an IndexError on successors[-1].
            lastJob.successors.append(self.stopper)