From ad281796eabf726dffac30542e9b3172f55b6ff2 Mon Sep 17 00:00:00 2001 From: helmutm Date: Fri, 8 Jun 2007 17:13:32 +0000 Subject: [PATCH] work in progress: loops.agent specification git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1780 fd906abe-77d9-0310-91a1-e0d9ade77398 --- agent/README.txt | 147 ++++++++++++++++++++++++++++++++---- agent/config.py | 18 ++++- agent/{agent.py => core.py} | 0 agent/crawl/filesystem.py | 2 +- agent/interfaces.py | 89 +++++++++++++++++++--- agent/loops.tac | 5 +- 6 files changed, 232 insertions(+), 29 deletions(-) rename agent/{agent.py => core.py} (100%) diff --git a/agent/README.txt b/agent/README.txt index 15c16fd..6a9b27a 100644 --- a/agent/README.txt +++ b/agent/README.txt @@ -11,35 +11,152 @@ This package does not depend on zope or the other loops packages but represents a standalone application. -Basic Configuration -=================== +Basic Implementation, Agent Core +================================ -Parameter(s): URL of target loops site. +The agent uses Twisted's cooperative multitasking model. -(Future extension: use more than one target loops site) +This means that all calls to services (like crawler, transporter, ...) +return a deferred that must be supplied with a callback method (and in +most cases also an errback method). +Browser-based User Interface +============================ + +The user interface is provided via a browser-based application +based on Twisted and Nevow. + + +Configuration Management +======================== + +Functionality + +- Storage of configuration parameters +- Interface to the browser-based user interface that allows the + editing of configuration parameters + + +Scheduling +========== + +Configuration (per job) + +- schedule, repeating pattern, conditions +- following job(s), e.g. 
to start a transfer immediately after a crawl + + +Crawling +======== + +General +------- + +Functionality + +- search for new or changed resources according to the search and + filter criteria +- keep a record of resources transferred already in order to avoid + duplicate transfers (?) + +Configuration (per crawl job) + +- predefined metadata + Local File System -================= +----------------- -Configuration -------------- +Configuration (per crawl job) -Parameters: directories to search, schedules. +- directories to search +- filter criteria, e.g. file type -Logging info ------------- +Metadata sources +- path, filename E-Mail-Clients -============== +-------------- + +Configuration (per crawl job) + +- folders to search +- filter criteria (e.g. sender, receiver, subject patterns) + +Metadata sources + +- folder names (path) +- header fields (sender, receiver, subject, ...) + +Special handling + +- HTML vs. plain text content: if a mail contains both HTML and plain + text parts the transfer may be limited to one of these parts (configuration + setting) +- attachments may be ignored (configuration setting; useful when attachments + are copied to the local filesystem and transferred from there anyway) -Software Update +Transport +========= + +Configuration + +- URL of the target loops site, e.g. http://z3.loops.cy55.de/bwp/d5 +- username, password for logging in to loops +- machine name: name under which the client computer is known to the + loops server +- Transfer method, e.g. PUT + +The following information is intended for the default transfer +protocol/method HTTP PUT but probably also pertains to other protocols +like e.g. 
+ +Format/Information structure +---------------------------- + +- Metadata URL (for storing or accessing metadata sets - optional, see below): + ``$loopsSiteURL/resource_meta/$machine_name/$service/$path.xml`` +- Resource URL (for storing or accessing the real resources): + ``$loopsSiteURL/resource_data/$machine_name/$service/$path`` +- ``$service`` names the crawler service, e.g. "filesystem" or "outlook" +- ``$path`` represents the full path, possibly with drive specification in front + (for filesystem resources on Windows), with special characters URL-escaped + +Note that the URL uniquely identifies the resource on the local computer, +so a resource transferred with the exact location (path and filename) +on the local computer as a resource transferred previously will overwrite +the old version, so that the classification of the resource within loops +won't get lost. (This is of no relevance to emails.) + +Metadata sets are XML files with metadata for the associated resource. +Usually a metadata set has the extension ".xml"; if the extension is ".zip" +the metadata file is a compressed file that will be expanded on the +server. + +Data files may also be compressed in which case there must be a corresponding +entry in the associated metadata set. 
+ + +Logging +======= + +Configuration + +- log format(s) +- log file(s) (or other forms of persistence) + + +Software Loader =============== +Configuration (general) -Fin de partie -============= +- source list: URL(s) of site(s) providing updated or additional packages + +Configuration (per install/update job) + +- command: install, update, remove +- package names -(tearDown) diff --git a/agent/config.py b/agent/config.py index db79be2..b201788 100644 --- a/agent/config.py +++ b/agent/config.py @@ -26,9 +26,25 @@ from zope.interface import implements from loops.agent.interfaces import IConfigurator -class Configurator(object) +class Configurator(object): implements(IConfigurator) def loadConfiguration(self): pass + + def addConfigOption(self, key, value): + setattr(self, key, value) + + def getConfigOption(self, key, value): + return getattr(self, key, None) + + +conf = Configurator() + +# this is just for convenience during the development phase, +# thus we can retrieve the port easily via ``conf.ui.web.port`` +conf.addConfigOption('ui', Configurator()) +conf.ui.addConfigOption('web', Configurator()) +conf.ui.web.addConfigOption('port', 10095) + diff --git a/agent/agent.py b/agent/core.py similarity index 100% rename from agent/agent.py rename to agent/core.py diff --git a/agent/crawl/filesystem.py b/agent/crawl/filesystem.py index f82040b..776634c 100644 --- a/agent/crawl/filesystem.py +++ b/agent/crawl/filesystem.py @@ -22,5 +22,5 @@ Filesystem crawler. 
$Id$ """ -from loops.agent.interfaces import ICrawler +from loops.agent.interfaces import ICrawlingJob diff --git a/agent/interfaces.py b/agent/interfaces.py index 8b6f23a..d0fdba2 100644 --- a/agent/interfaces.py +++ b/agent/interfaces.py @@ -60,6 +60,8 @@ class IScheduledJob(Interface): startTime = Attribute('Date/time at which the job should be executed.') params = Attribute('Mapping with key/value pairs to be passed to the ' 'execute method call as keyword parameters.') + successors = Attribute('Jobs to execute immediately after this ' + 'one has been finished.') def execute(**kw): """ Execute the job. @@ -83,34 +85,56 @@ class ILogRecord(Interface): """ -class ICrawler(Interface): +class ICrawlingJob(IScheduledJob): """ Collects resources. """ + predefinedMetadata = Attribute('A mapping with metadata to be used ' + 'for all resources found.') + def collect(**criteria): - """ Return a collection of resources that should be transferred - the the server using the selection criteria given. + """ Return a collection of resource/metadata pairs that should be transferred + to the server using the selection criteria given. + """ + + +class IMetadataSet(Interface): + """ Metadata associated with a resource. + """ + + def asXML(): + """ Return an XML string representing the metadata set. + + If this metadata set contains other metadata sets + (nested metadata) this will be converted to XML as well. + """ + + def setData(key, value): + """ Set a metadata element. + + The value may be a string or another metadata set + (nested metadata). """ class ITransporter(Interface): - """ Transfers collected resources to the server. A resource need - not be transferred immediately, resources may be be collected - first and transferred later together, e.g. as a compressed file. + """ Transfers collected resources to the server. """ serverURL = Attribute('URL of the server the resources will be ' 'transferred to. The URL also determines the ' 'transfer protocol, e.g. 
HTTP or FTP.') method = Attribute('Transport method, e.g. PUT.') + machineName = Attribute('Name under which the local machine is ' + 'known to the server.') + userName = Attribute('User name for logging in to the server.') + password = Attribute('Password for logging in to the server.') - def transfer(resource, resourceType=file): + def transfer(resource, metadata=None, resourceType=file): """ Transfer the resource (typically just a file that may be read) to the server. - """ - def commit(): - """ Transfer all resources not yet transferred. + The resource may be associated with a metadata set. """ @@ -122,7 +146,52 @@ class IConfigurator(Interface): """ Find the configuration settings and load them. """ + def setConfigOption(key, value): + """ Directly set a certain configuration option. + """ + def getConfigOption(key): """ Return the value for the configuration option identified by the key given. + + In addition config options must be directly accessible + via attribute notation. """ + + +class IPackageManager(Interface): + """ Allows to install, update, or remove software packages (plugins, + typically as Python eggs) from a server. + """ + + sources = Attribute('A list of URLs that provide software packages. ') + + def getInstalledPackages(): + """ Return a list of dictionaries, format: + [{'name': name, 'version': version, + 'date': date_time_of_installation,}, ...] + """ + + def getUpdateCandidates(): + """ Return a list of dictionaries with information about updateable + packages. + """ + + def installPackage(name, version=None, source=None): + """ Install a package. + If version is not given try to get the most recent one. + If source is not given search the sources attribute for the + first fit. + """ + + def updatePackage(name, version=None, source=None): + """ Update a package. + If version is not given try to get the most recent one. + If source is not given search the sources attribute for the + first fit. 
+ """ + + def removePackage(name): + """ Remove a package from this agent. + """ + diff --git a/agent/loops.tac b/agent/loops.tac index f1fac4f..084f8ae 100644 --- a/agent/loops.tac +++ b/agent/loops.tac @@ -1,10 +1,11 @@ from twisted.application import internet, service from nevow import appserver -from loops.agent.agent import startAgent +from loops.agent.core import startAgent from loops.agent.ui.web import AgentHome +from loops.agent.config import conf -port = 10095 +port = conf.ui.web.port or 10095 application = service.Application('LoopsAgent')