From 8582252967883b60b233e436f50242c0bc088356 Mon Sep 17 00:00:00 2001 From: helmutm Date: Thu, 8 May 2008 10:12:57 +0000 Subject: [PATCH] work in progress: structuring of resources and metadata git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2580 fd906abe-77d9-0310-91a1-e0d9ade77398 --- agent/base/control.py | 2 ++ agent/base/job.py | 2 -- agent/crawl/base.py | 13 +++++++------ agent/crawl/filesystem.txt | 6 +++++- agent/crawl/mail.py | 15 ++++++++------- agent/interfaces.py | 35 +++++++++++++---------------------- 6 files changed, 35 insertions(+), 38 deletions(-) diff --git a/agent/base/control.py b/agent/base/control.py index e62813b..1e6dc14 100644 --- a/agent/base/control.py +++ b/agent/base/control.py @@ -53,10 +53,12 @@ class Controller(object): class SampleController(Controller): jobNumber = 0 + result = None agents = (('sample01', 'base.sample'),) def notify(self, identifier, state, result=None, message=''): + self.result = result msg = ('Job %s %s; result: %s; %s' % (identifier, state, result, message)) print msg diff --git a/agent/base/job.py b/agent/base/job.py index b3d9981..b722733 100644 --- a/agent/base/job.py +++ b/agent/base/job.py @@ -61,5 +61,3 @@ class Job(object): newJob.successors = [s.copy() for s in self.successors] jobs.register(Job, Scheduler, name='sample') - - diff --git a/agent/crawl/base.py b/agent/crawl/base.py index dedad18..188b481 100644 --- a/agent/crawl/base.py +++ b/agent/crawl/base.py @@ -61,16 +61,17 @@ class Resource(object): implements(IResource) - data = None - path = "" - application = "" - metadata = None + application = 'sample' - def __init__(self, data, path="", application="", metadata=None): + def __init__(self, data=None, file=None, path=None, application=None, + metadata=None): self.data = data + self.file = file self.path = path - self.application = application + if application: + self.application = application self.metadata = metadata + self.subResources = [] class Metadata(dict): diff --git a/agent/crawl/filesystem.txt b/agent/crawl/filesystem.txt index 39a0ba0..2755c99 100644 --- a/agent/crawl/filesystem.txt +++ b/agent/crawl/filesystem.txt @@ -35,4 +35,8 @@ the twisted reactor first. >>> from cybertools.agent.tests import tester >>> tester.iterate() - Job 00001 completed; result: [..., ...]; \ No newline at end of file + Job 00001 completed; result: [..., ...]; + + >>> r0 = controller.result[0] + >>> r0.metadata, r0.data.read() + ({'path': '...file1.txt'}, 'Data from file1.txt') diff --git a/agent/crawl/mail.py b/agent/crawl/mail.py index 77b5008..78a7b5a 100644 --- a/agent/crawl/mail.py +++ b/agent/crawl/mail.py @@ -39,21 +39,21 @@ class MailCrawler(Crawler): def collect(self, filter=None): print 'MailCrawler is collecting.' - # d = self.crawlFolders() - d = succeed([]) + d = self.crawlFolders() return d def fetchCriteria(self): pass def crawlFolders(self): - pass + return succeed([]) def loadMailsFromFolder(self, folder): pass - def createResource(self, mail, path="", application="", metadata=None): - resource = MailResource(mail, path, application, metadata) + def createResource(self, mail, path=None, application=None, metadata=None): + resource = MailResource(mail, path=path, application=application, + metadata=metadata) self.result.append(resource) def login(self): @@ -61,6 +61,7 @@ class MailCrawler(Crawler): class MailResource(Resource): - pass -agents.register(MailCrawler, Master, name='crawl.mail') \ No newline at end of file + application = 'outlook' + +agents.register(MailCrawler, Master, name='crawl.mail') diff --git a/agent/interfaces.py b/agent/interfaces.py index e524de9..e602972 100644 --- a/agent/interfaces.py +++ b/agent/interfaces.py @@ -196,21 +196,6 @@ class IScheduledJob(Interface): """ -class ICrawlingJob(IScheduledJob): - """ A job specifying a crawling task. - """ - - predefinedMetadata = Attribute('A mapping with metadata to be used ' - 'for all resources found.') - - -class ITransportJob(IScheduledJob): - """ A job managing the the transfer of a resource to the server. - """ - - transporter = Attribute('The transporter agent to use for transfer.') - - # information objects class IResource(Interface): @@ -218,17 +203,24 @@ class IResource(Interface): will be transferred to the server. """ - data = Attribute('A string, file, or similar representation of the ' + data = Attribute('A string representation of the ' 'resource\'s content; may be None if the receiver of ' - 'the information can retrieve the date from the path ' - 'given.') - path = Attribute('A filesystem path or some other information ' - 'uniquely identifying the resource on the client ' - 'machine for the current user.') + 'the information can retrieve the data from the file or path ' + 'attribute.') + file = Attribute('A file-like object providing the data via its read() ' + 'method; may be None if the data or path attribute ' + 'is given.') + path = Attribute('A filesystem path for accessing the resource; may be ' + 'None if the data or file attribute is given.') + identifier = Attribute('A string (usually derived from the path) that ' + 'uniquely identifies the resource.') application = Attribute('The name of the application that provided ' 'the resource, e.g. "filesystem" or "mail".') metadata = Attribute('Information describing this resource; ' 'should be an IMetadataSet object.') + subResources = Attribute('A collection of resources that are inherently ' + 'connected to or parts of this resource, e.g. attachments ' + 'of an email. Will be None or empty in most cases.') class IMetadataSet(Interface): @@ -278,4 +270,3 @@ class ILogRecord(Interface): """ Return a string representation suitable for writing to a log file. """ -