diff --git a/agent/crawl/base.py b/agent/crawl/base.py
index baa7944..0902484 100644
--- a/agent/crawl/base.py
+++ b/agent/crawl/base.py
@@ -49,4 +49,10 @@ class Metadata(object):
 
     def __init__(self, data=dict()):
         self.data = data
 
+    def asXml(self):
+        # TODO...
+        return ''
+
+    def set(self, key, value):
+        self.data[key] = value
diff --git a/agent/crawl/filesystem.py b/agent/crawl/filesystem.py
index 0b4b6f1..c1dcab6 100644
--- a/agent/crawl/filesystem.py
+++ b/agent/crawl/filesystem.py
@@ -36,15 +36,14 @@ from loops.agent.crawl.base import Metadata
 class CrawlingJob(BaseCrawlingJob):
 
     def collect(self):
-        self.data = []
-        #deferred = reactor.deferToThread(self.crawlFilesystem, dataAvailable)
-        deferred = self.deferred = Deferred()
-        self.internalDeferred = coiterate(self.crawlFilesystem())
-        self.internalDeferred.addCallback(self.finished)
-        return deferred
+        self.collected = []
+        coiterate(self.crawlFilesystem()).addCallback(self.finished)
+        # TODO: addErrback()
+        self.deferred = Deferred()
+        return self.deferred
 
     def finished(self, result):
-        self.deferred.callback(self.data)
+        self.deferred.callback(self.collected)
 
     def crawlFilesystem(self):
         criteria = self.params
@@ -59,8 +58,11 @@ class CrawlingJob(BaseCrawlingJob):
                     mtime = datetime.fromtimestamp(
                                 os.stat(filename)[stat.ST_MTIME])
                     # TODO: check modification time
-                    self.data.append((FileResource(filename),
-                                      Metadata(dict())))
+                    meta = dict(
+                        path=filename,
+                    )
+                    self.collected.append((FileResource(filename),
+                                           Metadata(meta)))
                     yield None
 
 
diff --git a/agent/crawl/filesystem.txt b/agent/crawl/filesystem.txt
index e569a03..26a63b9 100644
--- a/agent/crawl/filesystem.txt
+++ b/agent/crawl/filesystem.txt
@@ -35,6 +35,8 @@ We are now ready to schedule the job and let the reactor execute it.
 
   >>> scheduler.schedule(crawlJob, int(time()))
   >>> tester.iterate()
+  Metadata: {'path': '...data...file1.txt'}
   Transferring: Data from file1.txt
+  Metadata: {'path': '...data...subdir...file2.txt'}
   Transferring: Data from file2.txt
 
diff --git a/agent/interfaces.py b/agent/interfaces.py
index 7c30acf..44798bf 100644
--- a/agent/interfaces.py
+++ b/agent/interfaces.py
@@ -124,7 +124,7 @@ class IMetadataSet(Interface):
             (nested metadata) this will be converted to XML as well.
         """
 
-    def setData(key, value):
+    def set(key, value):
         """ Set a metadata element.
 
             The value may be a string or another metadata set
diff --git a/agent/testing/crawl.py b/agent/testing/crawl.py
index 110f3ef..52929a8 100644
--- a/agent/testing/crawl.py
+++ b/agent/testing/crawl.py
@@ -26,7 +26,7 @@ from twisted.internet import reactor
 from twisted.internet.defer import Deferred
 from zope.interface import implements
 
-from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
+from loops.agent.interfaces import ICrawlingJob, IResource
 from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
 
 
@@ -39,12 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
         return deferred
 
     def dataAvailable(self):
-        self.deferred.callback([(DummyResource(), Metadata())])
-
-
-class Metadata(object):
-
-    implements(IMetadataSet)
+        self.deferred.callback([(DummyResource(), None)])
 
 
 class DummyResource(object):
diff --git a/agent/testing/transport.py b/agent/testing/transport.py
index ae1dad4..d755546 100644
--- a/agent/testing/transport.py
+++ b/agent/testing/transport.py
@@ -55,6 +55,8 @@ class Transporter(BaseTransporter):
             data.close()
         else:
             text = data
+        if metadata is not None:
+            print 'Metadata:', metadata.data
         print 'Transferring:', text
         return Deferred()
 
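
For orientation (not part of the commit): a minimal, hypothetical doctest-style sketch of how the reworked Metadata container behaves after this change. The path and mtime values below are made up for illustration, and asXml() is still the stub introduced above.

  >>> from loops.agent.crawl.base import Metadata
  >>> meta = Metadata(dict(path='/tmp/data/file1.txt'))
  >>> meta.set('mtime', '2007-06-15T12:00:00')
  >>> sorted(meta.data.keys())
  ['mtime', 'path']
  >>> meta.asXml()
  ''

Handing the metadata to the transporter alongside the resource, as the crawling job now does with (FileResource(filename), Metadata(meta)), lets the transporter print or later serialize it without knowing how it was collected.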