Work in progress: filesystem crawler — added basic metadata handling.

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1893 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-08-09 14:55:07 +00:00
parent 81d2092e0a
commit db31c2d252
6 changed files with 24 additions and 17 deletions

View file

@ -49,4 +49,10 @@ class Metadata(object):
def __init__(self, data=None):
    """Initialize the metadata set.

    ``data`` is an optional mapping of metadata keys to values.
    A fresh dict is created per instance when omitted; the previous
    signature (``data=dict()``) evaluated the default once at
    definition time, so all instances created without an explicit
    mapping shared — and mutated — the same dict.
    """
    self.data = {} if data is None else data
def asXml(self):
    """Return an XML representation of this metadata set.

    Serialization is not implemented yet; an empty string is
    returned as a placeholder.
    """
    # TODO: serialize self.data to XML once the format is defined
    return ''
def set(self, key, value):
    """Store ``value`` under ``key`` in this metadata set.

    Previously the literal string ``'key'`` was used as the
    dictionary key, so every call clobbered the same single entry
    instead of storing under the caller's key.
    """
    self.data[key] = value

View file

@ -36,15 +36,14 @@ from loops.agent.crawl.base import Metadata
class CrawlingJob(BaseCrawlingJob):
def collect(self):
self.data = []
#deferred = reactor.deferToThread(self.crawlFilesystem, dataAvailable)
deferred = self.deferred = Deferred()
self.internalDeferred = coiterate(self.crawlFilesystem())
self.internalDeferred.addCallback(self.finished)
return deferred
self.collected = []
coiterate(self.crawlFilesystem()).addCallback(self.finished)
# TODO: addErrback()
self.deferred = Deferred()
return self.deferred
def finished(self, result):
self.deferred.callback(self.data)
self.deferred.callback(self.collected)
def crawlFilesystem(self):
criteria = self.params
@ -59,8 +58,11 @@ class CrawlingJob(BaseCrawlingJob):
mtime = datetime.fromtimestamp(
os.stat(filename)[stat.ST_MTIME])
# TODO: check modification time
self.data.append((FileResource(filename),
Metadata(dict())))
meta = dict(
path=filename,
)
self.collected.append((FileResource(filename),
Metadata(meta)))
yield None

View file

@ -35,6 +35,8 @@ We are now ready to schedule the job and let the reactor execute it.
>>> scheduler.schedule(crawlJob, int(time()))
>>> tester.iterate()
Metadata: {'path': '...data...file1.txt'}
Transferring: Data from file1.txt
Metadata: {'path': '...data...subdir...file2.txt'}
Transferring: Data from file2.txt

View file

@ -124,7 +124,7 @@ class IMetadataSet(Interface):
(nested metadata) this will be converted to XML as well.
"""
def setData(key, value):
def set(key, value):
""" Set a metadata element.
The value may be a string or another metadata set

View file

@ -26,7 +26,7 @@ from twisted.internet import reactor
from twisted.internet.defer import Deferred
from zope.interface import implements
from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
from loops.agent.interfaces import ICrawlingJob, IResource
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
@ -39,12 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
return deferred
def dataAvailable(self):
self.deferred.callback([(DummyResource(), Metadata())])
class Metadata(object):
implements(IMetadataSet)
self.deferred.callback([(DummyResource(), None)])
class DummyResource(object):

View file

@ -55,6 +55,8 @@ class Transporter(BaseTransporter):
data.close()
else:
text = data
if metadata is not None:
print 'Metadata:', metadata.data
print 'Transferring:', text
return Deferred()