work in progress: filesystem crawler - added basic metadata handling

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1893 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-08-09 14:55:07 +00:00
parent 81d2092e0a
commit db31c2d252
6 changed files with 24 additions and 17 deletions

View file

@@ -49,4 +49,10 @@ class Metadata(object):
def __init__(self, data=dict()): def __init__(self, data=dict()):
self.data = data self.data = data
def asXml(self):
# TODO...
return ''
def set(self, key, value):
self.data[key] = value

View file

@@ -36,15 +36,14 @@ from loops.agent.crawl.base import Metadata
class CrawlingJob(BaseCrawlingJob): class CrawlingJob(BaseCrawlingJob):
def collect(self): def collect(self):
self.data = [] self.collected = []
#deferred = reactor.deferToThread(self.crawlFilesystem, dataAvailable) coiterate(self.crawlFilesystem()).addCallback(self.finished)
deferred = self.deferred = Deferred() # TODO: addErrback()
self.internalDeferred = coiterate(self.crawlFilesystem()) self.deferred = Deferred()
self.internalDeferred.addCallback(self.finished) return self.deferred
return deferred
def finished(self, result): def finished(self, result):
self.deferred.callback(self.data) self.deferred.callback(self.collected)
def crawlFilesystem(self): def crawlFilesystem(self):
criteria = self.params criteria = self.params
@@ -59,8 +58,11 @@ class CrawlingJob(BaseCrawlingJob):
mtime = datetime.fromtimestamp( mtime = datetime.fromtimestamp(
os.stat(filename)[stat.ST_MTIME]) os.stat(filename)[stat.ST_MTIME])
# TODO: check modification time # TODO: check modification time
self.data.append((FileResource(filename), meta = dict(
Metadata(dict()))) path=filename,
)
self.collected.append((FileResource(filename),
Metadata(meta)))
yield None yield None

View file

@@ -35,6 +35,8 @@ We are now ready to schedule the job and let the reactor execute it.
>>> scheduler.schedule(crawlJob, int(time())) >>> scheduler.schedule(crawlJob, int(time()))
>>> tester.iterate() >>> tester.iterate()
Metadata: {'path': '...data...file1.txt'}
Transferring: Data from file1.txt Transferring: Data from file1.txt
Metadata: {'path': '...data...subdir...file2.txt'}
Transferring: Data from file2.txt Transferring: Data from file2.txt

View file

@@ -124,7 +124,7 @@ class IMetadataSet(Interface):
(nested metadata) this will be converted to XML as well. (nested metadata) this will be converted to XML as well.
""" """
def setData(key, value): def set(key, value):
""" Set a metadata element. """ Set a metadata element.
The value may be a string or another metadata set The value may be a string or another metadata set

View file

@@ -26,7 +26,7 @@ from twisted.internet import reactor
from twisted.internet.defer import Deferred from twisted.internet.defer import Deferred
from zope.interface import implements from zope.interface import implements
from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet from loops.agent.interfaces import ICrawlingJob, IResource
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
@@ -39,12 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
return deferred return deferred
def dataAvailable(self): def dataAvailable(self):
self.deferred.callback([(DummyResource(), Metadata())]) self.deferred.callback([(DummyResource(), None)])
class Metadata(object):
implements(IMetadataSet)
class DummyResource(object): class DummyResource(object):

View file

@ -55,6 +55,8 @@ class Transporter(BaseTransporter):
data.close() data.close()
else: else:
text = data text = data
if metadata is not None:
print 'Metadata:', metadata.data
print 'Transferring:', text print 'Transferring:', text
return Deferred() return Deferred()