work in progress: filesystem crawler - added basic metadata handling
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1893 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
81d2092e0a
commit
db31c2d252
6 changed files with 24 additions and 17 deletions
|
@ -49,4 +49,10 @@ class Metadata(object):
|
|||
def __init__(self, data=None):
    """Create a metadata set backed by a dictionary.

    data -- optional dictionary holding the initial metadata entries;
            it is stored as-is (not copied). When omitted, a fresh
            empty dictionary is created for this instance.
    """
    # A ``dict()`` default would be evaluated only once and then be
    # shared by every instance created without an argument, so entries
    # stored via ``set()`` on one instance would show up in all others.
    if data is None:
        data = {}
    self.data = data
|
||||
|
||||
def asXml(self):
    """Return this metadata set as an XML string.

    Placeholder: the conversion is not implemented yet, so the result
    is always the empty string.
    """
    # TODO...
    xml = ''
    return xml
|
||||
|
||||
def set(self, key, value):
    """Set a metadata element.

    key   -- name under which the element is stored in ``self.data``
    value -- the element's value

    An existing entry with the same key is replaced.
    """
    # Use the ``key`` argument itself; the literal string 'key' would
    # make every call overwrite one and the same entry.
    self.data[key] = value
|
||||
|
||||
|
|
|
@ -36,15 +36,14 @@ from loops.agent.crawl.base import Metadata
|
|||
class CrawlingJob(BaseCrawlingJob):
|
||||
|
||||
def collect(self):
|
||||
self.data = []
|
||||
#deferred = reactor.deferToThread(self.crawlFilesystem, dataAvailable)
|
||||
deferred = self.deferred = Deferred()
|
||||
self.internalDeferred = coiterate(self.crawlFilesystem())
|
||||
self.internalDeferred.addCallback(self.finished)
|
||||
return deferred
|
||||
self.collected = []
|
||||
coiterate(self.crawlFilesystem()).addCallback(self.finished)
|
||||
# TODO: addErrback()
|
||||
self.deferred = Deferred()
|
||||
return self.deferred
|
||||
|
||||
def finished(self, result):
|
||||
self.deferred.callback(self.data)
|
||||
self.deferred.callback(self.collected)
|
||||
|
||||
def crawlFilesystem(self):
|
||||
criteria = self.params
|
||||
|
@ -59,8 +58,11 @@ class CrawlingJob(BaseCrawlingJob):
|
|||
mtime = datetime.fromtimestamp(
|
||||
os.stat(filename)[stat.ST_MTIME])
|
||||
# TODO: check modification time
|
||||
self.data.append((FileResource(filename),
|
||||
Metadata(dict())))
|
||||
meta = dict(
|
||||
path=filename,
|
||||
)
|
||||
self.collected.append((FileResource(filename),
|
||||
Metadata(meta)))
|
||||
yield None
|
||||
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ We are now ready to schedule the job and let the reactor execute it.
|
|||
>>> scheduler.schedule(crawlJob, int(time()))
|
||||
|
||||
>>> tester.iterate()
|
||||
Metadata: {'path': '...data...file1.txt'}
|
||||
Transferring: Data from file1.txt
|
||||
Metadata: {'path': '...data...subdir...file2.txt'}
|
||||
Transferring: Data from file2.txt
|
||||
|
||||
|
|
|
@ -124,7 +124,7 @@ class IMetadataSet(Interface):
|
|||
(nested metadata) this will be converted to XML as well.
|
||||
"""
|
||||
|
||||
def setData(key, value):
|
||||
def set(key, value):
|
||||
""" Set a metadata element.
|
||||
|
||||
The value may be a string or another metadata set
|
||||
|
|
|
@ -26,7 +26,7 @@ from twisted.internet import reactor
|
|||
from twisted.internet.defer import Deferred
|
||||
from zope.interface import implements
|
||||
|
||||
from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
|
||||
from loops.agent.interfaces import ICrawlingJob, IResource
|
||||
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
|
||||
|
||||
|
||||
|
@ -39,12 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
|
|||
return deferred
|
||||
|
||||
def dataAvailable(self):
|
||||
self.deferred.callback([(DummyResource(), Metadata())])
|
||||
|
||||
|
||||
class Metadata(object):
|
||||
|
||||
implements(IMetadataSet)
|
||||
self.deferred.callback([(DummyResource(), None)])
|
||||
|
||||
|
||||
class DummyResource(object):
|
||||
|
|
|
@ -55,6 +55,8 @@ class Transporter(BaseTransporter):
|
|||
data.close()
|
||||
else:
|
||||
text = data
|
||||
if metadata is not None:
|
||||
print 'Metadata:', metadata.data
|
||||
print 'Transferring:', text
|
||||
return Deferred()
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue