work in progress: filesystem crawler - added basic metadata handling
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1893 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
81d2092e0a
commit
db31c2d252
6 changed files with 24 additions and 17 deletions
|
@ -49,4 +49,10 @@ class Metadata(object):
|
||||||
def __init__(self, data=None):
    """Create a metadata set backed by a dictionary.

    ``data`` -- optional dictionary with the initial metadata elements;
    it is stored as ``self.data`` without being copied. When omitted,
    the instance gets a new empty dictionary of its own.
    """
    # A ``dict()`` default value is evaluated only once, when the function
    # is defined, so all instances created without an argument would share
    # (and modify) one and the same dictionary.
    if data is None:
        data = {}
    self.data = data
|
||||||
|
|
||||||
|
def asXml(self):
    """Return the XML representation of this metadata set.

    Placeholder: the conversion is not implemented yet, so the result
    is always the empty string.
    """
    # TODO: generate the XML representation
    xml = ''
    return xml
|
||||||
|
|
||||||
|
def set(self, key, value):
    """Set the metadata element ``key`` to ``value``.

    An element already stored under the same key is overwritten.
    """
    # Index with the ``key`` argument itself; the string literal 'key'
    # would store every value under one and the same entry.
    self.data[key] = value
|
||||||
|
|
||||||
|
|
|
@ -36,15 +36,14 @@ from loops.agent.crawl.base import Metadata
|
||||||
class CrawlingJob(BaseCrawlingJob):
|
class CrawlingJob(BaseCrawlingJob):
|
||||||
|
|
||||||
def collect(self):
|
def collect(self):
|
||||||
self.data = []
|
self.collected = []
|
||||||
#deferred = reactor.deferToThread(self.crawlFilesystem, dataAvailable)
|
coiterate(self.crawlFilesystem()).addCallback(self.finished)
|
||||||
deferred = self.deferred = Deferred()
|
# TODO: addErrback()
|
||||||
self.internalDeferred = coiterate(self.crawlFilesystem())
|
self.deferred = Deferred()
|
||||||
self.internalDeferred.addCallback(self.finished)
|
return self.deferred
|
||||||
return deferred
|
|
||||||
|
|
||||||
def finished(self, result):
|
def finished(self, result):
|
||||||
self.deferred.callback(self.data)
|
self.deferred.callback(self.collected)
|
||||||
|
|
||||||
def crawlFilesystem(self):
|
def crawlFilesystem(self):
|
||||||
criteria = self.params
|
criteria = self.params
|
||||||
|
@ -59,8 +58,11 @@ class CrawlingJob(BaseCrawlingJob):
|
||||||
mtime = datetime.fromtimestamp(
|
mtime = datetime.fromtimestamp(
|
||||||
os.stat(filename)[stat.ST_MTIME])
|
os.stat(filename)[stat.ST_MTIME])
|
||||||
# TODO: check modification time
|
# TODO: check modification time
|
||||||
self.data.append((FileResource(filename),
|
meta = dict(
|
||||||
Metadata(dict())))
|
path=filename,
|
||||||
|
)
|
||||||
|
self.collected.append((FileResource(filename),
|
||||||
|
Metadata(meta)))
|
||||||
yield None
|
yield None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,8 @@ We are now ready to schedule the job and let the reactor execute it.
|
||||||
>>> scheduler.schedule(crawlJob, int(time()))
|
>>> scheduler.schedule(crawlJob, int(time()))
|
||||||
|
|
||||||
>>> tester.iterate()
|
>>> tester.iterate()
|
||||||
|
Metadata: {'path': '...data...file1.txt'}
|
||||||
Transferring: Data from file1.txt
|
Transferring: Data from file1.txt
|
||||||
|
Metadata: {'path': '...data...subdir...file2.txt'}
|
||||||
Transferring: Data from file2.txt
|
Transferring: Data from file2.txt
|
||||||
|
|
||||||
|
|
|
@ -124,7 +124,7 @@ class IMetadataSet(Interface):
|
||||||
(nested metadata) this will be converted to XML as well.
|
(nested metadata) this will be converted to XML as well.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def setData(key, value):
|
def set(key, value):
|
||||||
""" Set a metadata element.
|
""" Set a metadata element.
|
||||||
|
|
||||||
The value may be a string or another metadata set
|
The value may be a string or another metadata set
|
||||||
|
|
|
@ -26,7 +26,7 @@ from twisted.internet import reactor
|
||||||
from twisted.internet.defer import Deferred
|
from twisted.internet.defer import Deferred
|
||||||
from zope.interface import implements
|
from zope.interface import implements
|
||||||
|
|
||||||
from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
|
from loops.agent.interfaces import ICrawlingJob, IResource
|
||||||
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
|
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,12 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
|
||||||
return deferred
|
return deferred
|
||||||
|
|
||||||
def dataAvailable(self):
|
def dataAvailable(self):
|
||||||
self.deferred.callback([(DummyResource(), Metadata())])
|
self.deferred.callback([(DummyResource(), None)])
|
||||||
|
|
||||||
|
|
||||||
class Metadata(object):
|
|
||||||
|
|
||||||
implements(IMetadataSet)
|
|
||||||
|
|
||||||
|
|
||||||
class DummyResource(object):
|
class DummyResource(object):
|
||||||
|
|
|
@ -55,6 +55,8 @@ class Transporter(BaseTransporter):
|
||||||
data.close()
|
data.close()
|
||||||
else:
|
else:
|
||||||
text = data
|
text = data
|
||||||
|
if metadata is not None:
|
||||||
|
print 'Metadata:', metadata.data
|
||||||
print 'Transferring:', text
|
print 'Transferring:', text
|
||||||
return Deferred()
|
return Deferred()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue