move metadata into Resource class; more on httpput transport

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1897 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2007-08-09 17:51:17 +00:00
parent 0618ac5b73
commit f2e5ba51b9
9 changed files with 71 additions and 46 deletions

View file

@ -42,17 +42,18 @@ class CrawlingJob(Job):
return self.collect() return self.collect()
class Metadata(object): class Metadata(dict):
implements(IMetadataSet) implements(IMetadataSet)
def __init__(self, data=dict()): def __init__(self, data=dict()):
self.data = data for k in data:
self[k] = data[k]
def asXml(self): def asXml(self):
# TODO... # TODO...
return '' return ''
def set(self, key, value): def set(self, key, value):
self.data['key'] = value self['key'] = value

View file

@ -22,7 +22,8 @@ Filesystem crawler.
$Id$ $Id$
""" """
import os, re, stat import os
from fnmatch import filter
from datetime import datetime from datetime import datetime
from twisted.internet.defer import Deferred from twisted.internet.defer import Deferred
from twisted.internet.task import coiterate from twisted.internet.task import coiterate
@ -46,32 +47,31 @@ class CrawlingJob(BaseCrawlingJob):
self.deferred.callback(self.collected) self.deferred.callback(self.collected)
def crawlFilesystem(self): def crawlFilesystem(self):
criteria = self.params directory = self.params.get('directory')
directory = criteria.get('directory') pattern = self.params.get('pattern') or '*'
pattern = re.compile(criteria.get('pattern') or '.*') lastRun = self.params.get('lastrun') or datetime(1980, 1, 1)
for path, dirs, files in os.walk(directory): for path, dirs, files in os.walk(directory):
if '.svn' in dirs: if '.svn' in dirs:
del dirs[dirs.index('.svn')] del dirs[dirs.index('.svn')]
for f in files: for f in filter(files, pattern):
if pattern.match(f): filename = os.path.join(path, f)
filename = os.path.join(path, f) mtime = datetime.fromtimestamp(os.path.getmtime(filename))
mtime = datetime.fromtimestamp( if mtime <= lastRun: # file not changed
os.stat(filename)[stat.ST_MTIME]) continue
# TODO: check modification time meta = dict(
meta = dict( path=filename,
path=filename, )
) self.collected.append(FileResource(filename, Metadata(meta)))
self.collected.append((FileResource(filename), yield None
Metadata(meta)))
yield None
class FileResource(object): class FileResource(object):
implements(IResource) implements(IResource)
def __init__(self, path): def __init__(self, path, metadata=None):
self.path = path self.path = path
self.metadata = metadata
@property @property
def data(self): def data(self):

View file

@ -112,9 +112,12 @@ class IResource(Interface):
data = Attribute("A string, file, or similar representation of the " data = Attribute("A string, file, or similar representation of the "
"resource's content") "resource's content")
metadata = Attribute('Information describing this resource; '
'should be an IMetadataSet object.')
class IMetadataSet(Interface): class IMetadataSet(Interface):
""" Metadata associated with a resource. """ Metadata associated with a resource; sort of a mapping.
""" """
def asXML(): def asXML():
@ -145,11 +148,9 @@ class ITransporter(Interface):
userName = Attribute('User name for logging in to the server.') userName = Attribute('User name for logging in to the server.')
password = Attribute('Password for logging in to the server.') password = Attribute('Password for logging in to the server.')
def transfer(resource, metadata=None): def transfer(resource):
""" Transfer the resource (typically just a file that may """ Transfer the resource (an object providing IResource)
be read) to the server. to the server.
The resource may be associated with a metadata set.
""" """

View file

@ -39,7 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
return deferred return deferred
def dataAvailable(self): def dataAvailable(self):
self.deferred.callback([(DummyResource(), None)]) self.deferred.callback([DummyResource()])
class DummyResource(object): class DummyResource(object):
@ -47,3 +47,4 @@ class DummyResource(object):
implements(IResource) implements(IResource)
data = 'Dummy resource data for testing purposes.' data = 'Dummy resource data for testing purposes.'
metadata = None

View file

@ -40,8 +40,8 @@ class TransportJob(BaseJob):
if result is None: if result is None:
print 'No data available.' print 'No data available.'
else: else:
for res, meta in result: for resource in result:
d = self.transporter.transfer(res.data, meta) d = self.transporter.transfer(resource)
return Deferred() return Deferred()
@ -49,14 +49,16 @@ class Transporter(BaseTransporter):
jobFactory = TransportJob jobFactory = TransportJob
def transfer(self, data, metadata=None): def transfer(self, resource):
data = resource.data
if type(data) is file: if type(data) is file:
text = data.read() text = data.read()
data.close() data.close()
else: else:
text = data text = data
metadata = resource.metadata
if metadata is not None: if metadata is not None:
print 'Metadata:', metadata.data print 'Metadata:', metadata
print 'Transferring:', text print 'Transferring:', text
return Deferred() return Deferred()

View file

@ -62,6 +62,7 @@ def test_suite():
#standard_unittest.makeSuite(Test), #standard_unittest.makeSuite(Test),
doctest.DocFileSuite('README.txt', optionflags=flags), doctest.DocFileSuite('README.txt', optionflags=flags),
doctest.DocFileSuite('crawl/filesystem.txt', optionflags=flags), doctest.DocFileSuite('crawl/filesystem.txt', optionflags=flags),
doctest.DocFileSuite('transport/httpput.txt', optionflags=flags),
)) ))
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -48,9 +48,10 @@ class Transporter(object):
def __init__(self, agent): def __init__(self, agent):
self.agent = agent self.agent = agent
def transfer(self, data, metadata=None): def transfer(self, resource):
data = resource.data
if type(data) is file: if type(data) is file:
data = text.read() text = data.read()
data.close() data.close()
else: else:
text = data text = data

View file

@ -23,7 +23,7 @@ $Id$
""" """
from twisted.internet import reactor from twisted.internet import reactor
from twisted.internet.defer import Deferred from twisted.web.client import getPage
from zope.interface import implements from zope.interface import implements
from loops.agent.interfaces import ITransporter, ITransportJob from loops.agent.interfaces import ITransporter, ITransportJob
@ -38,13 +38,13 @@ class TransportJob(Job):
super(TransportJob, self).__init__() super(TransportJob, self).__init__()
self.transporter = transporter self.transporter = transporter
def execute(self, **kw): def execute(self):
result = kw.get('result') result = kw.get('result')
if result is None: if result is None:
print 'No data available.' print 'No data available.'
else: else:
for r in result: for resource, metadata in result:
d = self.transporter.transfer(r[0].data, r[1], str) d = self.transporter.transfer(resource.data, metadata)
return Deferred() return Deferred()
@ -62,15 +62,18 @@ class Transporter(object):
def __init__(self, agent): def __init__(self, agent):
self.agent = agent self.agent = agent
config = agent.config conf = agent.config
# TODO: get settings from conf
def transfer(self, resource, metadata=None, resourceType=file): def transfer(self, resource):
if resourceType is file: data = resource.data
data = resource.read() if type(data) is file:
text = resource.read()
resource.close() resource.close()
elif resourceType is str: else:
data = resource text = data
print 'Transferring:', data metadata = resource.metadata
return Deferred() url = self.serverURL + self.makePath(metadata)
d = getPage(url, method='PUT', postData=text)
return d

View file

@ -0,0 +1,15 @@
======================================================
loops.agent.transport.httpput - The HTTP PUT Transport
======================================================
($Id$)
>>> from time import time
>>> from loops.agent.core import Agent
>>> from loops.agent.transport.httpput import Transporter, TransportJob
>>> agent = Agent()
>>> transporter = Transporter(agent)
>>> job = TransportJob(transporter)