move metadata into Resource class; more on httpput transport
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1897 fd906abe-77d9-0310-91a1-e0d9ade77398
parent 0618ac5b73
commit f2e5ba51b9

9 changed files with 71 additions and 46 deletions
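In short, metadata now travels with the resource object, and transporters receive that object instead of separate data/metadata arguments. A minimal usage sketch of the reworked calling pattern, pieced together from the hunks below; the module paths for FileResource and Metadata are assumptions (only the loops.agent.core and loops.agent.transport.httpput imports are confirmed by the new doctest added in this commit):

    # Sketch only, not part of the commit; crawler module paths are assumed.
    from loops.agent.core import Agent
    from loops.agent.transport.httpput import Transporter
    from loops.agent.crawl.filesystem import FileResource   # assumed location
    from loops.agent.crawl.base import Metadata             # assumed location

    meta = Metadata(dict(path='/tmp/example.txt'))      # Metadata is now a dict subclass
    resource = FileResource('/tmp/example.txt', meta)   # the resource carries its metadata

    transporter = Transporter(Agent())
    d = transporter.transfer(resource)   # new convention: transfer() takes the resource
                                         # object and reads resource.data / resource.metadata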
@@ -42,17 +42,18 @@ class CrawlingJob(Job):
         return self.collect()
 
 
-class Metadata(object):
+class Metadata(dict):
 
     implements(IMetadataSet)
 
     def __init__(self, data=dict()):
-        self.data = data
+        for k in data:
+            self[k] = data[k]
 
     def asXml(self):
         # TODO...
         return ''
 
     def set(self, key, value):
-        self.data['key'] = value
+        self['key'] = value

@@ -22,7 +22,8 @@ Filesystem crawler.
 $Id$
 """
 
-import os, re, stat
+import os
+from fnmatch import filter
 from datetime import datetime
 from twisted.internet.defer import Deferred
 from twisted.internet.task import coiterate

@@ -46,32 +47,31 @@ class CrawlingJob(BaseCrawlingJob):
         self.deferred.callback(self.collected)
 
     def crawlFilesystem(self):
-        criteria = self.params
-        directory = criteria.get('directory')
-        pattern = re.compile(criteria.get('pattern') or '.*')
+        directory = self.params.get('directory')
+        pattern = self.params.get('pattern') or '*'
+        lastRun = self.params.get('lastrun') or datetime(1980, 1, 1)
         for path, dirs, files in os.walk(directory):
             if '.svn' in dirs:
                 del dirs[dirs.index('.svn')]
-            for f in files:
-                if pattern.match(f):
-                    filename = os.path.join(path, f)
-                    mtime = datetime.fromtimestamp(
-                                os.stat(filename)[stat.ST_MTIME])
-                    # TODO: check modification time
-                    meta = dict(
-                        path=filename,
-                    )
-                    self.collected.append((FileResource(filename),
-                                           Metadata(meta)))
-                    yield None
+            for f in filter(files, pattern):
+                filename = os.path.join(path, f)
+                mtime = datetime.fromtimestamp(os.path.getmtime(filename))
+                if mtime <= lastRun:    # file not changed
+                    continue
+                meta = dict(
+                    path=filename,
+                )
+                self.collected.append(FileResource(filename, Metadata(meta)))
+                yield None
 
 
 class FileResource(object):
 
     implements(IResource)
 
-    def __init__(self, path):
+    def __init__(self, path, metadata=None):
         self.path = path
+        self.metadata = metadata
 
     @property
     def data(self):

@@ -112,9 +112,12 @@ class IResource(Interface):
     data = Attribute("A string, file, or similar representation of the "
                      "resource's content")
 
+    metadata = Attribute('Information describing this resource; '
+                         'should be an IMetadataSet object.')
+
 
 class IMetadataSet(Interface):
-    """ Metadata associated with a resource.
+    """ Metadata associated with a resource; sort of a mapping.
     """
 
     def asXML():

@@ -145,11 +148,9 @@ class ITransporter(Interface):
     userName = Attribute('User name for logging in to the server.')
     password = Attribute('Password for logging in to the server.')
 
-    def transfer(resource, metadata=None):
-        """ Transfer the resource (typically just a file that may
-            be read) to the server.
-
-            The resource may be associated with a metadata set.
+    def transfer(resource):
+        """ Transfer the resource (an object providing IResource)
+            to the server.
         """

@@ -39,7 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
         return deferred
 
     def dataAvailable(self):
-        self.deferred.callback([(DummyResource(), None)])
+        self.deferred.callback([DummyResource()])
 
 
 class DummyResource(object):

@@ -47,3 +47,4 @@ class DummyResource(object):
     implements(IResource)
 
     data = 'Dummy resource data for testing purposes.'
+    metadata = None

@@ -40,8 +40,8 @@ class TransportJob(BaseJob):
         if result is None:
             print 'No data available.'
         else:
-            for res, meta in result:
-                d = self.transporter.transfer(res.data, meta)
+            for resource in result:
+                d = self.transporter.transfer(resource)
         return Deferred()

@@ -49,14 +49,16 @@ class Transporter(BaseTransporter):
 
     jobFactory = TransportJob
 
-    def transfer(self, data, metadata=None):
+    def transfer(self, resource):
+        data = resource.data
         if type(data) is file:
             text = data.read()
             data.close()
         else:
             text = data
+        metadata = resource.metadata
         if metadata is not None:
-            print 'Metadata:', metadata.data
+            print 'Metadata:', metadata
         print 'Transferring:', text
         return Deferred()

@@ -62,6 +62,7 @@ def test_suite():
         #standard_unittest.makeSuite(Test),
         doctest.DocFileSuite('README.txt', optionflags=flags),
         doctest.DocFileSuite('crawl/filesystem.txt', optionflags=flags),
+        doctest.DocFileSuite('transport/httpput.txt', optionflags=flags),
         ))
 
 if __name__ == '__main__':

@@ -48,9 +48,10 @@ class Transporter(object):
     def __init__(self, agent):
         self.agent = agent
 
-    def transfer(self, data, metadata=None):
+    def transfer(self, resource):
+        data = resource.data
         if type(data) is file:
-            data = text.read()
+            text = data.read()
             data.close()
         else:
             text = data

@@ -23,7 +23,7 @@ $Id$
 """
 
-from twisted.internet import reactor
 from twisted.internet.defer import Deferred
+from twisted.web.client import getPage
 from zope.interface import implements
 
 from loops.agent.interfaces import ITransporter, ITransportJob

@@ -38,13 +38,13 @@ class TransportJob(Job):
         super(TransportJob, self).__init__()
         self.transporter = transporter
 
-    def execute(self, **kw):
+    def execute(self):
         result = kw.get('result')
         if result is None:
             print 'No data available.'
         else:
-            for r in result:
-                d = self.transporter.transfer(r[0].data, r[1], str)
+            for resource, metadata in result:
+                d = self.transporter.transfer(resource.data, metadata)
         return Deferred()

@@ -62,15 +62,18 @@ class Transporter(object):
 
     def __init__(self, agent):
         self.agent = agent
-        config = agent.config
+        conf = agent.config
         # TODO: get settings from conf
 
-    def transfer(self, resource, metadata=None, resourceType=file):
-        if resourceType is file:
-            data = resource.read()
+    def transfer(self, resource):
+        data = resource.data
+        if type(data) is file:
+            text = resource.read()
             resource.close()
-        elif resourceType is str:
-            data = resource
-        print 'Transferring:', data
-        return Deferred()
+        else:
+            text = data
+        metadata = resource.metadata
+        url = self.serverURL + self.makePath(metadata)
+        d = getPage(url, method='PUT', postData=text)
+        return d

agent/transport/httpput.txt (new file, 15 lines)

@@ -0,0 +1,15 @@
+======================================================
+loops.agent.transport.httpput - The HTTP PUT Transport
+======================================================
+
+($Id$)
+
+  >>> from time import time
+
+  >>> from loops.agent.core import Agent
+  >>> from loops.agent.transport.httpput import Transporter, TransportJob
+
+  >>> agent = Agent()
+  >>> transporter = Transporter(agent)
+  >>> job = TransportJob(transporter)