move metadata into Resource class; more on httpput transport
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1897 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
0618ac5b73
commit
f2e5ba51b9
9 changed files with 71 additions and 46 deletions
|
@ -42,17 +42,18 @@ class CrawlingJob(Job):
|
||||||
return self.collect()
|
return self.collect()
|
||||||
|
|
||||||
|
|
||||||
class Metadata(object):
|
class Metadata(dict):
|
||||||
|
|
||||||
implements(IMetadataSet)
|
implements(IMetadataSet)
|
||||||
|
|
||||||
def __init__(self, data=dict()):
|
def __init__(self, data=dict()):
|
||||||
self.data = data
|
for k in data:
|
||||||
|
self[k] = data[k]
|
||||||
|
|
||||||
def asXml(self):
|
def asXml(self):
|
||||||
# TODO...
|
# TODO...
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def set(self, key, value):
|
def set(self, key, value):
|
||||||
self.data['key'] = value
|
self['key'] = value
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,8 @@ Filesystem crawler.
|
||||||
$Id$
|
$Id$
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os, re, stat
|
import os
|
||||||
|
from fnmatch import filter
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from twisted.internet.defer import Deferred
|
from twisted.internet.defer import Deferred
|
||||||
from twisted.internet.task import coiterate
|
from twisted.internet.task import coiterate
|
||||||
|
@ -46,23 +47,21 @@ class CrawlingJob(BaseCrawlingJob):
|
||||||
self.deferred.callback(self.collected)
|
self.deferred.callback(self.collected)
|
||||||
|
|
||||||
def crawlFilesystem(self):
|
def crawlFilesystem(self):
|
||||||
criteria = self.params
|
directory = self.params.get('directory')
|
||||||
directory = criteria.get('directory')
|
pattern = self.params.get('pattern') or '*'
|
||||||
pattern = re.compile(criteria.get('pattern') or '.*')
|
lastRun = self.params.get('lastrun') or datetime(1980, 1, 1)
|
||||||
for path, dirs, files in os.walk(directory):
|
for path, dirs, files in os.walk(directory):
|
||||||
if '.svn' in dirs:
|
if '.svn' in dirs:
|
||||||
del dirs[dirs.index('.svn')]
|
del dirs[dirs.index('.svn')]
|
||||||
for f in files:
|
for f in filter(files, pattern):
|
||||||
if pattern.match(f):
|
|
||||||
filename = os.path.join(path, f)
|
filename = os.path.join(path, f)
|
||||||
mtime = datetime.fromtimestamp(
|
mtime = datetime.fromtimestamp(os.path.getmtime(filename))
|
||||||
os.stat(filename)[stat.ST_MTIME])
|
if mtime <= lastRun: # file not changed
|
||||||
# TODO: check modification time
|
continue
|
||||||
meta = dict(
|
meta = dict(
|
||||||
path=filename,
|
path=filename,
|
||||||
)
|
)
|
||||||
self.collected.append((FileResource(filename),
|
self.collected.append(FileResource(filename, Metadata(meta)))
|
||||||
Metadata(meta)))
|
|
||||||
yield None
|
yield None
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,8 +69,9 @@ class FileResource(object):
|
||||||
|
|
||||||
implements(IResource)
|
implements(IResource)
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path, metadata=None):
|
||||||
self.path = path
|
self.path = path
|
||||||
|
self.metadata = metadata
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def data(self):
|
def data(self):
|
||||||
|
|
|
@ -112,9 +112,12 @@ class IResource(Interface):
|
||||||
data = Attribute("A string, file, or similar representation of the "
|
data = Attribute("A string, file, or similar representation of the "
|
||||||
"resource's content")
|
"resource's content")
|
||||||
|
|
||||||
|
metadata = Attribute('Information describing this resource; '
|
||||||
|
'should be an IMetadataSet object.')
|
||||||
|
|
||||||
|
|
||||||
class IMetadataSet(Interface):
|
class IMetadataSet(Interface):
|
||||||
""" Metadata associated with a resource.
|
""" Metadata associated with a resource; sort of a mapping.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def asXML():
|
def asXML():
|
||||||
|
@ -145,11 +148,9 @@ class ITransporter(Interface):
|
||||||
userName = Attribute('User name for logging in to the server.')
|
userName = Attribute('User name for logging in to the server.')
|
||||||
password = Attribute('Password for logging in to the server.')
|
password = Attribute('Password for logging in to the server.')
|
||||||
|
|
||||||
def transfer(resource, metadata=None):
|
def transfer(resource):
|
||||||
""" Transfer the resource (typically just a file that may
|
""" Transfer the resource (an object providing IResource)
|
||||||
be read) to the server.
|
to the server.
|
||||||
|
|
||||||
The resource may be associated with a metadata set.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
|
||||||
return deferred
|
return deferred
|
||||||
|
|
||||||
def dataAvailable(self):
|
def dataAvailable(self):
|
||||||
self.deferred.callback([(DummyResource(), None)])
|
self.deferred.callback([DummyResource()])
|
||||||
|
|
||||||
|
|
||||||
class DummyResource(object):
|
class DummyResource(object):
|
||||||
|
@ -47,3 +47,4 @@ class DummyResource(object):
|
||||||
implements(IResource)
|
implements(IResource)
|
||||||
|
|
||||||
data = 'Dummy resource data for testing purposes.'
|
data = 'Dummy resource data for testing purposes.'
|
||||||
|
metadata = None
|
||||||
|
|
|
@ -40,8 +40,8 @@ class TransportJob(BaseJob):
|
||||||
if result is None:
|
if result is None:
|
||||||
print 'No data available.'
|
print 'No data available.'
|
||||||
else:
|
else:
|
||||||
for res, meta in result:
|
for resource in result:
|
||||||
d = self.transporter.transfer(res.data, meta)
|
d = self.transporter.transfer(resource)
|
||||||
return Deferred()
|
return Deferred()
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,14 +49,16 @@ class Transporter(BaseTransporter):
|
||||||
|
|
||||||
jobFactory = TransportJob
|
jobFactory = TransportJob
|
||||||
|
|
||||||
def transfer(self, data, metadata=None):
|
def transfer(self, resource):
|
||||||
|
data = resource.data
|
||||||
if type(data) is file:
|
if type(data) is file:
|
||||||
text = data.read()
|
text = data.read()
|
||||||
data.close()
|
data.close()
|
||||||
else:
|
else:
|
||||||
text = data
|
text = data
|
||||||
|
metadata = resource.metadata
|
||||||
if metadata is not None:
|
if metadata is not None:
|
||||||
print 'Metadata:', metadata.data
|
print 'Metadata:', metadata
|
||||||
print 'Transferring:', text
|
print 'Transferring:', text
|
||||||
return Deferred()
|
return Deferred()
|
||||||
|
|
||||||
|
|
|
@ -62,6 +62,7 @@ def test_suite():
|
||||||
#standard_unittest.makeSuite(Test),
|
#standard_unittest.makeSuite(Test),
|
||||||
doctest.DocFileSuite('README.txt', optionflags=flags),
|
doctest.DocFileSuite('README.txt', optionflags=flags),
|
||||||
doctest.DocFileSuite('crawl/filesystem.txt', optionflags=flags),
|
doctest.DocFileSuite('crawl/filesystem.txt', optionflags=flags),
|
||||||
|
doctest.DocFileSuite('transport/httpput.txt', optionflags=flags),
|
||||||
))
|
))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -48,9 +48,10 @@ class Transporter(object):
|
||||||
def __init__(self, agent):
|
def __init__(self, agent):
|
||||||
self.agent = agent
|
self.agent = agent
|
||||||
|
|
||||||
def transfer(self, data, metadata=None):
|
def transfer(self, resource):
|
||||||
|
data = resource.data
|
||||||
if type(data) is file:
|
if type(data) is file:
|
||||||
data = text.read()
|
text = data.read()
|
||||||
data.close()
|
data.close()
|
||||||
else:
|
else:
|
||||||
text = data
|
text = data
|
||||||
|
|
|
@ -23,7 +23,7 @@ $Id$
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from twisted.internet import reactor
|
from twisted.internet import reactor
|
||||||
from twisted.internet.defer import Deferred
|
from twisted.web.client import getPage
|
||||||
from zope.interface import implements
|
from zope.interface import implements
|
||||||
|
|
||||||
from loops.agent.interfaces import ITransporter, ITransportJob
|
from loops.agent.interfaces import ITransporter, ITransportJob
|
||||||
|
@ -38,13 +38,13 @@ class TransportJob(Job):
|
||||||
super(TransportJob, self).__init__()
|
super(TransportJob, self).__init__()
|
||||||
self.transporter = transporter
|
self.transporter = transporter
|
||||||
|
|
||||||
def execute(self, **kw):
|
def execute(self):
|
||||||
result = kw.get('result')
|
result = kw.get('result')
|
||||||
if result is None:
|
if result is None:
|
||||||
print 'No data available.'
|
print 'No data available.'
|
||||||
else:
|
else:
|
||||||
for r in result:
|
for resource, metadata in result:
|
||||||
d = self.transporter.transfer(r[0].data, r[1], str)
|
d = self.transporter.transfer(resource.data, metadata)
|
||||||
return Deferred()
|
return Deferred()
|
||||||
|
|
||||||
|
|
||||||
|
@ -62,15 +62,18 @@ class Transporter(object):
|
||||||
|
|
||||||
def __init__(self, agent):
|
def __init__(self, agent):
|
||||||
self.agent = agent
|
self.agent = agent
|
||||||
config = agent.config
|
conf = agent.config
|
||||||
|
# TODO: get settings from conf
|
||||||
|
|
||||||
def transfer(self, resource, metadata=None, resourceType=file):
|
def transfer(self, resource):
|
||||||
if resourceType is file:
|
data = resource.data
|
||||||
data = resource.read()
|
if type(data) is file:
|
||||||
|
text = resource.read()
|
||||||
resource.close()
|
resource.close()
|
||||||
elif resourceType is str:
|
else:
|
||||||
data = resource
|
text = data
|
||||||
print 'Transferring:', data
|
metadata = resource.metadata
|
||||||
return Deferred()
|
url = self.serverURL + self.makePath(metadata)
|
||||||
|
d = getPage(url, method='PUT', postData=text)
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
15
agent/transport/httpput.txt
Normal file
15
agent/transport/httpput.txt
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
======================================================
|
||||||
|
loops.agent.transport.httpput - The HTTP PUT Transport
|
||||||
|
======================================================
|
||||||
|
|
||||||
|
($Id$)
|
||||||
|
|
||||||
|
>>> from time import time
|
||||||
|
|
||||||
|
>>> from loops.agent.core import Agent
|
||||||
|
>>> from loops.agent.transport.httpput import Transporter, TransportJob
|
||||||
|
|
||||||
|
>>> agent = Agent()
|
||||||
|
>>> transporter = Transporter(agent)
|
||||||
|
>>> job = TransportJob(transporter)
|
||||||
|
|
Loading…
Add table
Reference in a new issue