More on resources: extend the IResource interface and the Resource base class

git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2585 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
helmutm 2008-05-09 07:47:39 +00:00
parent 6cce2a4339
commit ac5013a026
6 changed files with 29 additions and 22 deletions

View file

@ -61,16 +61,18 @@ class Resource(object):
implements(IResource)
data = file = path = None
type = 'sample'
contentType = 'text/plain'
encoding = ''
application = 'sample'
metadata = None
def __init__(self, data=None, file=None, path=None, application=None,
metadata=None):
self.data = data
self.file = file
self.path = path
if application:
self.application = application
self.metadata = metadata
def __init__(self, data=None, **kw):
if data is not None:
self.data = data
for k, v in kw.items():
setattr(self, k, v)
self.subResources = []
@ -79,8 +81,7 @@ class Metadata(dict):
implements(IMetadataSet)
def __init__(self, data=dict()):
for k in data:
self[k] = data[k]
self.update(data)
def asXML(self):
# TODO...

View file

@ -35,7 +35,6 @@ from cybertools.agent.crawl.base import Crawler
from cybertools.agent.util.task import coiterate
class FilesystemCrawler(Crawler):
def collect(self):
@ -67,7 +66,7 @@ class FilesystemCrawler(Crawler):
meta = dict(
path=filename,
)
self.collected.append(FileResource(filename, Metadata(meta)))
self.collected.append(FileResource(path=filename, metadata=Metadata(meta)))
yield None
agents.register(FilesystemCrawler, Master, name='crawl.filesystem')
@ -75,13 +74,12 @@ agents.register(FilesystemCrawler, Master, name='crawl.filesystem')
class FileResource(Resource):
def __init__(self, path, metadata=None):
self.path = path
self.metadata = metadata
type = 'file'
application = 'filesystem'
@property
def data(self):
return open(self.path, 'r')
f = open(self.path, 'r')
text = f.read()
f.close()
return text

View file

@ -38,5 +38,5 @@ the twisted reactor first.
Job 00001 completed; result: [..., ...];
>>> r0 = controller.result[0]
>>> r0.metadata, r0.data.read()
>>> r0.metadata, r0.data
({'path': '...file1.txt'}, 'Data from file1.txt')

View file

@ -59,9 +59,11 @@ class MailCrawler(Crawler):
def login(self):
pass
agents.register(MailCrawler, Master, name='crawl.mail')
class MailResource(Resource):
application = 'outlook'
type = 'email'
application = 'mailclient'
agents.register(MailCrawler, Master, name='crawl.mail')

View file

@ -130,7 +130,7 @@ class OutlookCrawler(MailCrawler):
# Create the mime email object
msg = self.createEmailMime(record)
# Create a resource and append it to the result list
self.createResource(msg, folder, "Microsoft Office Outlook")
self.createResource(msg, application='outlook')
yield None
def login(self):

View file

@ -214,6 +214,12 @@ class IResource(Interface):
'None if the data or file attribute is given.')
identifier = Attribute('A string (usually derived from the path) that '
'uniquely identifies the resource.')
type = Attribute('A string denoting the type of the resource, e.g. '
'"file" or "email".')
contentType = Attribute('A string denoting the MIME type of the data, '
'e.g. "text/plain" or "application/octet-stream"')
encoding = Attribute('Optional: a string denoting the encoding of the '
'file data, e.g. "UTF-8".')
application = Attribute('The name of the application that provided '
'the resource, e.g. "filesystem" or "mail".')
metadata = Attribute('Information describing this resource; '