From c8b3e250d718ee650701d7b6d36eb8bd7ad1a0f6 Mon Sep 17 00:00:00 2001
From: helmutm
Date: Sun, 24 Jun 2007 08:37:55 +0000
Subject: [PATCH] provide a base class for crawling jobs

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1804 fd906abe-77d9-0310-91a1-e0d9ade77398
---
 agent/README.txt       | 13 +++++++------
 agent/crawl/base.py    | 41 +++++++++++++++++++++++++++++++++++++++++
 agent/interfaces.py    |  9 +++++++++
 agent/testing/crawl.py | 17 ++++++-----------
 4 files changed, 63 insertions(+), 17 deletions(-)
 create mode 100644 agent/crawl/base.py

diff --git a/agent/README.txt b/agent/README.txt
index 77d4431..b6dfa6a 100644
--- a/agent/README.txt
+++ b/agent/README.txt
@@ -38,27 +38,28 @@ Configuration (per job)
 - schedule, repeating pattern, conditions
 - following job(s), e.g. to start a transfer immediately after a crawl
 
-  >>> scheduler = agent.scheduler
+How does this work?
+-------------------
 
-  >>> from time import time
   >>> from loops.agent.schedule import Job
-
   >>> class TestJob(Job):
   ...     def execute(self, **kw):
   ...         d = super(TestJob, self).execute(**kw)
   ...         print 'executing'
   ...         return d
 
+  >>> from time import time
+  >>> scheduler = agent.scheduler
   >>> scheduler.schedule(TestJob(), int(time()))
 
   >>> tester.iterate()
   executing
 
 We can set up a more realistic example using the dummy crawler and transporter
-classes from testing.
+classes from the testing package.
 
-  >>> from testing.crawl import CrawlingJob
-  >>> from testing.transport import Transporter, TransportJob
+  >>> from loops.agent.testing.crawl import CrawlingJob
+  >>> from loops.agent.testing.transport import Transporter, TransportJob
 
   >>> crawl = CrawlingJob()
   >>> transporter = Transporter()
diff --git a/agent/crawl/base.py b/agent/crawl/base.py
new file mode 100644
index 0000000..1f1ec0b
--- /dev/null
+++ b/agent/crawl/base.py
@@ -0,0 +1,41 @@
+#
+#  Copyright (c) 2007 Helmut Merz helmutm@cy55.de
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+"""
+Filesystem crawler.
+
+$Id$
+"""
+
+from zope.interface import implements
+
+from loops.agent.interfaces import ICrawlingJob
+from loops.agent.schedule import Job
+
+
+class CrawlingJob(Job):
+
+    implements(ICrawlingJob)
+
+    def __init__(self):
+        self.predefinedMetadata = {}
+        super(CrawlingJob, self).__init__()
+
+    def execute(self, **kw):
+        return self.collect(**kw)
+
diff --git a/agent/interfaces.py b/agent/interfaces.py
index e10c462..3f14d0c 100644
--- a/agent/interfaces.py
+++ b/agent/interfaces.py
@@ -104,6 +104,15 @@ class ICrawlingJob(IScheduledJob):
         """
 
 
+class IResource(Interface):
+    """ Represents a data object that is collected by a crawler and
+        will be transferred to the server.
+    """
+
+    data = Attribute("A string, file, or similar representation of the "
+                     "resource's content")
+
+
 class IMetadataSet(Interface):
     """ Metadata associated with a resource.
     """
diff --git a/agent/testing/crawl.py b/agent/testing/crawl.py
index a71bcc3..0ffead7 100644
--- a/agent/testing/crawl.py
+++ b/agent/testing/crawl.py
@@ -26,23 +26,16 @@
 from twisted.internet import reactor
 from twisted.internet.defer import Deferred
 from zope.interface import implements
 
-from loops.agent.interfaces import ICrawlingJob, IMetadataSet
+from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
 from loops.agent.schedule import Job
+from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
 
 
-class CrawlingJob(Job):
-
-    implements(ICrawlingJob)
-
-    def __init__(self):
-        self.predefinedMetadata = {}
-        super(CrawlingJob, self).__init__()
-
-    def execute(self, **kw):
-        return self.collect(**kw)
+class CrawlingJob(BaseCrawlingJob):
 
     def collect(self, **criteria):
         deferred = self.deferred = Deferred()
+        # replace this with the real stuff:
         reactor.callLater(0, self.dataAvailable)
         return deferred
@@ -57,4 +50,6 @@ class Metadata(object):
 
 class DummyResource(object):
 
+    implements(IResource)
+
     data = 'Dummy resource data for testing purposes.'