provide a base class for crawling jobs
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1804 fd906abe-77d9-0310-91a1-e0d9ade77398
parent e05e3bd6e4
commit c8b3e250d7

4 changed files with 63 additions and 17 deletions

@@ -38,27 +38,28 @@ Configuration (per job)
 - schedule, repeating pattern, conditions
 - following job(s), e.g. to start a transfer immediately after a crawl
 
-  >>> scheduler = agent.scheduler
+How does this work?
+-------------------
 
-  >>> from time import time
   >>> from loops.agent.schedule import Job
-
   >>> class TestJob(Job):
   ...     def execute(self, **kw):
   ...         d = super(TestJob, self).execute(**kw)
   ...         print 'executing'
   ...         return d
 
+  >>> from time import time
+  >>> scheduler = agent.scheduler
   >>> scheduler.schedule(TestJob(), int(time()))
 
   >>> tester.iterate()
   executing
 
 We can set up a more realistic example using the dummy crawler and transporter
-classes from testing.
+classes from the testing package.
 
-  >>> from testing.crawl import CrawlingJob
-  >>> from testing.transport import Transporter, TransportJob
+  >>> from loops.agent.testing.crawl import CrawlingJob
+  >>> from loops.agent.testing.transport import Transporter, TransportJob
 
   >>> crawl = CrawlingJob()
   >>> transporter = Transporter()
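
The mechanism this doctest relies on is Twisted's Deferred: a job's execute() returns a Deferred, and follow-up work, for example starting a transfer right after a crawl, is chained onto it as a callback. Below is a minimal standalone sketch of that pattern in plain Twisted; CrawlStub and transfer are illustrative names made up for this sketch, not part of loops.agent:

  from twisted.internet.defer import Deferred

  class CrawlStub(object):
      """Stand-in for a crawling job: execute() returns a Deferred
      that fires once the collected data is available."""

      def execute(self, **kw):
          self.deferred = Deferred()
          return self.deferred

      def dataAvailable(self):
          # fire the Deferred with whatever was collected
          self.deferred.callback(['resource-1', 'resource-2'])

  def transfer(resources):
      # follow-up job chained onto the crawl's Deferred
      print 'transferring', resources
      return resources

  job = CrawlStub()
  d = job.execute()
  d.addCallback(transfer)
  job.dataAvailable()   # prints: transferring ['resource-1', 'resource-2']
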
agent/crawl/base.py (new file, 41 lines added)
@@ -0,0 +1,41 @@
+#
+#  Copyright (c) 2007 Helmut Merz helmutm@cy55.de
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+"""
+Base class for crawling jobs.
+
+$Id$
+"""
+
+from zope.interface import implements
+
+from loops.agent.interfaces import ICrawlingJob
+from loops.agent.schedule import Job
+
+
+class CrawlingJob(Job):
+
+    implements(ICrawlingJob)
+
+    def __init__(self):
+        self.predefinedMetadata = {}
+        super(CrawlingJob, self).__init__()
+
+    def execute(self, **kw):
+        return self.collect(**kw)
+
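
With this base class in place, a concrete crawler only has to supply collect() and fire the Deferred once its data is available. The following is a hedged sketch of such a subclass, following the Deferred contract used by the dummy crawler further down; DirectoryCrawlingJob and its os.walk logic are made up for illustration and are not the actual filesystem crawler:

  import os
  from twisted.internet import reactor
  from twisted.internet.defer import Deferred

  from loops.agent.crawl.base import CrawlingJob


  class DirectoryCrawlingJob(CrawlingJob):
      """Illustrative subclass: collects the file paths below a directory."""

      def __init__(self, directory='.'):
          super(DirectoryCrawlingJob, self).__init__()
          self.directory = directory

      def collect(self, **criteria):
          deferred = self.deferred = Deferred()
          # hand the result to the Deferred on the next reactor iteration
          reactor.callLater(0, self.listFiles)
          return deferred

      def listFiles(self):
          paths = []
          for dirpath, dirnames, filenames in os.walk(self.directory):
              paths.extend(os.path.join(dirpath, name) for name in filenames)
          self.deferred.callback(paths)
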
@@ -104,6 +104,15 @@ class ICrawlingJob(IScheduledJob):
         """
 
 
+class IResource(Interface):
+    """ Represents a data object that is collected by a crawler and
+        will be transferred to the server.
+    """
+
+    data = Attribute("A string, file, or similar representation of the "
+                     "resource's content")
+
+
 class IMetadataSet(Interface):
     """ Metadata associated with a resource.
     """
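
A provider of the new interface only needs to expose a data attribute; the DummyResource in the testing module below does that with a plain string. As a purely hypothetical illustration (FileResource is not part of this commit), a crawler could hand over an open file instead:

  from zope.interface import implements

  from loops.agent.interfaces import IResource


  class FileResource(object):
      """Hypothetical resource whose data is the content of a file."""

      implements(IResource)

      def __init__(self, path):
          self.path = path

      @property
      def data(self):
          # "a string, file, or similar representation of the resource's content"
          return open(self.path, 'rb')
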
@@ -26,23 +26,16 @@ from twisted.internet import reactor
 from twisted.internet.defer import Deferred
 from zope.interface import implements
 
-from loops.agent.interfaces import ICrawlingJob, IMetadataSet
+from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
 from loops.agent.schedule import Job
+from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
 
 
-class CrawlingJob(Job):
-
-    implements(ICrawlingJob)
-
-    def __init__(self):
-        self.predefinedMetadata = {}
-        super(CrawlingJob, self).__init__()
-
-    def execute(self, **kw):
-        return self.collect(**kw)
+class CrawlingJob(BaseCrawlingJob):
 
     def collect(self, **criteria):
         deferred = self.deferred = Deferred()
+        # replace this with the real stuff:
         reactor.callLater(0, self.dataAvailable)
         return deferred
 
@@ -57,4 +50,6 @@ class Metadata(object):
 
 class DummyResource(object):
 
+    implements(IResource)
+
     data = 'Dummy resource data for testing purposes.'