work in progress: filesystem crawler - added basic metadata handling
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1893 fd906abe-77d9-0310-91a1-e0d9ade77398
Commit db31c2d252 by helmutm (parent 81d2092e0a)
6 changed files with 24 additions and 17 deletions
@@ -49,4 +49,10 @@ class Metadata(object):
 
     def __init__(self, data=dict()):
         self.data = data
 
+    def asXml(self):
+        # TODO...
+        return ''
+
+    def set(self, key, value):
+        self.data[key] = value
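An illustrative sketch (not part of the commit) of how the extended Metadata class behaves, assuming the class shown in the hunk above:

  >>> meta = Metadata()
  >>> meta.set('path', '/data/file1.txt')
  >>> meta.data
  {'path': '/data/file1.txt'}
  >>> meta.asXml()
  ''

Note that asXml() is still a stub, and that the mutable default argument in __init__ (data=dict()) is shared across all instances created without an argument, so callers should pass their own dict.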
@@ -36,15 +36,14 @@ from loops.agent.crawl.base import Metadata
 class CrawlingJob(BaseCrawlingJob):
 
     def collect(self):
-        self.data = []
-        #deferred = reactor.deferToThread(self.crawlFilesystem, dataAvailable)
-        deferred = self.deferred = Deferred()
-        self.internalDeferred = coiterate(self.crawlFilesystem())
-        self.internalDeferred.addCallback(self.finished)
-        return deferred
+        self.collected = []
+        coiterate(self.crawlFilesystem()).addCallback(self.finished)
+        # TODO: addErrback()
+        self.deferred = Deferred()
+        return self.deferred
 
     def finished(self, result):
-        self.deferred.callback(self.data)
+        self.deferred.callback(self.collected)
 
     def crawlFilesystem(self):
         criteria = self.params
@@ -59,8 +58,11 @@ class CrawlingJob(BaseCrawlingJob):
                     mtime = datetime.fromtimestamp(
                                 os.stat(filename)[stat.ST_MTIME])
                     # TODO: check modification time
-                    self.data.append((FileResource(filename),
-                                      Metadata(dict())))
+                    meta = dict(
+                        path=filename,
+                    )
+                    self.collected.append((FileResource(filename),
+                                           Metadata(meta)))
                     yield None
 
 
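For readers unfamiliar with the pattern in the two hunks above: coiterate() from twisted.internet.task drives a generator cooperatively on the reactor and returns a deferred that fires once the generator is exhausted. A minimal standalone sketch of the same collect/finished flow (all names except coiterate, Deferred and reactor are illustrative):

  from twisted.internet import reactor
  from twisted.internet.defer import Deferred
  from twisted.internet.task import coiterate

  def produce(collected):
      # each yield hands control back to the reactor between work units
      for name in ('file1.txt', 'file2.txt'):
          collected.append(name)
          yield None

  def collect():
      collected = []
      done = Deferred()
      # fire the outer deferred with the collected data once iteration ends;
      # the commit's TODO about addErrback() applies here as well
      coiterate(produce(collected)).addCallback(lambda _: done.callback(collected))
      return done

  collect().addCallback(lambda data: reactor.stop())
  reactor.run()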
@@ -35,6 +35,8 @@ We are now ready to schedule the job and let the reactor execute it.
   >>> scheduler.schedule(crawlJob, int(time()))
 
   >>> tester.iterate()
+  Metadata: {'path': '...data...file1.txt'}
   Transferring: Data from file1.txt
+  Metadata: {'path': '...data...subdir...file2.txt'}
   Transferring: Data from file2.txt
 
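The '...' placeholders in the expected output above presumably rely on doctest's ELLIPSIS option (the option setup is not visible in this hunk), masking the absolute directory prefix and the OS-specific path separators. An illustrative standalone example, with a made-up path:

  >>> print 'path: /home/agent/data/subdir/file2.txt'   # doctest: +ELLIPSIS
  path: ...data...subdir...file2.txt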
@@ -124,7 +124,7 @@ class IMetadataSet(Interface):
             (nested metadata) this will be converted to XML as well.
         """
 
-    def setData(key, value):
+    def set(key, value):
         """ Set a metadata element.
 
             The value may be a string or another metadata set
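The docstring allows nested metadata sets as values; a hypothetical use of the renamed set() with the Metadata class from the first hunk (the nested XML conversion is not implemented yet in this commit):

  outer = Metadata({'source': 'filesystem'})
  inner = Metadata({'name': 'file1.txt'})
  outer.set('file', inner)      # the value may itself be a metadata set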
@@ -26,7 +26,7 @@ from twisted.internet import reactor
 from twisted.internet.defer import Deferred
 from zope.interface import implements
 
-from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
+from loops.agent.interfaces import ICrawlingJob, IResource
 from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
 
 
@@ -39,12 +39,7 @@ class CrawlingJob(BaseCrawlingJob):
         return deferred
 
     def dataAvailable(self):
-        self.deferred.callback([(DummyResource(), Metadata())])
-
-
-class Metadata(object):
-
-    implements(IMetadataSet)
+        self.deferred.callback([(DummyResource(), None)])
 
 
 class DummyResource(object):
@@ -55,6 +55,8 @@ class Transporter(BaseTransporter):
             data.close()
         else:
             text = data
+        if metadata is not None:
+            print 'Metadata:', metadata.data
         print 'Transferring:', text
         return Deferred()
 
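A hedged sketch of the consuming side, paraphrasing the testing Transporter above as a standalone function (the transfer() signature and the file-like branch are assumptions inferred from the data.close() call in the hunk; Metadata is the class from the first hunk):

  from StringIO import StringIO

  def transfer(data, metadata=None):
      # data may be a file-like object or a plain string
      if hasattr(data, 'read'):
          text = data.read()
          data.close()
      else:
          text = data
      if metadata is not None:      # the dummy crawler now hands over None
          print 'Metadata:', metadata.data
      print 'Transferring:', text

  transfer(StringIO('Data from file1.txt'),
           Metadata({'path': 'data/file1.txt'}))

This prints the metadata dict followed by the transferred text, matching the doctest output shown earlier.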