loops/agent/crawl/filesystem.py
helmutm b9ba07ad95 provide Agent.scheduleJobsFromConfig() as part of the start-up procedure; work in progress: filesystem crawler
git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1861 fd906abe-77d9-0310-91a1-e0d9ade77398
2007-08-01 16:15:29 +00:00

66 lines
2 KiB
Python

#
# Copyright (c) 2007 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
Filesystem crawler.
$Id$
"""
import os
import re
import stat
from datetime import datetime

from twisted.internet.defer import Deferred
from twisted.internet.threads import deferToThread
from zope.interface import implements

from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
from loops.agent.interfaces import ICrawlingJob, IResource, IMetadataSet
class CrawlingJob(BaseCrawlingJob):
    """Crawling job that collects files from a directory tree.

    The blocking directory walk is executed in a worker thread so that
    the Twisted reactor is not stalled while the filesystem is scanned.
    """

    def collect(self, **criteria):
        """Start crawling and return a Deferred for the collected data.

        ``criteria`` is forwarded unchanged to ``crawlFilesystem()``
        (keys ``directory`` and ``pattern``).  The returned Deferred is
        fired by ``dataAvailable()`` once the walk has finished; if the
        walk raises, the failure is passed on to the Deferred's errback
        chain instead.
        """
        self.deferred = Deferred()
        # crawlFilesystem() is a generator: it has to be exhausted inside
        # the worker thread, otherwise only the lazy generator object
        # would be created there and no crawling would take place.
        crawl = deferToThread(
            lambda: list(self.crawlFilesystem(**criteria)))
        crawl.addCallbacks(self.dataAvailable, self.deferred.errback)
        return self.deferred

    def dataAvailable(self, result=None):
        """Fire the Deferred handed out by ``collect()``.

        ``result`` is the list of ``(relative path, modification time)``
        tuples produced by the crawl when this method is used as the
        callback of the worker thread; it is optional so that existing
        callers may still invoke the method without arguments.
        """
        # Work in progress: the crawl result is not yet turned into real
        # resources; a single dummy resource/metadata pair is delivered.
        self.deferred.callback([(FileResource(), Metadata())])

    def crawlFilesystem(self, **criteria):
        """Yield ``(relative path, modification time)`` for matching files.

        Criteria:

        directory -- root of the tree to walk (required).
        pattern -- regular expression applied with ``match()`` (i.e.
            anchored at the start) to each file name; when missing or
            empty every file is accepted.

        ``.svn`` directories are not descended into.  The yielded paths
        are relative to ``directory``; the modification times are naive
        ``datetime`` objects in local time.

        Raises ValueError if no directory is given.
        """
        directory = criteria.get('directory')
        if directory is None:
            raise ValueError("crawlFilesystem() needs a 'directory' criterion")
        pattern = re.compile(criteria.get('pattern') or '.*')
        for path, dirs, files in os.walk(directory):
            if '.svn' in dirs:
                # Pruning the list in place keeps os.walk() out of
                # Subversion's administrative directories.
                dirs.remove('.svn')
            # Cut off the root and the separator following it.  Stripping
            # the separator (instead of a fixed len + 1) also copes with
            # a root that was given with a trailing separator.
            relativeDir = path[len(directory):].lstrip(os.sep)
            for f in files:
                if pattern.match(f):
                    mtime = os.stat(os.path.join(path, f))[stat.ST_MTIME]
                    yield (os.path.join(relativeDir, f),
                           datetime.fromtimestamp(mtime))
class Metadata(object):
    """Placeholder metadata set.

    Only declares that it implements ``IMetadataSet``; the class defines
    no attributes or methods of its own.
    """

    implements(IMetadataSet)
class FileResource(object):
    """Stand-in resource object.

    Declares that it implements ``IResource``; its content is a fixed
    dummy string rather than data read from a file.
    """

    implements(IResource)

    # Fixed placeholder content, intended for testing purposes only.
    data = 'Dummy resource data for testing purposes.'