* agent now creates an empty temporary directory in
  which all jobs should create their job directories

ui/web.py
* outlook crawler has changed, adapted ui methods
  accordingly to access OutlookResource objects
  instead of email.MIME lists
* using agent object as an attribute in the
  AgentHome class which is now passed through when
  necessary
* changed form for creating OutlookCrawlJobs to provide
  the possibility to select whether inbox, subfolders or
  both folder types should be crawled and to specify
  a regular expression for subfolder selection
  todo: add scheduler options
* changed the way job details are displayed
* changed display of crawled resources:
  it is now also possible to display the job folders
  and enter them to view the mail objects

* added first sketch of a filesystem crawler form,
  without functionality at the moment

known bugs: currently having problems with character encodings

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1950 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
scrat 2007-08-22 13:57:22 +00:00
parent 993356b8ae
commit c3ffee3437
8 changed files with 1132 additions and 432 deletions

View file

@ -23,6 +23,7 @@ $Id$
""" """
from time import time from time import time
import tempfile
from zope.interface import implements from zope.interface import implements
from loops.agent.interfaces import IAgent from loops.agent.interfaces import IAgent
from loops.agent.config import Configurator from loops.agent.config import Configurator
@ -55,6 +56,7 @@ class Agent(object):
self.stopper = Stopper() self.stopper = Stopper()
self.stopper.scheduler = self.scheduler self.stopper.scheduler = self.scheduler
self.logger = Logger(self) self.logger = Logger(self)
self.tempdir = tempfile.mkdtemp(prefix='loops_')
def scheduleJobsFromConfig(self, stop=False): def scheduleJobsFromConfig(self, stop=False):
config = self.config config = self.config

View file

@ -9,39 +9,74 @@ Tobias Schmid 26.07.2007
""" """
import win32com.client import win32com.client
import re
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from twisted.internet.defer import Deferred
from twisted.internet.task import coiterate
from zope.interface import implements
from loops.agent.interfaces import IResource
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
from loops.agent.crawl.base import Metadata
# DEBUG FLAGS # DEBUG FLAGS
DEBUG = 0 DEBUG = 1
DEBUG_WRITELINE = 1 DEBUG_WRITELINE = 1
# some constants # some constants
COMMASPACE = ', ' COMMASPACE = ', '
class MSOutlook: class CrawlingJob(BaseCrawlingJob):
def __init__(self):
self.outlookFound = 0 keys = ""
inbox = ""
subfolders = ""
pattern = ""
def collect(self):
self.collected = []
coiterate(self.crawlOutlook()).addCallback(self.finished)
# TODO: addErrback()
self.deferred = Deferred()
return self.deferred
def finished(self, result):
self.deferred.callback(self.collected)
def crawlOutlook(self):
outlookFound = 0
try: try:
self.oOutlookApp = \ oOutlookApp = \
win32com.client.gencache.EnsureDispatch("Outlook.Application") win32com.client.gencache.EnsureDispatch("Outlook.Application")
self.outlookFound = 1 outlookFound = 1
except: except:
print "MSOutlook: unable to load Outlook" print "MSOutlook: unable to load Outlook"
self.records = [] records = []
self.mailList = []
if not outlookFound:
def loadInbox(self, keys=None, subfolders=False):
if not self.outlookFound:
return return
# fetch the params
criteria = self.params
self.keys = criteria.get('keys')
self.inbox = criteria.get('inbox') #boolean
self.subfolders = criteria.get('subfolders') #boolean
self.pattern = criteria.get('pattern')
if self.pattern != '':
self.pattern = re.compile(criteria.get('pattern') or '.*')
else:
self.pattern = None
if DEBUG_WRITELINE: if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> starting' print 'MSOutlook.loadInbox() ===> starting'
# this should use more try/except blocks or nested blocks # catch Inbox folder
onMAPI = self.oOutlookApp.GetNamespace("MAPI") onMAPI = oOutlookApp.GetNamespace("MAPI")
ofInbox = \ ofInbox = \
onMAPI.GetDefaultFolder(win32com.client.constants.olFolderInbox) onMAPI.GetDefaultFolder(win32com.client.constants.olFolderInbox)
@ -49,106 +84,32 @@ class MSOutlook:
if DEBUG_WRITELINE: if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> fetch mails of inbox folder' print 'MSOutlook.loadInbox() ===> fetch mails of inbox folder'
for om in range(len(ofInbox.Items)): # fetch mails from inbox
mail = ofInbox.Items.Item(om + 1) if self.inbox:
if mail.Class == win32com.client.constants.olMail: self.loadEmail(ofInbox)
if keys is None:
# if we were't give a set of keys to use
# then build up a list of keys that we will be
# able to process
# I didn't include fields of type time, though
# those could probably be interpreted
keys = []
for key in mail._prop_map_get_:
if isinstance(getattr(mail, key), (int, str, unicode)):
keys.append(key)
if DEBUG:
keys.sort()
print 'Fields\n======================================'
for key in keys:
print key
record = {}
for key in keys:
record[key] = getattr(mail, key)
# Create the container (outer) email message. # fetch mails of inbox subfolders
msg = MIMEMultipart() if self.subfolders and self.pattern is None:
# subject
msg['Subject'] = record['Subject'].encode('utf-8')
# sender
sender = str(record['SenderName'].encode('utf-8')) #SenderEmailAddress
msg['From'] = sender
#recipients
recipients = []
for rec in range(record['Recipients'].__len__()):
recipients.append(getattr(record['Recipients'].Item(rec+1), 'Address'))
msg['To'] = COMMASPACE.join(recipients)
# message
msg.preamble = record['Body'].encode('utf-8')
# add the email message to the list
self.mailList.append(msg)
self.records.append(record)
"""
* Fetch the mails of the inbox subfolders
"""
if subfolders:
if DEBUG_WRITELINE: if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> fetch emails of subfolders' print 'MSOutlook.loadInbox() ===> fetch emails of subfolders'
lInboxSubfolders = getattr(ofInbox, 'Folders') lInboxSubfolders = getattr(ofInbox, 'Folders')
for of in range(lInboxSubfolders.__len__()): for of in range(lInboxSubfolders.__len__()):
# get a MAPI-folder object # get a MAPI-folder object and load its emails
oFolder = lInboxSubfolders.Item(of + 1) self.loadEmail(lInboxSubfolders.Item(of + 1))
# get items of the folder
folderItems = getattr(oFolder, 'Items')
for item in range(len(folderItems)):
mail = folderItems.Item(item+1)
if mail.Class == win32com.client.constants.olMail:
if keys is None:
keys = []
for key in mail._prop_map_get_:
if isinstance(getattr(mail, key), (int, str, unicode)):
keys.append(key)
if DEBUG: # pattern, just read the specified subfolder
keys.sort() elif self.subfolders and self.pattern:
print 'Fiels\n======================================='
for key in keys:
print key
record = {} if DEBUG_WRITELINE:
for key in keys: print 'MSOutlook.loadInbox() ===> fetch emails of specified subfolder'
record[key] = getattr(mail, key) lInboxSubfolders = getattr(ofInbox, 'Folders')
if DEBUG: for of in range(lInboxSubfolders.__len__()):
print of # get a MAPI-folder object and load its emails
if self.pattern.match(getattr(lInboxSubfolders.Item(of + 1), 'Name')):
self.loadEmail(lInboxSubfolders.Item(of + 1)) #oFolder
# Create the container (outer) email message.
msg = MIMEMultipart()
# subject
msg['Subject'] = record['Subject'].encode('utf-8')
# sender
sender == record['SenderName'].encode('utf-8') #SenderEmailAddress
msg['From'] = sender
# recipients
for rec in range(record['Recipients'].__len__()):
recipients.append(getattr(record['Recipients'].Item(rec+1), 'Address'))
msg['To'] = COMMASPACE.join(recipients)
# message
msg.preamble = record['Body'].encode('utf-8')
# add the email message to the list
self.mailList.append(msg)
self.records.append(record)
if DEBUG: if DEBUG:
print 'number of mails in Inbox:', len(ofInbox.Items) print 'number of mails in Inbox:', len(ofInbox.Items)
@ -157,7 +118,7 @@ class MSOutlook:
# get Count-Attribute of _Folders class # get Count-Attribute of _Folders class
iInboxSubfoldersCount = getattr(lInboxSubfolders, 'Count') iInboxSubfoldersCount = getattr(lInboxSubfolders, 'Count')
# the Item-Method of the _Folders class returns a MAPIFolder object # the Item-Method of the _Folders class returns a MAPIFolder object
oFolder = lInboxSubfolders.Item(0) #1 oFolder = lInboxSubfolders.Item(1)
print 'Count of Inbox-SubFolders:', iInboxSubfoldersCount print 'Count of Inbox-SubFolders:', iInboxSubfoldersCount
print 'Inbox sub folders (Folder/Mails):' print 'Inbox sub folders (Folder/Mails):'
@ -168,45 +129,78 @@ class MSOutlook:
if DEBUG_WRITELINE: if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> ending' print 'MSOutlook.loadInbox() ===> ending'
yield '1'
return self.mailList
def loadEmail(self, oFolder):
# get items of the folder
folderItems = getattr(oFolder, 'Items')
for item in range(len(folderItems)):
mail = folderItems.Item(item+1)
if mail.Class == win32com.client.constants.olMail:
if self.keys is None:
self.keys = []
for key in mail._prop_map_get_:
if isinstance(getattr(mail, key), (int, str, unicode)):
self.keys.append(key)
if __name__ == '__main__': if DEBUG:
if DEBUG: self.keys.sort()
print 'attempting to load Outlook' print 'Fiels\n======================================='
oOutlook = MSOutlook() for key in self.keys:
# delayed check for Outlook on win32 box print key
if not oOutlook.outlookFound:
print 'Outlook not found'
sys.exit(1)
fieldsMail = ['Body', record = {}
'HTMLBody', for key in self.keys:
'CC', record[key] = getattr(mail, key)
'SenderName', if DEBUG:
'Recipients', print str(item)
'To',
'Attachments',
'Subject'
]
# 'BodyFormat', removed BodyFormat temporarily because it is not available in Outlook.9 (Office2000)
# 'SenderEmailAddress', replaced by SenderName
if DEBUG: # Create the mime email object
import time msg = self.createEmailMime(record)
print 'loading records...'
startTime = time.time()
mails = oOutlook.loadInbox(fieldsMail) # list with mime objects
self.collected.append((OutlookResource(msg)))
for elem in mails:
print str(elem)
if DEBUG_WRITELINE: def createEmailMime(self, emails):
print '***Back in main() with some emails in a list....***' # Create the container (outer) email message.
print 'Mails fetched from MSOutlook inbox folder:', mails.__len__() msg = MIMEMultipart()
# subject
msg['Subject'] = emails['Subject'].encode('utf-8')
# sender
if emails.has_key('SenderEmailAddress'):
sender = str(emails['SenderEmailAddress'].encode('utf-8'))
else:
sender = str(emails['SenderName'].encode('utf-8'))
msg['From'] = sender
#recipients
recipients = []
if emails.has_key('Recipients'):
for rec in range(emails['Recipients'].__len__()):
recipients.append(getattr(emails['Recipients'].Item(rec+1), 'Address'))
msg['To'] = COMMASPACE.join(recipients)
else:
recipients.append(emails['To'])
msg['To'] = COMMASPACE.join(recipients)
# message
msg.preamble = emails['Body'].encode('utf-8')
return msg
class OutlookResource(object):
    """Resource wrapper around a single crawled Outlook mail.

    The class declares ``IResource`` via ``implements`` and exposes the
    wrapped object unchanged through the read-only ``data`` property.
    """

    implements(IResource)

    def __init__(self, oEmail):
        # The crawler passes in the object returned by createEmailMime();
        # it is stored as-is, without copying or converting it.
        self.oEmail = oEmail

    @property
    def data(self):
        """Return the wrapped mail object exactly as it was passed in."""
        return self.oEmail
if DEBUG:
print 'loading took %f seconds' % (time.time() - startTime)

View file

@ -0,0 +1,228 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> //-->
<html xmlns:nevow="http://nevow.com/ns/nevow/0.1">
<!-- Outlook Mails Page for loops.agent UI Version: 0.1 //-->
<nevow:invisible nevow:render="header_fragment" />
<body>
<div class="body">
<div nevow:render="top_fragment">
</div>
<div id="menu">
<div nevow:render="navigation_fragment">
</div>
<br/>
<br/>
<div class="box">
<h4>User Mode</h4>
<div class="body">
<b>Current Mode: </b><p nevow:render="getActiveUserMode"/>
</div>
</div>
</div>
<div id="content">
<div></div>
<div></div>
<div class="content-1" id="2.body" ondblclick="">
<div class="line-block">
<div class="line"><br /></div>
</div>
<div class="section">
<h3><a id="agent-ui-startpage" name="agent-ui-startpage">Agent: Create Filesystem Crawler Job</a></h3>
<ul class="simple">
<li>Configuration page for Filesystem Crawler Jobs</li>
</ul>
</div>
<div class="section">
<h3><a id="form-overview" name="form-overview">Overview</a></h3>
<ul class="simple">
<li><b>File Collection</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
</ul>
<div align="center">
<table border="0" cellpadding="10">
<p nevow:render="displayFiles">
<tr nevow:pattern="CollectedFiles" nevow:render="data"/>
</p>
</table>
<form name="FileCrawlForm" action="submitFilesystemCrawlJob" method="POST">
<fieldset>
<legend>
Filesystem Crawl Settings
</legend>
<fieldset>
<legend>Directories to crawl</legend>
<table align="center">
<tr>
<td>
<label for="lblRecursiveDirs">Directories to crawl <b>recursively</b></label>
<p>
<i>please use ';' as delimiters</i>
</p>
<p>
<i>you can also use patterns like regular expressions </i>
</p>
</td>
<td>
<textarea name="rec_directories" id="lblRecursiveDirs" rows="10" cols="40"></textarea>
</td>
</tr>
<tr>
<td>
<label for="lblDirs">Directories to crawl non-recursively</label>
<p>
<i>please use ';' as delimiters</i>
</p>
<p>
<i>you can also use patterns like regular expressions </i>
</p>
</td>
<td>
<textarea name="rec_directories" id="lblDirs" rows="10" cols="40"></textarea>
</td>
</tr>
</table>
</fieldset>
<fieldset>
<legend>
Filter criteria patterns
</legend>
<table align="center">
<tr>
<td>
<label for="lblApplyFileSize">Apply size criteria : </label>
</td>
<td>
<input id="lblApplyFileSize" name="applyFileSize" type="checkbox"/>
</td>
</tr>
<tr>
<td>
<label for="lblFileSizeLimit">Collect files that are : </label>
</td>
<td>
<select id="lblFileSizeLimit" name="selectFileSizeLimit">
<option>greater</option>
<option>less</option>
<option>equal</option>
</select>
</td>
</tr>
<tr>
<td>
<label for="lblFileSize">than (kByte): </label>
</td>
<td>
<input id="lblFileSize" name="fileSize"
type="text" size="12" maxlength="20" />
</td>
</tr>
<tr>
<td>
<label for="lblMaximumSize">Maximum file size: </label>
<p>
<i>no size means that transferred files might be very large!</i>
</p>
</td>
<td>
<input id="lblMaximumSize" name="maximumSize"
type="text" size="12" maxlength="20" />
</td>
</tr>
<tr>
<td>
<label for="lblApplyDateCriteria">Apply date criteria: </label>
</td>
<td>
<input id="lblApplyDateCriteria" name="applyDateCriteria" type="checkbox"/>
</td>
</tr>
<tr>
<td>
<label for="lblDateCriteria">Collect files that are: </label>
</td>
<td>
<select id="lblDateCriteria" name="selectDateCriteria">
<option>created</option>
<option>modified</option>
<option>accessed</option>
</select>
</td>
</tr>
<tr>
<td>
<label for="lblTimestampCompare"></label>
</td>
<td>
<select id="lblTimestampCompare" name="selectTimeStampCompare">
<option>before</option>
<option>after</option>
<option>exactly on</option>
</select>
</td>
</tr>
<tr>
<td>
<label for="lblTimeStamp">Collect files that are : </label>
</td>
<td>
<input id="lblTimeStamp" name="selectDateCriteria" type="text" size="20"/>
</td>
</tr>
</table>
</fieldset>
<fieldset>
<legend>
Job Interval
</legend>
<table align="center">
<tr>
<td>
<label for="lblmailCrawlIntervaloneTime">One Time: </label>
</td>
<td>
<input type="radio" id="lblmailCrawlIntervaloneTime" name="mailCrawlInterval"
value="oneTime" checked="checked" />
</td>
</tr>
<tr>
<td>
<label for="lblmailCrawlIntervalScheduler">Use Scheduler: </label>
</td>
<td>
<input type="radio" id="lblmailCrawlIntervalScheduler" name="mailCrawlInterval"
value="Scheduler" />
</td>
</tr>
</table>
</fieldset>
<input type="submit" name="startCrawlJob" value="Save and Start" />
</fieldset>
</form>
<div nevow:render="systemMessage"/>
</div>
</div>
</div>
</div>
<div id="sub-section" define-macro="sub-section">
</div>
<div nevow:render="footer_fragment">
</div>
</div>
</body>
</html>

View file

@ -47,36 +47,29 @@
<div class="section"> <div class="section">
<h3><a id="form-overview" name="form-overview">Overview</a></h3> <h3><a id="form-overview" name="form-overview">Overview</a></h3>
<ul class="simple"> <ul class="simple">
<li><b>Mail Collection</b><div nevow:render="data" nevow:data="displayViewForm"/></li> <li><b>Mail in Detail</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
</ul> </ul>
<div nevow:render="systemMessage"/> <div nevow:render="systemMessage"/>
<div align="center"> <div align="center">
<table class="listing"> <table class="listing" style="width:200px">
<thead> <thead>
<tr> <tr>
<th> <th>
From Field
</th> </th>
<th> <th>
CC Value
</th>
<th>
Subject
</th>
<th>
Date
</th>
<th>
Source Folder
</th> </th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
<p nevow:render="displayOutlookMails"> <p nevow:render="displayOutlookMail">
</p> </p>
</tbody> </tbody>
</table> </table>
</div> </div>
<br/>
<a href="viewRessources" alt="back to Ressources overview"><b>[back to Ressources overview]</b></a>
</div> </div>
</div> </div>
</div> </div>

View file

@ -65,22 +65,29 @@
<table align="center"> <table align="center">
<tr> <tr>
<td> <td>
<label for="lblsearchFolder">Folder to search: </label> <label for="lblCrawlInbox">Crawl inbox</label>
</td> </td>
<td> <td>
<input id="lblsearchFolder" name="searchFolder" type="text" <input id="lblCrawlInbox" name="inbox" type="checkbox"
size="12" maxlength="20"/> value="inbox"/><!-- outlook job param //-->
</td> </td>
</tr> </tr>
<tr> <tr>
<td> <td>
<label for="lblselectSubfolder">Include subfolder: </label> <label for="lblCrawlSubfolders">Crawl subfolders</label>
</td> </td>
<td> <td>
<select id="lblselectSubfolder" name="selectSubfolder"> <input type="checkbox" id="lblCrawlSubfolders" name="subfolders"
<option>Yes</option> value = "subfolders"/><!-- outlook job param //-->
<option>No</option> </td>
</select> </tr>
<tr>
<td>
<label for="lblSubFolderPattern">Pattern for subfolders to include</label>
</td>
<td>
<input type="text" maxlength="20" size="16" id="lblSubFolderPattern"
name="pattern"/><!-- outlook job param //-->
</td> </td>
</tr> </tr>
</table> </table>

View file

@ -16,19 +16,19 @@
<div class="content odd menu-3"> <div class="content odd menu-3">
<a href="http://localhost:8080/collectOutlookMails" class=""> <a href="http://localhost:8080/collectOutlookMails" class="">
collect Outlook Mails add outlook crawl job
</a> </a>
</div> </div>
<div class="content odd menu-3"> <div class="content odd menu-3">
<a href="http://localhost:8080/viewOutlookMails" class=""> <a href="http://localhost:8080/collectFilesystem" class="">
view Outlook Mails add filesystem crawl job</a>
</a>
</div> </div>
<div class="content odd menu-3"> <div class="content odd menu-3">
<a href="http://localhost:8080/addjob" class=""> <a href="http://localhost:8080/viewRessources" class="">
add job</a> view collected ressources
</a>
</div> </div>
<div class="content odd menu-3"> <div class="content odd menu-3">

View file

@ -0,0 +1,79 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> //-->
<html xmlns:nevow="http://nevow.com/ns/nevow/0.1">
<!-- Ressource Page for loops.agent UI Version: 0.1 //-->
<nevow:invisible nevow:render="header_fragment" />
<body>
<div class="body">
<div nevow:render="top_fragment">
</div>
<div id="menu">
<div nevow:render="navigation_fragment">
</div>
<br/>
<br/>
<div class="box">
<h4>User Mode</h4>
<div class="body">
<b>Current Mode: </b><p nevow:render="getActiveUserMode"/>
</div>
</div>
</div>
<div id="content">
<div></div>
<div></div>
<div class="content-1" id="2.body" ondblclick="">
<div class="line-block">
<div class="line"><br /></div>
</div>
<div class="section">
<h3><a id="agent-ui-startpage" name="agent-ui-startpage">Agent: collected ressources</a></h3>
<ul class="simple">
<li>All currently available objects that were collected by loops jobs</li>
</ul>
</div>
<div class="section">
<h3><a id="form-overview" name="form-overview">Overview</a></h3>
<ul class="simple">
<li><b>Ressource Collection</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
</ul>
<div nevow:render="systemMessage"/>
<div align="center">
<table class="listing">
<thead>
<p nevow:render="displayRessourceHeaders">
</p>
</thead>
<tbody>
<p nevow:render="displayRessources">
</p>
</tbody>
</table>
</div>
</div>
</div>
</div>
<div id="sub-section" define-macro="sub-section">
</div>
<div nevow:render="footer_fragment">
</div>
</div>
</body>
</html>

File diff suppressed because it is too large Load diff