* agent now creates an empty temporary directory in
  which all jobs should create their job directories

ui/web.py
* outlook crawler has changed, adapted ui methods
  accordingly to access OutlookResource objects
  instead of email.MIME lists
* using agent object as an attribute in the
  AgentHome class which is now passed through when
  necessary
* changed the form for creating OutlookCrawlJobs so that
  it is possible to select whether the inbox, subfolders or
  both folder types should be crawled, and to specify
  a regular expression for subfolder selection
  todo: add scheduler options
* changed the way job details are displayed
* changed display of crawled resources:
  now it is also possible to display the job folders
  and enter them to view the mail objects

* added first sketch of a filesystem crawler form,
  without functionality at the moment

known bugs: currently having problems with character encodings

git-svn-id: svn://svn.cy55.de/Zope3/src/loops/trunk@1950 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
scrat 2007-08-22 13:57:22 +00:00
parent 993356b8ae
commit c3ffee3437
8 changed files with 1132 additions and 432 deletions

View file

@ -23,6 +23,7 @@ $Id$
"""
from time import time
import tempfile
from zope.interface import implements
from loops.agent.interfaces import IAgent
from loops.agent.config import Configurator
@ -55,6 +56,7 @@ class Agent(object):
self.stopper = Stopper()
self.stopper.scheduler = self.scheduler
self.logger = Logger(self)
self.tempdir = tempfile.mkdtemp(prefix='loops_')
def scheduleJobsFromConfig(self, stop=False):
config = self.config

View file

@ -9,146 +9,107 @@ Tobias Schmid 26.07.2007
"""
import win32com.client
import re
from email.mime.multipart import MIMEMultipart
from twisted.internet.defer import Deferred
from twisted.internet.task import coiterate
from zope.interface import implements
from loops.agent.interfaces import IResource
from loops.agent.crawl.base import CrawlingJob as BaseCrawlingJob
from loops.agent.crawl.base import Metadata
# DEBUG FLAGS
DEBUG = 0
DEBUG = 1
DEBUG_WRITELINE = 1
# some constants
COMMASPACE = ', '
class MSOutlook:
def __init__(self):
self.outlookFound = 0
class CrawlingJob(BaseCrawlingJob):
keys = ""
inbox = ""
subfolders = ""
pattern = ""
def collect(self):
    """Start crawling Outlook and return a Deferred for the results.

    Resets ``self.collected``, runs the ``crawlOutlook()`` generator
    cooperatively via ``coiterate`` and returns ``self.deferred``.
    ``finished()`` fires that deferred with ``self.collected`` once the
    generator is exhausted; if the generator raises, the failure is
    forwarded to ``self.deferred`` instead of being dropped.
    """
    self.collected = []
    # Create the result deferred before the crawl is started so that
    # finished() can never run while self.deferred is still missing.
    self.deferred = Deferred()
    crawl = coiterate(self.crawlOutlook())
    # addCallbacks() attaches both handlers at the same level: a crawl
    # failure goes to self.deferred.errback, while an error raised inside
    # finished() is not routed to a deferred that has already fired.
    crawl.addCallbacks(self.finished, self.deferred.errback)
    return self.deferred
# Callback attached in collect() to the coiterate() deferred: fires the
# deferred returned by collect() with the accumulated self.collected list.
# The `result` value handed over by the callback chain is not used.
def finished(self, result):
self.deferred.callback(self.collected)
def crawlOutlook(self):
outlookFound = 0
try:
self.oOutlookApp = \
oOutlookApp = \
win32com.client.gencache.EnsureDispatch("Outlook.Application")
self.outlookFound = 1
outlookFound = 1
except:
print "MSOutlook: unable to load Outlook"
self.records = []
self.mailList = []
def loadInbox(self, keys=None, subfolders=False):
if not self.outlookFound:
records = []
if not outlookFound:
return
# fetch the params
criteria = self.params
self.keys = criteria.get('keys')
self.inbox = criteria.get('inbox') #boolean
self.subfolders = criteria.get('subfolders') #boolean
self.pattern = criteria.get('pattern')
if self.pattern != '':
self.pattern = re.compile(criteria.get('pattern') or '.*')
else:
self.pattern = None
if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> starting'
# this should use more try/except blocks or nested blocks
onMAPI = self.oOutlookApp.GetNamespace("MAPI")
# catch Inbox folder
onMAPI = oOutlookApp.GetNamespace("MAPI")
ofInbox = \
onMAPI.GetDefaultFolder(win32com.client.constants.olFolderInbox)
# fetch the mails of the inbox folder
if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> fetch mails of inbox folder'
for om in range(len(ofInbox.Items)):
mail = ofInbox.Items.Item(om + 1)
if mail.Class == win32com.client.constants.olMail:
if keys is None:
# if we were't give a set of keys to use
# then build up a list of keys that we will be
# able to process
# I didn't include fields of type time, though
# those could probably be interpreted
keys = []
for key in mail._prop_map_get_:
if isinstance(getattr(mail, key), (int, str, unicode)):
keys.append(key)
if DEBUG:
keys.sort()
print 'Fields\n======================================'
for key in keys:
print key
record = {}
for key in keys:
record[key] = getattr(mail, key)
# Create the container (outer) email message.
msg = MIMEMultipart()
# subject
msg['Subject'] = record['Subject'].encode('utf-8')
# sender
sender = str(record['SenderName'].encode('utf-8')) #SenderEmailAddress
msg['From'] = sender
#recipients
recipients = []
for rec in range(record['Recipients'].__len__()):
recipients.append(getattr(record['Recipients'].Item(rec+1), 'Address'))
msg['To'] = COMMASPACE.join(recipients)
# message
msg.preamble = record['Body'].encode('utf-8')
# add the email message to the list
self.mailList.append(msg)
self.records.append(record)
"""
* Fetch the mails of the inbox subfolders
"""
if subfolders:
# fetch mails from inbox
if self.inbox:
self.loadEmail(ofInbox)
# fetch mails of inbox subfolders
if self.subfolders and self.pattern is None:
if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> fetch emails of subfolders'
lInboxSubfolders = getattr(ofInbox, 'Folders')
for of in range(lInboxSubfolders.__len__()):
# get a MAPI-folder object
oFolder = lInboxSubfolders.Item(of + 1)
# get items of the folder
folderItems = getattr(oFolder, 'Items')
for item in range(len(folderItems)):
mail = folderItems.Item(item+1)
if mail.Class == win32com.client.constants.olMail:
if keys is None:
keys = []
for key in mail._prop_map_get_:
if isinstance(getattr(mail, key), (int, str, unicode)):
keys.append(key)
if DEBUG:
keys.sort()
print 'Fiels\n======================================='
for key in keys:
print key
record = {}
for key in keys:
record[key] = getattr(mail, key)
if DEBUG:
print of
# Create the container (outer) email message.
msg = MIMEMultipart()
# subject
msg['Subject'] = record['Subject'].encode('utf-8')
# sender
sender == record['SenderName'].encode('utf-8') #SenderEmailAddress
msg['From'] = sender
# recipients
for rec in range(record['Recipients'].__len__()):
recipients.append(getattr(record['Recipients'].Item(rec+1), 'Address'))
msg['To'] = COMMASPACE.join(recipients)
# message
msg.preamble = record['Body'].encode('utf-8')
# add the email message to the list
self.mailList.append(msg)
self.records.append(record)
# get a MAPI-folder object and load its emails
self.loadEmail(lInboxSubfolders.Item(of + 1))
# pattern, just read the specified subfolder
elif self.subfolders and self.pattern:
if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> fetch emails of specified subfolder'
lInboxSubfolders = getattr(ofInbox, 'Folders')
for of in range(lInboxSubfolders.__len__()):
# get a MAPI-folder object and load its emails
if self.pattern.match(getattr(lInboxSubfolders.Item(of + 1), 'Name')):
self.loadEmail(lInboxSubfolders.Item(of + 1)) #oFolder
if DEBUG:
print 'number of mails in Inbox:', len(ofInbox.Items)
@ -157,7 +118,7 @@ class MSOutlook:
# get Count-Attribute of _Folders class
iInboxSubfoldersCount = getattr(lInboxSubfolders, 'Count')
# the Item-Method of the _Folders class returns a MAPIFolder object
oFolder = lInboxSubfolders.Item(0) #1
oFolder = lInboxSubfolders.Item(1)
print 'Count of Inbox-SubFolders:', iInboxSubfoldersCount
print 'Inbox sub folders (Folder/Mails):'
@ -168,45 +129,78 @@ class MSOutlook:
if DEBUG_WRITELINE:
print 'MSOutlook.loadInbox() ===> ending'
yield '1'
def loadEmail(self, oFolder):
# get items of the folder
folderItems = getattr(oFolder, 'Items')
for item in range(len(folderItems)):
mail = folderItems.Item(item+1)
if mail.Class == win32com.client.constants.olMail:
if self.keys is None:
self.keys = []
for key in mail._prop_map_get_:
if isinstance(getattr(mail, key), (int, str, unicode)):
self.keys.append(key)
if DEBUG:
self.keys.sort()
print 'Fiels\n======================================='
for key in self.keys:
print key
record = {}
for key in self.keys:
record[key] = getattr(mail, key)
if DEBUG:
print str(item)
return self.mailList
# Create the mime email object
msg = self.createEmailMime(record)
# list with mime objects
self.collected.append((OutlookResource(msg)))
def createEmailMime(self, emails):
# Create the container (outer) email message.
msg = MIMEMultipart()
# subject
msg['Subject'] = emails['Subject'].encode('utf-8')
# sender
if emails.has_key('SenderEmailAddress'):
sender = str(emails['SenderEmailAddress'].encode('utf-8'))
else:
sender = str(emails['SenderName'].encode('utf-8'))
msg['From'] = sender
#recipients
recipients = []
if __name__ == '__main__':
if DEBUG:
print 'attempting to load Outlook'
oOutlook = MSOutlook()
# delayed check for Outlook on win32 box
if not oOutlook.outlookFound:
print 'Outlook not found'
sys.exit(1)
if emails.has_key('Recipients'):
for rec in range(emails['Recipients'].__len__()):
recipients.append(getattr(emails['Recipients'].Item(rec+1), 'Address'))
msg['To'] = COMMASPACE.join(recipients)
else:
recipients.append(emails['To'])
msg['To'] = COMMASPACE.join(recipients)
# message
msg.preamble = emails['Body'].encode('utf-8')
return msg
class OutlookResource(object):
fieldsMail = ['Body',
'HTMLBody',
'CC',
'SenderName',
'Recipients',
'To',
'Attachments',
'Subject'
]
# 'BodyFormat', removed BodyFormat temporarily because it is not available in Outlook.9 (Office2000)
# 'SenderEmailAddress', replaced by SenderName
implements(IResource)
if DEBUG:
import time
print 'loading records...'
startTime = time.time()
mails = oOutlook.loadInbox(fieldsMail)
def __init__(self, oEmail):
self.oEmail = oEmail
for elem in mails:
print str(elem)
@property
def data(self):
return self.oEmail
if DEBUG_WRITELINE:
print '***Back in main() with some emails in a list....***'
print 'Mails fetched from MSOutlook inbox folder:', mails.__len__()
if DEBUG:
print 'loading took %f seconds' % (time.time() - startTime)

View file

@ -0,0 +1,228 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> //-->
<html xmlns:nevow="http://nevow.com/ns/nevow/0.1">
<!-- Outlook Mails Page for loops.agent UI Version: 0.1 //-->
<nevow:invisible nevow:render="header_fragment" />
<body>
<div class="body">
<div nevow:render="top_fragment">
</div>
<div id="menu">
<div nevow:render="navigation_fragment">
</div>
<br/>
<br/>
<div class="box">
<h4>User Mode</h4>
<div class="body">
<b>Current Mode: </b><p nevow:render="getActiveUserMode"/>
</div>
</div>
</div>
<div id="content">
<div></div>
<div></div>
<div class="content-1" id="2.body" ondblclick="">
<div class="line-block">
<div class="line"><br /></div>
</div>
<div class="section">
<h3><a id="agent-ui-startpage" name="agent-ui-startpage">Agent: Create Filesystem Crawler Job</a></h3>
<ul class="simple">
<li>Configuration page for Filesystem Crawler Jobs</li>
</ul>
</div>
<div class="section">
<h3><a id="form-overview" name="form-overview">Overview</a></h3>
<ul class="simple">
<li><b>File Collection</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
</ul>
<div align="center">
<table border="0" cellpadding="10">
<p nevow:render="displayFiles">
<tr nevow:pattern="CollectedFiles" nevow:render="data"/>
</p>
</table>
<form name="FileCrawlForm" action="submitFilesystemCrawlJob" method="POST">
<fieldset>
<legend>
Filesystem Crawl Settings
</legend>
<fieldset>
<legend>Directories to crawl</legend>
<table align="center">
<tr>
<td>
<label for="lblRecursiveDirs">Directories to crawl <b>recursively</b></label>
<p>
<i>please use ';' as delimiters</i>
</p>
<p>
<i>you can also use patterns like regular expressions </i>
</p>
</td>
<td>
<textarea name="rec_directories" id="lblRecursiveDirs" rows="10" cols="40"></textarea>
</td>
</tr>
<tr>
<td>
<label for="lblDirs">Directories to crawl non-recursively</label>
<p>
<i>please use ';' as delimiters</i>
</p>
<p>
<i>you can also use patterns like regular expressions </i>
</p>
</td>
<td>
<!-- distinct field name: the recursive textarea already posts as "rec_directories" //-->
<textarea name="nonrec_directories" id="lblDirs" rows="10" cols="40"></textarea>
</td>
</tr>
</table>
</fieldset>
<fieldset>
<legend>
Filter criteria patterns
</legend>
<table align="center">
<tr>
<td>
<label for="lblApplyFileSize">Apply size criteria : </label>
</td>
<td>
<input id="lblApplyFileSize" name="applyFileSize" type="checkbox"/>
</td>
</tr>
<tr>
<td>
<label for="lblFileSizeLimit">Collect files that are : </label>
</td>
<td>
<select id="lblFileSizeLimit" name="selectFileSizeLimit">
<option>greater</option>
<option>less</option>
<option>equal</option>
</select>
</td>
</tr>
<tr>
<td>
<label for="lblFileSize">than (kByte): </label>
</td>
<td>
<input id="lblFileSize" name="fileSize"
type="text" size="12" maxlength="20" />
</td>
</tr>
<tr>
<td>
<label for="lblMaximumSize">Maximum file size: </label>
<p>
<i>no size means that transferred files might be very large!</i>
</p>
</td>
<td>
<input id="lblMaximumSize" name="maximumSize"
type="text" size="12" maxlength="20" />
</td>
</tr>
<tr>
<td>
<label for="lblApplyDateCriteria">Apply date criteria: </label>
</td>
<td>
<input id="lblApplyDateCriteria" name="applyDateCriteria" type="checkbox"/>
</td>
</tr>
<tr>
<td>
<label for="lblDateCriteria">Collect files that are: </label>
</td>
<td>
<select id="lblDateCriteria" name="selectDateCriteria">
<option>created</option>
<option>modified</option>
<option>accessed</option>
</select>
</td>
</tr>
<tr>
<td>
<label for="lblTimestampCompare"></label>
</td>
<td>
<select id="lblTimestampCompare" name="selectTimeStampCompare">
<option>before</option>
<option>after</option>
<option>exactly on</option>
</select>
</td>
</tr>
<tr>
<td>
<label for="lblTimeStamp">Collect files that are : </label>
</td>
<td>
<!-- distinct field name: "selectDateCriteria" is already used by the date criteria select //-->
<input id="lblTimeStamp" name="timeStamp" type="text" size="20"/>
</td>
</tr>
</table>
</fieldset>
<fieldset>
<legend>
Job Interval
</legend>
<table align="center">
<tr>
<td>
<label for="lblmailCrawlIntervaloneTime">One Time: </label>
</td>
<td>
<input type="radio" id="lblmailCrawlIntervaloneTime" name="mailCrawlInterval"
value="oneTime" checked="checked" />
</td>
</tr>
<tr>
<td>
<label for="lblmailCrawlIntervalScheduler">Use Scheduler: </label>
</td>
<td>
<input type="radio" id="lblmailCrawlIntervalScheduler" name="mailCrawlInterval"
value="Scheduler" />
</td>
</tr>
</table>
</fieldset>
<input type="submit" name="startCrawlJob" value="Save and Start" />
</fieldset>
</form>
<div nevow:render="systemMessage"/>
</div>
</div>
</div>
</div>
<div id="sub-section" define-macro="sub-section">
</div>
<div nevow:render="footer_fragment">
</div>
</div>
</body>
</html>

View file

@ -47,36 +47,29 @@
<div class="section">
<h3><a id="form-overview" name="form-overview">Overview</a></h3>
<ul class="simple">
<li><b>Mail Collection</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
<li><b>Mail in Detail</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
</ul>
<div nevow:render="systemMessage"/>
<div align="center">
<table class="listing">
<table class="listing" style="width:200px">
<thead>
<tr>
<th>
From
Field
</th>
<th>
CC
</th>
<th>
Subject
</th>
<th>
Date
</th>
<th>
Source Folder
Value
</th>
</tr>
</thead>
<tbody>
<p nevow:render="displayOutlookMails">
<p nevow:render="displayOutlookMail">
</p>
</tbody>
</table>
</div>
<br/>
<a href="viewRessources" alt="back to Ressources overview"><b>[back to Ressources overview]</b></a>
</div>
</div>
</div>

View file

@ -65,22 +65,29 @@
<table align="center">
<tr>
<td>
<label for="lblsearchFolder">Folder to search: </label>
<label for="lblCrawlInbox">Crawl inbox</label>
</td>
<td>
<input id="lblsearchFolder" name="searchFolder" type="text"
size="12" maxlength="20"/>
<input id="lblCrawlInbox" name="inbox" type="checkbox"
value="inbox"/><!-- outlook job param //-->
</td>
</tr>
<tr>
<td>
<label for="lblselectSubfolder">Include subfolder: </label>
<label for="lblCrawlSubfolders">Crawl subfolders</label>
</td>
<td>
<select id="lblselectSubfolder" name="selectSubfolder">
<option>Yes</option>
<option>No</option>
</select>
<input type="checkbox" id="lblCrawlSubfolders" name="subfolders"
value = "subfolders"/><!-- outlook job param //-->
</td>
</tr>
<tr>
<td>
<label for="lblSubFolderPattern">Pattern for subfolders to include</label>
</td>
<td>
<input type="text" maxlength="20" size="16" id="lblSubFolderPattern"
name="pattern"/><!-- outlook job param //-->
</td>
</tr>
</table>

View file

@ -16,19 +16,19 @@
<div class="content odd menu-3">
<a href="http://localhost:8080/collectOutlookMails" class="">
collect Outlook Mails
add outlook crawl job
</a>
</div>
<div class="content odd menu-3">
<a href="http://localhost:8080/viewOutlookMails" class="">
view Outlook Mails
</a>
<a href="http://localhost:8080/collectFilesystem" class="">
add filesystem crawl job</a>
</div>
<div class="content odd menu-3">
<a href="http://localhost:8080/addjob" class="">
add job</a>
<a href="http://localhost:8080/viewRessources" class="">
view collected ressources
</a>
</div>
<div class="content odd menu-3">

View file

@ -0,0 +1,79 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> //-->
<html xmlns:nevow="http://nevow.com/ns/nevow/0.1">
<!-- Ressource Page for loops.agent UI Version: 0.1 //-->
<nevow:invisible nevow:render="header_fragment" />
<body>
<div class="body">
<div nevow:render="top_fragment">
</div>
<div id="menu">
<div nevow:render="navigation_fragment">
</div>
<br/>
<br/>
<div class="box">
<h4>User Mode</h4>
<div class="body">
<b>Current Mode: </b><p nevow:render="getActiveUserMode"/>
</div>
</div>
</div>
<div id="content">
<div></div>
<div></div>
<div class="content-1" id="2.body" ondblclick="">
<div class="line-block">
<div class="line"><br /></div>
</div>
<div class="section">
<h3><a id="agent-ui-startpage" name="agent-ui-startpage">Agent: collected ressources</a></h3>
<ul class="simple">
<li>All currently available objects that were collected by loops jobs</li>
</ul>
</div>
<div class="section">
<h3><a id="form-overview" name="form-overview">Overview</a></h3>
<ul class="simple">
<li><b>Ressource Collection</b><div nevow:render="data" nevow:data="displayViewForm"/></li>
</ul>
<div nevow:render="systemMessage"/>
<div align="center">
<table class="listing">
<thead>
<p nevow:render="displayRessourceHeaders">
</p>
</thead>
<tbody>
<p nevow:render="displayRessources">
</p>
</tbody>
</table>
</div>
</div>
</div>
</div>
<div id="sub-section" define-macro="sub-section">
</div>
<div nevow:render="footer_fragment">
</div>
</div>
</body>
</html>

File diff suppressed because it is too large Load diff