 c367f55637
			
		
	
	
		c367f55637
		
	
	
	
	
		
			
			remote.py: now the file on the remote server gets the basename of the local file outlook.py: added utf-8 encoding for the mail content to avoid problems with special characters git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2759 fd906abe-77d9-0310-91a1-e0d9ade77398
		
			
				
	
	
		
			231 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			231 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #
 | |
| #  Copyright (c) 2008 Helmut Merz helmutm@cy55.de
 | |
| #
 | |
| #  This program is free software; you can redistribute it and/or modify
 | |
| #  it under the terms of the GNU General Public License as published by
 | |
| #  the Free Software Foundation; either version 2 of the License, or
 | |
| #  (at your option) any later version.
 | |
| #
 | |
| #  This program is distributed in the hope that it will be useful,
 | |
| #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| #  GNU General Public License for more details.
 | |
| #
 | |
| #  You should have received a copy of the GNU General Public License
 | |
| #  along with this program; if not, write to the Free Software
 | |
| #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| #
 | |
| 
 | |
| """
 | |
| Outlook Crawler Class.
 | |
| 
 | |
| $Id$
 | |
| """
 | |
| 
 | |
| import re
 | |
| from email import MIMEMultipart
 | |
| import tempfile
 | |
| import os
 | |
| 
 | |
| from zope.interface import implements
 | |
| from twisted.internet import defer
 | |
| #from pywintypes import com_error
 | |
| #The watsup import is needed as soon as we start handling the Outlook Pop-Up
 | |
| #again
 | |
| #This should also be integrated within the wrapper-api for doctests
 | |
| #from watsup.winGuiAuto import findTopWindow, findControl, findControls, clickButton, \
 | |
| #                              getComboboxItems, selectComboboxItem, setCheckBox
 | |
| 
 | |
| from cybertools.agent.base.agent import Agent, Master
 | |
| from cybertools.agent.crawl.mail import MailCrawler
 | |
| from cybertools.agent.crawl.mail import MailResource
 | |
| from cybertools.agent.crawl.filesystem import FileResource
 | |
| from cybertools.agent.components import agents
 | |
| from cybertools.agent.system.windows import api
 | |
| from cybertools.agent.util.task import coiterate
 | |
| from cybertools.agent.system.windows.codepages import codepages
 | |
| 
 | |
# Separator used by OutlookCrawler.assembleMetadata() when it flattens a
# sequence-valued mail attribute into a single string.
COMMASPACE = ', '
 | |
| 
 | |
| class OutlookCrawler(MailCrawler):
 | |
| 
 | |
|     keys = ""
 | |
|     inbox = ""
 | |
|     subfolders = ""
 | |
|     pattern = ""
 | |
| 
 | |
|     def collect(self, filter=None):
 | |
|         self.result = []
 | |
|         self.d = defer.Deferred()
 | |
|         self.oOutlookApp = None
 | |
|         if self.findOutlook():
 | |
|             self.fetchCriteria()
 | |
|             coiterate(self.crawlFolders()).addCallback(self.finished).addErrback(self.error)
 | |
|         else:
 | |
|             pass
 | |
|             #self.d.addErrback([])
 | |
|         return self.d
 | |
| 
 | |
|     def error(self, reason):
 | |
|         print '***** error',
 | |
|         print reason
 | |
| 
 | |
|     def finished(self, result):
 | |
|         self.d.callback(self.result)
 | |
| 
 | |
|     def fetchCriteria(self):
 | |
|         criteria = self.params
 | |
|         self.keys = criteria.get('keys')
 | |
|         self.inbox = criteria.get('inbox') #boolean
 | |
|         self.subfolders = criteria.get('subfolders') #boolean
 | |
|         self.pattern = criteria.get('pattern')
 | |
|         if self.pattern != '' and self.pattern != None:
 | |
|             self.pattern = re.compile(criteria.get('pattern') or '.*')
 | |
| 
 | |
|     def crawlFolders(self):
 | |
|         onMAPI = self.oOutlookApp.GetNamespace("MAPI")
 | |
|         ofInbox = \
 | |
|             onMAPI.GetDefaultFolder(api.client.constants.olFolderInbox)
 | |
|         # fetch mails from inbox
 | |
|         if self.inbox:
 | |
|             for m in self.loadMailsFromFolder(ofInbox):
 | |
|                 yield None
 | |
|         # fetch mails of inbox subfolders
 | |
|         if self.subfolders and self.pattern is None:
 | |
|             lInboxSubfolders = getattr(ofInbox, 'Folders')
 | |
|             for of in range(lInboxSubfolders.__len__()):
 | |
|                 # get a MAPI-subfolder object and load its emails
 | |
|                 for m in self.loadMailsFromFolder(lInboxSubfolders.Item(of + 1)):
 | |
|                     yield None
 | |
|         elif self.subfolders and self.pattern:
 | |
|             lInboxSubfolders = getattr(ofInbox, 'Folders')
 | |
|             for of in range(lInboxSubfolders.__len__()):
 | |
|                 # get specified MAPI-subfolder object and load its emails
 | |
|                 if self.pattern.match(getattr(lInboxSubfolders.Item(of + 1), 'Name')):
 | |
|                     for m in self.loadMailsFromFolder(lInboxSubfolders.Item(of + 1)):
 | |
|                         yield None
 | |
| 
 | |
|     def loadMailsFromFolder(self, folder):
 | |
|         # get items of the folder
 | |
|         folderItems = getattr(folder, 'Items')
 | |
|         for item in range(len(folderItems)):
 | |
|             mail = folderItems.Item(item+1)
 | |
|             if mail.Class == api.client.constants.olMail:
 | |
|                 if self.keys is None:
 | |
|                     self.keys = []
 | |
|                     for key in mail._prop_map_get_.items():
 | |
|                         try:
 | |
|                             if isinstance(key[0], (int, str, unicode, bool)):
 | |
|                                 self.keys.append(key[0])
 | |
|                         except api.com_error:
 | |
|                             pass
 | |
|                 record = {}
 | |
|                 for key in self.keys:
 | |
|                     try:
 | |
|                         if (hasattr(mail, key)):
 | |
|                             value = getattr(mail, key)
 | |
|                             if isinstance(value, (int, str, unicode, bool)):
 | |
|                                 record[key] = value
 | |
|                             else:
 | |
|                                 record[key] = None
 | |
|                     except:
 | |
|                         pass
 | |
|                 metadata = self.assembleMetadata(folder, record)
 | |
|                 # Create a resource and append it to the result list
 | |
|                 self.createResource(mail, folder, metadata)
 | |
|                 yield None
 | |
| 
 | |
|     def findOutlook(self):
 | |
|         outlookFound = False
 | |
|         try:
 | |
|             self.oOutlookApp = \
 | |
|                 api.client.gencache.EnsureDispatch("Outlook.Application")
 | |
|             outlookFound = True
 | |
|         except com_error:
 | |
|             pass
 | |
|         return outlookFound
 | |
| 
 | |
|     def assembleMetadata(self, folder, mailAttr):
 | |
|         meta = {}
 | |
|         for key in mailAttr.keys():
 | |
|             if isinstance(mailAttr[key], (str, unicode))\
 | |
|                and mailAttr[key] != 'Body' and mailAttr[key] != 'HTMLBody':
 | |
|                 meta[key] = mailAttr[key].encode('utf-8')
 | |
|             elif isinstance(mailAttr[key], (list, tuple, dict)):
 | |
|                 lst = []
 | |
|                 for rec in mailAttr[key]:
 | |
|                     lst.append(rec)
 | |
|                     meta[key] = COMMASPACE.join(lst)
 | |
|             else:
 | |
|                 meta[key] = mailAttr[key]
 | |
|         meta["path"] = folder
 | |
|         metadata = self.createMetadata(meta)
 | |
|         return metadata
 | |
|     
 | |
|     def createResource(self, mail, folder, metadata):
 | |
|         enc = None
 | |
|         textType = "application/octet-stream"
 | |
|         attachments = []
 | |
|         mailContent = ""
 | |
|         ident = None
 | |
|         if (hasattr(mail, 'BodyFormat')):
 | |
|             value = getattr(mail, 'BodyFormat')
 | |
|             if value == 1:
 | |
|                 #1: it is a plain text mail, that is maybe decorated with
 | |
|                 #some html Tags by Outlook for formatting
 | |
|                 #so save it as plain text mail
 | |
|                 if hasattr(mail, 'Body'):
 | |
|                     mailContent = getattr(mail, 'Body')
 | |
|                     textType = "text/plain"
 | |
|                 else:
 | |
|                     mailContent = ""
 | |
|                     textType = "text/plain"
 | |
|             elif value == 2:
 | |
|                 #2: it is a HTML mail
 | |
|                 if hasattr(mail, 'HTMLBody'):
 | |
|                     mailContent = getattr(mail, 'HTMLBody')
 | |
|                     textType = "text/html"
 | |
|                 else:
 | |
|                     mailContent = ""
 | |
|                     textType = "text/html"
 | |
|         else:
 | |
|             #Could not determine BodyFormat. Try to retrieve plain text
 | |
|             if hasattr(mail, 'Body'):
 | |
|                 mailContent = getattr(mail, 'Body')
 | |
|             else:
 | |
|                 mailContent = ""
 | |
|         if hasattr(mail, 'InternetCodepage'):
 | |
|             Codepage = getattr(mail, 'InternetCodepage')
 | |
|             if codepages.has_key(Codepage):
 | |
|                 enc = codepages[Codepage]
 | |
|         if hasattr(mail, 'EntryID'):
 | |
|             ident = getattr(mail, 'EntryID')
 | |
|         if hasattr(mail, 'Attachments'):
 | |
|             attachedElems = getattr(mail, 'Attachments')
 | |
|             for item in range(1, len(attachedElems)+1):
 | |
|                 fileHandle, filePath = tempfile.mkstemp(prefix="outlook")
 | |
|                 attachedItem = attachedElems.Item(item)
 | |
|                 attachedItem.SaveAsFile(filePath)
 | |
|                 os.close(fileHandle)
 | |
|                 metadat = self.createMetadata(dict(filename=filePath))
 | |
|                 fileRes = FileResource(data=None,
 | |
|                                        path=filePath,
 | |
|                                        metadata=metadat)
 | |
|                 attachments.append(fileRes)
 | |
|         fileHandle, filePath = tempfile.mkstemp(prefix="olmail")
 | |
|         filePointer = os.fdopen(fileHandle, "w")
 | |
|         mailContent = mailContent.encode('utf-8')
 | |
|         filePointer.write(mailContent)
 | |
|         filePointer.close()
 | |
|         resource = MailResource(data=mailContent,
 | |
|                                 contentType=textType,
 | |
|                                 encoding=enc,
 | |
|                                 path=filePath,
 | |
|                                 application='outlook',
 | |
|                                 identifier=ident,
 | |
|                                 metadata=metadata,
 | |
|                                 subResources=attachments)
 | |
|         self.result.append(resource)
 | |
| 
 | |
# Make the crawler available through the agents registry under the name
# 'crawl.outlook'.  NOTE(review): Master is presumably the agent type this
# component is registered for - confirm against agents.register().
agents.register(OutlookCrawler, Master, name='crawl.outlook')
 |