cybertools/external/dsv.py
helmutm 0cf8f4846e improvements for reader and loader implementations
git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@3817 fd906abe-77d9-0310-91a1-e0d9ade77398
2010-04-23 19:38:00 +00:00

101 lines
3.3 KiB
Python

#
# Copyright (c) 2009 Helmut Merz helmutm@cy55.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
Base implementation for import adapters.
$Id$
"""
import csv
from datetime import date, timedelta
from time import strptime
from zope import component
from zope.interface import implements
from zope.cachedescriptors.property import Lazy
from cybertools.external.base import BaseReader
from cybertools.external.element import Element
# Shell command template for converting an .xls file to .csv.
# Interpolate with a mapping providing ``cpath`` and ``fpath``; the doubled
# ``%%`` collapse to single ``%`` on interpolation, so the command receives
# ``-f %Y-%m-%d``. The input is ``<fpath>.xls``, and standard output is
# redirected to ``<fpath>.csv``.
# NOTE(review): ``cpath`` is presumably the path of the converter executable
# and ``-f`` its date output format -- confirm against the caller.
xls2csv = '%(cpath)s -f %%Y-%%m-%%d %(fpath)s.xls >%(fpath)s.csv'
class CsvReader(BaseReader):
    """Reader that converts delimiter-separated input into element objects.

    Each input row yields one element per distinct column-name prefix: a
    column called ``prefix.name`` is stored as field ``name`` on the element
    created by ``elementFactories['prefix']``; columns without a dot are
    stored on the element created by ``elementFactories[None]``.

    Subclasses customize the behaviour via the class attributes below and by
    overriding ``ignoreRow()`` and ``preprocessField()``.
    """

    # Name of the input encoding; not referenced by the methods defined here.
    encoding = 'UTF-8'
    # Maps a column-name prefix (``None`` for columns without a prefix) to a
    # factory callable; the string 'ignore' makes ``read()`` skip the columns.
    elementFactories = {None: Element}
    # Column names handed to ``csv.DictReader`` as its field names.
    fieldNames = ()
    # start: number of leading input lines to skip (``None`` means 0).
    # stop: slice bound applied to the list of parsed rows (``None``: no limit).
    # sortKey: optional key function for sorting the rows before processing.
    start = stop = sortKey = None

    def read(self, input):
        """Parse ``input`` and return the resulting list of elements.

        ``input`` must be a file-like object: ``readline()`` is used to skip
        the first ``start`` lines, the rest is parsed by ``csv.DictReader``.

        An element is appended to the result if its ``identifier`` is empty
        or differs from the identifier of the previously appended element of
        the same type, so consecutive rows repeating an identifier produce
        that element only once.

        Raises ``ValueError`` if a column prefix has no entry in
        ``elementFactories``.
        """
        for _ in range(self.start or 0):
            input.readline()    # skip lines on top
        reader = csv.DictReader(input, self.fieldNames)
        # ``stop`` counts parsed rows, i.e. rows after the skipped top lines.
        rows = list(reader)[:self.stop]
        if self.sortKey:
            rows.sort(key=self.sortKey)
        result = []
        lastIdentifiers = {}    # element type -> identifier seen last
        for idx, row in enumerate(rows):
            if self.ignoreRow(idx, row):
                continue
            currentElements = {}    # element type -> element of this row
            for k, v in row.items():
                k, v = self.preprocessField(k, v)
                if k is None:
                    # Dropped by preprocessField(), or surplus values that
                    # csv.DictReader collects under the key None.
                    continue
                elementType = None
                if '.' in k:
                    elementType, k = k.split('.', 1)
                element = currentElements.get(elementType)
                if element is None:
                    ef = self.elementFactories.get(elementType)
                    if ef is None:
                        raise ValueError('Missing element factory for %r.'
                                         % elementType)
                    if ef == 'ignore':
                        continue
                    element = currentElements[elementType] = ef()
                    element.type = elementType
                element[k] = v  # ?TODO: unmarshall
            for element in sorted(currentElements.values(),
                                  key=lambda x: x.order):
                identifier = element.identifier
                if (not identifier
                        or identifier != lastIdentifiers.get(element.type)):
                    element.setParent(currentElements)
                    result.append(element)
                    lastIdentifiers[element.type] = identifier
        return result

    def ignoreRow(self, idx, row):
        """Return True if the row at index ``idx`` should be skipped.

        Hook for subclasses; the default implementation keeps every row.
        """
        return False

    def preprocessField(self, k, v):
        """Return the ``(key, value)`` pair to use for a raw field.

        Hook for subclasses; the default implementation returns the pair
        unchanged. Returning ``None`` as key makes ``read()`` drop the field.
        """
        return k, v

    def getDate(self, value, correctBug=False):
        """Convert a ``YYYY-MM-DD`` string to a ``datetime.date``.

        Empty values and values that cannot be parsed -- including values
        that are not strings, e.g. an object that already is a date -- are
        returned unchanged.

        If ``correctBug`` is true, 1462 days (``4 * 365 + 2``) are subtracted
        from the parsed date.
        NOTE(review): this presumably compensates for the offset between the
        1900 and 1904 spreadsheet date systems -- confirm.
        """
        if not value:
            return value
        try:
            parsed = strptime(value, '%Y-%m-%d')
        except (TypeError, ValueError):
            # TypeError: value is not a string; ValueError: wrong format.
            return value
        d = date(*parsed[:3])
        if correctBug:
            d -= timedelta(4 * 365 + 2)
        return d