Reconstructed unified diff (line breaks and indentation restored).

external/base.py
  * BaseLoader.load() and loadRecursive() take a new recur=True argument;
    sub-elements are only descended into when recur is true.
  * New BaseLoader.warn(): writes the message to the transcript, appends
    it to self.errors, increments summary['warnings'] and logs it.

external/dsv.py
  * CsvReader gains the class attributes encoding ('UTF-8') and sortKey.
  * read() now skips the first `start` input lines before building the
    DictReader, truncates the rows with [:stop], optionally sorts them
    with sortKey, and consults the new ignoreRow() / preprocessField()
    hooks (both are no-ops by default).
  * An element factory equal to 'ignore' causes the field to be skipped.

Review notes
  * Logger.warn is a deprecated alias, so the patch below calls
    Logger.warning instead (assumes self.logger is a stdlib logging
    logger; TODO confirm).
  * warn() relies on a 'warnings' key in self.summary; its initialisation
    is not part of this patch. TODO confirm it exists.
  * `stop` is now applied after the `start` lines have been skipped, so it
    counts rows relative to the first row read. TODO confirm intended.
  * Indentation of context lines whose enclosing block is not shown in a
    hunk was inferred from the Python syntax; verify against the target
    files before applying.

diff --git a/external/base.py b/external/base.py
index 2c08aa4..5610ba5 100644
--- a/external/base.py
+++ b/external/base.py
@@ -55,15 +55,15 @@ class BaseLoader(object):
         self.logger = getLogger('Loader')
         self.groups = {}
 
-    def load(self, elements):
-        self.loadRecursive(elements)
+    def load(self, elements, recur=True):
+        self.loadRecursive(elements, recur)
         self.transcript.write('Rows loaded: %(count)i; changes: %(changed)i; '
                               'errors: %(errors)i\n' % self.summary)
 
-    def loadRecursive(self, elements):
+    def loadRecursive(self, elements, recur=True):
         for element in elements:
             element.execute(self)
-            if element.subElements is not None:
+            if recur and element.subElements is not None:
                 self.loadRecursive(element.subElements)
             self.summary['count'] += 1
 
@@ -73,6 +73,12 @@ class BaseLoader(object):
         self.summary['errors'] += 1
         self.logger.error(message)
 
+    def warn(self, message):
+        self.transcript.write(message + '\n')
+        self.errors.append(message)
+        self.summary['warnings'] += 1
+        self.logger.warning(message)
+
     def change(self, message=None):
         if message is not None:
             self.transcript.write(message + '\n')
diff --git a/external/dsv.py b/external/dsv.py
index dbcf282..0007ac5 100644
--- a/external/dsv.py
+++ b/external/dsv.py
@@ -38,17 +38,26 @@ xls2csv = '%(cpath)s -f %%Y-%%m-%%d %(fpath)s.xls >%(fpath)s.csv'
 
 class CsvReader(BaseReader):
 
+    encoding = 'UTF-8'
     elementFactories = {None: Element}
     fieldNames = ()
-    start = stop = None
+    start = stop = sortKey = None
 
     def read(self, input):
         result = []
+        for x in range(self.start or 0):
+            input.readline() # skip lines on top
         reader = csv.DictReader(input, self.fieldNames)
         lastIdentifiers = {}
-        for idx, row in enumerate(list(reader)[self.start:self.stop]):
+        rows = list(reader)[:self.stop]
+        if self.sortKey:
+            rows.sort(key=self.sortKey)
+        for idx, row in enumerate(rows):
+            if self.ignoreRow(idx, row):
+                continue
             currentElements = {}
             for k, v in row.items():
+                k, v = self.preprocessField(k, v)
                 if k is None:
                     continue
                 type = None
@@ -59,6 +68,8 @@ class CsvReader(BaseReader):
                     ef = self.elementFactories.get(type)
                     if ef is None:
                         raise ValueError('Missing element factory for %r.' % type)
+                    if ef == 'ignore':
+                        continue
                     element = currentElements[type] = ef()
                     element.type = type
                 element[k] = v # ?TODO: unmarshall
@@ -70,6 +81,12 @@ class CsvReader(BaseReader):
                     lastIdentifiers[element.type] = id
         return result
 
+    def ignoreRow(self, idx, row):
+        return False
+
+    def preprocessField(self, k, v):
+        return k, v
+
     def getDate(self, value, correctBug=False):
         if not value:
             return value