From 4321f24b387ecce8838e8946ecc2d633e4f43006 Mon Sep 17 00:00:00 2001 From: helmutm Date: Mon, 8 Oct 2007 11:58:24 +0000 Subject: [PATCH] more on rstat.getDataFrame() git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2099 fd906abe-77d9-0310-91a1-e0d9ade77398 --- pyscript/rstat.py | 28 ++++++++++++++++++++-------- pyscript/rstat.txt | 3 ++- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/pyscript/rstat.py b/pyscript/rstat.py index 44a86de..c5f95d0 100644 --- a/pyscript/rstat.py +++ b/pyscript/rstat.py @@ -79,25 +79,37 @@ class RStat(object): (rowId, columnId, value). Elements with a columnId that is not present in all rows is omitted. """ - def checkColumnId(rows, columnId): - for row in rows.values(): - if columnId not in row: + def checkId(mapping, id): + for element in mapping.values(): + if id not in element: return False return True data = sorted(data) rows = {} + columns = {} + dataMapping = {} for rowId, columnId, value in data: - element = rows.setdefault(rowId, []) - element.append(columnId) + rows.setdefault(rowId, []).append(columnId) + columns.setdefault(columnId, []).append(rowId) + dataMapping[(rowId, columnId)] = value columnsToOmit = [] for rowId, row in rows.items(): for columnId in row: - if not checkColumnId(rows, columnId): + if not checkId(rows, columnId): columnsToOmit.append(columnId) + rowsToOmit = [] + for columnId, column in columns.items(): + for rowId in column: + if not checkId(columns, rowId): + rowsToOmit.append(rowId) r.library('ltm') result = {} - for rowId, columnId, value in data: - if columnId not in columnsToOmit: + #for rowId, columnId, value in data: + #if columnId not in columnsToOmit: + # if rowId not in rowsToOmit: + # result.setdefault(rowId, []).append(value) + for (rowId, columnId), value in sorted(dataMapping.items()): + if rowId not in rowsToOmit: result.setdefault(rowId, []).append(value) self.intermediateData = result matrix = rpy.with_mode(rpy.NO_CONVERSION, r.data_frame)(**result) diff --git a/pyscript/rstat.txt b/pyscript/rstat.txt index 2b2e7d7..da6c6d9 100644 --- a/pyscript/rstat.txt +++ b/pyscript/rstat.txt @@ -24,9 +24,10 @@ Using the R Statistics Package with Python Scripts ... ('r3', 'c1', 0), ... ('r3', 'c2', 1), ... ] + ... data = [('q050', 'user1', 0), ('q044', 'user1', 1), ('q039', 'user1', 1), ('q041', 'user1', 0), ('q036', 'user1', 0), ('q053', 'user1', 0), ('q049', 'user1', 1), ('q057', 'user1', 0), ('q059', 'user1', 0), ('q060', 'user1', 0), ('q056', 'user1', 1), ('q047', 'user1', 1), ('q057', 'user1', 0), ('q041', 'user1', 1), ('q039', 'user1', 1), ('q038', 'user1', 1), ('q046', 'user1', 1), ('q040', 'user1', 1), ('q056', 'user1', 0), ('q059', 'user10', 0), ('q050', 'user10', 0), ('q058', 'user10', 0), ('q040', 'user10', 0), ('q052', 'user10', 0), ('q055', 'user10', 1), ('q039', 'user10', 1), ('q057', 'user10', 0), ('q049', 'user10', 1), ('q051', 'user10', 1), ('q041', 'user10', 1), ('q047', 'user10', 1), ('q038', 'user10', 1), ('q056', 'user10', 0), ('q037', 'user10', 1), ('q055', 'user10', 1), ('q054', 'user10', 0), ('q041', 'user10', 1), ('q044', 'user10', 1), ('q049', 'user10', 1), ('q048', 'user10', 1), ('q061', 'user10', 0), ('q056', 'user10', 1), ('q058', 'user10', 0), ('q042', 'user10', 0), ('q041', 'user10', 1), ('q036', 'user10', 1), ('q040', 'user10', 1), ('q059', 'user10', 1), ('q058', 'user10', 1), ('q057', 'user10', 0), ('q054', 'user10', 1), ('q037', 'user1', 1), ('q038', 'user1', 1), ('q047', 'user1', 1), ('q041', 'user1', 1), ('q051', 'user1', 1), ('q048', 'user1', 1), ('q058', 'user1', 1), ('q060', 'user1', 1)] ... matrix = rstat.getDataFrame(data) ... #return matrix - ... return rstat.intermediateData + ... #return rstat.intermediateData ... x = rpy.with_mode(rpy.BASIC_CONVERSION, r.descript)(matrix) ... return x ... """