/[formed]/trunk/tools/anonym/anoymncsv.py
ViewVC logotype

Contents of /trunk/tools/anonym/anoymncsv.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 230 - (show annotations)
Mon Feb 18 15:03:06 2008 UTC (17 years ago) by teichmann
File MIME type: text/x-python
File size: 6132 byte(s)
Separated CSV export by '\t' now.

1 #!/usr/bin/env python
2 #
3 # (c) 2008 by Intevation GmbH
4 # author: Sascha L. Teichmann ([email protected])
5 #
6
7 import sys
8 #import os
9 import codecs
10 #import getopt
11 import psycopg2 as dbapi
12 import psycopg2.extras as extras
13 from types import StringTypes
14
15 dbapi.extensions.register_type(dbapi.extensions.UNICODE)
16
17 SEP = "\t"
18
19 DBNAME = "ka_inte00002_db"
20 HOST = "laodameia"
21 PORT = 5434
22 USER = "postgres"
23 PASSWORD = ""
24
25 ANON_ON = \
26 """SELECT wert FROM ka_konfiguration_tbl
27 WHERE bez = 'anon_transfer'"""
28
29 NUM_EINV = \
30 """SELECT count(*) FROM master_tbl_anonym_view
31 WHERE einverstaendniserklaerung = 1"""
32
33 SELECT_MASTER_TBL = \
34 """SELECT * FROM master_tbl_anonym_view
35 WHERE einverstaendniserklaerung = 1"""
36
37 RG_VIEWS = (
38 "rg_kompetenzfestellung_tbl_anonym_view",
39 "rg_angebote_berufliche_qualifizierung_tbl_anonym_view",
40 "rg_angebote_berufsvorbereitung_tbl_anonym_view",
41 "rg_angebote_bildenden_bereich_tbl_anonym_view",
42 "rg_angebote_lebensbewaeltigung_tbl_anonym_view")
43
44 SELECT_RG = \
45 """SELECT * FROM %s WHERE master_id = %d"""
46
47 BLACK_LIST = set(['id', 'master_id', 'uuid_id', 'bearbeiter_id'])
48
49 def strReplace(x):
50 if type(x) in StringTypes:
51 return x.replace("\r", "").replace("\n", " ").replace(SEP, " ")
52 if x is None: return ""
53 return x
54
55 class RepeatGroup:
56
57 def __init__(self, columns):
58 self.columns = columns
59
60 def headerFields(self):
61 return [a[0] for a in self.columns if a[0] not in BLACK_LIST]
62
63 def valueFields(self):
64 return [strReplace(a[2]) for a in self.columns if a[0] not in BLACK_LIST]
65
66 def numFields(self):
67 s = 0
68 for a in self.columns:
69 if a[0] not in BLACK_LIST:
70 s += 1
71 return s
72
73 class Case(RepeatGroup):
74 def __init__(self, columns):
75 RepeatGroup.__init__(self, columns)
76 self.rgs = []
77
78 def fetchRepeatGroups(self, cur):
79 master_id = self.columns[0][2]
80 print >> sys.stderr, "master: %d" % master_id
81 for view in RG_VIEWS:
82 cur.execute(SELECT_RG % (view, int(master_id)))
83 rs = []
84 while True:
85 row = cur.fetchone()
86 if not row: break
87 cs = row._index.items() # XXX: hackish!
88 cs.sort(lambda a, b: cmp(a[1], b[1]))
89 cs = [(a[0], a[1], row[a[1]]) for a in cs]
90 rs.append(RepeatGroup(cs))
91 print >> sys.stderr, "\t%d %s" % (len(rs), view)
92 self.rgs.append(rs)
93
94 class Cases:
95 def __init__(self):
96 self.cases = []
97
98 def appendCase(self, case):
99 self.cases.append(case)
100
101 def fetchRepeatGroups(self, cur):
102 for case in self.cases:
103 case.fetchRepeatGroups(cur)
104
105 def maxRepeatGroups(self):
106 mc = [0] * len(RG_VIEWS)
107
108 for rgs in [c.rgs for c in self.cases]:
109 for i in range(len(mc)):
110 mc[i] = max(mc[i], len(rgs[i]))
111
112 for l in mc:
113 print >> sys.stderr, "max len: %d" % l
114
115 self.maxRGs = mc
116
117 def noneEmptyRepeatGroup(self, i):
118 for case in self.cases:
119 rgs = case.rgs
120 if len(rgs[i]):
121 return rgs[i][0]
122 return None
123
124 def generateHeader(self, out):
125 if not self.cases:
126 return ""
127 case = self.cases[0]
128 headers = case.headerFields()
129 for i in range(len(RG_VIEWS)):
130 rg = self.noneEmptyRepeatGroup(i)
131 if rg is None:
132 continue
133 hs = rg.headerFields()
134 for j in range(self.maxRGs[i]):
135 if j > 0:
136 headers.extend([ "%s_%d" % (x, j) for x in hs])
137 else:
138 headers.extend(hs)
139 print >> out, "#%s" % SEP.join(headers)
140
141 def dumpFields(self, out):
142 fills = []
143 for i in range(len(RG_VIEWS)):
144 rg = self.noneEmptyRepeatGroup(i)
145 if rg is None: fills.append(0)
146 else: fills.append(rg.numFields())
147
148 for case in self.cases:
149 line = case.valueFields()
150 for m, rg, fill in zip(self.maxRGs, case.rgs, fills):
151 count = 0
152 for r in rg:
153 line.extend(r.valueFields())
154 count += 1
155 while count < m:
156 line.extend([''] * fill)
157 count += 1
158 print >> out, "%s" % SEP.join(map(lambda x: u"%s" % x, line))
159
160
161 def main():
162 Writer = codecs.getwriter("latin-1")
163 out = Writer(sys.stdout)
164 con, cur = None, None
165 try:
166 #PASSWORD = raw_input("> ")
167 PASSWORD = ""
168 con = dbapi.connect(
169 database = DBNAME,
170 host = HOST,
171 port = PORT,
172 user = USER,
173 password = PASSWORD)
174
175 cur = con.cursor(cursor_factory=extras.DictCursor)
176 cur.execute(ANON_ON)
177 row = cur.next()
178
179 if row:
180 if row[0] != 'on':
181 print >> sys.stderr, "Anonymer Transfer nicht gestattet"
182 return
183 else:
184 print >> sys.stderr, "Anonymer Transfer nicht gestattet"
185 return
186
187 cur.execute(NUM_EINV)
188 row = cur.next()
189
190 if not row:
191 print >> sys.stderr, "Keine Akten gefunden"
192 return
193
194 print >> sys.stderr, "Uebermittelbare Akten: %d" % row[0]
195
196 cur.execute(SELECT_MASTER_TBL)
197
198 cases = Cases()
199
200 while True:
201 row = cur.fetchone()
202 if not row: break
203 columns = row._index.items() # XXX: hackish!
204 columns.sort(lambda a, b: cmp(a[1], b[1]))
205 columns = [(a[0], a[1], row[a[1]]) for a in columns]
206 cases.appendCase(Case(columns))
207
208 cases.fetchRepeatGroups(cur)
209 cases.maxRepeatGroups()
210 cases.generateHeader(out)
211 cases.dumpFields(out)
212
213 finally:
214 if cur:
215 try: cur.close()
216 except: pass
217 if con:
218 try: con.close()
219 except: pass
220
221
222 if __name__ == "__main__":
223 main()

Properties

Name Value
svn:executable *

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26