/[formed]/trunk/tools/anonym/joincsv.py
ViewVC logotype

Contents of /trunk/tools/anonym/joincsv.py

Parent Directory | Revision Log


Revision 239 - (show annotations)
Mon Feb 25 21:51:02 2008 UTC (17 years ago) by teichmann
File MIME type: text/x-python
File size: 2385 byte(s)
Fixed logic bug in CSV joiner.

1 #!/usr/bin/env python
2 #
3 # (c) 2008 by Intevation GmbH
4 # author: Sascha L. Teichmann ([email protected])
5 #
6 import sys
7 import re
8 import codecs
9
# Field separator of the tab-delimited input files and of the joined output.
SEP = '\t'

# Matches input file names of the form ".../ka_<id>_db.csv"; group 1 captures
# the lower-case alphanumeric <id>.  The dot in front of "csv" is escaped so
# that only a literal "." is accepted there (an unescaped "." would match any
# character, e.g. "ka_x_dbXcsv").
FKZ = re.compile(r".*ka_([0-9a-z]+)_db\.csv$")
13
class CSV:
    """In-memory copy of one SEP-delimited text file.

    The first line of the file is the header line; a leading '#' in front
    of the first column name is treated as a comment marker and dropped.
    Every following non-empty line is one data row.

    Attributes:
        fkz             -- the value passed as ``fkz``, stored unchanged
        headers         -- list of column names in file order
        indexed_headers -- dict mapping column name -> column index (for a
                           duplicated name the last occurrence wins)
        rows            -- list of rows, each a list of cell strings
    """

    def __init__(self, f, fkz=None):
        """Read the header line and all data rows from the open file *f*.

        f   -- file-like object supporting ``readline()`` and iteration
               over text lines
        fkz -- optional identifier kept on the instance for the caller
        """
        self.fkz = fkz

        header_line = f.readline().replace('\r', '').replace('\n', '')
        if header_line:
            headers = header_line.split(SEP)
        else:
            # An empty file has no columns at all; splitting the empty
            # string would produce one bogus column with an empty name.
            headers = []

        # Only a '#' at the very start of the line is the comment marker;
        # a '#' inside the first column name belongs to the name.
        if headers and headers[0].startswith('#'):
            headers[0] = headers[0][1:]

        self.headers = headers
        self.indexed_headers = dict(
            (name, idx) for idx, name in enumerate(headers))

        rows = []
        for raw in f:
            line = raw.replace('\r', '').replace('\n', '')
            if not line:
                # skip blank lines
                continue
            rows.append(line.split(SEP))
        self.rows = rows

    def find(self, header):
        """Return the column index of *header*, or -1 if it is unknown."""
        return self.indexed_headers.get(header, -1)
33
def main():
    """Join the CSV files named on the command line into one table.

    Each argument is opened as a latin1 encoded file and loaded into a
    CSV object.  If the file name matches FKZ, the upper-cased match group
    is remembered as that file's ``fkz``.  The output contains the union of
    all columns; a column is placed according to the highest index it has
    in any input file.  Files with more columns are written first.

    The result is written latin1 encoded to stdout: a header line starting
    with '#', then one line per input row, all terminated by CRLF.  Cells
    a file or row does not provide are left empty.  The column named 'fkz'
    is filled with the file's ``fkz`` whenever the file has one.
    Progress messages go to stderr.
    """
    csvs = []
    for arg in sys.argv[1:]:
        sys.stderr.write("file: %s\n" % arg)
        m = FKZ.match(arg)
        if m:
            fkz = m.group(1).upper()
        else:
            fkz = None
        f = codecs.open(arg, "r", "latin1")
        try:
            csvs.append(CSV(f, fkz))
        finally:
            try:
                f.close()
            except (IOError, OSError):
                # Best effort: a failing close of a file that was only
                # read must not abort the join.
                pass

    # Widest tables first; the sort is stable, so files with the same
    # number of columns keep their command line order.
    csvs.sort(key=lambda table: len(table.headers), reverse=True)

    headers = set()
    for table in csvs:
        headers.update(table.headers)

    # Position every column at the highest index it has in any file.
    # Sorting the full (index, name) tuples breaks ties by name, which keeps
    # the column order reproducible regardless of set iteration order.
    order = [
        (max([table.find(header) for table in csvs]), header)
        for header in headers
    ]
    order.sort()
    order = [header for _, header in order]

    sys.stderr.write("headers: %d\n" % len(headers))

    # The codec writer emits encoded bytes, so hand it the binary layer of
    # stdout where one exists and stdout itself otherwise.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    out = codecs.getwriter("latin1")(stream)

    out.write("#%s\r\n" % SEP.join(order))
    for table in csvs:
        # Resolve the source column of each output column once per file.
        indices = [table.find(header) for header in order]
        for row in table.rows:
            cells = []
            for header, idx in zip(order, indices):
                if header == 'fkz' and table.fkz:
                    cells.append(table.fkz)
                elif 0 <= idx < len(row):
                    cells.append(row[idx])
                else:
                    # Column unknown to this file, or the row is shorter
                    # than its header line.
                    cells.append('')
            out.write("%s\r\n" % SEP.join(cells))
    out.flush()
88
# Run the joiner only when executed as a script, not when imported.
if "__main__" == __name__:
    main()

Properties

Name Value
svn:executable *

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26