/[formed]/trunk/tools/anonym/joincsv.py
ViewVC logotype

Annotation of /trunk/tools/anonym/joincsv.py

Parent Directory | Revision Log


Revision 239 - (hide annotations)
Mon Feb 25 21:51:02 2008 UTC (17 years ago) by teichmann
File MIME type: text/x-python
File size: 2385 byte(s)
Fixed logic bug in CSV joiner.

1 teichmann 236 #!/usr/bin/env python
2     #
3     # (c) 2008 by Intevation GmbH
4     # author: Sascha L. Teichmann ([email protected])
5     #
6     import sys
7 teichmann 239 import re
8 teichmann 236 import codecs
9    
# Field separator used both for splitting the input lines and for joining
# the fields of the output lines.
SEP = '\t'

# Matches file names of the form "...ka_<id>_db.csv"; group 1 captures <id>
# (lower-case letters and digits only).  The dot before "csv" is escaped so
# that only a literal ".csv" suffix is accepted.
FKZ = re.compile(r".*ka_([0-9a-z]+)_db\.csv$")
13    
class CSV:
    """In-memory table read from a separator-delimited text file.

    Attributes:
        fkz: the value passed to the constructor, stored unchanged.
        headers: column names taken from the first line, in file order.
        indexed_headers: dict mapping a column name to its position in
            ``headers`` (for duplicate names the last position wins).
        rows: list of data rows; each row is the list of field strings
            obtained by splitting one non-empty line on the separator.
    """

    def __init__(self, f, fkz=None, sep=None):
        """Read the header line and all remaining lines from *f*.

        f: open text file object supporting ``readline()`` and iteration
            over lines.
        fkz: optional value stored on the instance as ``self.fkz``.
        sep: field separator; when ``None`` the module-level ``SEP`` is used.
        """
        if sep is None:
            sep = SEP
        self.fkz = fkz

        header_line = f.readline().replace('\r', '').replace('\n', '')
        headers = header_line.split(sep)
        # The header line may be marked with a leading '#'.  Strip only that
        # marker; a '#' inside the first column name is part of the name.
        if headers[0].startswith('#'):
            headers[0] = headers[0][1:]
        self.headers = headers
        self.indexed_headers = dict(
            (name, idx) for idx, name in enumerate(headers))

        rows = []
        for line in f:
            line = line.replace('\r', '').replace('\n', '')
            if not line:
                # Skip lines that are empty once the line ending is removed.
                continue
            rows.append(line.split(sep))
        self.rows = rows

    def find(self, header):
        """Return the column index of *header*, or -1 if there is no such column."""
        return self.indexed_headers.get(header, -1)
33    
def main():
    """Join the CSV files named on the command line into one table on stdout.

    Each argument is opened as a latin1 text file and parsed with ``CSV``.
    If the file name matches ``FKZ``, the upper-cased capture group is
    passed to ``CSV`` as its ``fkz`` value.  The output has one column per
    distinct header found in any input.  Columns are ordered by the largest
    index the header has in any input file.  Every input row is written
    once, with an empty field for each column its file does not provide.
    Progress information ("file: ..." and "headers: ...") goes to stderr.
    """
    csvs = []
    for arg in sys.argv[1:]:
        sys.stderr.write("file: %s\n" % arg)
        m = FKZ.match(arg)
        if m:
            fkz = m.group(1).upper()
        else:
            fkz = None
        f = codecs.open(arg, "r", "latin1")
        try:
            csvs.append(CSV(f, fkz))
        finally:
            f.close()

    # Files with the most columns first; the sort is stable, so files with
    # the same number of columns keep their command-line order.
    csvs.sort(key=lambda table: len(table.headers), reverse=True)

    # Collect the distinct headers in first-seen order so that the final
    # column order does not depend on set iteration order.
    headers = []
    seen = set()
    for table in csvs:
        for name in table.headers:
            if name not in seen:
                seen.add(name)
                headers.append(name)

    # Order the columns by the largest index each header has in any file.
    # Ties keep the first-seen order established above.
    order = [(max([table.find(name) for table in csvs]), name)
             for name in headers]
    order.sort(key=lambda pair: pair[0])
    order = [pair[1] for pair in order]

    sys.stderr.write("headers: %d\n" % len(headers))

    # On Python 3 the encoded bytes must go to the underlying byte stream;
    # on Python 2 sys.stdout accepts them directly.
    out = codecs.getwriter("latin1")(getattr(sys.stdout, "buffer", sys.stdout))

    out.write("#%s\r\n" % SEP.join(order))
    for table in csvs:
        # Resolve the source column of every output column once per file
        # rather than once per row.
        indices = [table.find(name) for name in order]
        for row in table.rows:
            fields = []
            for name, idx in zip(order, indices):
                if name == 'fkz' and table.fkz:
                    # The value derived from the file name takes precedence
                    # over whatever the file has in its own 'fkz' column.
                    fields.append(table.fkz)
                elif 0 <= idx < len(row):
                    fields.append(row[idx])
                else:
                    # Column absent from this file, or the row is shorter
                    # than its header line.
                    fields.append('')
            out.write("%s\r\n" % SEP.join(fields))

    # Flush instead of closing: stdout belongs to the process, and a write
    # error should surface rather than be silently discarded.
    out.flush()
88    
# Script entry point: run the joiner only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()

Properties

Name Value
svn:executable *

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26