#!/usr/bin/env python
#
# (c) 2008 by Intevation GmbH
# author: Sascha L. Teichmann ([email protected])
#
6 |
|
|
import sys |
7 |
teichmann |
239 |
import re |
8 |
teichmann |
236 |
import codecs |
9 |
|
|
|
10 |
|
|
# Field separator used when splitting input lines and joining output lines.
SEP = '\t'

# Matches file names ending in ``ka_<id>_db.csv`` and captures ``<id>``
# (lower-case letters and digits only).  The dot before ``csv`` is escaped
# so that only a literal ``.csv`` suffix is accepted.
FKZ = re.compile(r".*ka_([0-9a-z]+)_db\.csv$")
13 |
|
|
|
14 |
|
|
class CSV(object):
    """In-memory copy of one separator-delimited text table.

    The first line read from the file is the header line; the first ``#``
    found in its first field is removed.  Every following line that is not
    empty after stripping ``\\r`` and ``\\n`` becomes one row.

    Attributes:
        fkz: the value passed to the constructor, stored unchanged.
        headers: list of column names in file order.
        indexed_headers: dict mapping column name -> column index.
        rows: list of rows, each a list of field strings.  Rows are not
            padded or truncated to the header length.
    """

    def __init__(self, f, fkz=None, sep=None):
        """Read the whole table from the open text file *f*.

        f   -- file-like object supporting ``readline()`` and iteration
               over lines
        fkz -- optional identifier kept on the instance as ``self.fkz``
        sep -- field separator; when None the module-level ``SEP`` is used
        """
        if sep is None:
            sep = SEP
        self.fkz = fkz

        headers = self._strip_eol(f.readline()).split(sep)
        headers[0] = headers[0].replace('#', '', 1)
        self.headers = headers
        # When a column name occurs more than once the last index wins.
        self.indexed_headers = dict(
            (name, idx) for idx, name in enumerate(headers))

        rows = []
        for line in f:
            line = self._strip_eol(line)
            if not line:
                # skip blank lines entirely
                continue
            rows.append(line.split(sep))
        self.rows = rows

    @staticmethod
    def _strip_eol(line):
        """Return *line* with every ``\\r`` and ``\\n`` character removed."""
        return line.replace('\r', '').replace('\n', '')

    def find(self, header):
        """Return the column index of *header*, or -1 if it is unknown."""
        return self.indexed_headers.get(header, -1)
33 |
|
|
|
34 |
teichmann |
236 |
def main():
    """Merge the files named on the command line into one table on stdout.

    Each argument is opened as Latin-1 text and parsed with ``CSV``.  If
    the file name matches ``FKZ`` the captured id, upper-cased, is attached
    to that table.  Progress messages go to stderr.

    The output is Latin-1 encoded, uses ``\\r\\n`` line ends and starts with
    a header line prefixed by ``#``.  The output columns are the union of
    all input headers.  Each header is positioned by the largest column
    index it has in any input; equal positions are ordered by header name
    so the result is reproducible.  Rows are written file by file, files
    with the most headers first (input order is kept among equals).

    For a column named ``fkz`` the id derived from the file name, when
    there is one, replaces whatever the row itself holds.  Columns a file
    does not have, and fields missing from rows shorter than their header
    line, are written as empty strings.
    """
    tables = []
    for arg in sys.argv[1:]:
        sys.stderr.write("file: %s\n" % arg)
        m = FKZ.match(arg)
        if m:
            fkz = m.group(1).upper()
        else:
            fkz = None
        f = codecs.open(arg, "r", "latin1")
        try:
            tables.append(CSV(f, fkz))
        finally:
            f.close()

    # Widest tables first; list.sort is stable, so ties keep argv order.
    tables.sort(key=lambda table: len(table.headers), reverse=True)

    headers = set()
    for table in tables:
        headers.update(table.headers)

    # Sorting the (position, name) pairs as tuples breaks ties by name
    # instead of by the arbitrary iteration order of the set.
    positioned = sorted(
        (max(table.find(header) for table in tables), header)
        for header in headers)
    order = [header for _, header in positioned]

    sys.stderr.write("headers: %d\n" % len(headers))

    # On Python 3 encoded bytes must go to the underlying binary stream;
    # on Python 2 sys.stdout has no ``buffer`` and is used directly.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    out = codecs.getwriter("latin1")(stream)

    out.write("#%s\r\n" % SEP.join(order))
    for table in tables:
        # Resolve each output column once per file rather than once per row.
        columns = [(header, table.find(header)) for header in order]
        for row in table.rows:
            fields = []
            for header, idx in columns:
                if header == 'fkz' and table.fkz:
                    fields.append(table.fkz)
                elif 0 <= idx < len(row):
                    fields.append(row[idx])
                else:
                    # column unknown to this file, or row too short
                    fields.append('')
            out.write("%s\r\n" % SEP.join(fields))

    # stdout belongs to the interpreter: flush it, do not close it.
    out.flush()
88 |
|
|
|
89 |
teichmann |
236 |
# Run the merge only when this file is executed as a script.
if __name__ == "__main__":
    main()