1 |
#!/usr/bin/env python |
2 |
# |
3 |
# (c) 2008 by Intevation GmbH |
4 |
# author: Sascha L. Teichmann ([email protected]) |
5 |
# |
6 |
import sys |
7 |
import re |
8 |
import codecs |
9 |
|
10 |
# Field separator used when splitting input lines and joining output lines.
SEP = '\t'

# Matches file names of the form "...ka_<key>_db.csv"; group 1 captures
# <key> (lower-case letters and digits only).  The dot in front of "csv"
# is escaped so that only a literal "." is accepted there.
FKZ = re.compile(r".*ka_([0-9a-z]+)_db\.csv$")
13 |
|
14 |
class CSV:
    """In-memory copy of one SEP-separated text file.

    Attributes:
        fkz             -- the value passed to the constructor, unchanged.
        headers         -- list of header names taken from the first line.
        indexed_headers -- dict mapping header name -> column index.
        rows            -- list of rows, each a list of field strings.
    """

    def __init__(self, f, fkz=None):
        """Read the header line and all remaining lines from *f*.

        f   -- an open, iterable text file object positioned at its start.
        fkz -- optional value that is simply stored on the instance.
        """
        self.fkz = fkz

        headers = self._strip_newlines(f.readline()).split(SEP)
        # Drop the first '#' found in the first header field (the header
        # line is written with a leading '#').
        headers[0] = headers[0].replace('#', '', 1)
        self.headers = headers

        # enumerate() replaces the Python-2-only xrange(); as before, a
        # header name that occurs twice maps to its last position.
        self.indexed_headers = dict(
            (name, idx) for idx, name in enumerate(headers))

        rows = []
        for raw in f:
            line = self._strip_newlines(raw)
            # Lines that are empty after removing CR/LF carry no data.
            if line:
                rows.append(line.split(SEP))
        self.rows = rows

    @staticmethod
    def _strip_newlines(line):
        """Return *line* with every CR and LF character removed."""
        return line.replace('\r', '').replace('\n', '')

    def find(self, header):
        """Return the column index of *header*, or -1 if it is unknown."""
        return self.indexed_headers.get(header, -1)
33 |
|
34 |
def _header_order(csvs):
    """Return every header found in *csvs* as a list in output order.

    A header is placed according to the largest column index it has in
    any of the files; headers with the same index are ordered by name so
    that the result does not depend on set iteration order.
    """
    headers = set()
    for table in csvs:
        headers.update(table.headers)
    return sorted(
        headers,
        key=lambda name: (max(table.find(name) for table in csvs), name))


def _merged_row(table, row, order):
    """Return *row* of *table* as one output line laid out as in *order*."""
    cells = []
    for header in order:
        if header == 'fkz' and table.fkz:
            # A key derived from the file name overrides the file's own
            # 'fkz' column (if it has one).
            cells.append(table.fkz)
            continue
        idx = table.find(header)
        # Unknown columns, and rows that are shorter than the header
        # line, yield an empty cell instead of raising IndexError.
        cells.append(row[idx] if 0 <= idx < len(row) else '')
    return SEP.join(cells)


def main():
    """Merge the files named on the command line and print the result.

    Every argument is read as a latin1 encoded, SEP-separated file.  If
    the file name matches FKZ, the upper-cased key from the name is used
    for the 'fkz' column of all rows of that file.  The union of all
    headers is written to stdout as a '#'-prefixed header line, followed
    by the rows of all files (files with more headers first), each line
    terminated by CR LF and encoded as latin1.  Progress information goes
    to stderr.
    """
    csvs = []
    for arg in sys.argv[1:]:
        sys.stderr.write("file: %s\n" % arg)
        m = FKZ.match(arg)
        fkz = m.group(1).upper() if m else None
        # The with-block closes the input file even if parsing fails.
        with codecs.open(arg, "r", "latin1") as f:
            csvs.append(CSV(f, fkz))

    # Files with the most headers come first; the sort is stable, so
    # files with equally many headers keep their command line order.
    csvs.sort(key=lambda table: -len(table.headers))

    order = _header_order(csvs)
    sys.stderr.write("headers: %d\n" % len(order))

    # Encoded bytes must go to the binary layer of stdout where one
    # exists (Python 3); on Python 2 sys.stdout itself accepts them.
    raw_out = getattr(sys.stdout, "buffer", sys.stdout)
    out = codecs.getwriter("latin1")(raw_out)

    out.write("#%s\r\n" % SEP.join(order))
    for table in csvs:
        for row in table.rows:
            out.write("%s\r\n" % _merged_row(table, row, order))
    # stdout is flushed rather than closed: it does not belong to us.
    out.flush()
88 |
|
89 |
# Run the merge only when this file is executed as a script.
if __name__ == "__main__":
    main()