/[formed]/trunk/tools/anonym/joincsv.py

Diff of /trunk/tools/anonym/joincsv.py

Parent Directory | Revision Log | Patch

-revision 238 by teichmann, Mon Feb 25 17:08:20 2008 UTC
+revision 239 by teichmann, Mon Feb 25 21:51:02 2008 UTC
 Line 4
  # author: Sascha L. Teichmann ([email protected])
  #
  import sys
+ import re
  import codecs
  SEP = '\t'
+ FKZ = re.compile(r".*ka_([0-9a-z]+)_db.csv$")
+ class CSV:
+     def __init__(self, f, fkz=None):
+         self.fkz = fkz
+         line = f.readline()
+         line = line.replace('\r', '').replace('\n', '')
+         line = line.split(SEP)
+         line[0] = line[0].replace('#', '', 1)
+         self.headers = line
+         self.indexed_headers = dict(zip(line, xrange(len(line))))
+         rows = []
+         for line in f:
+             line = line.replace('\r', '').replace('\n', '')
+             if not line: continue
+             line = line.split(SEP)
+             rows.append(line)
+         self.rows = rows
+     def find(self, header):
+         return self.indexed_headers.get(header, -1)
  def main():
      csvs = []
-     max_cols = -1;
+     for arg in sys.argv[1:]:
-     max_idx  = -1;
-     for idx, arg in enumerate(sys.argv[1:]):
          print >> sys.stderr, "file: %s" % arg
+         m = FKZ.match(arg)
+         if m: fkz = m.group(1).upper()
+         else: fkz = None
          f = None
          try:
              f = codecs.open(arg, "r", "latin1")
-             csv = []
+             csvs.append(CSV(f, fkz))
-             for line in f:
-                 line = line.replace('\r', '').replace('\n', '')
-                 if not line: continue
-                 line = line.split(SEP)
-                 l = len(line)
-                 line[0] = line[0].replace('#', '', 1)
-                 if l > max_cols:
-                     max_cols = l
-                     max_idx = idx
-                 csv.append(line)
-             csvs.append(csv)
          finally:
              if f:
                  try: f.close()
                  except: pass
-     print >> sys.stderr, "max_cols: %d" % max_cols
+     csvs.sort(lambda a, b: -cmp(len(a.headers), len(b.headers)))
-     print >> sys.stderr, "max_idx: %d" % max_idx
+     headers = set()
-     maps = []
+     for csv in csvs:
-     for i in csvs:
+         for h in csv.headers:
-         maps.append([-1] * max_cols)
+             headers.add(h)
-     master = csvs[max_idx]
+     order = []
+     for header in headers:
-     for j, m in enumerate(master[0]):
+         order.append((max([csv.find(header) for csv in csvs]), header))
-         for i in range(len(csvs)):
+     order.sort(lambda a, b: cmp(a[0], b[0]))
-             try:
-                 idx = csvs[i][0].index(m)
+     order = [x[1] for x in order]
-                 maps[i][j] = idx
-             except ValueError:
+     print >> sys.stderr, "headers: %d" % len(headers)
-                 pass
+     Writer = codecs.getwriter("latin1")
-     Writer = codecs.getwriter("latin-1")
-     f = Writer(sys.stdout)
+     try:
-     f.write("#%s\r\n" % SEP.join(master[0]))
+         f = Writer(sys.stdout)
+         f.write("#%s\r\n" % SEP.join(order))
-     for row, csv in enumerate(csvs):
+         for csv in csvs:
-         map = maps[row]
+             for row in csv.rows:
-         for c in csv[1:]:
+                 line = []
-             line = []
+                 for header in order:
-             for j in xrange(max_cols):
+                     if header == 'fkz' and csv.fkz:
-                 idx = map[j]
+                         line.append(csv.fkz)
-                 if idx >= 0:
+                     else:
-                     line.append(c[idx])
+                         idx = csv.find(header)
-                 else:
+                         if idx >= 0: line.append(row[idx])
-                     line.append('')
+                         else:        line.append('')
-             all = SEP.join(line)
+                 line = SEP.join(line)
-             f.write("%s\r\n" % all)
+                 f.write("%s\r\n" % line)
+     finally:
+         if f:
+             try: f.close()
+             except: pass
  if __name__ == "__main__":
      main()

 Legend:

-Removed from v.238
 changed lines
+Added in v.239

[email protected]	ViewVC Help
Powered by ViewVC 1.1.26