2 |
# Authors: |
# Authors: |
3 |
# Bernhard Herzog <[email protected]> |
# Bernhard Herzog <[email protected]> |
4 |
# Jan-Oliver Wagner <[email protected]> |
# Jan-Oliver Wagner <[email protected]> |
5 |
|
# Frank Koormann <[email protected]> |
6 |
# |
# |
7 |
# This program is free software under the GPL (>=v2) |
# This program is free software under the GPL (>=v2) |
8 |
# Read the file COPYING coming with Thuban for details. |
# Read the file COPYING coming with Thuban for details. |
17 |
import inspect |
import inspect |
18 |
import warnings |
import warnings |
19 |
|
|
20 |
|
from base import TitledObject |
21 |
|
|
22 |
import dbflib |
import dbflib |
23 |
|
|
24 |
# the field types supported by a Table instance. |
# the field types supported by a Table instance. |
111 |
self.index = index |
self.index = index |
112 |
|
|
113 |
|
|
114 |
class DBFTable(OldTableInterfaceMixin): |
class DBFTable(TitledObject, OldTableInterfaceMixin): |
115 |
|
|
116 |
""" |
""" |
117 |
Table interface for the data in a DBF file |
Table interface for the data in a DBF file |
133 |
|
|
134 |
def __init__(self, filename): |
def __init__(self, filename): |
135 |
self.filename = filename |
self.filename = filename |
136 |
|
|
137 |
|
# Omit the extension in the title as it's not really needed and |
138 |
|
# it can be confusing because dbflib removes extensions and |
139 |
|
# appends some variations of '.dbf' before it tries to open the |
140 |
|
# file. So the title could be e.g. myshapefile.shp when the real |
141 |
|
# filename is myshapefile.dbf |
142 |
|
title = os.path.splitext(os.path.basename(self.filename))[0] |
143 |
|
TitledObject.__init__(self, title) |
144 |
|
|
145 |
self.dbf = dbflib.DBFFile(filename) |
self.dbf = dbflib.DBFFile(filename) |
146 |
|
|
147 |
# If true, self.dbf is open for writing. |
# If true, self.dbf is open for writing. |
159 |
self.column_map[name] = col |
self.column_map[name] = col |
160 |
self.column_map[index] = col |
self.column_map[index] = col |
161 |
|
|
|
def Title(self):
    """Return the title of the table.

    The title is simply the basename of the filename, i.e. the last
    path component of self.filename.
    """
    return os.path.basename(self.filename)
|
|
|
|
162 |
def NumRows(self): |
def NumRows(self): |
163 |
"""Return the number of rows in the table""" |
"""Return the number of rows in the table""" |
164 |
return self.dbf.record_count() |
return self.dbf.record_count() |
237 |
|
|
238 |
# DBF specific interface parts. |
# DBF specific interface parts. |
239 |
|
|
240 |
|
def Width(self, col):
    """Return column width

    col is looked up in self.column_map, which is keyed by both
    column name and column index, so either may be given. The value
    returned is the width attribute of the column object found there.
    A KeyError is raised if col is not a known column.
    """
    return self.column_map[col].width
243 |
|
|
244 |
def Destroy(self): |
def Destroy(self): |
245 |
self.dbf.close() |
self.dbf.close() |
246 |
self.dbf = None |
self.dbf = None |
278 |
self.type = type |
self.type = type |
279 |
self.index = index |
self.index = index |
280 |
|
|
281 |
class MemoryTable(OldTableInterfaceMixin): |
class MemoryTable(TitledObject, OldTableInterfaceMixin): |
282 |
|
|
283 |
"""Very simple table implementation that operates on a list of tuples""" |
"""Very simple table implementation that operates on a list of tuples""" |
284 |
|
|
290 |
data -- List of tuples, one for each row of data |
data -- List of tuples, one for each row of data |
291 |
""" |
""" |
292 |
self.data = data |
self.data = data |
293 |
|
title = 'MemoryTable' |
294 |
|
TitledObject.__init__(self, title) |
295 |
|
|
296 |
# Create the column information objects |
# Create the column information objects |
297 |
self.columns = [] |
self.columns = [] |
303 |
self.column_map[name] = col |
self.column_map[name] = col |
304 |
self.column_map[index] = col |
self.column_map[index] = col |
305 |
|
|
|
def Title(self):
    """Return 'MemoryTable'

    Override in derived classes to have a more meaningful title.
    """
    return "MemoryTable"
|
|
|
|
306 |
def NumColumns(self): |
def NumColumns(self): |
307 |
"""Return the number of columns in the table""" |
"""Return the number of columns in the table""" |
308 |
return len(self.columns) |
return len(self.columns) |
358 |
return min(values), max(values) |
return min(values), max(values) |
359 |
|
|
360 |
def UniqueValues(self, col): |
def UniqueValues(self, col): |
361 |
"""Return a sorted list of all unique values in the column col""" |
"""Return a sorted list of all unique values in the column col |
362 |
|
|
363 |
|
col can be either column index or name. |
364 |
|
""" |
365 |
dict = {} |
dict = {} |
366 |
|
|
367 |
for i in range(self.NumRows()): |
for i in range(self.NumRows()): |
372 |
values.sort() |
values.sort() |
373 |
return values |
return values |
374 |
|
|
375 |
|
def Width(self, col):
    """Return the maximum width of values in the column

    The return value is the maximum length of the string
    representation of the values in the column (represented by index
    or name). Doubles are formatted with "%.12f", integers with "%d"
    and everything else with "%s".

    If the table has no rows, None is returned.
    """
    column = self.column_map[col]
    values = [row[column.index] for row in self.data]
    if not values:
        return None

    if column.type == FIELDTYPE_DOUBLE:
        fmt = "%.12f"
    elif column.type == FIELDTYPE_INT:
        fmt = "%d"
    else:
        fmt = "%s"

    # Wrap each value in a one-element tuple so that a value which is
    # itself a tuple is formatted as a single item.
    return max([len(fmt % (value,)) for value in values])
402 |
|
|
403 |
def Dependencies(self): |
def Dependencies(self): |
404 |
"""Return an empty sequence. The MemoryTable doesn't depend on anything |
"""Return an empty sequence. The MemoryTable doesn't depend on anything |
405 |
""" |
""" |
411 |
# TODO: Allow values to be a dictionary and write the single |
# TODO: Allow values to be a dictionary and write the single |
412 |
# fields that are specified. |
# fields that are specified. |
413 |
self.data[record] = values |
self.data[record] = values |
414 |
|
|
415 |
|
|
416 |
|
|
417 |
|
def _find_dbf_column_names(names): |
418 |
|
"""Determine the column names to use in a DBF file |
419 |
|
|
420 |
|
DBF files have a length limit of 10 characters on the column names |
421 |
|
so when writing an arbitrary Thuban table to a DBF file we may have |
422 |
|
we may have to rename some of the columns making sure that they're |
423 |
|
unique in the DBF file too. |
424 |
|
|
425 |
|
Names that are already short enough will stay the same. Longer names |
426 |
|
will be truncated to 10 characters or if that isn't unique it will |
427 |
|
be truncated more and filled up with digits. |
428 |
|
|
429 |
|
The parameter names should be a list of the column names. The return |
430 |
|
value will be a dictionary mapping the names in the input list to |
431 |
|
the names to use in the DBF file. |
432 |
|
""" |
433 |
|
# mapping from the original names in table to the names in the DBF |
434 |
|
# file |
435 |
|
name_map = {} |
436 |
|
|
437 |
|
# First, we keep all names that are already short enough |
438 |
|
for i in range(len(names) - 1, -1, -1): |
439 |
|
if len(names[i]) <= 10: |
440 |
|
name_map[names[i]] = names[i] |
441 |
|
del names[i] |
442 |
|
|
443 |
|
# dict used as a set of all names already used as DBF column names |
444 |
|
used = name_map.copy() |
445 |
|
|
446 |
|
# Go through all longer names. If the name truncated to 10 |
447 |
|
# characters is not used already, we use that. Otherwise we truncate |
448 |
|
# it more and append numbers until we get an unused name |
449 |
|
for name in names: |
450 |
|
truncated = name[:10] |
451 |
|
num = 0; numstr = "" |
452 |
|
#print "truncated", truncated, num |
453 |
|
while truncated in used and len(numstr) < 10: |
454 |
|
num += 1 |
455 |
|
numstr = str(num) |
456 |
|
truncated = name[:10 - len(numstr)] + numstr |
457 |
|
#print "truncated", truncated, num |
458 |
|
if len(numstr) >= 10: |
459 |
|
# This case should never happen in practice as tables with |
460 |
|
# 10^10 columns seem very unlikely :) |
461 |
|
raise ValueError("Can't find unique dbf column name") |
462 |
|
|
463 |
|
name_map[name] = truncated |
464 |
|
used[truncated] = 1 |
465 |
|
|
466 |
|
return name_map |
467 |
|
|
468 |
|
def table_to_dbf(table, filename, records = None):
    """Create the dbf file filename from the table

    The column names are mapped to DBF column names with
    _find_dbf_column_names. The width of each field is taken from
    table.Width; the precision of double columns is the column's prec
    attribute if it has one and 12 otherwise, all other columns get a
    precision of 0.

    If records is given it should be a sequence of row indices of
    table; only those rows are written, in the order given. If records
    is None all rows are written. The records in the new file are
    always numbered consecutively starting with 0.
    """
    dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
                         FIELDTYPE_INT: dbflib.FTInteger,
                         FIELDTYPE_DOUBLE: dbflib.FTDouble}

    dbf = dbflib.create(filename)
    # Make sure the dbf file is closed even if writing fails part way
    # through.
    try:
        columns = table.Columns()
        name_map = _find_dbf_column_names([col.name for col in columns])

        # Initialise the header.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
                          width, prec)

        if records is None:
            records = range(table.NumRows())

        rec_num = 0
        for i in records:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(rec_num, record)
            rec_num += 1
    finally:
        dbf.close()
500 |
|
|
501 |
|
def table_to_csv(table, filename, records = None):
    """Export table to csv file.

    The first line written is a header consisting of '#' followed by
    the comma separated column names (omitted if the table has no
    columns). Each row is then written as one line with the values
    separated by commas, in column order. Values are formatted with
    "%s" and are not quoted or escaped.

    If records is given it should be a sequence of row indices of
    table; only those rows are written, in the order given. If records
    is None all rows are written. Rows for which table.ReadRowAsDict
    returns an empty dictionary are skipped.
    """
    outfile = open(filename, "w")
    # Make sure the file is closed even if reading the table fails.
    try:
        columns = table.Columns()
        if columns:
            header = ",".join(["%s" % col.name for col in columns])
            outfile.write("#" + header + "\n")

        if records is None:
            records = range(table.NumRows())

        for i in records:
            record = table.ReadRowAsDict(i)
            if record:
                line = ",".join(["%s" % record[col.name]
                                 for col in columns])
                outfile.write(line + "\n")
    finally:
        outfile.close()
525 |
|
|