33 |
dbflib.FTDouble: FIELDTYPE_DOUBLE} |
dbflib.FTDouble: FIELDTYPE_DOUBLE} |
34 |
|
|
35 |
|
|
|
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method issues a DeprecationWarning naming the old
    method and then delegates to a method of the new interface
    (NumRows, NumColumns, Column, ValueRange, UniqueValues and
    ReadRowAsDict) which the class using the mixin has to provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        This has to be called directly from the deprecated method: the
        method name in the message is taken from the calling frame and
        stacklevel = 3 makes the warning refer to the code that called
        the deprecated method.
        """
        frame = inspect.currentframe()
        if frame is not None and frame.f_back is not None:
            callername = frame.f_back.f_code.co_name
        else:
            # inspect.currentframe may return None on Python
            # implementations without stack frame support.
            callername = "<unknown>"
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        Shared by field_info and field_info_by_name so that each of them
        issues exactly one deprecation warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return self.NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return self.NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info for the column col

        Return None if looking up the column raises a KeyError.
        """
        self.__deprecation_warning()
        try:
            # Deliberately not calling self.field_info here: it would
            # issue a second warning which, because of the fixed
            # stacklevel, would point at this mixin instead of at the
            # caller.
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that the column lookup raises other
            # exceptions when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        min and max are the two values returned by
        self.ValueRange(fieldName).
        """
        self.__deprecation_warning()
        minimum, maximum = self.ValueRange(fieldName)
        return ((minimum, None), (maximum, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return self.UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return self.ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
|
|
|
|
|
|
|
|
|
|
36 |
class DBFColumn: |
class DBFColumn: |
37 |
|
|
38 |
"""Description of a column in a DBFTable |
"""Description of a column in a DBFTable |
55 |
self.index = index |
self.index = index |
56 |
|
|
57 |
|
|
58 |
class DBFTable(TitledObject, OldTableInterfaceMixin): |
class DBFTable(TitledObject): |
59 |
|
|
60 |
""" |
""" |
61 |
Table interface for the data in a DBF file |
Table interface for the data in a DBF file |
76 |
# work because a DBF file object buffers some data |
# work because a DBF file object buffers some data |
77 |
|
|
78 |
def __init__(self, filename): |
def __init__(self, filename): |
79 |
self.filename = filename |
self.filename = os.path.abspath(filename) |
80 |
|
|
81 |
# Omit the extension in the title as it's not really needed and |
# Omit the extension in the title as it's not really needed and |
82 |
# it can be confusing because dbflib removes extensions and |
# it can be confusing because dbflib removes extensions and |
131 |
""" |
""" |
132 |
return self.column_map.has_key(col) |
return self.column_map.has_key(col) |
133 |
|
|
134 |
def ReadRowAsDict(self, row): |
def RowIdToOrdinal(self, gid): |
135 |
"""Return the entire row as a dictionary with column names as keys""" |
"""Return the row ordinal given its id |
136 |
|
|
137 |
|
Since for DBFTables the row id is the row number, return the |
138 |
|
value unchanged. |
139 |
|
""" |
140 |
|
return gid |
141 |
|
|
142 |
|
def RowOrdinalToId(self, num): |
143 |
|
"""Return the rowid for given its ordinal |
144 |
|
|
145 |
|
Since for DBFTables the row id is the row number, return the |
146 |
|
value unchanged. |
147 |
|
""" |
148 |
|
return num |
149 |
|
|
150 |
|
def ReadRowAsDict(self, row, row_is_ordinal = 0): |
151 |
|
"""Return the entire row as a dictionary with column names as keys |
152 |
|
|
153 |
|
The row_is_ordinal is ignored for DBF tables because the row id |
154 |
|
is always the row number. |
155 |
|
""" |
156 |
return self.dbf.read_record(row) |
return self.dbf.read_record(row) |
157 |
|
|
158 |
def ReadValue(self, row, col): |
def ReadValue(self, row, col, row_is_ordinal = 0): |
159 |
"""Return the value of the specified row and column |
"""Return the value of the specified row and column |
160 |
|
|
161 |
The col parameter may be the index of the column or its name. |
The col parameter may be the index of the column or its name. |
162 |
|
|
163 |
|
The row_is_ordinal is ignored for DBF tables because the row id |
164 |
|
is always the row number. |
165 |
""" |
""" |
166 |
return self.dbf.read_record(row)[self.column_map[col].name] |
return self.dbf.read_attribute(row, self.column_map[col].index) |
167 |
|
|
168 |
def ValueRange(self, col): |
def ValueRange(self, col): |
169 |
"""Return the minimum and maximum values of the values in the column |
"""Return the minimum and maximum values of the values in the column |
245 |
self.type = type |
self.type = type |
246 |
self.index = index |
self.index = index |
247 |
|
|
248 |
class MemoryTable(TitledObject, OldTableInterfaceMixin): |
class MemoryTable(TitledObject): |
249 |
|
|
250 |
"""Very simple table implementation that operates on a list of tuples""" |
"""Very simple table implementation that operates on a list of tuples""" |
251 |
|
|
298 |
"""Return the number of rows in the table""" |
"""Return the number of rows in the table""" |
299 |
return len(self.data) |
return len(self.data) |
300 |
|
|
301 |
def ReadValue(self, row, col): |
def RowIdToOrdinal(self, gid): |
302 |
|
"""Return the row ordinal given its id |
303 |
|
|
304 |
|
Since for MemoryTables the row id is the row number, return the |
305 |
|
value unchanged. |
306 |
|
""" |
307 |
|
return gid |
308 |
|
|
309 |
|
def RowOrdinalToId(self, num): |
310 |
|
"""Return the rowid for given its ordinal |
311 |
|
|
312 |
|
Since for MemoryTables the row id is the row number, return the |
313 |
|
value unchanged. |
314 |
|
""" |
315 |
|
return num |
316 |
|
|
317 |
|
def ReadValue(self, row, col, row_is_ordinal = 0): |
318 |
"""Return the value of the specified row and column |
"""Return the value of the specified row and column |
319 |
|
|
320 |
The col parameter may be the index of the column or its name. |
The col parameter may be the index of the column or its name. |
321 |
|
|
322 |
|
The row_is_ordinal is ignored for DBF tables because the row id |
323 |
|
is always the row number. |
324 |
""" |
""" |
325 |
return self.data[row][self.column_map[col].index] |
return self.data[row][self.column_map[col].index] |
326 |
|
|
327 |
def ReadRowAsDict(self, index): |
def ReadRowAsDict(self, index, row_is_ordinal = 0): |
328 |
"""Return the entire row as a dictionary with column names as keys""" |
"""Return the entire row as a dictionary with column names as keys |
329 |
|
|
330 |
|
The row_is_ordinal is ignored for DBF tables because the row id |
331 |
|
is always the row number. |
332 |
|
""" |
333 |
return dict([(col.name, self.data[index][col.index]) |
return dict([(col.name, self.data[index][col.index]) |
334 |
for col in self.columns]) |
for col in self.columns]) |
335 |
|
|
403 |
self.data[record] = values |
self.data[record] = values |
404 |
|
|
405 |
|
|
406 |
def table_to_dbf(table, filename): |
|
407 |
"""Create the dbf file filename from the table""" |
def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names
    so when writing an arbitrary Thuban table to a DBF file we may have
    to rename some of the columns making sure that they're unique in
    the DBF file too.

    Names that are already short enough will stay the same. Longer names
    will be truncated to 10 characters or if that isn't unique it will
    be truncated more and filled up with digits.

    The parameter names should be a list of the column names. The list
    itself is not modified. The return value will be a dictionary
    mapping the names in the input list to the names to use in the DBF
    file.

    Raise a ValueError if no unique name can be found for a column.
    """
    # mapping from the original names in table to the names in the DBF
    # file
    name_map = {}

    # First, we keep all names that are already short enough. The longer
    # ones are collected in their original order without touching the
    # caller's list.
    long_names = []
    for name in names:
        if len(name) <= 10:
            name_map[name] = name
        else:
            long_names.append(name)

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in long_names:
        truncated = name[:10]
        num = 0
        numstr = ""
        while truncated in used and len(numstr) < 10:
            num += 1
            numstr = str(num)
            truncated = name[:10 - len(numstr)] + numstr
        if len(numstr) >= 10:
            # This case should never happen in practice as tables with
            # 10^10 columns seem very unlikely :)
            raise ValueError("Can't find unique dbf column name")

        name_map[name] = truncated
        used[truncated] = 1

    return name_map
457 |
|
|
458 |
|
def table_to_dbf(table, filename, rows = None): |
459 |
|
"""Create the dbf file filename from the table. |
460 |
|
|
461 |
|
If rows is not None (the default) then it must be a list of row |
462 |
|
indices to be saved to the file, otherwise all rows are saved. |
463 |
|
""" |
464 |
|
|
465 |
dbf = dbflib.create(filename) |
dbf = dbflib.create(filename) |
466 |
|
|
467 |
dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString, |
dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString, |
468 |
FIELDTYPE_INT: dbflib.FTInteger, |
FIELDTYPE_INT: dbflib.FTInteger, |
469 |
FIELDTYPE_DOUBLE: dbflib.FTDouble} |
FIELDTYPE_DOUBLE: dbflib.FTDouble} |
470 |
|
|
471 |
|
|
472 |
|
name_map = _find_dbf_column_names([col.name for col in table.Columns()]) |
473 |
|
|
474 |
# Initialise the header. Distinguish between DBFTable and others. |
# Initialise the header. Distinguish between DBFTable and others. |
475 |
for col in table.Columns(): |
for col in table.Columns(): |
476 |
width = table.Width(col.name) |
width = table.Width(col.name) |
478 |
prec = getattr(col, "prec", 12) |
prec = getattr(col, "prec", 12) |
479 |
else: |
else: |
480 |
prec = 0 |
prec = 0 |
481 |
dbf.add_field(col.name, dbflib_fieldtypes[col.type], width, prec) |
dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type], |
482 |
|
width, prec) |
483 |
|
|
484 |
|
if rows is None: |
485 |
|
rows = range(table.NumRows()) |
486 |
|
|
487 |
for i in range(table.NumRows()): |
recNum = 0 |
488 |
record = table.ReadRowAsDict(i) |
for i in rows: |
489 |
dbf.write_record(i, record) |
record = {} |
490 |
|
for key, value in table.ReadRowAsDict(i).items(): |
491 |
|
record[name_map[key]] = value |
492 |
|
dbf.write_record(recNum, record) |
493 |
|
recNum += 1 |
494 |
dbf.close() |
dbf.close() |
495 |
|
|
496 |
def table_to_csv(table, filename): |
def table_to_csv(table, filename, rows = None): |
497 |
"""Export table to csv file.""" |
"""Export table to csv file. |
498 |
|
|
499 |
|
If rows is not None (the default) then it must be a list of row |
500 |
|
indices to be saved to the file, otherwise all rows are saved. |
501 |
|
""" |
502 |
|
|
503 |
file = open(filename,"w") |
file = open(filename,"w") |
504 |
columns = table.Columns() |
columns = table.Columns() |
509 |
header = header + "\n" |
header = header + "\n" |
510 |
file.write(header) |
file.write(header) |
511 |
|
|
512 |
for i in range(table.NumRows()): |
if rows is None: |
513 |
|
rows = range(table.NumRows()) |
514 |
|
|
515 |
|
for i in rows: |
516 |
record = table.ReadRowAsDict(i) |
record = table.ReadRowAsDict(i) |
517 |
if len(record): |
if len(record): |
518 |
line = "%s" % record[columns[0].name] |
line = "%s" % record[columns[0].name] |