132 |
# work because a DBF file object buffers some data |
# work because a DBF file object buffers some data |
133 |
|
|
134 |
def __init__(self, filename): |
def __init__(self, filename): |
135 |
self.filename = filename |
self.filename = os.path.abspath(filename) |
136 |
title = os.path.basename(self.filename) |
|
137 |
|
# Omit the extension in the title as it's not really needed and |
138 |
|
# it can be confusing because dbflib removes extensions and |
139 |
|
# appends some variations of '.dbf' before it tries to open the |
140 |
|
# file. So the title could be e.g. myshapefile.shp when the real |
141 |
|
# filename is myshapefile.dbf |
142 |
|
title = os.path.splitext(os.path.basename(self.filename))[0] |
143 |
TitledObject.__init__(self, title) |
TitledObject.__init__(self, title) |
144 |
|
|
145 |
self.dbf = dbflib.DBFFile(filename) |
self.dbf = dbflib.DBFFile(filename) |
146 |
|
|
147 |
# If true, self.dbf is open for writing. |
# If true, self.dbf is open for writing. |
187 |
""" |
""" |
188 |
return self.column_map.has_key(col) |
return self.column_map.has_key(col) |
189 |
|
|
190 |
def RowIdToOrdinal(self, gid):
    """Return the ordinal of the row with the id gid

    DBF tables use the row number itself as the row id, so gid is
    returned unchanged.
    """
    return gid
198 |
|
def RowOrdinalToId(self, num):
    """Return the row id of the row with ordinal num

    DBF tables use the row number itself as the row id, so num is
    returned unchanged.
    """
    return num
def ReadRowAsDict(self, row, row_is_ordinal = 0):
    """Return the contents of the row as a dict keyed by column name

    The row_is_ordinal flag is ignored because for DBF tables the
    row id is always the row number.
    """
    # dbflib already returns the record as a fieldname -> value dict.
    record = self.dbf.read_record(row)
    return record
def ReadValue(self, row, col, row_is_ordinal = 0):
    """Return the value stored at the given row and column

    The col parameter may be either the index of the column or its
    name.

    The row_is_ordinal flag is ignored because for DBF tables the
    row id is always the row number.
    """
    # column_map maps both indices and names to column objects, so a
    # single lookup handles either form of col.
    fieldname = self.column_map[col].name
    return self.dbf.read_record(row)[fieldname]
354 |
"""Return the number of rows in the table""" |
"""Return the number of rows in the table""" |
355 |
return len(self.data) |
return len(self.data) |
356 |
|
|
357 |
def RowIdToOrdinal(self, gid):
    """Return the ordinal of the row with the id gid

    MemoryTables use the row number itself as the row id, so gid is
    returned unchanged.
    """
    return gid
def RowOrdinalToId(self, num):
    """Return the row id of the row with ordinal num

    MemoryTables use the row number itself as the row id, so num is
    returned unchanged.
    """
    return num
def ReadValue(self, row, col, row_is_ordinal = 0):
    """Return the value of the specified row and column

    The col parameter may be the index of the column or its name.

    The row_is_ordinal is ignored for MemoryTables because the row
    id is always the row number.
    """
    # column_map maps both indices and names to column objects, so a
    # single lookup handles either form of col.
    return self.data[row][self.column_map[col].index]
def ReadRowAsDict(self, index, row_is_ordinal = 0):
    """Return the entire row as a dictionary with column names as keys

    The row_is_ordinal is ignored for MemoryTables because the row
    id is always the row number.
    """
    return dict([(col.name, self.data[index][col.index])
                 for col in self.columns])
459 |
self.data[record] = values |
self.data[record] = values |
460 |
|
|
461 |
|
|
462 |
def table_to_dbf(table, filename): |
|
463 |
"""Create the dbf file filename from the table""" |
def _find_dbf_column_names(names): |
464 |
|
"""Determine the column names to use in a DBF file |
465 |
|
|
466 |
|
DBF files have a length limit of 10 characters on the column names |
467 |
|
so when writing an arbitrary Thuban table to a DBF file we may have |
468 |
|
we may have to rename some of the columns making sure that they're |
469 |
|
unique in the DBF file too. |
470 |
|
|
471 |
|
Names that are already short enough will stay the same. Longer names |
472 |
|
will be truncated to 10 characters or if that isn't unique it will |
473 |
|
be truncated more and filled up with digits. |
474 |
|
|
475 |
|
The parameter names should be a list of the column names. The return |
476 |
|
value will be a dictionary mapping the names in the input list to |
477 |
|
the names to use in the DBF file. |
478 |
|
""" |
479 |
|
# mapping from the original names in table to the names in the DBF |
480 |
|
# file |
481 |
|
name_map = {} |
482 |
|
|
483 |
|
# First, we keep all names that are already short enough |
484 |
|
for i in range(len(names) - 1, -1, -1): |
485 |
|
if len(names[i]) <= 10: |
486 |
|
name_map[names[i]] = names[i] |
487 |
|
del names[i] |
488 |
|
|
489 |
|
# dict used as a set of all names already used as DBF column names |
490 |
|
used = name_map.copy() |
491 |
|
|
492 |
|
# Go through all longer names. If the name truncated to 10 |
493 |
|
# characters is not used already, we use that. Otherwise we truncate |
494 |
|
# it more and append numbers until we get an unused name |
495 |
|
for name in names: |
496 |
|
truncated = name[:10] |
497 |
|
num = 0; numstr = "" |
498 |
|
#print "truncated", truncated, num |
499 |
|
while truncated in used and len(numstr) < 10: |
500 |
|
num += 1 |
501 |
|
numstr = str(num) |
502 |
|
truncated = name[:10 - len(numstr)] + numstr |
503 |
|
#print "truncated", truncated, num |
504 |
|
if len(numstr) >= 10: |
505 |
|
# This case should never happen in practice as tables with |
506 |
|
# 10^10 columns seem very unlikely :) |
507 |
|
raise ValueError("Can't find unique dbf column name") |
508 |
|
|
509 |
|
name_map[name] = truncated |
510 |
|
used[truncated] = 1 |
511 |
|
|
512 |
|
return name_map |
513 |
|
|
514 |
|
def table_to_dbf(table, filename, rows = None): |
515 |
|
"""Create the dbf file filename from the table. |
516 |
|
|
517 |
|
If rows is not None (the default) then it must be a list of row |
518 |
|
indices to be saved to the file, otherwise all rows are saved. |
519 |
|
""" |
520 |
|
|
521 |
dbf = dbflib.create(filename) |
dbf = dbflib.create(filename) |
522 |
|
|
523 |
dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString, |
dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString, |
524 |
FIELDTYPE_INT: dbflib.FTInteger, |
FIELDTYPE_INT: dbflib.FTInteger, |
525 |
FIELDTYPE_DOUBLE: dbflib.FTDouble} |
FIELDTYPE_DOUBLE: dbflib.FTDouble} |
526 |
|
|
527 |
|
|
528 |
|
name_map = _find_dbf_column_names([col.name for col in table.Columns()]) |
529 |
|
|
530 |
# Initialise the header. Distinguish between DBFTable and others. |
# Initialise the header. Distinguish between DBFTable and others. |
531 |
for col in table.Columns(): |
for col in table.Columns(): |
532 |
width = table.Width(col.name) |
width = table.Width(col.name) |
534 |
prec = getattr(col, "prec", 12) |
prec = getattr(col, "prec", 12) |
535 |
else: |
else: |
536 |
prec = 0 |
prec = 0 |
537 |
dbf.add_field(col.name, dbflib_fieldtypes[col.type], width, prec) |
dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type], |
538 |
|
width, prec) |
539 |
|
|
540 |
|
if rows is None: |
541 |
|
rows = range(table.NumRows()) |
542 |
|
|
543 |
for i in range(table.NumRows()): |
recNum = 0 |
544 |
record = table.ReadRowAsDict(i) |
for i in rows: |
545 |
dbf.write_record(i, record) |
record = {} |
546 |
|
for key, value in table.ReadRowAsDict(i).items(): |
547 |
|
record[name_map[key]] = value |
548 |
|
dbf.write_record(recNum, record) |
549 |
|
recNum += 1 |
550 |
dbf.close() |
dbf.close() |
551 |
|
|
552 |
def table_to_csv(table, filename): |
def table_to_csv(table, filename, rows = None): |
553 |
"""Export table to csv file.""" |
"""Export table to csv file. |
554 |
|
|
555 |
|
If rows is not None (the default) then it must be a list of row |
556 |
|
indices to be saved to the file, otherwise all rows are saved. |
557 |
|
""" |
558 |
|
|
559 |
file = open(filename,"w") |
file = open(filename,"w") |
560 |
columns = table.Columns() |
columns = table.Columns() |
565 |
header = header + "\n" |
header = header + "\n" |
566 |
file.write(header) |
file.write(header) |
567 |
|
|
568 |
for i in range(table.NumRows()): |
if rows is None: |
569 |
|
rows = range(table.NumRows()) |
570 |
|
|
571 |
|
for i in rows: |
572 |
record = table.ReadRowAsDict(i) |
record = table.ReadRowAsDict(i) |
573 |
if len(record): |
if len(record): |
574 |
line = "%s" % record[columns[0].name] |
line = "%s" % record[columns[0].name] |