1 |
# Copyright (c) 2001, 2002, 2003 by Intevation GmbH |
# Copyright (c) 2001, 2002, 2003 by Intevation GmbH |
2 |
# Authors: |
# Authors: |
3 |
# Bernhard Herzog <[email protected]> |
# Bernhard Herzog <[email protected]> |
4 |
|
# Jan-Oliver Wagner <[email protected]> |
5 |
|
# Frank Koormann <[email protected]> |
6 |
# |
# |
7 |
# This program is free software under the GPL (>=v2) |
# This program is free software under the GPL (>=v2) |
8 |
# Read the file COPYING coming with Thuban for details. |
# Read the file COPYING coming with Thuban for details. |
13 |
|
|
14 |
__version__ = "$Revision$" |
__version__ = "$Revision$" |
15 |
|
|
16 |
|
import os |
17 |
|
import inspect |
18 |
|
import warnings |
19 |
|
|
20 |
|
from base import TitledObject |
21 |
|
|
22 |
import dbflib |
import dbflib |
23 |
|
|
24 |
# the field types supported by a Table instance. |
# the field types supported by a Table instance. |
32 |
dbflib.FTInteger: FIELDTYPE_INT, |
dbflib.FTInteger: FIELDTYPE_INT, |
33 |
dbflib.FTDouble: FIELDTYPE_DOUBLE} |
dbflib.FTDouble: FIELDTYPE_DOUBLE} |
34 |
|
|
|
class Table: |
|
35 |
|
|
36 |
|
class DBFColumn: |
37 |
|
|
38 |
|
"""Description of a column in a DBFTable |
39 |
|
|
40 |
|
Instances have the following public attributes: |
41 |
|
|
42 |
|
name -- Name of the column |
43 |
|
type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\ |
44 |
|
FIELDTYPE_DOUBLE) |
45 |
|
index -- The index of the column |
46 |
|
width -- the width of the data in the column |
47 |
|
prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE) |
48 |
""" |
""" |
|
Represent a table of data. |
|
49 |
|
|
50 |
Currently this is basically just a wrapper around dbflib. |
def __init__(self, name, type, width, prec, index): |
51 |
|
self.name = name |
52 |
|
self.type = type |
53 |
|
self.width = width |
54 |
|
self.prec = prec |
55 |
|
self.index = index |
56 |
|
|
57 |
|
|
58 |
|
class DBFTable(TitledObject): |
59 |
|
|
60 |
|
""" |
61 |
|
Table interface for the data in a DBF file |
62 |
""" |
""" |
63 |
|
|
64 |
# Implementation strategy regarding writing to a DBF file: |
# Implementation strategy regarding writing to a DBF file: |
76 |
# work because a DBF file object buffers some data |
# work because a DBF file object buffers some data |
77 |
|
|
78 |
def __init__(self, filename): |
def __init__(self, filename): |
79 |
self.filename = filename |
self.filename = os.path.abspath(filename) |
80 |
|
|
81 |
|
# Omit the extension in the title as it's not really needed and |
82 |
|
# it can be confusing because dbflib removes extensions and |
83 |
|
# appends some variations of '.dbf' before it tries to open the |
84 |
|
# file. So the title could be e.g. myshapefile.shp when the real |
85 |
|
# filename is myshapefile.dbf |
86 |
|
title = os.path.splitext(os.path.basename(self.filename))[0] |
87 |
|
TitledObject.__init__(self, title) |
88 |
|
|
89 |
self.dbf = dbflib.DBFFile(filename) |
self.dbf = dbflib.DBFFile(filename) |
90 |
|
|
91 |
# If true, self.dbf is open for writing. |
# If true, self.dbf is open for writing. |
92 |
self._writable = 0 |
self._writable = 0 |
93 |
|
|
94 |
def Destroy(self): |
# Create the column information objects |
95 |
self.dbf.close() |
self.columns = [] |
96 |
self.dbf = None |
self.column_map = {} |
97 |
|
for i in range(self.NumColumns()): |
98 |
|
ftype, name, width, prec = self.dbf.field_info(i) |
99 |
|
ftype = dbflib_fieldtypes[ftype] |
100 |
|
index = len(self.columns) |
101 |
|
col = DBFColumn(name, ftype, width, prec, index) |
102 |
|
self.columns.append(col) |
103 |
|
self.column_map[name] = col |
104 |
|
self.column_map[index] = col |
105 |
|
|
106 |
def record_count(self): |
def NumRows(self): |
107 |
"""Return the number of records""" |
"""Return the number of rows in the table""" |
108 |
return self.dbf.record_count() |
return self.dbf.record_count() |
109 |
|
|
110 |
def field_count(self): |
def NumColumns(self): |
111 |
"""Return the number of fields in a record""" |
"""Return the number of columns in the table""" |
112 |
return self.dbf.field_count() |
return self.dbf.field_count() |
113 |
|
|
114 |
def field_info(self, field): |
def Columns(self): |
115 |
"""Return a tuple (type, name, width, prec) for the field no. field |
"""Return the table's colum definitions |
116 |
|
|
117 |
type is the data type of the field, name the name, width the |
The return value is a sequence of DBFColumn instances, one for |
118 |
field width in characters and prec the decimal precision. |
each column. |
119 |
""" |
""" |
120 |
type, name, width, prec = self.dbf.field_info(field) |
return self.columns |
121 |
type = dbflib_fieldtypes[type] |
|
122 |
return type, name, width, prec |
def Column(self, col): |
123 |
|
"""Return information about the column given by its name or index |
124 |
|
|
125 |
def field_info_by_name(self, fieldName): |
The returned object is an instance of DBFColumn |
126 |
count = self.field_count() |
""" |
127 |
|
return self.column_map[col] |
128 |
|
|
129 |
for i in range(count): |
def HasColumn(self, col): |
130 |
info = self.field_info(i) |
"""Return whether the table has a column with the given name or index |
131 |
if info[1] == fieldName: |
""" |
132 |
return info |
return self.column_map.has_key(col) |
133 |
|
|
134 |
return None |
def RowIdToOrdinal(self, gid): |
135 |
|
"""Return the row ordinal given its id |
136 |
|
|
137 |
def field_range(self, fieldName): |
Since for DBFTables the row id is the row number, return the |
138 |
"""Finds the first occurences of the minimum and maximum values |
value unchanged. |
139 |
in the table for the given field. |
""" |
140 |
|
return gid |
141 |
|
|
142 |
This assumes that the standard comparison operators (<, >, etc.) |
def RowOrdinalToId(self, num): |
143 |
will work for the given data. |
"""Return the rowid for given its ordinal |
144 |
|
|
145 |
Returns a tuple ((min, rec), (max, rec)) where: |
Since for DBFTables the row id is the row number, return the |
146 |
min is the minimum value |
value unchanged. |
147 |
max is the maximum value |
""" |
148 |
rec is the record number where the value was found. One |
return num |
|
should check that the record number of min is not |
|
|
the same as the record number of max. |
|
149 |
|
|
150 |
Returns None if there are no records |
def ReadRowAsDict(self, row, row_is_ordinal = 0): |
151 |
|
"""Return the entire row as a dictionary with column names as keys |
152 |
|
|
153 |
|
The row_is_ordinal is ignored for DBF tables because the row id |
154 |
|
is always the row number. |
155 |
""" |
""" |
156 |
|
return self.dbf.read_record(row) |
157 |
|
|
158 |
|
def ReadValue(self, row, col, row_is_ordinal = 0): |
159 |
|
"""Return the value of the specified row and column |
160 |
|
|
161 |
count = self.record_count() |
The col parameter may be the index of the column or its name. |
162 |
|
|
163 |
if count == 0: |
The row_is_ordinal is ignored for DBF tables because the row id |
164 |
return None |
is always the row number. |
165 |
|
""" |
166 |
|
return self.dbf.read_attribute(row, self.column_map[col].index) |
167 |
|
|
168 |
rec = self.read_record(0) |
def ValueRange(self, col): |
169 |
|
"""Return the minimum and maximum values of the values in the column |
170 |
|
|
171 |
min = rec[fieldName] |
The return value is a tuple (min, max) unless the table is empty |
172 |
min_rec = 0 |
in which case the return value is None. |
173 |
|
""" |
174 |
|
count = self.NumRows() |
175 |
|
|
176 |
max = rec[fieldName] |
if count == 0: |
177 |
max_rec = 0 |
return None |
178 |
|
|
179 |
|
min = max = self.ReadValue(0, col) |
180 |
for i in range(1, count): |
for i in range(1, count): |
181 |
rec = self.read_record(i) |
value = self.ReadValue(i, col) |
182 |
data = rec[fieldName] |
if value < min: |
183 |
|
min = value |
184 |
|
elif value > max: |
185 |
|
max = value |
186 |
|
|
187 |
if data < min: |
return (min, max) |
|
min = data |
|
|
min_rec = rec |
|
|
elif data > max: |
|
|
max = data |
|
|
max_rec = rec |
|
|
|
|
|
return ((min, min_rec), (max, max_rec)) |
|
|
|
|
|
def GetUniqueValues(self, fieldName): |
|
|
"""Return a list of all unique entries in the table for the given |
|
|
field name. |
|
|
""" |
|
188 |
|
|
189 |
|
def UniqueValues(self, col): |
190 |
|
"""Return a sorted list of all unique values in the column col""" |
191 |
dict = {} |
dict = {} |
192 |
|
|
193 |
for i in range(0, self.record_count()): |
for i in range(self.NumRows()): |
194 |
rec = self.read_record(i) |
value = self.ReadValue(i, col) |
195 |
data = rec[fieldName] |
dict[value] = 0 |
196 |
|
|
197 |
if not dict.has_key(data): |
values = dict.keys() |
198 |
dict[data] = 0 |
values.sort() |
199 |
|
return values |
200 |
|
|
201 |
|
def Dependencies(self): |
202 |
|
"""Return an empty sequence. The DBFTable doesn't depend on anything""" |
203 |
|
return () |
204 |
|
|
205 |
|
# DBF specific interface parts. |
206 |
|
|
207 |
|
def Width(self, col): |
208 |
|
"""Return column width""" |
209 |
|
return self.column_map[col].width |
210 |
|
|
211 |
return dict.keys() |
def Destroy(self): |
212 |
|
self.dbf.close() |
213 |
def read_record(self, record): |
self.dbf = None |
|
"""Return the record no. record as a dict mapping field names to values |
|
|
""" |
|
|
return self.dbf.read_record(record) |
|
214 |
|
|
215 |
def write_record(self, record, values): |
def write_record(self, record, values): |
216 |
"""Write the values into the record |
"""Write the values into the record |
233 |
self.dbf.write_record(record, values) |
self.dbf.write_record(record, values) |
234 |
self.dbf.commit() |
self.dbf.commit() |
235 |
|
|
236 |
|
def FileName(self): |
237 |
|
"""Return the filename the DBFTable was instantiated with""" |
238 |
|
return self.filename |
239 |
|
|
240 |
|
|
241 |
|
class MemoryColumn:

    """Description of a column in a MemoryTable

    Instances have the following public attributes:

    name -- Name of the column
    type -- Type of the column, stored exactly as given to the constructor
    index -- The index of the column
    """

    def __init__(self, name, type, index):
        self.name = name
        self.type = type
        self.index = index

    def __repr__(self):
        # Purely a debugging aid; shows the three public attributes.
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.name, self.type, self.index)
247 |
|
|
248 |
|
class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is stored by reference, not copied.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map is keyed by
        # both the column name and the column index so that either can
        # be used to look up a column.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of has_key(), which no longer exists in Python 3
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        row = self.data[index]
        return dict([(column.name, row[column.index])
                     for column in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        unique = {}

        for row in range(self.NumRows()):
            unique[self.ReadValue(row, col)] = 0

        # keys() is not a list in Python 3, so copy before sorting
        values = list(unique.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places, ints
        with %d and everything else with %s.

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            fmt = "%.12f"
        elif column.type == FIELDTYPE_INT:
            fmt = "%d"
        else:
            fmt = "%s"

        widest = 0
        for value in values:
            length = len(fmt % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        The values object is stored as is, without any validation.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
404 |
|
|
405 |
|
|
406 |
|
|
407 |
|
def _find_dbf_column_names(names): |
408 |
|
"""Determine the column names to use in a DBF file |
409 |
|
|
410 |
|
DBF files have a length limit of 10 characters on the column names |
411 |
|
so when writing an arbitrary Thuban table to a DBF file we may have |
412 |
|
we may have to rename some of the columns making sure that they're |
413 |
|
unique in the DBF file too. |
414 |
|
|
415 |
|
Names that are already short enough will stay the same. Longer names |
416 |
|
will be truncated to 10 characters or if that isn't unique it will |
417 |
|
be truncated more and filled up with digits. |
418 |
|
|
419 |
|
The parameter names should be a list of the column names. The return |
420 |
|
value will be a dictionary mapping the names in the input list to |
421 |
|
the names to use in the DBF file. |
422 |
|
""" |
423 |
|
# mapping from the original names in table to the names in the DBF |
424 |
|
# file |
425 |
|
name_map = {} |
426 |
|
|
427 |
|
# First, we keep all names that are already short enough |
428 |
|
for i in range(len(names) - 1, -1, -1): |
429 |
|
if len(names[i]) <= 10: |
430 |
|
name_map[names[i]] = names[i] |
431 |
|
del names[i] |
432 |
|
|
433 |
|
# dict used as a set of all names already used as DBF column names |
434 |
|
used = name_map.copy() |
435 |
|
|
436 |
|
# Go through all longer names. If the name truncated to 10 |
437 |
|
# characters is not used already, we use that. Otherwise we truncate |
438 |
|
# it more and append numbers until we get an unused name |
439 |
|
for name in names: |
440 |
|
truncated = name[:10] |
441 |
|
num = 0; numstr = "" |
442 |
|
#print "truncated", truncated, num |
443 |
|
while truncated in used and len(numstr) < 10: |
444 |
|
num += 1 |
445 |
|
numstr = str(num) |
446 |
|
truncated = name[:10 - len(numstr)] + numstr |
447 |
|
#print "truncated", truncated, num |
448 |
|
if len(numstr) >= 10: |
449 |
|
# This case should never happen in practice as tables with |
450 |
|
# 10^10 columns seem very unlikely :) |
451 |
|
raise ValueError("Can't find unique dbf column name") |
452 |
|
|
453 |
|
name_map[name] = truncated |
454 |
|
used[truncated] = 1 |
455 |
|
|
456 |
|
return name_map |
457 |
|
|
458 |
|
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.

    The records are numbered consecutively from 0 in the new file,
    regardless of the row indices they were read from.
    """

    dbf = dbflib.create(filename)

    # Make sure the DBF handle is closed even if writing a field or a
    # record fails.
    try:
        # Mapping from the table's field types to the dbflib ones. Named
        # differently from the module level dbflib_fieldtypes, which
        # maps in the opposite direction.
        thuban_to_dbflib = {FIELDTYPE_STRING: dbflib.FTString,
                            FIELDTYPE_INT: dbflib.FTInteger,
                            FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Distinguish between DBFTable and others:
        # only columns that carry a prec attribute provide their own
        # precision, for all other double columns 12 is used.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], thuban_to_dbflib[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        recNum = 0
        for i in rows:
            # Translate the column names of the row to the DBF names
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
495 |
|
|
496 |
|
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.

    The first line of the file is a header made of a '#' followed by
    the comma separated column names. Each following line holds the
    comma separated values of one row. Values are written with %s
    formatting and are not quoted or escaped.
    """

    outfile = open(filename, "w")

    # Make sure the file is closed even if reading the table fails.
    try:
        names = [col.name for col in table.Columns()]
        if names:
            header = ",".join(["%s" % (name,) for name in names])
            outfile.write("#" + header + "\n")

        if rows is None:
            rows = range(table.NumRows())

        for i in rows:
            record = table.ReadRowAsDict(i)
            if record:
                line = ",".join(["%s" % (record[name],) for name in names])
                outfile.write(line + "\n")
    finally:
        outfile.close()
524 |
|
|