1 |
bh |
590 |
# Copyright (c) 2001, 2002, 2003 by Intevation GmbH |
2 |
bh |
6 |
# Authors: |
3 |
|
|
# Bernhard Herzog <[email protected]> |
4 |
jan |
806 |
# Jan-Oliver Wagner <[email protected]> |
5 |
frank |
1025 |
# Frank Koormann <[email protected]> |
6 |
bh |
6 |
# |
7 |
|
|
# This program is free software under the GPL (>=v2) |
8 |
|
|
# Read the file COPYING coming with Thuban for details. |
9 |
|
|
|
10 |
|
|
""" |
11 |
|
|
Classes for handling tables of data. |
12 |
|
|
""" |
13 |
|
|
|
14 |
|
|
__version__ = "$Revision$" |
15 |
|
|
|
16 |
bh |
998 |
import os |
17 |
bh |
839 |
import inspect |
18 |
|
|
import warnings |
19 |
|
|
|
20 |
jan |
1019 |
from base import TitledObject |
21 |
|
|
|
22 |
bh |
6 |
import dbflib |
23 |
|
|
|
24 |
|
|
# The field types supported by a Table instance.  The column objects
# defined below carry one of these strings in their "type" attribute.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants.  Used
# to translate the field information read from a DBF file.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}
34 |
|
|
|
35 |
jan |
806 |
|
36 |
bh |
818 |
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The old-style methods defined here are thin wrappers around the
    new-style methods NumRows, NumColumns, Column, ValueRange,
    UniqueValues and ReadRowAsDict, which the class this is mixed into
    has to provide.  Every old-style method issues exactly one
    DeprecationWarning when it is called.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        This has to be called directly from the deprecated method: the
        name of the calling function is put into the message and
        stacklevel = 3 attributes the warning to the caller of the
        deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __field_info(self, field):
        """Return (type, name, width, prec) for field without warning

        Shared by field_info and field_info_by_name so that each of
        them issues only its own deprecation warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return self.NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return self.NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info, but return None for unknown columns

        Only a KeyError raised by the Column method is treated as
        "unknown column".
        """
        self.__deprecation_warning()
        try:
            # Deliberately not self.field_info(col): that would issue a
            # second warning attributed to this mixin instead of to the
            # code that called us.
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return the value range as ((min, None), (max, None))

        The range is taken from self.ValueRange(fieldName).  If that
        returns None instead of a pair, unpacking it raises a
        TypeError.
        """
        self.__deprecation_warning()
        minimum, maximum = self.ValueRange(fieldName)
        return ((minimum, None), (maximum, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return self.UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return self.ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89 |
bh |
6 |
|
90 |
bh |
818 |
|
91 |
|
|
|
92 |
|
|
class DBFColumn:

    """Definition of a single column of a DBFTable

    Public attributes of an instance:

        name -- the name of the column
        type -- the type of the column, one of FIELDTYPE_STRING,
                FIELDTYPE_INT or FIELDTYPE_DOUBLE
        index -- the index of the column
        width -- the width of the data in the column
        prec -- the precision of the data (only valid for
                type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # How the column is addressed
        self.index = index
        self.name = name
        # How its values are stored
        self.type = type
        self.width, self.prec = width, prec
112 |
|
|
|
113 |
|
|
|
114 |
jan |
1019 |
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data.

    def __init__(self, filename):
        """Initialize the table from the DBF file filename

        The basename of filename is used as the title of the table.
        """
        self.filename = filename
        title = os.path.basename(self.filename)
        TitledObject.__init__(self, title)
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects.  column_map maps both
        # the name and the index of a column to its DBFColumn so that
        # columns can be looked up either way.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn.  A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()
        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the keys of a dictionary to weed out the duplicates.
        unique = {}
        for i in range(self.NumRows()):
            unique[self.ReadValue(i, col)] = 0

        # list() so that this also works where keys() is not a list.
        values = list(unique.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Precision(self, col):
        """Return column precision"""
        return self.column_map[col].prec

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file. The table can't be used afterwards"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.

        On the first call the DBF file is reopened in "r+b" mode; see
        the implementation notes at the top of the class.
        """
        if not self._writable:
            # Open the new file object first so that self.dbf stays
            # usable if the file can't be opened for writing.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename
269 |
bh |
765 |
|
270 |
|
|
|
271 |
bh |
818 |
class MemoryColumn:

    """Definition of a single column of a MemoryTable

    Instances have the public attributes name, type and index.
    """

    def __init__(self, name, type, index):
        self.name, self.type, self.index = name, type, index
277 |
|
|
|
278 |
jan |
1019 |
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects.  column_map maps both
        # the name and the index of a column to its MemoryColumn.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.  A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # Use the keys of a dictionary to weed out the duplicates.
        unique = {}
        for i in range(self.NumRows()):
            unique[self.ReadValue(i, col)] = 0

        # list() so that this also works where keys() is not a list.
        values = list(unique.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by
        index or name).  Doubles are formatted with the precision
        returned by the Precision method.  For an empty table the
        return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            fmt = "%%.%df" % self.Precision(col)
        elif column.type == FIELDTYPE_INT:
            fmt = "%d"
        else:
            fmt = "%s"

        widest = 0
        for value in values:
            length = len(fmt % value)
            if length > widest:
                widest = length

        return widest

    def Precision(self, col):
        """Return the precision of the column

        The return value is the maximum number of numeric characters after the
        decimal if column type is double. Else precision zero is returned.
        The column can be represented by index or name.
        """
        column = self.column_map[col]
        if column.type != FIELDTYPE_DOUBLE:
            return 0

        # Length of the longest string representation of the fractional
        # part of a value (value % 1).  Stays 0 for an empty table.
        longest = 0
        for row in self.data:
            length = len(str(row[column.index] % 1))
            if length > longest:
                longest = length

        # Don't count the first two characters of the string (the
        # leading "0." of e.g. "0.25").
        if longest > 2:
            return longest - 2
        return 0

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with the index record by values"""
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
437 |
frank |
1025 |
|
438 |
|
|
|
439 |
|
|
def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    table has to provide the Columns, Precision, Width, NumRows and
    ReadRowAsDict methods.  One DBF field is created per column and one
    DBF record is written per row.  The DBF file is closed even if
    reading from the table or writing to the file fails.
    """
    # Map our field type constants to the dbflib ones (the inverse of
    # the module level dbflib_fieldtypes, hence the different name).
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    dbf = dbflib.create(filename)
    try:
        # Initialise the header.
        for col in table.Columns():
            prec = table.Precision(col.name)
            width = table.Width(col.name)
            dbf.add_field(col.name, to_dbflib_type[col.type], width, prec)

        for i in range(table.NumRows()):
            record = table.ReadRowAsDict(i)
            dbf.write_record(i, record)
    finally:
        dbf.close()
457 |
|
|
|
458 |
|
|
def table_to_csv(table, filename):
    """Export table to csv file.

    table has to provide the Columns, NumRows and ReadRowAsDict
    methods.  If the table has columns, the first line of the file is
    a header consisting of "#" followed by the comma separated column
    names.  Every row with a non-empty record is written as one line
    of comma separated values in column order.  Values are written
    with "%s" formatting; no quoting or escaping is done.

    The file is closed even if reading from the table fails.
    """
    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = ["%s" % col.name for col in columns]
            outfile.write("#" + ",".join(names) + "\n")

        for i in range(table.NumRows()):
            record = table.ReadRowAsDict(i)
            if len(record):
                fields = ["%s" % record[col.name] for col in columns]
                outfile.write(",".join(fields) + "\n")
    finally:
        outfile.close()
479 |
|
|
|