1 |
# Copyright (c) 2001, 2002, 2003 by Intevation GmbH |
2 |
# Authors: |
3 |
# Bernhard Herzog <[email protected]> |
4 |
# Jan-Oliver Wagner <[email protected]> |
5 |
# |
6 |
# This program is free software under the GPL (>=v2) |
7 |
# Read the file COPYING coming with Thuban for details. |
8 |
|
9 |
""" |
10 |
Classes for handling tables of data. |
11 |
""" |
12 |
|
13 |
# Version string of this module.  "$Revision$" has the form of a
# version-control keyword placeholder -- presumably it is expanded by
# the revision control system on checkout (TODO confirm).
__version__ = "$Revision$"
14 |
|
15 |
import inspect |
16 |
import warnings |
17 |
|
18 |
import dbflib |
19 |
|
20 |
# The field types supported by a Table instance.  The type attribute of
# the column objects defined in this module holds one of these values.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants.
# DBFTable.__init__ uses this mapping to translate the field type
# reported by the DBF file object into one of the FIELDTYPE_* values.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}
30 |
|
31 |
|
32 |
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class using the mixin has to provide the methods of the new
    interface that are called here: NumRows, NumColumns, Column,
    ValueRange, UniqueValues and ReadRowAsDict.  Every method of the
    old interface issues exactly one DeprecationWarning and then
    delegates to its counterpart in the new interface.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The method name in the message is taken from the frame of the
        caller and stacklevel = 3 attributes the warning to the code
        that called the deprecated method.  Both only work if this
        method is called directly by the deprecated method itself.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __field_info(self, field):
        """Return the tuple (type, name, width, prec) without a warning

        Shared by field_info and field_info_by_name so that each of them
        issues only its own deprecation warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the result of NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the result of NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info for the column col

        Return None if the lookup of the column raises a KeyError.
        """
        self.__deprecation_warning()
        try:
            # Don't go through the public field_info here.  It would
            # issue a second warning that points at this method instead
            # of the code that uses the old interface.
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that the Column method raises other
            # exceptions when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return ((min, None), (max, None)) for the column fieldName

        min and max are the values returned by ValueRange.  If
        ValueRange returns None, which the table classes in this module
        do for an empty table, the return value is None as well.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
85 |
|
86 |
|
87 |
|
88 |
class DBFColumn:

    """Column description used by DBFTable

    Public attributes of an instance:

        name -- the name of the column
        type -- the type of the column, one of FIELDTYPE_STRING,
                FIELDTYPE_INT or FIELDTYPE_DOUBLE
        index -- the index of the column
        width -- the width of the data in the column
        prec -- the precision of the data; only valid if type is
                FIELDTYPE_DOUBLE
    """

    def __init__(self, name, type, width, prec, index):
        """Store all parameters in the attributes of the same name"""
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec
108 |
|
109 |
|
110 |
class DBFTable(OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename and set up the column descriptions"""
        self.filename = filename
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects.  column_map maps both
        # the name and the index of a column to its DBFColumn instance.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn.  A KeyError is
        raised if col is neither the name nor the index of a column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The in operator instead of has_key, which newer Python
        # versions don't provide anymore.
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        # The record is looked up by column name, so an index has to be
        # translated into the name first.
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Start with the value of the first row and compare the values
        # of all other rows with it.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the values as dictionary keys to remove the duplicates.
        seen = {}

        for i in range(self.NumRows()):
            value = self.ReadValue(i, col)
            seen[value] = 0

        # Make an explicit list of the keys because the result of keys()
        # is not necessarily a list that can be sorted in place.
        values = list(seen.keys())
        values.sort()
        return values


    # DBF specific interface parts.

    def Destroy(self):
        """Close the DBF file and drop the reference to the file object

        Calling Destroy again afterwards does nothing.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the file for writing before closing the read-only
            # file object so that self.dbf stays usable if opening the
            # file for writing raises an exception.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()
248 |
|
249 |
|
250 |
|
251 |
class MemoryColumn:

    """Column description used by MemoryTable

    Public attributes of an instance:

        name -- the name of the column
        type -- the type of the column
        index -- the index of the column
    """

    def __init__(self, name, type, index):
        """Store all parameters in the attributes of the same name"""
        self.index = index
        self.name, self.type = name, type
257 |
|
258 |
class MemoryTable(OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied.  write_record modifies the list
        that was passed in.
        """
        self.data = data

        # Create the column information objects.  column_map maps both
        # the name and the index of a column to its MemoryColumn
        # instance.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.  A KeyError
        is raised if col is neither the name nor the index of a column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The in operator instead of has_key, which newer Python
        # versions don't provide anymore.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the values as dictionary keys to remove the duplicates.
        seen = {}

        for i in range(self.NumRows()):
            value = self.ReadValue(i, col)
            seen[value] = 0

        # Make an explicit list of the keys because the result of keys()
        # is not necessarily a list that can be sorted in place.
        values = list(seen.keys())
        values.sort()
        return values


    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must identify columns of the
        table.  Only the fields actually contained in the dictionary
        are written.  Fields for which there's no item in the dict are
        not modified.  A KeyError is raised for a key that is not a
        column of the table and the row is left unchanged in that case.

        If it's a sequence, it replaces the entire row and all fields
        must be present in the right order.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        if isinstance(values, dict):
            # Build the complete new row first so that the old row is
            # only replaced if all keys could be resolved.
            row = list(self.data[record])
            for name, value in values.items():
                row[self.column_map[name].index] = value
            values = tuple(row)
        self.data[record] = values