/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1025 - (show annotations)
Mon May 26 11:46:17 2003 UTC (21 years, 9 months ago) by frank
Original Path: trunk/thuban/Thuban/Model/table.py
File MIME type: text/x-python
File size: 14851 byte(s)
(DBFTable.Width, MemoryTable.Width):
	Return width (in characters) for a column.
(DBFTable.Precision, MemoryTable.Precision): Return decimal precision.
(table_to_dbf): Write table to dbf file.
(table_to_csv): Write table to csv file.

1 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 # Jan-Oliver Wagner <[email protected]>
5 # Frank Koormann <[email protected]>
6 #
7 # This program is free software under the GPL (>=v2)
8 # Read the file COPYING coming with Thuban for details.
9
10 """
11 Classes for handling tables of data.
12 """
13
14 __version__ = "$Revision$"
15
16 import os
17 import inspect
18 import warnings
19
20 from base import TitledObject
21
22 import dbflib
23
# The data types a column of a table can have.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translation table from the field type constants used by dbflib to the
# FIELDTYPE_* constants defined above.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34
35
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method issues a DeprecationWarning and then delegates
    to the corresponding method of the new interface (NumRows,
    NumColumns, Column, ValueRange, UniqueValues, ReadRowAsDict).  The
    class this mixin is combined with has to provide those methods.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The warning text names the method this was called from.  The
        stacklevel of 3 attributes the warning to the caller of that
        method, so this must be called directly from the deprecated
        method itself and not through another helper.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __field_info(self, field):
        """Return the (type, name, width, prec) tuple without a warning

        Shared by field_info and field_info_by_name so that each of
        them issues exactly one deprecation warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the result of NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the result of NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info or None for unknown columns

        Only a KeyError raised by the Column method is taken to mean
        that the column does not exist.
        """
        self.__deprecation_warning()
        try:
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return ((min, None), (max, None)) for the column fieldName

        min and max are the values returned by ValueRange.  If
        ValueRange returns None, which the table classes in this module
        do for a table without rows, None is returned as well.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89
90
91
class DBFColumn:

    """Description of a column in a DBFTable

    Instances have the following public attributes:

    name -- Name of the column
    type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE)
    index -- The index of the column
    width -- The width of the data in the column
    prec -- The precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value object: the arguments are stored unchanged.
        self.name, self.type = name, type
        self.width, self.prec = width, prec
        self.index = index
112
113
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename for reading

        The title of the table is the basename of filename.
        """
        self.filename = filename
        title = os.path.basename(self.filename)
        TitledObject.__init__(self, title)
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. Each column is
        # registered in column_map under both its name and its index
        # so that either can be used to look it up.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # The dictionary is only used as a set; the values are irrelevant.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Precision(self, col):
        """Return the precision of the column given by its name or index"""
        return self.column_map[col].prec

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file. The table cannot be used afterwards"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write object first. If that raises, the
            # read-only object is still open and the table stays usable.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename
269
270
class MemoryColumn:

    """Description of a column in a MemoryTable

    Instances have the public attributes name, type and index which
    hold the values given to the constructor.
    """

    def __init__(self, name, type, index):
        # Plain value object: the arguments are stored unchanged.
        self.name, self.type, self.index = name, type, index
277
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as is and not copied. write_record
        modifies it in place.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. Each column is
        # registered in column_map under both its name and its index
        # so that either can be used to look it up.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        index = self.column_map[col].index

        # The dictionary is only used as a set; the values are irrelevant.
        seen = {}
        for row in self.data:
            seen[row[index]] = 0

        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by
        index or name). Doubles are formatted with the number of
        decimals returned by Precision. For a table without rows the
        width is 0, just like the precision.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return 0

        if column.type == FIELDTYPE_DOUBLE:
            template = "%%.%df" % self.Precision(col)
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            # The one-element tuple keeps values that are tuples
            # themselves from being taken as the argument list.
            length = len(template % (value,))
            if length > widest:
                widest = length

        return widest

    def Precision(self, col):
        """Return the precision of the column

        The return value is the maximum number of numeric characters after the
        decimal if column type is double. Else precision zero is returned.
        The column can be represented by index or name.
        """
        column = self.column_map[col]
        if column.type != FIELDTYPE_DOUBLE:
            return 0

        longest = 0
        for row in self.data:
            fraction = row[column.index] % 1
            text = str(fraction)
            if "e" in text.lower():
                # Small fractions come out in exponent notation
                # ("1e-05") whose length says nothing about the number
                # of decimals. Use fixed-point notation for them.
                text = ("%.12f" % fraction).rstrip("0")
            if len(text) > longest:
                longest = len(text)

        # text has the form "0.ddd"; the leading "0." does not count.
        if longest > 2:
            return longest - 2
        return 0

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names (or indexes) of
        columns of the table. Only the fields contained in the
        dictionary are replaced; the row is stored as a new tuple. A
        KeyError is raised for unknown columns and the row is left
        unchanged in that case.

        If it's a sequence it replaces the entire row as is.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        if isinstance(values, dict):
            row = list(self.data[record])
            for key, value in values.items():
                row[self.column_map[key].index] = value
            values = tuple(row)
        self.data[record] = values
437
438
def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    One field is added to the file for every column of the table, using
    the width and precision the table reports through its Width and
    Precision methods. Then all rows are written in order.

    The dbf file is closed even if reading from the table or writing
    to the file raises an exception.
    """
    # Inverse of the module level dbflib_fieldtypes mapping: from the
    # FIELDTYPE_* constants to the constants used by dbflib.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    dbf = dbflib.create(filename)
    try:
        # Initialise the header.
        for col in table.Columns():
            prec = table.Precision(col.name)
            width = table.Width(col.name)
            dbf.add_field(col.name, to_dbflib_type[col.type], width, prec)

        for i in range(table.NumRows()):
            dbf.write_record(i, table.ReadRowAsDict(i))
    finally:
        dbf.close()
457
def table_to_csv(table, filename):
    """Export table to csv file.

    The first line of the file is a "#" followed by the comma separated
    column names. Each following line holds the values of one row,
    formatted with "%s" and separated by commas. Values are written as
    they are, without any quoting. Rows for which ReadRowAsDict returns
    an empty dictionary are skipped.

    If the table has no columns the file is created empty. The file is
    closed even if reading from the table raises an exception.
    """
    names = [col.name for col in table.Columns()]

    outfile = open(filename, "w")
    try:
        if names:
            outfile.write("#%s\n" % ",".join(names))

            for i in range(table.NumRows()):
                record = table.ReadRowAsDict(i)
                if record:
                    # The one-element tuple keeps values that are
                    # tuples themselves from being taken as the
                    # argument list of the format.
                    fields = ["%s" % (record[name],) for name in names]
                    outfile.write(",".join(fields) + "\n")
    finally:
        outfile.close()
479

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26