/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory | Revision Log


Revision 2734 - (show annotations)
Thu Mar 1 12:42:59 2007 UTC (18 years ago) by bramz
File MIME type: text/x-python
File size: 16329 byte(s)
made a copy
1 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 # Jan-Oliver Wagner <[email protected]>
5 # Frank Koormann <[email protected]>
6 #
7 # This program is free software under the GPL (>=v2)
8 # Read the file COPYING coming with Thuban for details.
9
10 """
11 Classes for handling tables of data.
12 """
13
14 __version__ = "$Revision$"
15
16 import os
17 import inspect
18 import warnings
19
20 from base import TitledObject
21
22 import dbflib
23
# Type tags a Table instance can report for its columns.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Lookup table translating dbflib's field type constants into the
# FIELDTYPE_* tags defined above.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34
35
class DBFColumn:

    """Metadata record for one column of a DBFTable

    Public attributes of an instance:

        name  -- the column's name
        type  -- the column's type; one of FIELDTYPE_STRING,
                 FIELDTYPE_INT or FIELDTYPE_DOUBLE
        index -- the index of the column
        width -- the width of the data in the column
        prec  -- the precision of the data (only valid when
                 type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Pure value holder: every argument is stored unchanged under
        # the attribute of the same name.
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec
56
57
class DBFTable(TitledObject):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called do we try to open the DBF file for writing
    # as well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data.

    def __init__(self, filename):
        """Open the DBF file filename and collect its column descriptions

        The absolute path of filename is remembered (see FileName) and
        the table's title is the file's base name without extension.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map allows
        # lookup by column name as well as by column index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The 'in' operator replaces dict.has_key, which no longer
        # exists in Python 3.
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Local names deliberately avoid shadowing the min/max builtins.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # A dictionary is used as a set of the values seen so far.
        unique = {}

        for i in range(self.NumRows()):
            unique[self.ReadValue(i, col)] = 0

        # list() makes this work whether keys() returns a list or a view.
        values = list(unique.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it

        Calling Destroy again afterwards does nothing.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write object first so that a failure to do
            # so leaves the existing read-only object untouched.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute filename of the DBF file of this table"""
        return self.filename
239
240
class MemoryColumn:

    """Metadata record for one column of a MemoryTable

    Instances carry the public attributes name, type and index, each
    holding the corresponding constructor argument unchanged.
    """

    def __init__(self, name, type, index):
        self.name, self.type, self.index = name, type, index
247
class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is stored as is (not copied), so write_record
        modifies the list passed in here.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map allows
        # lookup by column name as well as by column index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The 'in' operator replaces dict.has_key, which no longer
        # exists in Python 3.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # A dictionary is used as a set of the values seen so far.
        unique = {}

        for i in range(self.NumRows()):
            unique[self.ReadValue(i, col)] = 0

        # list() makes this work whether keys() returns a list or a view.
        values = list(unique.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with "%.12f", ints with "%d" and
        everything else with "%s".

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        # Local names deliberately avoid shadowing the builtins
        # type, format and max.
        if column.type == FIELDTYPE_DOUBLE:
            fmt = "%.12f"
        elif column.type == FIELDTYPE_INT:
            fmt = "%d"
        else:
            fmt = "%s"

        return max([len(fmt % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        The values object is stored as the new row without any checks.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
404
405
406
407 def _find_dbf_column_names(names):
408 """Determine the column names to use in a DBF file
409
410 DBF files have a length limit of 10 characters on the column names
411 so when writing an arbitrary Thuban table to a DBF file we may have
412 we may have to rename some of the columns making sure that they're
413 unique in the DBF file too.
414
415 Names that are already short enough will stay the same. Longer names
416 will be truncated to 10 characters or if that isn't unique it will
417 be truncated more and filled up with digits.
418
419 The parameter names should be a list of the column names. The return
420 value will be a dictionary mapping the names in the input list to
421 the names to use in the DBF file.
422 """
423 # mapping from the original names in table to the names in the DBF
424 # file
425 name_map = {}
426
427 # First, we keep all names that are already short enough
428 for i in range(len(names) - 1, -1, -1):
429 if len(names[i]) <= 10:
430 name_map[names[i]] = names[i]
431 del names[i]
432
433 # dict used as a set of all names already used as DBF column names
434 used = name_map.copy()
435
436 # Go through all longer names. If the name truncated to 10
437 # characters is not used already, we use that. Otherwise we truncate
438 # it more and append numbers until we get an unused name
439 for name in names:
440 truncated = name[:10]
441 num = 0; numstr = ""
442 #print "truncated", truncated, num
443 while truncated in used and len(numstr) < 10:
444 num += 1
445 numstr = str(num)
446 truncated = name[:10 - len(numstr)] + numstr
447 #print "truncated", truncated, num
448 if len(numstr) >= 10:
449 # This case should never happen in practice as tables with
450 # 10^10 columns seem very unlikely :)
451 raise ValueError("Can't find unique dbf column name")
452
453 name_map[name] = truncated
454 used[truncated] = 1
455
456 return name_map
457
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file. The records in
    the DBF file are numbered consecutively starting at 0 in the order
    given by rows.
    """

    dbf = dbflib.create(filename)

    # Make sure the DBF file is closed even if writing fails half way.
    try:
        # Inverse of the module level dbflib_fieldtypes mapping: from
        # our field type constants to the dbflib constants.
        to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,
                          FIELDTYPE_DOUBLE: dbflib.FTDouble}

        # DBF column names are limited in length, so the table's column
        # names may have to be mapped to shorter ones.
        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Distinguish between DBFTable and others.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                # Columns without a prec attribute get a precision of 12.
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        for rec_num, row in enumerate(rows):
            record = {}
            for key, value in table.ReadRowAsDict(row).items():
                record[name_map[key]] = value
            dbf.write_record(rec_num, record)
    finally:
        dbf.close()
495
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    The first line of the file is a header consisting of '#' followed
    by the comma separated column names. No header and no rows are
    written for a table without columns.

    NOTE: Values are written with plain "%s" formatting and joined with
    commas; they are neither quoted nor escaped.
    """

    outfile = open(filename, "w")

    # Make sure the file is closed even if reading from the table fails.
    try:
        names = [col.name for col in table.Columns()]
        if names:
            header = ",".join(["%s" % name for name in names])
            outfile.write("#" + header + "\n")

        if rows is None:
            rows = range(table.NumRows())

        for i in rows:
            record = table.ReadRowAsDict(i)
            # Empty records are skipped; without columns there is
            # nothing to write for a row either.
            if record and names:
                line = ",".join(["%s" % record[name] for name in names])
                outfile.write(line + "\n")
    finally:
        outfile.close()
524

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26