/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory | Revision Log


Revision 1919 - (show annotations)
Mon Nov 3 17:33:36 2003 UTC (21 years, 4 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/table.py
File MIME type: text/x-python
File size: 18280 byte(s)
(DBFTable.ReadValue): Use the new
read_attribute method of the dbf objects

1 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 # Jan-Oliver Wagner <[email protected]>
5 # Frank Koormann <[email protected]>
6 #
7 # This program is free software under the GPL (>=v2)
8 # Read the file COPYING coming with Thuban for details.
9
10 """
11 Classes for handling tables of data.
12 """
13
14 __version__ = "$Revision$"
15
16 import os
17 import inspect
18 import warnings
19
20 from base import TitledObject
21
22 import dbflib
23
# Column data types that a Table instance can report.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translates dbflib's field type constants into the FIELDTYPE_* values
# defined above.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34
35
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method issues a DeprecationWarning and then delegates
    to the corresponding method of the new interface (NumRows,
    NumColumns, Column, ValueRange, UniqueValues, ReadRowAsDict), which
    the class this is mixed into has to provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        This must be called directly from the deprecated method: the
        method name in the message is taken from the calling frame and
        stacklevel 3 attributes the warning to the code that called
        the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __field_info(self, field):
        """Build the (type, name, width, prec) tuple without warning

        Shared by field_info and field_info_by_name so that each of
        them issues exactly one warning for one call.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return self.NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return self.NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None on a KeyError

        The lookup goes through the private helper instead of the
        deprecated field_info method, otherwise a second warning would
        be issued and attributed to this module.
        """
        self.__deprecation_warning()
        try:
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that the lookup raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        min and max are the values returned by self.ValueRange. If
        ValueRange returns None (which the tables in this module do
        when they have no rows) None is returned as well instead of
        failing while unpacking it.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return self.UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return self.ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89
90
91
class DBFColumn:

    """Column description used by DBFTable

    Public attributes of an instance:

       name -- the name of the column
       type -- the type of the column, one of FIELDTYPE_STRING,
               FIELDTYPE_INT or FIELDTYPE_DOUBLE
       index -- the index of the column
       width -- the width of the data in the column
       prec -- the precision of the data (only valid for
               type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value holder: every argument becomes a public attribute
        # of the same name.
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec
112
113
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename and build the column descriptions

        The absolute path of filename is kept in self.filename and the
        title of the table is the base name of the file without its
        extension.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the column name and the column index to the same DBFColumn.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if col is neither a known name nor a known index.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of dict.has_key, which no longer exists in
        # Python 3.
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Values are read one at a time so that the whole column never
        # has to be held in memory.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # A dict used as a set; only the keys matter.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting: keys() is not
        # a list in Python 3.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write object first. If that fails the
            # exception propagates and the read-only object stays in
            # use.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute filename of the DBF file

        This is the os.path.abspath of the filename the DBFTable was
        instantiated with.
        """
        return self.filename
295
296
class MemoryColumn:

    """Column description used by MemoryTable

    Instances have the public attributes name, type and index, each
    holding the constructor argument of the same name.
    """

    def __init__(self, name, type, index):
        self.index = index
        self.name, self.type = name, type
303
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as is (it is not copied), so
        write_record modifies the list passed in here.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map maps both
        # the column name and the column index to the same MemoryColumn.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of dict.has_key, which no longer exists in
        # Python 3.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # A dict used as a set; only the keys matter.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting: keys() is not
        # a list in Python 3.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places and ints
        with %d; everything else with %s.

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            fmt = "%.12f"
        elif column.type == FIELDTYPE_INT:
            fmt = "%d"
        else:
            fmt = "%s"

        # values is not empty here, so max() always has something to
        # work with.
        return max([len(fmt % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        values is stored as the new row without any checks.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
460
461
462
463 def _find_dbf_column_names(names):
464 """Determine the column names to use in a DBF file
465
466 DBF files have a length limit of 10 characters on the column names
467 so when writing an arbitrary Thuban table to a DBF file we may have
468 we may have to rename some of the columns making sure that they're
469 unique in the DBF file too.
470
471 Names that are already short enough will stay the same. Longer names
472 will be truncated to 10 characters or if that isn't unique it will
473 be truncated more and filled up with digits.
474
475 The parameter names should be a list of the column names. The return
476 value will be a dictionary mapping the names in the input list to
477 the names to use in the DBF file.
478 """
479 # mapping from the original names in table to the names in the DBF
480 # file
481 name_map = {}
482
483 # First, we keep all names that are already short enough
484 for i in range(len(names) - 1, -1, -1):
485 if len(names[i]) <= 10:
486 name_map[names[i]] = names[i]
487 del names[i]
488
489 # dict used as a set of all names already used as DBF column names
490 used = name_map.copy()
491
492 # Go through all longer names. If the name truncated to 10
493 # characters is not used already, we use that. Otherwise we truncate
494 # it more and append numbers until we get an unused name
495 for name in names:
496 truncated = name[:10]
497 num = 0; numstr = ""
498 #print "truncated", truncated, num
499 while truncated in used and len(numstr) < 10:
500 num += 1
501 numstr = str(num)
502 truncated = name[:10 - len(numstr)] + numstr
503 #print "truncated", truncated, num
504 if len(numstr) >= 10:
505 # This case should never happen in practice as tables with
506 # 10^10 columns seem very unlikely :)
507 raise ValueError("Can't find unique dbf column name")
508
509 name_map[name] = truncated
510 used[truncated] = 1
511
512 return name_map
513
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    Column names are mapped to names that are valid and unique in a DBF
    file with _find_dbf_column_names.
    """

    dbf = dbflib.create(filename)

    # Make sure the new file is closed even if one of the steps below
    # raises an exception.
    try:
        # Our field type constants mapped to the dbflib ones, i.e. the
        # reverse of the module level dbflib_fieldtypes.
        to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,
                          FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Column objects that have no prec
        # attribute (e.g. MemoryColumn) get a precision of 12 for
        # doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # The records in the new file are numbered consecutively from 0
        # regardless of the row indices they come from.
        for rec_num, row in enumerate(rows):
            record = {}
            for key, value in table.ReadRowAsDict(row).items():
                record[name_map[key]] = value
            dbf.write_record(rec_num, record)
    finally:
        dbf.close()
551
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    The first line of the file is a '#' followed by the comma separated
    column names. Each row follows as one line of comma separated
    values formatted with %s. Rows for which ReadRowAsDict returns an
    empty dictionary are skipped. Nothing is written for a table
    without columns.

    NOTE: values are written without any quoting, so a value that
    contains a comma or a newline produces a malformed line.
    """

    outfile = open(filename, "w")

    # Make sure the file is closed even if reading from the table
    # raises an exception.
    try:
        columns = table.Columns()
        if columns:
            header = ",".join(["%s" % col.name for col in columns])
            outfile.write("#" + header + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                    outfile.write(line + "\n")
    finally:
        outfile.close()
580

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26