/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1919 - (hide annotations)
Mon Nov 3 17:33:36 2003 UTC (21 years, 4 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/table.py
File MIME type: text/x-python
File size: 18280 byte(s)
(DBFTable.ReadValue): Use the new
read_attribute method of the dbf objects

1 bh 590 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 bh 6 # Authors:
3     # Bernhard Herzog <[email protected]>
4 jan 806 # Jan-Oliver Wagner <[email protected]>
5 frank 1025 # Frank Koormann <[email protected]>
6 bh 6 #
7     # This program is free software under the GPL (>=v2)
8     # Read the file COPYING coming with Thuban for details.
9    
10     """
11     Classes for handling tables of data.
12     """
13    
14     __version__ = "$Revision$"
15    
16 bh 998 import os
17 bh 839 import inspect
18     import warnings
19    
20 jan 1019 from base import TitledObject
21    
22 bh 6 import dbflib
23    
# Identifiers for the column types a table in this module can have.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translation from the field type constants of dbflib to the
# identifiers defined above.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34    
35 jan 806
class OldTableInterfaceMixin:

    """Mixin providing the deprecated table methods on top of the new ones

    Each method issues a DeprecationWarning and then delegates to the
    corresponding method of the new interface (NumRows, NumColumns,
    Column, ValueRange, UniqueValues, ReadRowAsDict), which the class
    using this mixin has to provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The method name put into the message is taken from the frame
        that called this helper, so it must be called directly from the
        deprecated method.
        """
        caller_frame = inspect.currentframe().f_back
        message = ("The %s method of the old table interface"
                   " is deprecated" % caller_frame.f_code.co_name)
        # stacklevel 3 attributes the warning to the code that called
        # the deprecated method (1 is this helper, 2 the method itself).
        warnings.warn(message, DeprecationWarning, stacklevel = 3)

    def record_count(self):
        """Deprecated. Return the result of NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the result of NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the object returned by the Column
        method doesn't have attributes for them.
        """
        self.__deprecation_warning()
        column = self.Column(field)
        ftype = column.type
        name = column.name
        width = getattr(column, "width", 0)
        prec = getattr(column, "prec", 0)
        return (ftype, name, width, prec)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None on a KeyError"""
        self.__deprecation_warning()
        try:
            info = self.field_info(col)
        except KeyError:
            # FIXME: It may be that field_info raises other exceptions
            # when the name is not a valid column name.
            info = None
        return info

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) from ValueRange()"""
        self.__deprecation_warning()
        lowest, highest = self.ValueRange(fieldName)
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89 bh 6
90 bh 818
91    
class DBFColumn:

    """Description of one column of a DBFTable

    The constructor arguments are stored unchanged as public
    attributes:

        name -- Name of the column
        type -- Type of the column (one of FIELDTYPE_STRING,
                FIELDTYPE_INT or FIELDTYPE_DOUBLE)
        width -- The width of the data in the column
        prec -- The precision of the data (only valid for
                type == FIELDTYPE_DOUBLE)
        index -- The index of the column
    """

    def __init__(self, name, type, width, prec, index):
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec
112    
113    
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename and read its column definitions

        The absolute path of filename is remembered and the title of
        the table is set to the base name of the file without its
        extension.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. Each column can be
        # looked up in column_map by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' is equivalent to the has_key method, which no longer
        # exists in Python 3.
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # The values are read one at a time so that the whole column
        # never has to be held in memory.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; in Python 3
        # keys() returns a view that has no sort method.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it

        Calling Destroy again after the file has been closed does
        nothing.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.

        On the first call the DBF file is reopened for reading and
        writing and that file object is used for all further IO.
        """
        if not self._writable:
            # Open the read/write object first so that the read-only
            # one stays in place if the file can't be opened for writing.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute path of the file the DBFTable was created from
        """
        return self.filename
295 bh 765
296    
class MemoryColumn:

    """Description of one column of a MemoryTable

    The constructor arguments are stored unchanged as the public
    attributes name, type and index.
    """

    def __init__(self, name, type, index):
        self.index = index
        self.name, self.type = name, type
303    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as it is, it's not copied.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. Each column can be
        # looked up in column_map by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' is equivalent to the has_key method, which no longer
        # exists in Python 3.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; in Python 3
        # keys() returns a view that has no sort method.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places for this.

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            # Wrap the value in a tuple so that a value which is itself
            # a tuple is formatted as one item.
            length = len(template % (value,))
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with the index record by values"""
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
460 frank 1025
461    
462 bh 1371
463     def _find_dbf_column_names(names):
464     """Determine the column names to use in a DBF file
465    
466     DBF files have a length limit of 10 characters on the column names
467     so when writing an arbitrary Thuban table to a DBF file we may have
468     we may have to rename some of the columns making sure that they're
469     unique in the DBF file too.
470    
471     Names that are already short enough will stay the same. Longer names
472     will be truncated to 10 characters or if that isn't unique it will
473     be truncated more and filled up with digits.
474    
475     The parameter names should be a list of the column names. The return
476     value will be a dictionary mapping the names in the input list to
477     the names to use in the DBF file.
478     """
479     # mapping from the original names in table to the names in the DBF
480     # file
481     name_map = {}
482    
483     # First, we keep all names that are already short enough
484     for i in range(len(names) - 1, -1, -1):
485     if len(names[i]) <= 10:
486     name_map[names[i]] = names[i]
487     del names[i]
488    
489     # dict used as a set of all names already used as DBF column names
490     used = name_map.copy()
491    
492     # Go through all longer names. If the name truncated to 10
493     # characters is not used already, we use that. Otherwise we truncate
494     # it more and append numbers until we get an unused name
495     for name in names:
496     truncated = name[:10]
497     num = 0; numstr = ""
498     #print "truncated", truncated, num
499     while truncated in used and len(numstr) < 10:
500     num += 1
501     numstr = str(num)
502     truncated = name[:10 - len(numstr)] + numstr
503     #print "truncated", truncated, num
504     if len(numstr) >= 10:
505     # This case should never happen in practice as tables with
506     # 10^10 columns seem very unlikely :)
507     raise ValueError("Can't find unique dbf column name")
508    
509     name_map[name] = truncated
510     used[truncated] = 1
511    
512     return name_map
513    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file. The rows are
    written in the order given and numbered consecutively from 0 in the
    new file.
    """

    dbf = dbflib.create(filename)
    # Make sure the new file is closed even if writing fails part way
    # through.
    try:
        # Map our field type constants to those of dbflib. This is the
        # inverse of the module level dbflib_fieldtypes mapping, hence
        # the different name.
        to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,
                          FIELDTYPE_DOUBLE: dbflib.FTDouble}

        # DBF column names may have to be shortened and made unique.
        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Columns without a prec attribute (i.e.
        # columns of tables other than DBFTable) get a precision of 12
        # if they hold doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
551    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file, in the order in
    which they are to be written.

    If the table has columns, the first line of the file is a header
    consisting of a '#' followed by the comma separated column names.
    Values are written with their plain string representation; no
    quoting or escaping is done. Rows for which ReadRowAsDict returns
    an empty dictionary are skipped.
    """

    columns = table.Columns()
    outfile = open(filename, "w")
    # Make sure the file is closed even if reading from the table or
    # writing fails.
    try:
        if columns:
            header = ",".join(["%s" % (col.name,) for col in columns])
            outfile.write("#" + header + "\n")

        if rows is None:
            rows = range(table.NumRows())

        for i in rows:
            record = table.ReadRowAsDict(i)
            if record:
                # Wrap each value in a tuple so that a value which is
                # itself a tuple is formatted as one item.
                fields = ["%s" % (record[col.name],) for col in columns]
                outfile.write(",".join(fields) + "\n")
    finally:
        outfile.close()
580    

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26