/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1025 - (hide annotations)
Mon May 26 11:46:17 2003 UTC (21 years, 9 months ago) by frank
Original Path: trunk/thuban/Thuban/Model/table.py
File MIME type: text/x-python
File size: 14851 byte(s)
(DBFTable.Width, MemoryTable.Width):
	Return width (in characters) for a column.
(DBFTable.Precision, MemoryTable.Precision): Return decimal precision.
(table_to_dbf): Write table to dbf file.
(table_to_csv): Write table to csv file.

1 bh 590 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 bh 6 # Authors:
3     # Bernhard Herzog <[email protected]>
4 jan 806 # Jan-Oliver Wagner <[email protected]>
5 frank 1025 # Frank Koormann <[email protected]>
6 bh 6 #
7     # This program is free software under the GPL (>=v2)
8     # Read the file COPYING coming with Thuban for details.
9    
10     """
11     Classes for handling tables of data.
12     """
13    
14     __version__ = "$Revision$"
15    
16 bh 998 import os
17 bh 839 import inspect
18     import warnings
19    
20 jan 1019 from base import TitledObject
21    
22 bh 6 import dbflib
23    
# The field types supported by a Table instance.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34    
35 jan 806
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class this is mixed into has to provide the new-style methods
    called here: NumRows, NumColumns, Column, ValueRange, UniqueValues
    and ReadRowAsDict. Every old-style method issues exactly one
    DeprecationWarning and then delegates to the new interface.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The name of the deprecated method is taken from the calling
        frame, so this has to be called directly by the old-interface
        method itself. With stacklevel 3 the warning is attributed to
        the code that called the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel=3)

    def __field_info(self, field):
        """Return (type, name, width, prec) for field without warning

        Shared by field_info and field_info_by_name so that each of them
        issues only its own deprecation warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows, see NumRows"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns, see NumColumns"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None for unknown columns

        Only a KeyError raised by the column lookup is translated into
        None.
        """
        self.__deprecation_warning()
        try:
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that the column lookup raises other
            # exceptions when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        The minimum and maximum are taken from ValueRange.
        """
        self.__deprecation_warning()
        # NOTE(review): the table classes in this module document that
        # ValueRange returns None for an empty table, in which case
        # this unpacking raises a TypeError.
        low, high = self.ValueRange(fieldName)
        return ((low, None), (high, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the unique values, see UniqueValues"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return row r as a dictionary, see ReadRowAsDict"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89 bh 6
90 bh 818
91    
class DBFColumn:

    """Description of a single column of a DBFTable

    Public attributes of an instance:

        name -- the column's name
        type -- the column's type, one of FIELDTYPE_STRING,
                FIELDTYPE_INT or FIELDTYPE_DOUBLE
        index -- the column's index
        width -- the width of the data in the column
        prec -- the precision of the data (only valid if type is
                FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value object: the arguments are stored unchanged.
        self.index = index
        self.name = name
        self.type = type
        self.width = width
        self.prec = prec
112    
113    
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename and collect its column descriptions

        The base name of filename is used as the title of the table.
        """
        self.filename = filename
        title = os.path.basename(self.filename)
        TitledObject.__init__(self, title)
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. Each column is
        # registered in column_map under its name and under its index
        # so that either can be used to look it up.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()
        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the values as dictionary keys to drop duplicates.
        seen = {}
        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Precision(self, col):
        """Return the precision of the column given by name or index"""
        return self.column_map[col].prec

    def Width(self, col):
        """Return the width of the column given by name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write object before closing the current
            # one: if opening fails the exception propagates and
            # self.dbf is left untouched.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename
269 bh 765
270    
class MemoryColumn:

    """Description of a single column of a MemoryTable

    Public attributes of an instance:

        name -- the column's name
        type -- the column's type
        index -- the column's index
    """

    def __init__(self, name, type, index):
        # Plain value object: the arguments are stored unchanged.
        self.index = index
        self.type = type
        self.name = name
277    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as is, it is not copied.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. Each column is
        # registered in column_map under its name and under its index
        # so that either can be used to look it up.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # Use the values as dictionary keys to drop duplicates.
        seen = {}
        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by
        index or name). Doubles are formatted with the precision
        returned by Precision, ints with %d and everything else with
        %s. If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        index = column.index
        values = [row[index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%%.%df" % self.Precision(col)
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        return max([len(template % value) for value in values])

    def Precision(self, col):
        """Return the precision of the column

        The return value is the maximum number of numeric characters
        after the decimal if column type is double. Else precision zero
        is returned. The column can be represented by index or name.
        """
        column = self.column_map[col]
        if column.type != FIELDTYPE_DOUBLE:
            return 0

        index = column.index
        values = [row[index] for row in self.data]
        if not values:
            return 0

        # str(value % 1) is the fractional part including the leading
        # "0." which accounts for the two characters subtracted below.
        longest = max([len(str(value % 1)) for value in values])
        if longest > 2:
            return longest - 2
        return 0

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be names (or indices) of
        columns. Only the columns contained in the dictionary are
        changed, the others keep their values. A KeyError is raised for
        an unknown column, in which case the row is not modified.

        If it's a sequence it replaces the entire row, so all fields
        must be present in the right order.
        """
        # TODO: Check for correct length and perhaps also for correct
        # types in case values is a sequence. How to report problems?
        if isinstance(values, dict):
            # Build the new row completely before storing it so that a
            # bad key leaves the table unchanged.
            row = list(self.data[record])
            for key, value in values.items():
                row[self.column_map[key].index] = value
            values = tuple(row)
        self.data[record] = values
437 frank 1025
438    
def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    For every column of the table a field with the column's name, type,
    width and precision is added to the new file. Then all rows are
    written in order. The dbf file is closed even if one of these steps
    raises an exception.

    The table has to provide Columns, Precision, Width, NumRows and
    ReadRowAsDict.
    """
    dbf = dbflib.create(filename)
    try:
        # Inverse of the module-level dbflib_fieldtypes mapping: from
        # our field type constants to the dbflib ones.
        fieldtypes_to_dbflib = {FIELDTYPE_STRING: dbflib.FTString,
                                FIELDTYPE_INT: dbflib.FTInteger,
                                FIELDTYPE_DOUBLE: dbflib.FTDouble}

        # Initialise the header.
        # NOTE(review): MemoryTable.Width returns None for a table
        # without rows; confirm how add_field deals with that.
        for col in table.Columns():
            prec = table.Precision(col.name)
            width = table.Width(col.name)
            dbf.add_field(col.name, fieldtypes_to_dbflib[col.type],
                          width, prec)

        for i in range(table.NumRows()):
            dbf.write_record(i, table.ReadRowAsDict(i))
    finally:
        dbf.close()
457    
def table_to_csv(table, filename):
    """Export table to csv file.

    The first line of the file is a header consisting of a '#' followed
    by the comma separated column names. It is followed by one line per
    row with the comma separated values in column order. Rows for which
    ReadRowAsDict returns an empty dictionary are skipped. For a table
    without columns an empty file is written.

    The values are written with %s formatting and are neither quoted
    nor escaped, so a value that contains a comma or a newline yields
    an ambiguous line.

    The file is closed even if reading from the table raises an
    exception.
    """
    out = open(filename, "w")
    try:
        names = [col.name for col in table.Columns()]
        if not names:
            return

        out.write("#" + ",".join(["%s" % name for name in names]) + "\n")

        for i in range(table.NumRows()):
            record = table.ReadRowAsDict(i)
            if record:
                fields = ["%s" % record[name] for name in names]
                out.write(",".join(fields) + "\n")
    finally:
        out.close()
479    

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26