/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 806 by jan, Fri May 2 16:43:59 2003 UTC revision 1025 by frank, Mon May 26 11:46:17 2003 UTC
# Line 2  Line 2 
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4  # Jan-Oliver Wagner <[email protected]>  # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 12  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 25  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
 class MemoryTable:  
35    
36      """Quite simple table implementation that operates on a list of tuples.  class OldTableInterfaceMixin:
     All of the data are kept in the memory."""  
37    
38      def __init__(self, fields, data):      """Mixin to implement the old table interface using the new one"""
         """Initialize the MemoryTable  
39    
40          Parameters:      def __deprecation_warning(self):
41          fields -- List of (name, field_type) pairs          """Issue a DeprecationWarning for code that uses the old interface"""
42          data -- List of tuples, one for each row of data          callername = inspect.currentframe().f_back.f_code.co_name
43          """          warnings.warn("The %s method of the old table interface"
44          self.fields = fields                        " is deprecated" % callername,
45          self.data = data                        DeprecationWarning, stacklevel = 3)
46    
47        def record_count(self):
48            self.__deprecation_warning()
49            return self.NumRows()
50    
51      def field_count(self):      def field_count(self):
52          return len(self.fields)          self.__deprecation_warning()
53            return self.NumColumns()
54    
55      def field_info(self, index):      def field_info(self, field):
56          name, type = self.fields[index]          """Return a tuple (type, name, width, prec) for the field no. field
         return (type, name)  
57    
58      def record_count(self):          type is the data type of the field, name the name, width the
59          return len(self.data)          field width in characters and prec the decimal precision. width
60            and prec will be zero if the information returned by the Column
61            method doesn't provide values for them.
62            """
63            self.__deprecation_warning()
64            col = self.Column(field)
65            return (col.type, col.name,
66                   getattr(col, "width", 0), getattr(col, "prec", 0))
67    
68        def field_info_by_name(self, col):
69            self.__deprecation_warning()
70            try:
71                return self.field_info(col)
72            except KeyError:
73                # FIXME: It may be that field_info raises other exceptions
74                # when the name is not a valid column name.
75                return None
76    
77        def field_range(self, fieldName):
78            self.__deprecation_warning()
79            min, max = self.ValueRange(fieldName)
80            return ((min, None), (max, None))
81    
82        def GetUniqueValues(self, field):
83            self.__deprecation_warning()
84            return self.UniqueValues(field)
85    
86        def read_record(self, r):
87            self.__deprecation_warning()
88            return self.ReadRowAsDict(r)
89    
     def read_record(self, index):  
         return dict([(self.fields[i][0], self.data[index][i])  
                       for i in range(len(self.fields))])  
90    
     def write_record(self, record, values):  
         # TODO: Check for correct lenght and perhaps also  
         # for correct types in case values is a tuple. How to report problems?  
         # TODO: Allow values to be a dictionary and write the single  
         # fields that are specified.  
         self.data[record] = values  
91    
92    class DBFColumn:
93    
94  class DBFTable:      """Description of a column in a DBFTable
95    
96        Instances have the following public attributes:
97    
98        name -- Name of the column
99        type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
100                FIELDTYPE_DOUBLE)
101        index -- The index of the column
102        width -- the width of the data in the column
103        prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
104        """
105    
106        def __init__(self, name, type, width, prec, index):
107            self.name = name
108            self.type = type
109            self.width = width
110            self.prec = prec
111            self.index = index
112    
113    
114    class DBFTable(TitledObject, OldTableInterfaceMixin):
115    
116      """      """
117      Table interface for the data in a DBF file      Table interface for the data in a DBF file
# Line 84  class DBFTable: Line 133  class DBFTable:
133    
134      def __init__(self, filename):      def __init__(self, filename):
135          self.filename = filename          self.filename = filename
136            title = os.path.basename(self.filename)
137            TitledObject.__init__(self, title)
138          self.dbf = dbflib.DBFFile(filename)          self.dbf = dbflib.DBFFile(filename)
139    
140          # If true, self.dbf is open for writing.          # If true, self.dbf is open for writing.
141          self._writable = 0          self._writable = 0
142    
143      def Destroy(self):          # Create the column information objects
144          self.dbf.close()          self.columns = []
145          self.dbf = None          self.column_map = {}
146            for i in range(self.NumColumns()):
147                ftype, name, width, prec = self.dbf.field_info(i)
148                ftype = dbflib_fieldtypes[ftype]
149                index = len(self.columns)
150                col = DBFColumn(name, ftype, width, prec, index)
151                self.columns.append(col)
152                self.column_map[name] = col
153                self.column_map[index] = col
154    
155      def record_count(self):      def NumRows(self):
156          """Return the number of records"""          """Return the number of rows in the table"""
157          return self.dbf.record_count()          return self.dbf.record_count()
158    
159      def field_count(self):      def NumColumns(self):
160          """Return the number of fields in a record"""          """Return the number of columns in the table"""
161          return self.dbf.field_count()          return self.dbf.field_count()
162    
163      def field_info(self, field):      def Columns(self):
164          """Return a tuple (type, name, width, prec) for the field no. field          """Return the table's column definitions
165    
166          type is the data type of the field, name the name, width the          The return value is a sequence of DBFColumn instances, one for
167          field width in characters and prec the decimal precision.          each column.
168          """          """
169          type, name, width, prec = self.dbf.field_info(field)          return self.columns
         type = dbflib_fieldtypes[type]  
         return type, name, width, prec  
170    
171      def field_info_by_name(self, fieldName):      def Column(self, col):
172          count = self.field_count()          """Return information about the column given by its name or index
173    
174          for i in range(count):          The returned object is an instance of DBFColumn
175              info = self.field_info(i)          """
176              if info[1] == fieldName:          return self.column_map[col]
                 return info  
   
         return None  
   
     def field_range(self, fieldName):  
         """Finds the first occurences of the minimum and maximum values  
         in the table for the given field.  
177    
178          This assumes that the standard comparison operators (<, >, etc.)      def HasColumn(self, col):
179          will work for the given data.          """Return whether the table has a column with the given name or index
180            """
181            return self.column_map.has_key(col)
182    
183          Returns a tuple ((min, rec), (max, rec)) where:      def ReadRowAsDict(self, row):
184              min is the minimum value          """Return the entire row as a dictionary with column names as keys"""
185              max is the maximum value          return self.dbf.read_record(row)
             rec is the record number where the value was found. One  
                 should check that the record number of min is not  
                 the same as the record number of max.  
186    
187          Returns None if there are no records      def ReadValue(self, row, col):
188            """Return the value of the specified row and column
189    
190            The col parameter may be the index of the column or its name.
191          """          """
192            return self.dbf.read_record(row)[self.column_map[col].name]
193    
194        def ValueRange(self, col):
195            """Return the minimum and maximum values of the values in the column
196    
197          count = self.record_count()          The return value is a tuple (min, max) unless the table is empty
198            in which case the return value is None.
199            """
200            count = self.NumRows()
201    
202          if count == 0:          if count == 0:
203              return None              return None
204    
205          rec = self.read_record(0)          min = max = self.ReadValue(0, col)
   
         min = rec[fieldName]  
         min_rec = 0  
   
         max = rec[fieldName]  
         max_rec = 0  
   
206          for i in range(1, count):          for i in range(1, count):
207              rec = self.read_record(i)              value = self.ReadValue(i, col)
208              data = rec[fieldName]              if value < min:
209                    min = value
210                elif value > max:
211                    max = value
212    
213              if data < min:          return (min, max)
                 min = data  
                 min_rec = rec  
             elif data > max:  
                 max = data  
                 max_rec = rec  
   
         return ((min, min_rec), (max, max_rec))  
   
     def GetUniqueValues(self, fieldName):  
         """Return a list of all unique entries in the table for the given  
         field name.  
         """  
214    
215        def UniqueValues(self, col):
216            """Return a sorted list of all unique values in the column col"""
217          dict = {}          dict = {}
218    
219          for i in range(0, self.record_count()):          for i in range(self.NumRows()):
220              rec = self.read_record(i)              value = self.ReadValue(i, col)
221              data = rec[fieldName]              dict[value] = 0
222    
223            values = dict.keys()
224            values.sort()
225            return values
226    
227        def Dependencies(self):
228            """Return an empty sequence. The DBFTable doesn't depend on anything"""
229            return ()
230    
231        # DBF specific interface parts.
232    
233        def Precision(self, col):
234            """Return column precision"""
235            return self.column_map[col].prec
236    
237        def Width(self, col):
238            """Return column width"""
239            return self.column_map[col].width
240    
241              if not dict.has_key(data):      def Destroy(self):
242                  dict[data] = 0          self.dbf.close()
243            self.dbf = None
         return dict.keys()  
   
     def read_record(self, record):  
         """Return the record no. record as a dict mapping field names to values  
         """  
         return self.dbf.read_record(record)  
244    
245      def write_record(self, record, values):      def write_record(self, record, values):
246          """Write the values into the record          """Write the values into the record
# Line 208  class DBFTable: Line 263  class DBFTable:
263          self.dbf.write_record(record, values)          self.dbf.write_record(record, values)
264          self.dbf.commit()          self.dbf.commit()
265    
266        def FileName(self):
267            """Return the filename the DBFTable was instantiated with"""
268            return self.filename
269    
270    
271    class MemoryColumn:
272    
273        def __init__(self, name, type, index):
274            self.name = name
275            self.type = type
276            self.index = index
277    
278    class MemoryTable(TitledObject, OldTableInterfaceMixin):
279    
280        """Very simple table implementation that operates on a list of tuples"""
281    
282        def __init__(self, fields, data):
283            """Initialize the MemoryTable
284    
285            Parameters:
286            fields -- List of (name, field_type) pairs
287            data -- List of tuples, one for each row of data
288            """
289            self.data = data
290            title = 'MemoryTable'
291            TitledObject.__init__(self, title)
292    
293            # Create the column information objects
294            self.columns = []
295            self.column_map = {}
296            for name, ftype in fields:
297                index = len(self.columns)
298                col = MemoryColumn(name, ftype, index)
299                self.columns.append(col)
300                self.column_map[name] = col
301                self.column_map[index] = col
302    
303        def NumColumns(self):
304            """Return the number of columns in the table"""
305            return len(self.columns)
306    
307        def Column(self, col):
308            """Return information about the column given by its name or index
309    
310            The returned object is an instance of MemoryColumn.
311            """
312            return self.column_map[col]
313    
314        def Columns(self):
315            """Return the table's column definitions
316    
317            The return value is a sequence of MemoryColumn instances, one
318            for each column.
319            """
320            return self.columns
321    
322        def HasColumn(self, col):
323            """Return whether the table has a column with the given name or index
324            """
325            return self.column_map.has_key(col)
326    
327        def NumRows(self):
328            """Return the number of rows in the table"""
329            return len(self.data)
330    
331        def ReadValue(self, row, col):
332            """Return the value of the specified row and column
333    
334            The col parameter may be the index of the column or its name.
335            """
336            return self.data[row][self.column_map[col].index]
337    
338        def ReadRowAsDict(self, index):
339            """Return the entire row as a dictionary with column names as keys"""
340            return dict([(col.name, self.data[index][col.index])
341                          for col in self.columns])
342    
343        def ValueRange(self, col):
344            """Return the minimum and maximum values of the values in the column
345    
346            The return value is a tuple (min, max) unless the table is empty
347            in which case the return value is None.
348            """
349    
350            index = self.column_map[col].index
351            values = [row[index] for row in self.data]
352            if not values:
353                return None
354    
355            return min(values), max(values)
356    
357        def UniqueValues(self, col):
358            """Return a sorted list of all unique values in the column col
359    
360            col can be either column index or name.
361            """
362            dict = {}
363    
364            for i in range(self.NumRows()):
365                value = self.ReadValue(i, col)
366                dict[value] = 0
367    
368            values = dict.keys()
369            values.sort()
370            return values
371    
372        def Width(self, col):
373            """Return the maximum width of values in the column
374    
375            The return value is the the maximum length of string representation
376            of the values in the column (represented by index or name)."""
377            max = 0
378            
379            type  = self.column_map[col].type
380            index = self.column_map[col].index
381            values = [row[index] for row in self.data]
382            if not values:
383                return None
384    
385            if type == FIELDTYPE_DOUBLE:
386                prec = self.Precision(col)
387                format = "%%.%df" % prec
388            elif type == FIELDTYPE_INT:
389                format = "%d"
390            else:
391                format = "%s"
392            for value in values:
393                l = len(format % value)
394                if l > max:
395                    max = l
396    
397            return max
398    
399        def Precision(self, col):
400            """Return the precision of the column
401    
402            The return value is the maximum number of numeric characters after the
403            decimal if column type is double. Else precision zero is returned.
404            The column can be represented by index or name.
405            """
406        
407            type  = self.column_map[col].type
408            if type == FIELDTYPE_DOUBLE:
409                index = self.column_map[col].index
410                values = [row[index] for row in self.data]
411                if not values:
412                    return 0
413                
414                max = 0
415                for value in values:
416                    l = len(str(value % 1))
417                    if l > max:
418                        max = l
419                if max > 2:
420                    return max - 2
421                else:
422                    return 0
423            else:
424                return 0
425              
426        def Dependencies(self):
427            """Return an empty sequence. The MemoryTable doesn't depend on anything
428            """
429            return ()
430    
431        def write_record(self, record, values):
432            # TODO: Check for correct length and perhaps also
433            # for correct types in case values is a tuple. How to report problems?
434            # TODO: Allow values to be a dictionary and write the single
435            # fields that are specified.
436            self.data[record] = values
437    
438    
439    def table_to_dbf(table, filename):
440        """Create the dbf file filename from the table"""
441        dbf = dbflib.create(filename)
442    
443        dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
444                             FIELDTYPE_INT: dbflib.FTInteger,
445                             FIELDTYPE_DOUBLE: dbflib.FTDouble}
446    
447        # Initialise the header. Distinguish between DBFTable and others.
448        for col in table.Columns():
449            prec  = table.Precision(col.name)
450            width = table.Width(col.name)
451            dbf.add_field(col.name, dbflib_fieldtypes[col.type], width, prec)
452    
453        for i in range(table.NumRows()):
454            record = table.ReadRowAsDict(i)
455            dbf.write_record(i, record)
456        dbf.close()
457    
458    def table_to_csv(table, filename):
459        """Export table to csv file."""
460    
461        file = open(filename,"w")
462        columns = table.Columns()
463        if columns:
464            header = "#%s" % columns[0].name
465            for col in columns[1:]:
466                header = header + ",%s" % col.name
467            header = header + "\n"
468            file.write(header)
469    
470            for i in range(table.NumRows()):
471                record = table.ReadRowAsDict(i)
472                if len(record):
473                    line = "%s" % record[columns[0].name]
474                    for col in columns[1:]:
475                        line = line + ",%s" % record[col.name]
476                line = line + "\n"
477                file.write(line)
478        file.close()
479    
 # Temporary backwards compatibility  
 Table = DBFTable  

Legend:
Removed from v.806  
changed lines
  Added in v.1025

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26