/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 6 by bh, Tue Aug 28 15:41:52 2001 UTC revision 1025 by frank, Mon May 26 11:46:17 2003 UTC
# Line 1  Line 1 
1  # Copyright (c) 2001 by Intevation GmbH  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4    # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 11  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 24  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
 class Table:  
35    
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method issues a DeprecationWarning and then delegates
    to a method of the new interface (NumRows, NumColumns, Column,
    ValueRange, UniqueValues, ReadRowAsDict). The class this mixin is
    combined with has to provide those methods.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        This must be called directly by the deprecated method itself:
        the method name in the message is taken from the calling frame
        and stacklevel = 3 attributes the warning to the code that
        called the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __field_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        Shared implementation of field_info and field_info_by_name. It
        deliberately issues no warning so that each deprecated entry
        point warns exactly once, under its own name.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the result of NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the result of NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None on KeyError"""
        self.__deprecation_warning()
        try:
            # Use the non-warning helper. Going through field_info would
            # issue a second warning that points into this mixin
            # instead of at the caller.
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        The minimum and maximum are taken from ValueRange(fieldName).
        NOTE(review): the table classes in this module return None from
        ValueRange for an empty table, in which case the unpacking below
        raises a TypeError -- confirm whether callers rely on that.
        """
        self.__deprecation_warning()
        lowest, highest = self.ValueRange(fieldName)
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89    
90    
91    
class DBFColumn:

    """Description of a single column of a DBFTable

    Public attributes of an instance:

    name -- the name of the column
    type -- the type of the column (one of FIELDTYPE_STRING,
            FIELDTYPE_INT or FIELDTYPE_DOUBLE)
    index -- the index of the column
    width -- the width of the data in the column
    prec -- the precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value holder: store the arguments unchanged.
        self.name, self.type, self.index = name, type, index
        self.width, self.prec = width, prec
112    
113    
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename and read its column definitions

        The title of the table is the basename of filename.
        """
        self.filename = filename
        title = os.path.basename(self.filename)
        TitledObject.__init__(self, title)
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map is keyed by
        # both the column name and the column index so that all lookups
        # accept either.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if col is neither a known name nor a known index.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The "in" operator instead of dict.has_key: same result, and it
        # also works on Python versions where has_key no longer exists.
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # The dictionary is only used as a set of the values seen.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; keys() is not
        # guaranteed to return a sortable list on every Python version.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Precision(self, col):
        """Return column precision"""
        return self.column_map[col].prec

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write object before closing the read-only
            # one, so that self.dbf is left untouched if opening for
            # writing raises.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename
269    
270    
class MemoryColumn:

    """Description of a single column of a MemoryTable

    Public attributes of an instance:

    name -- the name of the column
    type -- the type of the column
    index -- the index of the column
    """

    def __init__(self, name, type, index):
        # Plain value holder: store the arguments unchanged.
        self.name, self.type, self.index = name, type, index
277    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map is keyed by
        # both the column name and the column index so that all lookups
        # accept either.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The "in" operator instead of dict.has_key: same result, and it
        # also works on Python versions where has_key no longer exists.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # The dictionary is only used as a set of the values seen.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; keys() is not
        # guaranteed to return a sortable list on every Python version.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by
        index or name). If the table has no rows the return value is
        None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        # Pick the format the values of this column type are measured
        # with.
        if column.type == FIELDTYPE_DOUBLE:
            template = "%%.%df" % self.Precision(col)
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        width = 0
        for value in values:
            length = len(template % value)
            if length > width:
                width = length

        return width

    def Precision(self, col):
        """Return the precision of the column

        The return value is the maximum number of numeric characters after the
        decimal if column type is double. Else precision zero is returned.
        The column can be represented by index or name.
        """
        column = self.column_map[col]
        if column.type != FIELDTYPE_DOUBLE:
            return 0

        values = [row[column.index] for row in self.data]
        if not values:
            return 0

        # Measure the string form of the fractional part, e.g. "0.25".
        # NOTE(review): this depends on how str() formats floats in the
        # running Python version -- confirm the results for the target
        # version.
        longest = 0
        for value in values:
            length = len(str(value % 1))
            if length > longest:
                longest = length

        # Subtract the two characters of the leading "0.".
        if longest > 2:
            return longest - 2
        return 0

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values"""
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
437    
438    
def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    table has to provide the Columns, Precision, Width, NumRows and
    ReadRowAsDict methods. The DBF file is closed again even if adding a
    field or writing a record raises an exception.
    """
    dbf = dbflib.create(filename)

    # Maps the table field types to the dbflib ones. This is the inverse
    # of the module level dbflib_fieldtypes and deliberately has its own
    # name so that it does not shadow it.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    try:
        # Initialise the header. Width and precision come from the
        # table itself.
        for col in table.Columns():
            prec = table.Precision(col.name)
            width = table.Width(col.name)
            dbf.add_field(col.name, to_dbflib_type[col.type], width, prec)

        for i in range(table.NumRows()):
            dbf.write_record(i, table.ReadRowAsDict(i))
    finally:
        dbf.close()
457    
def table_to_csv(table, filename):
    """Export table to csv file.

    The first line of the file is a header consisting of "#" followed
    by the comma separated column names. Every row of the table follows
    as one line of comma separated values, formatted with "%s". A row
    whose dictionary is empty is written as an empty line. If the table
    has no columns the file is created empty.

    The values are not quoted or escaped.
    """
    out = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            out.write("#" + ",".join(names) + "\n")

            for i in range(table.NumRows()):
                record = table.ReadRowAsDict(i)
                # Start every row with a fresh line so that an empty
                # record can neither fail on an unbound name nor repeat
                # the text of the previous row.
                line = ""
                if len(record):
                    line = ",".join(["%s" % record[name] for name in names])
                out.write(line + "\n")
    finally:
        # Close the file even if reading from the table fails.
        out.close()
479    

Legend:
Removed from v.6  
changed lines
  Added in v.1025

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26