/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 806 by jan, Fri May 2 16:43:59 2003 UTC revision 1662 by bh, Wed Aug 27 13:51:01 2003 UTC
# Line 2  Line 2 
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4  # Jan-Oliver Wagner <[email protected]>  # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 12  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 25  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
 class MemoryTable:  
35    
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every method issues a DeprecationWarning and then delegates to the
    corresponding method of the new interface (NumRows, NumColumns,
    Column, ValueRange, UniqueValues, ReadRowAsDict), which the class
    this mixin is combined with has to provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface"""
        # The name of the deprecated method is taken from the frame of
        # our direct caller.  stacklevel = 3 attributes the warning to
        # the code that called the deprecated method.
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def record_count(self):
        """Deprecated. Return self.NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return self.NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None for unknown columns

        Only a KeyError raised by field_info is treated as "unknown
        column".
        """
        self.__deprecation_warning()
        try:
            return self.field_info(col)
        except KeyError:
            # FIXME: It may be that field_info raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        The values come from self.ValueRange(fieldName). The record
        numbers that the old interface reported alongside the values
        are not available any more and are always None.

        Return None if ValueRange returns None (i.e. there are no
        rows), as the old interface did.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            # Unpacking None would raise a TypeError
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return self.UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return self.ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89    
90    
91    
class DBFColumn:

    """Describe a single column of a DBFTable

    Public attributes of the instances:

    name -- the column's name
    type -- the column's type (one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE)
    index -- the column's index
    width -- the width of the data in the column
    prec -- the precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value object: the arguments are stored unchanged
        self.name, self.type, self.index = name, type, index
        self.width, self.prec = width, prec
112    
113    
114  class DBFTable:  class DBFTable(TitledObject, OldTableInterfaceMixin):
115    
116      """      """
117      Table interface for the data in a DBF file      Table interface for the data in a DBF file
# Line 83  class DBFTable: Line 132  class DBFTable:
132      # work because a DBF file object buffers some data      # work because a DBF file object buffers some data
133    
134      def __init__(self, filename):      def __init__(self, filename):
135          self.filename = filename          self.filename = os.path.abspath(filename)
136    
137            # Omit the extension in the title as it's not really needed and
138            # it can be confusing because dbflib removes extensions and
139            # appends some variations of '.dbf' before it tries to open the
140            # file. So the title could be e.g. myshapefile.shp when the real
141            # filename is myshapefile.dbf
142            title = os.path.splitext(os.path.basename(self.filename))[0]
143            TitledObject.__init__(self, title)
144    
145          self.dbf = dbflib.DBFFile(filename)          self.dbf = dbflib.DBFFile(filename)
146    
147          # If true, self.dbf is open for writing.          # If true, self.dbf is open for writing.
148          self._writable = 0          self._writable = 0
149    
150      def Destroy(self):          # Create the column information objects
151          self.dbf.close()          self.columns = []
152          self.dbf = None          self.column_map = {}
153            for i in range(self.NumColumns()):
154                ftype, name, width, prec = self.dbf.field_info(i)
155                ftype = dbflib_fieldtypes[ftype]
156                index = len(self.columns)
157                col = DBFColumn(name, ftype, width, prec, index)
158                self.columns.append(col)
159                self.column_map[name] = col
160                self.column_map[index] = col
161    
162      def record_count(self):      def NumRows(self):
163          """Return the number of records"""          """Return the number of rows in the table"""
164          return self.dbf.record_count()          return self.dbf.record_count()
165    
166      def field_count(self):      def NumColumns(self):
167          """Return the number of fields in a record"""          """Return the number of columns in the table"""
168          return self.dbf.field_count()          return self.dbf.field_count()
169    
170      def field_info(self, field):      def Columns(self):
171          """Return a tuple (type, name, width, prec) for the field no. field          """Return the table's colum definitions
172    
173          type is the data type of the field, name the name, width the          The return value is a sequence of DBFColumn instances, one for
174          field width in characters and prec the decimal precision.          each column.
175          """          """
176          type, name, width, prec = self.dbf.field_info(field)          return self.columns
         type = dbflib_fieldtypes[type]  
         return type, name, width, prec  
177    
178      def field_info_by_name(self, fieldName):      def Column(self, col):
179          count = self.field_count()          """Return information about the column given by its name or index
180    
181          for i in range(count):          The returned object is an instance of DBFColumn
182              info = self.field_info(i)          """
183              if info[1] == fieldName:          return self.column_map[col]
                 return info  
184    
185          return None      def HasColumn(self, col):
186            """Return whether the table has a column with the given name or index
187            """
188            return self.column_map.has_key(col)
189    
190      def field_range(self, fieldName):      def RowIdToOrdinal(self, gid):
191          """Finds the first occurences of the minimum and maximum values          """Return the row ordinal given its id
         in the table for the given field.  
192    
193          This assumes that the standard comparison operators (<, >, etc.)          Since for DBFTables the row id is the row number, return the
194          will work for the given data.          value unchanged.
195            """
196            return gid
197    
198        def RowOrdinalToId(self, num):
199            """Return the rowid for given its ordinal
200    
201          Returns a tuple ((min, rec), (max, rec)) where:          Since for DBFTables the row id is the row number, return the
202              min is the minimum value          value unchanged.
203              max is the maximum value          """
204              rec is the record number where the value was found. One          return num
                 should check that the record number of min is not  
                 the same as the record number of max.  
205    
206          Returns None if there are no records      def ReadRowAsDict(self, row, row_is_ordinal = 0):
207            """Return the entire row as a dictionary with column names as keys
208    
209            The row_is_ordinal is ignored for DBF tables because the row id
210            is always the row number.
211          """          """
212            return self.dbf.read_record(row)
213    
214        def ReadValue(self, row, col, row_is_ordinal = 0):
215            """Return the value of the specified row and column
216    
217          count = self.record_count()          The col parameter may be the index of the column or its name.
218    
219          if count == 0:          The row_is_ordinal is ignored for DBF tables because the row id
220              return None          is always the row number.
221            """
222            return self.dbf.read_record(row)[self.column_map[col].name]
223    
224          rec = self.read_record(0)      def ValueRange(self, col):
225            """Return the minimum and maximum values of the values in the column
226    
227          min = rec[fieldName]          The return value is a tuple (min, max) unless the table is empty
228          min_rec = 0          in which case the return value is None.
229            """
230            count = self.NumRows()
231    
232          max = rec[fieldName]          if count == 0:
233          max_rec = 0              return None
234    
235            min = max = self.ReadValue(0, col)
236          for i in range(1, count):          for i in range(1, count):
237              rec = self.read_record(i)              value = self.ReadValue(i, col)
238              data = rec[fieldName]              if value < min:
239                    min = value
240                elif value > max:
241                    max = value
242    
243              if data < min:          return (min, max)
                 min = data  
                 min_rec = rec  
             elif data > max:  
                 max = data  
                 max_rec = rec  
   
         return ((min, min_rec), (max, max_rec))  
   
     def GetUniqueValues(self, fieldName):  
         """Return a list of all unique entries in the table for the given  
         field name.  
         """  
244    
245        def UniqueValues(self, col):
246            """Return a sorted list of all unique values in the column col"""
247          dict = {}          dict = {}
248    
249          for i in range(0, self.record_count()):          for i in range(self.NumRows()):
250              rec = self.read_record(i)              value = self.ReadValue(i, col)
251              data = rec[fieldName]              dict[value] = 0
252    
253              if not dict.has_key(data):          values = dict.keys()
254                  dict[data] = 0          values.sort()
255            return values
256    
257        def Dependencies(self):
258            """Return an empty sequence. The DBFTable doesn't depend on anything"""
259            return ()
260    
261        # DBF specific interface parts.
262    
263        def Width(self, col):
264            """Return column width"""
265            return self.column_map[col].width
266    
267          return dict.keys()      def Destroy(self):
268            self.dbf.close()
269      def read_record(self, record):          self.dbf = None
         """Return the record no. record as a dict mapping field names to values  
         """  
         return self.dbf.read_record(record)  
270    
271      def write_record(self, record, values):      def write_record(self, record, values):
272          """Write the values into the record          """Write the values into the record
# Line 208  class DBFTable: Line 289  class DBFTable:
289          self.dbf.write_record(record, values)          self.dbf.write_record(record, values)
290          self.dbf.commit()          self.dbf.commit()
291    
292        def FileName(self):
293            """Return the filename the DBFTable was instantiated with"""
294            return self.filename
295    
296    
class MemoryColumn:

    """Describe a single column of a MemoryTable

    Public attributes of the instances:

    name -- the column's name
    type -- the column's field type
    index -- the column's index
    """

    def __init__(self, name, type, index):
        # Plain value object: the arguments are stored unchanged
        self.name, self.type, self.index = name, type, index
303    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples

    The rows are kept in the list given to the constructor (the list is
    not copied). Columns can be addressed by name or by index.
    """

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map allows
        # lookup by both column name and column index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # "in" instead of has_key(), which newer Pythons don't have
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        result = {}
        for col in self.columns:
            result[col.name] = self.data[index][col.index]
        return result

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}
        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Make a real list first: keys() is not necessarily a sortable
        # list in newer Pythons
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places, ints
        with "%d" and everything else with "%s".

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            fmt = "%.12f"
        elif column.type == FIELDTYPE_INT:
            fmt = "%d"
        else:
            fmt = "%s"

        widest = 0
        for value in values:
            length = len(fmt % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with the index record with values"""
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
460    
461    
462    
463    def _find_dbf_column_names(names):
464        """Determine the column names to use in a DBF file
465    
466        DBF files have a length limit of 10 characters on the column names
467        so when writing an arbitrary Thuban table to a DBF file we may have
468        we may have to rename some of the columns making sure that they're
469        unique in the DBF file too.
470    
471        Names that are already short enough will stay the same. Longer names
472        will be truncated to 10 characters or if that isn't unique it will
473        be truncated more and filled up with digits.
474    
475        The parameter names should be a list of the column names. The return
476        value will be a dictionary mapping the names in the input list to
477        the names to use in the DBF file.
478        """
479        # mapping from the original names in table to the names in the DBF
480        # file
481        name_map = {}
482    
483        # First, we keep all names that are already short enough
484        for i in range(len(names) - 1, -1, -1):
485            if len(names[i]) <= 10:
486                name_map[names[i]] = names[i]
487                del names[i]
488    
489        # dict used as a set of all names already used as DBF column names
490        used = name_map.copy()
491    
492        # Go through all longer names. If the name truncated to 10
493        # characters is not used already, we use that. Otherwise we truncate
494        # it more and append numbers until we get an unused name
495        for name in names:
496            truncated = name[:10]
497            num = 0; numstr = ""
498            #print "truncated", truncated, num
499            while truncated in used and len(numstr) < 10:
500                num += 1
501                numstr = str(num)
502                truncated = name[:10 - len(numstr)] + numstr
503                #print "truncated", truncated, num
504            if len(numstr) >= 10:
505                # This case should never happen in practice as tables with
506                # 10^10 columns seem very unlikely :)
507                raise ValueError("Can't find unique dbf column name")
508    
509            name_map[name] = truncated
510            used[truncated] = 1
511    
512        return name_map
513    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file. The records in
    the DBF file are numbered consecutively starting at 0 in the order
    given by rows.

    Column names are mapped to names usable in a DBF file with
    _find_dbf_column_names.
    """

    dbf = dbflib.create(filename)

    # Make sure the DBF file is closed even if writing fails halfway
    try:
        # Mapping from the table's field types to the dbflib ones. This
        # is the reverse direction of the module level
        # dbflib_fieldtypes, hence the different name.
        to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,
                          FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Columns that don't have a prec
        # attribute get a precision of 12 if they're doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        recNum = 0
        for i in rows:
            # Translate the table's column names to the DBF names
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
551    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    The first line of the file is a header line consisting of '#'
    followed by the comma separated column names. Each row becomes one
    line with the comma separated values formatted with "%s". No
    quoting or escaping is done. A row for which ReadRowAsDict returns
    an empty dictionary is written as an empty line.

    If the table has no columns, an empty file is created.
    """

    outfile = open(filename, "w")
    # Make sure the file is closed even if reading the table fails
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            header = "#" + ",".join(["%s" % name for name in names])
            outfile.write(header + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[name] for name in names])
                else:
                    # Start from an empty line so that nothing of the
                    # previous row is written again
                    line = ""
                outfile.write(line + "\n")
    finally:
        outfile.close()
580    
 # Temporary backwards compatibility  
 Table = DBFTable  

Legend:
Removed from v.806  
changed lines
  Added in v.1662

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26