/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 765 by bh, Tue Apr 29 12:42:14 2003 UTC revision 1599 by bh, Mon Aug 18 12:45:28 2003 UTC
# Line 1  Line 1 
1  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4    # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 11  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 24  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
35  class DBFTable:  
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class this is mixed into has to provide the new-style methods
    NumRows, NumColumns, Column, ValueRange, UniqueValues and
    ReadRowAsDict. Every public method defined here issues a
    DeprecationWarning and then delegates to one of those methods.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The warning text names the method that called this helper.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        # stacklevel 3: skip this helper and the deprecated method so
        # that the warning is attributed to the code calling the
        # deprecated method.
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __field_info(self, field):
        """Return the tuple (type, name, width, prec) for column field

        Shared by field_info and field_info_by_name so that neither has
        to call the other and trigger a second deprecation warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the result of NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the result of NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__field_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None for unknown columns

        None is returned when the Column method raises a KeyError.
        """
        self.__deprecation_warning()
        try:
            return self.__field_info(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        The old interface returned the record numbers in place of the
        two None values; ValueRange does not provide them. Return None
        if ValueRange returns None, i.e. if there are no values.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89    
90    
91    
class DBFColumn:

    """Description of a single column of a DBFTable

    The public attributes of an instance are:

    name -- the column's name
    type -- the column's type (FIELDTYPE_STRING, FIELDTYPE_INT or
            FIELDTYPE_DOUBLE)
    index -- the index of the column
    width -- the width of the data in the column
    prec -- the precision of the data (only valid if the type is
            FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value holder: store every parameter under its own name.
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec
112    
113    
114    class DBFTable(TitledObject, OldTableInterfaceMixin):
115    
116      """      """
117      Table interface for the data in a DBF file      Table interface for the data in a DBF file
# Line 45  class DBFTable: Line 132  class DBFTable:
132      # work because a DBF file object buffers some data      # work because a DBF file object buffers some data
133    
134      def __init__(self, filename):      def __init__(self, filename):
135          self.filename = filename          self.filename = os.path.abspath(filename)
136    
137            # Omit the extension in the title as it's not really needed and
138            # it can be confusing because dbflib removes extensions and
139            # appends some variations of '.dbf' before it tries to open the
140            # file. So the title could be e.g. myshapefile.shp when the real
141            # filename is myshapefile.dbf
142            title = os.path.splitext(os.path.basename(self.filename))[0]
143            TitledObject.__init__(self, title)
144    
145          self.dbf = dbflib.DBFFile(filename)          self.dbf = dbflib.DBFFile(filename)
146    
147          # If true, self.dbf is open for writing.          # If true, self.dbf is open for writing.
148          self._writable = 0          self._writable = 0
149    
150      def Destroy(self):          # Create the column information objects
151          self.dbf.close()          self.columns = []
152          self.dbf = None          self.column_map = {}
153            for i in range(self.NumColumns()):
154                ftype, name, width, prec = self.dbf.field_info(i)
155                ftype = dbflib_fieldtypes[ftype]
156                index = len(self.columns)
157                col = DBFColumn(name, ftype, width, prec, index)
158                self.columns.append(col)
159                self.column_map[name] = col
160                self.column_map[index] = col
161    
162      def record_count(self):      def NumRows(self):
163          """Return the number of records"""          """Return the number of rows in the table"""
164          return self.dbf.record_count()          return self.dbf.record_count()
165    
166      def field_count(self):      def NumColumns(self):
167          """Return the number of fields in a record"""          """Return the number of columns in the table"""
168          return self.dbf.field_count()          return self.dbf.field_count()
169    
170      def field_info(self, field):      def Columns(self):
171          """Return a tuple (type, name, width, prec) for the field no. field          """Return the table's colum definitions
172    
173          type is the data type of the field, name the name, width the          The return value is a sequence of DBFColumn instances, one for
174          field width in characters and prec the decimal precision.          each column.
175          """          """
176          type, name, width, prec = self.dbf.field_info(field)          return self.columns
         type = dbflib_fieldtypes[type]  
         return type, name, width, prec  
   
     def field_info_by_name(self, fieldName):  
         count = self.field_count()  
177    
178          for i in range(count):      def Column(self, col):
179              info = self.field_info(i)          """Return information about the column given by its name or index
             if info[1] == fieldName:  
                 return info  
180    
181          return None          The returned object is an instance of DBFColumn
182            """
183      def field_range(self, fieldName):          return self.column_map[col]
         """Finds the first occurences of the minimum and maximum values  
         in the table for the given field.  
184    
185          This assumes that the standard comparison operators (<, >, etc.)      def HasColumn(self, col):
186          will work for the given data.          """Return whether the table has a column with the given name or index
187            """
188            return self.column_map.has_key(col)
189    
190          Returns a tuple ((min, rec), (max, rec)) where:      def ReadRowAsDict(self, row):
191              min is the minimum value          """Return the entire row as a dictionary with column names as keys"""
192              max is the maximum value          return self.dbf.read_record(row)
             rec is the record number where the value was found. One  
                 should check that the record number of min is not  
                 the same as the record number of max.  
193    
194          Returns None if there are no records      def ReadValue(self, row, col):
195            """Return the value of the specified row and column
196    
197            The col parameter may be the index of the column or its name.
198          """          """
199            return self.dbf.read_record(row)[self.column_map[col].name]
200    
201        def ValueRange(self, col):
202            """Return the minimum and maximum values of the values in the column
203    
204          count = self.record_count()          The return value is a tuple (min, max) unless the table is empty
205            in which case the return value is None.
206            """
207            count = self.NumRows()
208    
209          if count == 0:          if count == 0:
210              return None              return None
211    
212          rec = self.read_record(0)          min = max = self.ReadValue(0, col)
   
         min = rec[fieldName]  
         min_rec = 0  
   
         max = rec[fieldName]  
         max_rec = 0  
   
213          for i in range(1, count):          for i in range(1, count):
214              rec = self.read_record(i)              value = self.ReadValue(i, col)
215              data = rec[fieldName]              if value < min:
216                    min = value
217                elif value > max:
218                    max = value
219    
220              if data < min:          return (min, max)
                 min = data  
                 min_rec = rec  
             elif data > max:  
                 max = data  
                 max_rec = rec  
   
         return ((min, min_rec), (max, max_rec))  
   
     def GetUniqueValues(self, fieldName):  
         """Return a list of all unique entries in the table for the given  
         field name.  
         """  
221    
222        def UniqueValues(self, col):
223            """Return a sorted list of all unique values in the column col"""
224          dict = {}          dict = {}
225    
226          for i in range(0, self.record_count()):          for i in range(self.NumRows()):
227              rec = self.read_record(i)              value = self.ReadValue(i, col)
228              data = rec[fieldName]              dict[value] = 0
229    
230              if not dict.has_key(data):          values = dict.keys()
231                  dict[data] = 0          values.sort()
232            return values
233    
234        def Dependencies(self):
235            """Return an empty sequence. The DBFTable doesn't depend on anything"""
236            return ()
237    
238        # DBF specific interface parts.
239    
240        def Width(self, col):
241            """Return column width"""
242            return self.column_map[col].width
243    
244          return dict.keys()      def Destroy(self):
245            self.dbf.close()
246      def read_record(self, record):          self.dbf = None
         """Return the record no. record as a dict mapping field names to values  
         """  
         return self.dbf.read_record(record)  
247    
248      def write_record(self, record, values):      def write_record(self, record, values):
249          """Write the values into the record          """Write the values into the record
# Line 170  class DBFTable: Line 266  class DBFTable:
266          self.dbf.write_record(record, values)          self.dbf.write_record(record, values)
267          self.dbf.commit()          self.dbf.commit()
268    
269        def FileName(self):
270            """Return the filename the DBFTable was instantiated with"""
271            return self.filename
272    
273    
class MemoryColumn:

    """Description of a column in a MemoryTable

    Public attributes: name, type and index, all taken unchanged from
    the constructor parameters.
    """

    def __init__(self, name, type, index):
        self.index = index
        self.name, self.type = name, type
280    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is stored as is, not copied.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map allows
        # lookup by name as well as by index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                      for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        unique = {}

        for i in range(self.NumRows()):
            unique[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting in place
        values = list(unique.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places.

        The return value is None if the table has no rows.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            length = len(template % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        No checking is done on values; see the TODOs below.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
414    
415    
416    
417    def _find_dbf_column_names(names):
418        """Determine the column names to use in a DBF file
419    
420        DBF files have a length limit of 10 characters on the column names
421        so when writing an arbitrary Thuban table to a DBF file we may have
422        we may have to rename some of the columns making sure that they're
423        unique in the DBF file too.
424    
425        Names that are already short enough will stay the same. Longer names
426        will be truncated to 10 characters or if that isn't unique it will
427        be truncated more and filled up with digits.
428    
429        The parameter names should be a list of the column names. The return
430        value will be a dictionary mapping the names in the input list to
431        the names to use in the DBF file.
432        """
433        # mapping from the original names in table to the names in the DBF
434        # file
435        name_map = {}
436    
437        # First, we keep all names that are already short enough
438        for i in range(len(names) - 1, -1, -1):
439            if len(names[i]) <= 10:
440                name_map[names[i]] = names[i]
441                del names[i]
442    
443        # dict used as a set of all names already used as DBF column names
444        used = name_map.copy()
445    
446        # Go through all longer names. If the name truncated to 10
447        # characters is not used already, we use that. Otherwise we truncate
448        # it more and append numbers until we get an unused name
449        for name in names:
450            truncated = name[:10]
451            num = 0; numstr = ""
452            #print "truncated", truncated, num
453            while truncated in used and len(numstr) < 10:
454                num += 1
455                numstr = str(num)
456                truncated = name[:10 - len(numstr)] + numstr
457                #print "truncated", truncated, num
458            if len(numstr) >= 10:
459                # This case should never happen in practice as tables with
460                # 10^10 columns seem very unlikely :)
461                raise ValueError("Can't find unique dbf column name")
462    
463            name_map[name] = truncated
464            used[truncated] = 1
465    
466        return name_map
467    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices and only those rows are saved, in the
    order given.

    Column names are mapped with _find_dbf_column_names. The dbf file
    is closed even if writing the header or a record fails.
    """

    dbf = dbflib.create(filename)
    try:
        # Mapping from the table's field types to the dbflib ones. This
        # is the reverse of the module-level dbflib_fieldtypes, hence
        # the different name.
        to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,
                          FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Distinguish between DBFTable and others:
        # columns without a prec attribute get a precision of 12 for
        # doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # The records in the new file are numbered consecutively from 0
        # regardless of their row indices in the table.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
505    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices and only those rows are saved, in the
    order given.

    The first line of the file is a '#' followed by the comma separated
    column names. Each row follows as one line of comma separated
    values; no quoting or escaping is done. A row whose record is empty
    is written as an empty line. Nothing is written if the table has no
    columns.
    """

    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            outfile.write("#" + ",".join(names) + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[name] for name in names])
                else:
                    # Start from an empty line so that neither a stale
                    # line from the previous row nor an unbound name is
                    # used.
                    line = ""
                outfile.write(line + "\n")
    finally:
        outfile.close()
534    
 # Temporary backwards compatibility  
 Table = DBFTable  

Legend:
Removed from v.765  
changed lines
  Added in v.1599

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26