/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/thuban/Thuban/Model/table.py revision 765 by bh, Tue Apr 29 12:42:14 2003 UTC branches/WIP-pyshapelib-bramz/Thuban/Model/table.py revision 2734 by bramz, Thu Mar 1 12:42:59 2007 UTC
# Line 1  Line 1 
1  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4    # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 11  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 24  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
35  class DBFTable:  
36    class DBFColumn:
37    
38        """Description of a column in a DBFTable
39    
40        Instances have the following public attributes:
41    
42        name -- Name of the column
43        type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
44                FIELDTYPE_DOUBLE)
45        index -- The index of the column
46        width -- the width of the data in the column
47        prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
48        """
49    
50        def __init__(self, name, type, width, prec, index):
51            self.name = name
52            self.type = type
53            self.width = width
54            self.prec = prec
55            self.index = index
56    
57    
58    class DBFTable(TitledObject):
59    
60      """      """
61      Table interface for the data in a DBF file      Table interface for the data in a DBF file
# Line 45  class DBFTable: Line 76  class DBFTable:
76      # work because a DBF file object buffers some data      # work because a DBF file object buffers some data
77    
78      def __init__(self, filename):      def __init__(self, filename):
79          self.filename = filename          self.filename = os.path.abspath(filename)
80    
81            # Omit the extension in the title as it's not really needed and
82            # it can be confusing because dbflib removes extensions and
83            # appends some variations of '.dbf' before it tries to open the
84            # file. So the title could be e.g. myshapefile.shp when the real
85            # filename is myshapefile.dbf
86            title = os.path.splitext(os.path.basename(self.filename))[0]
87            TitledObject.__init__(self, title)
88    
89          self.dbf = dbflib.DBFFile(filename)          self.dbf = dbflib.DBFFile(filename)
90    
91          # If true, self.dbf is open for writing.          # If true, self.dbf is open for writing.
92          self._writable = 0          self._writable = 0
93    
94      def Destroy(self):          # Create the column information objects
95          self.dbf.close()          self.columns = []
96          self.dbf = None          self.column_map = {}
97            for i in range(self.NumColumns()):
98                ftype, name, width, prec = self.dbf.field_info(i)
99                ftype = dbflib_fieldtypes[ftype]
100                index = len(self.columns)
101                col = DBFColumn(name, ftype, width, prec, index)
102                self.columns.append(col)
103                self.column_map[name] = col
104                self.column_map[index] = col
105    
106      def record_count(self):      def NumRows(self):
107          """Return the number of records"""          """Return the number of rows in the table"""
108          return self.dbf.record_count()          return self.dbf.record_count()
109    
110      def field_count(self):      def NumColumns(self):
111          """Return the number of fields in a record"""          """Return the number of columns in the table"""
112          return self.dbf.field_count()          return self.dbf.field_count()
113    
114      def field_info(self, field):      def Columns(self):
115          """Return a tuple (type, name, width, prec) for the field no. field          """Return the table's column definitions
116    
117          type is the data type of the field, name the name, width the          The return value is a sequence of DBFColumn instances, one for
118          field width in characters and prec the decimal precision.          each column.
119          """          """
120          type, name, width, prec = self.dbf.field_info(field)          return self.columns
         type = dbflib_fieldtypes[type]  
         return type, name, width, prec  
121    
122      def field_info_by_name(self, fieldName):      def Column(self, col):
123          count = self.field_count()          """Return information about the column given by its name or index
124    
125          for i in range(count):          The returned object is an instance of DBFColumn
126              info = self.field_info(i)          """
127              if info[1] == fieldName:          return self.column_map[col]
128                  return info  
129        def HasColumn(self, col):
130            """Return whether the table has a column with the given name or index
131            """
132            return self.column_map.has_key(col)
133    
134          return None      def RowIdToOrdinal(self, gid):
135            """Return the row ordinal given its id
136    
137      def field_range(self, fieldName):          Since for DBFTables the row id is the row number, return the
138          """Finds the first occurences of the minimum and maximum values          value unchanged.
139          in the table for the given field.          """
140            return gid
141    
142          This assumes that the standard comparison operators (<, >, etc.)      def RowOrdinalToId(self, num):
143          will work for the given data.          """Return the row id given its ordinal
144    
145          Returns a tuple ((min, rec), (max, rec)) where:          Since for DBFTables the row id is the row number, return the
146              min is the minimum value          value unchanged.
147              max is the maximum value          """
148              rec is the record number where the value was found. One          return num
                 should check that the record number of min is not  
                 the same as the record number of max.  
149    
150          Returns None if there are no records      def ReadRowAsDict(self, row, row_is_ordinal = 0):
151            """Return the entire row as a dictionary with column names as keys
152    
153            The row_is_ordinal is ignored for DBF tables because the row id
154            is always the row number.
155          """          """
156            return self.dbf.read_record(row)
157    
158        def ReadValue(self, row, col, row_is_ordinal = 0):
159            """Return the value of the specified row and column
160    
161          count = self.record_count()          The col parameter may be the index of the column or its name.
162    
163          if count == 0:          The row_is_ordinal is ignored for DBF tables because the row id
164              return None          is always the row number.
165            """
166            return self.dbf.read_attribute(row, self.column_map[col].index)
167    
168          rec = self.read_record(0)      def ValueRange(self, col):
169            """Return the minimum and maximum values of the values in the column
170    
171          min = rec[fieldName]          The return value is a tuple (min, max) unless the table is empty
172          min_rec = 0          in which case the return value is None.
173            """
174            count = self.NumRows()
175    
176          max = rec[fieldName]          if count == 0:
177          max_rec = 0              return None
178    
179            min = max = self.ReadValue(0, col)
180          for i in range(1, count):          for i in range(1, count):
181              rec = self.read_record(i)              value = self.ReadValue(i, col)
182              data = rec[fieldName]              if value < min:
183                    min = value
184                elif value > max:
185                    max = value
186    
187              if data < min:          return (min, max)
                 min = data  
                 min_rec = rec  
             elif data > max:  
                 max = data  
                 max_rec = rec  
   
         return ((min, min_rec), (max, max_rec))  
   
     def GetUniqueValues(self, fieldName):  
         """Return a list of all unique entries in the table for the given  
         field name.  
         """  
188    
189        def UniqueValues(self, col):
190            """Return a sorted list of all unique values in the column col"""
191          dict = {}          dict = {}
192    
193          for i in range(0, self.record_count()):          for i in range(self.NumRows()):
194              rec = self.read_record(i)              value = self.ReadValue(i, col)
195              data = rec[fieldName]              dict[value] = 0
196    
197            values = dict.keys()
198            values.sort()
199            return values
200    
201        def Dependencies(self):
202            """Return an empty sequence. The DBFTable doesn't depend on anything"""
203            return ()
204    
205        # DBF specific interface parts.
206    
207        def Width(self, col):
208            """Return column width"""
209            return self.column_map[col].width
210    
211              if not dict.has_key(data):      def Destroy(self):
212                  dict[data] = 0          self.dbf.close()
213            self.dbf = None
         return dict.keys()  
   
     def read_record(self, record):  
         """Return the record no. record as a dict mapping field names to values  
         """  
         return self.dbf.read_record(record)  
214    
215      def write_record(self, record, values):      def write_record(self, record, values):
216          """Write the values into the record          """Write the values into the record
# Line 170  class DBFTable: Line 233  class DBFTable:
233          self.dbf.write_record(record, values)          self.dbf.write_record(record, values)
234          self.dbf.commit()          self.dbf.commit()
235    
236        def FileName(self):
237            """Return the filename the DBFTable was instantiated with"""
238            return self.filename
239    
240    
241    class MemoryColumn:
242    
243        def __init__(self, name, type, index):
244            self.name = name
245            self.type = type
246            self.index = index
247    
248    class MemoryTable(TitledObject):
249    
250        """Very simple table implementation that operates on a list of tuples"""
251    
252        def __init__(self, fields, data):
253            """Initialize the MemoryTable
254    
255            Parameters:
256            fields -- List of (name, field_type) pairs
257            data -- List of tuples, one for each row of data
258            """
259            self.data = data
260            title = 'MemoryTable'
261            TitledObject.__init__(self, title)
262    
263            # Create the column information objects
264            self.columns = []
265            self.column_map = {}
266            for name, ftype in fields:
267                index = len(self.columns)
268                col = MemoryColumn(name, ftype, index)
269                self.columns.append(col)
270                self.column_map[name] = col
271                self.column_map[index] = col
272    
273        def NumColumns(self):
274            """Return the number of columns in the table"""
275            return len(self.columns)
276    
277        def Column(self, col):
278            """Return information about the column given by its name or index
279    
280            The returned object is an instance of MemoryColumn.
281            """
282            return self.column_map[col]
283    
284        def Columns(self):
285            """Return the table's column definitions
286    
287            The return value is a sequence of MemoryColumn instances, one
288            for each column.
289            """
290            return self.columns
291    
292        def HasColumn(self, col):
293            """Return whether the table has a column with the given name or index
294            """
295            return self.column_map.has_key(col)
296    
297        def NumRows(self):
298            """Return the number of rows in the table"""
299            return len(self.data)
300    
301        def RowIdToOrdinal(self, gid):
302            """Return the row ordinal given its id
303    
304            Since for MemoryTables the row id is the row number, return the
305            value unchanged.
306            """
307            return gid
308    
309        def RowOrdinalToId(self, num):
310            """Return the row id given its ordinal
311    
312            Since for MemoryTables the row id is the row number, return the
313            value unchanged.
314            """
315            return num
316    
317        def ReadValue(self, row, col, row_is_ordinal = 0):
318            """Return the value of the specified row and column
319    
320            The col parameter may be the index of the column or its name.
321    
322            The row_is_ordinal is ignored for MemoryTables because the row id
323            is always the row number.
324            """
325            return self.data[row][self.column_map[col].index]
326    
327        def ReadRowAsDict(self, index, row_is_ordinal = 0):
328            """Return the entire row as a dictionary with column names as keys
329    
330            The row_is_ordinal is ignored for MemoryTables because the row id
331            is always the row number.
332            """
333            return dict([(col.name, self.data[index][col.index])
334                          for col in self.columns])
335    
336        def ValueRange(self, col):
337            """Return the minimum and maximum values of the values in the column
338    
339            The return value is a tuple (min, max) unless the table is empty
340            in which case the return value is None.
341            """
342    
343            index = self.column_map[col].index
344            values = [row[index] for row in self.data]
345            if not values:
346                return None
347    
348            return min(values), max(values)
349    
350        def UniqueValues(self, col):
351            """Return a sorted list of all unique values in the column col
352    
353            col can be either column index or name.
354            """
355            dict = {}
356    
357            for i in range(self.NumRows()):
358                value = self.ReadValue(i, col)
359                dict[value] = 0
360    
361            values = dict.keys()
362            values.sort()
363            return values
364    
365        def Width(self, col):
366            """Return the maximum width of values in the column
367    
368            The return value is the maximum length of the string
369            representation of the values in the column (represented by index
370            or name).
371            """
372            max = 0
373    
374            type  = self.column_map[col].type
375            index = self.column_map[col].index
376            values = [row[index] for row in self.data]
377            if not values:
378                return None
379    
380            if type == FIELDTYPE_DOUBLE:
381                format = "%.12f"
382            elif type == FIELDTYPE_INT:
383                format = "%d"
384            else:
385                format = "%s"
386            for value in values:
387                l = len(format % value)
388                if l > max:
389                    max = l
390    
391            return max
392    
393        def Dependencies(self):
394            """Return an empty sequence. The MemoryTable doesn't depend on anything
395            """
396            return ()
397    
398        def write_record(self, record, values):
399            # TODO: Check for correct length and perhaps also
400            # for correct types in case values is a tuple. How to report problems?
401            # TODO: Allow values to be a dictionary and write the single
402            # fields that are specified.
403            self.data[record] = values
404    
405    
406    
407    def _find_dbf_column_names(names):
408        """Determine the column names to use in a DBF file
409    
410        DBF files have a length limit of 10 characters on the column names
411        so when writing an arbitrary Thuban table to a DBF file we may have
412        to rename some of the columns, making sure that they're
413        unique in the DBF file too.
414    
415        Names that are already short enough will stay the same. Longer names
416        will be truncated to 10 characters or if that isn't unique it will
417        be truncated more and filled up with digits.
418    
419        The parameter names should be a list of the column names. The return
420        value will be a dictionary mapping the names in the input list to
421        the names to use in the DBF file.
422        """
423        # mapping from the original names in table to the names in the DBF
424        # file
425        name_map = {}
426    
427        # First, we keep all names that are already short enough
428        for i in range(len(names) - 1, -1, -1):
429            if len(names[i]) <= 10:
430                name_map[names[i]] = names[i]
431                del names[i]
432    
433        # dict used as a set of all names already used as DBF column names
434        used = name_map.copy()
435    
436        # Go through all longer names. If the name truncated to 10
437        # characters is not used already, we use that. Otherwise we truncate
438        # it more and append numbers until we get an unused name
439        for name in names:
440            truncated = name[:10]
441            num = 0; numstr = ""
442            #print "truncated", truncated, num
443            while truncated in used and len(numstr) < 10:
444                num += 1
445                numstr = str(num)
446                truncated = name[:10 - len(numstr)] + numstr
447                #print "truncated", truncated, num
448            if len(numstr) >= 10:
449                # This case should never happen in practice as tables with
450                # 10^10 columns seem very unlikely :)
451                raise ValueError("Can't find unique dbf column name")
452    
453            name_map[name] = truncated
454            used[truncated] = 1
455    
456        return name_map
457    
458    def table_to_dbf(table, filename, rows = None):
459        """Create the dbf file filename from the table.
460        
461        If rows is None (the default), all rows are saved; otherwise it must
462        be a list of the row indices to be saved to the file.
463        """
464    
465        dbf = dbflib.create(filename)
466    
467        dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
468                             FIELDTYPE_INT: dbflib.FTInteger,
469                             FIELDTYPE_DOUBLE: dbflib.FTDouble}
470    
471    
472        name_map = _find_dbf_column_names([col.name for col in table.Columns()])
473    
474        # Initialise the header. Distinguish between DBFTable and others.
475        for col in table.Columns():
476            width = table.Width(col.name)
477            if col.type == FIELDTYPE_DOUBLE:
478                prec = getattr(col, "prec", 12)
479            else:
480                prec = 0
481            dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
482                          width, prec)
483    
484        if rows is None:
485            rows = range(table.NumRows())
486    
487        recNum = 0
488        for i in rows:
489            record = {}
490            for key, value in table.ReadRowAsDict(i).items():
491                record[name_map[key]] = value
492            dbf.write_record(recNum, record)
493            recNum += 1
494        dbf.close()
495    
496    def table_to_csv(table, filename, rows = None):
497        """Export table to csv file.
498        
499        If rows is None (the default), all rows are saved; otherwise it must
500        be a list of the row indices to be saved to the file.
501        """
502    
503        file = open(filename,"w")
504        columns = table.Columns()
505        if columns:
506            header = "#%s" % columns[0].name
507            for col in columns[1:]:
508                header = header + ",%s" % col.name
509            header = header + "\n"
510            file.write(header)
511    
512            if rows is None:
513                rows = range(table.NumRows())
514    
515            for i in rows:
516                record = table.ReadRowAsDict(i)
517                if len(record):
518                    line = "%s" % record[columns[0].name]
519                    for col in columns[1:]:
520                        line = line + ",%s" % record[col.name]
521                line = line + "\n"
522                file.write(line)
523        file.close()
524    
 # Temporary backwards compatibility  
 Table = DBFTable  

Legend:
Removed from v.765  
changed lines
  Added in v.2734

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26