/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 994 by bh, Thu May 22 18:05:16 2003 UTC revision 1961 by bh, Wed Nov 19 15:46:03 2003 UTC
# Line 2  Line 2 
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4  # Jan-Oliver Wagner <[email protected]>  # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 12  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17  import inspect  import inspect
18  import warnings  import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 29  dbflib_fieldtypes = {dbflib.FTString: FI Line 33  dbflib_fieldtypes = {dbflib.FTString: FI
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
35    
 class OldTableInterfaceMixin:  
   
     """Mixin to implement the old table interface using the new one"""  
   
     def __deprecation_warning(self):  
         """Issue a DeprecationWarning for code that uses the old interface"""  
         callername = inspect.currentframe().f_back.f_code.co_name  
         warnings.warn("The %s method of the old table interface"  
                       " is deprecated" % callername,  
                       DeprecationWarning, stacklevel = 3)  
   
     def record_count(self):  
         self.__deprecation_warning()  
         return self.NumRows()  
   
     def field_count(self):  
         self.__deprecation_warning()  
         return self.NumColumns()  
   
     def field_info(self, field):  
         """Return a tuple (type, name, width, prec) for the field no. field  
   
         type is the data type of the field, name the name, width the  
         field width in characters and prec the decimal precision. width  
         and prec will be zero if the information returned by the Column  
         method doesn't provide values for them.  
         """  
         self.__deprecation_warning()  
         col = self.Column(field)  
         return (col.type, col.name,  
                getattr(col, "width", 0), getattr(col, "prec", 0))  
   
     def field_info_by_name(self, col):  
         self.__deprecation_warning()  
         try:  
             return self.field_info(col)  
         except KeyError:  
             # FIXME: It may be that field_info raises other exceptions  
             # when the name is not a valid column name.  
             return None  
   
     def field_range(self, fieldName):  
         self.__deprecation_warning()  
         min, max = self.ValueRange(fieldName)  
         return ((min, None), (max, None))  
   
     def GetUniqueValues(self, field):  
         self.__deprecation_warning()  
         return self.UniqueValues(field)  
   
     def read_record(self, r):  
         self.__deprecation_warning()  
         return self.ReadRowAsDict(r)  
   
   
   
36  class DBFColumn:  class DBFColumn:
37    
38      """Description of a column in a DBFTable      """Description of a column in a DBFTable
# Line 107  class DBFColumn: Line 55  class DBFColumn:
55          self.index = index          self.index = index
56    
57    
58  class DBFTable(OldTableInterfaceMixin):  class DBFTable(TitledObject):
59    
60      """      """
61      Table interface for the data in a DBF file      Table interface for the data in a DBF file
# Line 128  class DBFTable(OldTableInterfaceMixin): Line 76  class DBFTable(OldTableInterfaceMixin):
76      # work because a DBF file object buffers some data      # work because a DBF file object buffers some data
77    
78      def __init__(self, filename):      def __init__(self, filename):
79          self.filename = filename          self.filename = os.path.abspath(filename)
80    
81            # Omit the extension in the title as it's not really needed and
82            # it can be confusing because dbflib removes extensions and
83            # appends some variations of '.dbf' before it tries to open the
84            # file. So the title could be e.g. myshapefile.shp when the real
85            # filename is myshapefile.dbf
86            title = os.path.splitext(os.path.basename(self.filename))[0]
87            TitledObject.__init__(self, title)
88    
89          self.dbf = dbflib.DBFFile(filename)          self.dbf = dbflib.DBFFile(filename)
90    
91          # If true, self.dbf is open for writing.          # If true, self.dbf is open for writing.
# Line 174  class DBFTable(OldTableInterfaceMixin): Line 131  class DBFTable(OldTableInterfaceMixin):
131          """          """
132          return self.column_map.has_key(col)          return self.column_map.has_key(col)
133    
134      def ReadRowAsDict(self, row):      def RowIdToOrdinal(self, gid):
135          """Return the entire row as a dictionary with column names as keys"""          """Return the row ordinal given its id
136    
137            Since for DBFTables the row id is the row number, return the
138            value unchanged.
139            """
140            return gid
141    
142        def RowOrdinalToId(self, num):
143            """Return the row id given its ordinal
144    
145            Since for DBFTables the row id is the row number, return the
146            value unchanged.
147            """
148            return num
149    
150        def ReadRowAsDict(self, row, row_is_ordinal = 0):
151            """Return the entire row as a dictionary with column names as keys
152    
153            The row_is_ordinal is ignored for DBF tables because the row id
154            is always the row number.
155            """
156          return self.dbf.read_record(row)          return self.dbf.read_record(row)
157    
158      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
159          """Return the value of the specified row and column          """Return the value of the specified row and column
160    
161          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
162    
163            The row_is_ordinal is ignored for DBF tables because the row id
164            is always the row number.
165          """          """
166          return self.dbf.read_record(row)[self.column_map[col].name]          return self.dbf.read_attribute(row, self.column_map[col].index)
167    
168      def ValueRange(self, col):      def ValueRange(self, col):
169          """Return the minimum and maximum values of the values in the column          """Return the minimum and maximum values of the values in the column
# Line 224  class DBFTable(OldTableInterfaceMixin): Line 204  class DBFTable(OldTableInterfaceMixin):
204    
205      # DBF specific interface parts.      # DBF specific interface parts.
206    
207        def Width(self, col):
208            """Return column width"""
209            return self.column_map[col].width
210    
211      def Destroy(self):      def Destroy(self):
212          self.dbf.close()          self.dbf.close()
213          self.dbf = None          self.dbf = None
# Line 261  class MemoryColumn: Line 245  class MemoryColumn:
245          self.type = type          self.type = type
246          self.index = index          self.index = index
247    
248  class MemoryTable(OldTableInterfaceMixin):  class MemoryTable(TitledObject):
249    
250      """Very simple table implementation that operates on a list of tuples"""      """Very simple table implementation that operates on a list of tuples"""
251    
# Line 273  class MemoryTable(OldTableInterfaceMixin Line 257  class MemoryTable(OldTableInterfaceMixin
257          data -- List of tuples, one for each row of data          data -- List of tuples, one for each row of data
258          """          """
259          self.data = data          self.data = data
260            title = 'MemoryTable'
261            TitledObject.__init__(self, title)
262    
263          # Create the column information objects          # Create the column information objects
264          self.columns = []          self.columns = []
# Line 312  class MemoryTable(OldTableInterfaceMixin Line 298  class MemoryTable(OldTableInterfaceMixin
298          """Return the number of rows in the table"""          """Return the number of rows in the table"""
299          return len(self.data)          return len(self.data)
300    
301      def ReadValue(self, row, col):      def RowIdToOrdinal(self, gid):
302            """Return the row ordinal given its id
303    
304            Since for MemoryTables the row id is the row number, return the
305            value unchanged.
306            """
307            return gid
308    
309        def RowOrdinalToId(self, num):
310            """Return the row id given its ordinal
311    
312            Since for MemoryTables the row id is the row number, return the
313            value unchanged.
314            """
315            return num
316    
317        def ReadValue(self, row, col, row_is_ordinal = 0):
318          """Return the value of the specified row and column          """Return the value of the specified row and column
319    
320          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
321    
322            The row_is_ordinal is ignored for MemoryTables because the row id
323            is always the row number.
324          """          """
325          return self.data[row][self.column_map[col].index]          return self.data[row][self.column_map[col].index]
326    
327      def ReadRowAsDict(self, index):      def ReadRowAsDict(self, index, row_is_ordinal = 0):
328          """Return the entire row as a dictionary with column names as keys"""          """Return the entire row as a dictionary with column names as keys
329    
330            The row_is_ordinal is ignored for MemoryTables because the row id
331            is always the row number.
332            """
333          return dict([(col.name, self.data[index][col.index])          return dict([(col.name, self.data[index][col.index])
334                        for col in self.columns])                        for col in self.columns])
335    
# Line 339  class MemoryTable(OldTableInterfaceMixin Line 348  class MemoryTable(OldTableInterfaceMixin
348          return min(values), max(values)          return min(values), max(values)
349    
350      def UniqueValues(self, col):      def UniqueValues(self, col):
351          """Return a sorted list of all unique values in the column col"""          """Return a sorted list of all unique values in the column col
352    
353            col can be either column index or name.
354            """
355          dict = {}          dict = {}
356    
357          for i in range(self.NumRows()):          for i in range(self.NumRows()):
# Line 350  class MemoryTable(OldTableInterfaceMixin Line 362  class MemoryTable(OldTableInterfaceMixin
362          values.sort()          values.sort()
363          return values          return values
364    
365        def Width(self, col):
366            """Return the maximum width of values in the column
367    
368            The return value is the the maximum length of string
369            representation of the values in the column (represented by index
370            or name).
371            """
372            max = 0
373    
374            type  = self.column_map[col].type
375            index = self.column_map[col].index
376            values = [row[index] for row in self.data]
377            if not values:
378                return None
379    
380            if type == FIELDTYPE_DOUBLE:
381                format = "%.12f"
382            elif type == FIELDTYPE_INT:
383                format = "%d"
384            else:
385                format = "%s"
386            for value in values:
387                l = len(format % value)
388                if l > max:
389                    max = l
390    
391            return max
392    
393      def Dependencies(self):      def Dependencies(self):
394          """Return an empty sequence. The MemoryTable doesn't depend on anything          """Return an empty sequence. The MemoryTable doesn't depend on anything
395          """          """
# Line 361  class MemoryTable(OldTableInterfaceMixin Line 401  class MemoryTable(OldTableInterfaceMixin
401          # TODO: Allow values to be a dictionary and write the single          # TODO: Allow values to be a dictionary and write the single
402          # fields that are specified.          # fields that are specified.
403          self.data[record] = values          self.data[record] = values
404    
405    
406    
407    def _find_dbf_column_names(names):
408        """Determine the column names to use in a DBF file
409    
410        DBF files have a length limit of 10 characters on the column names
411        so when writing an arbitrary Thuban table to a DBF file we may have
412        we may have to rename some of the columns making sure that they're
413        unique in the DBF file too.
414    
415        Names that are already short enough will stay the same. Longer names
416        will be truncated to 10 characters or if that isn't unique it will
417        be truncated more and filled up with digits.
418    
419        The parameter names should be a list of the column names. The return
420        value will be a dictionary mapping the names in the input list to
421        the names to use in the DBF file.
422        """
423        # mapping from the original names in table to the names in the DBF
424        # file
425        name_map = {}
426    
427        # First, we keep all names that are already short enough
428        for i in range(len(names) - 1, -1, -1):
429            if len(names[i]) <= 10:
430                name_map[names[i]] = names[i]
431                del names[i]
432    
433        # dict used as a set of all names already used as DBF column names
434        used = name_map.copy()
435    
436        # Go through all longer names. If the name truncated to 10
437        # characters is not used already, we use that. Otherwise we truncate
438        # it more and append numbers until we get an unused name
439        for name in names:
440            truncated = name[:10]
441            num = 0; numstr = ""
442            #print "truncated", truncated, num
443            while truncated in used and len(numstr) < 10:
444                num += 1
445                numstr = str(num)
446                truncated = name[:10 - len(numstr)] + numstr
447                #print "truncated", truncated, num
448            if len(numstr) >= 10:
449                # This case should never happen in practice as tables with
450                # 10^10 columns seem very unlikely :)
451                raise ValueError("Can't find unique dbf column name")
452    
453            name_map[name] = truncated
454            used[truncated] = 1
455    
456        return name_map
457    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is not None then it must be a list of row indices to be
    saved to the file. If rows is None (the default) all rows are
    saved.

    Column names that are too long for a DBF file are renamed as
    determined by _find_dbf_column_names.
    """
    # Mapping from the table field types to the dbflib field types.
    # Deliberately not called dbflib_fieldtypes so that it doesn't hide
    # the module level dictionary that maps in the other direction.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    dbf = dbflib.create(filename)
    # Make sure the DBF file is closed even if reading from the table or
    # writing a record fails.
    try:
        columns = table.Columns()
        name_map = _find_dbf_column_names([col.name for col in columns])

        # Initialise the header. Columns that provide a prec attribute
        # use it as the precision of doubles, all others get 12 decimal
        # places. Non-double columns have no decimal places.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # The records in the new file are numbered consecutively
        # regardless of the row numbers in the source table.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
495    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    The first line of the file is a header consisting of a '#' followed
    by the comma separated column names. Each following line holds the
    comma separated values of one row. Values are written with "%s"
    formatting; no quoting or escaping is done.

    If rows is not None then it must be a list of row indices to be
    saved to the file. If rows is None (the default) all rows are
    saved.

    If the table has no columns an empty file is created. A row for
    which ReadRowAsDict returns an empty dictionary is written as an
    empty line.
    """
    outfile = open(filename, "w")
    # Make sure the file is closed even if reading from the table fails.
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            outfile.write("#" + ",".join(["%s" % name for name in names])
                          + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                # Start every row with a fresh line so that an empty
                # record can't pick up the contents of the previous row.
                line = ""
                if len(record):
                    line = ",".join(["%s" % record[name] for name in names])
                outfile.write(line + "\n")
    finally:
        outfile.close()
524    

Legend:
Removed from v.994  
changed lines
  Added in v.1961

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26