/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory | Revision Log | View Patch Patch

-revision 274 by bh,
Thu Aug 22 16:25:46 2002 UTC
+revision 1961 by bh,
Wed Nov 19 15:46:03 2003 UTC
 Line 1
- # Copyright (c) 2001, 2002 by Intevation GmbH
+ # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
  # Authors:
  # Bernhard Herzog <[email protected]>
+ # Jan-Oliver Wagner <[email protected]>
+ # Frank Koormann <[email protected]>
  #
  # This program is free software under the GPL (>=v2)
  # Read the file COPYING coming with Thuban for details.
-Line 11 
 Classes for handling tables of data.
+Line 13 
 Classes for handling tables of data.
  __version__ = "$Revision$"
+ import os
+ import inspect
+ import warnings
+ from base import TitledObject
  import dbflib
  # the field types supported by a Table instance.
-Line 24 
 dbflib_fieldtypes = {dbflib.FTString: FI
+Line 32 
 dbflib_fieldtypes = {dbflib.FTString: FI
                       dbflib.FTInteger: FIELDTYPE_INT,
                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
- class Table:
+ class DBFColumn:
+     """Description of a column in a DBFTable
+     Instances have the following public attributes:
+     name -- Name of the column
+     type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
+             FIELDTYPE_DOUBLE)
+     index -- The index of the column
+     width -- the width of the data in the column
+     prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
      """
-     Represent a table of data.
-     Currently this is basically just a wrapper around dbflib.
+     def __init__(self, name, type, width, prec, index):
+         self.name = name
+         self.type = type
+         self.width = width
+         self.prec = prec
+         self.index = index
+ class DBFTable(TitledObject):
+     """
+     Table interface for the data in a DBF file
      """
+     # Implementation strategy regarding writing to a DBF file:
+     #
+     # Most of the time Thuban only needs to read from a table and it is
+     # important that Thuban can work with read-only files. Therefore the
+     # DBF file is opened only for reading initially. Only when
+     # write_record is called we try to open the DBF file for writing as
+     # well. If that succeeds the read/write DBF file will be used for
+     # all IO afterwards.
+     #
+     # It's important to use the same DBF file object for both reading
+     # and writing to make sure that reading a record after writing
+     # returns the new values. With two separate objects this wouldn't
+     # work because a DBF file object buffers some data.
      def __init__(self, filename):
-         self.filename = filename
+         self.filename = os.path.abspath(filename)
-         self.dbf = dbflib.DBFFile(filename, "r+b")
-     def Destroy(self):
+         # Omit the extension in the title as it's not really needed and
-         self.dbf.close()
+         # it can be confusing because dbflib removes extensions and
-         self.dbf = None
+         # appends some variations of '.dbf' before it tries to open the
+         # file. So the title could be e.g. myshapefile.shp when the real
+         # filename is myshapefile.dbf
+         title = os.path.splitext(os.path.basename(self.filename))[0]
+         TitledObject.__init__(self, title)
+         self.dbf = dbflib.DBFFile(filename)
+         # If true, self.dbf is open for writing.
+         self._writable = 0
+         # Create the column information objects
+         self.columns = []
+         self.column_map = {}
+         for i in range(self.NumColumns()):
+             ftype, name, width, prec = self.dbf.field_info(i)
+             ftype = dbflib_fieldtypes[ftype]
+             index = len(self.columns)
+             col = DBFColumn(name, ftype, width, prec, index)
+             self.columns.append(col)
+             self.column_map[name] = col
+             self.column_map[index] = col
-     def record_count(self):
+     def NumRows(self):
-         """Return the number of records"""
+         """Return the number of rows in the table"""
          return self.dbf.record_count()
-     def field_count(self):
+     def NumColumns(self):
-         """Return the number of fields in a record"""
+         """Return the number of columns in the table"""
          return self.dbf.field_count()
-     def field_info(self, field):
+     def Columns(self):
-         """Return a tuple (type, name, width, prec) for the field no. field
+         """Return the table's column definitions
-         type is the data type of the field, name the name, width the
+         The return value is a sequence of DBFColumn instances, one for
-         field width in characters and prec the decimal precision.
+         each column.
          """
-         type, name, width, prec = self.dbf.field_info(field)
+         return self.columns
-         type = dbflib_fieldtypes[type]
-         return type, name, width, prec
+     def Column(self, col):
+         """Return information about the column given by its name or index
+         The returned object is an instance of DBFColumn
+         """
+         return self.column_map[col]
+     def HasColumn(self, col):
+         """Return whether the table has a column with the given name or index
+         """
+         return self.column_map.has_key(col)
+     def RowIdToOrdinal(self, gid):
+         """Return the row ordinal given its id
+         Since for DBFTables the row id is the row number, return the
+         value unchanged.
+         """
+         return gid
+     def RowOrdinalToId(self, num):
+         """Return the row id given its ordinal
+         Since for DBFTables the row id is the row number, return the
+         value unchanged.
+         """
+         return num
+     def ReadRowAsDict(self, row, row_is_ordinal = 0):
+         """Return the entire row as a dictionary with column names as keys
-     def read_record(self, record):
+         The row_is_ordinal is ignored for DBF tables because the row id
-         """Return the record no. record as a dict mapping field names to values
+         is always the row number.
          """
-         return self.dbf.read_record(record)
+         return self.dbf.read_record(row)
+     def ReadValue(self, row, col, row_is_ordinal = 0):
+         """Return the value of the specified row and column
+         The col parameter may be the index of the column or its name.
+         The row_is_ordinal is ignored for DBF tables because the row id
+         is always the row number.
+         """
+         return self.dbf.read_attribute(row, self.column_map[col].index)
+     def ValueRange(self, col):
+         """Return the minimum and maximum values of the values in the column
+         The return value is a tuple (min, max) unless the table is empty
+         in which case the return value is None.
+         """
+         count = self.NumRows()
+         if count == 0:
+             return None
+         min = max = self.ReadValue(0, col)
+         for i in range(1, count):
+             value = self.ReadValue(i, col)
+             if value < min:
+                 min = value
+             elif value > max:
+                 max = value
+         return (min, max)
+     def UniqueValues(self, col):
+         """Return a sorted list of all unique values in the column col"""
+         dict = {}
+         for i in range(self.NumRows()):
+             value = self.ReadValue(i, col)
+             dict[value] = 0
+         values = dict.keys()
+         values.sort()
+         return values
+     def Dependencies(self):
+         """Return an empty sequence. The DBFTable doesn't depend on anything"""
+         return ()
+     # DBF specific interface parts.
+     def Width(self, col):
+         """Return column width"""
+         return self.column_map[col].width
+     def Destroy(self):
+         self.dbf.close()
+         self.dbf = None
      def write_record(self, record, values):
          """Write the values into the record
-Line 76 
 class Table:
+Line 225 
 class Table:
          If it's a sequence, all fields must be present in the right
          order.
          """
+         if not self._writable:
+             new_dbf = dbflib.DBFFile(self.filename, "r+b")
+             self.dbf.close()
+             self.dbf = new_dbf
+             self._writable = 1
          self.dbf.write_record(record, values)
          self.dbf.commit()
+     def FileName(self):
+         """Return the filename the DBFTable was instantiated with"""
+         return self.filename
+ class MemoryColumn:
+     def __init__(self, name, type, index):
+         self.name = name
+         self.type = type
+         self.index = index
+ class MemoryTable(TitledObject):
+     """Very simple table implementation that operates on a list of tuples"""
+     def __init__(self, fields, data):
+         """Initialize the MemoryTable
+         Parameters:
+         fields -- List of (name, field_type) pairs
+         data -- List of tuples, one for each row of data
+         """
+         self.data = data
+         title = 'MemoryTable'
+         TitledObject.__init__(self, title)
+         # Create the column information objects
+         self.columns = []
+         self.column_map = {}
+         for name, ftype in fields:
+             index = len(self.columns)
+             col = MemoryColumn(name, ftype, index)
+             self.columns.append(col)
+             self.column_map[name] = col
+             self.column_map[index] = col
+     def NumColumns(self):
+         """Return the number of columns in the table"""
+         return len(self.columns)
+     def Column(self, col):
+         """Return information about the column given by its name or index
+         The returned object is an instance of MemoryColumn.
+         """
+         return self.column_map[col]
+     def Columns(self):
+         """Return the table's column definitions
+         The return value is a sequence of MemoryColumn instances, one
+         for each column.
+         """
+         return self.columns
+     def HasColumn(self, col):
+         """Return whether the table has a column with the given name or index
+         """
+         return self.column_map.has_key(col)
+     def NumRows(self):
+         """Return the number of rows in the table"""
+         return len(self.data)
+     def RowIdToOrdinal(self, gid):
+         """Return the row ordinal given its id
+         Since for MemoryTables the row id is the row number, return the
+         value unchanged.
+         """
+         return gid
+     def RowOrdinalToId(self, num):
+         """Return the row id given its ordinal
+         Since for MemoryTables the row id is the row number, return the
+         value unchanged.
+         """
+         return num
+     def ReadValue(self, row, col, row_is_ordinal = 0):
+         """Return the value of the specified row and column
+         The col parameter may be the index of the column or its name.
+         The row_is_ordinal is ignored for MemoryTables because the row
+         id is always the row number.
+         """
+         return self.data[row][self.column_map[col].index]
+     def ReadRowAsDict(self, index, row_is_ordinal = 0):
+         """Return the entire row as a dictionary with column names as keys
+         The row_is_ordinal is ignored for MemoryTables because the row
+         id is always the row number.
+         """
+         return dict([(col.name, self.data[index][col.index])
+                       for col in self.columns])
+     def ValueRange(self, col):
+         """Return the minimum and maximum values of the values in the column
+         The return value is a tuple (min, max) unless the table is empty
+         in which case the return value is None.
+         """
+         index = self.column_map[col].index
+         values = [row[index] for row in self.data]
+         if not values:
+             return None
+         return min(values), max(values)
+     def UniqueValues(self, col):
+         """Return a sorted list of all unique values in the column col
+         col can be either column index or name.
+         """
+         dict = {}
+         for i in range(self.NumRows()):
+             value = self.ReadValue(i, col)
+             dict[value] = 0
+         values = dict.keys()
+         values.sort()
+         return values
+     def Width(self, col):
+         """Return the maximum width of values in the column
+         The return value is the maximum length of the string
+         representation of the values in the column (represented by index
+         or name).
+         """
+         max = 0
+         type  = self.column_map[col].type
+         index = self.column_map[col].index
+         values = [row[index] for row in self.data]
+         if not values:
+             return None
+         if type == FIELDTYPE_DOUBLE:
+             format = "%.12f"
+         elif type == FIELDTYPE_INT:
+             format = "%d"
+         else:
+             format = "%s"
+         for value in values:
+             l = len(format % value)
+             if l > max:
+                 max = l
+         return max
+     def Dependencies(self):
+         """Return an empty sequence. The MemoryTable doesn't depend on anything
+         """
+         return ()
+     def write_record(self, record, values):
+         # TODO: Check for correct length and perhaps also
+         # for correct types in case values is a tuple. How to report problems?
+         # TODO: Allow values to be a dictionary and write the single
+         # fields that are specified.
+         self.data[record] = values
+ def _find_dbf_column_names(names):
+     """Determine the column names to use in a DBF file
+     DBF files have a length limit of 10 characters on the column names
+     so when writing an arbitrary Thuban table to a DBF file we may have
+     to rename some of the columns, making sure that they're
+     unique in the DBF file too.
+     Names that are already short enough will stay the same. Longer names
+     will be truncated to 10 characters or if that isn't unique it will
+     be truncated more and filled up with digits.
+     The parameter names should be a list of the column names. The return
+     value will be a dictionary mapping the names in the input list to
+     the names to use in the DBF file.
+     """
+     # mapping from the original names in table to the names in the DBF
+     # file
+     name_map = {}
+     # First, we keep all names that are already short enough
+     for i in range(len(names) - 1, -1, -1):
+         if len(names[i]) <= 10:
+             name_map[names[i]] = names[i]
+             del names[i]
+     # dict used as a set of all names already used as DBF column names
+     used = name_map.copy()
+     # Go through all longer names. If the name truncated to 10
+     # characters is not used already, we use that. Otherwise we truncate
+     # it more and append numbers until we get an unused name
+     for name in names:
+         truncated = name[:10]
+         num = 0; numstr = ""
+         #print "truncated", truncated, num
+         while truncated in used and len(numstr) < 10:
+             num += 1
+             numstr = str(num)
+             truncated = name[:10 - len(numstr)] + numstr
+             #print "truncated", truncated, num
+         if len(numstr) >= 10:
+             # This case should never happen in practice as tables with
+             # 10^10 columns seem very unlikely :)
+             raise ValueError("Can't find unique dbf column name")
+         name_map[name] = truncated
+         used[truncated] = 1
+     return name_map
+ def table_to_dbf(table, filename, rows = None):
+     """Create the dbf file filename from the table.
+     If rows is None (the default) all rows are saved. Otherwise it must
+     be a list of row indices to be saved to the file.
+     """
+     dbf = dbflib.create(filename)
+     dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
+                          FIELDTYPE_INT: dbflib.FTInteger,
+                          FIELDTYPE_DOUBLE: dbflib.FTDouble}
+     name_map = _find_dbf_column_names([col.name for col in table.Columns()])
+     # Initialise the header. Distinguish between DBFTable and others.
+     for col in table.Columns():
+         width = table.Width(col.name)
+         if col.type == FIELDTYPE_DOUBLE:
+             prec = getattr(col, "prec", 12)
+         else:
+             prec = 0
+         dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
+                       width, prec)
+     if rows is None:
+         rows = range(table.NumRows())
+     recNum = 0
+     for i in rows:
+         record = {}
+         for key, value in table.ReadRowAsDict(i).items():
+             record[name_map[key]] = value
+         dbf.write_record(recNum, record)
+         recNum += 1
+     dbf.close()
+ def table_to_csv(table, filename, rows = None):
+     """Export table to csv file.
+     If rows is None (the default) all rows are saved. Otherwise it must
+     be a list of row indices to be saved to the file.
+     """
+     file = open(filename,"w")
+     columns = table.Columns()
+     if columns:
+         header = "#%s" % columns[0].name
+         for col in columns[1:]:
+             header = header + ",%s" % col.name
+         header = header + "\n"
+         file.write(header)
+         if rows is None:
+             rows = range(table.NumRows())
+         for i in rows:
+             record = table.ReadRowAsDict(i)
+             if len(record):
+                 line = "%s" % record[columns[0].name]
+                 for col in columns[1:]:
+                     line = line + ",%s" % record[col.name]
+             line = line + "\n"
+             file.write(line)
+     file.close()

 Legend:



Removed from v.274
 


changed lines


 
Added in v.1961
 Legend:



Removed from v.274
 


changed lines


 
Added in v.1961
-Removed from v.274
+Added in v.1961

[email protected]	ViewVC Help
Powered by ViewVC 1.1.26