/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory | Revision Log | View Patch Patch

-revision 1043 by bh,
Mon May 26 19:27:15 2003 UTC
+revision 1919 by bh,
Mon Nov  3 17:33:36 2003 UTC
 Line 132 
 class DBFTable(TitledObject, OldTableInt
      # work because a DBF file object buffers some data
      def __init__(self, filename):
-         self.filename = filename
+         self.filename = os.path.abspath(filename)
-         title = os.path.basename(self.filename)
+         # Omit the extension in the title as it's not really needed and
+         # it can be confusing because dbflib removes extensions and
+         # appends some variations of '.dbf' before it tries to open the
+         # file. So the title could be e.g. myshapefile.shp when the real
+         # filename is myshapefile.dbf
+         title = os.path.splitext(os.path.basename(self.filename))[0]
          TitledObject.__init__(self, title)
          self.dbf = dbflib.DBFFile(filename)
          # If true, self.dbf is open for writing.
-Line 180 
 class DBFTable(TitledObject, OldTableInt
+Line 187 
 class DBFTable(TitledObject, OldTableInt
          """
          return self.column_map.has_key(col)
-     def ReadRowAsDict(self, row):
+     def RowIdToOrdinal(self, gid):
-         """Return the entire row as a dictionary with column names as keys"""
+         """Return the row ordinal given its id
+         Since for DBFTables the row id is the row number, return the
+         value unchanged.
+         """
+         return gid
+     def RowOrdinalToId(self, num):
+         """Return the row id given its ordinal
+         Since for DBFTables the row id is the row number, return the
+         value unchanged.
+         """
+         return num
+     def ReadRowAsDict(self, row, row_is_ordinal = 0):
+         """Return the entire row as a dictionary with column names as keys
+         The row_is_ordinal is ignored for DBF tables because the row id
+         is always the row number.
+         """
          return self.dbf.read_record(row)
-     def ReadValue(self, row, col):
+     def ReadValue(self, row, col, row_is_ordinal = 0):
          """Return the value of the specified row and column
          The col parameter may be the index of the column or its name.
+         The row_is_ordinal is ignored for DBF tables because the row id
+         is always the row number.
          """
-         return self.dbf.read_record(row)[self.column_map[col].name]
+         return self.dbf.read_attribute(row, self.column_map[col].index)
      def ValueRange(self, col):
          """Return the minimum and maximum values of the values in the column
-Line 324 
 class MemoryTable(TitledObject, OldTable
+Line 354 
 class MemoryTable(TitledObject, OldTable
          """Return the number of rows in the table"""
          return len(self.data)
-     def ReadValue(self, row, col):
+     def RowIdToOrdinal(self, gid):
+         """Return the row ordinal given its id
+         Since for MemoryTables the row id is the row number, return the
+         value unchanged.
+         """
+         return gid
+     def RowOrdinalToId(self, num):
+         """Return the row id given its ordinal
+         Since for MemoryTables the row id is the row number, return the
+         value unchanged.
+         """
+         return num
+     def ReadValue(self, row, col, row_is_ordinal = 0):
          """Return the value of the specified row and column
          The col parameter may be the index of the column or its name.
+         The row_is_ordinal parameter is ignored for MemoryTables because
+         the row id is always the row number.
          """
          return self.data[row][self.column_map[col].index]
-     def ReadRowAsDict(self, index):
+     def ReadRowAsDict(self, index, row_is_ordinal = 0):
-         """Return the entire row as a dictionary with column names as keys"""
+         """Return the entire row as a dictionary with column names as keys
+         The row_is_ordinal parameter is ignored for MemoryTables because
+         the row id is always the row number.
+         """
          return dict([(col.name, self.data[index][col.index])
                        for col in self.columns])
-Line 406 
 class MemoryTable(TitledObject, OldTable
+Line 459 
 class MemoryTable(TitledObject, OldTable
          self.data[record] = values
- def table_to_dbf(table, filename):
-     """Create the dbf file filename from the table"""
+ def _find_dbf_column_names(names):
+     """Determine the column names to use in a DBF file
+     DBF files have a length limit of 10 characters on the column names
+     so when writing an arbitrary Thuban table to a DBF file we may have
+     to rename some of the columns, making sure that they're unique in
+     the DBF file too.
+     Names that are already short enough will stay the same. Longer names
+     will be truncated to 10 characters or, if that isn't unique, they
+     will be truncated further and filled up with digits.
+     The parameter names should be a list of the column names. The return
+     value will be a dictionary mapping the names in the input list to
+     the names to use in the DBF file.
+     """
+     # mapping from the original names in table to the names in the DBF
+     # file
+     name_map = {}
+     # First, we keep all names that are already short enough
+     for i in range(len(names) - 1, -1, -1):
+         if len(names[i]) <= 10:
+             name_map[names[i]] = names[i]
+             del names[i]
+     # dict used as a set of all names already used as DBF column names
+     used = name_map.copy()
+     # Go through all longer names. If the name truncated to 10
+     # characters is not used already, we use that. Otherwise we truncate
+     # it more and append numbers until we get an unused name
+     for name in names:
+         truncated = name[:10]
+         num = 0; numstr = ""
+         #print "truncated", truncated, num
+         while truncated in used and len(numstr) < 10:
+             num += 1
+             numstr = str(num)
+             truncated = name[:10 - len(numstr)] + numstr
+             #print "truncated", truncated, num
+         if len(numstr) >= 10:
+             # This case should never happen in practice as tables with
+             # 10^10 columns seem very unlikely :)
+             raise ValueError("Can't find unique dbf column name")
+         name_map[name] = truncated
+         used[truncated] = 1
+     return name_map
+ def table_to_dbf(table, filename, rows = None):
+     """Create the dbf file filename from the table.
+     If rows is None (the default), all rows are saved. Otherwise it must
+     be a list of the row indices to be saved to the file.
+     """
      dbf = dbflib.create(filename)
      dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
                           FIELDTYPE_INT: dbflib.FTInteger,
                           FIELDTYPE_DOUBLE: dbflib.FTDouble}
+     name_map = _find_dbf_column_names([col.name for col in table.Columns()])
      # Initialise the header. Distinguish between DBFTable and others.
      for col in table.Columns():
          width = table.Width(col.name)
-Line 421 
 def table_to_dbf(table, filename):
+Line 534 
 def table_to_dbf(table, filename):
              prec = getattr(col, "prec", 12)
          else:
              prec = 0
-         dbf.add_field(col.name, dbflib_fieldtypes[col.type], width, prec)
+         dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
+                       width, prec)
+     if rows is None:
+         rows = range(table.NumRows())
-     for i in range(table.NumRows()):
+     recNum = 0
-         record = table.ReadRowAsDict(i)
+     for i in rows:
-         dbf.write_record(i, record)
+         record = {}
+         for key, value in table.ReadRowAsDict(i).items():
+             record[name_map[key]] = value
+         dbf.write_record(recNum, record)
+         recNum += 1
      dbf.close()
- def table_to_csv(table, filename):
+ def table_to_csv(table, filename, rows = None):
-     """Export table to csv file."""
+     """Export table to csv file.
+     If rows is None (the default), all rows are saved. Otherwise it must
+     be a list of the row indices to be saved to the file.
+     """
      file = open(filename,"w")
      columns = table.Columns()
-Line 440 
 def table_to_csv(table, filename):
+Line 565 
 def table_to_csv(table, filename):
          header = header + "\n"
          file.write(header)
-         for i in range(table.NumRows()):
+         if rows is None:
+             rows = range(table.NumRows())
+         for i in rows:
              record = table.ReadRowAsDict(i)
              if len(record):
                  line = "%s" % record[columns[0].name]

 Legend:



Removed from v.1043
 


changed lines


 
Added in v.1919
 Legend:



Removed from v.1043
 


changed lines


 
Added in v.1919
-Removed from v.1043
+Added in v.1919

[email protected]	ViewVC Help
Powered by ViewVC 1.1.26