/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory | Revision Log | View Patch Patch

-revision 849 by bh,
Wed May  7 11:55:31 2003 UTC
+revision 2681 by dpinte,
Mon May 15 16:11:46 2006 UTC
 Line 18 
 __version__ = "$Revision$"
  # $Source$
  # $Id$
- import os
+ # Pysqlite versions 1 and 2 behave quite differently
- import weakref
+ # Pysqlite uses a different paramstyle.  The older version
- from sqlite import connect
+ # supports format and pyformat while pysqlite2 supports only qmark
+ # and named.
+ # The sqlite2 boolean variable is used to select the version-specific parts of the code
+ try:
+           # Using pysqlite 2.x
+     sqlite2 = True
+     from pysqlite2 import dbapi2 as sqlite
+ except ImportError:
+           # Using pysqlite 1.x
+     sqlite2 = False
+     import sqlite
+ from base import TitledObject
  import table
-Line 40 
 class TransientDatabase:
+Line 53 
 class TransientDatabase:
      def __init__(self, filename):
          self.filename = filename
-         self.conn = connect(filename)
+         self.conn = sqlite.connect(filename)
          # Counters to produce unique table and column names
          self.num_tables = 0
          self.num_cols = 0
-Line 85 
 class ColumnReference:
+Line 98 
 class ColumnReference:
          self.internal_name = internal_name
- class TransientTableBase(table.OldTableInterfaceMixin):
+ class TransientTableBase:
      """Base class for tables in the transient database"""
-Line 160 
 class TransientTableBase(table.OldTableI
+Line 173 
 class TransientTableBase(table.OldTableI
          """
          return self.column_map.has_key(col)
-     def ReadRowAsDict(self, index):
+     def RowIdToOrdinal(self, gid):
+         """Return the row ordinal given its id
+         At the moment the transient tables are only used for tables that
+         don't distinguish between row number and row id, so the value is
+         returned unchanged.
+         """
+         return gid
+     def RowOrdinalToId(self, num):
+         """Return the row id given its ordinal
+         At the moment the transient tables are only used for tables that
+         don't distinguish between row number and row id, so the value is
+         returned unchanged.
+         """
+         return num
+     def ReadRowAsDict(self, index, row_is_ordinal = 0):
+         """Return the entire row as a dictionary with column names as keys
+         The row_is_ordinal is ignored because at the moment the
+         transient tables are only used for DBF files where it doesn't
+         matter.
+         """
          # Implementation Strategy: Executing a completely new select
          # statement every time this method is called is too slow. The
          # most important usage is to read the records more or less
-Line 208 
 class TransientTableBase(table.OldTableI
+Line 245 
 class TransientTableBase(table.OldTableI
          self.read_record_last_row = index
          return dict(zip(self.orig_names, result))
-     def ReadValue(self, row, col):
+     def ReadValue(self, row, col, row_is_ordinal = 0):
          """Return the value of the specified row and column
          The col parameter may be the index of the column or its name.
+         The row_is_ordinal is ignored because at the moment the
+         transient tables are only used for DBF files where it doesn't
+         matter.
          """
          # Depending on the actual access patterns of the table data, it
          # might be a bit faster in some circumstances to not implement
-Line 220 
 class TransientTableBase(table.OldTableI
+Line 261 
 class TransientTableBase(table.OldTableI
          return self.ReadRowAsDict(row)[self.column_map[col].name]
      def ValueRange(self, col):
+         # Performance notes:
+         #
+         # In sqlite 2.8.6 the min and max aggregate functions can use an
+         # index but only when used as the only expression in the select
+         # statement (i.e. 'select min(col), max(col) from tbl;' will not
+         # use the index but 'select min(col) from tbl;' will) so we
+         # query the minimum and maximum separately.
+         #
+         # With the separate statements we can take advantage of an index
+         # if it exists. If the index doesn't exist, creating it first
+         # and then using it in the query is slower than the queries
+         # without an index. Creating the index is only an advantage if
+         # the queries are performed multiple times. With the current use
+         # patterns where ValueRange is only used occasionally by the
+         # classification generation dialog creating the index only for
+         # this usage is not really worth it, so we don't.
          col = self.column_map[col]
          iname = col.internal_name
-         min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
+         min = self.db.execute("SELECT min(%s) FROM %s;"
-                                    % (iname, iname, self.tablename))
+                               % (iname, self.tablename))[0]
+         max = self.db.execute("SELECT max(%s) FROM %s;"
+                               % (iname, self.tablename))[0]
          converter = type_converter_map[col.type]
          return (converter(min), converter(max))
      def UniqueValues(self, col):
+         # Performance notes:
+         #
+         # In sqlite 2.8.6 there doesn't seem to be a way to query the
+         # unique items that uses an index. I've tried
+         #
+         #   SELECT col FROM tbl GROUP BY col;
+         #
+         # and
+         #
+         #   SELECT DISTINCT col FROM tbl;
+         #
+         # and in both cases the index is not used. If the index isn't
+         # used it doesn't make sense to call self.ensure_index.
          iname = self.column_map[col].internal_name
          cursor = self.db.cursor()
          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
-Line 240 
 class TransientTableBase(table.OldTableI
+Line 312 
 class TransientTableBase(table.OldTableI
              result.append(row[0])
          return result
+     def Width(self, col):
+         """Return the maximum width of values in the column
+         The return value is the maximum length of the string
+         representation of the values in the column (identified by index
+         or name).
+         """
+         max = 0
+         type  = self.column_map[col].type
+         iname = self.column_map[col].internal_name
+         cursor = self.db.cursor()
+         cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
+         values = [ i[0] for i in cursor.fetchall()]
+         if not values:
+             return None
+         if type == table.FIELDTYPE_DOUBLE:
+             format = "%.12f"
+         elif type == table.FIELDTYPE_INT:
+             format = "%d"
+         else:
+             format = "%s"
+         for value in values:
+             if value is None: continue
+             l = len(format % value)
+             if l > max:
+                 max = l
+         return max
      def SimpleQuery(self, left, comparison, right):
          """Return the indices of all rows that matching a condition.
-Line 263 
 class TransientTableBase(table.OldTableI
+Line 366 
 class TransientTableBase(table.OldTableI
              right_template = right.internal_name
              params = ()
          else:
-             right_template = "%s"
+             if sqlite2:
+                 right_template = "?"
+             else: right_template = "%s"
              params = (right,)
          query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
-Line 280 
 class TransientTableBase(table.OldTableI
+Line 385 
 class TransientTableBase(table.OldTableI
              result.append(row[0])
          return result
+     def Dependencies(self):
+         """Placeholder for a method in a derived class.
+         Return a sequence with the tables and other data objects that
+         self depends on.
+         """
+         raise NotImplementedError
- class TransientTable(TransientTableBase):
+ class TransientTable(TitledObject, TransientTableBase):
      """A Table in a transient DB that starts as the copy of a Thuban Table."""
-Line 292 
 class TransientTable(TransientTableBase)
+Line 405 
 class TransientTable(TransientTableBase)
          interface.
          """
          TransientTableBase.__init__(self, transient_db)
+         TitledObject.__init__(self, table.Title())
          self.create(table)
      def create(self, table):
-Line 309 
 class TransientTable(TransientTableBase)
+Line 423 
 class TransientTable(TransientTableBase)
          # longer than any of the column names
          id_key = max([len(col.name) for col in self.columns]) * "x"
-         insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
+         if sqlite2:
+             insert_template = "INSERT INTO %s (id, %s) VALUES (%s, %s);" \
+                                % (self.tablename,
+                                   ", ".join([col.internal_name
+                                              for col in self.columns]),
+                                   '?',
+                                   ", ".join(["?" for col in self.columns]))
+         else:
+             insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
                                 % (self.tablename,
                                    ", ".join([col.internal_name
                                               for col in self.columns]),
-Line 320 
 class TransientTable(TransientTableBase)
+Line 443 
 class TransientTable(TransientTableBase)
          for i in range(table.NumRows()):
              row = table.ReadRowAsDict(i)
              row[id_key] = i
-             cursor.execute(insert_template, row)
+             if sqlite2:
+                 params = [i]
+                 for col in self.columns:
+                   params.append(row[col.name])
+                 cursor.execute(insert_template, params)
+             else:
+                 cursor.execute(insert_template, row)
          self.db.conn.commit()
- class TransientJoinedTable(TransientTableBase):
+ class TransientJoinedTable(TitledObject, TransientTableBase):
      """A Table in the transient DB that contains a join of two tables"""
      def __init__(self, transient_db, left_table, left_field,
-                  right_table, right_field = None):
+                  right_table, right_field = None, outer_join = False):
          """Create a new table in the transient DB as a join of two tables.
          Both input tables, left_table and right_table must have a
          transient_table method that returns a table object for a table
-         in the trnsient database. The join is performed on the condition
+         in the transient database. The join is performed on the condition
          that the value of the left_field column of the left table is
          equal to the value of the right_field in the right_table.
-         The joined table contains all columns of the input tables with
+         The joined table contains all columns of the input tables,
-         one exception: Any column in the right_table with the same name
+         however, the column names of the right table may be changed
-         as one of the columns in the left_table will be omitted. This is
+         slightly to make them unique in the joined table. This is
-         somewhat of an implementation detail, but is done so that the
+         currently done by appending a sufficient number of underscores
-         column names of the joined table can be the same as the column
+         ('_').
-         names of the input tables without having to create prefixes.
          """
          TransientTableBase.__init__(self, transient_db)
+         self.dependencies = (left_table, right_table)
          self.left_table = left_table.transient_table()
          self.left_field = left_field
          self.right_table = right_table.transient_table()
-Line 354 
 class TransientJoinedTable(TransientTabl
+Line 483 
 class TransientJoinedTable(TransientTabl
              self.right_field = right_field
          else:
              self.right_field = self.left_field
+         self.outer_join = outer_join
+         title = "Join of %(left)s and %(right)s" \
+                 % {"left": self.left_table.Title(),
+                    "right": self.right_table.Title()}
+         TitledObject.__init__(self, title)
          self.create()
      def create(self):
-Line 362 
 class TransientJoinedTable(TransientTabl
+Line 498 
 class TransientJoinedTable(TransientTabl
          self.right_table.ensure_index(self.right_field)
+         # determine the internal column names to join on before
+         # coalescing the column information because if the external
+         # column names are the same they will be mapped to the same
+         # internal name afterwards.
+         internal_left_col = self.left_table.orig_to_internal[self.left_field]
+         internal_right_col =self.right_table.orig_to_internal[self.right_field]
          # Coalesce the column information
          visited = {}
          columns = []
-         for col in self.left_table.columns + self.right_table.columns:
+         newcolumns = []
-             if col.name in visited:
+         for table in (self.left_table, self.right_table):
-                 # We can't allow multiple columns with the same original
+             for col in table.Columns():
-                 # name, so omit this one. FIXME: There should be a
+                 colname = col.name
-                 # better solution.
+                 # We can't allow multiple columns with the same
-                 continue
+                 # original name, so append '_' to this one until
-             columns.append(col)
+                 # it is unique.
-         TransientTableBase.create(self, columns)
+                 # FIXME: There should be a better solution.
+                 while colname in visited:
+                     colname = colname + '_'
+                 columns.append((table.tablename, col))
+                 newcol = ColumnReference(colname, col.type,
+                                             "Col%03d" % (len(newcolumns)+1))
+                 newcolumns.append(newcol)
+                 visited[colname] = 1
+         TransientTableBase.create(self, newcolumns)
          # Copy the joined data to the table.
-         internal_names = [col.internal_name for col in self.columns]
+         newinternal_names = [col.internal_name for col in self.columns]
+         internal_references = ["%s.%s" % (table, col.internal_name)
+                                                     for table, col in columns]
+         if self.outer_join:
+             join_operator = 'LEFT OUTER JOIN'
+         else:
+             join_operator = 'JOIN'
          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
-                 " JOIN %s ON %s = %s;"
+                 " %s %s ON %s.%s = %s.%s;"
                  % (self.tablename,
-                    ", ".join(internal_names),
+                    ", ".join(newinternal_names),
                     self.left_table.tablename,
-                    ", ".join(internal_names),
+                    ", ".join(internal_references),
                     self.left_table.tablename,
+                    join_operator,
                     self.right_table.tablename,
-                    self.orig_to_internal[self.left_field],
+                    self.left_table.tablename,
-                    self.orig_to_internal[self.right_field]))
+                    internal_left_col,
+                    self.right_table.tablename,
+                    internal_right_col))
          self.db.execute(stmt)
+     def Dependencies(self):
+         """Return a tuple with the two tables the join depends on."""
+         return self.dependencies
+     def JoinType(self):
+         """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
+         if self.outer_join:
+             return "LEFT OUTER"
+         else:
+             return "INNER"
- class AutoTransientTable(table.OldTableInterfaceMixin):
+ class AutoTransientTable(TitledObject):
      """Table that copies data to a transient table on demand.
-Line 398 
 class AutoTransientTable(table.OldTableI
+Line 569 
 class AutoTransientTable(table.OldTableI
      """
      def __init__(self, transient_db, table):
+         TitledObject.__init__(self, table.Title())
          self.transient_db = transient_db
          self.table = table
          self.t_table = None
-Line 419 
 class AutoTransientTable(table.OldTableI
+Line 591 
 class AutoTransientTable(table.OldTableI
      def NumColumns(self):
          return self.table.NumColumns()
-     def ReadRowAsDict(self, record):
+     def RowIdToOrdinal(self, gid):
+         """Return the row ordinal given its id"""
+         if self.t_table is not None:
+             return self.t_table.RowIdToOrdinal(gid)
+         else:
+             return self.table.RowIdToOrdinal(gid)
+     def RowOrdinalToId(self, num):
+         """Return the row id given its ordinal"""
+         if self.t_table is not None:
+             return self.t_table.RowOrdinalToId(num)
+         else:
+             return self.table.RowOrdinalToId(num)
+     def ReadRowAsDict(self, record, row_is_ordinal = 0):
          """Return the record no. record as a dict mapping field names to values
          """
          if self.t_table is not None:
-             return self.t_table.ReadRowAsDict(record)
+             return self.t_table.ReadRowAsDict(record,
+                                               row_is_ordinal = row_is_ordinal)
          else:
-             return self.table.ReadRowAsDict(record)
+             return self.table.ReadRowAsDict(record,
+                                             row_is_ordinal = row_is_ordinal)
-     def ReadValue(self, row, col):
+     def ReadValue(self, row, col, row_is_ordinal = 0):
          """Return the value of the specified row and column
          The col parameter may be the index of the column or its name.
          """
          if self.t_table is not None:
-             return self.t_table.ReadValue(row, col)
+             return self.t_table.ReadValue(row, col,
+                                           row_is_ordinal = row_is_ordinal)
          else:
-             return self.table.ReadValue(row, col)
+             return self.table.ReadValue(row, col,
+                                         row_is_ordinal = row_is_ordinal)
      def copy_to_transient(self):
          """Internal: Create a transient table and copy the data into it"""
-Line 450 
 class AutoTransientTable(table.OldTableI
+Line 640 
 class AutoTransientTable(table.OldTableI
          return self.t_table
      def ValueRange(self, col):
-         if self.t_table is None:
+         # Performance of sqlite vs. DBF for this method:
-             self.copy_to_transient()
+         #
-         return self.t_table.ValueRange(col)
+         # If the table has been copied to the sqlite database it's
+         # faster to use it even if there is no index on that column.
+         # Otherwise it's faster to simply loop through all rows in the
+         # DBF file. Copying the data to the sqlite database can take
+         # very long for large amounts of data
+         #
+         # Of course if the table is not a DBF file the issue could be
+         # different, although copying the data into sqlite first will
+         # likely always be slower than simply querying the non-sqlite
+         # table directly. Currently only DBF files and memory tables are
+         # used as the underlying non-sqlite table, though.
+         if self.t_table is not None:
+             return self.t_table.ValueRange(col)
+         else:
+             return self.table.ValueRange(col)
      def UniqueValues(self, col):
-         if self.t_table is None:
+         # The performance trade-offs for this method are basically the
-             self.copy_to_transient()
+         # same as for ValueRange except that currently there doesn't
-         return self.t_table.UniqueValues(col)
+         # seem to be a way to take advantage of indexes in this case in
+         # sqlite. However, it's still faster to query the transient
+         # table if it already exists.
+         if self.t_table is not None:
+             return self.t_table.UniqueValues(col)
+         else:
+             return self.table.UniqueValues(col)
      def SimpleQuery(self, left, comparison, right):
          if self.t_table is None:
-Line 465 
 class AutoTransientTable(table.OldTableI
+Line 675 
 class AutoTransientTable(table.OldTableI
          # Make sure to use the column object of the transient table. The
          # left argument is always a column object so we can just ask the
          # t_table for the right object.
-         return self.t_table.SimpleQuery(self.t_table.Column(left.name),
+         if hasattr(right, "name"):
-                                         comparison, right)
+             return self.t_table.SimpleQuery(self.t_table.Column(left.name),
+                                             comparison,
+                                             self.t_table.Column(right.name))
+         else:
+             return self.t_table.SimpleQuery(self.t_table.Column(left.name),
+                                             comparison, right)
+     def Dependencies(self):
+         """Return a tuple containing the original table"""
+         return (self.table,)
+     def Width(self, col):
+         return self.table.Width(col)
+     def write_record(self, row, values):
+         """Write the values to the given row.
+         This is a very experimental feature which doesn't work in all
+         cases, so you better know what you're doing when calling this
+         method.
+         """
+         self.table.write_record(row, values)

 Legend:



Removed from v.849
 


changed lines


 
Added in v.2681
 Legend:



Removed from v.849
 


changed lines


 
Added in v.2681
-Removed from v.849
+Added in v.2681

[email protected]	ViewVC Help
Powered by ViewVC 1.1.26