/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory | Revision Log | View Patch Patch

-revision 1662 by bh,
Wed Aug 27 13:51:01 2003 UTC
+revision 1923 by bh,
Fri Nov  7 12:07:01 2003 UTC
 Line 248 
 class TransientTableBase(table.OldTableI
          return self.ReadRowAsDict(row)[self.column_map[col].name]
      def ValueRange(self, col):
+         # Performance notes:
+         #
+         # In sqlite 2.8.6 the min and max aggregate functions can use an
+         # index but only when used as the only expression in the select
+         # statement (i.e. 'select min(col), max(col) from tbl;' will not
+         # use the index but 'select min(col) from tbl;' will) so we
+         # query the minimum and maximum separately.
+         #
+         # With the separate statements we can take advantage of an index
+         # if it exists. If the index doesn't exist, creating it first
+         # and then using it in the query is slower than the queries
+         # without an index. Creating the index is only an advantage if
+         # the queries are performed multiple times. With the current use
+         # patterns where ValueRange is only used occasionally by the
+         # classification generation dialog creating the index only for
+         # this usage is not really worth it, so we don't.
          col = self.column_map[col]
          iname = col.internal_name
-         min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
+         min = self.db.execute("SELECT min(%s) FROM %s;"
-                                    % (iname, iname, self.tablename))
+                               % (iname, self.tablename))[0]
+         max = self.db.execute("SELECT max(%s) FROM %s;"
+                               % (iname, self.tablename))[0]
          converter = type_converter_map[col.type]
          return (converter(min), converter(max))
      def UniqueValues(self, col):
+         # Performance notes:
+         #
+         # In sqlite 2.8.6 there doesn't seem to be a way to query the
+         # unique items that uses an index. I've tried
+         #
+         #   SELECT col FROM tbl GROUP BY col;
+         #
+         # and
+         #
+         #   SELECT DISTINCT col FROM tbl;
+         #
+         # and in both cases the index is not used. If the index isn't
+         # used it doesn't make sense to call self.ensure_index.
          iname = self.column_map[col].internal_name
          cursor = self.db.cursor()
          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
-Line 579 
 class AutoTransientTable(TitledObject, t
+Line 610 
 class AutoTransientTable(TitledObject, t
          return self.t_table
      def ValueRange(self, col):
-         if self.t_table is None:
+         # Performance of sqlite vs. DBF for this method:
-             self.copy_to_transient()
+         #
-         return self.t_table.ValueRange(col)
+         # If the table has been copied to the sqlite database it's
+         # faster to use it even if there is no index on that column.
+         # Otherwise it's faster to simply loop through all rows in the
+         # DBF file. Copying the data to the sqlite database can take
+         # very long for large amounts of data.
+         #
+         # Of course if the table is not a DBF file the issue could be
+         # different, although copying the data into sqlite first will
+         # likely always be slower than simply querying the non-sqlite
+         # table directly. Currently only DBF files and memory tables are
+         # used as the underlying non-sqlite table, though.
+         if self.t_table is not None:
+             return self.t_table.ValueRange(col)
+         else:
+             return self.table.ValueRange(col)
      def UniqueValues(self, col):
-         if self.t_table is None:
+         # The performance trade-offs for this method are basically the
-             self.copy_to_transient()
+         # same as for ValueRange except that currently there doesn't
-         return self.t_table.UniqueValues(col)
+         # seem to be a way to take advantage of indexes in this case in
+         # sqlite. However, it's still faster to query the transient
+         # table if it already exists.
+         if self.t_table is not None:
+             return self.t_table.UniqueValues(col)
+         else:
+             return self.table.UniqueValues(col)
      def SimpleQuery(self, left, comparison, right):
          if self.t_table is None:

 Legend:



Removed from v.1662
 


changed lines


 
Added in v.1923
 Legend:



Removed from v.1662
 


changed lines


 
Added in v.1923
-Removed from v.1662
+Added in v.1923

[email protected]	ViewVC Help
Powered by ViewVC 1.1.26