/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 849 by bh, Wed May 7 11:55:31 2003 UTC revision 1923 by bh, Fri Nov 7 12:07:01 2003 UTC
# Line 18  __version__ = "$Revision$" Line 18  __version__ = "$Revision$"
18  # $Source$  # $Source$
19  # $Id$  # $Id$
20    
 import os  
 import weakref  
21  from sqlite import connect  from sqlite import connect
22    
23    from base import TitledObject
24    
25  import table  import table
26    
27  sql_type_map = {  sql_type_map = {
# Line 160  class TransientTableBase(table.OldTableI Line 160  class TransientTableBase(table.OldTableI
160          """          """
161          return self.column_map.has_key(col)          return self.column_map.has_key(col)
162    
163      def ReadRowAsDict(self, index):      def RowIdToOrdinal(self, gid):
164            """Return the row ordinal given its id
165    
166            At the moment the transient tables are only used for tables that
167            don't distinguish between row number and row id, so the value is
168            returned unchanged.
169            """
170            return gid
171    
172        def RowOrdinalToId(self, num):
173            """Return the row id given its ordinal
174    
175            At the moment the transient tables are only used for tables that
176            don't distinguish between row number and row id, so the value is
177            returned unchanged.
178            """
179            return num
180    
181        def ReadRowAsDict(self, index, row_is_ordinal = 0):
182            """Return the entire row as a dictionary with column names as keys
183    
184            The row_is_ordinal is ignored because at the moment the
185            transient tables are only used for DBF files where it doesn't
186            matter.
187            """
188          # Implementation Strategy: Executing a completely new select          # Implementation Strategy: Executing a completely new select
189          # statement every time this method is called is too slow. The          # statement every time this method is called is too slow. The
190          # most important usage is to read the records more or less          # most important usage is to read the records more or less
# Line 208  class TransientTableBase(table.OldTableI Line 232  class TransientTableBase(table.OldTableI
232          self.read_record_last_row = index          self.read_record_last_row = index
233          return dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
234    
235      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
236          """Return the value of the specified row and column          """Return the value of the specified row and column
237    
238          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
239    
240            The row_is_ordinal is ignored because at the moment the
241            transient tables are only used for DBF files where it doesn't
242            matter.
243          """          """
244          # Depending on the actual access patterns of the table data, it          # Depending on the actual access patterns of the table data, it
245          # might be a bit faster in some circumstances to not implement          # might be a bit faster in some circumstances to not implement
# Line 220  class TransientTableBase(table.OldTableI Line 248  class TransientTableBase(table.OldTableI
248          return self.ReadRowAsDict(row)[self.column_map[col].name]          return self.ReadRowAsDict(row)[self.column_map[col].name]
249    
250      def ValueRange(self, col):      def ValueRange(self, col):
251            # Performance notes:
252            #
253            # In sqlite 2.8.6 the min and max aggregate functions can use an
254            # index but only when used as the only expression in the select
255            # statement (i.e. 'select min(col), max(col) from tbl;' will not
256            # use the index but 'select min(col) from tbl;' will) so we
257            # query the minimum and maximum separately.
258            #
259            # With the separate statements we can take advantage of an index
260            # if it exists. If the index doesn't exist, creating it first
261            # and then using it in the query is slower than the queries
262            # without an index. Creating the index is only an advantage if
263            # the queries are performed multiple times. With the current use
264            # patterns where ValueRange is only used occasionally by the
265            # classification generation dialog creating the index only for
266            # this usage is not really worth it, so we don't.
267          col = self.column_map[col]          col = self.column_map[col]
268          iname = col.internal_name          iname = col.internal_name
269          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min = self.db.execute("SELECT min(%s) FROM %s;"
270                                     % (iname, iname, self.tablename))                                % (iname, self.tablename))[0]
271            max = self.db.execute("SELECT max(%s) FROM %s;"
272                                  % (iname, self.tablename))[0]
273          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
274          return (converter(min), converter(max))          return (converter(min), converter(max))
275    
276      def UniqueValues(self, col):      def UniqueValues(self, col):
277            # Performance notes:
278            #
279            # In sqlite 2.8.6 there doesn't seem to be a way to query the
280            # unique items that uses an index. I've tried
281            #
282            #   SELECT col FROM tbl GROUP BY col;
283            #
284            # and
285            #
286            #   SELECT DISTINCT col FROM tbl;
287            #
288            # and in both cases the index is not used. If the index isn't
289            # used it doesn't make sense to call self.ensure_index.
290          iname = self.column_map[col].internal_name          iname = self.column_map[col].internal_name
291          cursor = self.db.cursor()          cursor = self.db.cursor()
292          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
# Line 240  class TransientTableBase(table.OldTableI Line 299  class TransientTableBase(table.OldTableI
299              result.append(row[0])              result.append(row[0])
300          return result          return result
301    
302        def Width(self, col):
303            """Return the maximum width of values in the column
304    
305            The return value is the maximum length of the string
306            representation of the values in the column (represented by index
307            or name).
308            """
309            max = 0
310    
311            type  = self.column_map[col].type
312            iname = self.column_map[col].internal_name
313            cursor = self.db.cursor()
314            cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
315            values = [ i[0] for i in cursor.fetchall()]
316            if not values:
317                return None
318    
319            if type == table.FIELDTYPE_DOUBLE:
320                format = "%.12f"
321            elif type == table.FIELDTYPE_INT:
322                format = "%d"
323            else:
324                format = "%s"
325            for value in values:
326                if value is None: continue
327                l = len(format % value)
328                if l > max:
329                    max = l
330    
331            return max
332    
333      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
334          """Return the indices of all rows that match a condition.          """Return the indices of all rows that match a condition.
335    
# Line 280  class TransientTableBase(table.OldTableI Line 370  class TransientTableBase(table.OldTableI
370              result.append(row[0])              result.append(row[0])
371          return result          return result
372    
373        def Dependencies(self):
374            """Placeholder for a method in a derived class.
375    
376  class TransientTable(TransientTableBase):          Return a sequence with the tables and other data objects that
377            self depends on.
378            """
379            raise NotImplementedError
380    
381    
382    class TransientTable(TitledObject, TransientTableBase):
383    
384      """A Table in a transient DB that starts as the copy of a Thuban Table."""      """A Table in a transient DB that starts as the copy of a Thuban Table."""
385    
# Line 292  class TransientTable(TransientTableBase) Line 390  class TransientTable(TransientTableBase)
390          interface.          interface.
391          """          """
392          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
393            TitledObject.__init__(self, table.Title())
394          self.create(table)          self.create(table)
395    
396      def create(self, table):      def create(self, table):
# Line 325  class TransientTable(TransientTableBase) Line 424  class TransientTable(TransientTableBase)
424    
425    
426    
427  class TransientJoinedTable(TransientTableBase):  class TransientJoinedTable(TitledObject, TransientTableBase):
428    
429      """A Table in the transient DB that contains a join of two tables"""      """A Table in the transient DB that contains a join of two tables"""
430    
431      def __init__(self, transient_db, left_table, left_field,      def __init__(self, transient_db, left_table, left_field,
432                   right_table, right_field = None):                   right_table, right_field = None, outer_join = False):
433          """Create a new table in the transient DB as a join of two tables.          """Create a new table in the transient DB as a join of two tables.
434    
435          Both input tables, left_table and right_table must have a          Both input tables, left_table and right_table must have a
436          transient_table method that returns a table object for a table          transient_table method that returns a table object for a table
437          in the trnsient database. The join is performed on the condition          in the transient database. The join is performed on the condition
438          that the value of the left_field column of the left table is          that the value of the left_field column of the left table is
439          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
440    
441          The joined table contains all columns of the input tables with          The joined table contains all columns of the input tables,
442          one exception: Any column in the right_table with the same name          however, the column names of the right table may be changed
443          as one of the columns in the left_table will be omitted. This is          slightly to make them unique in the joined table. This is
444          somewhat of an implementation detail, but is done so that the          currently done by appending a sufficient number of underscores
445          column names of the joined table can be the same as the column          ('_').
         names of the input tables without having to create prefixes.  
446          """          """
447          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
448            self.dependencies = (left_table, right_table)
449          self.left_table = left_table.transient_table()          self.left_table = left_table.transient_table()
450          self.left_field = left_field          self.left_field = left_field
451          self.right_table = right_table.transient_table()          self.right_table = right_table.transient_table()
# Line 354  class TransientJoinedTable(TransientTabl Line 453  class TransientJoinedTable(TransientTabl
453              self.right_field = right_field              self.right_field = right_field
454          else:          else:
455              self.right_field = self.left_field              self.right_field = self.left_field
456            self.outer_join = outer_join
457    
458            title = "Join of %(left)s and %(right)s" \
459                    % {"left": self.left_table.Title(),
460                       "right": self.right_table.Title()}
461            TitledObject.__init__(self, title)
462    
463          self.create()          self.create()
464    
465      def create(self):      def create(self):
# Line 362  class TransientJoinedTable(TransientTabl Line 468  class TransientJoinedTable(TransientTabl
468    
469          self.right_table.ensure_index(self.right_field)          self.right_table.ensure_index(self.right_field)
470    
471            # determine the internal column names to join on before
472            # coalescing the column information because if the external
473            # column names are the same they will be mapped to the same
474            # internal name afterwards.
475            internal_left_col = self.left_table.orig_to_internal[self.left_field]
476            internal_right_col =self.right_table.orig_to_internal[self.right_field]
477    
478          # Coalesce the column information          # Coalesce the column information
479          visited = {}          visited = {}
480          columns = []          columns = []
481          for col in self.left_table.columns + self.right_table.columns:          newcolumns = []
482              if col.name in visited:          for table in (self.left_table, self.right_table):
483                  # We can't allow multiple columns with the same original              for col in table.Columns():
484                  # name, so omit this one. FIXME: There should be a                  colname = col.name
485                  # better solution.                  # We can't allow multiple columns with the same
486                  continue                  # original name, so append '_' to this one until
487              columns.append(col)                  # it is unique.
488          TransientTableBase.create(self, columns)                  # FIXME: There should be a better solution.
489                    while colname in visited:
490                        colname = colname + '_'
491                    columns.append((table.tablename, col))
492                    newcol = ColumnReference(colname, col.type,
493                                                "Col%03d" % (len(newcolumns)+1))
494                    newcolumns.append(newcol)
495                    visited[colname] = 1
496            TransientTableBase.create(self, newcolumns)
497    
498          # Copy the joined data to the table.          # Copy the joined data to the table.
499          internal_names = [col.internal_name for col in self.columns]          newinternal_names = [col.internal_name for col in self.columns]
500            internal_references = ["%s.%s" % (table, col.internal_name)
501                                                        for table, col in columns]
502            if self.outer_join:
503                join_operator = 'LEFT OUTER JOIN'
504            else:
505                join_operator = 'JOIN'
506          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
507                  " JOIN %s ON %s = %s;"                  " %s %s ON %s.%s = %s.%s;"
508                  % (self.tablename,                  % (self.tablename,
509                     ", ".join(internal_names),                     ", ".join(newinternal_names),
510                       self.left_table.tablename,
511                       ", ".join(internal_references),
512                     self.left_table.tablename,                     self.left_table.tablename,
513                     ", ".join(internal_names),                     join_operator,
514                       self.right_table.tablename,
515                     self.left_table.tablename,                     self.left_table.tablename,
516                       internal_left_col,
517                     self.right_table.tablename,                     self.right_table.tablename,
518                     self.orig_to_internal[self.left_field],                     internal_right_col))
                    self.orig_to_internal[self.right_field]))  
519          self.db.execute(stmt)          self.db.execute(stmt)
520    
521        def Dependencies(self):
522            """Return a tuple with the two tables the join depends on."""
523            return self.dependencies
524    
525        def JoinType(self):
526            """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
527            if self.outer_join:
528                return "LEFT OUTER"
529            else:
530                return "INNER"
531    
532    
533  class AutoTransientTable(table.OldTableInterfaceMixin):  class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
534    
535      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
536    
# Line 398  class AutoTransientTable(table.OldTableI Line 539  class AutoTransientTable(table.OldTableI
539      """      """
540    
541      def __init__(self, transient_db, table):      def __init__(self, transient_db, table):
542            TitledObject.__init__(self, table.Title())
543          self.transient_db = transient_db          self.transient_db = transient_db
544          self.table = table          self.table = table
545          self.t_table = None          self.t_table = None
# Line 419  class AutoTransientTable(table.OldTableI Line 561  class AutoTransientTable(table.OldTableI
561      def NumColumns(self):      def NumColumns(self):
562          return self.table.NumColumns()          return self.table.NumColumns()
563    
564      def ReadRowAsDict(self, record):      def RowIdToOrdinal(self, gid):
565            """Return the row ordinal given its id"""
566            if self.t_table is not None:
567                return self.t_table.RowIdToOrdinal(gid)
568            else:
569                return self.table.RowIdToOrdinal(gid)
570    
571        def RowOrdinalToId(self, num):
572            """Return the row id given its ordinal"""
573            if self.t_table is not None:
574                return self.t_table.RowOrdinalToId(num)
575            else:
576                return self.table.RowOrdinalToId(num)
577    
578        def ReadRowAsDict(self, record, row_is_ordinal = 0):
579          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
580          """          """
581          if self.t_table is not None:          if self.t_table is not None:
582              return self.t_table.ReadRowAsDict(record)              return self.t_table.ReadRowAsDict(record,
583                                                  row_is_ordinal = row_is_ordinal)
584          else:          else:
585              return self.table.ReadRowAsDict(record)              return self.table.ReadRowAsDict(record,
586                                                row_is_ordinal = row_is_ordinal)
587    
588      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
589          """Return the value of the specified row and column          """Return the value of the specified row and column
590    
591          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
592          """          """
593          if self.t_table is not None:          if self.t_table is not None:
594              return self.t_table.ReadValue(row, col)              return self.t_table.ReadValue(row, col,
595                                              row_is_ordinal = row_is_ordinal)
596          else:          else:
597              return self.table.ReadValue(row, col)              return self.table.ReadValue(row, col,
598                                            row_is_ordinal = row_is_ordinal)
599    
600      def copy_to_transient(self):      def copy_to_transient(self):
601          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 450  class AutoTransientTable(table.OldTableI Line 610  class AutoTransientTable(table.OldTableI
610          return self.t_table          return self.t_table
611    
612      def ValueRange(self, col):      def ValueRange(self, col):
613          if self.t_table is None:          # Performance of sqlite vs. DBF for this method:
614              self.copy_to_transient()          #
615          return self.t_table.ValueRange(col)          # If the table has been copied to the sqlite database it's
616            # faster to use it even if there is no index on that column.
617            # Otherwise it's faster to simply loop through all rows in the
618            # DBF file. Copying the data to the sqlite database can take
619            # very long for large amounts of data
620            #
621            # Of course if the table is not a DBF file the issue could be
622            # different, although copying the data into sqlite first will
623            # likely always be slower than simply querying the non-sqlite
624            # table directly. Currently only DBF files and memory tables are
625            # used as the underlying non-sqlite table, though.
626            if self.t_table is not None:
627                return self.t_table.ValueRange(col)
628            else:
629                return self.table.ValueRange(col)
630    
631      def UniqueValues(self, col):      def UniqueValues(self, col):
632          if self.t_table is None:          # The performance trade-offs for this method are basically the
633              self.copy_to_transient()          # same as for ValueRange except that currently there doesn't
634          return self.t_table.UniqueValues(col)          # seem to be a way to take advantage of indexes in this case in
635            # sqlite. However, it's still faster to query the transient
636            # table if it already exists.
637            if self.t_table is not None:
638                return self.t_table.UniqueValues(col)
639            else:
640                return self.table.UniqueValues(col)
641    
642      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
643          if self.t_table is None:          if self.t_table is None:
# Line 465  class AutoTransientTable(table.OldTableI Line 645  class AutoTransientTable(table.OldTableI
645          # Make sure to use the column object of the transient table. The          # Make sure to use the column object of the transient table. The
646          # left argument is always a column object so we can just ask the          # left argument is always a column object so we can just ask the
647          # t_table for the right object.          # t_table for the right object.
648          return self.t_table.SimpleQuery(self.t_table.Column(left.name),          if hasattr(right, "name"):
649                                          comparison, right)              return self.t_table.SimpleQuery(self.t_table.Column(left.name),
650                                                comparison,
651                                                self.t_table.Column(right.name))
652            else:
653                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
654                                                comparison, right)
655    
656        def Dependencies(self):
657            """Return a tuple containing the original table"""
658            return (self.table,)
659    
660        def Width(self, col):
661            return self.table.Width(col)

Legend:
Removed from v.849  
changed lines
  Added in v.1923

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26