/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1026 by frank, Mon May 26 11:46:42 2003 UTC revision 1968 by bh, Fri Nov 21 14:33:27 2003 UTC
# Line 18  __version__ = "$Revision$" Line 18  __version__ = "$Revision$"
18  # $Source$  # $Source$
19  # $Id$  # $Id$
20    
 import os  
 import weakref  
21  from sqlite import connect  from sqlite import connect
22    
23  from base import TitledObject  from base import TitledObject
# Line 87  class ColumnReference: Line 85  class ColumnReference:
85          self.internal_name = internal_name          self.internal_name = internal_name
86    
87    
88  class TransientTableBase(table.OldTableInterfaceMixin):  class TransientTableBase:
89    
90      """Base class for tables in the transient database"""      """Base class for tables in the transient database"""
91    
# Line 162  class TransientTableBase(table.OldTableI Line 160  class TransientTableBase(table.OldTableI
160          """          """
161          return self.column_map.has_key(col)          return self.column_map.has_key(col)
162    
163      def ReadRowAsDict(self, index):      def RowIdToOrdinal(self, gid):
164            """Return the row ordinal given its id
165    
166            At the moment the transient tables are only used for tables that
167            don't distinguish between row number and row id, so the value is
168            returned unchanged.
169            """
170            return gid
171    
172        def RowOrdinalToId(self, num):
173            """Return the row id given its ordinal
174    
175            At the moment the transient tables are only used for tables that
176            don't distinguish between row number and row id, so the value is
177            returned unchanged.
178            """
179            return num
180    
181        def ReadRowAsDict(self, index, row_is_ordinal = 0):
182            """Return the entire row as a dictionary with column names as keys
183    
184            The row_is_ordinal is ignored because at the moment the
185            transient tables are only used for DBF files where it doesn't
186            matter.
187            """
188          # Implementation Strategy: Executing a completely new select          # Implementation Strategy: Executing a completely new select
189          # statement every time this method is called is too slow. The          # statement every time this method is called is too slow. The
190          # most important usage is to read the records more or less          # most important usage is to read the records more or less
# Line 210  class TransientTableBase(table.OldTableI Line 232  class TransientTableBase(table.OldTableI
232          self.read_record_last_row = index          self.read_record_last_row = index
233          return dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
234    
235      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
236          """Return the value of the specified row and column          """Return the value of the specified row and column
237    
238          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
239    
240            The row_is_ordinal is ignored because at the moment the
241            transient tables are only used for DBF files where it doesn't
242            matter.
243          """          """
244          # Depending on the actual access patterns of the table data, it          # Depending on the actual access patterns of the table data, it
245          # might be a bit faster in some circumstances to not implement          # might be a bit faster in some circumstances to not implement
# Line 222  class TransientTableBase(table.OldTableI Line 248  class TransientTableBase(table.OldTableI
248          return self.ReadRowAsDict(row)[self.column_map[col].name]          return self.ReadRowAsDict(row)[self.column_map[col].name]
249    
250      def ValueRange(self, col):      def ValueRange(self, col):
251            # Performance notes:
252            #
253            # In sqlite 2.8.6 the min and max aggregate functions can use an
254            # index but only when used as the only expression in the select
255            # statement (i.e. 'select min(col), max(col) from tbl;' will not
256            # use the index but 'select min(col) from tbl;' will) so we
257            # query the minimum and maximum separately.
258            #
259            # With the separate statements we can take advantage of an index
260            # if it exists. If the index doesn't exist, creating it first
261            # and then using it in the query is slower than the queries
262            # without an index. Creating the index is only an advantage if
263            # the queries are performed multiple times. With the current use
264            # patterns where ValueRange is only used occasionally by the
265            # classification generation dialog creating the index only for
266            # this usage is not really worth it, so we don't.
267          col = self.column_map[col]          col = self.column_map[col]
268          iname = col.internal_name          iname = col.internal_name
269          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min = self.db.execute("SELECT min(%s) FROM %s;"
270                                     % (iname, iname, self.tablename))                                % (iname, self.tablename))[0]
271            max = self.db.execute("SELECT max(%s) FROM %s;"
272                                  % (iname, self.tablename))[0]
273          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
274          return (converter(min), converter(max))          return (converter(min), converter(max))
275    
276      def UniqueValues(self, col):      def UniqueValues(self, col):
277            # Performance notes:
278            #
279            # In sqlite 2.8.6 there doesn't seem to be a way to query the
280            # unique items that uses an index. I've tried
281            #
282            #   SELECT col FROM tbl GROUP BY col;
283            #
284            # and
285            #
286            #   SELECT DISTINCT col FROM tbl;
287            #
288            # and in both cases the index is not used. If the index isn't
289            # used it doesn't make sense to call self.ensure_index.
290          iname = self.column_map[col].internal_name          iname = self.column_map[col].internal_name
291          cursor = self.db.cursor()          cursor = self.db.cursor()
292          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
# Line 245  class TransientTableBase(table.OldTableI Line 302  class TransientTableBase(table.OldTableI
302      def Width(self, col):      def Width(self, col):
303          """Return the maximum width of values in the column          """Return the maximum width of values in the column
304    
305          The return value is the maximum length of string representation          The return value is the maximum length of string
306          of the values in the column (represented by index or name)."""          representation of the values in the column (represented by index
307            or name).
308            """
309          max = 0          max = 0
310            
311          type  = self.column_map[col].type          type  = self.column_map[col].type
312          iname = self.column_map[col].internal_name          iname = self.column_map[col].internal_name
313          cursor = self.db.cursor()          cursor = self.db.cursor()
# Line 257  class TransientTableBase(table.OldTableI Line 316  class TransientTableBase(table.OldTableI
316          if not values:          if not values:
317              return None              return None
318    
319          if type == sql_type_map[table.FIELDTYPE_DOUBLE]:          if type == table.FIELDTYPE_DOUBLE:
320              prec = self.Precision(col)              format = "%.12f"
321              format = "%%.%df" % prec          elif type == table.FIELDTYPE_INT:
         elif type == sql_type_map[table.FIELDTYPE_INT]:  
322              format = "%d"              format = "%d"
323          else:          else:
324              format = "%s"              format = "%s"
# Line 272  class TransientTableBase(table.OldTableI Line 330  class TransientTableBase(table.OldTableI
330    
331          return max          return max
332    
     def Precision(self, col):  
         """Return the precision of the column  
   
         The return value is the maximum number of numeric characters after the  
         decimal if column type is double. Else precision zero is returned.  
         The column can be represented by index or name.  
         """  
       
         type  = self.column_map[col].type  
         if type == sql_type_map[table.FIELDTYPE_DOUBLE]:  
             iname = self.column_map[col].internal_name  
             cursor = self.db.cursor()  
             cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))  
             values = [ i[0] for i in cursor.fetchall()]  
             if not values:  
                 return 0  
               
             max = 0  
             for value in values:  
                 if value is None: continue  
                 l = len(str(value % 1))  
                 if l > max:  
                     max = l  
             if max > 2:  
                 return max - 2  
             else:  
                 return 0  
         else:  
             return 0  
   
333      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
334          """Return the indices of all rows that match a condition.          """Return the indices of all rows that match a condition.
335    
# Line 410  class TransientJoinedTable(TitledObject, Line 438  class TransientJoinedTable(TitledObject,
438          that the value of the left_field column of the left table is          that the value of the left_field column of the left table is
439          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
440    
441          The joined table contains all columns of the input tables with          The joined table contains all columns of the input tables,
442          one exception: Any column in the right_table with the same name          however, the column names of the right table may be changed
443          as one of the columns in the left_table will be omitted. This is          slightly to make them unique in the joined table. This is
444          somewhat of an implementation detail, but is done so that the          currently done by appending a sufficient number of underscores
445          column names of the joined table can be the same as the column          ('_').
         names of the input tables without having to create prefixes.  
446          """          """
447          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
448          self.dependencies = (left_table, right_table)          self.dependencies = (left_table, right_table)
# Line 441  class TransientJoinedTable(TitledObject, Line 468  class TransientJoinedTable(TitledObject,
468    
469          self.right_table.ensure_index(self.right_field)          self.right_table.ensure_index(self.right_field)
470    
471            # determine the internal column names to join on before
472            # coalescing the column information because if the external
473            # column names are the same they will be mapped to the same
474            # internal name afterwards.
475            internal_left_col = self.left_table.orig_to_internal[self.left_field]
476            internal_right_col =self.right_table.orig_to_internal[self.right_field]
477    
478          # Coalesce the column information          # Coalesce the column information
479          visited = {}          visited = {}
480          columns = []          columns = []
481          for col in self.left_table.columns + self.right_table.columns:          newcolumns = []
482              if col.name in visited:          for table in (self.left_table, self.right_table):
483                  # We can't allow multiple columns with the same original              for col in table.Columns():
484                  # name, so omit this one. FIXME: There should be a                  colname = col.name
485                  # better solution.                  # We can't allow multiple columns with the same
486                  continue                  # original name, so append '_' to this one until
487              columns.append(col)                  # it is unique.
488          TransientTableBase.create(self, columns)                  # FIXME: There should be a better solution.
489                    while colname in visited:
490                        colname = colname + '_'
491                    columns.append((table.tablename, col))
492                    newcol = ColumnReference(colname, col.type,
493                                                "Col%03d" % (len(newcolumns)+1))
494                    newcolumns.append(newcol)
495                    visited[colname] = 1
496            TransientTableBase.create(self, newcolumns)
497    
498          # Copy the joined data to the table.          # Copy the joined data to the table.
499          internal_names = [col.internal_name for col in self.columns]          newinternal_names = [col.internal_name for col in self.columns]
500            internal_references = ["%s.%s" % (table, col.internal_name)
501                                                        for table, col in columns]
502          if self.outer_join:          if self.outer_join:
503              join_operator = 'LEFT OUTER JOIN'              join_operator = 'LEFT OUTER JOIN'
504          else:          else:
505              join_operator = 'JOIN'              join_operator = 'JOIN'
506          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
507                  " %s %s ON %s = %s;"                  " %s %s ON %s.%s = %s.%s;"
508                  % (self.tablename,                  % (self.tablename,
509                     ", ".join(internal_names),                     ", ".join(newinternal_names),
510                     self.left_table.tablename,                     self.left_table.tablename,
511                     ", ".join(internal_names),                     ", ".join(internal_references),
512                     self.left_table.tablename,                     self.left_table.tablename,
513                     join_operator,                     join_operator,
514                     self.right_table.tablename,                     self.right_table.tablename,
515                     self.orig_to_internal[self.left_field],                     self.left_table.tablename,
516                     self.orig_to_internal[self.right_field]))                     internal_left_col,
517                       self.right_table.tablename,
518                       internal_right_col))
519          self.db.execute(stmt)          self.db.execute(stmt)
520    
521      def Dependencies(self):      def Dependencies(self):
522          """Return a tuple with the two tables the join depends on."""          """Return a tuple with the two tables the join depends on."""
523          return self.dependencies          return self.dependencies
524    
525        def JoinType(self):
526            """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
527            if self.outer_join:
528                return "LEFT OUTER"
529            else:
530                return "INNER"
531    
532    
533  class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):  class AutoTransientTable(TitledObject):
534    
535      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
536    
# Line 508  class AutoTransientTable(TitledObject, t Line 561  class AutoTransientTable(TitledObject, t
561      def NumColumns(self):      def NumColumns(self):
562          return self.table.NumColumns()          return self.table.NumColumns()
563    
564      def ReadRowAsDict(self, record):      def RowIdToOrdinal(self, gid):
565            """Return the row ordinal given its id"""
566            if self.t_table is not None:
567                return self.t_table.RowIdToOrdinal(gid)
568            else:
569                return self.table.RowIdToOrdinal(gid)
570    
571        def RowOrdinalToId(self, num):
572            """Return the row id given its ordinal"""
573            if self.t_table is not None:
574                return self.t_table.RowOrdinalToId(num)
575            else:
576                return self.table.RowOrdinalToId(num)
577    
578        def ReadRowAsDict(self, record, row_is_ordinal = 0):
579          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
580          """          """
581          if self.t_table is not None:          if self.t_table is not None:
582              return self.t_table.ReadRowAsDict(record)              return self.t_table.ReadRowAsDict(record,
583                                                  row_is_ordinal = row_is_ordinal)
584          else:          else:
585              return self.table.ReadRowAsDict(record)              return self.table.ReadRowAsDict(record,
586                                                row_is_ordinal = row_is_ordinal)
587    
588      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
589          """Return the value of the specified row and column          """Return the value of the specified row and column
590    
591          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
592          """          """
593          if self.t_table is not None:          if self.t_table is not None:
594              return self.t_table.ReadValue(row, col)              return self.t_table.ReadValue(row, col,
595                                              row_is_ordinal = row_is_ordinal)
596          else:          else:
597              return self.table.ReadValue(row, col)              return self.table.ReadValue(row, col,
598                                            row_is_ordinal = row_is_ordinal)
599    
600      def copy_to_transient(self):      def copy_to_transient(self):
601          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 539  class AutoTransientTable(TitledObject, t Line 610  class AutoTransientTable(TitledObject, t
610          return self.t_table          return self.t_table
611    
612      def ValueRange(self, col):      def ValueRange(self, col):
613          if self.t_table is None:          # Performance of sqlite vs. DBF for this method:
614              self.copy_to_transient()          #
615          return self.t_table.ValueRange(col)          # If the table has been copied to the sqlite database it's
616            # faster to use it even if there is no index on that column.
617            # Otherwise it's faster to simply loop through all rows in the
618            # DBF file. Copying the data to the sqlite database can take
619            # very long for large amounts of data
620            #
621            # Of course if the table is not a DBF file the issue could be
622            # different, although copying the data into sqlite first will
623            # likely always be slower than simply querying the non-sqlite
624            # table directly. Currently only DBF files and memory tables are
625            # used as the underlying non-sqlite table, though.
626            if self.t_table is not None:
627                return self.t_table.ValueRange(col)
628            else:
629                return self.table.ValueRange(col)
630    
631      def UniqueValues(self, col):      def UniqueValues(self, col):
632          if self.t_table is None:          # The performance trade-offs for this method are basically the
633              self.copy_to_transient()          # same as for ValueRange except that currently there doesn't
634          return self.t_table.UniqueValues(col)          # seem to be a way to take advantage of indexes in this case in
635            # sqlite. However, it's still faster to query the transient
636            # table if it already exists.
637            if self.t_table is not None:
638                return self.t_table.UniqueValues(col)
639            else:
640                return self.table.UniqueValues(col)
641    
642      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
643          if self.t_table is None:          if self.t_table is None:
# Line 566  class AutoTransientTable(TitledObject, t Line 657  class AutoTransientTable(TitledObject, t
657          """Return a tuple containing the original table"""          """Return a tuple containing the original table"""
658          return (self.table,)          return (self.table,)
659    
660        def Width(self, col):
661            return self.table.Width(col)
662    
663        def write_record(self, row, values):
664            """Write the values to the given row.
665    
666            This is a very experimental feature which doesn't work in all
667            cases, so you better know what you're doing when calling this
668            method.
669            """
670            self.table.write_record(row, values)

Legend:
Removed from v.1026  
changed lines
  Added in v.1968

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26