/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/thuban/Thuban/Model/transientdb.py revision 1026 by frank, Mon May 26 11:46:42 2003 UTC branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py revision 2734 by bramz, Thu Mar 1 12:42:59 2007 UTC
# Line 18  __version__ = "$Revision$" Line 18  __version__ = "$Revision$"
18  # $Source$  # $Source$
19  # $Id$  # $Id$
20    
21  import os  # Pysqlite versions 1 and 2 behave quite differently.
22  import weakref  # The two versions use different paramstyles.  The older version
23  from sqlite import connect  # supports format and pyformat while pysqlite2 supports only qmark
24    # and named.
25    # The sqlite2 boolean variable is used to select the version-specific parts of the code.
26    try:
27          # Using SQLITE 2.x
28        sqlite2 = True
29        from pysqlite2 import dbapi2 as sqlite
30    except ImportError:
31          # Using SQLITE 1.x
32        sqlite2 = False
33        import sqlite
34        
35    
36  from base import TitledObject  from base import TitledObject
37    
# Line 42  class TransientDatabase: Line 53  class TransientDatabase:
53    
54      def __init__(self, filename):      def __init__(self, filename):
55          self.filename = filename          self.filename = filename
56          self.conn = connect(filename)          self.conn = sqlite.connect(filename)
57          # Counters to produce unique table and column names          # Counters to produce unique table and column names
58          self.num_tables = 0          self.num_tables = 0
59          self.num_cols = 0          self.num_cols = 0
# Line 87  class ColumnReference: Line 98  class ColumnReference:
98          self.internal_name = internal_name          self.internal_name = internal_name
99    
100    
101  class TransientTableBase(table.OldTableInterfaceMixin):  class TransientTableBase:
102    
103      """Base class for tables in the transient database"""      """Base class for tables in the transient database"""
104    
# Line 162  class TransientTableBase(table.OldTableI Line 173  class TransientTableBase(table.OldTableI
173          """          """
174          return self.column_map.has_key(col)          return self.column_map.has_key(col)
175    
176      def ReadRowAsDict(self, index):      def RowIdToOrdinal(self, gid):
177            """Return the row ordinal given its id
178    
179            At the moment the transient tables are only used for tables that
180            don't distinguish between row number and row id, so the value is
181            returned unchanged.
182            """
183            return gid
184    
185        def RowOrdinalToId(self, num):
186            """Return the row id given its ordinal
187    
188            At the moment the transient tables are only used for tables that
189            don't distinguish between row number and row id, so the value is
190            returned unchanged.
191            """
192            return num
193    
194        def ReadRowAsDict(self, index, row_is_ordinal = 0):
195            """Return the entire row as a dictionary with column names as keys
196    
197            The row_is_ordinal is ignored because at the moment the
198            transient tables are only used for DBF files where it doesn't
199            matter.
200            """
201          # Implementation Strategy: Executing a completely new select          # Implementation Strategy: Executing a completely new select
202          # statement every time this method is called is too slow. The          # statement every time this method is called is too slow. The
203          # most important usage is to read the records more or less          # most important usage is to read the records more or less
# Line 210  class TransientTableBase(table.OldTableI Line 245  class TransientTableBase(table.OldTableI
245          self.read_record_last_row = index          self.read_record_last_row = index
246          return dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
247    
248      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
249          """Return the value of the specified row and column          """Return the value of the specified row and column
250    
251          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
252    
253            The row_is_ordinal is ignored because at the moment the
254            transient tables are only used for DBF files where it doesn't
255            matter.
256          """          """
257          # Depending on the actual access patterns of the table data, it          # Depending on the actual access patterns of the table data, it
258          # might be a bit faster in some circumstances to not implement          # might be a bit faster in some circumstances to not implement
# Line 222  class TransientTableBase(table.OldTableI Line 261  class TransientTableBase(table.OldTableI
261          return self.ReadRowAsDict(row)[self.column_map[col].name]          return self.ReadRowAsDict(row)[self.column_map[col].name]
262    
263      def ValueRange(self, col):      def ValueRange(self, col):
264            # Performance notes:
265            #
266            # In sqlite 2.8.6 the min and max aggregate functions can use an
267            # index but only when used as the only expression in the select
268            # statement (i.e. 'select min(col), max(col) from tbl;' will not
269            # use the index but 'select min(col) from tbl;' will) so we
270            # query the minimum and maximum separately.
271            #
272            # With the separate statements we can take advantage of an index
273            # if it exists. If the index doesn't exist, creating it first
274            # and then using it in the query is slower than the queries
275            # without an index. Creating the index is only an advantage if
276            # the queries are performed multiple times. With the current use
277            # patterns where ValueRange is only used occasionally by the
278            # classification generation dialog creating the index only for
279            # this usage is not really worth it, so we don't.
280          col = self.column_map[col]          col = self.column_map[col]
281          iname = col.internal_name          iname = col.internal_name
282          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min = self.db.execute("SELECT min(%s) FROM %s;"
283                                     % (iname, iname, self.tablename))                                % (iname, self.tablename))[0]
284            max = self.db.execute("SELECT max(%s) FROM %s;"
285                                  % (iname, self.tablename))[0]
286          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
287          return (converter(min), converter(max))          return (converter(min), converter(max))
288    
289      def UniqueValues(self, col):      def UniqueValues(self, col):
290            # Performance notes:
291            #
292            # In sqlite 2.8.6 there doesn't seem to be a way to query the
293            # unique items that uses an index. I've tried
294            #
295            #   SELECT col FROM tbl GROUP BY col;
296            #
297            # and
298            #
299            #   SELECT DISTINCT col FROM tbl;
300            #
301            # and in both cases the index is not used. If the index isn't
302            # used it doesn't make sense to call self.ensure_index.
303          iname = self.column_map[col].internal_name          iname = self.column_map[col].internal_name
304          cursor = self.db.cursor()          cursor = self.db.cursor()
305          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
# Line 245  class TransientTableBase(table.OldTableI Line 315  class TransientTableBase(table.OldTableI
315      def Width(self, col):      def Width(self, col):
316          """Return the maximum width of values in the column          """Return the maximum width of values in the column
317    
318          The return value is the maximum length of string representation          The return value is the maximum length of string
319          of the values in the column (represented by index or name)."""          representation of the values in the column (represented by index
320            or name).
321            """
322          max = 0          max = 0
323            
324          type  = self.column_map[col].type          type  = self.column_map[col].type
325          iname = self.column_map[col].internal_name          iname = self.column_map[col].internal_name
326          cursor = self.db.cursor()          cursor = self.db.cursor()
# Line 257  class TransientTableBase(table.OldTableI Line 329  class TransientTableBase(table.OldTableI
329          if not values:          if not values:
330              return None              return None
331    
332          if type == sql_type_map[table.FIELDTYPE_DOUBLE]:          if type == table.FIELDTYPE_DOUBLE:
333              prec = self.Precision(col)              format = "%.12f"
334              format = "%%.%df" % prec          elif type == table.FIELDTYPE_INT:
         elif type == sql_type_map[table.FIELDTYPE_INT]:  
335              format = "%d"              format = "%d"
336          else:          else:
337              format = "%s"              format = "%s"
# Line 272  class TransientTableBase(table.OldTableI Line 343  class TransientTableBase(table.OldTableI
343    
344          return max          return max
345    
     def Precision(self, col):  
         """Return the precision of the column  
   
         The return value is the maximum number of numeric characters after the  
         decimal if column type is double. Else precision zero is returned.  
         The column can be represented by index or name.  
         """  
       
         type  = self.column_map[col].type  
         if type == sql_type_map[table.FIELDTYPE_DOUBLE]:  
             iname = self.column_map[col].internal_name  
             cursor = self.db.cursor()  
             cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))  
             values = [ i[0] for i in cursor.fetchall()]  
             if not values:  
                 return 0  
               
             max = 0  
             for value in values:  
                 if value is None: continue  
                 l = len(str(value % 1))  
                 if l > max:  
                     max = l  
             if max > 2:  
                 return max - 2  
             else:  
                 return 0  
         else:  
             return 0  
   
346      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
347          """Return the indices of all rows matching a condition.          """Return the indices of all rows matching a condition.
348    
# Line 325  class TransientTableBase(table.OldTableI Line 366  class TransientTableBase(table.OldTableI
366              right_template = right.internal_name              right_template = right.internal_name
367              params = ()              params = ()
368          else:          else:
369              right_template = "%s"              if sqlite2:
370                    right_template = "?"
371                else: right_template = "%s"
372              params = (right,)              params = (right,)
373    
374          query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \          query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
# Line 380  class TransientTable(TitledObject, Trans Line 423  class TransientTable(TitledObject, Trans
423          # longer than any of the column names          # longer than any of the column names
424          id_key = max([len(col.name) for col in self.columns]) * "x"          id_key = max([len(col.name) for col in self.columns]) * "x"
425    
426          insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \          if sqlite2:
427                insert_template = "INSERT INTO %s (id, %s) VALUES (%s, %s);" \
428                                   % (self.tablename,
429                                      ", ".join([col.internal_name
430                                                 for col in self.columns]),
431                                      '?',
432                                      ", ".join(["?" for col in self.columns]))
433    
434            else:
435                insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
436                                 % (self.tablename,                                 % (self.tablename,
437                                    ", ".join([col.internal_name                                    ", ".join([col.internal_name
438                                               for col in self.columns]),                                               for col in self.columns]),
# Line 391  class TransientTable(TitledObject, Trans Line 443  class TransientTable(TitledObject, Trans
443          for i in range(table.NumRows()):          for i in range(table.NumRows()):
444              row = table.ReadRowAsDict(i)              row = table.ReadRowAsDict(i)
445              row[id_key] = i              row[id_key] = i
446              cursor.execute(insert_template, row)              if sqlite2:
447                    params = [i]
448                    for col in self.columns:
449                      params.append(row[col.name])
450                    cursor.execute(insert_template, params)
451                else:  
452                    cursor.execute(insert_template, row)
453          self.db.conn.commit()          self.db.conn.commit()
454    
455    
# Line 410  class TransientJoinedTable(TitledObject, Line 468  class TransientJoinedTable(TitledObject,
468          that the value of the left_field column in the left table is          that the value of the left_field column in the left table is
469          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
470    
471          The joined table contains all columns of the input tables with          The joined table contains all columns of the input tables,
472          one exception: Any column in the right_table with the same name          however, the column names of the right table may be changed
473          as one of the columns in the left_table will be omitted. This is          slightly to make them unique in the joined table. This is
474          somewhat of an implementation detail, but is done so that the          currently done by appending a sufficient number of underscores
475          column names of the joined table can be the same as the column          ('_').
         names of the input tables without having to create prefixes.  
476          """          """
477          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
478          self.dependencies = (left_table, right_table)          self.dependencies = (left_table, right_table)
# Line 441  class TransientJoinedTable(TitledObject, Line 498  class TransientJoinedTable(TitledObject,
498    
499          self.right_table.ensure_index(self.right_field)          self.right_table.ensure_index(self.right_field)
500    
501            # determine the internal column names to join on before
502            # coalescing the column information because if the external
503            # column names are the same they will be mapped to the same
504            # internal name afterwards.
505            internal_left_col = self.left_table.orig_to_internal[self.left_field]
506            internal_right_col =self.right_table.orig_to_internal[self.right_field]
507    
508          # Coalesce the column information          # Coalesce the column information
509          visited = {}          visited = {}
510          columns = []          columns = []
511          for col in self.left_table.columns + self.right_table.columns:          newcolumns = []
512              if col.name in visited:          for table in (self.left_table, self.right_table):
513                  # We can't allow multiple columns with the same original              for col in table.Columns():
514                  # name, so omit this one. FIXME: There should be a                  colname = col.name
515                  # better solution.                  # We can't allow multiple columns with the same
516                  continue                  # original name, so append '_' to this one until
517              columns.append(col)                  # it is unique.
518          TransientTableBase.create(self, columns)                  # FIXME: There should be a better solution.
519                    while colname in visited:
520                        colname = colname + '_'
521                    columns.append((table.tablename, col))
522                    newcol = ColumnReference(colname, col.type,
523                                                "Col%03d" % (len(newcolumns)+1))
524                    newcolumns.append(newcol)
525                    visited[colname] = 1
526            TransientTableBase.create(self, newcolumns)
527    
528          # Copy the joined data to the table.          # Copy the joined data to the table.
529          internal_names = [col.internal_name for col in self.columns]          newinternal_names = [col.internal_name for col in self.columns]
530            internal_references = ["%s.%s" % (table, col.internal_name)
531                                                        for table, col in columns]
532          if self.outer_join:          if self.outer_join:
533              join_operator = 'LEFT OUTER JOIN'              join_operator = 'LEFT OUTER JOIN'
534          else:          else:
535              join_operator = 'JOIN'              join_operator = 'JOIN'
536          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
537                  " %s %s ON %s = %s;"                  " %s %s ON %s.%s = %s.%s;"
538                  % (self.tablename,                  % (self.tablename,
539                     ", ".join(internal_names),                     ", ".join(newinternal_names),
540                     self.left_table.tablename,                     self.left_table.tablename,
541                     ", ".join(internal_names),                     ", ".join(internal_references),
542                     self.left_table.tablename,                     self.left_table.tablename,
543                     join_operator,                     join_operator,
544                     self.right_table.tablename,                     self.right_table.tablename,
545                     self.orig_to_internal[self.left_field],                     self.left_table.tablename,
546                     self.orig_to_internal[self.right_field]))                     internal_left_col,
547                       self.right_table.tablename,
548                       internal_right_col))
549          self.db.execute(stmt)          self.db.execute(stmt)
550    
551      def Dependencies(self):      def Dependencies(self):
552          """Return a tuple with the two tables the join depends on."""          """Return a tuple with the two tables the join depends on."""
553          return self.dependencies          return self.dependencies
554    
555        def JoinType(self):
556            """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
557            if self.outer_join:
558                return "LEFT OUTER"
559            else:
560                return "INNER"
561    
562    
563  class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):  class AutoTransientTable(TitledObject):
564    
565      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
566    
# Line 508  class AutoTransientTable(TitledObject, t Line 591  class AutoTransientTable(TitledObject, t
591      def NumColumns(self):      def NumColumns(self):
592          return self.table.NumColumns()          return self.table.NumColumns()
593    
594      def ReadRowAsDict(self, record):      def RowIdToOrdinal(self, gid):
595            """Return the row ordinal given its id"""
596            if self.t_table is not None:
597                return self.t_table.RowIdToOrdinal(gid)
598            else:
599                return self.table.RowIdToOrdinal(gid)
600    
601        def RowOrdinalToId(self, num):
602            """Return the row id given its ordinal"""
603            if self.t_table is not None:
604                return self.t_table.RowOrdinalToId(num)
605            else:
606                return self.table.RowOrdinalToId(num)
607    
608        def ReadRowAsDict(self, record, row_is_ordinal = 0):
609          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
610          """          """
611          if self.t_table is not None:          if self.t_table is not None:
612              return self.t_table.ReadRowAsDict(record)              return self.t_table.ReadRowAsDict(record,
613                                                  row_is_ordinal = row_is_ordinal)
614          else:          else:
615              return self.table.ReadRowAsDict(record)              return self.table.ReadRowAsDict(record,
616                                                row_is_ordinal = row_is_ordinal)
617    
618      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
619          """Return the value of the specified row and column          """Return the value of the specified row and column
620    
621          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
622          """          """
623          if self.t_table is not None:          if self.t_table is not None:
624              return self.t_table.ReadValue(row, col)              return self.t_table.ReadValue(row, col,
625                                              row_is_ordinal = row_is_ordinal)
626          else:          else:
627              return self.table.ReadValue(row, col)              return self.table.ReadValue(row, col,
628                                            row_is_ordinal = row_is_ordinal)
629    
630      def copy_to_transient(self):      def copy_to_transient(self):
631          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 539  class AutoTransientTable(TitledObject, t Line 640  class AutoTransientTable(TitledObject, t
640          return self.t_table          return self.t_table
641    
642      def ValueRange(self, col):      def ValueRange(self, col):
643          if self.t_table is None:          # Performance of sqlite vs. DBF for this method:
644              self.copy_to_transient()          #
645          return self.t_table.ValueRange(col)          # If the table has been copied to the sqlite database it's
646            # faster to use it even if there is no index on that column.
647            # Otherwise it's faster to simply loop through all rows in the
648            # DBF file. Copying the data to the sqlite database can take
649            # very long for large amounts of data
650            #
651            # Of course if the table is not a DBF file the issue could be
652            # different, although copying the data into sqlite first will
653            # likely always be slower than simply querying the non-sqlite
654            # table directly. Currently only DBF files and memory tables are
655            # used as the underlying non-sqlite table, though.
656            if self.t_table is not None:
657                return self.t_table.ValueRange(col)
658            else:
659                return self.table.ValueRange(col)
660    
661      def UniqueValues(self, col):      def UniqueValues(self, col):
662          if self.t_table is None:          # The performance trade-offs for this method are basically the
663              self.copy_to_transient()          # same as for ValueRange except that currently there doesn't
664          return self.t_table.UniqueValues(col)          # seem to be a way to take advantage of indexes in this case in
665            # sqlite. However, it's still faster to query the transient
666            # table if it already exists.
667            if self.t_table is not None:
668                return self.t_table.UniqueValues(col)
669            else:
670                return self.table.UniqueValues(col)
671    
672      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
673          if self.t_table is None:          if self.t_table is None:
# Line 566  class AutoTransientTable(TitledObject, t Line 687  class AutoTransientTable(TitledObject, t
687          """Return a tuple containing the original table"""          """Return a tuple containing the original table"""
688          return (self.table,)          return (self.table,)
689    
690        def Width(self, col):
691            return self.table.Width(col)
692    
693        def write_record(self, row, values):
694            """Write the values to the given row.
695    
696            This is a very experimental feature which doesn't work in all
697            cases, so you better know what you're doing when calling this
698            method.
699            """
700            self.table.write_record(row, values)

Legend:
Removed from v.1026  
changed lines
  Added in v.2734

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26