/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1002 by frank, Thu May 22 19:41:57 2003 UTC revision 2680 by dpinte, Mon May 15 14:36:50 2006 UTC
# Line 18  __version__ = "$Revision$" Line 18  __version__ = "$Revision$"
18  # $Source$  # $Source$
19  # $Id$  # $Id$
20    
21  import os  # Pysqlite versions 1.x and 2.x behave quite differently
22  import weakref  # Pysqlite uses a different paramstyle.  The older version
23  from sqlite import connect  # support format and pyformat while pysqlite2 supports only qmark
24    # and named.
25    # The sqlite2 boolean variable is used to manage specific part of the code
26    try:
27            # Using SQLITE 2.x
28      sqlite2 = True
29      from pysqlite2 import dbapi2 as sqlite
30    except ImportError:
31            # Using SQLITE 1.x
32      sqlite2 = False
33      import sqlite
34            
35    
36    from base import TitledObject
37    
38  import table  import table
39    
# Line 40  class TransientDatabase: Line 53  class TransientDatabase:
53    
54      def __init__(self, filename):      def __init__(self, filename):
55          self.filename = filename          self.filename = filename
56          self.conn = connect(filename)          self.conn = sqlite.connect(filename)
57          # Counters to produce unique table and column names          # Counters to produce unique table and column names
58          self.num_tables = 0          self.num_tables = 0
59          self.num_cols = 0          self.num_cols = 0
# Line 85  class ColumnReference: Line 98  class ColumnReference:
98          self.internal_name = internal_name          self.internal_name = internal_name
99    
100    
101  class TransientTableBase(table.OldTableInterfaceMixin):  class TransientTableBase:
102    
103      """Base class for tables in the transient database"""      """Base class for tables in the transient database"""
104    
# Line 160  class TransientTableBase(table.OldTableI Line 173  class TransientTableBase(table.OldTableI
173          """          """
174          return self.column_map.has_key(col)          return self.column_map.has_key(col)
175    
176      def ReadRowAsDict(self, index):      def RowIdToOrdinal(self, gid):
177            """Return the row ordinal given its id
178    
179            At the moment the transient tables are only used for tables that
180            don't distinguish between row number and row id, so the value is
181            returned unchanged.
182            """
183            return gid
184    
185        def RowOrdinalToId(self, num):
186            """Return the row id given its ordinal
187    
188            At the moment the transient tables are only used for tables that
189            don't distinguish between row number and row id, so the value is
190            returned unchanged.
191            """
192            return num
193    
194        def ReadRowAsDict(self, index, row_is_ordinal = 0):
195            """Return the entire row as a dictionary with column names as keys
196    
197            The row_is_ordinal is ignored because at the moment the
198            transient tables are only used for DBF files where it doesn't
199            matter.
200            """
201          # Implementation Strategy: Executing a completely new select          # Implementation Strategy: Executing a completely new select
202          # statement every time this method is called is too slow. The          # statement every time this method is called is too slow. The
203          # most important usage is to read the records more or less          # most important usage is to read the records more or less
# Line 208  class TransientTableBase(table.OldTableI Line 245  class TransientTableBase(table.OldTableI
245          self.read_record_last_row = index          self.read_record_last_row = index
246          return dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
247    
248      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
249          """Return the value of the specified row and column          """Return the value of the specified row and column
250    
251          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
252    
253            The row_is_ordinal is ignored because at the moment the
254            transient tables are only used for DBF files where it doesn't
255            matter.
256          """          """
257          # Depending on the actual access patterns of the table data, it          # Depending on the actual access patterns of the table data, it
258          # might be a bit faster in some circumstances to not implement          # might be a bit faster in some circumstances to not implement
# Line 220  class TransientTableBase(table.OldTableI Line 261  class TransientTableBase(table.OldTableI
261          return self.ReadRowAsDict(row)[self.column_map[col].name]          return self.ReadRowAsDict(row)[self.column_map[col].name]
262    
263      def ValueRange(self, col):      def ValueRange(self, col):
264            # Performance notes:
265            #
266            # In sqlite 2.8.6 the min and max aggregate functions can use an
267            # index but only when used as the only expression in the select
268            # statement (i.e. 'select min(col), max(col) from tbl;' will not
269            # use the index but 'select min(col) from tbl;' will) so we
270            # query the minimum and maximum separately.
271            #
272            # With the separate statements we can take advantage of an index
273            # if it exists. If the index doesn't exist, creating it first
274            # and then using it in the query is slower than the queries
275            # without an index. Creating the index is only an advantage if
276            # the queries are performed multiple times. With the current use
277            # patterns where ValueRange is only used occasionally by the
278            # classification generation dialog creating the index only for
279            # this usage is not really worth it, so we don't.
280          col = self.column_map[col]          col = self.column_map[col]
281          iname = col.internal_name          iname = col.internal_name
282          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min = self.db.execute("SELECT min(%s) FROM %s;"
283                                     % (iname, iname, self.tablename))                                % (iname, self.tablename))[0]
284            max = self.db.execute("SELECT max(%s) FROM %s;"
285                                  % (iname, self.tablename))[0]
286          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
287          return (converter(min), converter(max))          return (converter(min), converter(max))
288    
289      def UniqueValues(self, col):      def UniqueValues(self, col):
290            # Performance notes:
291            #
292            # In sqlite 2.8.6 there doesn't seem to be a way to query the
293            # unique items that uses an index. I've tried
294            #
295            #   SELECT col FROM tbl GROUP BY col;
296            #
297            # and
298            #
299            #   SELECT DISTINCT col FROM tbl;
300            #
301            # and in both cases the index is not used. If the index isn't
302            # used it doesn't make sense to call self.ensure_index.
303          iname = self.column_map[col].internal_name          iname = self.column_map[col].internal_name
304          cursor = self.db.cursor()          cursor = self.db.cursor()
305          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
# Line 240  class TransientTableBase(table.OldTableI Line 312  class TransientTableBase(table.OldTableI
312              result.append(row[0])              result.append(row[0])
313          return result          return result
314    
315        def Width(self, col):
316            """Return the maximum width of values in the column
317    
318            The return value is the maximum length of the string
319            representation of the values in the column (represented by index
320            or name).
321            """
322            max = 0
323    
324            type  = self.column_map[col].type
325            iname = self.column_map[col].internal_name
326            cursor = self.db.cursor()
327            cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
328            values = [ i[0] for i in cursor.fetchall()]
329            if not values:
330                return None
331    
332            if type == table.FIELDTYPE_DOUBLE:
333                format = "%.12f"
334            elif type == table.FIELDTYPE_INT:
335                format = "%d"
336            else:
337                format = "%s"
338            for value in values:
339                if value is None: continue
340                l = len(format % value)
341                if l > max:
342                    max = l
343    
344            return max
345    
346      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
347          """Return the indices of all rows matching a condition.          """Return the indices of all rows matching a condition.
348    
# Line 263  class TransientTableBase(table.OldTableI Line 366  class TransientTableBase(table.OldTableI
366              right_template = right.internal_name              right_template = right.internal_name
367              params = ()              params = ()
368          else:          else:
369              right_template = "%s"              if sqlite2:
370                  right_template = "?"
371                else: right_template = "%s"
372              params = (right,)              params = (right,)
373    
374          query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \          query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
# Line 289  class TransientTableBase(table.OldTableI Line 394  class TransientTableBase(table.OldTableI
394          raise NotImplementedError          raise NotImplementedError
395    
396    
397  class TransientTable(TransientTableBase):  class TransientTable(TitledObject, TransientTableBase):
398    
399      """A Table in a transient DB that starts as the copy of a Thuban Table."""      """A Table in a transient DB that starts as the copy of a Thuban Table."""
400    
# Line 300  class TransientTable(TransientTableBase) Line 405  class TransientTable(TransientTableBase)
405          interface.          interface.
406          """          """
407          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
408          self.title = table.Title()          TitledObject.__init__(self, table.Title())
409          self.create(table)          self.create(table)
410    
     def Title(self):  
         """Return the title of the table.  
   
         The title is the same as that of the original table  
         """  
         return self.title  
   
411      def create(self, table):      def create(self, table):
412          columns = []          columns = []
413          for col in table.Columns():          for col in table.Columns():
# Line 325  class TransientTable(TransientTableBase) Line 423  class TransientTable(TransientTableBase)
423          # longer than any of the column names          # longer than any of the column names
424          id_key = max([len(col.name) for col in self.columns]) * "x"          id_key = max([len(col.name) for col in self.columns]) * "x"
425    
426          insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \          if sqlite2:
427              insert_template = "INSERT INTO %s (id, %s) VALUES (%s, %s);" \
428                                   % (self.tablename,
429                                      ", ".join([col.internal_name
430                                                 for col in self.columns]),
431                                      '?',
432                                      ", ".join(["?" for col in self.columns]))
433    
434            else:
435              insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
436                                 % (self.tablename,                                 % (self.tablename,
437                                    ", ".join([col.internal_name                                    ", ".join([col.internal_name
438                                               for col in self.columns]),                                               for col in self.columns]),
# Line 336  class TransientTable(TransientTableBase) Line 443  class TransientTable(TransientTableBase)
443          for i in range(table.NumRows()):          for i in range(table.NumRows()):
444              row = table.ReadRowAsDict(i)              row = table.ReadRowAsDict(i)
445              row[id_key] = i              row[id_key] = i
446              cursor.execute(insert_template, row)              if sqlite2:
447                  params = [i]
448                  for col in self.columns:
449                    params.append(row[col.name])
450                  cursor.execute(insert_template, params)
451                else:  
452                  cursor.execute(insert_template, row)
453          self.db.conn.commit()          self.db.conn.commit()
454    
455    
456    
457  class TransientJoinedTable(TransientTableBase):  class TransientJoinedTable(TitledObject, TransientTableBase):
458    
459      """A Table in the transient DB that contains a join of two tables"""      """A Table in the transient DB that contains a join of two tables"""
460    
461      def __init__(self, transient_db, left_table, left_field,      def __init__(self, transient_db, left_table, left_field,
462                   right_table, right_field = None):                   right_table, right_field = None, outer_join = False):
463          """Create a new table in the transient DB as a join of two tables.          """Create a new table in the transient DB as a join of two tables.
464    
465          Both input tables, left_table and right_table must have a          Both input tables, left_table and right_table must have a
# Line 355  class TransientJoinedTable(TransientTabl Line 468  class TransientJoinedTable(TransientTabl
468          that the value of the left_field column of the left table is          that the value of the left_field column of the left table is
469          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
470    
471          The joined table contains all columns of the input tables with          The joined table contains all columns of the input tables,
472          one exception: Any column in the right_table with the same name          however, the column names of the right table may be changed
473          as one of the columns in the left_table will be omitted. This is          slightly to make them unique in the joined table. This is
474          somewhat of an implementation detail, but is done so that the          currently done by appending a sufficient number of underscores
475          column names of the joined table can be the same as the column          ('_').
         names of the input tables without having to create prefixes.  
476          """          """
477          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
478          self.dependencies = (left_table, right_table)          self.dependencies = (left_table, right_table)
# Line 371  class TransientJoinedTable(TransientTabl Line 483  class TransientJoinedTable(TransientTabl
483              self.right_field = right_field              self.right_field = right_field
484          else:          else:
485              self.right_field = self.left_field              self.right_field = self.left_field
486          self.create()          self.outer_join = outer_join
487    
488            title = "Join of %(left)s and %(right)s" \
489                    % {"left": self.left_table.Title(),
490                       "right": self.right_table.Title()}
491            TitledObject.__init__(self, title)
492    
493      def Title(self):          self.create()
         """Return the title of the joined table"""  
         return "Join of %(left)s and %(right)s" \  
                % {"left": self.left_table.Title(),  
                   "right": self.right_table.Title()}  
494    
495      def create(self):      def create(self):
496          """Internal: Create the table with the joined data"""          """Internal: Create the table with the joined data"""
# Line 385  class TransientJoinedTable(TransientTabl Line 498  class TransientJoinedTable(TransientTabl
498    
499          self.right_table.ensure_index(self.right_field)          self.right_table.ensure_index(self.right_field)
500    
501            # determine the internal column names to join on before
502            # coalescing the column information because if the external
503            # column names are the same they will be mapped to the same
504            # internal name afterwards.
505            internal_left_col = self.left_table.orig_to_internal[self.left_field]
506            internal_right_col =self.right_table.orig_to_internal[self.right_field]
507    
508          # Coalesce the column information          # Coalesce the column information
509          visited = {}          visited = {}
510          columns = []          columns = []
511          for col in self.left_table.columns + self.right_table.columns:          newcolumns = []
512              if col.name in visited:          for table in (self.left_table, self.right_table):
513                  # We can't allow multiple columns with the same original              for col in table.Columns():
514                  # name, so omit this one. FIXME: There should be a                  colname = col.name
515                  # better solution.                  # We can't allow multiple columns with the same
516                  continue                  # original name, so append '_' to this one until
517              columns.append(col)                  # it is unique.
518          TransientTableBase.create(self, columns)                  # FIXME: There should be a better solution.
519                    while colname in visited:
520                        colname = colname + '_'
521                    columns.append((table.tablename, col))
522                    newcol = ColumnReference(colname, col.type,
523                                                "Col%03d" % (len(newcolumns)+1))
524                    newcolumns.append(newcol)
525                    visited[colname] = 1
526            TransientTableBase.create(self, newcolumns)
527    
528          # Copy the joined data to the table.          # Copy the joined data to the table.
529          internal_names = [col.internal_name for col in self.columns]          newinternal_names = [col.internal_name for col in self.columns]
530            internal_references = ["%s.%s" % (table, col.internal_name)
531                                                        for table, col in columns]
532            if self.outer_join:
533                join_operator = 'LEFT OUTER JOIN'
534            else:
535                join_operator = 'JOIN'
536          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
537                  " JOIN %s ON %s = %s;"                  " %s %s ON %s.%s = %s.%s;"
538                  % (self.tablename,                  % (self.tablename,
539                     ", ".join(internal_names),                     ", ".join(newinternal_names),
540                     self.left_table.tablename,                     self.left_table.tablename,
541                     ", ".join(internal_names),                     ", ".join(internal_references),
542                     self.left_table.tablename,                     self.left_table.tablename,
543                       join_operator,
544                     self.right_table.tablename,                     self.right_table.tablename,
545                     self.orig_to_internal[self.left_field],                     self.left_table.tablename,
546                     self.orig_to_internal[self.right_field]))                     internal_left_col,
547                       self.right_table.tablename,
548                       internal_right_col))
549          self.db.execute(stmt)          self.db.execute(stmt)
550    
551      def Dependencies(self):      def Dependencies(self):
552          """Return a tuple with the two tables the join depends on."""          """Return a tuple with the two tables the join depends on."""
553          return self.dependencies          return self.dependencies
554    
555        def JoinType(self):
556            """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
557            if self.outer_join:
558                return "LEFT OUTER"
559            else:
560                return "INNER"
561    
562    
563  class AutoTransientTable(table.OldTableInterfaceMixin):  class AutoTransientTable(TitledObject):
564    
565      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
566    
# Line 425  class AutoTransientTable(table.OldTableI Line 569  class AutoTransientTable(table.OldTableI
569      """      """
570    
571      def __init__(self, transient_db, table):      def __init__(self, transient_db, table):
572            TitledObject.__init__(self, table.Title())
573          self.transient_db = transient_db          self.transient_db = transient_db
574          self.table = table          self.table = table
575          self.t_table = None          self.t_table = None
576    
     def Title(self):  
         """Return the title of the table.  
   
         The title is the same as that of the original table.  
         """  
         return self.table.Title()  
   
577      def Columns(self):      def Columns(self):
578          return self.table.Columns()          return self.table.Columns()
579    
# Line 453  class AutoTransientTable(table.OldTableI Line 591  class AutoTransientTable(table.OldTableI
591      def NumColumns(self):      def NumColumns(self):
592          return self.table.NumColumns()          return self.table.NumColumns()
593    
594      def ReadRowAsDict(self, record):      def RowIdToOrdinal(self, gid):
595            """Return the row ordinal given its id"""
596            if self.t_table is not None:
597                return self.t_table.RowIdToOrdinal(gid)
598            else:
599                return self.table.RowIdToOrdinal(gid)
600    
601        def RowOrdinalToId(self, num):
602            """Return the row id given its ordinal"""
603            if self.t_table is not None:
604                return self.t_table.RowOrdinalToId(num)
605            else:
606                return self.table.RowOrdinalToId(num)
607    
608        def ReadRowAsDict(self, record, row_is_ordinal = 0):
609          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
610          """          """
611          if self.t_table is not None:          if self.t_table is not None:
612              return self.t_table.ReadRowAsDict(record)              return self.t_table.ReadRowAsDict(record,
613                                                  row_is_ordinal = row_is_ordinal)
614          else:          else:
615              return self.table.ReadRowAsDict(record)              return self.table.ReadRowAsDict(record,
616                                                row_is_ordinal = row_is_ordinal)
617    
618      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
619          """Return the value of the specified row and column          """Return the value of the specified row and column
620    
621          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
622          """          """
623          if self.t_table is not None:          if self.t_table is not None:
624              return self.t_table.ReadValue(row, col)              return self.t_table.ReadValue(row, col,
625                                              row_is_ordinal = row_is_ordinal)
626          else:          else:
627              return self.table.ReadValue(row, col)              return self.table.ReadValue(row, col,
628                                            row_is_ordinal = row_is_ordinal)
629    
630      def copy_to_transient(self):      def copy_to_transient(self):
631          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 484  class AutoTransientTable(table.OldTableI Line 640  class AutoTransientTable(table.OldTableI
640          return self.t_table          return self.t_table
641    
642      def ValueRange(self, col):      def ValueRange(self, col):
643          if self.t_table is None:          # Performance of sqlite vs. DBF for this method:
644              self.copy_to_transient()          #
645          return self.t_table.ValueRange(col)          # If the table has been copied to the sqlite database it's
646            # faster to use it even if there is no index on that column.
647            # Otherwise it's faster to simply loop through all rows in the
648            # DBF file. Copying the data to the sqlite database can take
649            # very long for large amounts of data
650            #
651            # Of course if the table is not a DBF file the issue could be
652            # different, although copying the data into sqlite first will
653            # likely always be slower than simply querying the non-sqlite
654            # table directly. Currently only DBF files and memory tables are
655            # used as the underlying non-sqlite table, though.
656            if self.t_table is not None:
657                return self.t_table.ValueRange(col)
658            else:
659                return self.table.ValueRange(col)
660    
661      def UniqueValues(self, col):      def UniqueValues(self, col):
662          if self.t_table is None:          # The performance trade-offs for this method are basically the
663              self.copy_to_transient()          # same as for ValueRange except that currently there doesn't
664          return self.t_table.UniqueValues(col)          # seem to be a way to take advantage of indexes in this case in
665            # sqlite. However, it's still faster to query the transient
666            # table if it already exists.
667            if self.t_table is not None:
668                return self.t_table.UniqueValues(col)
669            else:
670                return self.table.UniqueValues(col)
671    
672      def SimpleQuery(self, left, comparison, right):      def SimpleQuery(self, left, comparison, right):
673          if self.t_table is None:          if self.t_table is None:
# Line 510  class AutoTransientTable(table.OldTableI Line 686  class AutoTransientTable(table.OldTableI
686      def Dependencies(self):      def Dependencies(self):
687          """Return a tuple containing the original table"""          """Return a tuple containing the original table"""
688          return (self.table,)          return (self.table,)
689    
690        def Width(self, col):
691            return self.table.Width(col)
692    
693        def write_record(self, row, values):
694            """Write the values to the given row.
695    
696            This is a very experimental feature which doesn't work in all
697            cases, so you better know what you're doing when calling this
698            method.
699            """
700            self.table.write_record(row, values)

Legend:
Removed from v.1002  
changed lines
  Added in v.2680

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26