/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 785 by bh, Wed Apr 30 10:54:03 2003 UTC revision 1662 by bh, Wed Aug 27 13:51:01 2003 UTC
# Line 18  __version__ = "$Revision$" Line 18  __version__ = "$Revision$"
18  # $Source$  # $Source$
19  # $Id$  # $Id$
20    
 import os  
 import weakref  
21  from sqlite import connect  from sqlite import connect
22    
23    from base import TitledObject
24    
25  import table  import table
26    
27  sql_type_map = {  sql_type_map = {
# Line 85  class ColumnReference: Line 85  class ColumnReference:
85          self.internal_name = internal_name          self.internal_name = internal_name
86    
87    
88  class TransientTableBase:  class TransientTableBase(table.OldTableInterfaceMixin):
89    
90      """Base class for tables in the transient database"""      """Base class for tables in the transient database"""
91    
# Line 104  class TransientTableBase: Line 104  class TransientTableBase:
104          self.orig_names = []          self.orig_names = []
105          self.internal_to_orig = {}          self.internal_to_orig = {}
106          self.orig_to_internal = {}          self.orig_to_internal = {}
107            self.column_map = {}
108    
109          # Create the column objects and fill various maps and lists          # Create the column objects and fill various maps and lists
110          for col in self.columns:          for index in range(len(self.columns)):
111                col = self.columns[index]
112              self.name_to_column[col.name] = col              self.name_to_column[col.name] = col
113              self.orig_names.append(col.name)              self.orig_names.append(col.name)
114              self.internal_to_orig[col.internal_name] = col.name              self.internal_to_orig[col.internal_name] = col.name
115              self.orig_to_internal[col.name] = col.internal_name              self.orig_to_internal[col.name] = col.internal_name
116                self.column_map[col.name] = col
117                self.column_map[index] = col
118    
119          # Build the CREATE TABLE statement and create the table in the          # Build the CREATE TABLE statement and create the table in the
120          # database          # database
121          table_types = []          table_types = ["id INTEGER PRIMARY KEY"]
122          for col in self.columns:          for col in self.columns:
123              table_types.append("%s %s" % (col.internal_name,              table_types.append("%s %s" % (col.internal_name,
124                                            sql_type_map[col.type]))                                            sql_type_map[col.type]))
# Line 138  class TransientTableBase: Line 142  class TransientTableBase:
142              self.db.execute(stmt)              self.db.execute(stmt)
143              self.indexed_columns[column] = 1              self.indexed_columns[column] = 1
144    
145      def field_count(self):      def NumColumns(self):
146          return len(self.columns)          return len(self.columns)
147    
148      def field_info(self, i):      def NumRows(self):
         col = self.columns[i]  
         return col.type, col.name, 0, 0  
   
     def field_info_by_name(self, name):  
         for col in self.columns:  
             if col.name == name:  
                 return col.type, col.name, 0, 0  
         else:  
             return None  
   
     def record_count(self):  
149          result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)          result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150          return int(result[0])          return int(result[0])
151    
152      def read_record(self, index):      def Columns(self):
153            return self.columns
154    
155        def Column(self, col):
156            return self.column_map[col]
157    
158        def HasColumn(self, col):
159            """Return whether the table has a column with the given name or index
160            """
161            return self.column_map.has_key(col)
162    
163        def RowIdToOrdinal(self, gid):
164            """Return the row ordinal given its id
165    
166            At the moment the transient tables are only used for tables that
167            don't distinguish between row number and row id, so the value is
168            returned unchanged.
169            """
170            return gid
171    
172        def RowOrdinalToId(self, num):
173            """Return the rowid given its ordinal
174    
175            At the moment the transient tables are only used for tables that
176            don't distinguish between row number and row id, so the value is
177            returned unchanged.
178            """
179            return num
180    
181        def ReadRowAsDict(self, index, row_is_ordinal = 0):
182            """Return the entire row as a dictionary with column names as keys
183    
184            The row_is_ordinal is ignored because at the moment the
185            transient tables are only used for DBF files where it doesn't
186            matter.
187            """
188            # Implementation Strategy: Executing a completely new select
189            # statement every time this method is called is too slow. The
190            # most important usage is to read the records more or less
191            # sequentially. This happens e.g. when drawing a layer with a
192            # classification where the shapes are drawn in order of the
193            # shape ids. Another pattern is that the same row is requested
194            # several times in a row. This happens in the table view, for
195            # instance.
196    
197            # We can exploit this to make access faster by having one cursor
198            # open all the time and keeping the last row read around in case
199            # the same row is accessed again the next time and if the row
200            # index is larger than the row we have read last we simply fetch
201            # rows from the cursor until we've reached the requested row. If
202            # the requested row index is smaller then we start a new cursor.
203    
204            # FIXME: So far this scheme seems to work well enough. Obvious
205            # improvements would be to start the cursor at exactly the
206            # requested row (should be efficient and easy to do now that the
207            id is the primary key) and perhaps also to start a new
208            # cursor if the requested index is much larger than the last row
209            # so that we don't read and discard lots of the rows.
210    
211            # Check whether we have to start a new cursor
212          if self.read_record_cursor is None or index <self.read_record_last_row:          if self.read_record_cursor is None or index <self.read_record_last_row:
213              stmt = ("SELECT %s FROM %s;"              stmt = ("SELECT %s FROM %s;"
214                      % (", ".join([c.internal_name for c in self.columns]),                      % (", ".join([c.internal_name for c in self.columns]),
# Line 172  class TransientTableBase: Line 224  class TransientTableBase:
224          assert index >= self.read_record_last_row          assert index >= self.read_record_last_row
225    
226          if index == self.read_record_last_row:          if index == self.read_record_last_row:
227              result = self.read_record_last_result              result = self.read_record_last_result
228          else:          else:
229              for i in range(index - self.read_record_last_row):              for i in range(index - self.read_record_last_row):
230                  result = self.read_record_cursor.fetchone()                  result = self.read_record_cursor.fetchone()
# Line 180  class TransientTableBase: Line 232  class TransientTableBase:
232          self.read_record_last_row = index          self.read_record_last_row = index
233          return dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
234    
235      def field_range(self, colname):      def ReadValue(self, row, col, row_is_ordinal = 0):
236          col = self.name_to_column[colname]          """Return the value of the specified row and column
237    
238            The col parameter may be the index of the column or its name.
239    
240            The row_is_ordinal is ignored because at the moment the
241            transient tables are only used for DBF files where it doesn't
242            matter.
243            """
244            # Depending on the actual access patterns of the table data, it
245            # might be a bit faster in some circumstances to not implement
246            # this via ReadRowAsDict, but this simple implementation should
247            # be fast enough for most purposes.
248            return self.ReadRowAsDict(row)[self.column_map[col].name]
249    
250        def ValueRange(self, col):
251            col = self.column_map[col]
252          iname = col.internal_name          iname = col.internal_name
253          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
254                                     % (iname, iname, self.tablename))                                     % (iname, iname, self.tablename))
255          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
256          return ((converter(min), None), (converter(max), None))          return (converter(min), converter(max))
257    
258      def GetUniqueValues(self, colname):      def UniqueValues(self, col):
259          iname = self.orig_to_internal[colname]          iname = self.column_map[col].internal_name
260          cursor = self.db.cursor()          cursor = self.db.cursor()
261          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
262                         % (iname, self.tablename, iname))                         % (iname, self.tablename, iname))
# Line 201  class TransientTableBase: Line 268  class TransientTableBase:
268              result.append(row[0])              result.append(row[0])
269          return result          return result
270    
271        def Width(self, col):
272            """Return the maximum width of values in the column
273    
274            The return value is the maximum length of the string
275            representation of the values in the column (represented by index
276            or name).
277            """
278            max = 0
279    
280            type  = self.column_map[col].type
281            iname = self.column_map[col].internal_name
282            cursor = self.db.cursor()
283            cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
284            values = [ i[0] for i in cursor.fetchall()]
285            if not values:
286                return None
287    
288            if type == table.FIELDTYPE_DOUBLE:
289                format = "%.12f"
290            elif type == table.FIELDTYPE_INT:
291                format = "%d"
292            else:
293                format = "%s"
294            for value in values:
295                if value is None: continue
296                l = len(format % value)
297                if l > max:
298                    max = l
299    
300            return max
301    
302        def SimpleQuery(self, left, comparison, right):
303            """Return the indices of all rows matching a condition.
304    
305            Parameters:
306               left -- The column object for the left side of the comparison
307    
308               comparison -- The comparison operator as a string. It must be
309                             one of '==', '!=', '<', '<=', '>=', '>'
310    
311               right -- The right hand side of the comparison. It must be
312                        either a column object or a value, i.e. a string,
313                        int or float.
314    
315            The return value is a sorted list of the indices of the rows
316            where the condition is true.
317            """
318            if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
319                raise ValueError("Comparison operator %r not allowed" % comparison)
320    
321  class TransientTable(TransientTableBase):          if hasattr(right, "internal_name"):
322                right_template = right.internal_name
323                params = ()
324            else:
325                right_template = "%s"
326                params = (right,)
327    
328            query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
329                    % (self.tablename, left.internal_name, comparison,
330                       right_template)
331    
332            cursor = self.db.cursor()
333            cursor.execute(query, params)
334            result = []
335            while 1:
336                row = cursor.fetchone()
337                if row is None:
338                    break
339                result.append(row[0])
340            return result
341    
342        def Dependencies(self):
343            """Placeholder for a method in a derived class.
344    
345            Return a sequence with the tables and other data objects that
346            self depends on.
347            """
348            raise NotImplementedError
349    
350    
351    class TransientTable(TitledObject, TransientTableBase):
352    
353      """A Table in a transient DB that starts as the copy of a Thuban Table."""      """A Table in a transient DB that starts as the copy of a Thuban Table."""
354    
# Line 213  class TransientTable(TransientTableBase) Line 359  class TransientTable(TransientTableBase)
359          interface.          interface.
360          """          """
361          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
362            TitledObject.__init__(self, table.Title())
363          self.create(table)          self.create(table)
364    
365      def create(self, table):      def create(self, table):
366          columns = []          columns = []
367          for i in range(table.field_count()):          for col in table.Columns():
368              type, name = table.field_info(i)[:2]              columns.append(ColumnReference(col.name, col.type,
             columns.append(ColumnReference(name, type,  
369                                             self.db.new_column_name()))                                             self.db.new_column_name()))
370          TransientTableBase.create(self, columns)          TransientTableBase.create(self, columns)
371    
372          # copy the input table to the transient db          # copy the input table to the transient db
373          insert_template = "INSERT INTO %s (%s) VALUES (%s);" \  
374            # A key for the row id to use in the formatting of the insert
375            # statement. The key must not be equal to any of the column
376            # names so we construct one by building a string of x's that is
377            # longer than any of the column names
378            id_key = max([len(col.name) for col in self.columns]) * "x"
379    
380            insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
381                                 % (self.tablename,                                 % (self.tablename,
382                                    ", ".join([col.internal_name                                    ", ".join([col.internal_name
383                                               for col in self.columns]),                                               for col in self.columns]),
384                                      id_key,
385                                    ", ".join(["%%(%s)s" % col.name                                    ", ".join(["%%(%s)s" % col.name
386                                               for col in self.columns]))                                               for col in self.columns]))
387          cursor = self.db.cursor()          cursor = self.db.cursor()
388          for i in range(table.record_count()):          for i in range(table.NumRows()):
389              cursor.execute(insert_template, table.read_record(i))              row = table.ReadRowAsDict(i)
390                row[id_key] = i
391                cursor.execute(insert_template, row)
392          self.db.conn.commit()          self.db.conn.commit()
393    
394    
395    
396  class TransientJoinedTable(TransientTableBase):  class TransientJoinedTable(TitledObject, TransientTableBase):
397    
398      """A Table in the transient DB that contains a join of two tables"""      """A Table in the transient DB that contains a join of two tables"""
399    
400      def __init__(self, transient_db, left_table, left_field,      def __init__(self, transient_db, left_table, left_field,
401                   right_table, right_field = None):                   right_table, right_field = None, outer_join = False):
402          """Create a new table in the transient DB as a join of two tables.          """Create a new table in the transient DB as a join of two tables.
403    
404          Both input tables, left_table and right_table must have a          Both input tables, left_table and right_table must have a
405          transient_table method that returns a table object for a table          transient_table method that returns a table object for a table
406          in the trnsient database. The join is performed on the condition          in the transient database. The join is performed on the condition
407          that the value of the left_field column of the left table is          that the value of the left_field column of the left table is
408          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
409    
410          The joined table contains all columns of the input tables with          The joined table contains all columns of the input tables,
411          one exception: Any column in the right_table with the same name          however, the column names of the right table may be changed
412          as one of the columns in the left_table will be omitted. This is          slightly to make them unique in the joined table. This is
413          somewhat of an implementation detail, but is done so that the          currently done by appending a sufficient number of underscores
414          column names of the joined table can be the same as the column          ('_').
         names of the input tables without having to create prefixes.  
415          """          """
416          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
417            self.dependencies = (left_table, right_table)
418          self.left_table = left_table.transient_table()          self.left_table = left_table.transient_table()
419          self.left_field = left_field          self.left_field = left_field
420          self.right_table = right_table.transient_table()          self.right_table = right_table.transient_table()
# Line 266  class TransientJoinedTable(TransientTabl Line 422  class TransientJoinedTable(TransientTabl
422              self.right_field = right_field              self.right_field = right_field
423          else:          else:
424              self.right_field = self.left_field              self.right_field = self.left_field
425            self.outer_join = outer_join
426    
427            title = "Join of %(left)s and %(right)s" \
428                    % {"left": self.left_table.Title(),
429                       "right": self.right_table.Title()}
430            TitledObject.__init__(self, title)
431    
432          self.create()          self.create()
433    
434      def create(self):      def create(self):
# Line 274  class TransientJoinedTable(TransientTabl Line 437  class TransientJoinedTable(TransientTabl
437    
438          self.right_table.ensure_index(self.right_field)          self.right_table.ensure_index(self.right_field)
439    
440            # determine the internal column names to join on before
441            # coalescing the column information because if the external
442            # column names are the same they will be mapped to the same
443            # internal name afterwards.
444            internal_left_col = self.left_table.orig_to_internal[self.left_field]
445            internal_right_col =self.right_table.orig_to_internal[self.right_field]
446    
447          # Coalesce the column information          # Coalesce the column information
448          visited = {}          visited = {}
449          columns = []          columns = []
450          for col in self.left_table.columns + self.right_table.columns:          newcolumns = []
451              if col.name in visited:          for table in (self.left_table, self.right_table):
452                  continue              for col in table.Columns():
453              columns.append(col)                  colname = col.name
454          TransientTableBase.create(self, columns)                  # We can't allow multiple columns with the same
455                    # original name, so append '_' to this one until
456                    # it is unique.
457                    # FIXME: There should be a better solution.
458                    while colname in visited:
459                        colname = colname + '_'
460                    columns.append((table.tablename, col))
461                    newcol = ColumnReference(colname, col.type,
462                                                "Col%03d" % (len(newcolumns)+1))
463                    newcolumns.append(newcol)
464                    visited[colname] = 1
465            TransientTableBase.create(self, newcolumns)
466    
467          # Copy the joined data to the table.          # Copy the joined data to the table.
468          internal_names = [col.internal_name for col in self.columns]          newinternal_names = [col.internal_name for col in self.columns]
469          stmt = "INSERT INTO %s (%s) SELECT %s FROM %s JOIN %s ON %s = %s;" \          internal_references = ["%s.%s" % (table, col.internal_name)
470                 % (self.tablename,                                                      for table, col in columns]
471                    ", ".join(internal_names),          if self.outer_join:
472                    ", ".join(internal_names),              join_operator = 'LEFT OUTER JOIN'
473                    self.left_table.tablename,          else:
474                    self.right_table.tablename,              join_operator = 'JOIN'
475                    self.orig_to_internal[self.left_field],          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
476                    self.orig_to_internal[self.right_field])                  " %s %s ON %s.%s = %s.%s;"
477                    % (self.tablename,
478                       ", ".join(newinternal_names),
479                       self.left_table.tablename,
480                       ", ".join(internal_references),
481                       self.left_table.tablename,
482                       join_operator,
483                       self.right_table.tablename,
484                       self.left_table.tablename,
485                       internal_left_col,
486                       self.right_table.tablename,
487                       internal_right_col))
488          self.db.execute(stmt)          self.db.execute(stmt)
489    
490        def Dependencies(self):
491            """Return a tuple with the two tables the join depends on."""
492            return self.dependencies
493    
494        def JoinType(self):
495            """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
496            if self.outer_join:
497                return "LEFT OUTER"
498            else:
499                return "INNER"
500    
501    
502  class AutoTransientTable:  class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
503    
504      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
505    
# Line 305  class AutoTransientTable: Line 508  class AutoTransientTable:
508      """      """
509    
510      def __init__(self, transient_db, table):      def __init__(self, transient_db, table):
511            TitledObject.__init__(self, table.Title())
512          self.transient_db = transient_db          self.transient_db = transient_db
513          self.table = table          self.table = table
514          self.t_table = None          self.t_table = None
515    
516      def record_count(self):      def Columns(self):
517          """Return the number of records"""          return self.table.Columns()
518          return self.table.record_count()  
519        def Column(self, col):
520      def field_count(self):          return self.table.Column(col)
521          """Return the number of fields in a record"""  
522          return self.table.field_count()      def HasColumn(self, col):
523            """Return whether the table has a column with the given name or index
524      def field_info(self, field):          """
525          """Return a tuple (type, name, width, prec) for the field no. field          return self.table.HasColumn(col)
526    
527          type is the data type of the field, name the name, width the      def NumRows(self):
528          field width in characters and prec the decimal precision.          return self.table.NumRows()
529          """  
530          info = self.table.field_info(field)      def NumColumns(self):
531          if info:          return self.table.NumColumns()
532              info = info[:2] + (0, 0)  
533          return info      def RowIdToOrdinal(self, gid):
534            """Return the row ordinal given its id"""
535      def field_info_by_name(self, fieldName):          if self.t_table is not None:
536          info = self.table.field_info_by_name(fieldName)              return self.t_table.RowIdToOrdinal(gid)
537          if info:          else:
538              info = info[:2] + (0, 0)              return self.table.RowIdToOrdinal(gid)
         return info  
539    
540      def read_record(self, record):      def RowOrdinalToId(self, num):
541            """Return the rowid given its ordinal"""
542            if self.t_table is not None:
543                return self.t_table.RowOrdinalToId(num)
544            else:
545                return self.table.RowOrdinalToId(num)
546    
547        def ReadRowAsDict(self, record, row_is_ordinal = 0):
548          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
549          """          """
550          if self.t_table is not None:          if self.t_table is not None:
551              return self.t_table.read_record(record)              return self.t_table.ReadRowAsDict(record,
552                                                  row_is_ordinal = row_is_ordinal)
553          else:          else:
554              return self.table.read_record(record)              return self.table.ReadRowAsDict(record,
555                                                row_is_ordinal = row_is_ordinal)
556    
557      def write_record(self, record, values):      def ReadValue(self, row, col, row_is_ordinal = 0):
558          raise NotImplementedError          """Return the value of the specified row and column
559    
560            The col parameter may be the index of the column or its name.
561            """
562            if self.t_table is not None:
563                return self.t_table.ReadValue(row, col,
564                                              row_is_ordinal = row_is_ordinal)
565            else:
566                return self.table.ReadValue(row, col,
567                                            row_is_ordinal = row_is_ordinal)
568    
569      def copy_to_transient(self):      def copy_to_transient(self):
570          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 357  class AutoTransientTable: Line 578  class AutoTransientTable:
578              self.copy_to_transient()              self.copy_to_transient()
579          return self.t_table          return self.t_table
580    
581      def field_range(self, colname):      def ValueRange(self, col):
582            if self.t_table is None:
583                self.copy_to_transient()
584            return self.t_table.ValueRange(col)
585    
586        def UniqueValues(self, col):
587          if self.t_table is None:          if self.t_table is None:
588              self.copy_to_transient()              self.copy_to_transient()
589          return self.t_table.field_range(colname)          return self.t_table.UniqueValues(col)
590    
591      def GetUniqueValues(self, colname):      def SimpleQuery(self, left, comparison, right):
592          if self.t_table is None:          if self.t_table is None:
593              self.copy_to_transient()              self.copy_to_transient()
594          return self.t_table.GetUniqueValues(colname)          # Make sure to use the column object of the transient table. The
595            # left argument is always a column object so we can just ask the
596            # t_table for the right object.
597            if hasattr(right, "name"):
598                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
599                                                comparison,
600                                                self.t_table.Column(right.name))
601            else:
602                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
603                                                comparison, right)
604    
605        def Dependencies(self):
606            """Return a tuple containing the original table"""
607            return (self.table,)
608    
609        def Width(self, col):
610            return self.table.Width(col)

Legend:
Removed from v.785  
changed lines
  Added in v.1662

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26