/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory | Revision Log | View Patch

revision 777 by bh, Tue Apr 29 14:54:12 2003 UTC revision 1328 by bh, Tue Jul 1 12:01:58 2003 UTC
# Line 22  import os Line 22  import os
22  import weakref  import weakref
23  from sqlite import connect  from sqlite import connect
24    
25    from base import TitledObject
26    
27  import table  import table
28    
29  sql_type_map = {  sql_type_map = {
# Line 85  class ColumnReference: Line 87  class ColumnReference:
87          self.internal_name = internal_name          self.internal_name = internal_name
88    
89    
90  class TransientTableBase:  class TransientTableBase(table.OldTableInterfaceMixin):
91    
92      """Base class for tables in the transient database"""      """Base class for tables in the transient database"""
93    
# Line 96  class TransientTableBase: Line 98  class TransientTableBase:
98          self.indexed_columns = {}          self.indexed_columns = {}
99          self.read_record_cursor = None          self.read_record_cursor = None
100          self.read_record_last_row = None          self.read_record_last_row = None
101            self.read_record_last_result = None
102    
103      def create(self, columns):      def create(self, columns):
104          self.columns = columns          self.columns = columns
# Line 103  class TransientTableBase: Line 106  class TransientTableBase:
106          self.orig_names = []          self.orig_names = []
107          self.internal_to_orig = {}          self.internal_to_orig = {}
108          self.orig_to_internal = {}          self.orig_to_internal = {}
109            self.column_map = {}
110    
111          # Create the column objects and fill various maps and lists          # Create the column objects and fill various maps and lists
112          for col in self.columns:          for index in range(len(self.columns)):
113                col = self.columns[index]
114              self.name_to_column[col.name] = col              self.name_to_column[col.name] = col
115              self.orig_names.append(col.name)              self.orig_names.append(col.name)
116              self.internal_to_orig[col.internal_name] = col.name              self.internal_to_orig[col.internal_name] = col.name
117              self.orig_to_internal[col.name] = col.internal_name              self.orig_to_internal[col.name] = col.internal_name
118                self.column_map[col.name] = col
119                self.column_map[index] = col
120    
121          # Build the CREATE TABLE statement and create the table in the          # Build the CREATE TABLE statement and create the table in the
122          # database          # database
123          table_types = []          table_types = ["id INTEGER PRIMARY KEY"]
124          for col in self.columns:          for col in self.columns:
125              table_types.append("%s %s" % (col.internal_name,              table_types.append("%s %s" % (col.internal_name,
126                                            sql_type_map[col.type]))                                            sql_type_map[col.type]))
# Line 137  class TransientTableBase: Line 144  class TransientTableBase:
144              self.db.execute(stmt)              self.db.execute(stmt)
145              self.indexed_columns[column] = 1              self.indexed_columns[column] = 1
146    
147      def field_count(self):      def NumColumns(self):
148          return len(self.columns)          return len(self.columns)
149    
150      def field_info(self, i):      def NumRows(self):
         col = self.columns[i]  
         return col.type, col.name, 0, 0  
   
     def field_info_by_name(self, name):  
         for col in self.columns:  
             if col.name == name:  
                 return col.type, col.name, 0, 0  
         else:  
             return None  
   
     def record_count(self):  
151          result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)          result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152          return int(result[0])          return int(result[0])
153    
154      def read_record(self, index):      def Columns(self):
155            return self.columns
156    
157        def Column(self, col):
158            return self.column_map[col]
159    
160        def HasColumn(self, col):
161            """Return whether the table has a column with the given name or index
162            """
163            return self.column_map.has_key(col)
164    
165        def ReadRowAsDict(self, index):
166            # Implementation Strategy: Executing a completely new select
167            # statement every time this method is called is too slow. The
168            # most important usage is to read the records more or less
169            # sequentially. This happens e.g. when drawing a layer with a
170            # classification where the shapes are drawn in order of the
171            # shape ids. Another pattern is that the same row is requested
172            # several times in a row. This happens in the table view, for
173            # instance.
174    
175            # We can exploit this to make access faster by having one cursor
176            # open all the time and keeping the last row read around in case
177            # the same row is accessed again the next time and if the row
178            # index is larger than the row we have read last we simply fetch
179            # rows from the cursor until we've reached the requested row. If
180            # the requested row index is smaller then we start a new cursor.
181    
182            # FIXME: So far this scheme seems to work well enough. Obvious
183            # improvements would be to start the cursor at exactly the
184            # requested row (should be efficient and easy to do now that the
185            id is the primary key) and perhaps also to start a new
186            # cursor if the requested index is much larger than the last row
187            # so that we don't read and discard lots of the rows.
188    
189            # Check whether we have to start a new cursor
190          if self.read_record_cursor is None or index <self.read_record_last_row:          if self.read_record_cursor is None or index <self.read_record_last_row:
191              stmt = ("SELECT %s FROM %s;"              stmt = ("SELECT %s FROM %s;"
192                      % (", ".join([c.internal_name for c in self.columns]),                      % (", ".join([c.internal_name for c in self.columns]),
# Line 163  class TransientTableBase: Line 194  class TransientTableBase:
194              self.read_record_cursor = self.db.cursor()              self.read_record_cursor = self.db.cursor()
195              self.read_record_cursor.execute(stmt)              self.read_record_cursor.execute(stmt)
196              self.read_record_last_row = -1              self.read_record_last_row = -1
197          for i in range(index - self.read_record_last_row):              self.read_record_last_result = None
198              result = self.read_record_cursor.fetchone()  
199            # Now we should have a cursor at a position less than or equal
200            # to the index so the following if statement will always set
201            # result to a suitable value
202            assert index >= self.read_record_last_row
203    
204            if index == self.read_record_last_row:
205                result = self.read_record_last_result
206            else:
207                for i in range(index - self.read_record_last_row):
208                    result = self.read_record_cursor.fetchone()
209                    self.read_record_last_result = result
210          self.read_record_last_row = index          self.read_record_last_row = index
211          result = dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
212          return result  
213        def ReadValue(self, row, col):
214            """Return the value of the specified row and column
215    
216      def field_range(self, colname):          The col parameter may be the index of the column or its name.
217          col = self.name_to_column[colname]          """
218            # Depending on the actual access patterns of the table data, it
219            # might be a bit faster in some circumstances to not implement
220            # this via ReadRowAsDict, but this simple implementation should
221            # be fast enough for most purposes.
222            return self.ReadRowAsDict(row)[self.column_map[col].name]
223    
224        def ValueRange(self, col):
225            col = self.column_map[col]
226          iname = col.internal_name          iname = col.internal_name
227          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228                                     % (iname, iname, self.tablename))                                     % (iname, iname, self.tablename))
229          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
230          return ((converter(min), None), (converter(max), None))          return (converter(min), converter(max))
231    
232      def GetUniqueValues(self, colname):      def UniqueValues(self, col):
233          iname = self.orig_to_internal[colname]          iname = self.column_map[col].internal_name
234          cursor = self.db.cursor()          cursor = self.db.cursor()
235          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236                         % (iname, self.tablename, iname))                         % (iname, self.tablename, iname))
# Line 190  class TransientTableBase: Line 242  class TransientTableBase:
242              result.append(row[0])              result.append(row[0])
243          return result          return result
244    
245        def Width(self, col):
246            """Return the maximum width of values in the column
247    
248            The return value is the maximum length of string representation
249            of the values in the column (represented by index or name)."""
250            max = 0
251            
252            type  = self.column_map[col].type
253            iname = self.column_map[col].internal_name
254            cursor = self.db.cursor()
255            cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256            values = [ i[0] for i in cursor.fetchall()]
257            if not values:
258                return None
259    
260            if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
261                format = "%.12f"
262            elif type == sql_type_map[table.FIELDTYPE_INT]:
263                format = "%d"
264            else:
265                format = "%s"
266            for value in values:
267                if value is None: continue
268                l = len(format % value)
269                if l > max:
270                    max = l
271    
272            return max
273    
274        def SimpleQuery(self, left, comparison, right):
275            """Return the indices of all rows matching a condition.
276    
277            Parameters:
278               left -- The column object for the left side of the comparison
279    
280               comparison -- The comparison operator as a string. It must be
281                             one of '==', '!=', '<', '<=', '>=', '>'
282    
283               right -- The right hand side of the comparison. It must be
284                        either a column object or a value, i.e. a string,
285                        int or float.
286    
287            The return value is a sorted list of the indices of the rows
288            where the condition is true.
289            """
290            if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
291                raise ValueError("Comparison operator %r not allowed" % comparison)
292    
293            if hasattr(right, "internal_name"):
294                right_template = right.internal_name
295                params = ()
296            else:
297                right_template = "%s"
298                params = (right,)
299    
300            query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
301                    % (self.tablename, left.internal_name, comparison,
302                       right_template)
303    
304            cursor = self.db.cursor()
305            cursor.execute(query, params)
306            result = []
307            while 1:
308                row = cursor.fetchone()
309                if row is None:
310                    break
311                result.append(row[0])
312            return result
313    
314        def Dependencies(self):
315            """Placeholder for a method in a derived class.
316    
317            Return a sequence with the tables and other data objects that
318            self depends on.
319            """
320            raise NotImplementedError
321    
322  class TransientTable(TransientTableBase):  
323    class TransientTable(TitledObject, TransientTableBase):
324    
325      """A Table in a transient DB that starts as the copy of a Thuban Table."""      """A Table in a transient DB that starts as the copy of a Thuban Table."""
326    
# Line 202  class TransientTable(TransientTableBase) Line 331  class TransientTable(TransientTableBase)
331          interface.          interface.
332          """          """
333          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
334            TitledObject.__init__(self, table.Title())
335          self.create(table)          self.create(table)
336    
337      def create(self, table):      def create(self, table):
338          columns = []          columns = []
339          for i in range(table.field_count()):          for col in table.Columns():
340              type, name = table.field_info(i)[:2]              columns.append(ColumnReference(col.name, col.type,
             columns.append(ColumnReference(name, type,  
341                                             self.db.new_column_name()))                                             self.db.new_column_name()))
342          TransientTableBase.create(self, columns)          TransientTableBase.create(self, columns)
343    
344          # copy the input table to the transient db          # copy the input table to the transient db
345          insert_template = "INSERT INTO %s (%s) VALUES (%s);" \  
346            # A key to insert to use for the formatting of the insert
347            # statement. The key must not be equal to any of the column
348            # names so we construct one by building a string of x's that is
349            # longer than any of the column names
350            id_key = max([len(col.name) for col in self.columns]) * "x"
351    
352            insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
353                                 % (self.tablename,                                 % (self.tablename,
354                                    ", ".join([col.internal_name                                    ", ".join([col.internal_name
355                                               for col in self.columns]),                                               for col in self.columns]),
356                                      id_key,
357                                    ", ".join(["%%(%s)s" % col.name                                    ", ".join(["%%(%s)s" % col.name
358                                               for col in self.columns]))                                               for col in self.columns]))
359          cursor = self.db.cursor()          cursor = self.db.cursor()
360          for i in range(table.record_count()):          for i in range(table.NumRows()):
361              cursor.execute(insert_template, table.read_record(i))              row = table.ReadRowAsDict(i)
362                row[id_key] = i
363                cursor.execute(insert_template, row)
364          self.db.conn.commit()          self.db.conn.commit()
365    
366    
367    
368  class TransientJoinedTable(TransientTableBase):  class TransientJoinedTable(TitledObject, TransientTableBase):
369    
370      """A Table in the transient DB that contains a join of two tables"""      """A Table in the transient DB that contains a join of two tables"""
371    
372      def __init__(self, transient_db, left_table, left_field,      def __init__(self, transient_db, left_table, left_field,
373                   right_table, right_field = None):                   right_table, right_field = None, outer_join = False):
374          """Create a new table in the transient DB as a join of two tables.          """Create a new table in the transient DB as a join of two tables.
375    
376          Both input tables, left_table and right_table must have a          Both input tables, left_table and right_table must have a
377          transient_table method that returns a table object for a table          transient_table method that returns a table object for a table
378          in the trnsient database. The join is performed on the condition          in the transient database. The join is performed on the condition
379          that the value of the left_field column of the left table is          that the value of the left_field column of the left table is
380          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
381    
# Line 248  class TransientJoinedTable(TransientTabl Line 387  class TransientJoinedTable(TransientTabl
387          names of the input tables without having to create prefixes.          names of the input tables without having to create prefixes.
388          """          """
389          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
390            self.dependencies = (left_table, right_table)
391          self.left_table = left_table.transient_table()          self.left_table = left_table.transient_table()
392          self.left_field = left_field          self.left_field = left_field
393          self.right_table = right_table.transient_table()          self.right_table = right_table.transient_table()
# Line 255  class TransientJoinedTable(TransientTabl Line 395  class TransientJoinedTable(TransientTabl
395              self.right_field = right_field              self.right_field = right_field
396          else:          else:
397              self.right_field = self.left_field              self.right_field = self.left_field
398            self.outer_join = outer_join
399    
400            title = "Join of %(left)s and %(right)s" \
401                    % {"left": self.left_table.Title(),
402                       "right": self.right_table.Title()}
403            TitledObject.__init__(self, title)
404    
405          self.create()          self.create()
406    
407      def create(self):      def create(self):
# Line 263  class TransientJoinedTable(TransientTabl Line 410  class TransientJoinedTable(TransientTabl
410    
411          self.right_table.ensure_index(self.right_field)          self.right_table.ensure_index(self.right_field)
412    
413            # determine the internal column names to join on before
414            # coalescing the column information because if the external
415            # column names are the same they will be mapped to the same
416            # internal name afterwards.
417            internal_left_col = self.left_table.orig_to_internal[self.left_field]
418            internal_right_col =self.right_table.orig_to_internal[self.right_field]
419    
420          # Coalesce the column information          # Coalesce the column information
421          visited = {}          visited = {}
422          columns = []          columns = []
423          for col in self.left_table.columns + self.right_table.columns:          for col in self.left_table.columns + self.right_table.columns:
424              if col.name in visited:              if col.name in visited:
425                    # We can't allow multiple columns with the same original
426                    # name, so omit this one. FIXME: There should be a
427                    # better solution.
428                  continue                  continue
429              columns.append(col)              columns.append(col)
430                visited[col.name] = 1
431          TransientTableBase.create(self, columns)          TransientTableBase.create(self, columns)
432    
433          # Copy the joined data to the table.          # Copy the joined data to the table.
434          internal_names = [col.internal_name for col in self.columns]          internal_names = [col.internal_name for col in self.columns]
435          stmt = "INSERT INTO %s (%s) SELECT %s FROM %s JOIN %s ON %s = %s;" \          if self.outer_join:
436                 % (self.tablename,              join_operator = 'LEFT OUTER JOIN'
437                    ", ".join(internal_names),          else:
438                    ", ".join(internal_names),              join_operator = 'JOIN'
439                    self.left_table.tablename,          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
440                    self.right_table.tablename,                  " %s %s ON %s = %s;"
441                    self.orig_to_internal[self.left_field],                  % (self.tablename,
442                    self.orig_to_internal[self.right_field])                     ", ".join(internal_names),
443                       self.left_table.tablename,
444                       ", ".join(internal_names),
445                       self.left_table.tablename,
446                       join_operator,
447                       self.right_table.tablename,
448                       internal_left_col,
449                       internal_right_col))
450          self.db.execute(stmt)          self.db.execute(stmt)
451    
452        def Dependencies(self):
453            """Return a tuple with the two tables the join depends on."""
454            return self.dependencies
455    
456    
457  class AutoTransientTable:  class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
458    
459      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
460    
# Line 294  class AutoTransientTable: Line 463  class AutoTransientTable:
463      """      """
464    
465      def __init__(self, transient_db, table):      def __init__(self, transient_db, table):
466            TitledObject.__init__(self, table.Title())
467          self.transient_db = transient_db          self.transient_db = transient_db
468          self.table = table          self.table = table
469          self.t_table = None          self.t_table = None
470    
471      def record_count(self):      def Columns(self):
472          """Return the number of records"""          return self.table.Columns()
         return self.table.record_count()  
   
     def field_count(self):  
         """Return the number of fields in a record"""  
         return self.table.field_count()  
   
     def field_info(self, field):  
         """Return a tuple (type, name, width, prec) for the field no. field  
   
         type is the data type of the field, name the name, width the  
         field width in characters and prec the decimal precision.  
         """  
         info = self.table.field_info(field)  
         if info:  
             info = info[:2] + (0, 0)  
         return info  
   
     def field_info_by_name(self, fieldName):  
         info = self.table.field_info_by_name(fieldName)  
         if info:  
             info = info[:2] + (0, 0)  
         return info  
473    
474      def read_record(self, record):      def Column(self, col):
475            return self.table.Column(col)
476    
477        def HasColumn(self, col):
478            """Return whether the table has a column with the given name or index
479            """
480            return self.table.HasColumn(col)
481    
482        def NumRows(self):
483            return self.table.NumRows()
484    
485        def NumColumns(self):
486            return self.table.NumColumns()
487    
488        def ReadRowAsDict(self, record):
489          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
490          """          """
491          if self.t_table is not None:          if self.t_table is not None:
492              return self.t_table.read_record(record)              return self.t_table.ReadRowAsDict(record)
493          else:          else:
494              return self.table.read_record(record)              return self.table.ReadRowAsDict(record)
495    
496      def write_record(self, record, values):      def ReadValue(self, row, col):
497          raise NotImplementedError          """Return the value of the specified row and column
498    
499            The col parameter may be the index of the column or its name.
500            """
501            if self.t_table is not None:
502                return self.t_table.ReadValue(row, col)
503            else:
504                return self.table.ReadValue(row, col)
505    
506      def copy_to_transient(self):      def copy_to_transient(self):
507          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 346  class AutoTransientTable: Line 515  class AutoTransientTable:
515              self.copy_to_transient()              self.copy_to_transient()
516          return self.t_table          return self.t_table
517    
518      def field_range(self, colname):      def ValueRange(self, col):
519            if self.t_table is None:
520                self.copy_to_transient()
521            return self.t_table.ValueRange(col)
522    
523        def UniqueValues(self, col):
524          if self.t_table is None:          if self.t_table is None:
525              self.copy_to_transient()              self.copy_to_transient()
526          return self.t_table.field_range(colname)          return self.t_table.UniqueValues(col)
527    
528      def GetUniqueValues(self, colname):      def SimpleQuery(self, left, comparison, right):
529          if self.t_table is None:          if self.t_table is None:
530              self.copy_to_transient()              self.copy_to_transient()
531          return self.t_table.GetUniqueValues(colname)          # Make sure to use the column object of the transient table. The
532            # left argument is always a column object so we can just ask the
533            # t_table for the right object.
534            if hasattr(right, "name"):
535                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
536                                                comparison,
537                                                self.t_table.Column(right.name))
538            else:
539                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
540                                                comparison, right)
541    
542        def Dependencies(self):
543            """Return a tuple containing the original table"""
544            return (self.table,)
545    
546        def Width(self, col):
547            return self.table.Width(col)

Legend:
Removed from v.777  
changed lines
  Added in v.1328

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26