/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 765 by bh, Tue Apr 29 12:42:14 2003 UTC revision 1026 by frank, Mon May 26 11:46:42 2003 UTC
# Line 22  import os Line 22  import os
22  import weakref  import weakref
23  from sqlite import connect  from sqlite import connect
24    
25    from base import TitledObject
26    
27  import table  import table
28    
29  sql_type_map = {  sql_type_map = {
# Line 53  class TransientDatabase: Line 55  class TransientDatabase:
55          self.close()          self.close()
56    
57      def close(self):      def close(self):
58          if self.conn:          if self.conn is not None:
59              self.conn.close()              self.conn.close()
60                self.conn = None
61    
62      def new_table_name(self):      def new_table_name(self):
63          self.num_tables += 1          self.num_tables += 1
# Line 84  class ColumnReference: Line 87  class ColumnReference:
87          self.internal_name = internal_name          self.internal_name = internal_name
88    
89    
90  class TransientTableBase:  class TransientTableBase(table.OldTableInterfaceMixin):
91    
92      """Base class for tables in the transient database"""      """Base class for tables in the transient database"""
93    
# Line 95  class TransientTableBase: Line 98  class TransientTableBase:
98          self.indexed_columns = {}          self.indexed_columns = {}
99          self.read_record_cursor = None          self.read_record_cursor = None
100          self.read_record_last_row = None          self.read_record_last_row = None
101            self.read_record_last_result = None
102    
103      def create(self, columns):      def create(self, columns):
104          self.columns = columns          self.columns = columns
# Line 102  class TransientTableBase: Line 106  class TransientTableBase:
106          self.orig_names = []          self.orig_names = []
107          self.internal_to_orig = {}          self.internal_to_orig = {}
108          self.orig_to_internal = {}          self.orig_to_internal = {}
109            self.column_map = {}
110    
111          # Create the column objects and fill various maps and lists          # Create the column objects and fill various maps and lists
112          for col in self.columns:          for index in range(len(self.columns)):
113                col = self.columns[index]
114              self.name_to_column[col.name] = col              self.name_to_column[col.name] = col
115              self.orig_names.append(col.name)              self.orig_names.append(col.name)
116              self.internal_to_orig[col.internal_name] = col.name              self.internal_to_orig[col.internal_name] = col.name
117              self.orig_to_internal[col.name] = col.internal_name              self.orig_to_internal[col.name] = col.internal_name
118                self.column_map[col.name] = col
119                self.column_map[index] = col
120    
121          # Build the CREATE TABLE statement and create the table in the          # Build the CREATE TABLE statement and create the table in the
122          # database          # database
123          table_types = []          table_types = ["id INTEGER PRIMARY KEY"]
124          for col in self.columns:          for col in self.columns:
125              table_types.append("%s %s" % (col.internal_name,              table_types.append("%s %s" % (col.internal_name,
126                                            sql_type_map[col.type]))                                            sql_type_map[col.type]))
# Line 136  class TransientTableBase: Line 144  class TransientTableBase:
144              self.db.execute(stmt)              self.db.execute(stmt)
145              self.indexed_columns[column] = 1              self.indexed_columns[column] = 1
146    
147      def field_count(self):      def NumColumns(self):
148          return len(self.columns)          return len(self.columns)
149    
150      def field_info(self, i):      def NumRows(self):
         col = self.columns[i]  
         return col.type, col.name, 0, 0  
   
     def field_info_by_name(self, name):  
         for col in self.columns:  
             if col.name == name:  
                 return col.type, col.name, 0, 0  
         else:  
             return None  
   
     def record_count(self):  
151          result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)          result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152          return int(result[0])          return int(result[0])
153    
154      def read_record(self, index):      def Columns(self):
155            return self.columns
156    
157        def Column(self, col):
158            return self.column_map[col]
159    
160        def HasColumn(self, col):
161            """Return whether the table has a column with the given name or index
162            """
163            return self.column_map.has_key(col)
164    
165        def ReadRowAsDict(self, index):
166            # Implementation Strategy: Executing a completely new select
167            # statement every time this method is called is too slow. The
168            # most important usage is to read the records more or less
169            # sequentially. This happens e.g. when drawing a layer with a
170            # classification where the shapes are drawn in order of the
171            # shape ids. Another pattern is that the same row is requested
172            # several times in a row. This happens in the table view, for
173            # instance.
174    
175            # We can exploit this to make access faster by having one cursor
176            # open all the time and keeping the last row read around in case
177            # the same row is accessed again the next time and if the row
178            # index is larger than the row we have read last we simply fetch
179            # rows from the cursor until we've reached the requested row. If
180            # the requested row index is smaller then we start a new cursor.
181    
182            # FIXME: So far this scheme seems to work well enough. Obvious
183            # improvements would be to start the cursor at exactly the
184            # requested row (should be efficient and easy to do now that the
185            # id is the primary key) and to perhaps to also start a new
186            # cursor if the requested index is much larger than the last row
187            # so that we don't read and discard lots of the rows.
188    
189            # Check whether we have to start a new cursor
190          if self.read_record_cursor is None or index <self.read_record_last_row:          if self.read_record_cursor is None or index <self.read_record_last_row:
191              stmt = ("SELECT %s FROM %s;"              stmt = ("SELECT %s FROM %s;"
192                      % (", ".join([c.internal_name for c in self.columns]),                      % (", ".join([c.internal_name for c in self.columns]),
# Line 162  class TransientTableBase: Line 194  class TransientTableBase:
194              self.read_record_cursor = self.db.cursor()              self.read_record_cursor = self.db.cursor()
195              self.read_record_cursor.execute(stmt)              self.read_record_cursor.execute(stmt)
196              self.read_record_last_row = -1              self.read_record_last_row = -1
197          for i in range(index - self.read_record_last_row):              self.read_record_last_result = None
198              result = self.read_record_cursor.fetchone()  
199            # Now we should have a cursor at a position less than or equal
200            # to the index so the following if statement will always set
201            # result to a suitable value
202            assert index >= self.read_record_last_row
203    
204            if index == self.read_record_last_row:
205                result = self.read_record_last_result
206            else:
207                for i in range(index - self.read_record_last_row):
208                    result = self.read_record_cursor.fetchone()
209                    self.read_record_last_result = result
210          self.read_record_last_row = index          self.read_record_last_row = index
211          result = dict(zip(self.orig_names, result))          return dict(zip(self.orig_names, result))
212          return result  
213        def ReadValue(self, row, col):
214            """Return the value of the specified row and column
215    
216            The col parameter may be the index of the column or its name.
217            """
218            # Depending on the actual access patterns of the table data, it
219            # might be a bit faster in some circumstances to not implement
220            # this via ReadRowAsDict, but this simple implementation should
221            # be fast enough for most purposes.
222            return self.ReadRowAsDict(row)[self.column_map[col].name]
223    
224      def field_range(self, colname):      def ValueRange(self, col):
225          col = self.name_to_column[colname]          col = self.column_map[col]
226          iname = col.internal_name          iname = col.internal_name
227          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"          min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228                                     % (iname, iname, self.tablename))                                     % (iname, iname, self.tablename))
229          converter = type_converter_map[col.type]          converter = type_converter_map[col.type]
230          return ((converter(min), None), (converter(max), None))          return (converter(min), converter(max))
231    
232      def GetUniqueValues(self, colname):      def UniqueValues(self, col):
233          iname = self.orig_to_internal[colname]          iname = self.column_map[col].internal_name
234          cursor = self.db.cursor()          cursor = self.db.cursor()
235          cursor.execute("SELECT %s FROM %s GROUP BY %s;"          cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236                         % (iname, self.tablename, iname))                         % (iname, self.tablename, iname))
# Line 189  class TransientTableBase: Line 242  class TransientTableBase:
242              result.append(row[0])              result.append(row[0])
243          return result          return result
244    
245        def Width(self, col):
246            """Return the maximum width of values in the column
247    
248            The return value is the the maximum length of string representation
249            of the values in the column (represented by index or name)."""
250            max = 0
251            
252            type  = self.column_map[col].type
253            iname = self.column_map[col].internal_name
254            cursor = self.db.cursor()
255            cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256            values = [ i[0] for i in cursor.fetchall()]
257            if not values:
258                return None
259    
260            if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
261                prec = self.Precision(col)
262                format = "%%.%df" % prec
263            elif type == sql_type_map[table.FIELDTYPE_INT]:
264                format = "%d"
265            else:
266                format = "%s"
267            for value in values:
268                if value is None: continue
269                l = len(format % value)
270                if l > max:
271                    max = l
272    
273            return max
274    
275        def Precision(self, col):
276            """Return the precision of the column
277    
278            The return value is the maximum number of numeric characters after the
279            decimal if column type is double. Else precision zero is returned.
280            The column can be represented by index or name.
281            """
282        
283            type  = self.column_map[col].type
284            if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
285                iname = self.column_map[col].internal_name
286                cursor = self.db.cursor()
287                cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
288                values = [ i[0] for i in cursor.fetchall()]
289                if not values:
290                    return 0
291                
292                max = 0
293                for value in values:
294                    if value is None: continue
295                    l = len(str(value % 1))
296                    if l > max:
297                        max = l
298                if max > 2:
299                    return max - 2
300                else:
301                    return 0
302            else:
303                return 0
304    
305        def SimpleQuery(self, left, comparison, right):
306            """Return the indices of all rows that matching a condition.
307    
308  class TransientTable(TransientTableBase):          Parameters:
309               left -- The column object for the left side of the comparison
310    
311               comparison -- The comparison operator as a string. It must be
312                             one of '==', '!=', '<', '<=', '>=', '>'
313    
314               right -- The right hand side of the comparison. It must be
315                        either a column object or a value, i.e. a string,
316                        int or float.
317    
318            The return value is a sorted list of the indices of the rows
319            where the condition is true.
320            """
321            if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
322                raise ValueError("Comparison operator %r not allowed" % comparison)
323    
324            if hasattr(right, "internal_name"):
325                right_template = right.internal_name
326                params = ()
327            else:
328                right_template = "%s"
329                params = (right,)
330    
331            query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
332                    % (self.tablename, left.internal_name, comparison,
333                       right_template)
334    
335            cursor = self.db.cursor()
336            cursor.execute(query, params)
337            result = []
338            while 1:
339                row = cursor.fetchone()
340                if row is None:
341                    break
342                result.append(row[0])
343            return result
344    
345        def Dependencies(self):
346            """Placeholder for a method in a derived class.
347    
348            Return a sequence with the tables and other data objects that
349            self depends on.
350            """
351            raise NotImplementedError
352    
353    
354    class TransientTable(TitledObject, TransientTableBase):
355    
356      """A Table in a transient DB that starts as the copy of a Thuban Table."""      """A Table in a transient DB that starts as the copy of a Thuban Table."""
357    
# Line 201  class TransientTable(TransientTableBase) Line 362  class TransientTable(TransientTableBase)
362          interface.          interface.
363          """          """
364          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
365            TitledObject.__init__(self, table.Title())
366          self.create(table)          self.create(table)
367    
368      def create(self, table):      def create(self, table):
369          columns = []          columns = []
370          for i in range(table.field_count()):          for col in table.Columns():
371              type, name = table.field_info(i)[:2]              columns.append(ColumnReference(col.name, col.type,
             columns.append(ColumnReference(name, type,  
372                                             self.db.new_column_name()))                                             self.db.new_column_name()))
373          TransientTableBase.create(self, columns)          TransientTableBase.create(self, columns)
374    
375          # copy the input table to the transient db          # copy the input table to the transient db
376          insert_template = "INSERT INTO %s (%s) VALUES (%s);" \  
377            # A key to insert to use for the formatting of the insert
378            # statement. The key must not be equal to any of the column
379            # names so we construct one by building a string of x's that is
380            # longer than any of the column names
381            id_key = max([len(col.name) for col in self.columns]) * "x"
382    
383            insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
384                                 % (self.tablename,                                 % (self.tablename,
385                                    ", ".join([col.internal_name                                    ", ".join([col.internal_name
386                                               for col in self.columns]),                                               for col in self.columns]),
387                                      id_key,
388                                    ", ".join(["%%(%s)s" % col.name                                    ", ".join(["%%(%s)s" % col.name
389                                               for col in self.columns]))                                               for col in self.columns]))
390          cursor = self.db.cursor()          cursor = self.db.cursor()
391          for i in range(table.record_count()):          for i in range(table.NumRows()):
392              cursor.execute(insert_template, table.read_record(i))              row = table.ReadRowAsDict(i)
393                row[id_key] = i
394                cursor.execute(insert_template, row)
395          self.db.conn.commit()          self.db.conn.commit()
396    
397    
398    
399  class TransientJoinedTable(TransientTableBase):  class TransientJoinedTable(TitledObject, TransientTableBase):
400    
401      """A Table in the transient DB that contains a join of two tables"""      """A Table in the transient DB that contains a join of two tables"""
402    
403      def __init__(self, transient_db, left_table, left_field,      def __init__(self, transient_db, left_table, left_field,
404                   right_table, right_field = None):                   right_table, right_field = None, outer_join = False):
405          """Create a new table in the transient DB as a join of two tables.          """Create a new table in the transient DB as a join of two tables.
406    
407          Both input tables, left_table and right_table must have a          Both input tables, left_table and right_table must have a
408          transient_table method that returns a table object for a table          transient_table method that returns a table object for a table
409          in the trnsient database. The join is performed on the condition          in the transient database. The join is performed on the condition
410          that the value of the left_field column the the left table is          that the value of the left_field column the the left table is
411          equal to the value of the right_field in the right_table.          equal to the value of the right_field in the right_table.
412    
# Line 247  class TransientJoinedTable(TransientTabl Line 418  class TransientJoinedTable(TransientTabl
418          names of the input tables without having to create prefixes.          names of the input tables without having to create prefixes.
419          """          """
420          TransientTableBase.__init__(self, transient_db)          TransientTableBase.__init__(self, transient_db)
421            self.dependencies = (left_table, right_table)
422          self.left_table = left_table.transient_table()          self.left_table = left_table.transient_table()
423          self.left_field = left_field          self.left_field = left_field
424          self.right_table = right_table.transient_table()          self.right_table = right_table.transient_table()
# Line 254  class TransientJoinedTable(TransientTabl Line 426  class TransientJoinedTable(TransientTabl
426              self.right_field = right_field              self.right_field = right_field
427          else:          else:
428              self.right_field = self.left_field              self.right_field = self.left_field
429            self.outer_join = outer_join
430    
431            title = "Join of %(left)s and %(right)s" \
432                    % {"left": self.left_table.Title(),
433                       "right": self.right_table.Title()}
434            TitledObject.__init__(self, title)
435    
436          self.create()          self.create()
437    
438      def create(self):      def create(self):
# Line 267  class TransientJoinedTable(TransientTabl Line 446  class TransientJoinedTable(TransientTabl
446          columns = []          columns = []
447          for col in self.left_table.columns + self.right_table.columns:          for col in self.left_table.columns + self.right_table.columns:
448              if col.name in visited:              if col.name in visited:
449                    # We can't allow multiple columns with the same original
450                    # name, so omit this one. FIXME: There should be a
451                    # better solution.
452                  continue                  continue
453              columns.append(col)              columns.append(col)
454          TransientTableBase.create(self, columns)          TransientTableBase.create(self, columns)
455    
456          # Copy the joined data to the table.          # Copy the joined data to the table.
457          internal_names = [col.internal_name for col in self.columns]          internal_names = [col.internal_name for col in self.columns]
458          stmt = "INSERT INTO %s (%s) SELECT %s FROM %s JOIN %s ON %s = %s;" \          if self.outer_join:
459                 % (self.tablename,              join_operator = 'LEFT OUTER JOIN'
460                    ", ".join(internal_names),          else:
461                    ", ".join(internal_names),              join_operator = 'JOIN'
462                    self.left_table.tablename,          stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
463                    self.right_table.tablename,                  " %s %s ON %s = %s;"
464                    self.orig_to_internal[self.left_field],                  % (self.tablename,
465                    self.orig_to_internal[self.right_field])                     ", ".join(internal_names),
466                       self.left_table.tablename,
467                       ", ".join(internal_names),
468                       self.left_table.tablename,
469                       join_operator,
470                       self.right_table.tablename,
471                       self.orig_to_internal[self.left_field],
472                       self.orig_to_internal[self.right_field]))
473          self.db.execute(stmt)          self.db.execute(stmt)
474    
475        def Dependencies(self):
476            """Return a tuple with the two tables the join depends on."""
477            return self.dependencies
478    
479  class AutoTransientTable:  
480    class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
481    
482      """Table that copies data to a transient table on demand.      """Table that copies data to a transient table on demand.
483    
# Line 293  class AutoTransientTable: Line 486  class AutoTransientTable:
486      """      """
487    
488      def __init__(self, transient_db, table):      def __init__(self, transient_db, table):
489            TitledObject.__init__(self, table.Title())
490          self.transient_db = transient_db          self.transient_db = transient_db
491          self.table = table          self.table = table
492          self.t_table = None          self.t_table = None
493    
494      def record_count(self):      def Columns(self):
495          """Return the number of records"""          return self.table.Columns()
496          return self.table.record_count()  
497        def Column(self, col):
498      def field_count(self):          return self.table.Column(col)
499          """Return the number of fields in a record"""  
500          return self.table.field_count()      def HasColumn(self, col):
501            """Return whether the table has a column with the given name or index
502      def field_info(self, field):          """
503          """Return a tuple (type, name, width, prec) for the field no. field          return self.table.HasColumn(col)
504    
505          type is the data type of the field, name the name, width the      def NumRows(self):
506          field width in characters and prec the decimal precision.          return self.table.NumRows()
         """  
         info = self.table.field_info(field)  
         if info:  
             info = info[:2] + (0, 0)  
         return info  
   
     def field_info_by_name(self, fieldName):  
         info = self.table.field_info_by_name(fieldName)  
         if info:  
             info = info[:2] + (0, 0)  
         return info  
507    
508      def read_record(self, record):      def NumColumns(self):
509            return self.table.NumColumns()
510    
511        def ReadRowAsDict(self, record):
512          """Return the record no. record as a dict mapping field names to values          """Return the record no. record as a dict mapping field names to values
513          """          """
514          if self.t_table is not None:          if self.t_table is not None:
515              return self.t_table.read_record(record)              return self.t_table.ReadRowAsDict(record)
516          else:          else:
517              return self.table.read_record(record)              return self.table.ReadRowAsDict(record)
518    
519      def write_record(self, record, values):      def ReadValue(self, row, col):
520          raise NotImplementedError          """Return the value of the specified row and column
521    
522            The col parameter may be the index of the column or its name.
523            """
524            if self.t_table is not None:
525                return self.t_table.ReadValue(row, col)
526            else:
527                return self.table.ReadValue(row, col)
528    
529      def copy_to_transient(self):      def copy_to_transient(self):
530          """Internal: Create a transient table and copy the data into it"""          """Internal: Create a transient table and copy the data into it"""
# Line 345  class AutoTransientTable: Line 538  class AutoTransientTable:
538              self.copy_to_transient()              self.copy_to_transient()
539          return self.t_table          return self.t_table
540    
541      def field_range(self, colname):      def ValueRange(self, col):
542            if self.t_table is None:
543                self.copy_to_transient()
544            return self.t_table.ValueRange(col)
545    
546        def UniqueValues(self, col):
547          if self.t_table is None:          if self.t_table is None:
548              self.copy_to_transient()              self.copy_to_transient()
549          return self.t_table.field_range(colname)          return self.t_table.UniqueValues(col)
550    
551      def GetUniqueValues(self, colname):      def SimpleQuery(self, left, comparison, right):
552          if self.t_table is None:          if self.t_table is None:
553              self.copy_to_transient()              self.copy_to_transient()
554          return self.t_table.GetUniqueValues(colname)          # Make sure to use the column object of the transient table. The
555            # left argument is always a column object so we can just ask the
556            # t_table for the right object.
557            if hasattr(right, "name"):
558                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
559                                                comparison,
560                                                self.t_table.Column(right.name))
561            else:
562                return self.t_table.SimpleQuery(self.t_table.Column(left.name),
563                                                comparison, right)
564    
565        def Dependencies(self):
566            """Return a tuple containing the original table"""
567            return (self.table,)
568    

Legend:
Removed from v.765  
changed lines
  Added in v.1026

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26