/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1511 - (hide annotations)
Tue Jul 29 14:48:57 2003 UTC (21 years, 7 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 20737 byte(s)
Remove some unnecessary imports

1 bh 765 # Copyright (C) 2003 by Intevation GmbH
2     # Authors:
3     # Bernhard Herzog <[email protected]>
4     #
5     # This program is free software under the GPL (>=v2)
6     # Read the file COPYING coming with the software for details.
7    
8     """Database for transient data
9    
10     This database is intended for data representations needed during the
11     lifetime of a Thuban session but which is not permanent. Examples of
12     this are for instance a join of two DBF files where the DBF files are
13     the permanent representation of the data and the join only exists in the
14     Thuban session and is reconstructed when the session is opened.
15     """
16    
17     __version__ = "$Revision$"
18     # $Source$
19     # $Id$
20    
21     from sqlite import connect
22    
23 jan 1020 from base import TitledObject
24    
25 bh 765 import table
26    
27     sql_type_map = {
28     table.FIELDTYPE_INT: "INTEGER",
29     table.FIELDTYPE_STRING: "VARCHAR",
30     table.FIELDTYPE_DOUBLE: "FLOAT",
31     }
32    
33     type_converter_map = {
34     table.FIELDTYPE_INT: int,
35     table.FIELDTYPE_STRING: str,
36     table.FIELDTYPE_DOUBLE: float,
37     }
38    
39     class TransientDatabase:
40    
41     def __init__(self, filename):
42     self.filename = filename
43     self.conn = connect(filename)
44     # Counters to produce unique table and column names
45     self.num_tables = 0
46     self.num_cols = 0
47     # Since there's only once process using the SQLite database, we
48     # might be able to get a tad more speed with default_synchronous
49     # OFF. So far I haven't seen any measurable speedup, though.
50     #self.execute("PRAGMA default_synchronous = OFF")
51    
52     def __del__(self):
53     self.close()
54    
55     def close(self):
56 bh 777 if self.conn is not None:
57 bh 765 self.conn.close()
58 bh 777 self.conn = None
59 bh 765
60     def new_table_name(self):
61     self.num_tables += 1
62     return "Table%03d" % self.num_tables
63    
64     def new_column_name(self):
65     self.num_cols += 1
66     return "Col%03d" % self.num_cols
67    
68     def execute(self, *args):
69     """execute the SQL statement in the database and return the result"""
70     cursor = self.conn.cursor()
71     cursor.execute(*args)
72     result = cursor.fetchone()
73     self.conn.commit()
74     return result
75    
76     def cursor(self):
77     return self.conn.cursor()
78    
79    
80     class ColumnReference:
81    
82     def __init__(self, name, type, internal_name):
83     self.name = name
84     self.type = type
85     self.internal_name = internal_name
86    
87    
88 bh 818 class TransientTableBase(table.OldTableInterfaceMixin):
89 bh 765
90     """Base class for tables in the transient database"""
91    
92     def __init__(self, transient_db):
93     """Initialize the table for use with the given transient db"""
94     self.db = transient_db
95     self.tablename = self.db.new_table_name()
96     self.indexed_columns = {}
97     self.read_record_cursor = None
98     self.read_record_last_row = None
99 bh 785 self.read_record_last_result = None
100 bh 765
101     def create(self, columns):
102     self.columns = columns
103     self.name_to_column = {}
104     self.orig_names = []
105     self.internal_to_orig = {}
106     self.orig_to_internal = {}
107 bh 818 self.column_map = {}
108 bh 765
109     # Create the column objects and fill various maps and lists
110 bh 818 for index in range(len(self.columns)):
111     col = self.columns[index]
112 bh 765 self.name_to_column[col.name] = col
113     self.orig_names.append(col.name)
114     self.internal_to_orig[col.internal_name] = col.name
115     self.orig_to_internal[col.name] = col.internal_name
116 bh 818 self.column_map[col.name] = col
117     self.column_map[index] = col
118 bh 765
119     # Build the CREATE TABLE statement and create the table in the
120     # database
121 bh 841 table_types = ["id INTEGER PRIMARY KEY"]
122 bh 765 for col in self.columns:
123     table_types.append("%s %s" % (col.internal_name,
124     sql_type_map[col.type]))
125     table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
126     ",\n ".join(table_types))
127     self.db.execute(table_stmt)
128    
129     def transient_table(self):
130     """
131     Return a table whose underlying implementation is in the transient db
132     """
133     return self
134    
135     def ensure_index(self, column):
136     """Ensure that there's an index on the given column"""
137     if not column in self.indexed_columns:
138     internal_name = self.orig_to_internal[column]
139     indexname = "Index_%s_%s" % (self.tablename, internal_name)
140     stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
141     internal_name)
142     self.db.execute(stmt)
143     self.indexed_columns[column] = 1
144    
145 bh 818 def NumColumns(self):
146 bh 765 return len(self.columns)
147    
148 bh 818 def NumRows(self):
149 bh 765 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150     return int(result[0])
151    
152 bh 818 def Columns(self):
153     return self.columns
154    
155     def Column(self, col):
156     return self.column_map[col]
157    
158 bh 839 def HasColumn(self, col):
159     """Return whether the table has a column with the given name or index
160     """
161     return self.column_map.has_key(col)
162    
163 bh 818 def ReadRowAsDict(self, index):
164 bh 849 # Implementation Strategy: Executing a completely new select
165     # statement every time this method is called is too slow. The
166     # most important usage is to read the records more or less
167     # sequentially. This happens e.g. when drawing a layer with a
168     # classification where the shapes are drawn in order of the
169     # shape ids. Another pattern is that the same row is requested
170     # several times in a row. This happens in the table view, for
171     # instance.
172    
173     # We can exploit this to make access faster by having one cursor
174     # open all the time and keeping the last row read around in case
175     # the same row is accessed again the next time and if the row
176     # index is larger than the row we have read last we simply fetch
177     # rows from the cursor until we've reached the requested row. If
178     # the requested row index is smaller then we start a new cursor.
179    
180     # FIXME: So far this scheme seems to work well enough. Obvious
181     # improvements would be to start the cursor at exactly the
182     # requested row (should be efficient and easy to do now that the
183     # id is the primary key) and to perhaps to also start a new
184     # cursor if the requested index is much larger than the last row
185     # so that we don't read and discard lots of the rows.
186    
187     # Check whether we have to start a new cursor
188 bh 765 if self.read_record_cursor is None or index <self.read_record_last_row:
189     stmt = ("SELECT %s FROM %s;"
190     % (", ".join([c.internal_name for c in self.columns]),
191     self.tablename))
192     self.read_record_cursor = self.db.cursor()
193     self.read_record_cursor.execute(stmt)
194     self.read_record_last_row = -1
195 bh 785 self.read_record_last_result = None
196    
197     # Now we should have a cursor at a position less than or equal
198     # to the index so the following if statement will always set
199     # result to a suitable value
200     assert index >= self.read_record_last_row
201    
202     if index == self.read_record_last_row:
203 bh 818 result = self.read_record_last_result
204 bh 785 else:
205     for i in range(index - self.read_record_last_row):
206     result = self.read_record_cursor.fetchone()
207     self.read_record_last_result = result
208 bh 765 self.read_record_last_row = index
209 bh 785 return dict(zip(self.orig_names, result))
210 bh 765
211 bh 849 def ReadValue(self, row, col):
212     """Return the value of the specified row and column
213    
214     The col parameter may be the index of the column or its name.
215     """
216     # Depending on the actual access patterns of the table data, it
217     # might be a bit faster in some circumstances to not implement
218     # this via ReadRowAsDict, but this simple implementation should
219     # be fast enough for most purposes.
220     return self.ReadRowAsDict(row)[self.column_map[col].name]
221    
222 bh 818 def ValueRange(self, col):
223     col = self.column_map[col]
224 bh 765 iname = col.internal_name
225     min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
226     % (iname, iname, self.tablename))
227     converter = type_converter_map[col.type]
228 bh 818 return (converter(min), converter(max))
229 bh 765
230 bh 818 def UniqueValues(self, col):
231     iname = self.column_map[col].internal_name
232 bh 765 cursor = self.db.cursor()
233     cursor.execute("SELECT %s FROM %s GROUP BY %s;"
234     % (iname, self.tablename, iname))
235     result = []
236     while 1:
237     row = cursor.fetchone()
238     if row is None:
239     break
240     result.append(row[0])
241     return result
242    
243 frank 1026 def Width(self, col):
244     """Return the maximum width of values in the column
245    
246 bh 1381 The return value is the the maximum length of string
247     representation of the values in the column (represented by index
248     or name).
249     """
250 frank 1026 max = 0
251 bh 1381
252 frank 1026 type = self.column_map[col].type
253     iname = self.column_map[col].internal_name
254     cursor = self.db.cursor()
255     cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256     values = [ i[0] for i in cursor.fetchall()]
257     if not values:
258     return None
259    
260 bh 1381 if type == table.FIELDTYPE_DOUBLE:
261 bh 1043 format = "%.12f"
262 bh 1381 elif type == table.FIELDTYPE_INT:
263 frank 1026 format = "%d"
264     else:
265     format = "%s"
266     for value in values:
267     if value is None: continue
268     l = len(format % value)
269     if l > max:
270     max = l
271    
272     return max
273    
274 bh 841 def SimpleQuery(self, left, comparison, right):
275     """Return the indices of all rows that matching a condition.
276 bh 765
277 bh 841 Parameters:
278     left -- The column object for the left side of the comparison
279    
280     comparison -- The comparison operator as a string. It must be
281     one of '==', '!=', '<', '<=', '>=', '>'
282    
283     right -- The right hand side of the comparison. It must be
284     either a column object or a value, i.e. a string,
285     int or float.
286    
287     The return value is a sorted list of the indices of the rows
288     where the condition is true.
289     """
290     if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
291     raise ValueError("Comparison operator %r not allowed" % comparison)
292    
293     if hasattr(right, "internal_name"):
294     right_template = right.internal_name
295     params = ()
296     else:
297     right_template = "%s"
298     params = (right,)
299    
300     query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
301     % (self.tablename, left.internal_name, comparison,
302     right_template)
303    
304     cursor = self.db.cursor()
305     cursor.execute(query, params)
306     result = []
307     while 1:
308     row = cursor.fetchone()
309     if row is None:
310     break
311     result.append(row[0])
312     return result
313    
314 bh 984 def Dependencies(self):
315     """Placeholder for a method in a derived class.
316 bh 841
317 bh 984 Return a sequence with the tables and other data objects that
318     self depends on.
319     """
320     raise NotImplementedError
321    
322    
323 jan 1020 class TransientTable(TitledObject, TransientTableBase):
324 bh 765
325     """A Table in a transient DB that starts as the copy of a Thuban Table."""
326    
327     def __init__(self, transient_db, table):
328     """Create a new table in the given transient DB as a copy of table
329    
330     The table argument can be any object implementing the Table
331     interface.
332     """
333     TransientTableBase.__init__(self, transient_db)
334 jan 1020 TitledObject.__init__(self, table.Title())
335 bh 765 self.create(table)
336    
337     def create(self, table):
338     columns = []
339 bh 818 for col in table.Columns():
340     columns.append(ColumnReference(col.name, col.type,
341 bh 765 self.db.new_column_name()))
342     TransientTableBase.create(self, columns)
343    
344     # copy the input table to the transient db
345 bh 841
346     # A key to insert to use for the formatting of the insert
347     # statement. The key must not be equal to any of the column
348     # names so we construct one by building a string of x's that is
349     # longer than any of the column names
350     id_key = max([len(col.name) for col in self.columns]) * "x"
351    
352     insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
353 bh 765 % (self.tablename,
354     ", ".join([col.internal_name
355     for col in self.columns]),
356 bh 841 id_key,
357 bh 765 ", ".join(["%%(%s)s" % col.name
358     for col in self.columns]))
359     cursor = self.db.cursor()
360 bh 818 for i in range(table.NumRows()):
361 bh 841 row = table.ReadRowAsDict(i)
362     row[id_key] = i
363     cursor.execute(insert_template, row)
364 bh 765 self.db.conn.commit()
365    
366    
367    
368 jan 1020 class TransientJoinedTable(TitledObject, TransientTableBase):
369 bh 765
370     """A Table in the transient DB that contains a join of two tables"""
371    
372     def __init__(self, transient_db, left_table, left_field,
373 frank 1009 right_table, right_field = None, outer_join = False):
374 bh 765 """Create a new table in the transient DB as a join of two tables.
375    
376     Both input tables, left_table and right_table must have a
377     transient_table method that returns a table object for a table
378 frank 1002 in the transient database. The join is performed on the condition
379 bh 765 that the value of the left_field column the the left table is
380     equal to the value of the right_field in the right_table.
381    
382 bh 1364 The joined table contains all columns of the input tables,
383     however, the column names of the right table may be changed
384     slightly to make them unique in the joined table. This is
385     currently done by appending a sufficient number of underscores
386     ('_').
387 bh 765 """
388     TransientTableBase.__init__(self, transient_db)
389 bh 984 self.dependencies = (left_table, right_table)
390 bh 765 self.left_table = left_table.transient_table()
391     self.left_field = left_field
392     self.right_table = right_table.transient_table()
393     if right_field:
394     self.right_field = right_field
395     else:
396     self.right_field = self.left_field
397 frank 1009 self.outer_join = outer_join
398 jan 1020
399     title = "Join of %(left)s and %(right)s" \
400     % {"left": self.left_table.Title(),
401     "right": self.right_table.Title()}
402     TitledObject.__init__(self, title)
403    
404 bh 765 self.create()
405    
406     def create(self):
407     """Internal: Create the table with the joined data"""
408     self.tablename = self.db.new_table_name()
409    
410     self.right_table.ensure_index(self.right_field)
411    
412 bh 1328 # determine the internal column names to join on before
413     # coalescing the column information because if the external
414     # column names are the same they will be mapped to the same
415     # internal name afterwards.
416     internal_left_col = self.left_table.orig_to_internal[self.left_field]
417 bh 1364 internal_right_col =self.right_table.orig_to_internal[self.right_field]
418 bh 1328
419 bh 765 # Coalesce the column information
420     visited = {}
421     columns = []
422 frank 1333 newcolumns = []
423 bh 1364 for table in (self.left_table, self.right_table):
424     for col in table.Columns():
425     colname = col.name
426     # We can't allow multiple columns with the same
427     # original name, so append '_' to this one until
428     # it is unique.
429     # FIXME: There should be a better solution.
430     while colname in visited:
431     colname = colname + '_'
432     columns.append((table.tablename, col))
433     newcol = ColumnReference(colname, col.type,
434     "Col%03d" % (len(newcolumns)+1))
435     newcolumns.append(newcol)
436     visited[colname] = 1
437 frank 1333 TransientTableBase.create(self, newcolumns)
438 bh 765
439     # Copy the joined data to the table.
440 frank 1333 newinternal_names = [col.internal_name for col in self.columns]
441     internal_references = ["%s.%s" % (table, col.internal_name)
442     for table, col in columns]
443 frank 1009 if self.outer_join:
444     join_operator = 'LEFT OUTER JOIN'
445     else:
446     join_operator = 'JOIN'
447 bh 841 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
448 frank 1333 " %s %s ON %s.%s = %s.%s;"
449 bh 841 % (self.tablename,
450 frank 1333 ", ".join(newinternal_names),
451 bh 841 self.left_table.tablename,
452 frank 1333 ", ".join(internal_references),
453 bh 841 self.left_table.tablename,
454 frank 1009 join_operator,
455 bh 841 self.right_table.tablename,
456 frank 1333 self.left_table.tablename,
457 bh 1328 internal_left_col,
458 frank 1333 self.right_table.tablename,
459 bh 1328 internal_right_col))
460 bh 765 self.db.execute(stmt)
461    
462 bh 984 def Dependencies(self):
463     """Return a tuple with the two tables the join depends on."""
464     return self.dependencies
465 bh 765
466 bh 1375 def JoinType(self):
467     """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
468     if self.outer_join:
469     return "LEFT OUTER"
470     else:
471     return "INNER"
472 bh 984
473 bh 1375
474 jan 1020 class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
475 bh 765
476     """Table that copies data to a transient table on demand.
477    
478     The AutoTransientTable takes another table as input and copies data
479     to a table in a TransientDatabase instance on demand.
480     """
481    
482     def __init__(self, transient_db, table):
483 jan 1020 TitledObject.__init__(self, table.Title())
484 bh 765 self.transient_db = transient_db
485     self.table = table
486     self.t_table = None
487    
488 bh 818 def Columns(self):
489     return self.table.Columns()
490 bh 765
491 bh 818 def Column(self, col):
492     return self.table.Column(col)
493 bh 765
494 bh 839 def HasColumn(self, col):
495     """Return whether the table has a column with the given name or index
496     """
497     return self.table.HasColumn(col)
498    
499 bh 818 def NumRows(self):
500     return self.table.NumRows()
501 bh 765
502 bh 818 def NumColumns(self):
503     return self.table.NumColumns()
504 bh 765
505 bh 818 def ReadRowAsDict(self, record):
506 bh 765 """Return the record no. record as a dict mapping field names to values
507     """
508     if self.t_table is not None:
509 bh 839 return self.t_table.ReadRowAsDict(record)
510 bh 765 else:
511 bh 818 return self.table.ReadRowAsDict(record)
512 bh 765
513 bh 849 def ReadValue(self, row, col):
514     """Return the value of the specified row and column
515    
516     The col parameter may be the index of the column or its name.
517     """
518     if self.t_table is not None:
519     return self.t_table.ReadValue(row, col)
520     else:
521     return self.table.ReadValue(row, col)
522    
523 bh 765 def copy_to_transient(self):
524     """Internal: Create a transient table and copy the data into it"""
525     self.t_table = TransientTable(self.transient_db, self)
526    
527     def transient_table(self):
528     """
529     Return a table whose underlying implementation is in the transient db
530     """
531     if self.t_table is None:
532     self.copy_to_transient()
533     return self.t_table
534    
535 bh 818 def ValueRange(self, col):
536 bh 765 if self.t_table is None:
537     self.copy_to_transient()
538 bh 839 return self.t_table.ValueRange(col)
539 bh 765
540 bh 839 def UniqueValues(self, col):
541 bh 765 if self.t_table is None:
542     self.copy_to_transient()
543 bh 839 return self.t_table.UniqueValues(col)
544 bh 844
545     def SimpleQuery(self, left, comparison, right):
546     if self.t_table is None:
547     self.copy_to_transient()
548     # Make sure to use the column object of the transient table. The
549     # left argument is always a column object so we can just ask the
550     # t_table for the right object.
551 jonathan 933 if hasattr(right, "name"):
552     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
553     comparison,
554     self.t_table.Column(right.name))
555     else:
556     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
557     comparison, right)
558 bh 984
559     def Dependencies(self):
560     """Return a tuple containing the original table"""
561     return (self.table,)
562 frank 1026
563 bh 1043 def Width(self, col):
564     return self.table.Width(col)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26