/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1381 - (hide annotations)
Tue Jul 8 16:37:46 2003 UTC (21 years, 8 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 20762 byte(s)
* Thuban/Model/transientdb.py (TransientTableBase.Width): The type
constants in the column objects are the standard ones defined in
the table module.

* test/test_transientdb.py
(TestTransientTable.test_transienttable_to_dbf): New. Test whether
exporting transient tables as DBF works. This should catch the bug
just fixed in TransientTableBase.Width.

1 bh 765 # Copyright (C) 2003 by Intevation GmbH
2     # Authors:
3     # Bernhard Herzog <[email protected]>
4     #
5     # This program is free software under the GPL (>=v2)
6     # Read the file COPYING coming with the software for details.
7    
8     """Database for transient data
9    
10     This database is intended for data representations needed during the
11     lifetime of a Thuban session but which is not permanent. Examples of
12     this are for instance a join of two DBF files where the DBF files are
13     the permanent representation of the data and the join only exists in the
14     Thuban session and is reconstructed when the session is opened.
15     """
16    
17     __version__ = "$Revision$"
18     # $Source$
19     # $Id$
20    
21     import os
22     import weakref
23     from sqlite import connect
24    
25 jan 1020 from base import TitledObject
26    
27 bh 765 import table
28    
29     sql_type_map = {
30     table.FIELDTYPE_INT: "INTEGER",
31     table.FIELDTYPE_STRING: "VARCHAR",
32     table.FIELDTYPE_DOUBLE: "FLOAT",
33     }
34    
35     type_converter_map = {
36     table.FIELDTYPE_INT: int,
37     table.FIELDTYPE_STRING: str,
38     table.FIELDTYPE_DOUBLE: float,
39     }
40    
41     class TransientDatabase:
42    
43     def __init__(self, filename):
44     self.filename = filename
45     self.conn = connect(filename)
46     # Counters to produce unique table and column names
47     self.num_tables = 0
48     self.num_cols = 0
49     # Since there's only once process using the SQLite database, we
50     # might be able to get a tad more speed with default_synchronous
51     # OFF. So far I haven't seen any measurable speedup, though.
52     #self.execute("PRAGMA default_synchronous = OFF")
53    
54     def __del__(self):
55     self.close()
56    
57     def close(self):
58 bh 777 if self.conn is not None:
59 bh 765 self.conn.close()
60 bh 777 self.conn = None
61 bh 765
62     def new_table_name(self):
63     self.num_tables += 1
64     return "Table%03d" % self.num_tables
65    
66     def new_column_name(self):
67     self.num_cols += 1
68     return "Col%03d" % self.num_cols
69    
70     def execute(self, *args):
71     """execute the SQL statement in the database and return the result"""
72     cursor = self.conn.cursor()
73     cursor.execute(*args)
74     result = cursor.fetchone()
75     self.conn.commit()
76     return result
77    
78     def cursor(self):
79     return self.conn.cursor()
80    
81    
82     class ColumnReference:
83    
84     def __init__(self, name, type, internal_name):
85     self.name = name
86     self.type = type
87     self.internal_name = internal_name
88    
89    
90 bh 818 class TransientTableBase(table.OldTableInterfaceMixin):
91 bh 765
92     """Base class for tables in the transient database"""
93    
94     def __init__(self, transient_db):
95     """Initialize the table for use with the given transient db"""
96     self.db = transient_db
97     self.tablename = self.db.new_table_name()
98     self.indexed_columns = {}
99     self.read_record_cursor = None
100     self.read_record_last_row = None
101 bh 785 self.read_record_last_result = None
102 bh 765
103     def create(self, columns):
104     self.columns = columns
105     self.name_to_column = {}
106     self.orig_names = []
107     self.internal_to_orig = {}
108     self.orig_to_internal = {}
109 bh 818 self.column_map = {}
110 bh 765
111     # Create the column objects and fill various maps and lists
112 bh 818 for index in range(len(self.columns)):
113     col = self.columns[index]
114 bh 765 self.name_to_column[col.name] = col
115     self.orig_names.append(col.name)
116     self.internal_to_orig[col.internal_name] = col.name
117     self.orig_to_internal[col.name] = col.internal_name
118 bh 818 self.column_map[col.name] = col
119     self.column_map[index] = col
120 bh 765
121     # Build the CREATE TABLE statement and create the table in the
122     # database
123 bh 841 table_types = ["id INTEGER PRIMARY KEY"]
124 bh 765 for col in self.columns:
125     table_types.append("%s %s" % (col.internal_name,
126     sql_type_map[col.type]))
127     table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
128     ",\n ".join(table_types))
129     self.db.execute(table_stmt)
130    
131     def transient_table(self):
132     """
133     Return a table whose underlying implementation is in the transient db
134     """
135     return self
136    
137     def ensure_index(self, column):
138     """Ensure that there's an index on the given column"""
139     if not column in self.indexed_columns:
140     internal_name = self.orig_to_internal[column]
141     indexname = "Index_%s_%s" % (self.tablename, internal_name)
142     stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
143     internal_name)
144     self.db.execute(stmt)
145     self.indexed_columns[column] = 1
146    
147 bh 818 def NumColumns(self):
148 bh 765 return len(self.columns)
149    
150 bh 818 def NumRows(self):
151 bh 765 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152     return int(result[0])
153    
154 bh 818 def Columns(self):
155     return self.columns
156    
157     def Column(self, col):
158     return self.column_map[col]
159    
160 bh 839 def HasColumn(self, col):
161     """Return whether the table has a column with the given name or index
162     """
163     return self.column_map.has_key(col)
164    
165 bh 818 def ReadRowAsDict(self, index):
166 bh 849 # Implementation Strategy: Executing a completely new select
167     # statement every time this method is called is too slow. The
168     # most important usage is to read the records more or less
169     # sequentially. This happens e.g. when drawing a layer with a
170     # classification where the shapes are drawn in order of the
171     # shape ids. Another pattern is that the same row is requested
172     # several times in a row. This happens in the table view, for
173     # instance.
174    
175     # We can exploit this to make access faster by having one cursor
176     # open all the time and keeping the last row read around in case
177     # the same row is accessed again the next time and if the row
178     # index is larger than the row we have read last we simply fetch
179     # rows from the cursor until we've reached the requested row. If
180     # the requested row index is smaller then we start a new cursor.
181    
182     # FIXME: So far this scheme seems to work well enough. Obvious
183     # improvements would be to start the cursor at exactly the
184     # requested row (should be efficient and easy to do now that the
185     # id is the primary key) and to perhaps to also start a new
186     # cursor if the requested index is much larger than the last row
187     # so that we don't read and discard lots of the rows.
188    
189     # Check whether we have to start a new cursor
190 bh 765 if self.read_record_cursor is None or index <self.read_record_last_row:
191     stmt = ("SELECT %s FROM %s;"
192     % (", ".join([c.internal_name for c in self.columns]),
193     self.tablename))
194     self.read_record_cursor = self.db.cursor()
195     self.read_record_cursor.execute(stmt)
196     self.read_record_last_row = -1
197 bh 785 self.read_record_last_result = None
198    
199     # Now we should have a cursor at a position less than or equal
200     # to the index so the following if statement will always set
201     # result to a suitable value
202     assert index >= self.read_record_last_row
203    
204     if index == self.read_record_last_row:
205 bh 818 result = self.read_record_last_result
206 bh 785 else:
207     for i in range(index - self.read_record_last_row):
208     result = self.read_record_cursor.fetchone()
209     self.read_record_last_result = result
210 bh 765 self.read_record_last_row = index
211 bh 785 return dict(zip(self.orig_names, result))
212 bh 765
213 bh 849 def ReadValue(self, row, col):
214     """Return the value of the specified row and column
215    
216     The col parameter may be the index of the column or its name.
217     """
218     # Depending on the actual access patterns of the table data, it
219     # might be a bit faster in some circumstances to not implement
220     # this via ReadRowAsDict, but this simple implementation should
221     # be fast enough for most purposes.
222     return self.ReadRowAsDict(row)[self.column_map[col].name]
223    
224 bh 818 def ValueRange(self, col):
225     col = self.column_map[col]
226 bh 765 iname = col.internal_name
227     min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228     % (iname, iname, self.tablename))
229     converter = type_converter_map[col.type]
230 bh 818 return (converter(min), converter(max))
231 bh 765
232 bh 818 def UniqueValues(self, col):
233     iname = self.column_map[col].internal_name
234 bh 765 cursor = self.db.cursor()
235     cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236     % (iname, self.tablename, iname))
237     result = []
238     while 1:
239     row = cursor.fetchone()
240     if row is None:
241     break
242     result.append(row[0])
243     return result
244    
245 frank 1026 def Width(self, col):
246     """Return the maximum width of values in the column
247    
248 bh 1381 The return value is the the maximum length of string
249     representation of the values in the column (represented by index
250     or name).
251     """
252 frank 1026 max = 0
253 bh 1381
254 frank 1026 type = self.column_map[col].type
255     iname = self.column_map[col].internal_name
256     cursor = self.db.cursor()
257     cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
258     values = [ i[0] for i in cursor.fetchall()]
259     if not values:
260     return None
261    
262 bh 1381 if type == table.FIELDTYPE_DOUBLE:
263 bh 1043 format = "%.12f"
264 bh 1381 elif type == table.FIELDTYPE_INT:
265 frank 1026 format = "%d"
266     else:
267     format = "%s"
268     for value in values:
269     if value is None: continue
270     l = len(format % value)
271     if l > max:
272     max = l
273    
274     return max
275    
276 bh 841 def SimpleQuery(self, left, comparison, right):
277     """Return the indices of all rows that matching a condition.
278 bh 765
279 bh 841 Parameters:
280     left -- The column object for the left side of the comparison
281    
282     comparison -- The comparison operator as a string. It must be
283     one of '==', '!=', '<', '<=', '>=', '>'
284    
285     right -- The right hand side of the comparison. It must be
286     either a column object or a value, i.e. a string,
287     int or float.
288    
289     The return value is a sorted list of the indices of the rows
290     where the condition is true.
291     """
292     if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
293     raise ValueError("Comparison operator %r not allowed" % comparison)
294    
295     if hasattr(right, "internal_name"):
296     right_template = right.internal_name
297     params = ()
298     else:
299     right_template = "%s"
300     params = (right,)
301    
302     query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
303     % (self.tablename, left.internal_name, comparison,
304     right_template)
305    
306     cursor = self.db.cursor()
307     cursor.execute(query, params)
308     result = []
309     while 1:
310     row = cursor.fetchone()
311     if row is None:
312     break
313     result.append(row[0])
314     return result
315    
316 bh 984 def Dependencies(self):
317     """Placeholder for a method in a derived class.
318 bh 841
319 bh 984 Return a sequence with the tables and other data objects that
320     self depends on.
321     """
322     raise NotImplementedError
323    
324    
325 jan 1020 class TransientTable(TitledObject, TransientTableBase):
326 bh 765
327     """A Table in a transient DB that starts as the copy of a Thuban Table."""
328    
329     def __init__(self, transient_db, table):
330     """Create a new table in the given transient DB as a copy of table
331    
332     The table argument can be any object implementing the Table
333     interface.
334     """
335     TransientTableBase.__init__(self, transient_db)
336 jan 1020 TitledObject.__init__(self, table.Title())
337 bh 765 self.create(table)
338    
339     def create(self, table):
340     columns = []
341 bh 818 for col in table.Columns():
342     columns.append(ColumnReference(col.name, col.type,
343 bh 765 self.db.new_column_name()))
344     TransientTableBase.create(self, columns)
345    
346     # copy the input table to the transient db
347 bh 841
348     # A key to insert to use for the formatting of the insert
349     # statement. The key must not be equal to any of the column
350     # names so we construct one by building a string of x's that is
351     # longer than any of the column names
352     id_key = max([len(col.name) for col in self.columns]) * "x"
353    
354     insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
355 bh 765 % (self.tablename,
356     ", ".join([col.internal_name
357     for col in self.columns]),
358 bh 841 id_key,
359 bh 765 ", ".join(["%%(%s)s" % col.name
360     for col in self.columns]))
361     cursor = self.db.cursor()
362 bh 818 for i in range(table.NumRows()):
363 bh 841 row = table.ReadRowAsDict(i)
364     row[id_key] = i
365     cursor.execute(insert_template, row)
366 bh 765 self.db.conn.commit()
367    
368    
369    
370 jan 1020 class TransientJoinedTable(TitledObject, TransientTableBase):
371 bh 765
372     """A Table in the transient DB that contains a join of two tables"""
373    
374     def __init__(self, transient_db, left_table, left_field,
375 frank 1009 right_table, right_field = None, outer_join = False):
376 bh 765 """Create a new table in the transient DB as a join of two tables.
377    
378     Both input tables, left_table and right_table must have a
379     transient_table method that returns a table object for a table
380 frank 1002 in the transient database. The join is performed on the condition
381 bh 765 that the value of the left_field column the the left table is
382     equal to the value of the right_field in the right_table.
383    
384 bh 1364 The joined table contains all columns of the input tables,
385     however, the column names of the right table may be changed
386     slightly to make them unique in the joined table. This is
387     currently done by appending a sufficient number of underscores
388     ('_').
389 bh 765 """
390     TransientTableBase.__init__(self, transient_db)
391 bh 984 self.dependencies = (left_table, right_table)
392 bh 765 self.left_table = left_table.transient_table()
393     self.left_field = left_field
394     self.right_table = right_table.transient_table()
395     if right_field:
396     self.right_field = right_field
397     else:
398     self.right_field = self.left_field
399 frank 1009 self.outer_join = outer_join
400 jan 1020
401     title = "Join of %(left)s and %(right)s" \
402     % {"left": self.left_table.Title(),
403     "right": self.right_table.Title()}
404     TitledObject.__init__(self, title)
405    
406 bh 765 self.create()
407    
408     def create(self):
409     """Internal: Create the table with the joined data"""
410     self.tablename = self.db.new_table_name()
411    
412     self.right_table.ensure_index(self.right_field)
413    
414 bh 1328 # determine the internal column names to join on before
415     # coalescing the column information because if the external
416     # column names are the same they will be mapped to the same
417     # internal name afterwards.
418     internal_left_col = self.left_table.orig_to_internal[self.left_field]
419 bh 1364 internal_right_col =self.right_table.orig_to_internal[self.right_field]
420 bh 1328
421 bh 765 # Coalesce the column information
422     visited = {}
423     columns = []
424 frank 1333 newcolumns = []
425 bh 1364 for table in (self.left_table, self.right_table):
426     for col in table.Columns():
427     colname = col.name
428     # We can't allow multiple columns with the same
429     # original name, so append '_' to this one until
430     # it is unique.
431     # FIXME: There should be a better solution.
432     while colname in visited:
433     colname = colname + '_'
434     columns.append((table.tablename, col))
435     newcol = ColumnReference(colname, col.type,
436     "Col%03d" % (len(newcolumns)+1))
437     newcolumns.append(newcol)
438     visited[colname] = 1
439 frank 1333 TransientTableBase.create(self, newcolumns)
440 bh 765
441     # Copy the joined data to the table.
442 frank 1333 newinternal_names = [col.internal_name for col in self.columns]
443     internal_references = ["%s.%s" % (table, col.internal_name)
444     for table, col in columns]
445 frank 1009 if self.outer_join:
446     join_operator = 'LEFT OUTER JOIN'
447     else:
448     join_operator = 'JOIN'
449 bh 841 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
450 frank 1333 " %s %s ON %s.%s = %s.%s;"
451 bh 841 % (self.tablename,
452 frank 1333 ", ".join(newinternal_names),
453 bh 841 self.left_table.tablename,
454 frank 1333 ", ".join(internal_references),
455 bh 841 self.left_table.tablename,
456 frank 1009 join_operator,
457 bh 841 self.right_table.tablename,
458 frank 1333 self.left_table.tablename,
459 bh 1328 internal_left_col,
460 frank 1333 self.right_table.tablename,
461 bh 1328 internal_right_col))
462 bh 765 self.db.execute(stmt)
463    
464 bh 984 def Dependencies(self):
465     """Return a tuple with the two tables the join depends on."""
466     return self.dependencies
467 bh 765
468 bh 1375 def JoinType(self):
469     """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
470     if self.outer_join:
471     return "LEFT OUTER"
472     else:
473     return "INNER"
474 bh 984
475 bh 1375
476 jan 1020 class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
477 bh 765
478     """Table that copies data to a transient table on demand.
479    
480     The AutoTransientTable takes another table as input and copies data
481     to a table in a TransientDatabase instance on demand.
482     """
483    
484     def __init__(self, transient_db, table):
485 jan 1020 TitledObject.__init__(self, table.Title())
486 bh 765 self.transient_db = transient_db
487     self.table = table
488     self.t_table = None
489    
490 bh 818 def Columns(self):
491     return self.table.Columns()
492 bh 765
493 bh 818 def Column(self, col):
494     return self.table.Column(col)
495 bh 765
496 bh 839 def HasColumn(self, col):
497     """Return whether the table has a column with the given name or index
498     """
499     return self.table.HasColumn(col)
500    
501 bh 818 def NumRows(self):
502     return self.table.NumRows()
503 bh 765
504 bh 818 def NumColumns(self):
505     return self.table.NumColumns()
506 bh 765
507 bh 818 def ReadRowAsDict(self, record):
508 bh 765 """Return the record no. record as a dict mapping field names to values
509     """
510     if self.t_table is not None:
511 bh 839 return self.t_table.ReadRowAsDict(record)
512 bh 765 else:
513 bh 818 return self.table.ReadRowAsDict(record)
514 bh 765
515 bh 849 def ReadValue(self, row, col):
516     """Return the value of the specified row and column
517    
518     The col parameter may be the index of the column or its name.
519     """
520     if self.t_table is not None:
521     return self.t_table.ReadValue(row, col)
522     else:
523     return self.table.ReadValue(row, col)
524    
525 bh 765 def copy_to_transient(self):
526     """Internal: Create a transient table and copy the data into it"""
527     self.t_table = TransientTable(self.transient_db, self)
528    
529     def transient_table(self):
530     """
531     Return a table whose underlying implementation is in the transient db
532     """
533     if self.t_table is None:
534     self.copy_to_transient()
535     return self.t_table
536    
537 bh 818 def ValueRange(self, col):
538 bh 765 if self.t_table is None:
539     self.copy_to_transient()
540 bh 839 return self.t_table.ValueRange(col)
541 bh 765
542 bh 839 def UniqueValues(self, col):
543 bh 765 if self.t_table is None:
544     self.copy_to_transient()
545 bh 839 return self.t_table.UniqueValues(col)
546 bh 844
547     def SimpleQuery(self, left, comparison, right):
548     if self.t_table is None:
549     self.copy_to_transient()
550     # Make sure to use the column object of the transient table. The
551     # left argument is always a column object so we can just ask the
552     # t_table for the right object.
553 jonathan 933 if hasattr(right, "name"):
554     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
555     comparison,
556     self.t_table.Column(right.name))
557     else:
558     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
559     comparison, right)
560 bh 984
561     def Dependencies(self):
562     """Return a tuple containing the original table"""
563     return (self.table,)
564 frank 1026
565 bh 1043 def Width(self, col):
566     return self.table.Width(col)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26