/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1026 - (hide annotations)
Mon May 26 11:46:42 2003 UTC (21 years, 9 months ago) by frank
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 20712 byte(s)
TrannsientTableBase.Width, TransientTableBase.Precision):
Return column width and precision.

1 bh 765 # Copyright (C) 2003 by Intevation GmbH
2     # Authors:
3     # Bernhard Herzog <[email protected]>
4     #
5     # This program is free software under the GPL (>=v2)
6     # Read the file COPYING coming with the software for details.
7    
8     """Database for transient data
9    
10     This database is intended for data representations needed during the
11     lifetime of a Thuban session but which is not permanent. Examples of
12     this are for instance a join of two DBF files where the DBF files are
13     the permanent representation of the data and the join only exists in the
14     Thuban session and is reconstructed when the session is opened.
15     """
16    
17     __version__ = "$Revision$"
18     # $Source$
19     # $Id$
20    
21     import os
22     import weakref
23     from sqlite import connect
24    
25 jan 1020 from base import TitledObject
26    
27 bh 765 import table
28    
29     sql_type_map = {
30     table.FIELDTYPE_INT: "INTEGER",
31     table.FIELDTYPE_STRING: "VARCHAR",
32     table.FIELDTYPE_DOUBLE: "FLOAT",
33     }
34    
35     type_converter_map = {
36     table.FIELDTYPE_INT: int,
37     table.FIELDTYPE_STRING: str,
38     table.FIELDTYPE_DOUBLE: float,
39     }
40    
41     class TransientDatabase:
42    
43     def __init__(self, filename):
44     self.filename = filename
45     self.conn = connect(filename)
46     # Counters to produce unique table and column names
47     self.num_tables = 0
48     self.num_cols = 0
49     # Since there's only once process using the SQLite database, we
50     # might be able to get a tad more speed with default_synchronous
51     # OFF. So far I haven't seen any measurable speedup, though.
52     #self.execute("PRAGMA default_synchronous = OFF")
53    
54     def __del__(self):
55     self.close()
56    
57     def close(self):
58 bh 777 if self.conn is not None:
59 bh 765 self.conn.close()
60 bh 777 self.conn = None
61 bh 765
62     def new_table_name(self):
63     self.num_tables += 1
64     return "Table%03d" % self.num_tables
65    
66     def new_column_name(self):
67     self.num_cols += 1
68     return "Col%03d" % self.num_cols
69    
70     def execute(self, *args):
71     """execute the SQL statement in the database and return the result"""
72     cursor = self.conn.cursor()
73     cursor.execute(*args)
74     result = cursor.fetchone()
75     self.conn.commit()
76     return result
77    
78     def cursor(self):
79     return self.conn.cursor()
80    
81    
82     class ColumnReference:
83    
84     def __init__(self, name, type, internal_name):
85     self.name = name
86     self.type = type
87     self.internal_name = internal_name
88    
89    
90 bh 818 class TransientTableBase(table.OldTableInterfaceMixin):
91 bh 765
92     """Base class for tables in the transient database"""
93    
94     def __init__(self, transient_db):
95     """Initialize the table for use with the given transient db"""
96     self.db = transient_db
97     self.tablename = self.db.new_table_name()
98     self.indexed_columns = {}
99     self.read_record_cursor = None
100     self.read_record_last_row = None
101 bh 785 self.read_record_last_result = None
102 bh 765
103     def create(self, columns):
104     self.columns = columns
105     self.name_to_column = {}
106     self.orig_names = []
107     self.internal_to_orig = {}
108     self.orig_to_internal = {}
109 bh 818 self.column_map = {}
110 bh 765
111     # Create the column objects and fill various maps and lists
112 bh 818 for index in range(len(self.columns)):
113     col = self.columns[index]
114 bh 765 self.name_to_column[col.name] = col
115     self.orig_names.append(col.name)
116     self.internal_to_orig[col.internal_name] = col.name
117     self.orig_to_internal[col.name] = col.internal_name
118 bh 818 self.column_map[col.name] = col
119     self.column_map[index] = col
120 bh 765
121     # Build the CREATE TABLE statement and create the table in the
122     # database
123 bh 841 table_types = ["id INTEGER PRIMARY KEY"]
124 bh 765 for col in self.columns:
125     table_types.append("%s %s" % (col.internal_name,
126     sql_type_map[col.type]))
127     table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
128     ",\n ".join(table_types))
129     self.db.execute(table_stmt)
130    
131     def transient_table(self):
132     """
133     Return a table whose underlying implementation is in the transient db
134     """
135     return self
136    
137     def ensure_index(self, column):
138     """Ensure that there's an index on the given column"""
139     if not column in self.indexed_columns:
140     internal_name = self.orig_to_internal[column]
141     indexname = "Index_%s_%s" % (self.tablename, internal_name)
142     stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
143     internal_name)
144     self.db.execute(stmt)
145     self.indexed_columns[column] = 1
146    
147 bh 818 def NumColumns(self):
148 bh 765 return len(self.columns)
149    
150 bh 818 def NumRows(self):
151 bh 765 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152     return int(result[0])
153    
154 bh 818 def Columns(self):
155     return self.columns
156    
157     def Column(self, col):
158     return self.column_map[col]
159    
160 bh 839 def HasColumn(self, col):
161     """Return whether the table has a column with the given name or index
162     """
163     return self.column_map.has_key(col)
164    
165 bh 818 def ReadRowAsDict(self, index):
166 bh 849 # Implementation Strategy: Executing a completely new select
167     # statement every time this method is called is too slow. The
168     # most important usage is to read the records more or less
169     # sequentially. This happens e.g. when drawing a layer with a
170     # classification where the shapes are drawn in order of the
171     # shape ids. Another pattern is that the same row is requested
172     # several times in a row. This happens in the table view, for
173     # instance.
174    
175     # We can exploit this to make access faster by having one cursor
176     # open all the time and keeping the last row read around in case
177     # the same row is accessed again the next time and if the row
178     # index is larger than the row we have read last we simply fetch
179     # rows from the cursor until we've reached the requested row. If
180     # the requested row index is smaller then we start a new cursor.
181    
182     # FIXME: So far this scheme seems to work well enough. Obvious
183     # improvements would be to start the cursor at exactly the
184     # requested row (should be efficient and easy to do now that the
185     # id is the primary key) and to perhaps to also start a new
186     # cursor if the requested index is much larger than the last row
187     # so that we don't read and discard lots of the rows.
188    
189     # Check whether we have to start a new cursor
190 bh 765 if self.read_record_cursor is None or index <self.read_record_last_row:
191     stmt = ("SELECT %s FROM %s;"
192     % (", ".join([c.internal_name for c in self.columns]),
193     self.tablename))
194     self.read_record_cursor = self.db.cursor()
195     self.read_record_cursor.execute(stmt)
196     self.read_record_last_row = -1
197 bh 785 self.read_record_last_result = None
198    
199     # Now we should have a cursor at a position less than or equal
200     # to the index so the following if statement will always set
201     # result to a suitable value
202     assert index >= self.read_record_last_row
203    
204     if index == self.read_record_last_row:
205 bh 818 result = self.read_record_last_result
206 bh 785 else:
207     for i in range(index - self.read_record_last_row):
208     result = self.read_record_cursor.fetchone()
209     self.read_record_last_result = result
210 bh 765 self.read_record_last_row = index
211 bh 785 return dict(zip(self.orig_names, result))
212 bh 765
213 bh 849 def ReadValue(self, row, col):
214     """Return the value of the specified row and column
215    
216     The col parameter may be the index of the column or its name.
217     """
218     # Depending on the actual access patterns of the table data, it
219     # might be a bit faster in some circumstances to not implement
220     # this via ReadRowAsDict, but this simple implementation should
221     # be fast enough for most purposes.
222     return self.ReadRowAsDict(row)[self.column_map[col].name]
223    
224 bh 818 def ValueRange(self, col):
225     col = self.column_map[col]
226 bh 765 iname = col.internal_name
227     min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228     % (iname, iname, self.tablename))
229     converter = type_converter_map[col.type]
230 bh 818 return (converter(min), converter(max))
231 bh 765
232 bh 818 def UniqueValues(self, col):
233     iname = self.column_map[col].internal_name
234 bh 765 cursor = self.db.cursor()
235     cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236     % (iname, self.tablename, iname))
237     result = []
238     while 1:
239     row = cursor.fetchone()
240     if row is None:
241     break
242     result.append(row[0])
243     return result
244    
245 frank 1026 def Width(self, col):
246     """Return the maximum width of values in the column
247    
248     The return value is the the maximum length of string representation
249     of the values in the column (represented by index or name)."""
250     max = 0
251    
252     type = self.column_map[col].type
253     iname = self.column_map[col].internal_name
254     cursor = self.db.cursor()
255     cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256     values = [ i[0] for i in cursor.fetchall()]
257     if not values:
258     return None
259    
260     if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
261     prec = self.Precision(col)
262     format = "%%.%df" % prec
263     elif type == sql_type_map[table.FIELDTYPE_INT]:
264     format = "%d"
265     else:
266     format = "%s"
267     for value in values:
268     if value is None: continue
269     l = len(format % value)
270     if l > max:
271     max = l
272    
273     return max
274    
275     def Precision(self, col):
276     """Return the precision of the column
277    
278     The return value is the maximum number of numeric characters after the
279     decimal if column type is double. Else precision zero is returned.
280     The column can be represented by index or name.
281     """
282    
283     type = self.column_map[col].type
284     if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
285     iname = self.column_map[col].internal_name
286     cursor = self.db.cursor()
287     cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
288     values = [ i[0] for i in cursor.fetchall()]
289     if not values:
290     return 0
291    
292     max = 0
293     for value in values:
294     if value is None: continue
295     l = len(str(value % 1))
296     if l > max:
297     max = l
298     if max > 2:
299     return max - 2
300     else:
301     return 0
302     else:
303     return 0
304    
305 bh 841 def SimpleQuery(self, left, comparison, right):
306     """Return the indices of all rows that matching a condition.
307 bh 765
308 bh 841 Parameters:
309     left -- The column object for the left side of the comparison
310    
311     comparison -- The comparison operator as a string. It must be
312     one of '==', '!=', '<', '<=', '>=', '>'
313    
314     right -- The right hand side of the comparison. It must be
315     either a column object or a value, i.e. a string,
316     int or float.
317    
318     The return value is a sorted list of the indices of the rows
319     where the condition is true.
320     """
321     if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
322     raise ValueError("Comparison operator %r not allowed" % comparison)
323    
324     if hasattr(right, "internal_name"):
325     right_template = right.internal_name
326     params = ()
327     else:
328     right_template = "%s"
329     params = (right,)
330    
331     query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
332     % (self.tablename, left.internal_name, comparison,
333     right_template)
334    
335     cursor = self.db.cursor()
336     cursor.execute(query, params)
337     result = []
338     while 1:
339     row = cursor.fetchone()
340     if row is None:
341     break
342     result.append(row[0])
343     return result
344    
345 bh 984 def Dependencies(self):
346     """Placeholder for a method in a derived class.
347 bh 841
348 bh 984 Return a sequence with the tables and other data objects that
349     self depends on.
350     """
351     raise NotImplementedError
352    
353    
354 jan 1020 class TransientTable(TitledObject, TransientTableBase):
355 bh 765
356     """A Table in a transient DB that starts as the copy of a Thuban Table."""
357    
358     def __init__(self, transient_db, table):
359     """Create a new table in the given transient DB as a copy of table
360    
361     The table argument can be any object implementing the Table
362     interface.
363     """
364     TransientTableBase.__init__(self, transient_db)
365 jan 1020 TitledObject.__init__(self, table.Title())
366 bh 765 self.create(table)
367    
368     def create(self, table):
369     columns = []
370 bh 818 for col in table.Columns():
371     columns.append(ColumnReference(col.name, col.type,
372 bh 765 self.db.new_column_name()))
373     TransientTableBase.create(self, columns)
374    
375     # copy the input table to the transient db
376 bh 841
377     # A key to insert to use for the formatting of the insert
378     # statement. The key must not be equal to any of the column
379     # names so we construct one by building a string of x's that is
380     # longer than any of the column names
381     id_key = max([len(col.name) for col in self.columns]) * "x"
382    
383     insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
384 bh 765 % (self.tablename,
385     ", ".join([col.internal_name
386     for col in self.columns]),
387 bh 841 id_key,
388 bh 765 ", ".join(["%%(%s)s" % col.name
389     for col in self.columns]))
390     cursor = self.db.cursor()
391 bh 818 for i in range(table.NumRows()):
392 bh 841 row = table.ReadRowAsDict(i)
393     row[id_key] = i
394     cursor.execute(insert_template, row)
395 bh 765 self.db.conn.commit()
396    
397    
398    
399 jan 1020 class TransientJoinedTable(TitledObject, TransientTableBase):
400 bh 765
401     """A Table in the transient DB that contains a join of two tables"""
402    
403     def __init__(self, transient_db, left_table, left_field,
404 frank 1009 right_table, right_field = None, outer_join = False):
405 bh 765 """Create a new table in the transient DB as a join of two tables.
406    
407     Both input tables, left_table and right_table must have a
408     transient_table method that returns a table object for a table
409 frank 1002 in the transient database. The join is performed on the condition
410 bh 765 that the value of the left_field column the the left table is
411     equal to the value of the right_field in the right_table.
412    
413     The joined table contains all columns of the input tables with
414     one exception: Any column in the right_table with the same name
415     as one of the columns in the left_table will be omitted. This is
416     somewhat of an implementation detail, but is done so that the
417     column names of the joined table can be the same as the column
418     names of the input tables without having to create prefixes.
419     """
420     TransientTableBase.__init__(self, transient_db)
421 bh 984 self.dependencies = (left_table, right_table)
422 bh 765 self.left_table = left_table.transient_table()
423     self.left_field = left_field
424     self.right_table = right_table.transient_table()
425     if right_field:
426     self.right_field = right_field
427     else:
428     self.right_field = self.left_field
429 frank 1009 self.outer_join = outer_join
430 jan 1020
431     title = "Join of %(left)s and %(right)s" \
432     % {"left": self.left_table.Title(),
433     "right": self.right_table.Title()}
434     TitledObject.__init__(self, title)
435    
436 bh 765 self.create()
437    
438     def create(self):
439     """Internal: Create the table with the joined data"""
440     self.tablename = self.db.new_table_name()
441    
442     self.right_table.ensure_index(self.right_field)
443    
444     # Coalesce the column information
445     visited = {}
446     columns = []
447     for col in self.left_table.columns + self.right_table.columns:
448     if col.name in visited:
449 bh 841 # We can't allow multiple columns with the same original
450     # name, so omit this one. FIXME: There should be a
451     # better solution.
452 bh 765 continue
453     columns.append(col)
454     TransientTableBase.create(self, columns)
455    
456     # Copy the joined data to the table.
457     internal_names = [col.internal_name for col in self.columns]
458 frank 1009 if self.outer_join:
459     join_operator = 'LEFT OUTER JOIN'
460     else:
461     join_operator = 'JOIN'
462 bh 841 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
463 frank 1009 " %s %s ON %s = %s;"
464 bh 841 % (self.tablename,
465     ", ".join(internal_names),
466     self.left_table.tablename,
467     ", ".join(internal_names),
468     self.left_table.tablename,
469 frank 1009 join_operator,
470 bh 841 self.right_table.tablename,
471     self.orig_to_internal[self.left_field],
472     self.orig_to_internal[self.right_field]))
473 bh 765 self.db.execute(stmt)
474    
475 bh 984 def Dependencies(self):
476     """Return a tuple with the two tables the join depends on."""
477     return self.dependencies
478 bh 765
479 bh 984
480 jan 1020 class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
481 bh 765
482     """Table that copies data to a transient table on demand.
483    
484     The AutoTransientTable takes another table as input and copies data
485     to a table in a TransientDatabase instance on demand.
486     """
487    
488     def __init__(self, transient_db, table):
489 jan 1020 TitledObject.__init__(self, table.Title())
490 bh 765 self.transient_db = transient_db
491     self.table = table
492     self.t_table = None
493    
494 bh 818 def Columns(self):
495     return self.table.Columns()
496 bh 765
497 bh 818 def Column(self, col):
498     return self.table.Column(col)
499 bh 765
500 bh 839 def HasColumn(self, col):
501     """Return whether the table has a column with the given name or index
502     """
503     return self.table.HasColumn(col)
504    
505 bh 818 def NumRows(self):
506     return self.table.NumRows()
507 bh 765
508 bh 818 def NumColumns(self):
509     return self.table.NumColumns()
510 bh 765
511 bh 818 def ReadRowAsDict(self, record):
512 bh 765 """Return the record no. record as a dict mapping field names to values
513     """
514     if self.t_table is not None:
515 bh 839 return self.t_table.ReadRowAsDict(record)
516 bh 765 else:
517 bh 818 return self.table.ReadRowAsDict(record)
518 bh 765
519 bh 849 def ReadValue(self, row, col):
520     """Return the value of the specified row and column
521    
522     The col parameter may be the index of the column or its name.
523     """
524     if self.t_table is not None:
525     return self.t_table.ReadValue(row, col)
526     else:
527     return self.table.ReadValue(row, col)
528    
529 bh 765 def copy_to_transient(self):
530     """Internal: Create a transient table and copy the data into it"""
531     self.t_table = TransientTable(self.transient_db, self)
532    
533     def transient_table(self):
534     """
535     Return a table whose underlying implementation is in the transient db
536     """
537     if self.t_table is None:
538     self.copy_to_transient()
539     return self.t_table
540    
541 bh 818 def ValueRange(self, col):
542 bh 765 if self.t_table is None:
543     self.copy_to_transient()
544 bh 839 return self.t_table.ValueRange(col)
545 bh 765
546 bh 839 def UniqueValues(self, col):
547 bh 765 if self.t_table is None:
548     self.copy_to_transient()
549 bh 839 return self.t_table.UniqueValues(col)
550 bh 844
551     def SimpleQuery(self, left, comparison, right):
552     if self.t_table is None:
553     self.copy_to_transient()
554     # Make sure to use the column object of the transient table. The
555     # left argument is always a column object so we can just ask the
556     # t_table for the right object.
557 jonathan 933 if hasattr(right, "name"):
558     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
559     comparison,
560     self.t_table.Column(right.name))
561     else:
562     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
563     comparison, right)
564 bh 984
565     def Dependencies(self):
566     """Return a tuple containing the original table"""
567     return (self.table,)
568 frank 1026

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26