/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 984 - (hide annotations)
Thu May 22 16:37:48 2003 UTC (21 years, 9 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 18117 byte(s)
Implement a way to discover dependencies between tables and
shapestores.

* Thuban/Model/transientdb.py (TransientTableBase.Dependencies)
(TransientJoinedTable.Dependencies)
(AutoTransientTable.SimpleQuery): New. Implement the dependencies
interface
(TransientJoinedTable.__init__): Keep tack of the original table
objects in addition to the corresponding transient tables.

* Thuban/Model/table.py (DBFTable.Dependencies)
(MemoryTable.Dependencies): New. Implement the dependencies
interface

* Thuban/Model/data.py (ShapeTable): New. Helper class for
ShapefileStore
(ShapefileStore.__init__): Use ShapeTable instead of
AutoTransientTable
(ShapefileStore.Table, ShapefileStore.Shapefile): Add doc-strings
(ShapefileStore.FileName, ShapefileStore.FileType): New. Accessor
methods for filename and type
(ShapefileStore.Dependencies): New. Implement the dependencies
interface
(DerivedShapeStore): New class to replace SimpleStore. The main
difference to SimpleStore is that it depends not on a shapefile
but another shapestore which expresses the dependencies a bit
better
(SimpleStore.__init__): Add deprecation warning.

* test/test_dbf_table.py (TestDBFTable.test_dependencies): New.
Test for the Dependencies method.

* test/test_memory_table.py (TestMemoryTable.test_dependencies):
New. Test for the Dependencies method.

* test/test_transientdb.py
(TestTransientTable.test_auto_transient_table_dependencies): New.
Test for the Dependencies method.
(TestTransientTable.test_transient_joined_table): Add test for the
Dependencies method.

* test/test_session.py (TestSessionSimple.setUp)
(TestSessionSimple.tearDown): New. Implement a better way to
destroy the sessions.
(TestSessionSimple.test_initial_state)
(TestSessionSimple.test_add_table): Bind session to self.session
so that it's destroyed by tearDown
(TestSessionSimple.test_open_shapefile): New. Test for
OpenShapefile and the object it returns

1 bh 765 # Copyright (C) 2003 by Intevation GmbH
2     # Authors:
3     # Bernhard Herzog <[email protected]>
4     #
5     # This program is free software under the GPL (>=v2)
6     # Read the file COPYING coming with the software for details.
7    
8     """Database for transient data
9    
10     This database is intended for data representations needed during the
11     lifetime of a Thuban session but which is not permanent. Examples of
12     this are for instance a join of two DBF files where the DBF files are
13     the permanent representation of the data and the join only exists in the
14     Thuban session and is reconstructed when the session is opened.
15     """
16    
17     __version__ = "$Revision$"
18     # $Source$
19     # $Id$
20    
21     import os
22     import weakref
23     from sqlite import connect
24    
25     import table
26    
27     sql_type_map = {
28     table.FIELDTYPE_INT: "INTEGER",
29     table.FIELDTYPE_STRING: "VARCHAR",
30     table.FIELDTYPE_DOUBLE: "FLOAT",
31     }
32    
33     type_converter_map = {
34     table.FIELDTYPE_INT: int,
35     table.FIELDTYPE_STRING: str,
36     table.FIELDTYPE_DOUBLE: float,
37     }
38    
39     class TransientDatabase:
40    
41     def __init__(self, filename):
42     self.filename = filename
43     self.conn = connect(filename)
44     # Counters to produce unique table and column names
45     self.num_tables = 0
46     self.num_cols = 0
47     # Since there's only once process using the SQLite database, we
48     # might be able to get a tad more speed with default_synchronous
49     # OFF. So far I haven't seen any measurable speedup, though.
50     #self.execute("PRAGMA default_synchronous = OFF")
51    
52     def __del__(self):
53     self.close()
54    
55     def close(self):
56 bh 777 if self.conn is not None:
57 bh 765 self.conn.close()
58 bh 777 self.conn = None
59 bh 765
60     def new_table_name(self):
61     self.num_tables += 1
62     return "Table%03d" % self.num_tables
63    
64     def new_column_name(self):
65     self.num_cols += 1
66     return "Col%03d" % self.num_cols
67    
68     def execute(self, *args):
69     """execute the SQL statement in the database and return the result"""
70     cursor = self.conn.cursor()
71     cursor.execute(*args)
72     result = cursor.fetchone()
73     self.conn.commit()
74     return result
75    
76     def cursor(self):
77     return self.conn.cursor()
78    
79    
80     class ColumnReference:
81    
82     def __init__(self, name, type, internal_name):
83     self.name = name
84     self.type = type
85     self.internal_name = internal_name
86    
87    
88 bh 818 class TransientTableBase(table.OldTableInterfaceMixin):
89 bh 765
90     """Base class for tables in the transient database"""
91    
92     def __init__(self, transient_db):
93     """Initialize the table for use with the given transient db"""
94     self.db = transient_db
95     self.tablename = self.db.new_table_name()
96     self.indexed_columns = {}
97     self.read_record_cursor = None
98     self.read_record_last_row = None
99 bh 785 self.read_record_last_result = None
100 bh 765
101     def create(self, columns):
102     self.columns = columns
103     self.name_to_column = {}
104     self.orig_names = []
105     self.internal_to_orig = {}
106     self.orig_to_internal = {}
107 bh 818 self.column_map = {}
108 bh 765
109     # Create the column objects and fill various maps and lists
110 bh 818 for index in range(len(self.columns)):
111     col = self.columns[index]
112 bh 765 self.name_to_column[col.name] = col
113     self.orig_names.append(col.name)
114     self.internal_to_orig[col.internal_name] = col.name
115     self.orig_to_internal[col.name] = col.internal_name
116 bh 818 self.column_map[col.name] = col
117     self.column_map[index] = col
118 bh 765
119     # Build the CREATE TABLE statement and create the table in the
120     # database
121 bh 841 table_types = ["id INTEGER PRIMARY KEY"]
122 bh 765 for col in self.columns:
123     table_types.append("%s %s" % (col.internal_name,
124     sql_type_map[col.type]))
125     table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
126     ",\n ".join(table_types))
127     self.db.execute(table_stmt)
128    
129     def transient_table(self):
130     """
131     Return a table whose underlying implementation is in the transient db
132     """
133     return self
134    
135     def ensure_index(self, column):
136     """Ensure that there's an index on the given column"""
137     if not column in self.indexed_columns:
138     internal_name = self.orig_to_internal[column]
139     indexname = "Index_%s_%s" % (self.tablename, internal_name)
140     stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
141     internal_name)
142     self.db.execute(stmt)
143     self.indexed_columns[column] = 1
144    
145 bh 818 def NumColumns(self):
146 bh 765 return len(self.columns)
147    
148 bh 818 def NumRows(self):
149 bh 765 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150     return int(result[0])
151    
152 bh 818 def Columns(self):
153     return self.columns
154    
155     def Column(self, col):
156     return self.column_map[col]
157    
158 bh 839 def HasColumn(self, col):
159     """Return whether the table has a column with the given name or index
160     """
161     return self.column_map.has_key(col)
162    
163 bh 818 def ReadRowAsDict(self, index):
164 bh 849 # Implementation Strategy: Executing a completely new select
165     # statement every time this method is called is too slow. The
166     # most important usage is to read the records more or less
167     # sequentially. This happens e.g. when drawing a layer with a
168     # classification where the shapes are drawn in order of the
169     # shape ids. Another pattern is that the same row is requested
170     # several times in a row. This happens in the table view, for
171     # instance.
172    
173     # We can exploit this to make access faster by having one cursor
174     # open all the time and keeping the last row read around in case
175     # the same row is accessed again the next time and if the row
176     # index is larger than the row we have read last we simply fetch
177     # rows from the cursor until we've reached the requested row. If
178     # the requested row index is smaller then we start a new cursor.
179    
180     # FIXME: So far this scheme seems to work well enough. Obvious
181     # improvements would be to start the cursor at exactly the
182     # requested row (should be efficient and easy to do now that the
183     # id is the primary key) and to perhaps to also start a new
184     # cursor if the requested index is much larger than the last row
185     # so that we don't read and discard lots of the rows.
186    
187     # Check whether we have to start a new cursor
188 bh 765 if self.read_record_cursor is None or index <self.read_record_last_row:
189     stmt = ("SELECT %s FROM %s;"
190     % (", ".join([c.internal_name for c in self.columns]),
191     self.tablename))
192     self.read_record_cursor = self.db.cursor()
193     self.read_record_cursor.execute(stmt)
194     self.read_record_last_row = -1
195 bh 785 self.read_record_last_result = None
196    
197     # Now we should have a cursor at a position less than or equal
198     # to the index so the following if statement will always set
199     # result to a suitable value
200     assert index >= self.read_record_last_row
201    
202     if index == self.read_record_last_row:
203 bh 818 result = self.read_record_last_result
204 bh 785 else:
205     for i in range(index - self.read_record_last_row):
206     result = self.read_record_cursor.fetchone()
207     self.read_record_last_result = result
208 bh 765 self.read_record_last_row = index
209 bh 785 return dict(zip(self.orig_names, result))
210 bh 765
211 bh 849 def ReadValue(self, row, col):
212     """Return the value of the specified row and column
213    
214     The col parameter may be the index of the column or its name.
215     """
216     # Depending on the actual access patterns of the table data, it
217     # might be a bit faster in some circumstances to not implement
218     # this via ReadRowAsDict, but this simple implementation should
219     # be fast enough for most purposes.
220     return self.ReadRowAsDict(row)[self.column_map[col].name]
221    
222 bh 818 def ValueRange(self, col):
223     col = self.column_map[col]
224 bh 765 iname = col.internal_name
225     min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
226     % (iname, iname, self.tablename))
227     converter = type_converter_map[col.type]
228 bh 818 return (converter(min), converter(max))
229 bh 765
230 bh 818 def UniqueValues(self, col):
231     iname = self.column_map[col].internal_name
232 bh 765 cursor = self.db.cursor()
233     cursor.execute("SELECT %s FROM %s GROUP BY %s;"
234     % (iname, self.tablename, iname))
235     result = []
236     while 1:
237     row = cursor.fetchone()
238     if row is None:
239     break
240     result.append(row[0])
241     return result
242    
243 bh 841 def SimpleQuery(self, left, comparison, right):
244     """Return the indices of all rows that matching a condition.
245 bh 765
246 bh 841 Parameters:
247     left -- The column object for the left side of the comparison
248    
249     comparison -- The comparison operator as a string. It must be
250     one of '==', '!=', '<', '<=', '>=', '>'
251    
252     right -- The right hand side of the comparison. It must be
253     either a column object or a value, i.e. a string,
254     int or float.
255    
256     The return value is a sorted list of the indices of the rows
257     where the condition is true.
258     """
259     if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
260     raise ValueError("Comparison operator %r not allowed" % comparison)
261    
262     if hasattr(right, "internal_name"):
263     right_template = right.internal_name
264     params = ()
265     else:
266     right_template = "%s"
267     params = (right,)
268    
269     query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
270     % (self.tablename, left.internal_name, comparison,
271     right_template)
272    
273     cursor = self.db.cursor()
274     cursor.execute(query, params)
275     result = []
276     while 1:
277     row = cursor.fetchone()
278     if row is None:
279     break
280     result.append(row[0])
281     return result
282    
283 bh 984 def Dependencies(self):
284     """Placeholder for a method in a derived class.
285 bh 841
286 bh 984 Return a sequence with the tables and other data objects that
287     self depends on.
288     """
289     raise NotImplementedError
290    
291    
292 bh 765 class TransientTable(TransientTableBase):
293    
294     """A Table in a transient DB that starts as the copy of a Thuban Table."""
295    
296     def __init__(self, transient_db, table):
297     """Create a new table in the given transient DB as a copy of table
298    
299     The table argument can be any object implementing the Table
300     interface.
301     """
302     TransientTableBase.__init__(self, transient_db)
303     self.create(table)
304    
305     def create(self, table):
306     columns = []
307 bh 818 for col in table.Columns():
308     columns.append(ColumnReference(col.name, col.type,
309 bh 765 self.db.new_column_name()))
310     TransientTableBase.create(self, columns)
311    
312     # copy the input table to the transient db
313 bh 841
314     # A key to insert to use for the formatting of the insert
315     # statement. The key must not be equal to any of the column
316     # names so we construct one by building a string of x's that is
317     # longer than any of the column names
318     id_key = max([len(col.name) for col in self.columns]) * "x"
319    
320     insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
321 bh 765 % (self.tablename,
322     ", ".join([col.internal_name
323     for col in self.columns]),
324 bh 841 id_key,
325 bh 765 ", ".join(["%%(%s)s" % col.name
326     for col in self.columns]))
327     cursor = self.db.cursor()
328 bh 818 for i in range(table.NumRows()):
329 bh 841 row = table.ReadRowAsDict(i)
330     row[id_key] = i
331     cursor.execute(insert_template, row)
332 bh 765 self.db.conn.commit()
333    
334    
335    
336     class TransientJoinedTable(TransientTableBase):
337    
338     """A Table in the transient DB that contains a join of two tables"""
339    
340     def __init__(self, transient_db, left_table, left_field,
341     right_table, right_field = None):
342     """Create a new table in the transient DB as a join of two tables.
343    
344     Both input tables, left_table and right_table must have a
345     transient_table method that returns a table object for a table
346     in the trnsient database. The join is performed on the condition
347     that the value of the left_field column the the left table is
348     equal to the value of the right_field in the right_table.
349    
350     The joined table contains all columns of the input tables with
351     one exception: Any column in the right_table with the same name
352     as one of the columns in the left_table will be omitted. This is
353     somewhat of an implementation detail, but is done so that the
354     column names of the joined table can be the same as the column
355     names of the input tables without having to create prefixes.
356     """
357     TransientTableBase.__init__(self, transient_db)
358 bh 984 self.dependencies = (left_table, right_table)
359 bh 765 self.left_table = left_table.transient_table()
360     self.left_field = left_field
361     self.right_table = right_table.transient_table()
362     if right_field:
363     self.right_field = right_field
364     else:
365     self.right_field = self.left_field
366     self.create()
367    
368     def create(self):
369     """Internal: Create the table with the joined data"""
370     self.tablename = self.db.new_table_name()
371    
372     self.right_table.ensure_index(self.right_field)
373    
374     # Coalesce the column information
375     visited = {}
376     columns = []
377     for col in self.left_table.columns + self.right_table.columns:
378     if col.name in visited:
379 bh 841 # We can't allow multiple columns with the same original
380     # name, so omit this one. FIXME: There should be a
381     # better solution.
382 bh 765 continue
383     columns.append(col)
384     TransientTableBase.create(self, columns)
385    
386     # Copy the joined data to the table.
387     internal_names = [col.internal_name for col in self.columns]
388 bh 841 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
389     " JOIN %s ON %s = %s;"
390     % (self.tablename,
391     ", ".join(internal_names),
392     self.left_table.tablename,
393     ", ".join(internal_names),
394     self.left_table.tablename,
395     self.right_table.tablename,
396     self.orig_to_internal[self.left_field],
397     self.orig_to_internal[self.right_field]))
398 bh 765 self.db.execute(stmt)
399    
400 bh 984 def Dependencies(self):
401     """Return a tuple with the two tables the join depends on."""
402     return self.dependencies
403 bh 765
404 bh 984
405 bh 818 class AutoTransientTable(table.OldTableInterfaceMixin):
406 bh 765
407     """Table that copies data to a transient table on demand.
408    
409     The AutoTransientTable takes another table as input and copies data
410     to a table in a TransientDatabase instance on demand.
411     """
412    
413     def __init__(self, transient_db, table):
414     self.transient_db = transient_db
415     self.table = table
416     self.t_table = None
417    
418 bh 818 def Columns(self):
419     return self.table.Columns()
420 bh 765
421 bh 818 def Column(self, col):
422     return self.table.Column(col)
423 bh 765
424 bh 839 def HasColumn(self, col):
425     """Return whether the table has a column with the given name or index
426     """
427     return self.table.HasColumn(col)
428    
429 bh 818 def NumRows(self):
430     return self.table.NumRows()
431 bh 765
432 bh 818 def NumColumns(self):
433     return self.table.NumColumns()
434 bh 765
435 bh 818 def ReadRowAsDict(self, record):
436 bh 765 """Return the record no. record as a dict mapping field names to values
437     """
438     if self.t_table is not None:
439 bh 839 return self.t_table.ReadRowAsDict(record)
440 bh 765 else:
441 bh 818 return self.table.ReadRowAsDict(record)
442 bh 765
443 bh 849 def ReadValue(self, row, col):
444     """Return the value of the specified row and column
445    
446     The col parameter may be the index of the column or its name.
447     """
448     if self.t_table is not None:
449     return self.t_table.ReadValue(row, col)
450     else:
451     return self.table.ReadValue(row, col)
452    
453 bh 765 def copy_to_transient(self):
454     """Internal: Create a transient table and copy the data into it"""
455     self.t_table = TransientTable(self.transient_db, self)
456    
457     def transient_table(self):
458     """
459     Return a table whose underlying implementation is in the transient db
460     """
461     if self.t_table is None:
462     self.copy_to_transient()
463     return self.t_table
464    
465 bh 818 def ValueRange(self, col):
466 bh 765 if self.t_table is None:
467     self.copy_to_transient()
468 bh 839 return self.t_table.ValueRange(col)
469 bh 765
470 bh 839 def UniqueValues(self, col):
471 bh 765 if self.t_table is None:
472     self.copy_to_transient()
473 bh 839 return self.t_table.UniqueValues(col)
474 bh 844
475     def SimpleQuery(self, left, comparison, right):
476     if self.t_table is None:
477     self.copy_to_transient()
478     # Make sure to use the column object of the transient table. The
479     # left argument is always a column object so we can just ask the
480     # t_table for the right object.
481 jonathan 933 if hasattr(right, "name"):
482     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
483     comparison,
484     self.t_table.Column(right.name))
485     else:
486     return self.t_table.SimpleQuery(self.t_table.Column(left.name),
487     comparison, right)
488 bh 984
489     def Dependencies(self):
490     """Return a tuple containing the original table"""
491     return (self.table,)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26