/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 984 - (show annotations)
Thu May 22 16:37:48 2003 UTC (21 years, 9 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 18117 byte(s)
Implement a way to discover dependencies between tables and
shapestores.

* Thuban/Model/transientdb.py (TransientTableBase.Dependencies)
(TransientJoinedTable.Dependencies)
(AutoTransientTable.SimpleQuery): New. Implement the dependencies
interface
(TransientJoinedTable.__init__): Keep tack of the original table
objects in addition to the corresponding transient tables.

* Thuban/Model/table.py (DBFTable.Dependencies)
(MemoryTable.Dependencies): New. Implement the dependencies
interface

* Thuban/Model/data.py (ShapeTable): New. Helper class for
ShapefileStore
(ShapefileStore.__init__): Use ShapeTable instead of
AutoTransientTable
(ShapefileStore.Table, ShapefileStore.Shapefile): Add doc-strings
(ShapefileStore.FileName, ShapefileStore.FileType): New. Accessor
methods for filename and type
(ShapefileStore.Dependencies): New. Implement the dependencies
interface
(DerivedShapeStore): New class to replace SimpleStore. The main
difference to SimpleStore is that it depends not on a shapefile
but another shapestore which expresses the dependencies a bit
better
(SimpleStore.__init__): Add deprecation warning.

* test/test_dbf_table.py (TestDBFTable.test_dependencies): New.
Test for the Dependencies method.

* test/test_memory_table.py (TestMemoryTable.test_dependencies):
New. Test for the Dependencies method.

* test/test_transientdb.py
(TestTransientTable.test_auto_transient_table_dependencies): New.
Test for the Dependencies method.
(TestTransientTable.test_transient_joined_table): Add test for the
Dependencies method.

* test/test_session.py (TestSessionSimple.setUp)
(TestSessionSimple.tearDown): New. Implement a better way to
destroy the sessions.
(TestSessionSimple.test_initial_state)
(TestSessionSimple.test_add_table): Bind session to self.session
so that it's destroyed by tearDown
(TestSessionSimple.test_open_shapefile): New. Test for
OpenShapefile and the object it returns

1 # Copyright (C) 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 #
5 # This program is free software under the GPL (>=v2)
6 # Read the file COPYING coming with the software for details.
7
8 """Database for transient data
9
10 This database is intended for data representations needed during the
11 lifetime of a Thuban session but which is not permanent. Examples of
12 this are for instance a join of two DBF files where the DBF files are
13 the permanent representation of the data and the join only exists in the
14 Thuban session and is reconstructed when the session is opened.
15 """
16
17 __version__ = "$Revision$"
18 # $Source$
19 # $Id$
20
21 import os
22 import weakref
23 from sqlite import connect
24
25 import table
26
27 sql_type_map = {
28 table.FIELDTYPE_INT: "INTEGER",
29 table.FIELDTYPE_STRING: "VARCHAR",
30 table.FIELDTYPE_DOUBLE: "FLOAT",
31 }
32
33 type_converter_map = {
34 table.FIELDTYPE_INT: int,
35 table.FIELDTYPE_STRING: str,
36 table.FIELDTYPE_DOUBLE: float,
37 }
38
39 class TransientDatabase:
40
41 def __init__(self, filename):
42 self.filename = filename
43 self.conn = connect(filename)
44 # Counters to produce unique table and column names
45 self.num_tables = 0
46 self.num_cols = 0
47 # Since there's only once process using the SQLite database, we
48 # might be able to get a tad more speed with default_synchronous
49 # OFF. So far I haven't seen any measurable speedup, though.
50 #self.execute("PRAGMA default_synchronous = OFF")
51
52 def __del__(self):
53 self.close()
54
55 def close(self):
56 if self.conn is not None:
57 self.conn.close()
58 self.conn = None
59
60 def new_table_name(self):
61 self.num_tables += 1
62 return "Table%03d" % self.num_tables
63
64 def new_column_name(self):
65 self.num_cols += 1
66 return "Col%03d" % self.num_cols
67
68 def execute(self, *args):
69 """execute the SQL statement in the database and return the result"""
70 cursor = self.conn.cursor()
71 cursor.execute(*args)
72 result = cursor.fetchone()
73 self.conn.commit()
74 return result
75
76 def cursor(self):
77 return self.conn.cursor()
78
79
80 class ColumnReference:
81
82 def __init__(self, name, type, internal_name):
83 self.name = name
84 self.type = type
85 self.internal_name = internal_name
86
87
88 class TransientTableBase(table.OldTableInterfaceMixin):
89
90 """Base class for tables in the transient database"""
91
92 def __init__(self, transient_db):
93 """Initialize the table for use with the given transient db"""
94 self.db = transient_db
95 self.tablename = self.db.new_table_name()
96 self.indexed_columns = {}
97 self.read_record_cursor = None
98 self.read_record_last_row = None
99 self.read_record_last_result = None
100
101 def create(self, columns):
102 self.columns = columns
103 self.name_to_column = {}
104 self.orig_names = []
105 self.internal_to_orig = {}
106 self.orig_to_internal = {}
107 self.column_map = {}
108
109 # Create the column objects and fill various maps and lists
110 for index in range(len(self.columns)):
111 col = self.columns[index]
112 self.name_to_column[col.name] = col
113 self.orig_names.append(col.name)
114 self.internal_to_orig[col.internal_name] = col.name
115 self.orig_to_internal[col.name] = col.internal_name
116 self.column_map[col.name] = col
117 self.column_map[index] = col
118
119 # Build the CREATE TABLE statement and create the table in the
120 # database
121 table_types = ["id INTEGER PRIMARY KEY"]
122 for col in self.columns:
123 table_types.append("%s %s" % (col.internal_name,
124 sql_type_map[col.type]))
125 table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
126 ",\n ".join(table_types))
127 self.db.execute(table_stmt)
128
129 def transient_table(self):
130 """
131 Return a table whose underlying implementation is in the transient db
132 """
133 return self
134
135 def ensure_index(self, column):
136 """Ensure that there's an index on the given column"""
137 if not column in self.indexed_columns:
138 internal_name = self.orig_to_internal[column]
139 indexname = "Index_%s_%s" % (self.tablename, internal_name)
140 stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
141 internal_name)
142 self.db.execute(stmt)
143 self.indexed_columns[column] = 1
144
145 def NumColumns(self):
146 return len(self.columns)
147
148 def NumRows(self):
149 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150 return int(result[0])
151
152 def Columns(self):
153 return self.columns
154
155 def Column(self, col):
156 return self.column_map[col]
157
158 def HasColumn(self, col):
159 """Return whether the table has a column with the given name or index
160 """
161 return self.column_map.has_key(col)
162
163 def ReadRowAsDict(self, index):
164 # Implementation Strategy: Executing a completely new select
165 # statement every time this method is called is too slow. The
166 # most important usage is to read the records more or less
167 # sequentially. This happens e.g. when drawing a layer with a
168 # classification where the shapes are drawn in order of the
169 # shape ids. Another pattern is that the same row is requested
170 # several times in a row. This happens in the table view, for
171 # instance.
172
173 # We can exploit this to make access faster by having one cursor
174 # open all the time and keeping the last row read around in case
175 # the same row is accessed again the next time and if the row
176 # index is larger than the row we have read last we simply fetch
177 # rows from the cursor until we've reached the requested row. If
178 # the requested row index is smaller then we start a new cursor.
179
180 # FIXME: So far this scheme seems to work well enough. Obvious
181 # improvements would be to start the cursor at exactly the
182 # requested row (should be efficient and easy to do now that the
183 # id is the primary key) and to perhaps to also start a new
184 # cursor if the requested index is much larger than the last row
185 # so that we don't read and discard lots of the rows.
186
187 # Check whether we have to start a new cursor
188 if self.read_record_cursor is None or index <self.read_record_last_row:
189 stmt = ("SELECT %s FROM %s;"
190 % (", ".join([c.internal_name for c in self.columns]),
191 self.tablename))
192 self.read_record_cursor = self.db.cursor()
193 self.read_record_cursor.execute(stmt)
194 self.read_record_last_row = -1
195 self.read_record_last_result = None
196
197 # Now we should have a cursor at a position less than or equal
198 # to the index so the following if statement will always set
199 # result to a suitable value
200 assert index >= self.read_record_last_row
201
202 if index == self.read_record_last_row:
203 result = self.read_record_last_result
204 else:
205 for i in range(index - self.read_record_last_row):
206 result = self.read_record_cursor.fetchone()
207 self.read_record_last_result = result
208 self.read_record_last_row = index
209 return dict(zip(self.orig_names, result))
210
211 def ReadValue(self, row, col):
212 """Return the value of the specified row and column
213
214 The col parameter may be the index of the column or its name.
215 """
216 # Depending on the actual access patterns of the table data, it
217 # might be a bit faster in some circumstances to not implement
218 # this via ReadRowAsDict, but this simple implementation should
219 # be fast enough for most purposes.
220 return self.ReadRowAsDict(row)[self.column_map[col].name]
221
222 def ValueRange(self, col):
223 col = self.column_map[col]
224 iname = col.internal_name
225 min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
226 % (iname, iname, self.tablename))
227 converter = type_converter_map[col.type]
228 return (converter(min), converter(max))
229
230 def UniqueValues(self, col):
231 iname = self.column_map[col].internal_name
232 cursor = self.db.cursor()
233 cursor.execute("SELECT %s FROM %s GROUP BY %s;"
234 % (iname, self.tablename, iname))
235 result = []
236 while 1:
237 row = cursor.fetchone()
238 if row is None:
239 break
240 result.append(row[0])
241 return result
242
243 def SimpleQuery(self, left, comparison, right):
244 """Return the indices of all rows that matching a condition.
245
246 Parameters:
247 left -- The column object for the left side of the comparison
248
249 comparison -- The comparison operator as a string. It must be
250 one of '==', '!=', '<', '<=', '>=', '>'
251
252 right -- The right hand side of the comparison. It must be
253 either a column object or a value, i.e. a string,
254 int or float.
255
256 The return value is a sorted list of the indices of the rows
257 where the condition is true.
258 """
259 if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
260 raise ValueError("Comparison operator %r not allowed" % comparison)
261
262 if hasattr(right, "internal_name"):
263 right_template = right.internal_name
264 params = ()
265 else:
266 right_template = "%s"
267 params = (right,)
268
269 query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
270 % (self.tablename, left.internal_name, comparison,
271 right_template)
272
273 cursor = self.db.cursor()
274 cursor.execute(query, params)
275 result = []
276 while 1:
277 row = cursor.fetchone()
278 if row is None:
279 break
280 result.append(row[0])
281 return result
282
283 def Dependencies(self):
284 """Placeholder for a method in a derived class.
285
286 Return a sequence with the tables and other data objects that
287 self depends on.
288 """
289 raise NotImplementedError
290
291
292 class TransientTable(TransientTableBase):
293
294 """A Table in a transient DB that starts as the copy of a Thuban Table."""
295
296 def __init__(self, transient_db, table):
297 """Create a new table in the given transient DB as a copy of table
298
299 The table argument can be any object implementing the Table
300 interface.
301 """
302 TransientTableBase.__init__(self, transient_db)
303 self.create(table)
304
305 def create(self, table):
306 columns = []
307 for col in table.Columns():
308 columns.append(ColumnReference(col.name, col.type,
309 self.db.new_column_name()))
310 TransientTableBase.create(self, columns)
311
312 # copy the input table to the transient db
313
314 # A key to insert to use for the formatting of the insert
315 # statement. The key must not be equal to any of the column
316 # names so we construct one by building a string of x's that is
317 # longer than any of the column names
318 id_key = max([len(col.name) for col in self.columns]) * "x"
319
320 insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
321 % (self.tablename,
322 ", ".join([col.internal_name
323 for col in self.columns]),
324 id_key,
325 ", ".join(["%%(%s)s" % col.name
326 for col in self.columns]))
327 cursor = self.db.cursor()
328 for i in range(table.NumRows()):
329 row = table.ReadRowAsDict(i)
330 row[id_key] = i
331 cursor.execute(insert_template, row)
332 self.db.conn.commit()
333
334
335
336 class TransientJoinedTable(TransientTableBase):
337
338 """A Table in the transient DB that contains a join of two tables"""
339
340 def __init__(self, transient_db, left_table, left_field,
341 right_table, right_field = None):
342 """Create a new table in the transient DB as a join of two tables.
343
344 Both input tables, left_table and right_table must have a
345 transient_table method that returns a table object for a table
346 in the trnsient database. The join is performed on the condition
347 that the value of the left_field column the the left table is
348 equal to the value of the right_field in the right_table.
349
350 The joined table contains all columns of the input tables with
351 one exception: Any column in the right_table with the same name
352 as one of the columns in the left_table will be omitted. This is
353 somewhat of an implementation detail, but is done so that the
354 column names of the joined table can be the same as the column
355 names of the input tables without having to create prefixes.
356 """
357 TransientTableBase.__init__(self, transient_db)
358 self.dependencies = (left_table, right_table)
359 self.left_table = left_table.transient_table()
360 self.left_field = left_field
361 self.right_table = right_table.transient_table()
362 if right_field:
363 self.right_field = right_field
364 else:
365 self.right_field = self.left_field
366 self.create()
367
368 def create(self):
369 """Internal: Create the table with the joined data"""
370 self.tablename = self.db.new_table_name()
371
372 self.right_table.ensure_index(self.right_field)
373
374 # Coalesce the column information
375 visited = {}
376 columns = []
377 for col in self.left_table.columns + self.right_table.columns:
378 if col.name in visited:
379 # We can't allow multiple columns with the same original
380 # name, so omit this one. FIXME: There should be a
381 # better solution.
382 continue
383 columns.append(col)
384 TransientTableBase.create(self, columns)
385
386 # Copy the joined data to the table.
387 internal_names = [col.internal_name for col in self.columns]
388 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
389 " JOIN %s ON %s = %s;"
390 % (self.tablename,
391 ", ".join(internal_names),
392 self.left_table.tablename,
393 ", ".join(internal_names),
394 self.left_table.tablename,
395 self.right_table.tablename,
396 self.orig_to_internal[self.left_field],
397 self.orig_to_internal[self.right_field]))
398 self.db.execute(stmt)
399
400 def Dependencies(self):
401 """Return a tuple with the two tables the join depends on."""
402 return self.dependencies
403
404
405 class AutoTransientTable(table.OldTableInterfaceMixin):
406
407 """Table that copies data to a transient table on demand.
408
409 The AutoTransientTable takes another table as input and copies data
410 to a table in a TransientDatabase instance on demand.
411 """
412
413 def __init__(self, transient_db, table):
414 self.transient_db = transient_db
415 self.table = table
416 self.t_table = None
417
418 def Columns(self):
419 return self.table.Columns()
420
421 def Column(self, col):
422 return self.table.Column(col)
423
424 def HasColumn(self, col):
425 """Return whether the table has a column with the given name or index
426 """
427 return self.table.HasColumn(col)
428
429 def NumRows(self):
430 return self.table.NumRows()
431
432 def NumColumns(self):
433 return self.table.NumColumns()
434
435 def ReadRowAsDict(self, record):
436 """Return the record no. record as a dict mapping field names to values
437 """
438 if self.t_table is not None:
439 return self.t_table.ReadRowAsDict(record)
440 else:
441 return self.table.ReadRowAsDict(record)
442
443 def ReadValue(self, row, col):
444 """Return the value of the specified row and column
445
446 The col parameter may be the index of the column or its name.
447 """
448 if self.t_table is not None:
449 return self.t_table.ReadValue(row, col)
450 else:
451 return self.table.ReadValue(row, col)
452
453 def copy_to_transient(self):
454 """Internal: Create a transient table and copy the data into it"""
455 self.t_table = TransientTable(self.transient_db, self)
456
457 def transient_table(self):
458 """
459 Return a table whose underlying implementation is in the transient db
460 """
461 if self.t_table is None:
462 self.copy_to_transient()
463 return self.t_table
464
465 def ValueRange(self, col):
466 if self.t_table is None:
467 self.copy_to_transient()
468 return self.t_table.ValueRange(col)
469
470 def UniqueValues(self, col):
471 if self.t_table is None:
472 self.copy_to_transient()
473 return self.t_table.UniqueValues(col)
474
475 def SimpleQuery(self, left, comparison, right):
476 if self.t_table is None:
477 self.copy_to_transient()
478 # Make sure to use the column object of the transient table. The
479 # left argument is always a column object so we can just ask the
480 # t_table for the right object.
481 if hasattr(right, "name"):
482 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
483 comparison,
484 self.t_table.Column(right.name))
485 else:
486 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
487 comparison, right)
488
489 def Dependencies(self):
490 """Return a tuple containing the original table"""
491 return (self.table,)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26