/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1381 - (show annotations)
Tue Jul 8 16:37:46 2003 UTC (21 years, 8 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 20762 byte(s)
* Thuban/Model/transientdb.py (TransientTableBase.Width): The type
constants in the column objects are the standard ones defined in
the table module.

* test/test_transientdb.py
(TestTransientTable.test_transienttable_to_dbf): New. Test whether
exporting transient tables as DBF works. This should catch the bug
just fixed in TransientTableBase.Width.

1 # Copyright (C) 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 #
5 # This program is free software under the GPL (>=v2)
6 # Read the file COPYING coming with the software for details.
7
8 """Database for transient data
9
10 This database is intended for data representations needed during the
11 lifetime of a Thuban session but which is not permanent. Examples of
12 this are for instance a join of two DBF files where the DBF files are
13 the permanent representation of the data and the join only exists in the
14 Thuban session and is reconstructed when the session is opened.
15 """
16
17 __version__ = "$Revision$"
18 # $Source$
19 # $Id$
20
21 import os
22 import weakref
23 from sqlite import connect
24
25 from base import TitledObject
26
27 import table
28
29 sql_type_map = {
30 table.FIELDTYPE_INT: "INTEGER",
31 table.FIELDTYPE_STRING: "VARCHAR",
32 table.FIELDTYPE_DOUBLE: "FLOAT",
33 }
34
35 type_converter_map = {
36 table.FIELDTYPE_INT: int,
37 table.FIELDTYPE_STRING: str,
38 table.FIELDTYPE_DOUBLE: float,
39 }
40
41 class TransientDatabase:
42
43 def __init__(self, filename):
44 self.filename = filename
45 self.conn = connect(filename)
46 # Counters to produce unique table and column names
47 self.num_tables = 0
48 self.num_cols = 0
49 # Since there's only once process using the SQLite database, we
50 # might be able to get a tad more speed with default_synchronous
51 # OFF. So far I haven't seen any measurable speedup, though.
52 #self.execute("PRAGMA default_synchronous = OFF")
53
54 def __del__(self):
55 self.close()
56
57 def close(self):
58 if self.conn is not None:
59 self.conn.close()
60 self.conn = None
61
62 def new_table_name(self):
63 self.num_tables += 1
64 return "Table%03d" % self.num_tables
65
66 def new_column_name(self):
67 self.num_cols += 1
68 return "Col%03d" % self.num_cols
69
70 def execute(self, *args):
71 """execute the SQL statement in the database and return the result"""
72 cursor = self.conn.cursor()
73 cursor.execute(*args)
74 result = cursor.fetchone()
75 self.conn.commit()
76 return result
77
78 def cursor(self):
79 return self.conn.cursor()
80
81
82 class ColumnReference:
83
84 def __init__(self, name, type, internal_name):
85 self.name = name
86 self.type = type
87 self.internal_name = internal_name
88
89
90 class TransientTableBase(table.OldTableInterfaceMixin):
91
92 """Base class for tables in the transient database"""
93
94 def __init__(self, transient_db):
95 """Initialize the table for use with the given transient db"""
96 self.db = transient_db
97 self.tablename = self.db.new_table_name()
98 self.indexed_columns = {}
99 self.read_record_cursor = None
100 self.read_record_last_row = None
101 self.read_record_last_result = None
102
103 def create(self, columns):
104 self.columns = columns
105 self.name_to_column = {}
106 self.orig_names = []
107 self.internal_to_orig = {}
108 self.orig_to_internal = {}
109 self.column_map = {}
110
111 # Create the column objects and fill various maps and lists
112 for index in range(len(self.columns)):
113 col = self.columns[index]
114 self.name_to_column[col.name] = col
115 self.orig_names.append(col.name)
116 self.internal_to_orig[col.internal_name] = col.name
117 self.orig_to_internal[col.name] = col.internal_name
118 self.column_map[col.name] = col
119 self.column_map[index] = col
120
121 # Build the CREATE TABLE statement and create the table in the
122 # database
123 table_types = ["id INTEGER PRIMARY KEY"]
124 for col in self.columns:
125 table_types.append("%s %s" % (col.internal_name,
126 sql_type_map[col.type]))
127 table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
128 ",\n ".join(table_types))
129 self.db.execute(table_stmt)
130
131 def transient_table(self):
132 """
133 Return a table whose underlying implementation is in the transient db
134 """
135 return self
136
137 def ensure_index(self, column):
138 """Ensure that there's an index on the given column"""
139 if not column in self.indexed_columns:
140 internal_name = self.orig_to_internal[column]
141 indexname = "Index_%s_%s" % (self.tablename, internal_name)
142 stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
143 internal_name)
144 self.db.execute(stmt)
145 self.indexed_columns[column] = 1
146
147 def NumColumns(self):
148 return len(self.columns)
149
150 def NumRows(self):
151 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152 return int(result[0])
153
154 def Columns(self):
155 return self.columns
156
157 def Column(self, col):
158 return self.column_map[col]
159
160 def HasColumn(self, col):
161 """Return whether the table has a column with the given name or index
162 """
163 return self.column_map.has_key(col)
164
165 def ReadRowAsDict(self, index):
166 # Implementation Strategy: Executing a completely new select
167 # statement every time this method is called is too slow. The
168 # most important usage is to read the records more or less
169 # sequentially. This happens e.g. when drawing a layer with a
170 # classification where the shapes are drawn in order of the
171 # shape ids. Another pattern is that the same row is requested
172 # several times in a row. This happens in the table view, for
173 # instance.
174
175 # We can exploit this to make access faster by having one cursor
176 # open all the time and keeping the last row read around in case
177 # the same row is accessed again the next time and if the row
178 # index is larger than the row we have read last we simply fetch
179 # rows from the cursor until we've reached the requested row. If
180 # the requested row index is smaller then we start a new cursor.
181
182 # FIXME: So far this scheme seems to work well enough. Obvious
183 # improvements would be to start the cursor at exactly the
184 # requested row (should be efficient and easy to do now that the
185 # id is the primary key) and to perhaps to also start a new
186 # cursor if the requested index is much larger than the last row
187 # so that we don't read and discard lots of the rows.
188
189 # Check whether we have to start a new cursor
190 if self.read_record_cursor is None or index <self.read_record_last_row:
191 stmt = ("SELECT %s FROM %s;"
192 % (", ".join([c.internal_name for c in self.columns]),
193 self.tablename))
194 self.read_record_cursor = self.db.cursor()
195 self.read_record_cursor.execute(stmt)
196 self.read_record_last_row = -1
197 self.read_record_last_result = None
198
199 # Now we should have a cursor at a position less than or equal
200 # to the index so the following if statement will always set
201 # result to a suitable value
202 assert index >= self.read_record_last_row
203
204 if index == self.read_record_last_row:
205 result = self.read_record_last_result
206 else:
207 for i in range(index - self.read_record_last_row):
208 result = self.read_record_cursor.fetchone()
209 self.read_record_last_result = result
210 self.read_record_last_row = index
211 return dict(zip(self.orig_names, result))
212
213 def ReadValue(self, row, col):
214 """Return the value of the specified row and column
215
216 The col parameter may be the index of the column or its name.
217 """
218 # Depending on the actual access patterns of the table data, it
219 # might be a bit faster in some circumstances to not implement
220 # this via ReadRowAsDict, but this simple implementation should
221 # be fast enough for most purposes.
222 return self.ReadRowAsDict(row)[self.column_map[col].name]
223
224 def ValueRange(self, col):
225 col = self.column_map[col]
226 iname = col.internal_name
227 min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228 % (iname, iname, self.tablename))
229 converter = type_converter_map[col.type]
230 return (converter(min), converter(max))
231
232 def UniqueValues(self, col):
233 iname = self.column_map[col].internal_name
234 cursor = self.db.cursor()
235 cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236 % (iname, self.tablename, iname))
237 result = []
238 while 1:
239 row = cursor.fetchone()
240 if row is None:
241 break
242 result.append(row[0])
243 return result
244
245 def Width(self, col):
246 """Return the maximum width of values in the column
247
248 The return value is the the maximum length of string
249 representation of the values in the column (represented by index
250 or name).
251 """
252 max = 0
253
254 type = self.column_map[col].type
255 iname = self.column_map[col].internal_name
256 cursor = self.db.cursor()
257 cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
258 values = [ i[0] for i in cursor.fetchall()]
259 if not values:
260 return None
261
262 if type == table.FIELDTYPE_DOUBLE:
263 format = "%.12f"
264 elif type == table.FIELDTYPE_INT:
265 format = "%d"
266 else:
267 format = "%s"
268 for value in values:
269 if value is None: continue
270 l = len(format % value)
271 if l > max:
272 max = l
273
274 return max
275
276 def SimpleQuery(self, left, comparison, right):
277 """Return the indices of all rows that matching a condition.
278
279 Parameters:
280 left -- The column object for the left side of the comparison
281
282 comparison -- The comparison operator as a string. It must be
283 one of '==', '!=', '<', '<=', '>=', '>'
284
285 right -- The right hand side of the comparison. It must be
286 either a column object or a value, i.e. a string,
287 int or float.
288
289 The return value is a sorted list of the indices of the rows
290 where the condition is true.
291 """
292 if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
293 raise ValueError("Comparison operator %r not allowed" % comparison)
294
295 if hasattr(right, "internal_name"):
296 right_template = right.internal_name
297 params = ()
298 else:
299 right_template = "%s"
300 params = (right,)
301
302 query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
303 % (self.tablename, left.internal_name, comparison,
304 right_template)
305
306 cursor = self.db.cursor()
307 cursor.execute(query, params)
308 result = []
309 while 1:
310 row = cursor.fetchone()
311 if row is None:
312 break
313 result.append(row[0])
314 return result
315
316 def Dependencies(self):
317 """Placeholder for a method in a derived class.
318
319 Return a sequence with the tables and other data objects that
320 self depends on.
321 """
322 raise NotImplementedError
323
324
325 class TransientTable(TitledObject, TransientTableBase):
326
327 """A Table in a transient DB that starts as the copy of a Thuban Table."""
328
329 def __init__(self, transient_db, table):
330 """Create a new table in the given transient DB as a copy of table
331
332 The table argument can be any object implementing the Table
333 interface.
334 """
335 TransientTableBase.__init__(self, transient_db)
336 TitledObject.__init__(self, table.Title())
337 self.create(table)
338
339 def create(self, table):
340 columns = []
341 for col in table.Columns():
342 columns.append(ColumnReference(col.name, col.type,
343 self.db.new_column_name()))
344 TransientTableBase.create(self, columns)
345
346 # copy the input table to the transient db
347
348 # A key to insert to use for the formatting of the insert
349 # statement. The key must not be equal to any of the column
350 # names so we construct one by building a string of x's that is
351 # longer than any of the column names
352 id_key = max([len(col.name) for col in self.columns]) * "x"
353
354 insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
355 % (self.tablename,
356 ", ".join([col.internal_name
357 for col in self.columns]),
358 id_key,
359 ", ".join(["%%(%s)s" % col.name
360 for col in self.columns]))
361 cursor = self.db.cursor()
362 for i in range(table.NumRows()):
363 row = table.ReadRowAsDict(i)
364 row[id_key] = i
365 cursor.execute(insert_template, row)
366 self.db.conn.commit()
367
368
369
370 class TransientJoinedTable(TitledObject, TransientTableBase):
371
372 """A Table in the transient DB that contains a join of two tables"""
373
374 def __init__(self, transient_db, left_table, left_field,
375 right_table, right_field = None, outer_join = False):
376 """Create a new table in the transient DB as a join of two tables.
377
378 Both input tables, left_table and right_table must have a
379 transient_table method that returns a table object for a table
380 in the transient database. The join is performed on the condition
381 that the value of the left_field column the the left table is
382 equal to the value of the right_field in the right_table.
383
384 The joined table contains all columns of the input tables,
385 however, the column names of the right table may be changed
386 slightly to make them unique in the joined table. This is
387 currently done by appending a sufficient number of underscores
388 ('_').
389 """
390 TransientTableBase.__init__(self, transient_db)
391 self.dependencies = (left_table, right_table)
392 self.left_table = left_table.transient_table()
393 self.left_field = left_field
394 self.right_table = right_table.transient_table()
395 if right_field:
396 self.right_field = right_field
397 else:
398 self.right_field = self.left_field
399 self.outer_join = outer_join
400
401 title = "Join of %(left)s and %(right)s" \
402 % {"left": self.left_table.Title(),
403 "right": self.right_table.Title()}
404 TitledObject.__init__(self, title)
405
406 self.create()
407
408 def create(self):
409 """Internal: Create the table with the joined data"""
410 self.tablename = self.db.new_table_name()
411
412 self.right_table.ensure_index(self.right_field)
413
414 # determine the internal column names to join on before
415 # coalescing the column information because if the external
416 # column names are the same they will be mapped to the same
417 # internal name afterwards.
418 internal_left_col = self.left_table.orig_to_internal[self.left_field]
419 internal_right_col =self.right_table.orig_to_internal[self.right_field]
420
421 # Coalesce the column information
422 visited = {}
423 columns = []
424 newcolumns = []
425 for table in (self.left_table, self.right_table):
426 for col in table.Columns():
427 colname = col.name
428 # We can't allow multiple columns with the same
429 # original name, so append '_' to this one until
430 # it is unique.
431 # FIXME: There should be a better solution.
432 while colname in visited:
433 colname = colname + '_'
434 columns.append((table.tablename, col))
435 newcol = ColumnReference(colname, col.type,
436 "Col%03d" % (len(newcolumns)+1))
437 newcolumns.append(newcol)
438 visited[colname] = 1
439 TransientTableBase.create(self, newcolumns)
440
441 # Copy the joined data to the table.
442 newinternal_names = [col.internal_name for col in self.columns]
443 internal_references = ["%s.%s" % (table, col.internal_name)
444 for table, col in columns]
445 if self.outer_join:
446 join_operator = 'LEFT OUTER JOIN'
447 else:
448 join_operator = 'JOIN'
449 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
450 " %s %s ON %s.%s = %s.%s;"
451 % (self.tablename,
452 ", ".join(newinternal_names),
453 self.left_table.tablename,
454 ", ".join(internal_references),
455 self.left_table.tablename,
456 join_operator,
457 self.right_table.tablename,
458 self.left_table.tablename,
459 internal_left_col,
460 self.right_table.tablename,
461 internal_right_col))
462 self.db.execute(stmt)
463
464 def Dependencies(self):
465 """Return a tuple with the two tables the join depends on."""
466 return self.dependencies
467
468 def JoinType(self):
469 """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
470 if self.outer_join:
471 return "LEFT OUTER"
472 else:
473 return "INNER"
474
475
476 class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
477
478 """Table that copies data to a transient table on demand.
479
480 The AutoTransientTable takes another table as input and copies data
481 to a table in a TransientDatabase instance on demand.
482 """
483
484 def __init__(self, transient_db, table):
485 TitledObject.__init__(self, table.Title())
486 self.transient_db = transient_db
487 self.table = table
488 self.t_table = None
489
490 def Columns(self):
491 return self.table.Columns()
492
493 def Column(self, col):
494 return self.table.Column(col)
495
496 def HasColumn(self, col):
497 """Return whether the table has a column with the given name or index
498 """
499 return self.table.HasColumn(col)
500
501 def NumRows(self):
502 return self.table.NumRows()
503
504 def NumColumns(self):
505 return self.table.NumColumns()
506
507 def ReadRowAsDict(self, record):
508 """Return the record no. record as a dict mapping field names to values
509 """
510 if self.t_table is not None:
511 return self.t_table.ReadRowAsDict(record)
512 else:
513 return self.table.ReadRowAsDict(record)
514
515 def ReadValue(self, row, col):
516 """Return the value of the specified row and column
517
518 The col parameter may be the index of the column or its name.
519 """
520 if self.t_table is not None:
521 return self.t_table.ReadValue(row, col)
522 else:
523 return self.table.ReadValue(row, col)
524
525 def copy_to_transient(self):
526 """Internal: Create a transient table and copy the data into it"""
527 self.t_table = TransientTable(self.transient_db, self)
528
529 def transient_table(self):
530 """
531 Return a table whose underlying implementation is in the transient db
532 """
533 if self.t_table is None:
534 self.copy_to_transient()
535 return self.t_table
536
537 def ValueRange(self, col):
538 if self.t_table is None:
539 self.copy_to_transient()
540 return self.t_table.ValueRange(col)
541
542 def UniqueValues(self, col):
543 if self.t_table is None:
544 self.copy_to_transient()
545 return self.t_table.UniqueValues(col)
546
547 def SimpleQuery(self, left, comparison, right):
548 if self.t_table is None:
549 self.copy_to_transient()
550 # Make sure to use the column object of the transient table. The
551 # left argument is always a column object so we can just ask the
552 # t_table for the right object.
553 if hasattr(right, "name"):
554 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
555 comparison,
556 self.t_table.Column(right.name))
557 else:
558 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
559 comparison, right)
560
561 def Dependencies(self):
562 """Return a tuple containing the original table"""
563 return (self.table,)
564
565 def Width(self, col):
566 return self.table.Width(col)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26