/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1511 - (show annotations)
Tue Jul 29 14:48:57 2003 UTC (21 years, 7 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 20737 byte(s)
Remove some unnecessary imports

1 # Copyright (C) 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 #
5 # This program is free software under the GPL (>=v2)
6 # Read the file COPYING coming with the software for details.
7
8 """Database for transient data
9
10 This database is intended for data representations needed during the
11 lifetime of a Thuban session but which is not permanent. Examples of
12 this are for instance a join of two DBF files where the DBF files are
13 the permanent representation of the data and the join only exists in the
14 Thuban session and is reconstructed when the session is opened.
15 """
16
17 __version__ = "$Revision$"
18 # $Source$
19 # $Id$
20
21 from sqlite import connect
22
23 from base import TitledObject
24
25 import table
26
27 sql_type_map = {
28 table.FIELDTYPE_INT: "INTEGER",
29 table.FIELDTYPE_STRING: "VARCHAR",
30 table.FIELDTYPE_DOUBLE: "FLOAT",
31 }
32
33 type_converter_map = {
34 table.FIELDTYPE_INT: int,
35 table.FIELDTYPE_STRING: str,
36 table.FIELDTYPE_DOUBLE: float,
37 }
38
39 class TransientDatabase:
40
41 def __init__(self, filename):
42 self.filename = filename
43 self.conn = connect(filename)
44 # Counters to produce unique table and column names
45 self.num_tables = 0
46 self.num_cols = 0
47 # Since there's only once process using the SQLite database, we
48 # might be able to get a tad more speed with default_synchronous
49 # OFF. So far I haven't seen any measurable speedup, though.
50 #self.execute("PRAGMA default_synchronous = OFF")
51
52 def __del__(self):
53 self.close()
54
55 def close(self):
56 if self.conn is not None:
57 self.conn.close()
58 self.conn = None
59
60 def new_table_name(self):
61 self.num_tables += 1
62 return "Table%03d" % self.num_tables
63
64 def new_column_name(self):
65 self.num_cols += 1
66 return "Col%03d" % self.num_cols
67
68 def execute(self, *args):
69 """execute the SQL statement in the database and return the result"""
70 cursor = self.conn.cursor()
71 cursor.execute(*args)
72 result = cursor.fetchone()
73 self.conn.commit()
74 return result
75
76 def cursor(self):
77 return self.conn.cursor()
78
79
80 class ColumnReference:
81
82 def __init__(self, name, type, internal_name):
83 self.name = name
84 self.type = type
85 self.internal_name = internal_name
86
87
88 class TransientTableBase(table.OldTableInterfaceMixin):
89
90 """Base class for tables in the transient database"""
91
92 def __init__(self, transient_db):
93 """Initialize the table for use with the given transient db"""
94 self.db = transient_db
95 self.tablename = self.db.new_table_name()
96 self.indexed_columns = {}
97 self.read_record_cursor = None
98 self.read_record_last_row = None
99 self.read_record_last_result = None
100
101 def create(self, columns):
102 self.columns = columns
103 self.name_to_column = {}
104 self.orig_names = []
105 self.internal_to_orig = {}
106 self.orig_to_internal = {}
107 self.column_map = {}
108
109 # Create the column objects and fill various maps and lists
110 for index in range(len(self.columns)):
111 col = self.columns[index]
112 self.name_to_column[col.name] = col
113 self.orig_names.append(col.name)
114 self.internal_to_orig[col.internal_name] = col.name
115 self.orig_to_internal[col.name] = col.internal_name
116 self.column_map[col.name] = col
117 self.column_map[index] = col
118
119 # Build the CREATE TABLE statement and create the table in the
120 # database
121 table_types = ["id INTEGER PRIMARY KEY"]
122 for col in self.columns:
123 table_types.append("%s %s" % (col.internal_name,
124 sql_type_map[col.type]))
125 table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
126 ",\n ".join(table_types))
127 self.db.execute(table_stmt)
128
129 def transient_table(self):
130 """
131 Return a table whose underlying implementation is in the transient db
132 """
133 return self
134
135 def ensure_index(self, column):
136 """Ensure that there's an index on the given column"""
137 if not column in self.indexed_columns:
138 internal_name = self.orig_to_internal[column]
139 indexname = "Index_%s_%s" % (self.tablename, internal_name)
140 stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
141 internal_name)
142 self.db.execute(stmt)
143 self.indexed_columns[column] = 1
144
145 def NumColumns(self):
146 return len(self.columns)
147
148 def NumRows(self):
149 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150 return int(result[0])
151
152 def Columns(self):
153 return self.columns
154
155 def Column(self, col):
156 return self.column_map[col]
157
158 def HasColumn(self, col):
159 """Return whether the table has a column with the given name or index
160 """
161 return self.column_map.has_key(col)
162
163 def ReadRowAsDict(self, index):
164 # Implementation Strategy: Executing a completely new select
165 # statement every time this method is called is too slow. The
166 # most important usage is to read the records more or less
167 # sequentially. This happens e.g. when drawing a layer with a
168 # classification where the shapes are drawn in order of the
169 # shape ids. Another pattern is that the same row is requested
170 # several times in a row. This happens in the table view, for
171 # instance.
172
173 # We can exploit this to make access faster by having one cursor
174 # open all the time and keeping the last row read around in case
175 # the same row is accessed again the next time and if the row
176 # index is larger than the row we have read last we simply fetch
177 # rows from the cursor until we've reached the requested row. If
178 # the requested row index is smaller then we start a new cursor.
179
180 # FIXME: So far this scheme seems to work well enough. Obvious
181 # improvements would be to start the cursor at exactly the
182 # requested row (should be efficient and easy to do now that the
183 # id is the primary key) and to perhaps to also start a new
184 # cursor if the requested index is much larger than the last row
185 # so that we don't read and discard lots of the rows.
186
187 # Check whether we have to start a new cursor
188 if self.read_record_cursor is None or index <self.read_record_last_row:
189 stmt = ("SELECT %s FROM %s;"
190 % (", ".join([c.internal_name for c in self.columns]),
191 self.tablename))
192 self.read_record_cursor = self.db.cursor()
193 self.read_record_cursor.execute(stmt)
194 self.read_record_last_row = -1
195 self.read_record_last_result = None
196
197 # Now we should have a cursor at a position less than or equal
198 # to the index so the following if statement will always set
199 # result to a suitable value
200 assert index >= self.read_record_last_row
201
202 if index == self.read_record_last_row:
203 result = self.read_record_last_result
204 else:
205 for i in range(index - self.read_record_last_row):
206 result = self.read_record_cursor.fetchone()
207 self.read_record_last_result = result
208 self.read_record_last_row = index
209 return dict(zip(self.orig_names, result))
210
211 def ReadValue(self, row, col):
212 """Return the value of the specified row and column
213
214 The col parameter may be the index of the column or its name.
215 """
216 # Depending on the actual access patterns of the table data, it
217 # might be a bit faster in some circumstances to not implement
218 # this via ReadRowAsDict, but this simple implementation should
219 # be fast enough for most purposes.
220 return self.ReadRowAsDict(row)[self.column_map[col].name]
221
222 def ValueRange(self, col):
223 col = self.column_map[col]
224 iname = col.internal_name
225 min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
226 % (iname, iname, self.tablename))
227 converter = type_converter_map[col.type]
228 return (converter(min), converter(max))
229
230 def UniqueValues(self, col):
231 iname = self.column_map[col].internal_name
232 cursor = self.db.cursor()
233 cursor.execute("SELECT %s FROM %s GROUP BY %s;"
234 % (iname, self.tablename, iname))
235 result = []
236 while 1:
237 row = cursor.fetchone()
238 if row is None:
239 break
240 result.append(row[0])
241 return result
242
243 def Width(self, col):
244 """Return the maximum width of values in the column
245
246 The return value is the the maximum length of string
247 representation of the values in the column (represented by index
248 or name).
249 """
250 max = 0
251
252 type = self.column_map[col].type
253 iname = self.column_map[col].internal_name
254 cursor = self.db.cursor()
255 cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256 values = [ i[0] for i in cursor.fetchall()]
257 if not values:
258 return None
259
260 if type == table.FIELDTYPE_DOUBLE:
261 format = "%.12f"
262 elif type == table.FIELDTYPE_INT:
263 format = "%d"
264 else:
265 format = "%s"
266 for value in values:
267 if value is None: continue
268 l = len(format % value)
269 if l > max:
270 max = l
271
272 return max
273
274 def SimpleQuery(self, left, comparison, right):
275 """Return the indices of all rows that matching a condition.
276
277 Parameters:
278 left -- The column object for the left side of the comparison
279
280 comparison -- The comparison operator as a string. It must be
281 one of '==', '!=', '<', '<=', '>=', '>'
282
283 right -- The right hand side of the comparison. It must be
284 either a column object or a value, i.e. a string,
285 int or float.
286
287 The return value is a sorted list of the indices of the rows
288 where the condition is true.
289 """
290 if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
291 raise ValueError("Comparison operator %r not allowed" % comparison)
292
293 if hasattr(right, "internal_name"):
294 right_template = right.internal_name
295 params = ()
296 else:
297 right_template = "%s"
298 params = (right,)
299
300 query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
301 % (self.tablename, left.internal_name, comparison,
302 right_template)
303
304 cursor = self.db.cursor()
305 cursor.execute(query, params)
306 result = []
307 while 1:
308 row = cursor.fetchone()
309 if row is None:
310 break
311 result.append(row[0])
312 return result
313
314 def Dependencies(self):
315 """Placeholder for a method in a derived class.
316
317 Return a sequence with the tables and other data objects that
318 self depends on.
319 """
320 raise NotImplementedError
321
322
323 class TransientTable(TitledObject, TransientTableBase):
324
325 """A Table in a transient DB that starts as the copy of a Thuban Table."""
326
327 def __init__(self, transient_db, table):
328 """Create a new table in the given transient DB as a copy of table
329
330 The table argument can be any object implementing the Table
331 interface.
332 """
333 TransientTableBase.__init__(self, transient_db)
334 TitledObject.__init__(self, table.Title())
335 self.create(table)
336
337 def create(self, table):
338 columns = []
339 for col in table.Columns():
340 columns.append(ColumnReference(col.name, col.type,
341 self.db.new_column_name()))
342 TransientTableBase.create(self, columns)
343
344 # copy the input table to the transient db
345
346 # A key to insert to use for the formatting of the insert
347 # statement. The key must not be equal to any of the column
348 # names so we construct one by building a string of x's that is
349 # longer than any of the column names
350 id_key = max([len(col.name) for col in self.columns]) * "x"
351
352 insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
353 % (self.tablename,
354 ", ".join([col.internal_name
355 for col in self.columns]),
356 id_key,
357 ", ".join(["%%(%s)s" % col.name
358 for col in self.columns]))
359 cursor = self.db.cursor()
360 for i in range(table.NumRows()):
361 row = table.ReadRowAsDict(i)
362 row[id_key] = i
363 cursor.execute(insert_template, row)
364 self.db.conn.commit()
365
366
367
368 class TransientJoinedTable(TitledObject, TransientTableBase):
369
370 """A Table in the transient DB that contains a join of two tables"""
371
372 def __init__(self, transient_db, left_table, left_field,
373 right_table, right_field = None, outer_join = False):
374 """Create a new table in the transient DB as a join of two tables.
375
376 Both input tables, left_table and right_table must have a
377 transient_table method that returns a table object for a table
378 in the transient database. The join is performed on the condition
379 that the value of the left_field column the the left table is
380 equal to the value of the right_field in the right_table.
381
382 The joined table contains all columns of the input tables,
383 however, the column names of the right table may be changed
384 slightly to make them unique in the joined table. This is
385 currently done by appending a sufficient number of underscores
386 ('_').
387 """
388 TransientTableBase.__init__(self, transient_db)
389 self.dependencies = (left_table, right_table)
390 self.left_table = left_table.transient_table()
391 self.left_field = left_field
392 self.right_table = right_table.transient_table()
393 if right_field:
394 self.right_field = right_field
395 else:
396 self.right_field = self.left_field
397 self.outer_join = outer_join
398
399 title = "Join of %(left)s and %(right)s" \
400 % {"left": self.left_table.Title(),
401 "right": self.right_table.Title()}
402 TitledObject.__init__(self, title)
403
404 self.create()
405
406 def create(self):
407 """Internal: Create the table with the joined data"""
408 self.tablename = self.db.new_table_name()
409
410 self.right_table.ensure_index(self.right_field)
411
412 # determine the internal column names to join on before
413 # coalescing the column information because if the external
414 # column names are the same they will be mapped to the same
415 # internal name afterwards.
416 internal_left_col = self.left_table.orig_to_internal[self.left_field]
417 internal_right_col =self.right_table.orig_to_internal[self.right_field]
418
419 # Coalesce the column information
420 visited = {}
421 columns = []
422 newcolumns = []
423 for table in (self.left_table, self.right_table):
424 for col in table.Columns():
425 colname = col.name
426 # We can't allow multiple columns with the same
427 # original name, so append '_' to this one until
428 # it is unique.
429 # FIXME: There should be a better solution.
430 while colname in visited:
431 colname = colname + '_'
432 columns.append((table.tablename, col))
433 newcol = ColumnReference(colname, col.type,
434 "Col%03d" % (len(newcolumns)+1))
435 newcolumns.append(newcol)
436 visited[colname] = 1
437 TransientTableBase.create(self, newcolumns)
438
439 # Copy the joined data to the table.
440 newinternal_names = [col.internal_name for col in self.columns]
441 internal_references = ["%s.%s" % (table, col.internal_name)
442 for table, col in columns]
443 if self.outer_join:
444 join_operator = 'LEFT OUTER JOIN'
445 else:
446 join_operator = 'JOIN'
447 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
448 " %s %s ON %s.%s = %s.%s;"
449 % (self.tablename,
450 ", ".join(newinternal_names),
451 self.left_table.tablename,
452 ", ".join(internal_references),
453 self.left_table.tablename,
454 join_operator,
455 self.right_table.tablename,
456 self.left_table.tablename,
457 internal_left_col,
458 self.right_table.tablename,
459 internal_right_col))
460 self.db.execute(stmt)
461
462 def Dependencies(self):
463 """Return a tuple with the two tables the join depends on."""
464 return self.dependencies
465
466 def JoinType(self):
467 """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
468 if self.outer_join:
469 return "LEFT OUTER"
470 else:
471 return "INNER"
472
473
474 class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
475
476 """Table that copies data to a transient table on demand.
477
478 The AutoTransientTable takes another table as input and copies data
479 to a table in a TransientDatabase instance on demand.
480 """
481
482 def __init__(self, transient_db, table):
483 TitledObject.__init__(self, table.Title())
484 self.transient_db = transient_db
485 self.table = table
486 self.t_table = None
487
488 def Columns(self):
489 return self.table.Columns()
490
491 def Column(self, col):
492 return self.table.Column(col)
493
494 def HasColumn(self, col):
495 """Return whether the table has a column with the given name or index
496 """
497 return self.table.HasColumn(col)
498
499 def NumRows(self):
500 return self.table.NumRows()
501
502 def NumColumns(self):
503 return self.table.NumColumns()
504
505 def ReadRowAsDict(self, record):
506 """Return the record no. record as a dict mapping field names to values
507 """
508 if self.t_table is not None:
509 return self.t_table.ReadRowAsDict(record)
510 else:
511 return self.table.ReadRowAsDict(record)
512
513 def ReadValue(self, row, col):
514 """Return the value of the specified row and column
515
516 The col parameter may be the index of the column or its name.
517 """
518 if self.t_table is not None:
519 return self.t_table.ReadValue(row, col)
520 else:
521 return self.table.ReadValue(row, col)
522
523 def copy_to_transient(self):
524 """Internal: Create a transient table and copy the data into it"""
525 self.t_table = TransientTable(self.transient_db, self)
526
527 def transient_table(self):
528 """
529 Return a table whose underlying implementation is in the transient db
530 """
531 if self.t_table is None:
532 self.copy_to_transient()
533 return self.t_table
534
535 def ValueRange(self, col):
536 if self.t_table is None:
537 self.copy_to_transient()
538 return self.t_table.ValueRange(col)
539
540 def UniqueValues(self, col):
541 if self.t_table is None:
542 self.copy_to_transient()
543 return self.t_table.UniqueValues(col)
544
545 def SimpleQuery(self, left, comparison, right):
546 if self.t_table is None:
547 self.copy_to_transient()
548 # Make sure to use the column object of the transient table. The
549 # left argument is always a column object so we can just ask the
550 # t_table for the right object.
551 if hasattr(right, "name"):
552 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
553 comparison,
554 self.t_table.Column(right.name))
555 else:
556 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
557 comparison, right)
558
559 def Dependencies(self):
560 """Return a tuple containing the original table"""
561 return (self.table,)
562
563 def Width(self, col):
564 return self.table.Width(col)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26