/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1026 - (show annotations)
Mon May 26 11:46:42 2003 UTC (21 years, 9 months ago) by frank
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 20712 byte(s)
TrannsientTableBase.Width, TransientTableBase.Precision):
Return column width and precision.

1 # Copyright (C) 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 #
5 # This program is free software under the GPL (>=v2)
6 # Read the file COPYING coming with the software for details.
7
8 """Database for transient data
9
10 This database is intended for data representations needed during the
11 lifetime of a Thuban session but which is not permanent. Examples of
12 this are for instance a join of two DBF files where the DBF files are
13 the permanent representation of the data and the join only exists in the
14 Thuban session and is reconstructed when the session is opened.
15 """
16
17 __version__ = "$Revision$"
18 # $Source$
19 # $Id$
20
21 import os
22 import weakref
23 from sqlite import connect
24
25 from base import TitledObject
26
27 import table
28
29 sql_type_map = {
30 table.FIELDTYPE_INT: "INTEGER",
31 table.FIELDTYPE_STRING: "VARCHAR",
32 table.FIELDTYPE_DOUBLE: "FLOAT",
33 }
34
35 type_converter_map = {
36 table.FIELDTYPE_INT: int,
37 table.FIELDTYPE_STRING: str,
38 table.FIELDTYPE_DOUBLE: float,
39 }
40
41 class TransientDatabase:
42
43 def __init__(self, filename):
44 self.filename = filename
45 self.conn = connect(filename)
46 # Counters to produce unique table and column names
47 self.num_tables = 0
48 self.num_cols = 0
49 # Since there's only once process using the SQLite database, we
50 # might be able to get a tad more speed with default_synchronous
51 # OFF. So far I haven't seen any measurable speedup, though.
52 #self.execute("PRAGMA default_synchronous = OFF")
53
54 def __del__(self):
55 self.close()
56
57 def close(self):
58 if self.conn is not None:
59 self.conn.close()
60 self.conn = None
61
62 def new_table_name(self):
63 self.num_tables += 1
64 return "Table%03d" % self.num_tables
65
66 def new_column_name(self):
67 self.num_cols += 1
68 return "Col%03d" % self.num_cols
69
70 def execute(self, *args):
71 """execute the SQL statement in the database and return the result"""
72 cursor = self.conn.cursor()
73 cursor.execute(*args)
74 result = cursor.fetchone()
75 self.conn.commit()
76 return result
77
78 def cursor(self):
79 return self.conn.cursor()
80
81
82 class ColumnReference:
83
84 def __init__(self, name, type, internal_name):
85 self.name = name
86 self.type = type
87 self.internal_name = internal_name
88
89
90 class TransientTableBase(table.OldTableInterfaceMixin):
91
92 """Base class for tables in the transient database"""
93
94 def __init__(self, transient_db):
95 """Initialize the table for use with the given transient db"""
96 self.db = transient_db
97 self.tablename = self.db.new_table_name()
98 self.indexed_columns = {}
99 self.read_record_cursor = None
100 self.read_record_last_row = None
101 self.read_record_last_result = None
102
103 def create(self, columns):
104 self.columns = columns
105 self.name_to_column = {}
106 self.orig_names = []
107 self.internal_to_orig = {}
108 self.orig_to_internal = {}
109 self.column_map = {}
110
111 # Create the column objects and fill various maps and lists
112 for index in range(len(self.columns)):
113 col = self.columns[index]
114 self.name_to_column[col.name] = col
115 self.orig_names.append(col.name)
116 self.internal_to_orig[col.internal_name] = col.name
117 self.orig_to_internal[col.name] = col.internal_name
118 self.column_map[col.name] = col
119 self.column_map[index] = col
120
121 # Build the CREATE TABLE statement and create the table in the
122 # database
123 table_types = ["id INTEGER PRIMARY KEY"]
124 for col in self.columns:
125 table_types.append("%s %s" % (col.internal_name,
126 sql_type_map[col.type]))
127 table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
128 ",\n ".join(table_types))
129 self.db.execute(table_stmt)
130
131 def transient_table(self):
132 """
133 Return a table whose underlying implementation is in the transient db
134 """
135 return self
136
137 def ensure_index(self, column):
138 """Ensure that there's an index on the given column"""
139 if not column in self.indexed_columns:
140 internal_name = self.orig_to_internal[column]
141 indexname = "Index_%s_%s" % (self.tablename, internal_name)
142 stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
143 internal_name)
144 self.db.execute(stmt)
145 self.indexed_columns[column] = 1
146
147 def NumColumns(self):
148 return len(self.columns)
149
150 def NumRows(self):
151 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152 return int(result[0])
153
154 def Columns(self):
155 return self.columns
156
157 def Column(self, col):
158 return self.column_map[col]
159
160 def HasColumn(self, col):
161 """Return whether the table has a column with the given name or index
162 """
163 return self.column_map.has_key(col)
164
165 def ReadRowAsDict(self, index):
166 # Implementation Strategy: Executing a completely new select
167 # statement every time this method is called is too slow. The
168 # most important usage is to read the records more or less
169 # sequentially. This happens e.g. when drawing a layer with a
170 # classification where the shapes are drawn in order of the
171 # shape ids. Another pattern is that the same row is requested
172 # several times in a row. This happens in the table view, for
173 # instance.
174
175 # We can exploit this to make access faster by having one cursor
176 # open all the time and keeping the last row read around in case
177 # the same row is accessed again the next time and if the row
178 # index is larger than the row we have read last we simply fetch
179 # rows from the cursor until we've reached the requested row. If
180 # the requested row index is smaller then we start a new cursor.
181
182 # FIXME: So far this scheme seems to work well enough. Obvious
183 # improvements would be to start the cursor at exactly the
184 # requested row (should be efficient and easy to do now that the
185 # id is the primary key) and to perhaps to also start a new
186 # cursor if the requested index is much larger than the last row
187 # so that we don't read and discard lots of the rows.
188
189 # Check whether we have to start a new cursor
190 if self.read_record_cursor is None or index <self.read_record_last_row:
191 stmt = ("SELECT %s FROM %s;"
192 % (", ".join([c.internal_name for c in self.columns]),
193 self.tablename))
194 self.read_record_cursor = self.db.cursor()
195 self.read_record_cursor.execute(stmt)
196 self.read_record_last_row = -1
197 self.read_record_last_result = None
198
199 # Now we should have a cursor at a position less than or equal
200 # to the index so the following if statement will always set
201 # result to a suitable value
202 assert index >= self.read_record_last_row
203
204 if index == self.read_record_last_row:
205 result = self.read_record_last_result
206 else:
207 for i in range(index - self.read_record_last_row):
208 result = self.read_record_cursor.fetchone()
209 self.read_record_last_result = result
210 self.read_record_last_row = index
211 return dict(zip(self.orig_names, result))
212
213 def ReadValue(self, row, col):
214 """Return the value of the specified row and column
215
216 The col parameter may be the index of the column or its name.
217 """
218 # Depending on the actual access patterns of the table data, it
219 # might be a bit faster in some circumstances to not implement
220 # this via ReadRowAsDict, but this simple implementation should
221 # be fast enough for most purposes.
222 return self.ReadRowAsDict(row)[self.column_map[col].name]
223
224 def ValueRange(self, col):
225 col = self.column_map[col]
226 iname = col.internal_name
227 min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228 % (iname, iname, self.tablename))
229 converter = type_converter_map[col.type]
230 return (converter(min), converter(max))
231
232 def UniqueValues(self, col):
233 iname = self.column_map[col].internal_name
234 cursor = self.db.cursor()
235 cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236 % (iname, self.tablename, iname))
237 result = []
238 while 1:
239 row = cursor.fetchone()
240 if row is None:
241 break
242 result.append(row[0])
243 return result
244
245 def Width(self, col):
246 """Return the maximum width of values in the column
247
248 The return value is the the maximum length of string representation
249 of the values in the column (represented by index or name)."""
250 max = 0
251
252 type = self.column_map[col].type
253 iname = self.column_map[col].internal_name
254 cursor = self.db.cursor()
255 cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256 values = [ i[0] for i in cursor.fetchall()]
257 if not values:
258 return None
259
260 if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
261 prec = self.Precision(col)
262 format = "%%.%df" % prec
263 elif type == sql_type_map[table.FIELDTYPE_INT]:
264 format = "%d"
265 else:
266 format = "%s"
267 for value in values:
268 if value is None: continue
269 l = len(format % value)
270 if l > max:
271 max = l
272
273 return max
274
275 def Precision(self, col):
276 """Return the precision of the column
277
278 The return value is the maximum number of numeric characters after the
279 decimal if column type is double. Else precision zero is returned.
280 The column can be represented by index or name.
281 """
282
283 type = self.column_map[col].type
284 if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
285 iname = self.column_map[col].internal_name
286 cursor = self.db.cursor()
287 cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
288 values = [ i[0] for i in cursor.fetchall()]
289 if not values:
290 return 0
291
292 max = 0
293 for value in values:
294 if value is None: continue
295 l = len(str(value % 1))
296 if l > max:
297 max = l
298 if max > 2:
299 return max - 2
300 else:
301 return 0
302 else:
303 return 0
304
305 def SimpleQuery(self, left, comparison, right):
306 """Return the indices of all rows that matching a condition.
307
308 Parameters:
309 left -- The column object for the left side of the comparison
310
311 comparison -- The comparison operator as a string. It must be
312 one of '==', '!=', '<', '<=', '>=', '>'
313
314 right -- The right hand side of the comparison. It must be
315 either a column object or a value, i.e. a string,
316 int or float.
317
318 The return value is a sorted list of the indices of the rows
319 where the condition is true.
320 """
321 if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
322 raise ValueError("Comparison operator %r not allowed" % comparison)
323
324 if hasattr(right, "internal_name"):
325 right_template = right.internal_name
326 params = ()
327 else:
328 right_template = "%s"
329 params = (right,)
330
331 query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
332 % (self.tablename, left.internal_name, comparison,
333 right_template)
334
335 cursor = self.db.cursor()
336 cursor.execute(query, params)
337 result = []
338 while 1:
339 row = cursor.fetchone()
340 if row is None:
341 break
342 result.append(row[0])
343 return result
344
345 def Dependencies(self):
346 """Placeholder for a method in a derived class.
347
348 Return a sequence with the tables and other data objects that
349 self depends on.
350 """
351 raise NotImplementedError
352
353
354 class TransientTable(TitledObject, TransientTableBase):
355
356 """A Table in a transient DB that starts as the copy of a Thuban Table."""
357
358 def __init__(self, transient_db, table):
359 """Create a new table in the given transient DB as a copy of table
360
361 The table argument can be any object implementing the Table
362 interface.
363 """
364 TransientTableBase.__init__(self, transient_db)
365 TitledObject.__init__(self, table.Title())
366 self.create(table)
367
368 def create(self, table):
369 columns = []
370 for col in table.Columns():
371 columns.append(ColumnReference(col.name, col.type,
372 self.db.new_column_name()))
373 TransientTableBase.create(self, columns)
374
375 # copy the input table to the transient db
376
377 # A key to insert to use for the formatting of the insert
378 # statement. The key must not be equal to any of the column
379 # names so we construct one by building a string of x's that is
380 # longer than any of the column names
381 id_key = max([len(col.name) for col in self.columns]) * "x"
382
383 insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
384 % (self.tablename,
385 ", ".join([col.internal_name
386 for col in self.columns]),
387 id_key,
388 ", ".join(["%%(%s)s" % col.name
389 for col in self.columns]))
390 cursor = self.db.cursor()
391 for i in range(table.NumRows()):
392 row = table.ReadRowAsDict(i)
393 row[id_key] = i
394 cursor.execute(insert_template, row)
395 self.db.conn.commit()
396
397
398
399 class TransientJoinedTable(TitledObject, TransientTableBase):
400
401 """A Table in the transient DB that contains a join of two tables"""
402
403 def __init__(self, transient_db, left_table, left_field,
404 right_table, right_field = None, outer_join = False):
405 """Create a new table in the transient DB as a join of two tables.
406
407 Both input tables, left_table and right_table must have a
408 transient_table method that returns a table object for a table
409 in the transient database. The join is performed on the condition
410 that the value of the left_field column the the left table is
411 equal to the value of the right_field in the right_table.
412
413 The joined table contains all columns of the input tables with
414 one exception: Any column in the right_table with the same name
415 as one of the columns in the left_table will be omitted. This is
416 somewhat of an implementation detail, but is done so that the
417 column names of the joined table can be the same as the column
418 names of the input tables without having to create prefixes.
419 """
420 TransientTableBase.__init__(self, transient_db)
421 self.dependencies = (left_table, right_table)
422 self.left_table = left_table.transient_table()
423 self.left_field = left_field
424 self.right_table = right_table.transient_table()
425 if right_field:
426 self.right_field = right_field
427 else:
428 self.right_field = self.left_field
429 self.outer_join = outer_join
430
431 title = "Join of %(left)s and %(right)s" \
432 % {"left": self.left_table.Title(),
433 "right": self.right_table.Title()}
434 TitledObject.__init__(self, title)
435
436 self.create()
437
438 def create(self):
439 """Internal: Create the table with the joined data"""
440 self.tablename = self.db.new_table_name()
441
442 self.right_table.ensure_index(self.right_field)
443
444 # Coalesce the column information
445 visited = {}
446 columns = []
447 for col in self.left_table.columns + self.right_table.columns:
448 if col.name in visited:
449 # We can't allow multiple columns with the same original
450 # name, so omit this one. FIXME: There should be a
451 # better solution.
452 continue
453 columns.append(col)
454 TransientTableBase.create(self, columns)
455
456 # Copy the joined data to the table.
457 internal_names = [col.internal_name for col in self.columns]
458 if self.outer_join:
459 join_operator = 'LEFT OUTER JOIN'
460 else:
461 join_operator = 'JOIN'
462 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
463 " %s %s ON %s = %s;"
464 % (self.tablename,
465 ", ".join(internal_names),
466 self.left_table.tablename,
467 ", ".join(internal_names),
468 self.left_table.tablename,
469 join_operator,
470 self.right_table.tablename,
471 self.orig_to_internal[self.left_field],
472 self.orig_to_internal[self.right_field]))
473 self.db.execute(stmt)
474
475 def Dependencies(self):
476 """Return a tuple with the two tables the join depends on."""
477 return self.dependencies
478
479
480 class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
481
482 """Table that copies data to a transient table on demand.
483
484 The AutoTransientTable takes another table as input and copies data
485 to a table in a TransientDatabase instance on demand.
486 """
487
488 def __init__(self, transient_db, table):
489 TitledObject.__init__(self, table.Title())
490 self.transient_db = transient_db
491 self.table = table
492 self.t_table = None
493
494 def Columns(self):
495 return self.table.Columns()
496
497 def Column(self, col):
498 return self.table.Column(col)
499
500 def HasColumn(self, col):
501 """Return whether the table has a column with the given name or index
502 """
503 return self.table.HasColumn(col)
504
505 def NumRows(self):
506 return self.table.NumRows()
507
508 def NumColumns(self):
509 return self.table.NumColumns()
510
511 def ReadRowAsDict(self, record):
512 """Return the record no. record as a dict mapping field names to values
513 """
514 if self.t_table is not None:
515 return self.t_table.ReadRowAsDict(record)
516 else:
517 return self.table.ReadRowAsDict(record)
518
519 def ReadValue(self, row, col):
520 """Return the value of the specified row and column
521
522 The col parameter may be the index of the column or its name.
523 """
524 if self.t_table is not None:
525 return self.t_table.ReadValue(row, col)
526 else:
527 return self.table.ReadValue(row, col)
528
529 def copy_to_transient(self):
530 """Internal: Create a transient table and copy the data into it"""
531 self.t_table = TransientTable(self.transient_db, self)
532
533 def transient_table(self):
534 """
535 Return a table whose underlying implementation is in the transient db
536 """
537 if self.t_table is None:
538 self.copy_to_transient()
539 return self.t_table
540
541 def ValueRange(self, col):
542 if self.t_table is None:
543 self.copy_to_transient()
544 return self.t_table.ValueRange(col)
545
546 def UniqueValues(self, col):
547 if self.t_table is None:
548 self.copy_to_transient()
549 return self.t_table.UniqueValues(col)
550
551 def SimpleQuery(self, left, comparison, right):
552 if self.t_table is None:
553 self.copy_to_transient()
554 # Make sure to use the column object of the transient table. The
555 # left argument is always a column object so we can just ask the
556 # t_table for the right object.
557 if hasattr(right, "name"):
558 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
559 comparison,
560 self.t_table.Column(right.name))
561 else:
562 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
563 comparison, right)
564
565 def Dependencies(self):
566 """Return a tuple containing the original table"""
567 return (self.table,)
568

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26