/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1968 - (show annotations)
Fri Nov 21 14:33:27 2003 UTC (21 years, 3 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 25412 byte(s)
Enable the experimental attribute editing again and introduce a
command line switch to actually activate it

* Thuban/UI/main.py (options): New. Container for options set on
the commmand line
(main): Add the --enable-attribute-editing flag.

* Thuban/UI/identifyview.py (IdentifyView.__init__): If attribute
editing is enabled use the grid ctrl which allows editing of the
values

* Thuban/Model/transientdb.py (AutoTransientTable.write_record):
New. Just delegate this to the underlying table.

1 # Copyright (C) 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 #
5 # This program is free software under the GPL (>=v2)
6 # Read the file COPYING coming with the software for details.
7
8 """Database for transient data
9
10 This database is intended for data representations needed during the
11 lifetime of a Thuban session but which is not permanent. Examples of
12 this are for instance a join of two DBF files where the DBF files are
13 the permanent representation of the data and the join only exists in the
14 Thuban session and is reconstructed when the session is opened.
15 """
16
17 __version__ = "$Revision$"
18 # $Source$
19 # $Id$
20
21 from sqlite import connect
22
23 from base import TitledObject
24
25 import table
26
27 sql_type_map = {
28 table.FIELDTYPE_INT: "INTEGER",
29 table.FIELDTYPE_STRING: "VARCHAR",
30 table.FIELDTYPE_DOUBLE: "FLOAT",
31 }
32
33 type_converter_map = {
34 table.FIELDTYPE_INT: int,
35 table.FIELDTYPE_STRING: str,
36 table.FIELDTYPE_DOUBLE: float,
37 }
38
39 class TransientDatabase:
40
41 def __init__(self, filename):
42 self.filename = filename
43 self.conn = connect(filename)
44 # Counters to produce unique table and column names
45 self.num_tables = 0
46 self.num_cols = 0
47 # Since there's only once process using the SQLite database, we
48 # might be able to get a tad more speed with default_synchronous
49 # OFF. So far I haven't seen any measurable speedup, though.
50 #self.execute("PRAGMA default_synchronous = OFF")
51
52 def __del__(self):
53 self.close()
54
55 def close(self):
56 if self.conn is not None:
57 self.conn.close()
58 self.conn = None
59
60 def new_table_name(self):
61 self.num_tables += 1
62 return "Table%03d" % self.num_tables
63
64 def new_column_name(self):
65 self.num_cols += 1
66 return "Col%03d" % self.num_cols
67
68 def execute(self, *args):
69 """execute the SQL statement in the database and return the result"""
70 cursor = self.conn.cursor()
71 cursor.execute(*args)
72 result = cursor.fetchone()
73 self.conn.commit()
74 return result
75
76 def cursor(self):
77 return self.conn.cursor()
78
79
80 class ColumnReference:
81
82 def __init__(self, name, type, internal_name):
83 self.name = name
84 self.type = type
85 self.internal_name = internal_name
86
87
88 class TransientTableBase:
89
90 """Base class for tables in the transient database"""
91
92 def __init__(self, transient_db):
93 """Initialize the table for use with the given transient db"""
94 self.db = transient_db
95 self.tablename = self.db.new_table_name()
96 self.indexed_columns = {}
97 self.read_record_cursor = None
98 self.read_record_last_row = None
99 self.read_record_last_result = None
100
101 def create(self, columns):
102 self.columns = columns
103 self.name_to_column = {}
104 self.orig_names = []
105 self.internal_to_orig = {}
106 self.orig_to_internal = {}
107 self.column_map = {}
108
109 # Create the column objects and fill various maps and lists
110 for index in range(len(self.columns)):
111 col = self.columns[index]
112 self.name_to_column[col.name] = col
113 self.orig_names.append(col.name)
114 self.internal_to_orig[col.internal_name] = col.name
115 self.orig_to_internal[col.name] = col.internal_name
116 self.column_map[col.name] = col
117 self.column_map[index] = col
118
119 # Build the CREATE TABLE statement and create the table in the
120 # database
121 table_types = ["id INTEGER PRIMARY KEY"]
122 for col in self.columns:
123 table_types.append("%s %s" % (col.internal_name,
124 sql_type_map[col.type]))
125 table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
126 ",\n ".join(table_types))
127 self.db.execute(table_stmt)
128
129 def transient_table(self):
130 """
131 Return a table whose underlying implementation is in the transient db
132 """
133 return self
134
135 def ensure_index(self, column):
136 """Ensure that there's an index on the given column"""
137 if not column in self.indexed_columns:
138 internal_name = self.orig_to_internal[column]
139 indexname = "Index_%s_%s" % (self.tablename, internal_name)
140 stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
141 internal_name)
142 self.db.execute(stmt)
143 self.indexed_columns[column] = 1
144
145 def NumColumns(self):
146 return len(self.columns)
147
148 def NumRows(self):
149 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150 return int(result[0])
151
152 def Columns(self):
153 return self.columns
154
155 def Column(self, col):
156 return self.column_map[col]
157
158 def HasColumn(self, col):
159 """Return whether the table has a column with the given name or index
160 """
161 return self.column_map.has_key(col)
162
163 def RowIdToOrdinal(self, gid):
164 """Return the row ordinal given its id
165
166 At the moment the transient tables are only used for tables that
167 don't distinguish between row number and row id, so the value is
168 returned unchanged.
169 """
170 return gid
171
172 def RowOrdinalToId(self, num):
173 """Return the rowid for given its ordinal
174
175 At the moment the transient tables are only used for tables that
176 don't distinguish between row number and row id, so the value is
177 returned unchanged.
178 """
179 return num
180
181 def ReadRowAsDict(self, index, row_is_ordinal = 0):
182 """Return the entire row as a dictionary with column names as keys
183
184 The row_is_ordinal is ignored because at the moment the
185 transient tables are only used for DBF files where it doesn't
186 matter.
187 """
188 # Implementation Strategy: Executing a completely new select
189 # statement every time this method is called is too slow. The
190 # most important usage is to read the records more or less
191 # sequentially. This happens e.g. when drawing a layer with a
192 # classification where the shapes are drawn in order of the
193 # shape ids. Another pattern is that the same row is requested
194 # several times in a row. This happens in the table view, for
195 # instance.
196
197 # We can exploit this to make access faster by having one cursor
198 # open all the time and keeping the last row read around in case
199 # the same row is accessed again the next time and if the row
200 # index is larger than the row we have read last we simply fetch
201 # rows from the cursor until we've reached the requested row. If
202 # the requested row index is smaller then we start a new cursor.
203
204 # FIXME: So far this scheme seems to work well enough. Obvious
205 # improvements would be to start the cursor at exactly the
206 # requested row (should be efficient and easy to do now that the
207 # id is the primary key) and to perhaps to also start a new
208 # cursor if the requested index is much larger than the last row
209 # so that we don't read and discard lots of the rows.
210
211 # Check whether we have to start a new cursor
212 if self.read_record_cursor is None or index <self.read_record_last_row:
213 stmt = ("SELECT %s FROM %s;"
214 % (", ".join([c.internal_name for c in self.columns]),
215 self.tablename))
216 self.read_record_cursor = self.db.cursor()
217 self.read_record_cursor.execute(stmt)
218 self.read_record_last_row = -1
219 self.read_record_last_result = None
220
221 # Now we should have a cursor at a position less than or equal
222 # to the index so the following if statement will always set
223 # result to a suitable value
224 assert index >= self.read_record_last_row
225
226 if index == self.read_record_last_row:
227 result = self.read_record_last_result
228 else:
229 for i in range(index - self.read_record_last_row):
230 result = self.read_record_cursor.fetchone()
231 self.read_record_last_result = result
232 self.read_record_last_row = index
233 return dict(zip(self.orig_names, result))
234
235 def ReadValue(self, row, col, row_is_ordinal = 0):
236 """Return the value of the specified row and column
237
238 The col parameter may be the index of the column or its name.
239
240 The row_is_ordinal is ignored because at the moment the
241 transient tables are only used for DBF files where it doesn't
242 matter.
243 """
244 # Depending on the actual access patterns of the table data, it
245 # might be a bit faster in some circumstances to not implement
246 # this via ReadRowAsDict, but this simple implementation should
247 # be fast enough for most purposes.
248 return self.ReadRowAsDict(row)[self.column_map[col].name]
249
250 def ValueRange(self, col):
251 # Performance notes:
252 #
253 # In sqlite 2.8.6 the min and max aggregate functions can use an
254 # index but only when used as the only expression in the select
255 # statement (i.e. 'select min(col), max(col) from tbl;' will not
256 # use the index but 'select min(col) from tbl;' will) so we
257 # query the minimum and maximum separately.
258 #
259 # With the separate statements we can take advantage of an index
260 # if it exists. If the index doesn't exist, creating it first
261 # and then using it in the query is slower than the queries
262 # without an index. Creating the index is only an advantage if
263 # the queries are performed multiple times. With the current use
264 # patterns where ValueRange is only used occasionally by the
265 # classification generation dialog creating the index only for
266 # this usage is not really worth it, so we don't.
267 col = self.column_map[col]
268 iname = col.internal_name
269 min = self.db.execute("SELECT min(%s) FROM %s;"
270 % (iname, self.tablename))[0]
271 max = self.db.execute("SELECT max(%s) FROM %s;"
272 % (iname, self.tablename))[0]
273 converter = type_converter_map[col.type]
274 return (converter(min), converter(max))
275
276 def UniqueValues(self, col):
277 # Performance notes:
278 #
279 # In sqlite 2.8.6 there doesn't seem to be a way to query the
280 # unique items that uses an index. I've tried
281 #
282 # SELECT col FROM tbl GROUP BY col;
283 #
284 # and
285 #
286 # SELECT DISTINCT col FROM tbl;
287 #
288 # and in both cases the index is not used. If the index isn't
289 # used it doesn't make sense to call self.ensure_index.
290 iname = self.column_map[col].internal_name
291 cursor = self.db.cursor()
292 cursor.execute("SELECT %s FROM %s GROUP BY %s;"
293 % (iname, self.tablename, iname))
294 result = []
295 while 1:
296 row = cursor.fetchone()
297 if row is None:
298 break
299 result.append(row[0])
300 return result
301
302 def Width(self, col):
303 """Return the maximum width of values in the column
304
305 The return value is the the maximum length of string
306 representation of the values in the column (represented by index
307 or name).
308 """
309 max = 0
310
311 type = self.column_map[col].type
312 iname = self.column_map[col].internal_name
313 cursor = self.db.cursor()
314 cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
315 values = [ i[0] for i in cursor.fetchall()]
316 if not values:
317 return None
318
319 if type == table.FIELDTYPE_DOUBLE:
320 format = "%.12f"
321 elif type == table.FIELDTYPE_INT:
322 format = "%d"
323 else:
324 format = "%s"
325 for value in values:
326 if value is None: continue
327 l = len(format % value)
328 if l > max:
329 max = l
330
331 return max
332
333 def SimpleQuery(self, left, comparison, right):
334 """Return the indices of all rows that matching a condition.
335
336 Parameters:
337 left -- The column object for the left side of the comparison
338
339 comparison -- The comparison operator as a string. It must be
340 one of '==', '!=', '<', '<=', '>=', '>'
341
342 right -- The right hand side of the comparison. It must be
343 either a column object or a value, i.e. a string,
344 int or float.
345
346 The return value is a sorted list of the indices of the rows
347 where the condition is true.
348 """
349 if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
350 raise ValueError("Comparison operator %r not allowed" % comparison)
351
352 if hasattr(right, "internal_name"):
353 right_template = right.internal_name
354 params = ()
355 else:
356 right_template = "%s"
357 params = (right,)
358
359 query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
360 % (self.tablename, left.internal_name, comparison,
361 right_template)
362
363 cursor = self.db.cursor()
364 cursor.execute(query, params)
365 result = []
366 while 1:
367 row = cursor.fetchone()
368 if row is None:
369 break
370 result.append(row[0])
371 return result
372
373 def Dependencies(self):
374 """Placeholder for a method in a derived class.
375
376 Return a sequence with the tables and other data objects that
377 self depends on.
378 """
379 raise NotImplementedError
380
381
382 class TransientTable(TitledObject, TransientTableBase):
383
384 """A Table in a transient DB that starts as the copy of a Thuban Table."""
385
386 def __init__(self, transient_db, table):
387 """Create a new table in the given transient DB as a copy of table
388
389 The table argument can be any object implementing the Table
390 interface.
391 """
392 TransientTableBase.__init__(self, transient_db)
393 TitledObject.__init__(self, table.Title())
394 self.create(table)
395
396 def create(self, table):
397 columns = []
398 for col in table.Columns():
399 columns.append(ColumnReference(col.name, col.type,
400 self.db.new_column_name()))
401 TransientTableBase.create(self, columns)
402
403 # copy the input table to the transient db
404
405 # A key to insert to use for the formatting of the insert
406 # statement. The key must not be equal to any of the column
407 # names so we construct one by building a string of x's that is
408 # longer than any of the column names
409 id_key = max([len(col.name) for col in self.columns]) * "x"
410
411 insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
412 % (self.tablename,
413 ", ".join([col.internal_name
414 for col in self.columns]),
415 id_key,
416 ", ".join(["%%(%s)s" % col.name
417 for col in self.columns]))
418 cursor = self.db.cursor()
419 for i in range(table.NumRows()):
420 row = table.ReadRowAsDict(i)
421 row[id_key] = i
422 cursor.execute(insert_template, row)
423 self.db.conn.commit()
424
425
426
427 class TransientJoinedTable(TitledObject, TransientTableBase):
428
429 """A Table in the transient DB that contains a join of two tables"""
430
431 def __init__(self, transient_db, left_table, left_field,
432 right_table, right_field = None, outer_join = False):
433 """Create a new table in the transient DB as a join of two tables.
434
435 Both input tables, left_table and right_table must have a
436 transient_table method that returns a table object for a table
437 in the transient database. The join is performed on the condition
438 that the value of the left_field column the the left table is
439 equal to the value of the right_field in the right_table.
440
441 The joined table contains all columns of the input tables,
442 however, the column names of the right table may be changed
443 slightly to make them unique in the joined table. This is
444 currently done by appending a sufficient number of underscores
445 ('_').
446 """
447 TransientTableBase.__init__(self, transient_db)
448 self.dependencies = (left_table, right_table)
449 self.left_table = left_table.transient_table()
450 self.left_field = left_field
451 self.right_table = right_table.transient_table()
452 if right_field:
453 self.right_field = right_field
454 else:
455 self.right_field = self.left_field
456 self.outer_join = outer_join
457
458 title = "Join of %(left)s and %(right)s" \
459 % {"left": self.left_table.Title(),
460 "right": self.right_table.Title()}
461 TitledObject.__init__(self, title)
462
463 self.create()
464
465 def create(self):
466 """Internal: Create the table with the joined data"""
467 self.tablename = self.db.new_table_name()
468
469 self.right_table.ensure_index(self.right_field)
470
471 # determine the internal column names to join on before
472 # coalescing the column information because if the external
473 # column names are the same they will be mapped to the same
474 # internal name afterwards.
475 internal_left_col = self.left_table.orig_to_internal[self.left_field]
476 internal_right_col =self.right_table.orig_to_internal[self.right_field]
477
478 # Coalesce the column information
479 visited = {}
480 columns = []
481 newcolumns = []
482 for table in (self.left_table, self.right_table):
483 for col in table.Columns():
484 colname = col.name
485 # We can't allow multiple columns with the same
486 # original name, so append '_' to this one until
487 # it is unique.
488 # FIXME: There should be a better solution.
489 while colname in visited:
490 colname = colname + '_'
491 columns.append((table.tablename, col))
492 newcol = ColumnReference(colname, col.type,
493 "Col%03d" % (len(newcolumns)+1))
494 newcolumns.append(newcol)
495 visited[colname] = 1
496 TransientTableBase.create(self, newcolumns)
497
498 # Copy the joined data to the table.
499 newinternal_names = [col.internal_name for col in self.columns]
500 internal_references = ["%s.%s" % (table, col.internal_name)
501 for table, col in columns]
502 if self.outer_join:
503 join_operator = 'LEFT OUTER JOIN'
504 else:
505 join_operator = 'JOIN'
506 stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
507 " %s %s ON %s.%s = %s.%s;"
508 % (self.tablename,
509 ", ".join(newinternal_names),
510 self.left_table.tablename,
511 ", ".join(internal_references),
512 self.left_table.tablename,
513 join_operator,
514 self.right_table.tablename,
515 self.left_table.tablename,
516 internal_left_col,
517 self.right_table.tablename,
518 internal_right_col))
519 self.db.execute(stmt)
520
521 def Dependencies(self):
522 """Return a tuple with the two tables the join depends on."""
523 return self.dependencies
524
525 def JoinType(self):
526 """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
527 if self.outer_join:
528 return "LEFT OUTER"
529 else:
530 return "INNER"
531
532
533 class AutoTransientTable(TitledObject):
534
535 """Table that copies data to a transient table on demand.
536
537 The AutoTransientTable takes another table as input and copies data
538 to a table in a TransientDatabase instance on demand.
539 """
540
541 def __init__(self, transient_db, table):
542 TitledObject.__init__(self, table.Title())
543 self.transient_db = transient_db
544 self.table = table
545 self.t_table = None
546
547 def Columns(self):
548 return self.table.Columns()
549
550 def Column(self, col):
551 return self.table.Column(col)
552
553 def HasColumn(self, col):
554 """Return whether the table has a column with the given name or index
555 """
556 return self.table.HasColumn(col)
557
558 def NumRows(self):
559 return self.table.NumRows()
560
561 def NumColumns(self):
562 return self.table.NumColumns()
563
564 def RowIdToOrdinal(self, gid):
565 """Return the row ordinal given its id"""
566 if self.t_table is not None:
567 return self.t_table.RowIdToOrdinal(gid)
568 else:
569 return self.table.RowIdToOrdinal(gid)
570
571 def RowOrdinalToId(self, num):
572 """Return the rowid for given its ordinal"""
573 if self.t_table is not None:
574 return self.t_table.RowOrdinalToId(num)
575 else:
576 return self.table.RowOrdinalToId(num)
577
578 def ReadRowAsDict(self, record, row_is_ordinal = 0):
579 """Return the record no. record as a dict mapping field names to values
580 """
581 if self.t_table is not None:
582 return self.t_table.ReadRowAsDict(record,
583 row_is_ordinal = row_is_ordinal)
584 else:
585 return self.table.ReadRowAsDict(record,
586 row_is_ordinal = row_is_ordinal)
587
588 def ReadValue(self, row, col, row_is_ordinal = 0):
589 """Return the value of the specified row and column
590
591 The col parameter may be the index of the column or its name.
592 """
593 if self.t_table is not None:
594 return self.t_table.ReadValue(row, col,
595 row_is_ordinal = row_is_ordinal)
596 else:
597 return self.table.ReadValue(row, col,
598 row_is_ordinal = row_is_ordinal)
599
600 def copy_to_transient(self):
601 """Internal: Create a transient table and copy the data into it"""
602 self.t_table = TransientTable(self.transient_db, self)
603
604 def transient_table(self):
605 """
606 Return a table whose underlying implementation is in the transient db
607 """
608 if self.t_table is None:
609 self.copy_to_transient()
610 return self.t_table
611
612 def ValueRange(self, col):
613 # Performance of sqlite vs. DBF for this method:
614 #
615 # If the table has been copied to the sqlite database it's
616 # faster to use it even if there is no index on that column.
617 # Otherwise it's faster to simply loop through all rows in the
618 # DBF file. Copying the data to the sqlite database can take
619 # very long for large amounts of data
620 #
621 # Of course if the table is not a DBF file the issue could be
622 # different, although copying the data into sqlite first will
623 # likely always be slower than simply querying the non-sqlite
624 # table directly. Currently only DBFfiles and memory tables are
625 # used as the underlying non-sqlite table, though.
626 if self.t_table is not None:
627 return self.t_table.ValueRange(col)
628 else:
629 return self.table.ValueRange(col)
630
631 def UniqueValues(self, col):
632 # The performance trade-offs for this method are basically the
633 # same as for ValueRange except that currently there doesn't
634 # seem to be a way to take advantage of indexes in this case in
635 # sqlite. However, but it's still faster to query the transient
636 # table if it already exists.
637 if self.t_table is not None:
638 return self.t_table.UniqueValues(col)
639 else:
640 return self.table.UniqueValues(col)
641
642 def SimpleQuery(self, left, comparison, right):
643 if self.t_table is None:
644 self.copy_to_transient()
645 # Make sure to use the column object of the transient table. The
646 # left argument is always a column object so we can just ask the
647 # t_table for the right object.
648 if hasattr(right, "name"):
649 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
650 comparison,
651 self.t_table.Column(right.name))
652 else:
653 return self.t_table.SimpleQuery(self.t_table.Column(left.name),
654 comparison, right)
655
656 def Dependencies(self):
657 """Return a tuple containing the original table"""
658 return (self.table,)
659
660 def Width(self, col):
661 return self.table.Width(col)
662
663 def write_record(self, row, values):
664 """Write the values to the given row.
665
666 This is a very experimental feature which doesn't work in all
667 cases, so you better know what you're doing when calling this
668 method.
669 """
670 self.table.write_record(row, values)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26