/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 765 - (show annotations)
Tue Apr 29 12:42:14 2003 UTC (21 years, 10 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 12347 byte(s)
Next step of table implementation. Introduce a transient database
using SQLite that some of the data is copied to on demand. This
allows us to do joins and other operations that require an index
for good performance with reasonable efficiency. Thuban now needs
SQLite 2.8.0 and pysqlite 0.4.1. Older versions may work but I
haven't tested that.

* Thuban/Model/transientdb.py: New. Transient database
implementation.

* test/test_transientdb.py: New. Tests for the transient DB
classes.

* Thuban/Model/session.py (AutoRemoveFile, AutoRemoveDir): New
classes to help automatically remove temporary files and
directories.
(Session.__init__): New instance variables temp_dir for the
temporary directory and transient_db for the SQLite database
(Session.temp_directory): New. Create a temporary directory if not
yet done and return its name. Use AutoRemoveDir to have it
automatically deleted
(Session.TransientDB): Instantiate the transient database if not
done yet and return it.

* Thuban/Model/data.py (ShapefileStore.__init__): Use an
AutoTransientTable so that data is copied to the transient DB on
demand.
(SimpleStore): New class that simply combines a table and a
shapefile

* Thuban/Model/table.py (Table, DBFTable): Rename Table into
DBFTable and update its doc-string to reflect the fact that this
is only the table interface to a DBF file. Table is now an alias
for DBFTable for temporary backwards compatibility.

* Thuban/UI/application.py (ThubanApplication.OnExit): Make sure
the last reference to the session goes away so that the temporary
files are removed properly.

* test/test_load.py (LoadSessionTest.tearDown): Remove the
reference to the session to make sure the temporary files are
removed.

1 # Copyright (C) 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 #
5 # This program is free software under the GPL (>=v2)
6 # Read the file COPYING coming with the software for details.
7
8 """Database for transient data
9
10 This database is intended for data representations needed during the
11 lifetime of a Thuban session but which is not permanent. Examples of
12 this are for instance a join of two DBF files where the DBF files are
13 the permanent representation of the data and the join only exists in the
14 Thuban session and is reconstructed when the session is opened.
15 """
16
17 __version__ = "$Revision$"
18 # $Source$
19 # $Id$
20
21 import os
22 import weakref
23 from sqlite import connect
24
25 import table
26
27 sql_type_map = {
28 table.FIELDTYPE_INT: "INTEGER",
29 table.FIELDTYPE_STRING: "VARCHAR",
30 table.FIELDTYPE_DOUBLE: "FLOAT",
31 }
32
33 type_converter_map = {
34 table.FIELDTYPE_INT: int,
35 table.FIELDTYPE_STRING: str,
36 table.FIELDTYPE_DOUBLE: float,
37 }
38
39 class TransientDatabase:
40
41 def __init__(self, filename):
42 self.filename = filename
43 self.conn = connect(filename)
44 # Counters to produce unique table and column names
45 self.num_tables = 0
46 self.num_cols = 0
47 # Since there's only once process using the SQLite database, we
48 # might be able to get a tad more speed with default_synchronous
49 # OFF. So far I haven't seen any measurable speedup, though.
50 #self.execute("PRAGMA default_synchronous = OFF")
51
52 def __del__(self):
53 self.close()
54
55 def close(self):
56 if self.conn:
57 self.conn.close()
58
59 def new_table_name(self):
60 self.num_tables += 1
61 return "Table%03d" % self.num_tables
62
63 def new_column_name(self):
64 self.num_cols += 1
65 return "Col%03d" % self.num_cols
66
67 def execute(self, *args):
68 """execute the SQL statement in the database and return the result"""
69 cursor = self.conn.cursor()
70 cursor.execute(*args)
71 result = cursor.fetchone()
72 self.conn.commit()
73 return result
74
75 def cursor(self):
76 return self.conn.cursor()
77
78
79 class ColumnReference:
80
81 def __init__(self, name, type, internal_name):
82 self.name = name
83 self.type = type
84 self.internal_name = internal_name
85
86
87 class TransientTableBase:
88
89 """Base class for tables in the transient database"""
90
91 def __init__(self, transient_db):
92 """Initialize the table for use with the given transient db"""
93 self.db = transient_db
94 self.tablename = self.db.new_table_name()
95 self.indexed_columns = {}
96 self.read_record_cursor = None
97 self.read_record_last_row = None
98
99 def create(self, columns):
100 self.columns = columns
101 self.name_to_column = {}
102 self.orig_names = []
103 self.internal_to_orig = {}
104 self.orig_to_internal = {}
105
106 # Create the column objects and fill various maps and lists
107 for col in self.columns:
108 self.name_to_column[col.name] = col
109 self.orig_names.append(col.name)
110 self.internal_to_orig[col.internal_name] = col.name
111 self.orig_to_internal[col.name] = col.internal_name
112
113 # Build the CREATE TABLE statement and create the table in the
114 # database
115 table_types = []
116 for col in self.columns:
117 table_types.append("%s %s" % (col.internal_name,
118 sql_type_map[col.type]))
119 table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
120 ",\n ".join(table_types))
121 self.db.execute(table_stmt)
122
123 def transient_table(self):
124 """
125 Return a table whose underlying implementation is in the transient db
126 """
127 return self
128
129 def ensure_index(self, column):
130 """Ensure that there's an index on the given column"""
131 if not column in self.indexed_columns:
132 internal_name = self.orig_to_internal[column]
133 indexname = "Index_%s_%s" % (self.tablename, internal_name)
134 stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
135 internal_name)
136 self.db.execute(stmt)
137 self.indexed_columns[column] = 1
138
139 def field_count(self):
140 return len(self.columns)
141
142 def field_info(self, i):
143 col = self.columns[i]
144 return col.type, col.name, 0, 0
145
146 def field_info_by_name(self, name):
147 for col in self.columns:
148 if col.name == name:
149 return col.type, col.name, 0, 0
150 else:
151 return None
152
153 def record_count(self):
154 result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
155 return int(result[0])
156
157 def read_record(self, index):
158 if self.read_record_cursor is None or index <self.read_record_last_row:
159 stmt = ("SELECT %s FROM %s;"
160 % (", ".join([c.internal_name for c in self.columns]),
161 self.tablename))
162 self.read_record_cursor = self.db.cursor()
163 self.read_record_cursor.execute(stmt)
164 self.read_record_last_row = -1
165 for i in range(index - self.read_record_last_row):
166 result = self.read_record_cursor.fetchone()
167 self.read_record_last_row = index
168 result = dict(zip(self.orig_names, result))
169 return result
170
171 def field_range(self, colname):
172 col = self.name_to_column[colname]
173 iname = col.internal_name
174 min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
175 % (iname, iname, self.tablename))
176 converter = type_converter_map[col.type]
177 return ((converter(min), None), (converter(max), None))
178
179 def GetUniqueValues(self, colname):
180 iname = self.orig_to_internal[colname]
181 cursor = self.db.cursor()
182 cursor.execute("SELECT %s FROM %s GROUP BY %s;"
183 % (iname, self.tablename, iname))
184 result = []
185 while 1:
186 row = cursor.fetchone()
187 if row is None:
188 break
189 result.append(row[0])
190 return result
191
192
193 class TransientTable(TransientTableBase):
194
195 """A Table in a transient DB that starts as the copy of a Thuban Table."""
196
197 def __init__(self, transient_db, table):
198 """Create a new table in the given transient DB as a copy of table
199
200 The table argument can be any object implementing the Table
201 interface.
202 """
203 TransientTableBase.__init__(self, transient_db)
204 self.create(table)
205
206 def create(self, table):
207 columns = []
208 for i in range(table.field_count()):
209 type, name = table.field_info(i)[:2]
210 columns.append(ColumnReference(name, type,
211 self.db.new_column_name()))
212 TransientTableBase.create(self, columns)
213
214 # copy the input table to the transient db
215 insert_template = "INSERT INTO %s (%s) VALUES (%s);" \
216 % (self.tablename,
217 ", ".join([col.internal_name
218 for col in self.columns]),
219 ", ".join(["%%(%s)s" % col.name
220 for col in self.columns]))
221 cursor = self.db.cursor()
222 for i in range(table.record_count()):
223 cursor.execute(insert_template, table.read_record(i))
224 self.db.conn.commit()
225
226
227
228 class TransientJoinedTable(TransientTableBase):
229
230 """A Table in the transient DB that contains a join of two tables"""
231
232 def __init__(self, transient_db, left_table, left_field,
233 right_table, right_field = None):
234 """Create a new table in the transient DB as a join of two tables.
235
236 Both input tables, left_table and right_table must have a
237 transient_table method that returns a table object for a table
238 in the trnsient database. The join is performed on the condition
239 that the value of the left_field column the the left table is
240 equal to the value of the right_field in the right_table.
241
242 The joined table contains all columns of the input tables with
243 one exception: Any column in the right_table with the same name
244 as one of the columns in the left_table will be omitted. This is
245 somewhat of an implementation detail, but is done so that the
246 column names of the joined table can be the same as the column
247 names of the input tables without having to create prefixes.
248 """
249 TransientTableBase.__init__(self, transient_db)
250 self.left_table = left_table.transient_table()
251 self.left_field = left_field
252 self.right_table = right_table.transient_table()
253 if right_field:
254 self.right_field = right_field
255 else:
256 self.right_field = self.left_field
257 self.create()
258
259 def create(self):
260 """Internal: Create the table with the joined data"""
261 self.tablename = self.db.new_table_name()
262
263 self.right_table.ensure_index(self.right_field)
264
265 # Coalesce the column information
266 visited = {}
267 columns = []
268 for col in self.left_table.columns + self.right_table.columns:
269 if col.name in visited:
270 continue
271 columns.append(col)
272 TransientTableBase.create(self, columns)
273
274 # Copy the joined data to the table.
275 internal_names = [col.internal_name for col in self.columns]
276 stmt = "INSERT INTO %s (%s) SELECT %s FROM %s JOIN %s ON %s = %s;" \
277 % (self.tablename,
278 ", ".join(internal_names),
279 ", ".join(internal_names),
280 self.left_table.tablename,
281 self.right_table.tablename,
282 self.orig_to_internal[self.left_field],
283 self.orig_to_internal[self.right_field])
284 self.db.execute(stmt)
285
286
287 class AutoTransientTable:
288
289 """Table that copies data to a transient table on demand.
290
291 The AutoTransientTable takes another table as input and copies data
292 to a table in a TransientDatabase instance on demand.
293 """
294
295 def __init__(self, transient_db, table):
296 self.transient_db = transient_db
297 self.table = table
298 self.t_table = None
299
300 def record_count(self):
301 """Return the number of records"""
302 return self.table.record_count()
303
304 def field_count(self):
305 """Return the number of fields in a record"""
306 return self.table.field_count()
307
308 def field_info(self, field):
309 """Return a tuple (type, name, width, prec) for the field no. field
310
311 type is the data type of the field, name the name, width the
312 field width in characters and prec the decimal precision.
313 """
314 info = self.table.field_info(field)
315 if info:
316 info = info[:2] + (0, 0)
317 return info
318
319 def field_info_by_name(self, fieldName):
320 info = self.table.field_info_by_name(fieldName)
321 if info:
322 info = info[:2] + (0, 0)
323 return info
324
325 def read_record(self, record):
326 """Return the record no. record as a dict mapping field names to values
327 """
328 if self.t_table is not None:
329 return self.t_table.read_record(record)
330 else:
331 return self.table.read_record(record)
332
333 def write_record(self, record, values):
334 raise NotImplementedError
335
336 def copy_to_transient(self):
337 """Internal: Create a transient table and copy the data into it"""
338 self.t_table = TransientTable(self.transient_db, self)
339
340 def transient_table(self):
341 """
342 Return a table whose underlying implementation is in the transient db
343 """
344 if self.t_table is None:
345 self.copy_to_transient()
346 return self.t_table
347
348 def field_range(self, colname):
349 if self.t_table is None:
350 self.copy_to_transient()
351 return self.t_table.field_range(colname)
352
353 def GetUniqueValues(self, colname):
354 if self.t_table is None:
355 self.copy_to_transient()
356 return self.t_table.GetUniqueValues(colname)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26