/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/transientdb.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 765 - (hide annotations)
Tue Apr 29 12:42:14 2003 UTC (21 years, 10 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/transientdb.py
File MIME type: text/x-python
File size: 12347 byte(s)
Next step of table implementation. Introduce a transient database
using SQLite that some of the data is copied to on demand. This
allows us to do joins and other operations that require an index
for good performance with reasonable efficiency. Thuban now needs
SQLite 2.8.0 and pysqlite 0.4.1. Older versions may work but I
haven't tested that.

* Thuban/Model/transientdb.py: New. Transient database
implementation.

* test/test_transientdb.py: New. Tests for the transient DB
classes.

* Thuban/Model/session.py (AutoRemoveFile, AutoRemoveDir): New
classes to help automatically remove temporary files and
directories.
(Session.__init__): New instance variables temp_dir for the
temporary directory and transient_db for the SQLite database
(Session.temp_directory): New. Create a temporary directory if not
yet done and return its name. Use AutoRemoveDir to have it
automatically deleted
(Session.TransientDB): Instantiate the transient database if not
done yet and return it.

* Thuban/Model/data.py (ShapefileStore.__init__): Use an
AutoTransientTable so that data is copied to the transient DB on
demand.
(SimpleStore): New class that simply combines a table and a
shapefile

* Thuban/Model/table.py (Table, DBFTable): Rename Table into
DBFTable and update its doc-string to reflect the fact that this
is only the table interface to a DBF file. Table is now an alias
for DBFTable for temporary backwards compatibility.

* Thuban/UI/application.py (ThubanApplication.OnExit): Make sure
the last reference to the session goes away so that the temporary
files are removed properly.

* test/test_load.py (LoadSessionTest.tearDown): Remove the
reference to the session to make sure the temporary files are
removed.

1 bh 765 # Copyright (C) 2003 by Intevation GmbH
2     # Authors:
3     # Bernhard Herzog <[email protected]>
4     #
5     # This program is free software under the GPL (>=v2)
6     # Read the file COPYING coming with the software for details.
7    
8     """Database for transient data
9    
10     This database is intended for data representations needed during the
11     lifetime of a Thuban session but which is not permanent. Examples of
12     this are for instance a join of two DBF files where the DBF files are
13     the permanent representation of the data and the join only exists in the
14     Thuban session and is reconstructed when the session is opened.
15     """
16    
17     __version__ = "$Revision$"
18     # $Source$
19     # $Id$
20    
21     import os
22     import weakref
23     from sqlite import connect
24    
25     import table
26    
27     sql_type_map = {
28     table.FIELDTYPE_INT: "INTEGER",
29     table.FIELDTYPE_STRING: "VARCHAR",
30     table.FIELDTYPE_DOUBLE: "FLOAT",
31     }
32    
33     type_converter_map = {
34     table.FIELDTYPE_INT: int,
35     table.FIELDTYPE_STRING: str,
36     table.FIELDTYPE_DOUBLE: float,
37     }
38    
39     class TransientDatabase:
40    
41     def __init__(self, filename):
42     self.filename = filename
43     self.conn = connect(filename)
44     # Counters to produce unique table and column names
45     self.num_tables = 0
46     self.num_cols = 0
47     # Since there's only once process using the SQLite database, we
48     # might be able to get a tad more speed with default_synchronous
49     # OFF. So far I haven't seen any measurable speedup, though.
50     #self.execute("PRAGMA default_synchronous = OFF")
51    
52     def __del__(self):
53     self.close()
54    
55     def close(self):
56     if self.conn:
57     self.conn.close()
58    
59     def new_table_name(self):
60     self.num_tables += 1
61     return "Table%03d" % self.num_tables
62    
63     def new_column_name(self):
64     self.num_cols += 1
65     return "Col%03d" % self.num_cols
66    
67     def execute(self, *args):
68     """execute the SQL statement in the database and return the result"""
69     cursor = self.conn.cursor()
70     cursor.execute(*args)
71     result = cursor.fetchone()
72     self.conn.commit()
73     return result
74    
75     def cursor(self):
76     return self.conn.cursor()
77    
78    
79     class ColumnReference:
80    
81     def __init__(self, name, type, internal_name):
82     self.name = name
83     self.type = type
84     self.internal_name = internal_name
85    
86    
87     class TransientTableBase:
88    
89     """Base class for tables in the transient database"""
90    
91     def __init__(self, transient_db):
92     """Initialize the table for use with the given transient db"""
93     self.db = transient_db
94     self.tablename = self.db.new_table_name()
95     self.indexed_columns = {}
96     self.read_record_cursor = None
97     self.read_record_last_row = None
98    
99     def create(self, columns):
100     self.columns = columns
101     self.name_to_column = {}
102     self.orig_names = []
103     self.internal_to_orig = {}
104     self.orig_to_internal = {}
105    
106     # Create the column objects and fill various maps and lists
107     for col in self.columns:
108     self.name_to_column[col.name] = col
109     self.orig_names.append(col.name)
110     self.internal_to_orig[col.internal_name] = col.name
111     self.orig_to_internal[col.name] = col.internal_name
112    
113     # Build the CREATE TABLE statement and create the table in the
114     # database
115     table_types = []
116     for col in self.columns:
117     table_types.append("%s %s" % (col.internal_name,
118     sql_type_map[col.type]))
119     table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
120     ",\n ".join(table_types))
121     self.db.execute(table_stmt)
122    
123     def transient_table(self):
124     """
125     Return a table whose underlying implementation is in the transient db
126     """
127     return self
128    
129     def ensure_index(self, column):
130     """Ensure that there's an index on the given column"""
131     if not column in self.indexed_columns:
132     internal_name = self.orig_to_internal[column]
133     indexname = "Index_%s_%s" % (self.tablename, internal_name)
134     stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
135     internal_name)
136     self.db.execute(stmt)
137     self.indexed_columns[column] = 1
138    
139     def field_count(self):
140     return len(self.columns)
141    
142     def field_info(self, i):
143     col = self.columns[i]
144     return col.type, col.name, 0, 0
145    
146     def field_info_by_name(self, name):
147     for col in self.columns:
148     if col.name == name:
149     return col.type, col.name, 0, 0
150     else:
151     return None
152    
153     def record_count(self):
154     result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
155     return int(result[0])
156    
157     def read_record(self, index):
158     if self.read_record_cursor is None or index <self.read_record_last_row:
159     stmt = ("SELECT %s FROM %s;"
160     % (", ".join([c.internal_name for c in self.columns]),
161     self.tablename))
162     self.read_record_cursor = self.db.cursor()
163     self.read_record_cursor.execute(stmt)
164     self.read_record_last_row = -1
165     for i in range(index - self.read_record_last_row):
166     result = self.read_record_cursor.fetchone()
167     self.read_record_last_row = index
168     result = dict(zip(self.orig_names, result))
169     return result
170    
171     def field_range(self, colname):
172     col = self.name_to_column[colname]
173     iname = col.internal_name
174     min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
175     % (iname, iname, self.tablename))
176     converter = type_converter_map[col.type]
177     return ((converter(min), None), (converter(max), None))
178    
179     def GetUniqueValues(self, colname):
180     iname = self.orig_to_internal[colname]
181     cursor = self.db.cursor()
182     cursor.execute("SELECT %s FROM %s GROUP BY %s;"
183     % (iname, self.tablename, iname))
184     result = []
185     while 1:
186     row = cursor.fetchone()
187     if row is None:
188     break
189     result.append(row[0])
190     return result
191    
192    
193     class TransientTable(TransientTableBase):
194    
195     """A Table in a transient DB that starts as the copy of a Thuban Table."""
196    
197     def __init__(self, transient_db, table):
198     """Create a new table in the given transient DB as a copy of table
199    
200     The table argument can be any object implementing the Table
201     interface.
202     """
203     TransientTableBase.__init__(self, transient_db)
204     self.create(table)
205    
206     def create(self, table):
207     columns = []
208     for i in range(table.field_count()):
209     type, name = table.field_info(i)[:2]
210     columns.append(ColumnReference(name, type,
211     self.db.new_column_name()))
212     TransientTableBase.create(self, columns)
213    
214     # copy the input table to the transient db
215     insert_template = "INSERT INTO %s (%s) VALUES (%s);" \
216     % (self.tablename,
217     ", ".join([col.internal_name
218     for col in self.columns]),
219     ", ".join(["%%(%s)s" % col.name
220     for col in self.columns]))
221     cursor = self.db.cursor()
222     for i in range(table.record_count()):
223     cursor.execute(insert_template, table.read_record(i))
224     self.db.conn.commit()
225    
226    
227    
228     class TransientJoinedTable(TransientTableBase):
229    
230     """A Table in the transient DB that contains a join of two tables"""
231    
232     def __init__(self, transient_db, left_table, left_field,
233     right_table, right_field = None):
234     """Create a new table in the transient DB as a join of two tables.
235    
236     Both input tables, left_table and right_table must have a
237     transient_table method that returns a table object for a table
238     in the trnsient database. The join is performed on the condition
239     that the value of the left_field column the the left table is
240     equal to the value of the right_field in the right_table.
241    
242     The joined table contains all columns of the input tables with
243     one exception: Any column in the right_table with the same name
244     as one of the columns in the left_table will be omitted. This is
245     somewhat of an implementation detail, but is done so that the
246     column names of the joined table can be the same as the column
247     names of the input tables without having to create prefixes.
248     """
249     TransientTableBase.__init__(self, transient_db)
250     self.left_table = left_table.transient_table()
251     self.left_field = left_field
252     self.right_table = right_table.transient_table()
253     if right_field:
254     self.right_field = right_field
255     else:
256     self.right_field = self.left_field
257     self.create()
258    
259     def create(self):
260     """Internal: Create the table with the joined data"""
261     self.tablename = self.db.new_table_name()
262    
263     self.right_table.ensure_index(self.right_field)
264    
265     # Coalesce the column information
266     visited = {}
267     columns = []
268     for col in self.left_table.columns + self.right_table.columns:
269     if col.name in visited:
270     continue
271     columns.append(col)
272     TransientTableBase.create(self, columns)
273    
274     # Copy the joined data to the table.
275     internal_names = [col.internal_name for col in self.columns]
276     stmt = "INSERT INTO %s (%s) SELECT %s FROM %s JOIN %s ON %s = %s;" \
277     % (self.tablename,
278     ", ".join(internal_names),
279     ", ".join(internal_names),
280     self.left_table.tablename,
281     self.right_table.tablename,
282     self.orig_to_internal[self.left_field],
283     self.orig_to_internal[self.right_field])
284     self.db.execute(stmt)
285    
286    
287     class AutoTransientTable:
288    
289     """Table that copies data to a transient table on demand.
290    
291     The AutoTransientTable takes another table as input and copies data
292     to a table in a TransientDatabase instance on demand.
293     """
294    
295     def __init__(self, transient_db, table):
296     self.transient_db = transient_db
297     self.table = table
298     self.t_table = None
299    
300     def record_count(self):
301     """Return the number of records"""
302     return self.table.record_count()
303    
304     def field_count(self):
305     """Return the number of fields in a record"""
306     return self.table.field_count()
307    
308     def field_info(self, field):
309     """Return a tuple (type, name, width, prec) for the field no. field
310    
311     type is the data type of the field, name the name, width the
312     field width in characters and prec the decimal precision.
313     """
314     info = self.table.field_info(field)
315     if info:
316     info = info[:2] + (0, 0)
317     return info
318    
319     def field_info_by_name(self, fieldName):
320     info = self.table.field_info_by_name(fieldName)
321     if info:
322     info = info[:2] + (0, 0)
323     return info
324    
325     def read_record(self, record):
326     """Return the record no. record as a dict mapping field names to values
327     """
328     if self.t_table is not None:
329     return self.t_table.read_record(record)
330     else:
331     return self.table.read_record(record)
332    
333     def write_record(self, record, values):
334     raise NotImplementedError
335    
336     def copy_to_transient(self):
337     """Internal: Create a transient table and copy the data into it"""
338     self.t_table = TransientTable(self.transient_db, self)
339    
340     def transient_table(self):
341     """
342     Return a table whose underlying implementation is in the transient db
343     """
344     if self.t_table is None:
345     self.copy_to_transient()
346     return self.t_table
347    
348     def field_range(self, colname):
349     if self.t_table is None:
350     self.copy_to_transient()
351     return self.t_table.field_range(colname)
352    
353     def GetUniqueValues(self, colname):
354     if self.t_table is None:
355     self.copy_to_transient()
356     return self.t_table.GetUniqueValues(colname)

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26