Thuban/Model/transientdb.py

# Copyright (C) 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with the software for details.

"""Database for transient data

This database is intended for data representations needed during the
lifetime of a Thuban session but which is not permanent. Examples of
this are for instance a join of two DBF files where the DBF files are
the permanent representation of the data and the join only exists in the
Thuban session and is reconstructed when the session is opened.
"""

__version__ = "$Revision$"
# $Source$
# $Id$

import os
import weakref
from sqlite import connect

from base import TitledObject

import table

sql_type_map = {
    table.FIELDTYPE_INT: "INTEGER",
    table.FIELDTYPE_STRING: "VARCHAR",
    table.FIELDTYPE_DOUBLE: "FLOAT",
    }

type_converter_map = {
    table.FIELDTYPE_INT: int,
    table.FIELDTYPE_STRING: str,
    table.FIELDTYPE_DOUBLE: float,
    }

class TransientDatabase:

    def __init__(self, filename):
        self.filename = filename
        self.conn = connect(filename)
        # Counters to produce unique table and column names
        self.num_tables = 0
        self.num_cols = 0
        # Since there's only once process using the SQLite database, we
        # might be able to get a tad more speed with default_synchronous
        # OFF. So far I haven't seen any measurable speedup, though.
        #self.execute("PRAGMA default_synchronous = OFF")

    def __del__(self):
        self.close()

    def close(self):
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def new_table_name(self):
        self.num_tables += 1
        return "Table%03d" % self.num_tables

    def new_column_name(self):
        self.num_cols += 1
        return "Col%03d" % self.num_cols

    def execute(self, *args):
        """execute the SQL statement in the database and return the result"""
        cursor = self.conn.cursor()
        cursor.execute(*args)
        result = cursor.fetchone()
        self.conn.commit()
        return result

    def cursor(self):
        return self.conn.cursor()


class ColumnReference:

    def __init__(self, name, type, internal_name):
        self.name = name
        self.type = type
        self.internal_name = internal_name


class TransientTableBase(table.OldTableInterfaceMixin):

    """Base class for tables in the transient database"""

    def __init__(self, transient_db):
        """Initialize the table for use with the given transient db"""
        self.db = transient_db
        self.tablename = self.db.new_table_name()
        self.indexed_columns = {}
        self.read_record_cursor = None
        self.read_record_last_row = None
        self.read_record_last_result = None

    def create(self, columns):
        self.columns = columns
        self.name_to_column = {}
        self.orig_names = []
        self.internal_to_orig = {}
        self.orig_to_internal = {}
        self.column_map = {}

        # Create the column objects and fill various maps and lists
        for index in range(len(self.columns)):
            col = self.columns[index]
            self.name_to_column[col.name] = col
            self.orig_names.append(col.name)
            self.internal_to_orig[col.internal_name] = col.name
            self.orig_to_internal[col.name] = col.internal_name
            self.column_map[col.name] = col
            self.column_map[index] = col

        # Build the CREATE TABLE statement and create the table in the
        # database
        table_types = ["id INTEGER PRIMARY KEY"]
        for col in self.columns:
            table_types.append("%s %s" % (col.internal_name,
                                          sql_type_map[col.type]))
        table_stmt = "CREATE TABLE %s (\n    %s\n);" % (self.tablename,
                                                   ",\n    ".join(table_types))
        self.db.execute(table_stmt)

    def transient_table(self):
        """
        Return a table whose underlying implementation is in the transient db
        """
        return self

    def ensure_index(self, column):
        """Ensure that there's an index on the given column"""
        if not column in self.indexed_columns:
            internal_name = self.orig_to_internal[column]
            indexname = "Index_%s_%s" % (self.tablename, internal_name)
            stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
                                                    internal_name)
            self.db.execute(stmt)
            self.indexed_columns[column] = 1

    def NumColumns(self):
        return len(self.columns)

    def NumRows(self):
        result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
        return int(result[0])

    def Columns(self):
        return self.columns

    def Column(self, col):
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.column_map.has_key(col)

    def ReadRowAsDict(self, index):
        # Implementation Strategy: Executing a completely new select
        # statement every time this method is called is too slow. The
        # most important usage is to read the records more or less
        # sequentially. This happens e.g. when drawing a layer with a
        # classification where the shapes are drawn in order of the
        # shape ids. Another pattern is that the same row is requested
        # several times in a row. This happens in the table view, for
        # instance.

        # We can exploit this to make access faster by having one cursor
        # open all the time and keeping the last row read around in case
        # the same row is accessed again the next time and if the row
        # index is larger than the row we have read last we simply fetch
        # rows from the cursor until we've reached the requested row. If
        # the requested row index is smaller then we start a new cursor.

        # FIXME: So far this scheme seems to work well enough. Obvious
        # improvements would be to start the cursor at exactly the
        # requested row (should be efficient and easy to do now that the
        # id is the primary key) and to perhaps to also start a new
        # cursor if the requested index is much larger than the last row
        # so that we don't read and discard lots of the rows.

        # Check whether we have to start a new cursor
        if self.read_record_cursor is None or index <self.read_record_last_row:
            stmt = ("SELECT %s FROM %s;"
                    % (", ".join([c.internal_name for c in self.columns]),
                       self.tablename))
            self.read_record_cursor = self.db.cursor()
            self.read_record_cursor.execute(stmt)
            self.read_record_last_row = -1
            self.read_record_last_result = None

        # Now we should have a cursor at a position less than or equal
        # to the index so the following if statement will always set
        # result to a suitable value
        assert index >= self.read_record_last_row

        if index == self.read_record_last_row:
            result = self.read_record_last_result
        else:
            for i in range(index - self.read_record_last_row):
                result = self.read_record_cursor.fetchone()
                self.read_record_last_result = result
        self.read_record_last_row = index
        return dict(zip(self.orig_names, result))

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        # Depending on the actual access patterns of the table data, it
        # might be a bit faster in some circumstances to not implement
        # this via ReadRowAsDict, but this simple implementation should
        # be fast enough for most purposes.
        return self.ReadRowAsDict(row)[self.column_map[col].name]

    def ValueRange(self, col):
        col = self.column_map[col]
        iname = col.internal_name
        min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
                                   % (iname, iname, self.tablename))
        converter = type_converter_map[col.type]
        return (converter(min), converter(max))

    def UniqueValues(self, col):
        iname = self.column_map[col].internal_name
        cursor = self.db.cursor()
        cursor.execute("SELECT %s FROM %s GROUP BY %s;"
                       % (iname, self.tablename, iname))
        result = []
        while 1:
            row = cursor.fetchone()
            if row is None:
                break
            result.append(row[0])
        return result

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the the maximum length of string representation 
        of the values in the column (represented by index or name)."""
        max = 0
        
        type  = self.column_map[col].type
        iname = self.column_map[col].internal_name
        cursor = self.db.cursor()
        cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
        values = [ i[0] for i in cursor.fetchall()]
        if not values:
            return None

        if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
            prec = self.Precision(col)
            format = "%%.%df" % prec
        elif type == sql_type_map[table.FIELDTYPE_INT]:
            format = "%d"
        else:
            format = "%s"
        for value in values:
            if value is None: continue
            l = len(format % value)
            if l > max:
                max = l

        return max

    def Precision(self, col):
        """Return the precision of the column

        The return value is the maximum number of numeric characters after the
        decimal if column type is double. Else precision zero is returned.
        The column can be represented by index or name.
        """
    
        type  = self.column_map[col].type
        if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
            iname = self.column_map[col].internal_name
            cursor = self.db.cursor()
            cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
            values = [ i[0] for i in cursor.fetchall()]
            if not values:
                return 0
            
            max = 0
            for value in values:
                if value is None: continue
                l = len(str(value % 1))
                if l > max:
                    max = l
            if max > 2:
                return max - 2
            else:
                return 0
        else:
            return 0

    def SimpleQuery(self, left, comparison, right):
        """Return the indices of all rows that matching a condition.

        Parameters:
           left -- The column object for the left side of the comparison

           comparison -- The comparison operator as a string. It must be
                         one of '==', '!=', '<', '<=', '>=', '>'

           right -- The right hand side of the comparison. It must be
                    either a column object or a value, i.e. a string,
                    int or float.

        The return value is a sorted list of the indices of the rows
        where the condition is true.
        """
        if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
            raise ValueError("Comparison operator %r not allowed" % comparison)

        if hasattr(right, "internal_name"):
            right_template = right.internal_name
            params = ()
        else:
            right_template = "%s"
            params = (right,)

        query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
                % (self.tablename, left.internal_name, comparison,
                   right_template)

        cursor = self.db.cursor()
        cursor.execute(query, params)
        result = []
        while 1:
            row = cursor.fetchone()
            if row is None:
                break
            result.append(row[0])
        return result

    def Dependencies(self):
        """Placeholder for a method in a derived class.

        Return a sequence with the tables and other data objects that
        self depends on.
        """
        raise NotImplementedError


class TransientTable(TitledObject, TransientTableBase):

    """A Table in a transient DB that starts as the copy of a Thuban Table."""

    def __init__(self, transient_db, table):
        """Create a new table in the given transient DB as a copy of table

        The table argument can be any object implementing the Table
        interface.
        """
        TransientTableBase.__init__(self, transient_db)
        TitledObject.__init__(self, table.Title())
        self.create(table)

    def create(self, table):
        columns = []
        for col in table.Columns():
            columns.append(ColumnReference(col.name, col.type,
                                           self.db.new_column_name()))
        TransientTableBase.create(self, columns)

        # copy the input table to the transient db

        # A key to insert to use for the formatting of the insert
        # statement. The key must not be equal to any of the column
        # names so we construct one by building a string of x's that is
        # longer than any of the column names
        id_key = max([len(col.name) for col in self.columns]) * "x"

        insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
                               % (self.tablename,
                                  ", ".join([col.internal_name
                                             for col in self.columns]),
                                  id_key,
                                  ", ".join(["%%(%s)s" % col.name
                                             for col in self.columns]))
        cursor = self.db.cursor()
        for i in range(table.NumRows()):
            row = table.ReadRowAsDict(i)
            row[id_key] = i
            cursor.execute(insert_template, row)
        self.db.conn.commit()


class TransientJoinedTable(TitledObject, TransientTableBase):

    """A Table in the transient DB that contains a join of two tables"""

    def __init__(self, transient_db, left_table, left_field,
                 right_table, right_field = None, outer_join = False):
        """Create a new table in the transient DB as a join of two tables.

        Both input tables, left_table and right_table must have a
        transient_table method that returns a table object for a table
        in the transient database. The join is performed on the condition
        that the value of the left_field column the the left table is
        equal to the value of the right_field in the right_table.

        The joined table contains all columns of the input tables with
        one exception: Any column in the right_table with the same name
        as one of the columns in the left_table will be omitted. This is
        somewhat of an implementation detail, but is done so that the
        column names of the joined table can be the same as the column
        names of the input tables without having to create prefixes.
        """
        TransientTableBase.__init__(self, transient_db)
        self.dependencies = (left_table, right_table)
        self.left_table = left_table.transient_table()
        self.left_field = left_field
        self.right_table = right_table.transient_table()
        if right_field:
            self.right_field = right_field
        else:
            self.right_field = self.left_field
        self.outer_join = outer_join

        title = "Join of %(left)s and %(right)s" \
                % {"left": self.left_table.Title(),
                   "right": self.right_table.Title()}
        TitledObject.__init__(self, title)

        self.create()

    def create(self):
        """Internal: Create the table with the joined data"""
        self.tablename = self.db.new_table_name()

        self.right_table.ensure_index(self.right_field)

        # Coalesce the column information
        visited = {}
        columns = []
        for col in self.left_table.columns + self.right_table.columns:
            if col.name in visited:
                # We can't allow multiple columns with the same original
                # name, so omit this one. FIXME: There should be a
                # better solution.
                continue
            columns.append(col)
        TransientTableBase.create(self, columns)

        # Copy the joined data to the table.
        internal_names = [col.internal_name for col in self.columns]
        if self.outer_join:
            join_operator = 'LEFT OUTER JOIN'
        else:
            join_operator = 'JOIN'
        stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
                " %s %s ON %s = %s;"
                % (self.tablename,
                   ", ".join(internal_names),
                   self.left_table.tablename,
                   ", ".join(internal_names),
                   self.left_table.tablename,
                   join_operator,
                   self.right_table.tablename,
                   self.orig_to_internal[self.left_field],
                   self.orig_to_internal[self.right_field]))
        self.db.execute(stmt)

    def Dependencies(self):
        """Return a tuple with the two tables the join depends on."""
        return self.dependencies


class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):

    """Table that copies data to a transient table on demand.

    The AutoTransientTable takes another table as input and copies data
    to a table in a TransientDatabase instance on demand.
    """

    def __init__(self, transient_db, table):
        TitledObject.__init__(self, table.Title())
        self.transient_db = transient_db
        self.table = table
        self.t_table = None

    def Columns(self):
        return self.table.Columns()

    def Column(self, col):
        return self.table.Column(col)

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.table.HasColumn(col)

    def NumRows(self):
        return self.table.NumRows()

    def NumColumns(self):
        return self.table.NumColumns()

    def ReadRowAsDict(self, record):
        """Return the record no. record as a dict mapping field names to values
        """
        if self.t_table is not None:
            return self.t_table.ReadRowAsDict(record)
        else:
            return self.table.ReadRowAsDict(record)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        if self.t_table is not None:
            return self.t_table.ReadValue(row, col)
        else:
            return self.table.ReadValue(row, col)

    def copy_to_transient(self):
        """Internal: Create a transient table and copy the data into it"""
        self.t_table = TransientTable(self.transient_db, self)

    def transient_table(self):
        """
        Return a table whose underlying implementation is in the transient db
        """
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table

    def ValueRange(self, col):
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table.ValueRange(col)

    def UniqueValues(self, col):
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table.UniqueValues(col)

    def SimpleQuery(self, left, comparison, right):
        if self.t_table is None:
            self.copy_to_transient()
        # Make sure to use the column object of the transient table. The
        # left argument is always a column object so we can just ask the
        # t_table for the right object.
        if hasattr(right, "name"):
            return self.t_table.SimpleQuery(self.t_table.Column(left.name),
                                            comparison, 
                                            self.t_table.Column(right.name))
        else:
            return self.t_table.SimpleQuery(self.t_table.Column(left.name),
                                            comparison, right)

    def Dependencies(self):
        """Return a tuple containing the original table"""
        return (self.table,)

1	# Copyright (C) 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	#
5	# This program is free software under the GPL (>=v2)
6	# Read the file COPYING coming with the software for details.
7
8	"""Database for transient data
9
10	This database is intended for data representations needed during the
11	lifetime of a Thuban session but which is not permanent. Examples of
12	this are for instance a join of two DBF files where the DBF files are
13	the permanent representation of the data and the join only exists in the
14	Thuban session and is reconstructed when the session is opened.
15	"""
16
17	__version__ = "$Revision$"
18	# $Source$
19	# $Id$
20
21	import os
22	import weakref
23	from sqlite import connect
24
25	from base import TitledObject
26
27	import table
28
29	sql_type_map = {
30	table.FIELDTYPE_INT: "INTEGER",
31	table.FIELDTYPE_STRING: "VARCHAR",
32	table.FIELDTYPE_DOUBLE: "FLOAT",
33	}
34
35	type_converter_map = {
36	table.FIELDTYPE_INT: int,
37	table.FIELDTYPE_STRING: str,
38	table.FIELDTYPE_DOUBLE: float,
39	}
40
41	class TransientDatabase:
42
43	def __init__(self, filename):
44	self.filename = filename
45	self.conn = connect(filename)
46	# Counters to produce unique table and column names
47	self.num_tables = 0
48	self.num_cols = 0
49	# Since there's only once process using the SQLite database, we
50	# might be able to get a tad more speed with default_synchronous
51	# OFF. So far I haven't seen any measurable speedup, though.
52	#self.execute("PRAGMA default_synchronous = OFF")
53
54	def __del__(self):
55	self.close()
56
57	def close(self):
58	if self.conn is not None:
59	self.conn.close()
60	self.conn = None
61
62	def new_table_name(self):
63	self.num_tables += 1
64	return "Table%03d" % self.num_tables
65
66	def new_column_name(self):
67	self.num_cols += 1
68	return "Col%03d" % self.num_cols
69
70	def execute(self, *args):
71	"""execute the SQL statement in the database and return the result"""
72	cursor = self.conn.cursor()
73	cursor.execute(*args)
74	result = cursor.fetchone()
75	self.conn.commit()
76	return result
77
78	def cursor(self):
79	return self.conn.cursor()
80
81
82	class ColumnReference:
83
84	def __init__(self, name, type, internal_name):
85	self.name = name
86	self.type = type
87	self.internal_name = internal_name
88
89
90	class TransientTableBase(table.OldTableInterfaceMixin):
91
92	"""Base class for tables in the transient database"""
93
94	def __init__(self, transient_db):
95	"""Initialize the table for use with the given transient db"""
96	self.db = transient_db
97	self.tablename = self.db.new_table_name()
98	self.indexed_columns = {}
99	self.read_record_cursor = None
100	self.read_record_last_row = None
101	self.read_record_last_result = None
102
103	def create(self, columns):
104	self.columns = columns
105	self.name_to_column = {}
106	self.orig_names = []
107	self.internal_to_orig = {}
108	self.orig_to_internal = {}
109	self.column_map = {}
110
111	# Create the column objects and fill various maps and lists
112	for index in range(len(self.columns)):
113	col = self.columns[index]
114	self.name_to_column[col.name] = col
115	self.orig_names.append(col.name)
116	self.internal_to_orig[col.internal_name] = col.name
117	self.orig_to_internal[col.name] = col.internal_name
118	self.column_map[col.name] = col
119	self.column_map[index] = col
120
121	# Build the CREATE TABLE statement and create the table in the
122	# database
123	table_types = ["id INTEGER PRIMARY KEY"]
124	for col in self.columns:
125	table_types.append("%s %s" % (col.internal_name,
126	sql_type_map[col.type]))
127	table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
128	",\n ".join(table_types))
129	self.db.execute(table_stmt)
130
131	def transient_table(self):
132	"""
133	Return a table whose underlying implementation is in the transient db
134	"""
135	return self
136
137	def ensure_index(self, column):
138	"""Ensure that there's an index on the given column"""
139	if not column in self.indexed_columns:
140	internal_name = self.orig_to_internal[column]
141	indexname = "Index_%s_%s" % (self.tablename, internal_name)
142	stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
143	internal_name)
144	self.db.execute(stmt)
145	self.indexed_columns[column] = 1
146
147	def NumColumns(self):
148	return len(self.columns)
149
150	def NumRows(self):
151	result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152	return int(result[0])
153
154	def Columns(self):
155	return self.columns
156
157	def Column(self, col):
158	return self.column_map[col]
159
160	def HasColumn(self, col):
161	"""Return whether the table has a column with the given name or index
162	"""
163	return self.column_map.has_key(col)
164
165	def ReadRowAsDict(self, index):
166	# Implementation Strategy: Executing a completely new select
167	# statement every time this method is called is too slow. The
168	# most important usage is to read the records more or less
169	# sequentially. This happens e.g. when drawing a layer with a
170	# classification where the shapes are drawn in order of the
171	# shape ids. Another pattern is that the same row is requested
172	# several times in a row. This happens in the table view, for
173	# instance.
174
175	# We can exploit this to make access faster by having one cursor
176	# open all the time and keeping the last row read around in case
177	# the same row is accessed again the next time and if the row
178	# index is larger than the row we have read last we simply fetch
179	# rows from the cursor until we've reached the requested row. If
180	# the requested row index is smaller then we start a new cursor.
181
182	# FIXME: So far this scheme seems to work well enough. Obvious
183	# improvements would be to start the cursor at exactly the
184	# requested row (should be efficient and easy to do now that the
185	# id is the primary key) and to perhaps to also start a new
186	# cursor if the requested index is much larger than the last row
187	# so that we don't read and discard lots of the rows.
188
189	# Check whether we have to start a new cursor
190	if self.read_record_cursor is None or index <self.read_record_last_row:
191	stmt = ("SELECT %s FROM %s;"
192	% (", ".join([c.internal_name for c in self.columns]),
193	self.tablename))
194	self.read_record_cursor = self.db.cursor()
195	self.read_record_cursor.execute(stmt)
196	self.read_record_last_row = -1
197	self.read_record_last_result = None
198
199	# Now we should have a cursor at a position less than or equal
200	# to the index so the following if statement will always set
201	# result to a suitable value
202	assert index >= self.read_record_last_row
203
204	if index == self.read_record_last_row:
205	result = self.read_record_last_result
206	else:
207	for i in range(index - self.read_record_last_row):
208	result = self.read_record_cursor.fetchone()
209	self.read_record_last_result = result
210	self.read_record_last_row = index
211	return dict(zip(self.orig_names, result))
212
213	def ReadValue(self, row, col):
214	"""Return the value of the specified row and column
215
216	The col parameter may be the index of the column or its name.
217	"""
218	# Depending on the actual access patterns of the table data, it
219	# might be a bit faster in some circumstances to not implement
220	# this via ReadRowAsDict, but this simple implementation should
221	# be fast enough for most purposes.
222	return self.ReadRowAsDict(row)[self.column_map[col].name]
223
224	def ValueRange(self, col):
225	col = self.column_map[col]
226	iname = col.internal_name
227	min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228	% (iname, iname, self.tablename))
229	converter = type_converter_map[col.type]
230	return (converter(min), converter(max))
231
232	def UniqueValues(self, col):
233	iname = self.column_map[col].internal_name
234	cursor = self.db.cursor()
235	cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236	% (iname, self.tablename, iname))
237	result = []
238	while 1:
239	row = cursor.fetchone()
240	if row is None:
241	break
242	result.append(row[0])
243	return result
244
245	def Width(self, col):
246	"""Return the maximum width of values in the column
247
248	The return value is the the maximum length of string representation
249	of the values in the column (represented by index or name)."""
250	max = 0
251
252	type = self.column_map[col].type
253	iname = self.column_map[col].internal_name
254	cursor = self.db.cursor()
255	cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256	values = [ i[0] for i in cursor.fetchall()]
257	if not values:
258	return None
259
260	if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
261	prec = self.Precision(col)
262	format = "%%.%df" % prec
263	elif type == sql_type_map[table.FIELDTYPE_INT]:
264	format = "%d"
265	else:
266	format = "%s"
267	for value in values:
268	if value is None: continue
269	l = len(format % value)
270	if l > max:
271	max = l
272
273	return max
274
275	def Precision(self, col):
276	"""Return the precision of the column
277
278	The return value is the maximum number of numeric characters after the
279	decimal if column type is double. Else precision zero is returned.
280	The column can be represented by index or name.
281	"""
282
283	type = self.column_map[col].type
284	if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
285	iname = self.column_map[col].internal_name
286	cursor = self.db.cursor()
287	cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
288	values = [ i[0] for i in cursor.fetchall()]
289	if not values:
290	return 0
291
292	max = 0
293	for value in values:
294	if value is None: continue
295	l = len(str(value % 1))
296	if l > max:
297	max = l
298	if max > 2:
299	return max - 2
300	else:
301	return 0
302	else:
303	return 0
304
305	def SimpleQuery(self, left, comparison, right):
306	"""Return the indices of all rows that matching a condition.
307
308	Parameters:
309	left -- The column object for the left side of the comparison
310
311	comparison -- The comparison operator as a string. It must be
312	one of '==', '!=', '<', '<=', '>=', '>'
313
314	right -- The right hand side of the comparison. It must be
315	either a column object or a value, i.e. a string,
316	int or float.
317
318	The return value is a sorted list of the indices of the rows
319	where the condition is true.
320	"""
321	if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
322	raise ValueError("Comparison operator %r not allowed" % comparison)
323
324	if hasattr(right, "internal_name"):
325	right_template = right.internal_name
326	params = ()
327	else:
328	right_template = "%s"
329	params = (right,)
330
331	query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
332	% (self.tablename, left.internal_name, comparison,
333	right_template)
334
335	cursor = self.db.cursor()
336	cursor.execute(query, params)
337	result = []
338	while 1:
339	row = cursor.fetchone()
340	if row is None:
341	break
342	result.append(row[0])
343	return result
344
345	def Dependencies(self):
346	"""Placeholder for a method in a derived class.
347
348	Return a sequence with the tables and other data objects that
349	self depends on.
350	"""
351	raise NotImplementedError
352
353
354	class TransientTable(TitledObject, TransientTableBase):
355
356	"""A Table in a transient DB that starts as the copy of a Thuban Table."""
357
358	def __init__(self, transient_db, table):
359	"""Create a new table in the given transient DB as a copy of table
360
361	The table argument can be any object implementing the Table
362	interface.
363	"""
364	TransientTableBase.__init__(self, transient_db)
365	TitledObject.__init__(self, table.Title())
366	self.create(table)
367
368	def create(self, table):
369	columns = []
370	for col in table.Columns():
371	columns.append(ColumnReference(col.name, col.type,
372	self.db.new_column_name()))
373	TransientTableBase.create(self, columns)
374
375	# copy the input table to the transient db
376
377	# A key to insert to use for the formatting of the insert
378	# statement. The key must not be equal to any of the column
379	# names so we construct one by building a string of x's that is
380	# longer than any of the column names
381	id_key = max([len(col.name) for col in self.columns]) * "x"
382
383	insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
384	% (self.tablename,
385	", ".join([col.internal_name
386	for col in self.columns]),
387	id_key,
388	", ".join(["%%(%s)s" % col.name
389	for col in self.columns]))
390	cursor = self.db.cursor()
391	for i in range(table.NumRows()):
392	row = table.ReadRowAsDict(i)
393	row[id_key] = i
394	cursor.execute(insert_template, row)
395	self.db.conn.commit()
396
397
398
399	class TransientJoinedTable(TitledObject, TransientTableBase):
400
401	"""A Table in the transient DB that contains a join of two tables"""
402
403	def __init__(self, transient_db, left_table, left_field,
404	right_table, right_field = None, outer_join = False):
405	"""Create a new table in the transient DB as a join of two tables.
406
407	Both input tables, left_table and right_table must have a
408	transient_table method that returns a table object for a table
409	in the transient database. The join is performed on the condition
410	that the value of the left_field column the the left table is
411	equal to the value of the right_field in the right_table.
412
413	The joined table contains all columns of the input tables with
414	one exception: Any column in the right_table with the same name
415	as one of the columns in the left_table will be omitted. This is
416	somewhat of an implementation detail, but is done so that the
417	column names of the joined table can be the same as the column
418	names of the input tables without having to create prefixes.
419	"""
420	TransientTableBase.__init__(self, transient_db)
421	self.dependencies = (left_table, right_table)
422	self.left_table = left_table.transient_table()
423	self.left_field = left_field
424	self.right_table = right_table.transient_table()
425	if right_field:
426	self.right_field = right_field
427	else:
428	self.right_field = self.left_field
429	self.outer_join = outer_join
430
431	title = "Join of %(left)s and %(right)s" \
432	% {"left": self.left_table.Title(),
433	"right": self.right_table.Title()}
434	TitledObject.__init__(self, title)
435
436	self.create()
437
438	def create(self):
439	"""Internal: Create the table with the joined data"""
440	self.tablename = self.db.new_table_name()
441
442	self.right_table.ensure_index(self.right_field)
443
444	# Coalesce the column information
445	visited = {}
446	columns = []
447	for col in self.left_table.columns + self.right_table.columns:
448	if col.name in visited:
449	# We can't allow multiple columns with the same original
450	# name, so omit this one. FIXME: There should be a
451	# better solution.
452	continue
453	columns.append(col)
454	TransientTableBase.create(self, columns)
455
456	# Copy the joined data to the table.
457	internal_names = [col.internal_name for col in self.columns]
458	if self.outer_join:
459	join_operator = 'LEFT OUTER JOIN'
460	else:
461	join_operator = 'JOIN'
462	stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
463	" %s %s ON %s = %s;"
464	% (self.tablename,
465	", ".join(internal_names),
466	self.left_table.tablename,
467	", ".join(internal_names),
468	self.left_table.tablename,
469	join_operator,
470	self.right_table.tablename,
471	self.orig_to_internal[self.left_field],
472	self.orig_to_internal[self.right_field]))
473	self.db.execute(stmt)
474
475	def Dependencies(self):
476	"""Return a tuple with the two tables the join depends on."""
477	return self.dependencies
478
479
480	class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
481
482	"""Table that copies data to a transient table on demand.
483
484	The AutoTransientTable takes another table as input and copies data
485	to a table in a TransientDatabase instance on demand.
486	"""
487
488	def __init__(self, transient_db, table):
489	TitledObject.__init__(self, table.Title())
490	self.transient_db = transient_db
491	self.table = table
492	self.t_table = None
493
494	def Columns(self):
495	return self.table.Columns()
496
497	def Column(self, col):
498	return self.table.Column(col)
499
500	def HasColumn(self, col):
501	"""Return whether the table has a column with the given name or index
502	"""
503	return self.table.HasColumn(col)
504
505	def NumRows(self):
506	return self.table.NumRows()
507
508	def NumColumns(self):
509	return self.table.NumColumns()
510
511	def ReadRowAsDict(self, record):
512	"""Return the record no. record as a dict mapping field names to values
513	"""
514	if self.t_table is not None:
515	return self.t_table.ReadRowAsDict(record)
516	else:
517	return self.table.ReadRowAsDict(record)
518
519	def ReadValue(self, row, col):
520	"""Return the value of the specified row and column
521
522	The col parameter may be the index of the column or its name.
523	"""
524	if self.t_table is not None:
525	return self.t_table.ReadValue(row, col)
526	else:
527	return self.table.ReadValue(row, col)
528
529	def copy_to_transient(self):
530	"""Internal: Create a transient table and copy the data into it"""
531	self.t_table = TransientTable(self.transient_db, self)
532
533	def transient_table(self):
534	"""
535	Return a table whose underlying implementation is in the transient db
536	"""
537	if self.t_table is None:
538	self.copy_to_transient()
539	return self.t_table
540
541	def ValueRange(self, col):
542	if self.t_table is None:
543	self.copy_to_transient()
544	return self.t_table.ValueRange(col)
545
546	def UniqueValues(self, col):
547	if self.t_table is None:
548	self.copy_to_transient()
549	return self.t_table.UniqueValues(col)
550
551	def SimpleQuery(self, left, comparison, right):
552	if self.t_table is None:
553	self.copy_to_transient()
554	# Make sure to use the column object of the transient table. The
555	# left argument is always a column object so we can just ask the
556	# t_table for the right object.
557	if hasattr(right, "name"):
558	return self.t_table.SimpleQuery(self.t_table.Column(left.name),
559	comparison,
560	self.t_table.Column(right.name))
561	else:
562	return self.t_table.SimpleQuery(self.t_table.Column(left.name),
563	comparison, right)
564
565	def Dependencies(self):
566	"""Return a tuple containing the original table"""
567	return (self.table,)
568
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision