# Thuban/Model/transientdb.py

# Copyright (C) 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with the software for details.

"""Database for transient data

This database is intended for data representations needed during the
lifetime of a Thuban session but which is not permanent. Examples of
this are for instance a join of two DBF files where the DBF files are
the permanent representation of the data and the join only exists in the
Thuban session and is reconstructed when the session is opened.
"""

__version__ = "$Revision$"
# $Source$
# $Id$

import os
import weakref
from sqlite import connect

from base import TitledObject

import table

# SQL column type used in the CREATE TABLE statements for each of the
# field type constants of the table module.
sql_type_map = {
    table.FIELDTYPE_INT: "INTEGER",
    table.FIELDTYPE_STRING: "VARCHAR",
    table.FIELDTYPE_DOUBLE: "FLOAT",
    }

# Python callable applied to values read back from the database (see
# ValueRange) for each of the field type constants of the table module.
type_converter_map = {
    table.FIELDTYPE_INT: int,
    table.FIELDTYPE_STRING: str,
    table.FIELDTYPE_DOUBLE: float,
    }

class TransientDatabase:

    """Wrapper around the SQLite connection holding the transient tables

    Besides the connection itself, the object hands out the unique table
    and column names used by the tables stored in the database.
    """

    def __init__(self, filename):
        """Open the database stored in the file filename"""
        # Make sure the attribute exists even if connect raises, so that
        # close (and therefore __del__) can always be called safely.
        self.conn = None
        self.filename = filename
        self.conn = connect(filename)
        # Counters to produce unique table and column names
        self.num_tables = 0
        self.num_cols = 0
        # Since there's only one process using the SQLite database, we
        # might be able to get a tad more speed with default_synchronous
        # OFF. So far I haven't seen any measurable speedup, though.
        #self.execute("PRAGMA default_synchronous = OFF")

    def __del__(self):
        """Close the connection when the object is garbage collected"""
        self.close()

    def close(self):
        """Close the database connection

        Calling close more than once, or on an object whose connection
        was never opened, is harmless.
        """
        # getattr because __del__ may run on an instance whose __init__
        # never got as far as setting the attribute.
        conn = getattr(self, "conn", None)
        self.conn = None
        if conn is not None:
            conn.close()

    def new_table_name(self):
        """Return a table name that has not been handed out before"""
        self.num_tables += 1
        return "Table%03d" % self.num_tables

    def new_column_name(self):
        """Return a column name that has not been handed out before"""
        self.num_cols += 1
        return "Col%03d" % self.num_cols

    def execute(self, *args):
        """Execute the SQL statement in the database and return the result

        The arguments are passed on unchanged to the execute method of a
        new cursor. The return value is whatever the cursor's fetchone
        method returns afterwards, i.e. only the first row of the
        result. The transaction is committed before returning.
        """
        cursor = self.conn.cursor()
        cursor.execute(*args)
        result = cursor.fetchone()
        self.conn.commit()
        return result

    def cursor(self):
        """Return a new cursor for the database connection"""
        return self.conn.cursor()


class ColumnReference:

    """Description of one column of a table in the transient database

    Attributes:
       name -- The name under which users of the table know the column

       type -- The field type of the column. It is used as a key into
               sql_type_map and type_converter_map.

       internal_name -- The name of the column in the SQL table
    """

    def __init__(self, name, type, internal_name):
        self.name = name
        self.type = type
        self.internal_name = internal_name

    def __repr__(self):
        """Return a representation showing all three attributes"""
        return "%s(%r, %r, %r)" % (self.__class__.__name__, self.name,
                                   self.type, self.internal_name)


class TransientTableBase(table.OldTableInterfaceMixin):

    """Base class for tables in the transient database"""

    def __init__(self, transient_db):
        """Set up the bookkeeping for a table living in transient_db

        A new table name is requested from the database. The SQL table
        itself is not created here; that is the job of create.
        """
        self.db = transient_db
        self.tablename = transient_db.new_table_name()
        # Names of the columns for which ensure_index created an index
        self.indexed_columns = {}
        # State of the cursor that ReadRowAsDict keeps between calls
        self.read_record_cursor = None
        self.read_record_last_result = None
        self.read_record_last_row = None

    def create(self, columns):
        """Record the column descriptions and create the SQL table

        columns is a sequence of column objects providing the attributes
        name, type and internal_name. The objects are registered in the
        lookup tables of the instance and a table with an integer
        primary key called id plus one SQL column per column object is
        created in the database.
        """
        self.columns = columns
        self.name_to_column = {}
        self.orig_names = []
        self.internal_to_orig = {}
        self.orig_to_internal = {}
        self.column_map = {}

        # Register every column under its name and under its position
        position = 0
        for column in columns:
            name = column.name
            internal = column.internal_name
            self.name_to_column[name] = column
            self.orig_names.append(name)
            self.internal_to_orig[internal] = name
            self.orig_to_internal[name] = internal
            self.column_map[name] = column
            self.column_map[position] = column
            position += 1

        # One SQL column definition per column, after the primary key
        column_defs = ["id INTEGER PRIMARY KEY"]
        for column in columns:
            sql_type = sql_type_map[column.type]
            column_defs.append("%s %s" % (column.internal_name, sql_type))

        body = ",\n    ".join(column_defs)
        self.db.execute("CREATE TABLE %s (\n    %s\n);"
                        % (self.tablename, body))

    def transient_table(self):
        """Return the table object that lives in the transient db

        A table derived from this class is already stored there, so the
        table simply returns itself.
        """
        return self

    def ensure_index(self, column):
        """Create an index on the named column unless it was done before

        column is the original (external) name of the column.
        """
        if column in self.indexed_columns:
            # The index was created by an earlier call
            return

        sql_column = self.orig_to_internal[column]
        index_name = "Index_%s_%s" % (self.tablename, sql_column)
        self.db.execute("CREATE INDEX %s ON %s (%s);"
                        % (index_name, self.tablename, sql_column))
        self.indexed_columns[column] = 1

    def NumColumns(self):
        """Return the number of columns passed to create"""
        column_list = self.columns
        return len(column_list)

    def NumRows(self):
        """Return the number of rows currently stored in the SQL table"""
        stmt = "SELECT count(*) FROM %s;" % self.tablename
        first_row = self.db.execute(stmt)
        return int(first_row[0])

    def Columns(self):
        """Return the sequence of column objects passed to create"""
        column_list = self.columns
        return column_list

    def Column(self, col):
        """Return the column object for the given column name or index

        A KeyError is raised if the table has no such column.
        """
        lookup = self.column_map
        return lookup[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # Use the in operator like the rest of the module does for
        # dictionary membership; has_key is deprecated.
        return col in self.column_map

    def ReadRowAsDict(self, index):
        """Return the row with the given index as a dictionary

        The dictionary maps the original column names to the values of
        the row.

        Raise IndexError if index is negative or if the table does not
        have that many rows.
        """
        # Implementation Strategy: Executing a completely new select
        # statement every time this method is called is too slow. The
        # most important usage is to read the records more or less
        # sequentially. This happens e.g. when drawing a layer with a
        # classification where the shapes are drawn in order of the
        # shape ids. Another pattern is that the same row is requested
        # several times in a row. This happens in the table view, for
        # instance.

        # We can exploit this to make access faster by having one cursor
        # open all the time and keeping the last row read around in case
        # the same row is accessed again the next time and if the row
        # index is larger than the row we have read last we simply fetch
        # rows from the cursor until we've reached the requested row. If
        # the requested row index is smaller then we start a new cursor.

        # FIXME: So far this scheme seems to work well enough. Obvious
        # improvements would be to start the cursor at exactly the
        # requested row (should be efficient and easy to do now that the
        # id is the primary key) and to perhaps to also start a new
        # cursor if the requested index is much larger than the last row
        # so that we don't read and discard lots of the rows.

        # Reject negative indices explicitly. Without this check they
        # would either trip over the cursor bookkeeping below or be
        # answered with the "no row read yet" placeholder.
        if index < 0:
            raise IndexError("row index %r out of range" % (index,))

        # Check whether we have to start a new cursor
        if (self.read_record_cursor is None
            or index < self.read_record_last_row):
            stmt = ("SELECT %s FROM %s;"
                    % (", ".join([c.internal_name for c in self.columns]),
                       self.tablename))
            self.read_record_cursor = self.db.cursor()
            self.read_record_cursor.execute(stmt)
            self.read_record_last_row = -1
            self.read_record_last_result = None

        # Now the cursor is at a position less than or equal to the
        # index, so either the cached row is the one requested or we
        # have to advance the cursor.
        if index == self.read_record_last_row:
            result = self.read_record_last_result
        else:
            for i in range(index - self.read_record_last_row):
                result = self.read_record_cursor.fetchone()
                if result is None:
                    # The cursor is exhausted before reaching index
                    break

        if result is None:
            # Forget the exhausted cursor so that the next call starts
            # from scratch instead of working with inconsistent state.
            self.read_record_cursor = None
            self.read_record_last_row = None
            self.read_record_last_result = None
            raise IndexError("row index %r out of range" % (index,))

        self.read_record_last_result = result
        self.read_record_last_row = index
        return dict(zip(self.orig_names, result))

    def ReadValue(self, row, col):
        """Return a single value of the table

        row is the index of the row, col either the index or the name
        of the column.
        """
        # Going through ReadRowAsDict may not be the fastest approach
        # for every access pattern, but it is simple and should be fast
        # enough for most purposes.
        record = self.ReadRowAsDict(row)
        column = self.column_map[col]
        return record[column.name]

    def ValueRange(self, col):
        """Return the smallest and largest value of a column as a tuple

        col is the index or name of the column. Both values are
        determined by the database and passed through the converter
        registered for the column's type in type_converter_map.
        """
        column = self.column_map[col]
        sql_column = column.internal_name
        stmt = ("SELECT min(%s), max(%s) FROM %s;"
                % (sql_column, sql_column, self.tablename))
        lowest, highest = self.db.execute(stmt)
        convert = type_converter_map[column.type]
        return (convert(lowest), convert(highest))

    def UniqueValues(self, col):
        """Return a list with the distinct values of a column

        col is the index or name of the column. The values are
        determined with a GROUP BY query on the column.
        """
        sql_column = self.column_map[col].internal_name
        stmt = ("SELECT %s FROM %s GROUP BY %s;"
                % (sql_column, self.tablename, sql_column))
        cursor = self.db.cursor()
        cursor.execute(stmt)
        # Each result row holds just the one selected value
        return [record[0] for record in cursor.fetchall()]

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). NULL values are ignored. If the table has no rows at
        all the return value is None.
        """
        column = self.column_map[col]
        cursor = self.db.cursor()
        cursor.execute("SELECT %s FROM %s;"
                       % (column.internal_name, self.tablename))
        values = [row[0] for row in cursor.fetchall()]
        if not values:
            return None

        # The type of a column is one of the FIELDTYPE_* constants (the
        # keys of sql_type_map), so compare with those and not with the
        # SQL type names stored as values in sql_type_map.
        if column.type == table.FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == table.FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        width = 0
        for value in values:
            if value is None:
                continue
            width = max(width, len(template % value))

        return width

    def SimpleQuery(self, left, comparison, right):
        """Return the ids of all rows that match a simple condition.

        Parameters:
           left -- The column object for the left side of the comparison

           comparison -- The comparison operator as a string. It must be
                         one of '==', '!=', '<', '<=', '>=', '>'

           right -- The right hand side of the comparison. It must be
                    either a column object or a value, i.e. a string,
                    int or float.

        The condition is evaluated by the database. The return value is
        the list of the values of the id column of the matching rows in
        ascending order.

        Raise ValueError if comparison is not one of the allowed
        operators.
        """
        if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
            raise ValueError("Comparison operator %r not allowed" % comparison)

        # By default the right hand side is a plain value which is
        # handed to the database as a parameter. A column object (it has
        # an internal_name) is referenced by name in the statement.
        right_template = "%s"
        params = (right,)
        if hasattr(right, "internal_name"):
            right_template = right.internal_name
            params = ()

        query = ("SELECT id FROM %s WHERE %s %s %s ORDER BY id;"
                 % (self.tablename, left.internal_name, comparison,
                    right_template))

        cursor = self.db.cursor()
        cursor.execute(query, params)
        return [record[0] for record in cursor.fetchall()]

    def Dependencies(self):
        """Return the objects this table depends on.

        Derived classes have to override this method and return a
        sequence with the tables and other data objects the table
        depends on. The implementation in the base class always raises
        NotImplementedError.
        """
        raise NotImplementedError


class TransientTable(TitledObject, TransientTableBase):

    """A Table in a transient DB that starts as the copy of a Thuban Table."""

    def __init__(self, transient_db, table):
        """Create a new table in the given transient DB as a copy of table

        The table argument can be any object implementing the Table
        interface. Its title is used as the title of the new table.
        """
        TransientTableBase.__init__(self, transient_db)
        TitledObject.__init__(self, table.Title())
        self.create(table)

    def create(self, table):
        """Internal: Create the SQL table and copy the rows of table into it

        Every column of table gets a new internal column name from the
        database. The rows are copied in index order and the row index
        is stored in the id column.
        """
        columns = []
        for col in table.Columns():
            columns.append(ColumnReference(col.name, col.type,
                                           self.db.new_column_name()))
        TransientTableBase.create(self, columns)

        # Copy the input table to the transient db. The values are
        # passed as positional parameters in column order. That way the
        # column names of the input table never become part of the
        # statement template, so they may contain any characters, no
        # artificial dictionary key is needed for the id and the
        # dictionaries returned by the input table are not modified.
        names = [col.name for col in self.columns]
        sql_columns = ["id"] + [col.internal_name for col in self.columns]
        insert_template = ("INSERT INTO %s (%s) VALUES (%s);"
                           % (self.tablename,
                              ", ".join(sql_columns),
                              ", ".join(["%s"] * len(sql_columns))))
        cursor = self.db.cursor()
        for i in range(table.NumRows()):
            row = table.ReadRowAsDict(i)
            values = [i] + [row[name] for name in names]
            cursor.execute(insert_template, tuple(values))
        self.db.conn.commit()


class TransientJoinedTable(TitledObject, TransientTableBase):

    """A Table in the transient DB that contains a join of two tables"""

    def __init__(self, transient_db, left_table, left_field,
                 right_table, right_field = None, outer_join = False):
        """Create a new table in the transient DB as a join of two tables.

        Both input tables, left_table and right_table must have a
        transient_table method that returns a table object for a table
        in the transient database. The join is performed on the condition
        that the value of the left_field column of the left table is
        equal to the value of the right_field in the right_table. If
        right_field is omitted (or false) the name given as left_field
        is used for the right table as well. If outer_join is true a
        LEFT OUTER JOIN is performed instead of a plain JOIN.

        The joined table contains all columns of the input tables with
        one exception: A column in the right_table with the same name as
        left_field is omitted. Any other column in the right_table whose
        name is already used in the joined table gets underscores
        appended to its name in the joined table until the name is
        unique. The input tables themselves are not modified.
        """
        TransientTableBase.__init__(self, transient_db)
        self.dependencies = (left_table, right_table)
        self.left_table = left_table.transient_table()
        self.left_field = left_field
        self.right_table = right_table.transient_table()
        if right_field:
            self.right_field = right_field
        else:
            self.right_field = self.left_field
        self.outer_join = outer_join

        title = "Join of %(left)s and %(right)s" \
                % {"left": self.left_table.Title(),
                   "right": self.right_table.Title()}
        TitledObject.__init__(self, title)

        self.create()

    def create(self):
        """Internal: Create the table with the joined data"""
        # NOTE(review): TransientTableBase.__init__ has already requested
        # a table name, so this requests a second one. Kept as is so
        # that the numbering of the tables does not change.
        self.tablename = self.db.new_table_name()

        self.right_table.ensure_index(self.right_field)

        # determine the internal column names to join on before
        # coalescing the column information because if the external
        # column names are the same they will be mapped to the same
        # internal name afterwards.
        internal_left_col = self.left_table.orig_to_internal[self.left_field]
        internal_right_col = self.right_table.orig_to_internal[self.right_field]

        # Coalesce the column information
        visited = {}
        columns = []
        newcolumns = []
        for tablename, col in (
            [(self.left_table.tablename, c) for c in self.left_table.columns]
            + [(self.right_table.tablename, c)
               for c in self.right_table.columns]):
            # Work on a copy of the name. The column object belongs to
            # one of the input tables and renaming it would get it out
            # of sync with the lookup tables of that table.
            name = col.name
            if name in visited:
                if name == self.left_field:
                    continue
                # We can't allow multiple columns with the same original
                # name, so append '_' to this one until it is unique.
                # FIXME: There should be a better solution.
                while name in visited:
                    name = name + '_'
            columns.append((tablename, col))
            newcol = ColumnReference(name, col.type,
                                     "Col%03d" % (len(newcolumns) + 1))
            newcolumns.append(newcol)
            visited[name] = 1
        TransientTableBase.create(self, newcolumns)

        # Copy the joined data to the table.
        newinternal_names = [col.internal_name for col in self.columns]
        internal_references = ["%s.%s" % (tablename, col.internal_name)
                               for tablename, col in columns]
        if self.outer_join:
            join_operator = 'LEFT OUTER JOIN'
        else:
            join_operator = 'JOIN'
        stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
                " %s %s ON %s.%s = %s.%s;"
                % (self.tablename,
                   ", ".join(newinternal_names),
                   self.left_table.tablename,
                   ", ".join(internal_references),
                   self.left_table.tablename,
                   join_operator,
                   self.right_table.tablename,
                   self.left_table.tablename,
                   internal_left_col,
                   self.right_table.tablename,
                   internal_right_col))
        self.db.execute(stmt)

    def Dependencies(self):
        """Return a tuple with the two tables the join depends on."""
        return self.dependencies


class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):

    """Table that is copied into the transient database when needed.

    An AutoTransientTable wraps another table. Simple requests are
    answered by the wrapped table. The first request that needs the
    database creates a TransientTable copy of the data in the
    TransientDatabase instance given to the constructor.
    """

    def __init__(self, transient_db, table):
        """Wrap table; its data is copied to transient_db on demand"""
        TitledObject.__init__(self, table.Title())
        self.transient_db = transient_db
        self.table = table
        # The copy in the transient db. None until copy_to_transient
        # has been called.
        self.t_table = None

    def Columns(self):
        """Return the columns as reported by the wrapped table"""
        wrapped = self.table
        return wrapped.Columns()

    def Column(self, col):
        """Return the column object of the wrapped table for col"""
        wrapped = self.table
        return wrapped.Column(col)

    def HasColumn(self, col):
        """Return whether the wrapped table has the given column

        col may be the name or the index of the column.
        """
        wrapped = self.table
        return wrapped.HasColumn(col)

    def NumRows(self):
        """Return the number of rows of the wrapped table"""
        wrapped = self.table
        return wrapped.NumRows()

    def NumColumns(self):
        """Return the number of columns of the wrapped table"""
        wrapped = self.table
        return wrapped.NumColumns()

    def ReadRowAsDict(self, record):
        """Return row number record as a dict mapping names to values

        The row is read from the transient copy if it exists already
        and from the wrapped table otherwise.
        """
        source = self.t_table
        if source is None:
            source = self.table
        return source.ReadRowAsDict(record)

    def ReadValue(self, row, col):
        """Return the value in the given row and column

        col may be the index or the name of the column. The value is
        read from the transient copy if it exists already and from the
        wrapped table otherwise.
        """
        source = self.t_table
        if source is None:
            source = self.table
        return source.ReadValue(row, col)

    def copy_to_transient(self):
        """Internal: Copy the data into a new table in the transient db"""
        copy = TransientTable(self.transient_db, self)
        self.t_table = copy

    def transient_table(self):
        """Return the copy of the table in the transient db

        The copy is created first if it does not exist yet.
        """
        if self.t_table is not None:
            return self.t_table
        self.copy_to_transient()
        return self.t_table

    def ValueRange(self, col):
        """Return the value range of col computed by the transient copy

        The copy is created first if it does not exist yet.
        """
        if self.t_table is None:
            self.copy_to_transient()
        transient = self.t_table
        return transient.ValueRange(col)

    def UniqueValues(self, col):
        """Return the unique values of col computed by the transient copy

        The copy is created first if it does not exist yet.
        """
        if self.t_table is None:
            self.copy_to_transient()
        transient = self.t_table
        return transient.UniqueValues(col)

    def SimpleQuery(self, left, comparison, right):
        """Run the query on the transient copy and return its result

        The copy is created first if it does not exist yet. The
        parameters are the same as for the SimpleQuery method of the
        transient copy.
        """
        if self.t_table is None:
            self.copy_to_transient()
        # The query has to be made with the column objects of the
        # transient copy. left is always a column object; right is one
        # only if it has a name attribute, otherwise it is a plain value
        # that is passed on unchanged.
        right_is_column = hasattr(right, "name")
        left_column = self.t_table.Column(left.name)
        if right_is_column:
            right_operand = self.t_table.Column(right.name)
        else:
            right_operand = right
        return self.t_table.SimpleQuery(left_column, comparison,
                                        right_operand)

    def Dependencies(self):
        """Return a one-element tuple with the wrapped table"""
        wrapped = self.table
        return (wrapped,)

    def Width(self, col):
        """Return the width of col as reported by the wrapped table"""
        wrapped = self.table
        return wrapped.Width(col)
1	# Copyright (C) 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	#
5	# This program is free software under the GPL (>=v2)
6	# Read the file COPYING coming with the software for details.
7
8	"""Database for transient data
9
10	This database is intended for data representations needed during the
11	lifetime of a Thuban session but which is not permanent. Examples of
12	this are for instance a join of two DBF files where the DBF files are
13	the permanent representation of the data and the join only exists in the
14	Thuban session and is reconstructed when the session is opened.
15	"""
16
17	__version__ = "$Revision$"
18	# $Source$
19	# $Id$
20
21	import os
22	import weakref
23	from sqlite import connect
24
25	from base import TitledObject
26
27	import table
28
29	sql_type_map = {
30	table.FIELDTYPE_INT: "INTEGER",
31	table.FIELDTYPE_STRING: "VARCHAR",
32	table.FIELDTYPE_DOUBLE: "FLOAT",
33	}
34
35	type_converter_map = {
36	table.FIELDTYPE_INT: int,
37	table.FIELDTYPE_STRING: str,
38	table.FIELDTYPE_DOUBLE: float,
39	}
40
41	class TransientDatabase:
42
43	def __init__(self, filename):
44	self.filename = filename
45	self.conn = connect(filename)
46	# Counters to produce unique table and column names
47	self.num_tables = 0
48	self.num_cols = 0
49	# Since there's only once process using the SQLite database, we
50	# might be able to get a tad more speed with default_synchronous
51	# OFF. So far I haven't seen any measurable speedup, though.
52	#self.execute("PRAGMA default_synchronous = OFF")
53
54	def __del__(self):
55	self.close()
56
57	def close(self):
58	if self.conn is not None:
59	self.conn.close()
60	self.conn = None
61
62	def new_table_name(self):
63	self.num_tables += 1
64	return "Table%03d" % self.num_tables
65
66	def new_column_name(self):
67	self.num_cols += 1
68	return "Col%03d" % self.num_cols
69
70	def execute(self, *args):
71	"""execute the SQL statement in the database and return the result"""
72	cursor = self.conn.cursor()
73	cursor.execute(*args)
74	result = cursor.fetchone()
75	self.conn.commit()
76	return result
77
78	def cursor(self):
79	return self.conn.cursor()
80
81
82	class ColumnReference:
83
84	def __init__(self, name, type, internal_name):
85	self.name = name
86	self.type = type
87	self.internal_name = internal_name
88
89
90	class TransientTableBase(table.OldTableInterfaceMixin):
91
92	"""Base class for tables in the transient database"""
93
94	def __init__(self, transient_db):
95	"""Initialize the table for use with the given transient db"""
96	self.db = transient_db
97	self.tablename = self.db.new_table_name()
98	self.indexed_columns = {}
99	self.read_record_cursor = None
100	self.read_record_last_row = None
101	self.read_record_last_result = None
102
103	def create(self, columns):
104	self.columns = columns
105	self.name_to_column = {}
106	self.orig_names = []
107	self.internal_to_orig = {}
108	self.orig_to_internal = {}
109	self.column_map = {}
110
111	# Create the column objects and fill various maps and lists
112	for index in range(len(self.columns)):
113	col = self.columns[index]
114	self.name_to_column[col.name] = col
115	self.orig_names.append(col.name)
116	self.internal_to_orig[col.internal_name] = col.name
117	self.orig_to_internal[col.name] = col.internal_name
118	self.column_map[col.name] = col
119	self.column_map[index] = col
120
121	# Build the CREATE TABLE statement and create the table in the
122	# database
123	table_types = ["id INTEGER PRIMARY KEY"]
124	for col in self.columns:
125	table_types.append("%s %s" % (col.internal_name,
126	sql_type_map[col.type]))
127	table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
128	",\n ".join(table_types))
129	self.db.execute(table_stmt)
130
131	def transient_table(self):
132	"""
133	Return a table whose underlying implementation is in the transient db
134	"""
135	return self
136
137	def ensure_index(self, column):
138	"""Ensure that there's an index on the given column"""
139	if not column in self.indexed_columns:
140	internal_name = self.orig_to_internal[column]
141	indexname = "Index_%s_%s" % (self.tablename, internal_name)
142	stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
143	internal_name)
144	self.db.execute(stmt)
145	self.indexed_columns[column] = 1
146
147	def NumColumns(self):
148	return len(self.columns)
149
150	def NumRows(self):
151	result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
152	return int(result[0])
153
154	def Columns(self):
155	return self.columns
156
157	def Column(self, col):
158	return self.column_map[col]
159
160	def HasColumn(self, col):
161	"""Return whether the table has a column with the given name or index
162	"""
163	return self.column_map.has_key(col)
164
165	def ReadRowAsDict(self, index):
166	# Implementation Strategy: Executing a completely new select
167	# statement every time this method is called is too slow. The
168	# most important usage is to read the records more or less
169	# sequentially. This happens e.g. when drawing a layer with a
170	# classification where the shapes are drawn in order of the
171	# shape ids. Another pattern is that the same row is requested
172	# several times in a row. This happens in the table view, for
173	# instance.
174
175	# We can exploit this to make access faster by having one cursor
176	# open all the time and keeping the last row read around in case
177	# the same row is accessed again the next time and if the row
178	# index is larger than the row we have read last we simply fetch
179	# rows from the cursor until we've reached the requested row. If
180	# the requested row index is smaller then we start a new cursor.
181
182	# FIXME: So far this scheme seems to work well enough. Obvious
183	# improvements would be to start the cursor at exactly the
184	# requested row (should be efficient and easy to do now that the
185	# id is the primary key) and to perhaps to also start a new
186	# cursor if the requested index is much larger than the last row
187	# so that we don't read and discard lots of the rows.
188
189	# Check whether we have to start a new cursor
190	if self.read_record_cursor is None or index <self.read_record_last_row:
191	stmt = ("SELECT %s FROM %s;"
192	% (", ".join([c.internal_name for c in self.columns]),
193	self.tablename))
194	self.read_record_cursor = self.db.cursor()
195	self.read_record_cursor.execute(stmt)
196	self.read_record_last_row = -1
197	self.read_record_last_result = None
198
199	# Now we should have a cursor at a position less than or equal
200	# to the index so the following if statement will always set
201	# result to a suitable value
202	assert index >= self.read_record_last_row
203
204	if index == self.read_record_last_row:
205	result = self.read_record_last_result
206	else:
207	for i in range(index - self.read_record_last_row):
208	result = self.read_record_cursor.fetchone()
209	self.read_record_last_result = result
210	self.read_record_last_row = index
211	return dict(zip(self.orig_names, result))
212
213	def ReadValue(self, row, col):
214	"""Return the value of the specified row and column
215
216	The col parameter may be the index of the column or its name.
217	"""
218	# Depending on the actual access patterns of the table data, it
219	# might be a bit faster in some circumstances to not implement
220	# this via ReadRowAsDict, but this simple implementation should
221	# be fast enough for most purposes.
222	return self.ReadRowAsDict(row)[self.column_map[col].name]
223
224	def ValueRange(self, col):
225	col = self.column_map[col]
226	iname = col.internal_name
227	min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
228	% (iname, iname, self.tablename))
229	converter = type_converter_map[col.type]
230	return (converter(min), converter(max))
231
232	def UniqueValues(self, col):
233	iname = self.column_map[col].internal_name
234	cursor = self.db.cursor()
235	cursor.execute("SELECT %s FROM %s GROUP BY %s;"
236	% (iname, self.tablename, iname))
237	result = []
238	while 1:
239	row = cursor.fetchone()
240	if row is None:
241	break
242	result.append(row[0])
243	return result
244
245	def Width(self, col):
246	"""Return the maximum width of values in the column
247
248	The return value is the the maximum length of string representation
249	of the values in the column (represented by index or name)."""
250	max = 0
251
252	type = self.column_map[col].type
253	iname = self.column_map[col].internal_name
254	cursor = self.db.cursor()
255	cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
256	values = [ i[0] for i in cursor.fetchall()]
257	if not values:
258	return None
259
260	if type == sql_type_map[table.FIELDTYPE_DOUBLE]:
261	format = "%.12f"
262	elif type == sql_type_map[table.FIELDTYPE_INT]:
263	format = "%d"
264	else:
265	format = "%s"
266	for value in values:
267	if value is None: continue
268	l = len(format % value)
269	if l > max:
270	max = l
271
272	return max
273
274	def SimpleQuery(self, left, comparison, right):
275	"""Return the indices of all rows that matching a condition.
276
277	Parameters:
278	left -- The column object for the left side of the comparison
279
280	comparison -- The comparison operator as a string. It must be
281	one of '==', '!=', '<', '<=', '>=', '>'
282
283	right -- The right hand side of the comparison. It must be
284	either a column object or a value, i.e. a string,
285	int or float.
286
287	The return value is a sorted list of the indices of the rows
288	where the condition is true.
289	"""
290	if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
291	raise ValueError("Comparison operator %r not allowed" % comparison)
292
293	if hasattr(right, "internal_name"):
294	right_template = right.internal_name
295	params = ()
296	else:
297	right_template = "%s"
298	params = (right,)
299
300	query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
301	% (self.tablename, left.internal_name, comparison,
302	right_template)
303
304	cursor = self.db.cursor()
305	cursor.execute(query, params)
306	result = []
307	while 1:
308	row = cursor.fetchone()
309	if row is None:
310	break
311	result.append(row[0])
312	return result
313
314	def Dependencies(self):
315	"""Placeholder for a method in a derived class.
316
317	Return a sequence with the tables and other data objects that
318	self depends on.
319	"""
320	raise NotImplementedError
321
322
class TransientTable(TitledObject, TransientTableBase):

    """A Table in a transient DB that starts as the copy of a Thuban Table."""

    def __init__(self, transient_db, table):
        """Create a new table in the given transient DB as a copy of table

        The table argument can be any object implementing the Table
        interface.
        """
        TransientTableBase.__init__(self, transient_db)
        TitledObject.__init__(self, table.Title())
        self.create(table)

    def create(self, table):
        """Internal: Create the table in the transient DB and copy the data

        For each column of table a ColumnReference with the same name
        and type and a fresh internal column name is built and handed
        to TransientTableBase.create. Afterwards all rows of table are
        inserted into the new table, using the row index as the id, and
        the transaction is committed.
        """
        columns = []
        for col in table.Columns():
            columns.append(ColumnReference(col.name, col.type,
                                           self.db.new_column_name()))
        TransientTableBase.create(self, columns)

        # copy the input table to the transient db

        # The row id is passed to the insert statement in the same
        # dictionary as the column values, so its key must not be equal
        # to any of the column names. We construct one by building a
        # string of x's that is strictly longer than the longest column
        # name (hence the + 1; a key of equal length could collide with
        # a column that is itself named e.g. "xxx"). The leading [0]
        # keeps max() from failing for a table without columns.
        id_key = (max([0] + [len(col.name) for col in self.columns]) + 1) \
                 * "x"

        # Build the name and placeholder lists including the id so that
        # the statement stays well formed even without any columns.
        # The placeholders use the pyformat style, i.e. %(key)s.
        target_names = ["id"] + [col.internal_name for col in self.columns]
        placeholders = ["%%(%s)s" % key
                        for key in [id_key] + [col.name
                                               for col in self.columns]]
        insert_template = "INSERT INTO %s (%s) VALUES (%s);" \
                          % (self.tablename,
                             ", ".join(target_names),
                             ", ".join(placeholders))

        cursor = self.db.cursor()
        for i in range(table.NumRows()):
            row = table.ReadRowAsDict(i)
            row[id_key] = i
            cursor.execute(insert_template, row)
        self.db.conn.commit()
365
366
367
class TransientJoinedTable(TitledObject, TransientTableBase):

    """A Table in the transient DB that contains a join of two tables"""

    def __init__(self, transient_db, left_table, left_field,
                 right_table, right_field = None, outer_join = False):
        """Create a new table in the transient DB as a join of two tables.

        Both input tables, left_table and right_table must have a
        transient_table method that returns a table object for a table
        in the transient database. The join is performed on the condition
        that the value of the left_field column of the left table is
        equal to the value of the right_field in the right_table. If
        right_field is omitted (or false) it defaults to left_field.

        If outer_join is true, a LEFT OUTER JOIN is used instead of a
        plain JOIN.

        The joined table contains the columns of both input tables. If
        a column has the same name as a column that is already part of
        the joined table, it is omitted if that name is the name of
        left_field. Otherwise underscores are appended to its name in
        the joined table until the name is unique. The column objects
        of the input tables are not modified.
        """
        TransientTableBase.__init__(self, transient_db)
        self.dependencies = (left_table, right_table)
        self.left_table = left_table.transient_table()
        self.left_field = left_field
        self.right_table = right_table.transient_table()
        if right_field:
            self.right_field = right_field
        else:
            self.right_field = self.left_field
        self.outer_join = outer_join

        title = "Join of %(left)s and %(right)s" \
                % {"left": self.left_table.Title(),
                   "right": self.right_table.Title()}
        TitledObject.__init__(self, title)

        self.create()

    def create(self):
        """Internal: Create the table with the joined data"""
        self.tablename = self.db.new_table_name()

        self.right_table.ensure_index(self.right_field)

        # determine the internal column names to join on before
        # coalescing the column information because if the external
        # column names are the same they will be mapped to the same
        # internal name afterwards.
        internal_left_col = self.left_table.orig_to_internal[self.left_field]
        internal_right_col = self.right_table.orig_to_internal[self.right_field]

        # Coalesce the column information
        visited = {}
        columns = []
        newcolumns = []
        sources = ([(self.left_table.tablename, c)
                    for c in self.left_table.columns]
                   + [(self.right_table.tablename, c)
                      for c in self.right_table.columns])
        for tablename, col in sources:
            # Work on a local copy of the name. The column objects
            # belong to the input tables and must not be renamed here,
            # otherwise the input table's columns would no longer match
            # its own name mappings.
            name = col.name
            if name in visited:
                if name == self.left_field:
                    continue
                # We can't allow multiple columns with the same original
                # name, so append '_' to this one until it is unique.
                # FIXME: There should be a better solution.
                while name in visited:
                    name = name + '_'
            columns.append((tablename, col))
            newcolumns.append(ColumnReference(name, col.type,
                                              "Col%03d"
                                              % (len(newcolumns) + 1)))
            visited[name] = 1
        TransientTableBase.create(self, newcolumns)

        # Copy the joined data to the table.
        newinternal_names = [col.internal_name for col in self.columns]
        internal_references = ["%s.%s" % (tablename, col.internal_name)
                               for tablename, col in columns]
        if self.outer_join:
            join_operator = 'LEFT OUTER JOIN'
        else:
            join_operator = 'JOIN'
        stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
                " %s %s ON %s.%s = %s.%s;"
                % (self.tablename,
                   ", ".join(newinternal_names),
                   self.left_table.tablename,
                   ", ".join(internal_references),
                   self.left_table.tablename,
                   join_operator,
                   self.right_table.tablename,
                   self.left_table.tablename,
                   internal_left_col,
                   self.right_table.tablename,
                   internal_right_col))
        self.db.execute(stmt)

    def Dependencies(self):
        """Return a tuple with the two tables the join depends on."""
        return self.dependencies
469
470
class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):

    """Table wrapper that mirrors its data into a transient DB lazily.

    An AutoTransientTable wraps another table. The data of that table
    is only copied into a table of the TransientDatabase instance once
    an operation that needs the transient table is requested.
    """

    def __init__(self, transient_db, table):
        TitledObject.__init__(self, table.Title())
        self.transient_db = transient_db
        self.table = table
        # The transient copy. It stays None until it is needed.
        self.t_table = None

    def Columns(self):
        return self.table.Columns()

    def Column(self, col):
        return self.table.Column(col)

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.table.HasColumn(col)

    def NumRows(self):
        return self.table.NumRows()

    def NumColumns(self):
        return self.table.NumColumns()

    def ReadRowAsDict(self, record):
        """Return the record no. record as a dict mapping field names to values

        The row is read from the transient table if it has already been
        created and from the wrapped table otherwise.
        """
        if self.t_table is None:
            return self.table.ReadRowAsDict(record)
        return self.t_table.ReadRowAsDict(record)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        The value is read from the transient table if it has already
        been created and from the wrapped table otherwise.
        """
        if self.t_table is None:
            return self.table.ReadValue(row, col)
        return self.t_table.ReadValue(row, col)

    def copy_to_transient(self):
        """Internal: Create a transient table and copy the data into it"""
        self.t_table = TransientTable(self.transient_db, self)

    def transient_table(self):
        """
        Return a table whose underlying implementation is in the transient db

        The transient table is created on the first call.
        """
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table

    def ValueRange(self, col):
        return self.transient_table().ValueRange(col)

    def UniqueValues(self, col):
        return self.transient_table().UniqueValues(col)

    def SimpleQuery(self, left, comparison, right):
        t_table = self.transient_table()
        # The query has to be run with the column objects of the
        # transient table. The left argument is always a column object,
        # so it can be looked up by name. The right argument is only
        # looked up if it has a name attribute; anything else is passed
        # through as a plain value.
        left_column = t_table.Column(left.name)
        if hasattr(right, "name"):
            right = t_table.Column(right.name)
        return t_table.SimpleQuery(left_column, comparison, right)

    def Dependencies(self):
        """Return a tuple containing the original table"""
        return (self.table,)

    def Width(self, col):
        return self.table.Width(col)
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision