Thuban/Model/transientdb.py

# Copyright (C) 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with the software for details.

"""Database for transient data

This database is intended for data representations needed during the
lifetime of a Thuban session but which is not permanent. Examples of
this are for instance a join of two DBF files where the DBF files are
the permanent representation of the data and the join only exists in the
Thuban session and is reconstructed when the session is opened.
"""

__version__ = "$Revision$"
# $Source$
# $Id$

from sqlite import connect

from base import TitledObject

import table

sql_type_map = {
    table.FIELDTYPE_INT: "INTEGER",
    table.FIELDTYPE_STRING: "VARCHAR",
    table.FIELDTYPE_DOUBLE: "FLOAT",
    }

type_converter_map = {
    table.FIELDTYPE_INT: int,
    table.FIELDTYPE_STRING: str,
    table.FIELDTYPE_DOUBLE: float,
    }

class TransientDatabase:

    def __init__(self, filename):
        self.filename = filename
        self.conn = connect(filename)
        # Counters to produce unique table and column names
        self.num_tables = 0
        self.num_cols = 0
        # Since there's only once process using the SQLite database, we
        # might be able to get a tad more speed with default_synchronous
        # OFF. So far I haven't seen any measurable speedup, though.
        #self.execute("PRAGMA default_synchronous = OFF")

    def __del__(self):
        self.close()

    def close(self):
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def new_table_name(self):
        self.num_tables += 1
        return "Table%03d" % self.num_tables

    def new_column_name(self):
        self.num_cols += 1
        return "Col%03d" % self.num_cols

    def execute(self, *args):
        """execute the SQL statement in the database and return the result"""
        cursor = self.conn.cursor()
        cursor.execute(*args)
        result = cursor.fetchone()
        self.conn.commit()
        return result

    def cursor(self):
        return self.conn.cursor()


class ColumnReference:

    def __init__(self, name, type, internal_name):
        self.name = name
        self.type = type
        self.internal_name = internal_name


class TransientTableBase(table.OldTableInterfaceMixin):

    """Base class for tables in the transient database"""

    def __init__(self, transient_db):
        """Initialize the table for use with the given transient db"""
        self.db = transient_db
        self.tablename = self.db.new_table_name()
        self.indexed_columns = {}
        self.read_record_cursor = None
        self.read_record_last_row = None
        self.read_record_last_result = None

    def create(self, columns):
        self.columns = columns
        self.name_to_column = {}
        self.orig_names = []
        self.internal_to_orig = {}
        self.orig_to_internal = {}
        self.column_map = {}

        # Create the column objects and fill various maps and lists
        for index in range(len(self.columns)):
            col = self.columns[index]
            self.name_to_column[col.name] = col
            self.orig_names.append(col.name)
            self.internal_to_orig[col.internal_name] = col.name
            self.orig_to_internal[col.name] = col.internal_name
            self.column_map[col.name] = col
            self.column_map[index] = col

        # Build the CREATE TABLE statement and create the table in the
        # database
        table_types = ["id INTEGER PRIMARY KEY"]
        for col in self.columns:
            table_types.append("%s %s" % (col.internal_name,
                                          sql_type_map[col.type]))
        table_stmt = "CREATE TABLE %s (\n    %s\n);" % (self.tablename,
                                                   ",\n    ".join(table_types))
        self.db.execute(table_stmt)

    def transient_table(self):
        """
        Return a table whose underlying implementation is in the transient db
        """
        return self

    def ensure_index(self, column):
        """Ensure that there's an index on the given column"""
        if not column in self.indexed_columns:
            internal_name = self.orig_to_internal[column]
            indexname = "Index_%s_%s" % (self.tablename, internal_name)
            stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
                                                    internal_name)
            self.db.execute(stmt)
            self.indexed_columns[column] = 1

    def NumColumns(self):
        return len(self.columns)

    def NumRows(self):
        result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
        return int(result[0])

    def Columns(self):
        return self.columns

    def Column(self, col):
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.column_map.has_key(col)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        At the moment the transient tables are only used for tables that
        don't distinguish between row number and row id, so the value is
        returned unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the rowid for given its ordinal

        At the moment the transient tables are only used for tables that
        don't distinguish between row number and row id, so the value is
        returned unchanged.
        """
        return num

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored because at the moment the
        transient tables are only used for DBF files where it doesn't
        matter.
        """
        # Implementation Strategy: Executing a completely new select
        # statement every time this method is called is too slow. The
        # most important usage is to read the records more or less
        # sequentially. This happens e.g. when drawing a layer with a
        # classification where the shapes are drawn in order of the
        # shape ids. Another pattern is that the same row is requested
        # several times in a row. This happens in the table view, for
        # instance.

        # We can exploit this to make access faster by having one cursor
        # open all the time and keeping the last row read around in case
        # the same row is accessed again the next time and if the row
        # index is larger than the row we have read last we simply fetch
        # rows from the cursor until we've reached the requested row. If
        # the requested row index is smaller then we start a new cursor.

        # FIXME: So far this scheme seems to work well enough. Obvious
        # improvements would be to start the cursor at exactly the
        # requested row (should be efficient and easy to do now that the
        # id is the primary key) and to perhaps to also start a new
        # cursor if the requested index is much larger than the last row
        # so that we don't read and discard lots of the rows.

        # Check whether we have to start a new cursor
        if self.read_record_cursor is None or index <self.read_record_last_row:
            stmt = ("SELECT %s FROM %s;"
                    % (", ".join([c.internal_name for c in self.columns]),
                       self.tablename))
            self.read_record_cursor = self.db.cursor()
            self.read_record_cursor.execute(stmt)
            self.read_record_last_row = -1
            self.read_record_last_result = None

        # Now we should have a cursor at a position less than or equal
        # to the index so the following if statement will always set
        # result to a suitable value
        assert index >= self.read_record_last_row

        if index == self.read_record_last_row:
            result = self.read_record_last_result
        else:
            for i in range(index - self.read_record_last_row):
                result = self.read_record_cursor.fetchone()
                self.read_record_last_result = result
        self.read_record_last_row = index
        return dict(zip(self.orig_names, result))

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored because at the moment the
        transient tables are only used for DBF files where it doesn't
        matter.
        """
        # Depending on the actual access patterns of the table data, it
        # might be a bit faster in some circumstances to not implement
        # this via ReadRowAsDict, but this simple implementation should
        # be fast enough for most purposes.
        return self.ReadRowAsDict(row)[self.column_map[col].name]

    def ValueRange(self, col):
        col = self.column_map[col]
        iname = col.internal_name
        min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
                                   % (iname, iname, self.tablename))
        converter = type_converter_map[col.type]
        return (converter(min), converter(max))

    def UniqueValues(self, col):
        iname = self.column_map[col].internal_name
        cursor = self.db.cursor()
        cursor.execute("SELECT %s FROM %s GROUP BY %s;"
                       % (iname, self.tablename, iname))
        result = []
        while 1:
            row = cursor.fetchone()
            if row is None:
                break
            result.append(row[0])
        return result

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the the maximum length of string
        representation of the values in the column (represented by index
        or name).
        """
        max = 0

        type  = self.column_map[col].type
        iname = self.column_map[col].internal_name
        cursor = self.db.cursor()
        cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
        values = [ i[0] for i in cursor.fetchall()]
        if not values:
            return None

        if type == table.FIELDTYPE_DOUBLE:
            format = "%.12f"
        elif type == table.FIELDTYPE_INT:
            format = "%d"
        else:
            format = "%s"
        for value in values:
            if value is None: continue
            l = len(format % value)
            if l > max:
                max = l

        return max

    def SimpleQuery(self, left, comparison, right):
        """Return the indices of all rows that matching a condition.

        Parameters:
           left -- The column object for the left side of the comparison

           comparison -- The comparison operator as a string. It must be
                         one of '==', '!=', '<', '<=', '>=', '>'

           right -- The right hand side of the comparison. It must be
                    either a column object or a value, i.e. a string,
                    int or float.

        The return value is a sorted list of the indices of the rows
        where the condition is true.
        """
        if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
            raise ValueError("Comparison operator %r not allowed" % comparison)

        if hasattr(right, "internal_name"):
            right_template = right.internal_name
            params = ()
        else:
            right_template = "%s"
            params = (right,)

        query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
                % (self.tablename, left.internal_name, comparison,
                   right_template)

        cursor = self.db.cursor()
        cursor.execute(query, params)
        result = []
        while 1:
            row = cursor.fetchone()
            if row is None:
                break
            result.append(row[0])
        return result

    def Dependencies(self):
        """Placeholder for a method in a derived class.

        Return a sequence with the tables and other data objects that
        self depends on.
        """
        raise NotImplementedError


class TransientTable(TitledObject, TransientTableBase):

    """A Table in a transient DB that starts as the copy of a Thuban Table."""

    def __init__(self, transient_db, table):
        """Create a new table in the given transient DB as a copy of table

        The table argument can be any object implementing the Table
        interface.
        """
        TransientTableBase.__init__(self, transient_db)
        TitledObject.__init__(self, table.Title())
        self.create(table)

    def create(self, table):
        columns = []
        for col in table.Columns():
            columns.append(ColumnReference(col.name, col.type,
                                           self.db.new_column_name()))
        TransientTableBase.create(self, columns)

        # copy the input table to the transient db

        # A key to insert to use for the formatting of the insert
        # statement. The key must not be equal to any of the column
        # names so we construct one by building a string of x's that is
        # longer than any of the column names
        id_key = max([len(col.name) for col in self.columns]) * "x"

        insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
                               % (self.tablename,
                                  ", ".join([col.internal_name
                                             for col in self.columns]),
                                  id_key,
                                  ", ".join(["%%(%s)s" % col.name
                                             for col in self.columns]))
        cursor = self.db.cursor()
        for i in range(table.NumRows()):
            row = table.ReadRowAsDict(i)
            row[id_key] = i
            cursor.execute(insert_template, row)
        self.db.conn.commit()


class TransientJoinedTable(TitledObject, TransientTableBase):

    """A Table in the transient DB that contains a join of two tables"""

    def __init__(self, transient_db, left_table, left_field,
                 right_table, right_field = None, outer_join = False):
        """Create a new table in the transient DB as a join of two tables.

        Both input tables, left_table and right_table must have a
        transient_table method that returns a table object for a table
        in the transient database. The join is performed on the condition
        that the value of the left_field column the the left table is
        equal to the value of the right_field in the right_table.

        The joined table contains all columns of the input tables,
        however, the column names of the right table may be changed
        slightly to make them unique in the joined table. This is
        currently done by appending a sufficient number of underscores
        ('_').
        """
        TransientTableBase.__init__(self, transient_db)
        self.dependencies = (left_table, right_table)
        self.left_table = left_table.transient_table()
        self.left_field = left_field
        self.right_table = right_table.transient_table()
        if right_field:
            self.right_field = right_field
        else:
            self.right_field = self.left_field
        self.outer_join = outer_join

        title = "Join of %(left)s and %(right)s" \
                % {"left": self.left_table.Title(),
                   "right": self.right_table.Title()}
        TitledObject.__init__(self, title)

        self.create()

    def create(self):
        """Internal: Create the table with the joined data"""
        self.tablename = self.db.new_table_name()

        self.right_table.ensure_index(self.right_field)

        # determine the internal column names to join on before
        # coalescing the column information because if the external
        # column names are the same they will be mapped to the same
        # internal name afterwards.
        internal_left_col = self.left_table.orig_to_internal[self.left_field]
        internal_right_col =self.right_table.orig_to_internal[self.right_field]

        # Coalesce the column information
        visited = {}
        columns = []
        newcolumns = []
        for table in (self.left_table, self.right_table):
            for col in table.Columns():
                colname = col.name
                # We can't allow multiple columns with the same
                # original name, so append '_' to this one until
                # it is unique.
                # FIXME: There should be a better solution.
                while colname in visited:
                    colname = colname + '_'
                columns.append((table.tablename, col))
                newcol = ColumnReference(colname, col.type,
                                            "Col%03d" % (len(newcolumns)+1))
                newcolumns.append(newcol)
                visited[colname] = 1
        TransientTableBase.create(self, newcolumns)

        # Copy the joined data to the table.
        newinternal_names = [col.internal_name for col in self.columns]
        internal_references = ["%s.%s" % (table, col.internal_name) 
                                                    for table, col in columns]
        if self.outer_join:
            join_operator = 'LEFT OUTER JOIN'
        else:
            join_operator = 'JOIN'
        stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
                " %s %s ON %s.%s = %s.%s;"
                % (self.tablename,
                   ", ".join(newinternal_names),
                   self.left_table.tablename,
                   ", ".join(internal_references),
                   self.left_table.tablename,
                   join_operator,
                   self.right_table.tablename,
                   self.left_table.tablename,
                   internal_left_col,
                   self.right_table.tablename,
                   internal_right_col))
        self.db.execute(stmt)

    def Dependencies(self):
        """Return a tuple with the two tables the join depends on."""
        return self.dependencies

    def JoinType(self):
        """Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
        if self.outer_join:
            return "LEFT OUTER"
        else:
            return "INNER"


class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):

    """Table that copies data to a transient table on demand.

    The AutoTransientTable takes another table as input and copies data
    to a table in a TransientDatabase instance on demand.
    """

    def __init__(self, transient_db, table):
        TitledObject.__init__(self, table.Title())
        self.transient_db = transient_db
        self.table = table
        self.t_table = None

    def Columns(self):
        return self.table.Columns()

    def Column(self, col):
        return self.table.Column(col)

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.table.HasColumn(col)

    def NumRows(self):
        return self.table.NumRows()

    def NumColumns(self):
        return self.table.NumColumns()

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id"""
        if self.t_table is not None:
            return self.t_table.RowIdToOrdinal(gid)
        else:
            return self.table.RowIdToOrdinal(gid)

    def RowOrdinalToId(self, num):
        """Return the rowid for given its ordinal"""
        if self.t_table is not None:
            return self.t_table.RowOrdinalToId(num)
        else:
            return self.table.RowOrdinalToId(num)

    def ReadRowAsDict(self, record, row_is_ordinal = 0):
        """Return the record no. record as a dict mapping field names to values
        """
        if self.t_table is not None:
            return self.t_table.ReadRowAsDict(record,
                                              row_is_ordinal = row_is_ordinal)
        else:
            return self.table.ReadRowAsDict(record,
                                            row_is_ordinal = row_is_ordinal)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        if self.t_table is not None:
            return self.t_table.ReadValue(row, col,
                                          row_is_ordinal = row_is_ordinal)
        else:
            return self.table.ReadValue(row, col,
                                        row_is_ordinal = row_is_ordinal)

    def copy_to_transient(self):
        """Internal: Create a transient table and copy the data into it"""
        self.t_table = TransientTable(self.transient_db, self)

    def transient_table(self):
        """
        Return a table whose underlying implementation is in the transient db
        """
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table

    def ValueRange(self, col):
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table.ValueRange(col)

    def UniqueValues(self, col):
        if self.t_table is None:
            self.copy_to_transient()
        return self.t_table.UniqueValues(col)

    def SimpleQuery(self, left, comparison, right):
        if self.t_table is None:
            self.copy_to_transient()
        # Make sure to use the column object of the transient table. The
        # left argument is always a column object so we can just ask the
        # t_table for the right object.
        if hasattr(right, "name"):
            return self.t_table.SimpleQuery(self.t_table.Column(left.name),
                                            comparison, 
                                            self.t_table.Column(right.name))
        else:
            return self.t_table.SimpleQuery(self.t_table.Column(left.name),
                                            comparison, right)

    def Dependencies(self):
        """Return a tuple containing the original table"""
        return (self.table,)

    def Width(self, col):
        return self.table.Width(col)
1	# Copyright (C) 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	#
5	# This program is free software under the GPL (>=v2)
6	# Read the file COPYING coming with the software for details.
7
8	"""Database for transient data
9
10	This database is intended for data representations needed during the
11	lifetime of a Thuban session but which is not permanent. Examples of
12	this are for instance a join of two DBF files where the DBF files are
13	the permanent representation of the data and the join only exists in the
14	Thuban session and is reconstructed when the session is opened.
15	"""
16
17	__version__ = "$Revision$"
18	# $Source$
19	# $Id$
20
21	from sqlite import connect
22
23	from base import TitledObject
24
25	import table
26
27	sql_type_map = {
28	table.FIELDTYPE_INT: "INTEGER",
29	table.FIELDTYPE_STRING: "VARCHAR",
30	table.FIELDTYPE_DOUBLE: "FLOAT",
31	}
32
33	type_converter_map = {
34	table.FIELDTYPE_INT: int,
35	table.FIELDTYPE_STRING: str,
36	table.FIELDTYPE_DOUBLE: float,
37	}
38
39	class TransientDatabase:
40
41	def __init__(self, filename):
42	self.filename = filename
43	self.conn = connect(filename)
44	# Counters to produce unique table and column names
45	self.num_tables = 0
46	self.num_cols = 0
47	# Since there's only once process using the SQLite database, we
48	# might be able to get a tad more speed with default_synchronous
49	# OFF. So far I haven't seen any measurable speedup, though.
50	#self.execute("PRAGMA default_synchronous = OFF")
51
52	def __del__(self):
53	self.close()
54
55	def close(self):
56	if self.conn is not None:
57	self.conn.close()
58	self.conn = None
59
60	def new_table_name(self):
61	self.num_tables += 1
62	return "Table%03d" % self.num_tables
63
64	def new_column_name(self):
65	self.num_cols += 1
66	return "Col%03d" % self.num_cols
67
68	def execute(self, *args):
69	"""execute the SQL statement in the database and return the result"""
70	cursor = self.conn.cursor()
71	cursor.execute(*args)
72	result = cursor.fetchone()
73	self.conn.commit()
74	return result
75
76	def cursor(self):
77	return self.conn.cursor()
78
79
80	class ColumnReference:
81
82	def __init__(self, name, type, internal_name):
83	self.name = name
84	self.type = type
85	self.internal_name = internal_name
86
87
88	class TransientTableBase(table.OldTableInterfaceMixin):
89
90	"""Base class for tables in the transient database"""
91
92	def __init__(self, transient_db):
93	"""Initialize the table for use with the given transient db"""
94	self.db = transient_db
95	self.tablename = self.db.new_table_name()
96	self.indexed_columns = {}
97	self.read_record_cursor = None
98	self.read_record_last_row = None
99	self.read_record_last_result = None
100
101	def create(self, columns):
102	self.columns = columns
103	self.name_to_column = {}
104	self.orig_names = []
105	self.internal_to_orig = {}
106	self.orig_to_internal = {}
107	self.column_map = {}
108
109	# Create the column objects and fill various maps and lists
110	for index in range(len(self.columns)):
111	col = self.columns[index]
112	self.name_to_column[col.name] = col
113	self.orig_names.append(col.name)
114	self.internal_to_orig[col.internal_name] = col.name
115	self.orig_to_internal[col.name] = col.internal_name
116	self.column_map[col.name] = col
117	self.column_map[index] = col
118
119	# Build the CREATE TABLE statement and create the table in the
120	# database
121	table_types = ["id INTEGER PRIMARY KEY"]
122	for col in self.columns:
123	table_types.append("%s %s" % (col.internal_name,
124	sql_type_map[col.type]))
125	table_stmt = "CREATE TABLE %s (\n %s\n);" % (self.tablename,
126	",\n ".join(table_types))
127	self.db.execute(table_stmt)
128
129	def transient_table(self):
130	"""
131	Return a table whose underlying implementation is in the transient db
132	"""
133	return self
134
135	def ensure_index(self, column):
136	"""Ensure that there's an index on the given column"""
137	if not column in self.indexed_columns:
138	internal_name = self.orig_to_internal[column]
139	indexname = "Index_%s_%s" % (self.tablename, internal_name)
140	stmt = "CREATE INDEX %s ON %s (%s);" % (indexname, self.tablename,
141	internal_name)
142	self.db.execute(stmt)
143	self.indexed_columns[column] = 1
144
145	def NumColumns(self):
146	return len(self.columns)
147
148	def NumRows(self):
149	result = self.db.execute("SELECT count(*) FROM %s;" % self.tablename)
150	return int(result[0])
151
152	def Columns(self):
153	return self.columns
154
155	def Column(self, col):
156	return self.column_map[col]
157
158	def HasColumn(self, col):
159	"""Return whether the table has a column with the given name or index
160	"""
161	return self.column_map.has_key(col)
162
163	def RowIdToOrdinal(self, gid):
164	"""Return the row ordinal given its id
165
166	At the moment the transient tables are only used for tables that
167	don't distinguish between row number and row id, so the value is
168	returned unchanged.
169	"""
170	return gid
171
172	def RowOrdinalToId(self, num):
173	"""Return the rowid for given its ordinal
174
175	At the moment the transient tables are only used for tables that
176	don't distinguish between row number and row id, so the value is
177	returned unchanged.
178	"""
179	return num
180
181	def ReadRowAsDict(self, index, row_is_ordinal = 0):
182	"""Return the entire row as a dictionary with column names as keys
183
184	The row_is_ordinal is ignored because at the moment the
185	transient tables are only used for DBF files where it doesn't
186	matter.
187	"""
188	# Implementation Strategy: Executing a completely new select
189	# statement every time this method is called is too slow. The
190	# most important usage is to read the records more or less
191	# sequentially. This happens e.g. when drawing a layer with a
192	# classification where the shapes are drawn in order of the
193	# shape ids. Another pattern is that the same row is requested
194	# several times in a row. This happens in the table view, for
195	# instance.
196
197	# We can exploit this to make access faster by having one cursor
198	# open all the time and keeping the last row read around in case
199	# the same row is accessed again the next time and if the row
200	# index is larger than the row we have read last we simply fetch
201	# rows from the cursor until we've reached the requested row. If
202	# the requested row index is smaller then we start a new cursor.
203
204	# FIXME: So far this scheme seems to work well enough. Obvious
205	# improvements would be to start the cursor at exactly the
206	# requested row (should be efficient and easy to do now that the
207	# id is the primary key) and to perhaps to also start a new
208	# cursor if the requested index is much larger than the last row
209	# so that we don't read and discard lots of the rows.
210
211	# Check whether we have to start a new cursor
212	if self.read_record_cursor is None or index <self.read_record_last_row:
213	stmt = ("SELECT %s FROM %s;"
214	% (", ".join([c.internal_name for c in self.columns]),
215	self.tablename))
216	self.read_record_cursor = self.db.cursor()
217	self.read_record_cursor.execute(stmt)
218	self.read_record_last_row = -1
219	self.read_record_last_result = None
220
221	# Now we should have a cursor at a position less than or equal
222	# to the index so the following if statement will always set
223	# result to a suitable value
224	assert index >= self.read_record_last_row
225
226	if index == self.read_record_last_row:
227	result = self.read_record_last_result
228	else:
229	for i in range(index - self.read_record_last_row):
230	result = self.read_record_cursor.fetchone()
231	self.read_record_last_result = result
232	self.read_record_last_row = index
233	return dict(zip(self.orig_names, result))
234
235	def ReadValue(self, row, col, row_is_ordinal = 0):
236	"""Return the value of the specified row and column
237
238	The col parameter may be the index of the column or its name.
239
240	The row_is_ordinal is ignored because at the moment the
241	transient tables are only used for DBF files where it doesn't
242	matter.
243	"""
244	# Depending on the actual access patterns of the table data, it
245	# might be a bit faster in some circumstances to not implement
246	# this via ReadRowAsDict, but this simple implementation should
247	# be fast enough for most purposes.
248	return self.ReadRowAsDict(row)[self.column_map[col].name]
249
250	def ValueRange(self, col):
251	col = self.column_map[col]
252	iname = col.internal_name
253	min, max = self.db.execute("SELECT min(%s), max(%s) FROM %s;"
254	% (iname, iname, self.tablename))
255	converter = type_converter_map[col.type]
256	return (converter(min), converter(max))
257
258	def UniqueValues(self, col):
259	iname = self.column_map[col].internal_name
260	cursor = self.db.cursor()
261	cursor.execute("SELECT %s FROM %s GROUP BY %s;"
262	% (iname, self.tablename, iname))
263	result = []
264	while 1:
265	row = cursor.fetchone()
266	if row is None:
267	break
268	result.append(row[0])
269	return result
270
271	def Width(self, col):
272	"""Return the maximum width of values in the column
273
274	The return value is the the maximum length of string
275	representation of the values in the column (represented by index
276	or name).
277	"""
278	max = 0
279
280	type = self.column_map[col].type
281	iname = self.column_map[col].internal_name
282	cursor = self.db.cursor()
283	cursor.execute("SELECT %s FROM %s;" % (iname, self.tablename))
284	values = [ i[0] for i in cursor.fetchall()]
285	if not values:
286	return None
287
288	if type == table.FIELDTYPE_DOUBLE:
289	format = "%.12f"
290	elif type == table.FIELDTYPE_INT:
291	format = "%d"
292	else:
293	format = "%s"
294	for value in values:
295	if value is None: continue
296	l = len(format % value)
297	if l > max:
298	max = l
299
300	return max
301
302	def SimpleQuery(self, left, comparison, right):
303	"""Return the indices of all rows that matching a condition.
304
305	Parameters:
306	left -- The column object for the left side of the comparison
307
308	comparison -- The comparison operator as a string. It must be
309	one of '==', '!=', '<', '<=', '>=', '>'
310
311	right -- The right hand side of the comparison. It must be
312	either a column object or a value, i.e. a string,
313	int or float.
314
315	The return value is a sorted list of the indices of the rows
316	where the condition is true.
317	"""
318	if comparison not in ("==", "!=", "<", "<=", ">=", ">"):
319	raise ValueError("Comparison operator %r not allowed" % comparison)
320
321	if hasattr(right, "internal_name"):
322	right_template = right.internal_name
323	params = ()
324	else:
325	right_template = "%s"
326	params = (right,)
327
328	query = "SELECT id FROM %s WHERE %s %s %s ORDER BY id;" \
329	% (self.tablename, left.internal_name, comparison,
330	right_template)
331
332	cursor = self.db.cursor()
333	cursor.execute(query, params)
334	result = []
335	while 1:
336	row = cursor.fetchone()
337	if row is None:
338	break
339	result.append(row[0])
340	return result
341
342	def Dependencies(self):
343	"""Placeholder for a method in a derived class.
344
345	Return a sequence with the tables and other data objects that
346	self depends on.
347	"""
348	raise NotImplementedError
349
350
351	class TransientTable(TitledObject, TransientTableBase):
352
353	"""A Table in a transient DB that starts as the copy of a Thuban Table."""
354
355	def __init__(self, transient_db, table):
356	"""Create a new table in the given transient DB as a copy of table
357
358	The table argument can be any object implementing the Table
359	interface.
360	"""
361	TransientTableBase.__init__(self, transient_db)
362	TitledObject.__init__(self, table.Title())
363	self.create(table)
364
365	def create(self, table):
366	columns = []
367	for col in table.Columns():
368	columns.append(ColumnReference(col.name, col.type,
369	self.db.new_column_name()))
370	TransientTableBase.create(self, columns)
371
372	# copy the input table to the transient db
373
374	# A key to insert to use for the formatting of the insert
375	# statement. The key must not be equal to any of the column
376	# names so we construct one by building a string of x's that is
377	# longer than any of the column names
378	id_key = max([len(col.name) for col in self.columns]) * "x"
379
380	insert_template = "INSERT INTO %s (id, %s) VALUES (%%(%s)s, %s);" \
381	% (self.tablename,
382	", ".join([col.internal_name
383	for col in self.columns]),
384	id_key,
385	", ".join(["%%(%s)s" % col.name
386	for col in self.columns]))
387	cursor = self.db.cursor()
388	for i in range(table.NumRows()):
389	row = table.ReadRowAsDict(i)
390	row[id_key] = i
391	cursor.execute(insert_template, row)
392	self.db.conn.commit()
393
394
395
396	class TransientJoinedTable(TitledObject, TransientTableBase):
397
398	"""A Table in the transient DB that contains a join of two tables"""
399
400	def __init__(self, transient_db, left_table, left_field,
401	right_table, right_field = None, outer_join = False):
402	"""Create a new table in the transient DB as a join of two tables.
403
404	Both input tables, left_table and right_table must have a
405	transient_table method that returns a table object for a table
406	in the transient database. The join is performed on the condition
407	that the value of the left_field column the the left table is
408	equal to the value of the right_field in the right_table.
409
410	The joined table contains all columns of the input tables,
411	however, the column names of the right table may be changed
412	slightly to make them unique in the joined table. This is
413	currently done by appending a sufficient number of underscores
414	('_').
415	"""
416	TransientTableBase.__init__(self, transient_db)
417	self.dependencies = (left_table, right_table)
418	self.left_table = left_table.transient_table()
419	self.left_field = left_field
420	self.right_table = right_table.transient_table()
421	if right_field:
422	self.right_field = right_field
423	else:
424	self.right_field = self.left_field
425	self.outer_join = outer_join
426
427	title = "Join of %(left)s and %(right)s" \
428	% {"left": self.left_table.Title(),
429	"right": self.right_table.Title()}
430	TitledObject.__init__(self, title)
431
432	self.create()
433
434	def create(self):
435	"""Internal: Create the table with the joined data"""
436	self.tablename = self.db.new_table_name()
437
438	self.right_table.ensure_index(self.right_field)
439
440	# determine the internal column names to join on before
441	# coalescing the column information because if the external
442	# column names are the same they will be mapped to the same
443	# internal name afterwards.
444	internal_left_col = self.left_table.orig_to_internal[self.left_field]
445	internal_right_col =self.right_table.orig_to_internal[self.right_field]
446
447	# Coalesce the column information
448	visited = {}
449	columns = []
450	newcolumns = []
451	for table in (self.left_table, self.right_table):
452	for col in table.Columns():
453	colname = col.name
454	# We can't allow multiple columns with the same
455	# original name, so append '_' to this one until
456	# it is unique.
457	# FIXME: There should be a better solution.
458	while colname in visited:
459	colname = colname + '_'
460	columns.append((table.tablename, col))
461	newcol = ColumnReference(colname, col.type,
462	"Col%03d" % (len(newcolumns)+1))
463	newcolumns.append(newcol)
464	visited[colname] = 1
465	TransientTableBase.create(self, newcolumns)
466
467	# Copy the joined data to the table.
468	newinternal_names = [col.internal_name for col in self.columns]
469	internal_references = ["%s.%s" % (table, col.internal_name)
470	for table, col in columns]
471	if self.outer_join:
472	join_operator = 'LEFT OUTER JOIN'
473	else:
474	join_operator = 'JOIN'
475	stmt = ("INSERT INTO %s (id, %s) SELECT %s.id, %s FROM %s"
476	" %s %s ON %s.%s = %s.%s;"
477	% (self.tablename,
478	", ".join(newinternal_names),
479	self.left_table.tablename,
480	", ".join(internal_references),
481	self.left_table.tablename,
482	join_operator,
483	self.right_table.tablename,
484	self.left_table.tablename,
485	internal_left_col,
486	self.right_table.tablename,
487	internal_right_col))
488	self.db.execute(stmt)
489
490	def Dependencies(self):
491	"""Return a tuple with the two tables the join depends on."""
492	return self.dependencies
493
494	def JoinType(self):
495	"""Return the type of the join (either 'INNER' or 'LEFT OUTER')"""
496	if self.outer_join:
497	return "LEFT OUTER"
498	else:
499	return "INNER"
500
501
502	class AutoTransientTable(TitledObject, table.OldTableInterfaceMixin):
503
504	"""Table that copies data to a transient table on demand.
505
506	The AutoTransientTable takes another table as input and copies data
507	to a table in a TransientDatabase instance on demand.
508	"""
509
510	def __init__(self, transient_db, table):
511	TitledObject.__init__(self, table.Title())
512	self.transient_db = transient_db
513	self.table = table
514	self.t_table = None
515
516	def Columns(self):
517	return self.table.Columns()
518
519	def Column(self, col):
520	return self.table.Column(col)
521
522	def HasColumn(self, col):
523	"""Return whether the table has a column with the given name or index
524	"""
525	return self.table.HasColumn(col)
526
527	def NumRows(self):
528	return self.table.NumRows()
529
530	def NumColumns(self):
531	return self.table.NumColumns()
532
533	def RowIdToOrdinal(self, gid):
534	"""Return the row ordinal given its id"""
535	if self.t_table is not None:
536	return self.t_table.RowIdToOrdinal(gid)
537	else:
538	return self.table.RowIdToOrdinal(gid)
539
540	def RowOrdinalToId(self, num):
541	"""Return the rowid for given its ordinal"""
542	if self.t_table is not None:
543	return self.t_table.RowOrdinalToId(num)
544	else:
545	return self.table.RowOrdinalToId(num)
546
547	def ReadRowAsDict(self, record, row_is_ordinal = 0):
548	"""Return the record no. record as a dict mapping field names to values
549	"""
550	if self.t_table is not None:
551	return self.t_table.ReadRowAsDict(record,
552	row_is_ordinal = row_is_ordinal)
553	else:
554	return self.table.ReadRowAsDict(record,
555	row_is_ordinal = row_is_ordinal)
556
557	def ReadValue(self, row, col, row_is_ordinal = 0):
558	"""Return the value of the specified row and column
559
560	The col parameter may be the index of the column or its name.
561	"""
562	if self.t_table is not None:
563	return self.t_table.ReadValue(row, col,
564	row_is_ordinal = row_is_ordinal)
565	else:
566	return self.table.ReadValue(row, col,
567	row_is_ordinal = row_is_ordinal)
568
569	def copy_to_transient(self):
570	"""Internal: Create a transient table and copy the data into it"""
571	self.t_table = TransientTable(self.transient_db, self)
572
573	def transient_table(self):
574	"""
575	Return a table whose underlying implementation is in the transient db
576	"""
577	if self.t_table is None:
578	self.copy_to_transient()
579	return self.t_table
580
581	def ValueRange(self, col):
582	if self.t_table is None:
583	self.copy_to_transient()
584	return self.t_table.ValueRange(col)
585
586	def UniqueValues(self, col):
587	if self.t_table is None:
588	self.copy_to_transient()
589	return self.t_table.UniqueValues(col)
590
591	def SimpleQuery(self, left, comparison, right):
592	if self.t_table is None:
593	self.copy_to_transient()
594	# Make sure to use the column object of the transient table. The
595	# left argument is always a column object so we can just ask the
596	# t_table for the right object.
597	if hasattr(right, "name"):
598	return self.t_table.SimpleQuery(self.t_table.Column(left.name),
599	comparison,
600	self.t_table.Column(right.name))
601	else:
602	return self.t_table.SimpleQuery(self.t_table.Column(left.name),
603	comparison, right)
604
605	def Dependencies(self):
606	"""Return a tuple containing the original table"""
607	return (self.table,)
608
609	def Width(self, col):
610	return self.table.Width(col)
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision