Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import inspect
import warnings

import dbflib

# Identifiers for the column data types a Table instance can report.
FIELDTYPE_STRING = "string"
FIELDTYPE_INT = "int"
FIELDTYPE_DOUBLE = "double"


# Translation table from the field type constants of dbflib to the
# FIELDTYPE_* identifiers defined in this module.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Each method of the old interface issues a DeprecationWarning and
    then delegates to the corresponding method of the new interface
    (NumRows, NumColumns, Column, ValueRange, UniqueValues and
    ReadRowAsDict), which the class this mixin is combined with has to
    provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The method name used in the message is the name of the function
        that called this method, so it has to be called directly from
        the deprecated method itself.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        # stacklevel 3: level 1 is this method, level 2 the deprecated
        # method, level 3 the code that called the deprecated method.
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        Shared by field_info and field_info_by_name. It deliberately
        doesn't issue a warning itself so that each of the public
        methods produces exactly one warning.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows. Use NumRows instead"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns. Use NumColumns instead
        """
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.

        Deprecated. Use Column instead.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info or None for unknown columns

        None is returned if the Column method raises a KeyError for col.

        Deprecated. Use HasColumn and Column instead.
        """
        self.__deprecation_warning()
        try:
            # Don't go through field_info here. That would issue a
            # second warning which names field_info but is attributed
            # to the caller of field_info_by_name.
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return the value range of a column as ((min, None), (max, None))

        If ValueRange returns None, which it is documented to do for an
        empty table, None is returned as well.

        Deprecated. Use ValueRange instead.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            # Nothing to unpack for an empty table.
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Description of a column in a DBFTable

    Public attributes of the instances:

    name -- Name of the column
    type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE)
    index -- The index of the column
    width -- The width of the data in the column
    prec -- The precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        """Store the parameters in the attributes of the same names"""
        self.name, self.type = name, type
        self.width, self.prec = width, prec
        self.index = index


class DBFTable(OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called do we try to open the DBF file for writing
    # as well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data.

    def __init__(self, filename):
        """Initialize the DBFTable from the DBF file filename

        The file is opened with dbflib's default mode and a DBFColumn
        object is created for each of its fields.
        """
        self.filename = filename
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its DBFColumn object.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The in operator instead of has_key, which newer Python
        # versions don't provide anymore.
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # The keys of the dictionary are the values seen so far.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list so that the in-place sort also
        # works when keys() doesn't return a list.
        values = list(seen.keys())
        values.sort()
        return values


    # DBF specific interface parts.

    def Destroy(self):
        """Close the DBF file and drop the reference to it

        Calling Destroy again afterwards does nothing.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.

        On the first call the DBF file is reopened in "r+b" mode. If
        that fails the exception is propagated and the read-only file
        object stays in use.
        """
        if not self._writable:
            # Open the new file object before closing the old one so
            # that self.dbf stays usable if opening fails.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()


class MemoryColumn:

    """Description of a column in a MemoryTable

    Public attributes of the instances:

    name -- Name of the column
    type -- Type of the column
    index -- The index of the column
    """

    def __init__(self, name, type, index):
        """Store the parameters in the attributes of the same names"""
        self.name, self.type, self.index = name, type, index

class MemoryTable(OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied. write_record modifies it in place.
        """
        self.data = data

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its MemoryColumn object.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The in operator instead of has_key, which newer Python
        # versions don't provide anymore.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # The keys of the dictionary are the values seen so far.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list so that the in-place sort also
        # works when keys() doesn't return a list.
        values = list(seen.keys())
        values.sort()
        return values


    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary (anything with a keys method) its keys
        must identify columns of the table. Only the columns contained
        in the dictionary are changed, the other values of the row are
        kept. A KeyError is raised for an unknown column and the row is
        left unmodified in that case.

        If it's a sequence it replaces the entire row as is.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        if hasattr(values, "keys"):
            # Build the updated row on a copy so that the stored row
            # only changes if all keys are valid.
            row = list(self.data[record])
            for name, value in values.items():
                row[self.column_map[name].index] = value
            values = tuple(row)
        self.data[record] = values
1	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	# Jan-Oliver Wagner <[email protected]>
5	#
6	# This program is free software under the GPL (>=v2)
7	# Read the file COPYING coming with Thuban for details.
8
9	"""
10	Classes for handling tables of data.
11	"""
12
13	__version__ = "$Revision$"
14
15	import inspect
16	import warnings
17
18	import dbflib
19
20	# the field types supported by a Table instance.
21	FIELDTYPE_INT = "int"
22	FIELDTYPE_STRING = "string"
23	FIELDTYPE_DOUBLE = "double"
24
25
26	# map the dbflib constants for the field types to our constants
27	dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
28	dbflib.FTInteger: FIELDTYPE_INT,
29	dbflib.FTDouble: FIELDTYPE_DOUBLE}
30
31
32	class OldTableInterfaceMixin:
33
34	"""Mixin to implement the old table interface using the new one"""
35
36	def __deprecation_warning(self):
37	"""Issue a DeprecationWarning for code hat uses the old interface"""
38	callername = inspect.currentframe().f_back.f_code.co_name
39	warnings.warn("The %s method of the old table interface"
40	" is deprecated" % callername,
41	DeprecationWarning, stacklevel = 3)
42
43	def record_count(self):
44	self.__deprecation_warning()
45	return self.NumRows()
46
47	def field_count(self):
48	self.__deprecation_warning()
49	return self.NumColumns()
50
51	def field_info(self, field):
52	"""Return a tuple (type, name, width, prec) for the field no. field
53
54	type is the data type of the field, name the name, width the
55	field width in characters and prec the decimal precision. width
56	and prec will be zero if the information returned by the Column
57	method doesn't provide values for them.
58	"""
59	self.__deprecation_warning()
60	col = self.Column(field)
61	return (col.type, col.name,
62	getattr(col, "width", 0), getattr(col, "prec", 0))
63
64	def field_info_by_name(self, col):
65	self.__deprecation_warning()
66	try:
67	return self.field_info(col)
68	except KeyError:
69	# FIXME: It may be that field_info raises other exceptions
70	# when the name is not a valid column name.
71	return None
72
73	def field_range(self, fieldName):
74	self.__deprecation_warning()
75	min, max = self.ValueRange(fieldName)
76	return ((min, None), (max, None))
77
78	def GetUniqueValues(self, field):
79	self.__deprecation_warning()
80	return self.UniqueValues(field)
81
82	def read_record(self, r):
83	self.__deprecation_warning()
84	return self.ReadRowAsDict(r)
85
86
87
88	class DBFColumn:
89
90	"""Description of a column in a DBFTable
91
92	Instances have the following public attributes:
93
94	name -- Name of the column
95	type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
96	FIELDTYPE_DOUBLE)
97	index -- The index of the column
98	width -- the width of the data in the column
99	prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
100	"""
101
102	def __init__(self, name, type, width, prec, index):
103	self.name = name
104	self.type = type
105	self.width = width
106	self.prec = prec
107	self.index = index
108
109
110	class DBFTable(OldTableInterfaceMixin):
111
112	"""
113	Table interface for the data in a DBF file
114	"""
115
116	# Implementation strategy regarding writing to a DBF file:
117	#
118	# Most of the time Thuban only needs to read from a table and it is
119	# important that Thuban can work with read-only files. Therefore the
120	# DBF file is opened only for reading initially. Only when
121	# write_record is called we try to open the DBF file for writing as
122	# well. If that succeeds the read/write DBF file will be used for
123	# all IO afterwards.
124	#
125	# It's important to use the same DBF file object for both reading
126	# and writing to make sure that reading a records after writing
127	# returns the new values. With two separate objects this wouldn't
128	# work because a DBF file object buffers some data
129
130	def __init__(self, filename):
131	self.filename = filename
132	self.dbf = dbflib.DBFFile(filename)
133
134	# If true, self.dbf is open for writing.
135	self._writable = 0
136
137	# Create the column information objects
138	self.columns = []
139	self.column_map = {}
140	for i in range(self.NumColumns()):
141	ftype, name, width, prec = self.dbf.field_info(i)
142	ftype = dbflib_fieldtypes[ftype]
143	index = len(self.columns)
144	col = DBFColumn(name, ftype, width, prec, index)
145	self.columns.append(col)
146	self.column_map[name] = col
147	self.column_map[index] = col
148
149	def NumRows(self):
150	"""Return the number of rows in the table"""
151	return self.dbf.record_count()
152
153	def NumColumns(self):
154	"""Return the number of columns in the table"""
155	return self.dbf.field_count()
156
157	def Columns(self):
158	"""Return the table's colum definitions
159
160	The return value is a sequence of DBFColumn instances, one for
161	each column.
162	"""
163	return self.columns
164
165	def Column(self, col):
166	"""Return information about the column given by its name or index
167
168	The returned object is an instance of DBFColumn
169	"""
170	return self.column_map[col]
171
172	def HasColumn(self, col):
173	"""Return whether the table has a column with the given name or index
174	"""
175	return self.column_map.has_key(col)
176
177	def ReadRowAsDict(self, row):
178	"""Return the entire row as a dictionary with column names as keys"""
179	return self.dbf.read_record(row)
180
181	def ReadValue(self, row, col):
182	"""Return the value of the specified row and column
183
184	The col parameter may be the index of the column or its name.
185	"""
186	return self.dbf.read_record(row)[self.column_map[col].name]
187
188	def ValueRange(self, col):
189	"""Return the minimum and maximum values of the values in the column
190
191	The return value is a tuple (min, max) unless the table is empty
192	in which case the return value is None.
193	"""
194	count = self.NumRows()
195
196	if count == 0:
197	return None
198
199	min = max = self.ReadValue(0, col)
200	for i in range(1, count):
201	value = self.ReadValue(i, col)
202	if value < min:
203	min = value
204	elif value > max:
205	max = value
206
207	return (min, max)
208
209	def UniqueValues(self, col):
210	"""Return a sorted list of all unique values in the column col"""
211	dict = {}
212
213	for i in range(self.NumRows()):
214	value = self.ReadValue(i, col)
215	dict[value] = 0
216
217	values = dict.keys()
218	values.sort()
219	return values
220
221
222	# DBF specific interface parts.
223
224	def Destroy(self):
225	self.dbf.close()
226	self.dbf = None
227
228	def write_record(self, record, values):
229	"""Write the values into the record
230
231	The values parameter may either be a dictionary or a sequence.
232
233	If it's a dictionary the keys must be the names of the fields
234	and their value must have a suitable type. Only the fields
235	actually contained in the dictionary are written. Fields for
236	which there's no item in the dict are not modified.
237
238	If it's a sequence, all fields must be present in the right
239	order.
240	"""
241	if not self._writable:
242	new_dbf = dbflib.DBFFile(self.filename, "r+b")
243	self.dbf.close()
244	self.dbf = new_dbf
245	self._writable = 1
246	self.dbf.write_record(record, values)
247	self.dbf.commit()
248
249
250
251	class MemoryColumn:
252
253	def __init__(self, name, type, index):
254	self.name = name
255	self.type = type
256	self.index = index
257
258	class MemoryTable(OldTableInterfaceMixin):
259
260	"""Very simple table implementation that operates on a list of tuples"""
261
262	def __init__(self, fields, data):
263	"""Initialize the MemoryTable
264
265	Parameters:
266	fields -- List of (name, field_type) pairs
267	data -- List of tuples, one for each row of data
268	"""
269	self.data = data
270
271	# Create the column information objects
272	self.columns = []
273	self.column_map = {}
274	for name, ftype in fields:
275	index = len(self.columns)
276	col = MemoryColumn(name, ftype, index)
277	self.columns.append(col)
278	self.column_map[name] = col
279	self.column_map[index] = col
280
281	def NumColumns(self):
282	"""Return the number of columns in the table"""
283	return len(self.columns)
284
285	def Column(self, col):
286	"""Return information about the column given by its name or index
287
288	The returned object is an instance of MemoryColumn.
289	"""
290	return self.column_map[col]
291
292	def Columns(self):
293	"""Return the table's colum definitions
294
295	The return value is a sequence of MemoryColumn instances, one
296	for each column.
297	"""
298	return self.columns
299
300	def HasColumn(self, col):
301	"""Return whether the table has a column with the given name or index
302	"""
303	return self.column_map.has_key(col)
304
305	def NumRows(self):
306	"""Return the number of rows in the table"""
307	return len(self.data)
308
309	def ReadValue(self, row, col):
310	"""Return the value of the specified row and column
311
312	The col parameter may be the index of the column or its name.
313	"""
314	return self.data[row][self.column_map[col].index]
315
316	def ReadRowAsDict(self, index):
317	"""Return the entire row as a dictionary with column names as keys"""
318	return dict([(col.name, self.data[index][col.index])
319	for col in self.columns])
320
321	def ValueRange(self, col):
322	"""Return the minimum and maximum values of the values in the column
323
324	The return value is a tuple (min, max) unless the table is empty
325	in which case the return value is None.
326	"""
327
328	index = self.column_map[col].index
329	values = [row[index] for row in self.data]
330	if not values:
331	return None
332
333	return min(values), max(values)
334
335	def UniqueValues(self, col):
336	"""Return a sorted list of all unique values in the column col"""
337	dict = {}
338
339	for i in range(self.NumRows()):
340	value = self.ReadValue(i, col)
341	dict[value] = 0
342
343	values = dict.keys()
344	values.sort()
345	return values
346
347
348	def write_record(self, record, values):
349	# TODO: Check for correct lenght and perhaps also
350	# for correct types in case values is a tuple. How to report problems?
351	# TODO: Allow values to be a dictionary and write the single
352	# fields that are specified.
353	self.data[record] = values
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision