# Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import dbflib

# The field types supported by a Table instance. DBFTable stores one of
# these string constants as the type attribute of each of its DBFColumn
# objects.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants. DBFTable
# uses this mapping to translate the type code returned by the DBF file's
# field_info method.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class this is mixed into has to provide the methods of the new
    interface that are called here: NumRows, NumColumns, Column,
    ValueRange, UniqueValues and ReadRowAsDict.
    """

    def record_count(self):
        """Return the number of rows (old name for NumRows)"""
        return self.NumRows()

    def field_count(self):
        """Return the number of columns (old name for NumColumns)"""
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        col = self.Column(field)
        return (col.type, col.name,
               getattr(col, "width", 0), getattr(col, "prec", 0))

    def field_info_by_name(self, col):
        """Return the same tuple as field_info, or None on a KeyError

        Only a KeyError raised by field_info is turned into None; any
        other exception is passed on to the caller.
        """
        try:
            return self.field_info(col)
        except KeyError:
            # FIXME: It may be that field_info raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return the value range of a column as ((min, None), (max, None))

        min and max are the values returned by ValueRange. The second
        item of each pair is always None. If ValueRange returns None,
        which the table classes in this module do for an empty table,
        min and max are None as well.
        """
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            # None cannot be unpacked into two values, so report the
            # missing range explicitly instead of raising a TypeError.
            return ((None, None), (None, None))
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Return the result of UniqueValues for the given field"""
        return self.UniqueValues(field)

    def read_record(self, r):
        """Return the row r as returned by ReadRowAsDict"""
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Description of one column of a DBFTable

    Public attributes of an instance:

    name -- the name of the column
    type -- the type of the column (one of FIELDTYPE_STRING,
            FIELDTYPE_INT or FIELDTYPE_DOUBLE)
    index -- the index of the column
    width -- the width of the data in the column
    prec -- the precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain record object: all values are stored unchanged.
        self.name, self.type = name, type
        self.width, self.prec = width, prec
        self.index = index


class DBFTable(OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Initialize the table from the DBF file named filename

        The file is opened with dbflib's default mode; it is only
        reopened for writing by write_record.
        """
        self.filename = filename
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its DBFColumn.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if col is neither a known name nor a known index.
        """
        return self.column_map[col]

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Start with the first row and compare all others against it.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the values as dictionary keys to eliminate duplicates.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; keys() does
        # not return a sortable list on every Python version.
        values = list(seen.keys())
        values.sort()
        return values


    # DBF specific interface parts.

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write file before closing the current one,
            # so that if opening fails with an exception self.dbf is
            # still the open file used so far.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()


# Temporary backwards compatibility: keep the name Table available as
# an alias for DBFTable.
Table = DBFTable


class MemoryColumn:

    """Description of one column of a MemoryTable

    Public attributes of an instance:

    name -- the name of the column
    type -- the type of the column as given to the constructor
    index -- the index of the column
    """

    def __init__(self, name, type, index):
        # Plain record object: all values are stored unchanged.
        self.name, self.type, self.index = name, type, index

class MemoryTable(OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as is, not copied, so write_record
        modifies the list that was passed in.
        """
        self.data = data

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its MemoryColumn.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if col is neither a known name nor a known index.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the values as dictionary keys to eliminate duplicates.
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; keys() does
        # not return a sortable list on every Python version.
        values = list(seen.keys())
        values.sort()
        return values


    def write_record(self, record, values):
        """Replace the row with index record by values

        values is stored as the new row without any checks.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
1	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	# Jan-Oliver Wagner <[email protected]>
5	#
6	# This program is free software under the GPL (>=v2)
7	# Read the file COPYING coming with Thuban for details.
8
9	"""
10	Classes for handling tables of data.
11	"""
12
13	__version__ = "$Revision$"
14
15	import dbflib
16
17	# the field types supported by a Table instance.
18	FIELDTYPE_INT = "int"
19	FIELDTYPE_STRING = "string"
20	FIELDTYPE_DOUBLE = "double"
21
22
23	# map the dbflib constants for the field types to our constants
24	dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
25	dbflib.FTInteger: FIELDTYPE_INT,
26	dbflib.FTDouble: FIELDTYPE_DOUBLE}
27
28
29	class OldTableInterfaceMixin:
30
31	"""Mixin to implement the old table interface using the new one"""
32
33	def record_count(self):
34	return self.NumRows()
35
36	def field_count(self):
37	return self.NumColumns()
38
39	def field_info(self, field):
40	"""Return a tuple (type, name, width, prec) for the field no. field
41
42	type is the data type of the field, name the name, width the
43	field width in characters and prec the decimal precision. width
44	and prec will be zero if the information returned by the Column
45	method doesn't provide values for them.
46	"""
47	col = self.Column(field)
48	return (col.type, col.name,
49	getattr(col, "width", 0), getattr(col, "prec", 0))
50
51	def field_info_by_name(self, col):
52	try:
53	return self.field_info(col)
54	except KeyError:
55	# FIXME: It may be that field_info raises other exceptions
56	# when the name is not a valid column name.
57	return None
58
59	def field_range(self, fieldName):
60	min, max = self.ValueRange(fieldName)
61	return ((min, None), (max, None))
62
63	def GetUniqueValues(self, field):
64	return self.UniqueValues(field)
65
66	def read_record(self, r):
67	return self.ReadRowAsDict(r)
68
69
70
71	class DBFColumn:
72
73	"""Description of a column in a DBFTable
74
75	Instances have the following public attributes:
76
77	name -- Name of the column
78	type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
79	FIELDTYPE_DOUBLE)
80	index -- The index of the column
81	width -- the width of the data in the column
82	prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
83	"""
84
85	def __init__(self, name, type, width, prec, index):
86	self.name = name
87	self.type = type
88	self.width = width
89	self.prec = prec
90	self.index = index
91
92
93	class DBFTable(OldTableInterfaceMixin):
94
95	"""
96	Table interface for the data in a DBF file
97	"""
98
99	# Implementation strategy regarding writing to a DBF file:
100	#
101	# Most of the time Thuban only needs to read from a table and it is
102	# important that Thuban can work with read-only files. Therefore the
103	# DBF file is opened only for reading initially. Only when
104	# write_record is called we try to open the DBF file for writing as
105	# well. If that succeeds the read/write DBF file will be used for
106	# all IO afterwards.
107	#
108	# It's important to use the same DBF file object for both reading
109	# and writing to make sure that reading a records after writing
110	# returns the new values. With two separate objects this wouldn't
111	# work because a DBF file object buffers some data
112
113	def __init__(self, filename):
114	self.filename = filename
115	self.dbf = dbflib.DBFFile(filename)
116
117	# If true, self.dbf is open for writing.
118	self._writable = 0
119
120	# Create the column information objects
121	self.columns = []
122	self.column_map = {}
123	for i in range(self.NumColumns()):
124	ftype, name, width, prec = self.dbf.field_info(i)
125	ftype = dbflib_fieldtypes[ftype]
126	index = len(self.columns)
127	col = DBFColumn(name, ftype, width, prec, index)
128	self.columns.append(col)
129	self.column_map[name] = col
130	self.column_map[index] = col
131
132	def NumRows(self):
133	"""Return the number of rows in the table"""
134	return self.dbf.record_count()
135
136	def NumColumns(self):
137	"""Return the number of columns in the table"""
138	return self.dbf.field_count()
139
140	def Columns(self):
141	"""Return the table's colum definitions
142
143	The return value is a sequence of DBFColumn instances, one for
144	each column.
145	"""
146	return self.columns
147
148	def Column(self, col):
149	"""Return information about the column given by its name or index
150
151	The returned object is an instance of DBFColumn
152	"""
153	return self.column_map[col]
154
155	def ReadRowAsDict(self, row):
156	"""Return the entire row as a dictionary with column names as keys"""
157	return self.dbf.read_record(row)
158
159	def ReadValue(self, row, col):
160	"""Return the value of the specified row and column
161
162	The col parameter may be the index of the column or its name.
163	"""
164	return self.dbf.read_record(row)[self.column_map[col].name]
165
166	def ValueRange(self, col):
167	"""Return the minimum and maximum values of the values in the column
168
169	The return value is a tuple (min, max) unless the table is empty
170	in which case the return value is None.
171	"""
172	count = self.NumRows()
173
174	if count == 0:
175	return None
176
177	min = max = self.ReadValue(0, col)
178	for i in range(1, count):
179	value = self.ReadValue(i, col)
180	if value < min:
181	min = value
182	elif value > max:
183	max = value
184
185	return (min, max)
186
187	def UniqueValues(self, col):
188	"""Return a sorted list of all unique values in the column col"""
189	dict = {}
190
191	for i in range(self.NumRows()):
192	value = self.ReadValue(i, col)
193	dict[value] = 0
194
195	values = dict.keys()
196	values.sort()
197	return values
198
199
200	# DBF specific interface parts.
201
202	def Destroy(self):
203	self.dbf.close()
204	self.dbf = None
205
206	def write_record(self, record, values):
207	"""Write the values into the record
208
209	The values parameter may either be a dictionary or a sequence.
210
211	If it's a dictionary the keys must be the names of the fields
212	and their value must have a suitable type. Only the fields
213	actually contained in the dictionary are written. Fields for
214	which there's no item in the dict are not modified.
215
216	If it's a sequence, all fields must be present in the right
217	order.
218	"""
219	if not self._writable:
220	new_dbf = dbflib.DBFFile(self.filename, "r+b")
221	self.dbf.close()
222	self.dbf = new_dbf
223	self._writable = 1
224	self.dbf.write_record(record, values)
225	self.dbf.commit()
226
227
228
229	# Temporary backwards compatibility
230	Table = DBFTable
231
232
233
234	class MemoryColumn:
235
236	def __init__(self, name, type, index):
237	self.name = name
238	self.type = type
239	self.index = index
240
241	class MemoryTable(OldTableInterfaceMixin):
242
243	"""Very simple table implementation that operates on a list of tuples"""
244
245	def __init__(self, fields, data):
246	"""Initialize the MemoryTable
247
248	Parameters:
249	fields -- List of (name, field_type) pairs
250	data -- List of tuples, one for each row of data
251	"""
252	self.data = data
253
254	# Create the column information objects
255	self.columns = []
256	self.column_map = {}
257	for name, ftype in fields:
258	index = len(self.columns)
259	col = MemoryColumn(name, ftype, index)
260	self.columns.append(col)
261	self.column_map[name] = col
262	self.column_map[index] = col
263
264	def NumColumns(self):
265	"""Return the number of columns in the table"""
266	return len(self.columns)
267
268	def Column(self, col):
269	"""Return information about the column given by its name or index
270
271	The returned object is an instance of MemoryColumn.
272	"""
273	return self.column_map[col]
274
275	def Columns(self):
276	"""Return the table's colum definitions
277
278	The return value is a sequence of MemoryColumn instances, one
279	for each column.
280	"""
281	return self.columns
282
283	def NumRows(self):
284	"""Return the number of rows in the table"""
285	return len(self.data)
286
287	def ReadValue(self, row, col):
288	"""Return the value of the specified row and column
289
290	The col parameter may be the index of the column or its name.
291	"""
292	return self.data[row][self.column_map[col].index]
293
294	def ReadRowAsDict(self, index):
295	"""Return the entire row as a dictionary with column names as keys"""
296	return dict([(col.name, self.data[index][col.index])
297	for col in self.columns])
298
299	def ValueRange(self, col):
300	"""Return the minimum and maximum values of the values in the column
301
302	The return value is a tuple (min, max) unless the table is empty
303	in which case the return value is None.
304	"""
305
306	index = self.column_map[col].index
307	values = [row[index] for row in self.data]
308	if not values:
309	return None
310
311	return min(values), max(values)
312
313	def UniqueValues(self, col):
314	"""Return a sorted list of all unique values in the column col"""
315	dict = {}
316
317	for i in range(self.NumRows()):
318	value = self.ReadValue(i, col)
319	dict[value] = 0
320
321	values = dict.keys()
322	values.sort()
323	return values
324
325
326	def write_record(self, record, values):
327	# TODO: Check for correct lenght and perhaps also
328	# for correct types in case values is a tuple. How to report problems?
329	# TODO: Allow values to be a dictionary and write the single
330	# fields that are specified.
331	self.data[record] = values
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision