Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# The field types supported by a Table instance. The column objects of
# the tables defined in this module carry one of these strings in their
# type attribute.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our FIELDTYPE_*
# constants. DBFTable uses this mapping to translate the field type
# reported by dbflib for each column.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class this is mixed into has to provide the methods of the new
    interface that are used here: NumRows, NumColumns, Column,
    ValueRange, UniqueValues and ReadRowAsDict.

    Every method of the old interface issues exactly one
    DeprecationWarning, attributed to the code calling it, and then
    delegates to the corresponding method of the new interface.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The name of the deprecated method is taken from the frame of
        the direct caller of this method, so this must be called
        directly by the deprecated method itself.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        # stacklevel 3 attributes the warning to the code calling the
        # deprecated method: level 1 is this method, level 2 the
        # deprecated method.
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        Shared implementation of field_info and field_info_by_name
        which doesn't issue a deprecation warning itself. Exceptions
        raised by the Column method are passed through.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the result of NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the result of NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Return the field_info tuple for the column col

        Return None if the lookup of the column raises a KeyError.
        """
        self.__deprecation_warning()
        try:
            # Don't go through field_info here. It would issue a second
            # warning which would be attributed to this mixin instead
            # of the code using the old interface.
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that the column lookup raises other
            # exceptions when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        min and max are the values returned by ValueRange. If
        ValueRange returns None, which the tables in this module do
        for empty tables, both min and max are None so that the shape
        of the return value stays the same.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return ((None, None), (None, None))
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the result of UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the result of ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Describe a single column of a DBFTable

    The public attributes of the instances are:

    name -- the name of the column
    type -- the type of the column, one of FIELDTYPE_STRING,
            FIELDTYPE_INT or FIELDTYPE_DOUBLE
    index -- the index of the column
    width -- the width of the data in the column
    prec -- the precision of the data (only valid if the type is
            FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        """Store the parameters as the attributes of the same names"""
        self.name, self.type, self.index = name, type, index
        self.width, self.prec = width, prec


class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file

    The title of the table is the basename of the DBF file. All methods
    with a col parameter accept either the name or the index of the
    column.
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Initialize the DBFTable from the DBF file filename

        The file is opened for reading and a DBFColumn instance is
        created for each of its fields.
        """
        self.filename = filename
        title = os.path.basename(self.filename)
        TitledObject.__init__(self, title)
        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its DBFColumn instance.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if the table has no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The in operator is equivalent to the has_key method but also
        # works with python versions whose dictionaries don't have
        # has_key anymore.
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # Use the values as dictionary keys to get rid of duplicates
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a list explicitly so that sorting in place
        # doesn't depend on keys() returning a list.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file

        Afterwards self.dbf is None so that the table cannot be used to
        access the file anymore. Calling Destroy again is harmless.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the file for writing before closing the read-only
            # object so that self.dbf is left untouched if opening for
            # writing raises an exception.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename


class MemoryColumn:

    """Describe a single column of a MemoryTable

    The public attributes of the instances are:

    name -- the name of the column
    type -- the type of the column
    index -- the index of the column
    """

    def __init__(self, name, type, index):
        """Store the parameters as the attributes of the same names"""
        self.name, self.type, self.index = name, type, index

class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples

    All methods with a col parameter accept either the name or the
    index of the column.
    """

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied. write_record modifies it in place.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its MemoryColumn
        # instance.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if the table has no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # The in operator is equivalent to the has_key method but also
        # works with python versions whose dictionaries don't have
        # has_key anymore.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # Use the values as dictionary keys to get rid of duplicates
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a list explicitly so that sorting in place
        # doesn't depend on keys() returning a list.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by
        index or name). Values in columns of type FIELDTYPE_DOUBLE are
        formatted with 12 decimal places and values in columns of type
        FIELDTYPE_INT as decimal integers.

        If the table is empty the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        return max([len(template % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with the index record with values

        The values are stored as they are, without any checks.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values


def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    One field is added to the DBF file for each column of the table and
    then all rows of the table are written as records. The DBF file is
    closed even if adding a field or writing a record raises an
    exception.

    Parameters:
    table -- the table to export. It has to provide the methods
             Columns, Width, NumRows and ReadRowAsDict.
    filename -- name of the DBF file to create
    """
    # Map our field type constants to the dbflib constants. This is
    # the inverse of the module level dbflib_fieldtypes mapping, hence
    # the different name.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    dbf = dbflib.create(filename)
    try:
        # Initialise the header. Distinguish between DBFTable and
        # others: only column objects with a prec attribute know their
        # precision, all other double columns get a precision of 12.
        # NOTE(review): Width may return None (MemoryTable does for an
        # empty table). The value is passed on to add_field unchanged.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(col.name, to_dbflib_type[col.type], width, prec)

        for i in range(table.NumRows()):
            dbf.write_record(i, table.ReadRowAsDict(i))
    finally:
        dbf.close()

def table_to_csv(table, filename):
    """Export table to csv file.

    The first line of the file is a header consisting of a '#' followed
    by the comma separated column names. Each row of the table follows
    as one line with the values of the row, formatted with %s, in
    column order and separated by commas. A row for which
    ReadRowAsDict returns an empty dictionary is written as an empty
    line. The values are not quoted or escaped in any way.

    If the table has no columns the file is created but left empty.
    The file is closed even if reading the table or writing the file
    raises an exception.

    Parameters:
    table -- the table to export. It has to provide the methods
             Columns, NumRows and ReadRowAsDict.
    filename -- name of the csv file to write
    """
    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = ["%s" % col.name for col in columns]
            outfile.write("#" + ",".join(names) + "\n")

            for i in range(table.NumRows()):
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                else:
                    # Start from an empty line for an empty record
                    # instead of reusing the line of the previous row.
                    line = ""
                outfile.write(line + "\n")
    finally:
        outfile.close()

1	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	# Jan-Oliver Wagner <[email protected]>
5	# Frank Koormann <[email protected]>
6	#
7	# This program is free software under the GPL (>=v2)
8	# Read the file COPYING coming with Thuban for details.
9
10	"""
11	Classes for handling tables of data.
12	"""
13
14	__version__ = "$Revision$"
15
16	import os
17	import inspect
18	import warnings
19
20	from base import TitledObject
21
22	import dbflib
23
24	# the field types supported by a Table instance.
25	FIELDTYPE_INT = "int"
26	FIELDTYPE_STRING = "string"
27	FIELDTYPE_DOUBLE = "double"
28
29
30	# map the dbflib constants for the field types to our constants
31	dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
32	dbflib.FTInteger: FIELDTYPE_INT,
33	dbflib.FTDouble: FIELDTYPE_DOUBLE}
34
35
36	class OldTableInterfaceMixin:
37
38	"""Mixin to implement the old table interface using the new one"""
39
40	def __deprecation_warning(self):
41	"""Issue a DeprecationWarning for code hat uses the old interface"""
42	callername = inspect.currentframe().f_back.f_code.co_name
43	warnings.warn("The %s method of the old table interface"
44	" is deprecated" % callername,
45	DeprecationWarning, stacklevel = 3)
46
47	def record_count(self):
48	self.__deprecation_warning()
49	return self.NumRows()
50
51	def field_count(self):
52	self.__deprecation_warning()
53	return self.NumColumns()
54
55	def field_info(self, field):
56	"""Return a tuple (type, name, width, prec) for the field no. field
57
58	type is the data type of the field, name the name, width the
59	field width in characters and prec the decimal precision. width
60	and prec will be zero if the information returned by the Column
61	method doesn't provide values for them.
62	"""
63	self.__deprecation_warning()
64	col = self.Column(field)
65	return (col.type, col.name,
66	getattr(col, "width", 0), getattr(col, "prec", 0))
67
68	def field_info_by_name(self, col):
69	self.__deprecation_warning()
70	try:
71	return self.field_info(col)
72	except KeyError:
73	# FIXME: It may be that field_info raises other exceptions
74	# when the name is not a valid column name.
75	return None
76
77	def field_range(self, fieldName):
78	self.__deprecation_warning()
79	min, max = self.ValueRange(fieldName)
80	return ((min, None), (max, None))
81
82	def GetUniqueValues(self, field):
83	self.__deprecation_warning()
84	return self.UniqueValues(field)
85
86	def read_record(self, r):
87	self.__deprecation_warning()
88	return self.ReadRowAsDict(r)
89
90
91
92	class DBFColumn:
93
94	"""Description of a column in a DBFTable
95
96	Instances have the following public attributes:
97
98	name -- Name of the column
99	type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
100	FIELDTYPE_DOUBLE)
101	index -- The index of the column
102	width -- the width of the data in the column
103	prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
104	"""
105
106	def __init__(self, name, type, width, prec, index):
107	self.name = name
108	self.type = type
109	self.width = width
110	self.prec = prec
111	self.index = index
112
113
114	class DBFTable(TitledObject, OldTableInterfaceMixin):
115
116	"""
117	Table interface for the data in a DBF file
118	"""
119
120	# Implementation strategy regarding writing to a DBF file:
121	#
122	# Most of the time Thuban only needs to read from a table and it is
123	# important that Thuban can work with read-only files. Therefore the
124	# DBF file is opened only for reading initially. Only when
125	# write_record is called we try to open the DBF file for writing as
126	# well. If that succeeds the read/write DBF file will be used for
127	# all IO afterwards.
128	#
129	# It's important to use the same DBF file object for both reading
130	# and writing to make sure that reading a records after writing
131	# returns the new values. With two separate objects this wouldn't
132	# work because a DBF file object buffers some data
133
134	def __init__(self, filename):
135	self.filename = filename
136	title = os.path.basename(self.filename)
137	TitledObject.__init__(self, title)
138	self.dbf = dbflib.DBFFile(filename)
139
140	# If true, self.dbf is open for writing.
141	self._writable = 0
142
143	# Create the column information objects
144	self.columns = []
145	self.column_map = {}
146	for i in range(self.NumColumns()):
147	ftype, name, width, prec = self.dbf.field_info(i)
148	ftype = dbflib_fieldtypes[ftype]
149	index = len(self.columns)
150	col = DBFColumn(name, ftype, width, prec, index)
151	self.columns.append(col)
152	self.column_map[name] = col
153	self.column_map[index] = col
154
155	def NumRows(self):
156	"""Return the number of rows in the table"""
157	return self.dbf.record_count()
158
159	def NumColumns(self):
160	"""Return the number of columns in the table"""
161	return self.dbf.field_count()
162
163	def Columns(self):
164	"""Return the table's colum definitions
165
166	The return value is a sequence of DBFColumn instances, one for
167	each column.
168	"""
169	return self.columns
170
171	def Column(self, col):
172	"""Return information about the column given by its name or index
173
174	The returned object is an instance of DBFColumn
175	"""
176	return self.column_map[col]
177
178	def HasColumn(self, col):
179	"""Return whether the table has a column with the given name or index
180	"""
181	return self.column_map.has_key(col)
182
183	def ReadRowAsDict(self, row):
184	"""Return the entire row as a dictionary with column names as keys"""
185	return self.dbf.read_record(row)
186
187	def ReadValue(self, row, col):
188	"""Return the value of the specified row and column
189
190	The col parameter may be the index of the column or its name.
191	"""
192	return self.dbf.read_record(row)[self.column_map[col].name]
193
194	def ValueRange(self, col):
195	"""Return the minimum and maximum values of the values in the column
196
197	The return value is a tuple (min, max) unless the table is empty
198	in which case the return value is None.
199	"""
200	count = self.NumRows()
201
202	if count == 0:
203	return None
204
205	min = max = self.ReadValue(0, col)
206	for i in range(1, count):
207	value = self.ReadValue(i, col)
208	if value < min:
209	min = value
210	elif value > max:
211	max = value
212
213	return (min, max)
214
215	def UniqueValues(self, col):
216	"""Return a sorted list of all unique values in the column col"""
217	dict = {}
218
219	for i in range(self.NumRows()):
220	value = self.ReadValue(i, col)
221	dict[value] = 0
222
223	values = dict.keys()
224	values.sort()
225	return values
226
227	def Dependencies(self):
228	"""Return an empty sequence. The DBFTable doesn't depend on anything"""
229	return ()
230
231	# DBF specific interface parts.
232
233	def Width(self, col):
234	"""Return column width"""
235	return self.column_map[col].width
236
237	def Destroy(self):
238	self.dbf.close()
239	self.dbf = None
240
241	def write_record(self, record, values):
242	"""Write the values into the record
243
244	The values parameter may either be a dictionary or a sequence.
245
246	If it's a dictionary the keys must be the names of the fields
247	and their value must have a suitable type. Only the fields
248	actually contained in the dictionary are written. Fields for
249	which there's no item in the dict are not modified.
250
251	If it's a sequence, all fields must be present in the right
252	order.
253	"""
254	if not self._writable:
255	new_dbf = dbflib.DBFFile(self.filename, "r+b")
256	self.dbf.close()
257	self.dbf = new_dbf
258	self._writable = 1
259	self.dbf.write_record(record, values)
260	self.dbf.commit()
261
262	def FileName(self):
263	"""Return the filename the DBFTable was instantiated with"""
264	return self.filename
265
266
267	class MemoryColumn:
268
269	def __init__(self, name, type, index):
270	self.name = name
271	self.type = type
272	self.index = index
273
274	class MemoryTable(TitledObject, OldTableInterfaceMixin):
275
276	"""Very simple table implementation that operates on a list of tuples"""
277
278	def __init__(self, fields, data):
279	"""Initialize the MemoryTable
280
281	Parameters:
282	fields -- List of (name, field_type) pairs
283	data -- List of tuples, one for each row of data
284	"""
285	self.data = data
286	title = 'MemoryTable'
287	TitledObject.__init__(self, title)
288
289	# Create the column information objects
290	self.columns = []
291	self.column_map = {}
292	for name, ftype in fields:
293	index = len(self.columns)
294	col = MemoryColumn(name, ftype, index)
295	self.columns.append(col)
296	self.column_map[name] = col
297	self.column_map[index] = col
298
299	def NumColumns(self):
300	"""Return the number of columns in the table"""
301	return len(self.columns)
302
303	def Column(self, col):
304	"""Return information about the column given by its name or index
305
306	The returned object is an instance of MemoryColumn.
307	"""
308	return self.column_map[col]
309
310	def Columns(self):
311	"""Return the table's colum definitions
312
313	The return value is a sequence of MemoryColumn instances, one
314	for each column.
315	"""
316	return self.columns
317
318	def HasColumn(self, col):
319	"""Return whether the table has a column with the given name or index
320	"""
321	return self.column_map.has_key(col)
322
323	def NumRows(self):
324	"""Return the number of rows in the table"""
325	return len(self.data)
326
327	def ReadValue(self, row, col):
328	"""Return the value of the specified row and column
329
330	The col parameter may be the index of the column or its name.
331	"""
332	return self.data[row][self.column_map[col].index]
333
334	def ReadRowAsDict(self, index):
335	"""Return the entire row as a dictionary with column names as keys"""
336	return dict([(col.name, self.data[index][col.index])
337	for col in self.columns])
338
339	def ValueRange(self, col):
340	"""Return the minimum and maximum values of the values in the column
341
342	The return value is a tuple (min, max) unless the table is empty
343	in which case the return value is None.
344	"""
345
346	index = self.column_map[col].index
347	values = [row[index] for row in self.data]
348	if not values:
349	return None
350
351	return min(values), max(values)
352
353	def UniqueValues(self, col):
354	"""Return a sorted list of all unique values in the column col
355
356	col can be either column index or name.
357	"""
358	dict = {}
359
360	for i in range(self.NumRows()):
361	value = self.ReadValue(i, col)
362	dict[value] = 0
363
364	values = dict.keys()
365	values.sort()
366	return values
367
368	def Width(self, col):
369	"""Return the maximum width of values in the column
370
371	The return value is the the maximum length of string
372	representation of the values in the column (represented by index
373	or name).
374	"""
375	max = 0
376
377	type = self.column_map[col].type
378	index = self.column_map[col].index
379	values = [row[index] for row in self.data]
380	if not values:
381	return None
382
383	if type == FIELDTYPE_DOUBLE:
384	format = "%.12f"
385	elif type == FIELDTYPE_INT:
386	format = "%d"
387	else:
388	format = "%s"
389	for value in values:
390	l = len(format % value)
391	if l > max:
392	max = l
393
394	return max
395
396	def Dependencies(self):
397	"""Return an empty sequence. The MemoryTable doesn't depend on anything
398	"""
399	return ()
400
401	def write_record(self, record, values):
402	# TODO: Check for correct lenght and perhaps also
403	# for correct types in case values is a tuple. How to report problems?
404	# TODO: Allow values to be a dictionary and write the single
405	# fields that are specified.
406	self.data[record] = values
407
408
409	def table_to_dbf(table, filename):
410	"""Create the dbf file filename from the table"""
411	dbf = dbflib.create(filename)
412
413	dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
414	FIELDTYPE_INT: dbflib.FTInteger,
415	FIELDTYPE_DOUBLE: dbflib.FTDouble}
416
417	# Initialise the header. Distinguish between DBFTable and others.
418	for col in table.Columns():
419	width = table.Width(col.name)
420	if col.type == FIELDTYPE_DOUBLE:
421	prec = getattr(col, "prec", 12)
422	else:
423	prec = 0
424	dbf.add_field(col.name, dbflib_fieldtypes[col.type], width, prec)
425
426	for i in range(table.NumRows()):
427	record = table.ReadRowAsDict(i)
428	dbf.write_record(i, record)
429	dbf.close()
430
431	def table_to_csv(table, filename):
432	"""Export table to csv file."""
433
434	file = open(filename,"w")
435	columns = table.Columns()
436	if columns:
437	header = "#%s" % columns[0].name
438	for col in columns[1:]:
439	header = header + ",%s" % col.name
440	header = header + "\n"
441	file.write(header)
442
443	for i in range(table.NumRows()):
444	record = table.ReadRowAsDict(i)
445	if len(record):
446	line = "%s" % record[columns[0].name]
447	for col in columns[1:]:
448	line = line + ",%s" % record[col.name]
449	line = line + "\n"
450	file.write(line)
451	file.close()
452
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision