Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# The field types supported by a Table instance. These strings are the
# values found in the type attribute of the column description objects.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants.
# DBFTable uses this mapping to translate the field types reported by
# dbflib when it builds its column descriptions.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class using this mixin has to provide the methods of the new
    interface that are called here: NumRows, NumColumns, Column,
    ValueRange, UniqueValues and ReadRowAsDict. Each method of the old
    interface issues exactly one DeprecationWarning and then delegates
    to the corresponding method of the new interface.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The method name mentioned in the warning is the name of the
        function that called this method, and stacklevel = 3 attributes
        the warning to the caller of that function. Therefore this
        method has to be called directly from the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        Shared by field_info and field_info_by_name so that neither of
        them has to call the other deprecated method, which would issue
        a second warning attributed to this module.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows, see NumRows"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns, see NumColumns"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info or None for unknown columns

        None is returned if looking up the column raises a KeyError.
        """
        self.__deprecation_warning()
        try:
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that the Column method raises other
            # exceptions when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return ((min, None), (max, None)) for the column fieldName

        min and max are the values returned by ValueRange. If
        ValueRange returns None, which it does for empty tables, None
        is returned here as well.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the unique values, see UniqueValues"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the row r as a dict, see ReadRowAsDict"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Describes one column of a DBFTable

    The instances are plain records with these public attributes:

    name -- The column's name
    type -- The column's type; one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE
    index -- The column's index
    width -- The width of the data in the column
    prec -- The precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # The values are stored exactly as given, no validation is done.
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec


class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename for reading

        The title of the table is the basename of filename without the
        extension. One DBFColumn instance is created for each field of
        the DBF file.
        """
        self.filename = filename

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its DBFColumn instance
        # so that columns can be looked up by either.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # Use the in operator instead of has_key, which newer Python
        # versions don't provide anymore.
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting. In newer
        # Python versions keys() doesn't return a list anymore.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file

        Calling Destroy again after the file has been closed does
        nothing.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the file for writing before closing the read-only
            # object. If opening fails the exception propagates and
            # self.dbf is still the usable read-only object.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename


class MemoryColumn:

    """Describes one column of a MemoryTable

    The instances have the public attributes name, type and index
    which hold the values passed to the constructor.
    """

    def __init__(self, name, type, index):
        self.index = index
        self.name, self.type = name, type

class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied. write_record modifies the list
        that was passed in.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map maps both
        # the name and the index of a column to its MemoryColumn
        # instance so that columns can be looked up by either.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # Use the in operator instead of has_key, which newer Python
        # versions don't provide anymore.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                      for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting. In newer
        # Python versions keys() doesn't return a list anymore.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Values in FIELDTYPE_DOUBLE columns are formatted with
        12 decimal places and values in FIELDTYPE_INT columns as
        integers. All other values are converted with %s.

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            length = len(template % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with the index record with values

        No checks are done on values, it is simply stored as the new
        row.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values


def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names
    so when writing an arbitrary Thuban table to a DBF file we may have
    we may have to rename some of the columns making sure that they're
    unique in the DBF file too.

    Names that are already short enough will stay the same. Longer names
    will be truncated to 10 characters or if that isn't unique it will
    be truncated more and filled up with digits.

    The parameter names should be a list of the column names. The return
    value will be a dictionary mapping the names in the input list to
    the names to use in the DBF file.
    """
    # mapping from the original names in table to the names in the DBF
    # file
    name_map = {}

    # First, we keep all names that are already short enough
    for i in range(len(names) - 1, -1, -1):
        if len(names[i]) <= 10:
            name_map[names[i]] = names[i]
            del names[i]

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in names:
        truncated = name[:10]
        num = 0; numstr = ""
        #print "truncated", truncated, num
        while truncated in used and len(numstr) < 10:
            num += 1
            numstr = str(num)
            truncated = name[:10 - len(numstr)] + numstr
            #print "truncated", truncated, num
        if len(numstr) >= 10:
            # This case should never happen in practice as tables with
            # 10^10 columns seem very unlikely :)
            raise ValueError("Can't find unique dbf column name")

        name_map[name] = truncated
        used[truncated] = 1

    return name_map

def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a sequence of row indices and only those rows are saved, in the
    given order. The records in the DBF file are always numbered
    consecutively starting with 0.

    Column names are mapped to names suitable for DBF files with
    _find_dbf_column_names. The field width is taken from the table's
    Width method. The precision of FIELDTYPE_DOUBLE columns is the prec
    attribute of the column if it has one and 12 otherwise. For all
    other columns it is 0.
    """
    # Mapping from our field type constants to the dbflib ones, i.e. the
    # reverse of the module level dbflib_fieldtypes. It has its own name
    # so that it isn't confused with that mapping.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    # Determine the column names before the file is created so that no
    # file is left behind if no suitable names can be found.
    name_map = _find_dbf_column_names([col.name for col in table.Columns()])

    dbf = dbflib.create(filename)
    try:
        # Initialise the header.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # recNum is the record number in the new file. It differs from
        # the row index in the table if only some of the rows are saved.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        # Close the file even if writing failed.
        dbf.close()

def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a sequence of row indices and only those rows are saved, in the
    given order.

    The first line of the file is a header line consisting of a '#'
    followed by the comma separated column names. Each of the following
    lines contains the comma separated values of one row. The values
    are converted with %s and are not quoted. Rows for which
    ReadRowAsDict returns an empty dictionary are skipped.

    If the table has no columns the file will be empty.
    """
    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            # Wrap the values in a tuple for the % operator so that a
            # value that happens to be a tuple is formatted as a single
            # value.
            names = ["%s" % (col.name,) for col in columns]
            outfile.write("#" + ",".join(names) + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if not record:
                    # Nothing to write for this row. Never write the
                    # line of the previous row again.
                    continue
                fields = ["%s" % (record[col.name],) for col in columns]
                outfile.write(",".join(fields) + "\n")
    finally:
        # Close the file even if reading from the table failed.
        outfile.close()

1	bh	590	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	bh	6	# Authors:
3			# Bernhard Herzog <[email protected]>
4	jan	806	# Jan-Oliver Wagner <[email protected]>
5	frank	1025	# Frank Koormann <[email protected]>
6	bh	6	#
7			# This program is free software under the GPL (>=v2)
8			# Read the file COPYING coming with Thuban for details.
9
10			"""
11			Classes for handling tables of data.
12			"""
13
14			__version__ = "$Revision$"
15
16	bh	998	import os
17	bh	839	import inspect
18			import warnings
19
20	jan	1019	from base import TitledObject
21
22	bh	6	import dbflib
23
24			# the field types supported by a Table instance.
25	jonathan	474	FIELDTYPE_INT = "int"
26			FIELDTYPE_STRING = "string"
27			FIELDTYPE_DOUBLE = "double"
28	bh	6
29
30			# map the dbflib constants for the field types to our constants
31			dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
32			dbflib.FTInteger: FIELDTYPE_INT,
33			dbflib.FTDouble: FIELDTYPE_DOUBLE}
34
35	jan	806
36	bh	818	class OldTableInterfaceMixin:
37	jan	806
38	bh	818	"""Mixin to implement the old table interface using the new one"""
39	jan	806
40	bh	839	def __deprecation_warning(self):
41			"""Issue a DeprecationWarning for code hat uses the old interface"""
42			callername = inspect.currentframe().f_back.f_code.co_name
43			warnings.warn("The %s method of the old table interface"
44			" is deprecated" % callername,
45			DeprecationWarning, stacklevel = 3)
46
47	bh	818	def record_count(self):
48	bh	839	self.__deprecation_warning()
49	bh	818	return self.NumRows()
50	jan	806
51			def field_count(self):
52	bh	839	self.__deprecation_warning()
53	bh	818	return self.NumColumns()
54	jan	806
55	bh	818	def field_info(self, field):
56			"""Return a tuple (type, name, width, prec) for the field no. field
57	jan	806
58	bh	818	type is the data type of the field, name the name, width the
59			field width in characters and prec the decimal precision. width
60			and prec will be zero if the information returned by the Column
61			method doesn't provide values for them.
62			"""
63	bh	839	self.__deprecation_warning()
64	bh	818	col = self.Column(field)
65			return (col.type, col.name,
66			getattr(col, "width", 0), getattr(col, "prec", 0))
67	jan	806
68	bh	818	def field_info_by_name(self, col):
69	bh	839	self.__deprecation_warning()
70	bh	818	try:
71			return self.field_info(col)
72			except KeyError:
73			# FIXME: It may be that field_info raises other exceptions
74			# when the name is not a valid column name.
75			return None
76	jan	806
77	bh	818	def field_range(self, fieldName):
78	bh	839	self.__deprecation_warning()
79	bh	818	min, max = self.ValueRange(fieldName)
80			return ((min, None), (max, None))
81	jan	806
82	bh	818	def GetUniqueValues(self, field):
83	bh	839	self.__deprecation_warning()
84	bh	818	return self.UniqueValues(field)
85	jan	806
86	bh	818	def read_record(self, r):
87	bh	839	self.__deprecation_warning()
88	bh	818	return self.ReadRowAsDict(r)
89	bh	6
90	bh	818
91
92			class DBFColumn:
93
94			"""Description of a column in a DBFTable
95
96			Instances have the following public attributes:
97
98			name -- Name of the column
99			type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
100			FIELDTYPE_DOUBLE)
101			index -- The index of the column
102			width -- the width of the data in the column
103			prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
104	bh	6	"""
105	bh	818
106			def __init__(self, name, type, width, prec, index):
107			self.name = name
108			self.type = type
109			self.width = width
110			self.prec = prec
111			self.index = index
112
113
114	jan	1019	class DBFTable(TitledObject, OldTableInterfaceMixin):
115	bh	818
116			"""
117	bh	765	Table interface for the data in a DBF file
118	bh	6	"""
119
120	bh	286	# Implementation strategy regarding writing to a DBF file:
121			#
122			# Most of the time Thuban only needs to read from a table and it is
123			# important that Thuban can work with read-only files. Therefore the
124			# DBF file is opened only for reading initially. Only when
125			# write_record is called we try to open the DBF file for writing as
126	bh	590	# well. If that succeeds the read/write DBF file will be used for
127			# all IO afterwards.
128	bh	286	#
129			# It's important to use the same DBF file object for both reading
130			# and writing to make sure that reading a records after writing
131			# returns the new values. With two separate objects this wouldn't
132			# work because a DBF file object buffers some data
133
134	bh	6	def __init__(self, filename):
135			self.filename = filename
136	bh	1078
137			# Omit the extension in the title as it's not really needed and
138			# it can be confusing because dbflib removes extensions and
139			# appends some variations of '.dbf' before it tries to open the
140			# file. So the title could be e.g. myshapefile.shp when the real
141			# filename is myshapefile.dbf
142			title = os.path.splitext(os.path.basename(self.filename))[0]
143	jan	1019	TitledObject.__init__(self, title)
144	bh	1078
145	bh	284	self.dbf = dbflib.DBFFile(filename)
146	bh	6
147	bh	286	# If true, self.dbf is open for writing.
148			self._writable = 0
149
150	bh	818	# Create the column information objects
151			self.columns = []
152			self.column_map = {}
153			for i in range(self.NumColumns()):
154			ftype, name, width, prec = self.dbf.field_info(i)
155			ftype = dbflib_fieldtypes[ftype]
156			index = len(self.columns)
157			col = DBFColumn(name, ftype, width, prec, index)
158			self.columns.append(col)
159			self.column_map[name] = col
160			self.column_map[index] = col
161	bh	257
162	bh	818	def NumRows(self):
163			"""Return the number of rows in the table"""
164	bh	6	return self.dbf.record_count()
165
166	bh	818	def NumColumns(self):
167			"""Return the number of columns in the table"""
168	bh	6	return self.dbf.field_count()
169
170	bh	818	def Columns(self):
171			"""Return the table's colum definitions
172	bh	6
173	bh	818	The return value is a sequence of DBFColumn instances, one for
174			each column.
175	bh	6	"""
176	bh	818	return self.columns
177	bh	6
178	bh	818	def Column(self, col):
179			"""Return information about the column given by its name or index
180	jonathan	467
181	bh	818	The returned object is an instance of DBFColumn
182			"""
183			return self.column_map[col]
184	jonathan	467
185	bh	839	def HasColumn(self, col):
186			"""Return whether the table has a column with the given name or index
187			"""
188			return self.column_map.has_key(col)
189
190	bh	818	def ReadRowAsDict(self, row):
191			"""Return the entire row as a dictionary with column names as keys"""
192			return self.dbf.read_record(row)
193	jonathan	467
194	bh	818	def ReadValue(self, row, col):
195			"""Return the value of the specified row and column
196	jonathan	628
197	bh	818	The col parameter may be the index of the column or its name.
198			"""
199			return self.dbf.read_record(row)[self.column_map[col].name]
200	jonathan	628
201	bh	818	def ValueRange(self, col):
202			"""Return the minimum and maximum values of the values in the column
203	jonathan	628
204	bh	818	The return value is a tuple (min, max) unless the table is empty
205			in which case the return value is None.
206	jonathan	628	"""
207	bh	818	count = self.NumRows()
208	jonathan	628
209			if count == 0:
210			return None
211
212	bh	818	min = max = self.ReadValue(0, col)
213	jonathan	628	for i in range(1, count):
214	bh	818	value = self.ReadValue(i, col)
215			if value < min:
216			min = value
217			elif value > max:
218			max = value
219	jonathan	628
220	bh	818	return (min, max)
221	jonathan	628
222	bh	818	def UniqueValues(self, col):
223			"""Return a sorted list of all unique values in the column col"""
224			dict = {}
225	jonathan	628
226	bh	818	for i in range(self.NumRows()):
227			value = self.ReadValue(i, col)
228			dict[value] = 0
229	jonathan	628
230	bh	818	values = dict.keys()
231			values.sort()
232			return values
233	jonathan	628
234	bh	984	def Dependencies(self):
235			"""Return an empty sequence. The DBFTable doesn't depend on anything"""
236			return ()
237	jonathan	628
238	bh	818	# DBF specific interface parts.
239	jonathan	628
240	frank	1025	def Width(self, col):
241			"""Return column width"""
242			return self.column_map[col].width
243
244	bh	818	def Destroy(self):
245			self.dbf.close()
246			self.dbf = None
247	jonathan	628
248	bh	274	def write_record(self, record, values):
249			"""Write the values into the record
250
251			The values parameter may either be a dictionary or a sequence.
252
253			If it's a dictionary the keys must be the names of the fields
254			and their value must have a suitable type. Only the fields
255			actually contained in the dictionary are written. Fields for
256			which there's no item in the dict are not modified.
257
258			If it's a sequence, all fields must be present in the right
259			order.
260			"""
261	bh	286	if not self._writable:
262			new_dbf = dbflib.DBFFile(self.filename, "r+b")
263			self.dbf.close()
264			self.dbf = new_dbf
265			self._writable = 1
266			self.dbf.write_record(record, values)
267			self.dbf.commit()
268	jonathan	467
269	bh	994	def FileName(self):
270			"""Return the filename the DBFTable was instantiated with"""
271			return self.filename
272	bh	765
273
274	bh	818	class MemoryColumn:
275
276			def __init__(self, name, type, index):
277			self.name = name
278			self.type = type
279			self.index = index
280
281	jan	1019	class MemoryTable(TitledObject, OldTableInterfaceMixin):
282	bh	818
283			"""Very simple table implementation that operates on a list of tuples"""
284
285			def __init__(self, fields, data):
286			"""Initialize the MemoryTable
287
288			Parameters:
289			fields -- List of (name, field_type) pairs
290			data -- List of tuples, one for each row of data
291			"""
292			self.data = data
293	jan	1019	title = 'MemoryTable'
294			TitledObject.__init__(self, title)
295	bh	818
296			# Create the column information objects
297			self.columns = []
298			self.column_map = {}
299			for name, ftype in fields:
300			index = len(self.columns)
301			col = MemoryColumn(name, ftype, index)
302			self.columns.append(col)
303			self.column_map[name] = col
304			self.column_map[index] = col
305
306			def NumColumns(self):
307			"""Return the number of columns in the table"""
308			return len(self.columns)
309
310			def Column(self, col):
311			"""Return information about the column given by its name or index
312
313			The returned object is an instance of MemoryColumn.
314			"""
315			return self.column_map[col]
316
317			def Columns(self):
318			"""Return the table's colum definitions
319
320			The return value is a sequence of MemoryColumn instances, one
321			for each column.
322			"""
323			return self.columns
324
325	bh	839	def HasColumn(self, col):
326			"""Return whether the table has a column with the given name or index
327			"""
328			return self.column_map.has_key(col)
329
330	bh	818	def NumRows(self):
331			"""Return the number of rows in the table"""
332			return len(self.data)
333
334			def ReadValue(self, row, col):
335			"""Return the value of the specified row and column
336
337			The col parameter may be the index of the column or its name.
338			"""
339			return self.data[row][self.column_map[col].index]
340
341			def ReadRowAsDict(self, index):
342			"""Return the entire row as a dictionary with column names as keys"""
343			return dict([(col.name, self.data[index][col.index])
344			for col in self.columns])
345
346			def ValueRange(self, col):
347			"""Return the minimum and maximum values of the values in the column
348
349			The return value is a tuple (min, max) unless the table is empty
350			in which case the return value is None.
351			"""
352
353			index = self.column_map[col].index
354			values = [row[index] for row in self.data]
355			if not values:
356			return None
357
358			return min(values), max(values)
359
360			def UniqueValues(self, col):
361	frank	1025	"""Return a sorted list of all unique values in the column col
362
363			col can be either column index or name.
364			"""
365	bh	818	dict = {}
366
367			for i in range(self.NumRows()):
368			value = self.ReadValue(i, col)
369			dict[value] = 0
370
371			values = dict.keys()
372			values.sort()
373			return values
374
375	frank	1025	def Width(self, col):
376			"""Return the maximum width of values in the column
377
378	bh	1043	The return value is the the maximum length of string
379			representation of the values in the column (represented by index
380			or name).
381			"""
382	frank	1025	max = 0
383	bh	1043
384	frank	1025	type = self.column_map[col].type
385			index = self.column_map[col].index
386			values = [row[index] for row in self.data]
387			if not values:
388			return None
389
390			if type == FIELDTYPE_DOUBLE:
391	bh	1043	format = "%.12f"
392	frank	1025	elif type == FIELDTYPE_INT:
393			format = "%d"
394			else:
395			format = "%s"
396			for value in values:
397			l = len(format % value)
398			if l > max:
399			max = l
400
401			return max
402
403	bh	984	def Dependencies(self):
404			"""Return an empty sequence. The MemoryTable doesn't depend on anything
405			"""
406			return ()
407	bh	818
408			def write_record(self, record, values):
409			# TODO: Check for correct lenght and perhaps also
410			# for correct types in case values is a tuple. How to report problems?
411			# TODO: Allow values to be a dictionary and write the single
412			# fields that are specified.
413			self.data[record] = values
414	frank	1025
415
416	bh	1371
417			def _find_dbf_column_names(names):
418			"""Determine the column names to use in a DBF file
419
420			DBF files have a length limit of 10 characters on the column names
421			so when writing an arbitrary Thuban table to a DBF file we may have
422			we may have to rename some of the columns making sure that they're
423			unique in the DBF file too.
424
425			Names that are already short enough will stay the same. Longer names
426			will be truncated to 10 characters or if that isn't unique it will
427			be truncated more and filled up with digits.
428
429			The parameter names should be a list of the column names. The return
430			value will be a dictionary mapping the names in the input list to
431			the names to use in the DBF file.
432			"""
433			# mapping from the original names in table to the names in the DBF
434			# file
435			name_map = {}
436
437			# First, we keep all names that are already short enough
438			for i in range(len(names) - 1, -1, -1):
439			if len(names[i]) <= 10:
440			name_map[names[i]] = names[i]
441			del names[i]
442
443			# dict used as a set of all names already used as DBF column names
444			used = name_map.copy()
445
446			# Go through all longer names. If the name truncated to 10
447			# characters is not used already, we use that. Otherwise we truncate
448			# it more and append numbers until we get an unused name
449			for name in names:
450			truncated = name[:10]
451			num = 0; numstr = ""
452			#print "truncated", truncated, num
453			while truncated in used and len(numstr) < 10:
454			num += 1
455			numstr = str(num)
456			truncated = name[:10 - len(numstr)] + numstr
457			#print "truncated", truncated, num
458			if len(numstr) >= 10:
459			# This case should never happen in practice as tables with
460			# 10^10 columns seem very unlikely :)
461			raise ValueError("Can't find unique dbf column name")
462
463			name_map[name] = truncated
464			used[truncated] = 1
465
466			return name_map
467
468	jonathan	1431	def table_to_dbf(table, filename, rows = None):
469			"""Create the dbf file filename from the table.
470
471			If rows is not None (the default) then it must be a list of row
472			indices to be saved to the file, otherwise all rows are saved.
473			"""
474
475	frank	1025	dbf = dbflib.create(filename)
476
477			dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
478			FIELDTYPE_INT: dbflib.FTInteger,
479			FIELDTYPE_DOUBLE: dbflib.FTDouble}
480
481	bh	1371
482			name_map = _find_dbf_column_names([col.name for col in table.Columns()])
483
484	frank	1025	# Initialise the header. Distinguish between DBFTable and others.
485			for col in table.Columns():
486			width = table.Width(col.name)
487	bh	1043	if col.type == FIELDTYPE_DOUBLE:
488			prec = getattr(col, "prec", 12)
489			else:
490			prec = 0
491	bh	1371	dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
492			width, prec)
493	frank	1025
494	jonathan	1431	if rows is None:
495			rows = range(table.NumRows())
496	jonathan	1389
497			recNum = 0
498	jonathan	1431	for i in rows:
499	bh	1371	record = {}
500			for key, value in table.ReadRowAsDict(i).items():
501			record[name_map[key]] = value
502	jonathan	1389	dbf.write_record(recNum, record)
503			recNum += 1
504	frank	1025	dbf.close()
505
506	jonathan	1431	def table_to_csv(table, filename, rows = None):
507			"""Export table to csv file.
508
509			If rows is not None (the default) then it must be a list of row
510			indices to be saved to the file, otherwise all rows are saved.
511			"""
512	frank	1025
513			file = open(filename,"w")
514			columns = table.Columns()
515			if columns:
516			header = "#%s" % columns[0].name
517			for col in columns[1:]:
518			header = header + ",%s" % col.name
519			header = header + "\n"
520			file.write(header)
521
522	jonathan	1431	if rows is None:
523			rows = range(table.NumRows())
524	jonathan	1389
525	jonathan	1431	for i in rows:
526	frank	1025	record = table.ReadRowAsDict(i)
527			if len(record):
528			line = "%s" % record[columns[0].name]
529			for col in columns[1:]:
530			line = line + ",%s" % record[col.name]
531			line = line + "\n"
532			file.write(line)
533			file.close()
534
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision