Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# The field types supported by a Table instance. The column objects
# defined in this module (DBFColumn, MemoryColumn) carry one of these
# values in their type attribute.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants.
# DBFTable.__init__ uses this mapping to translate the type reported by
# the DBF file's field_info method. table_to_dbf builds the inverse
# mapping locally when it writes a table to a DBF file.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    The class this is mixed into has to provide the new-style methods
    NumRows, NumColumns, Column, ValueRange, UniqueValues and
    ReadRowAsDict. Every old-style method defined here issues exactly
    one DeprecationWarning and then delegates to its new-style
    counterpart.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        This method must be called directly by the deprecated method:
        the method name in the message is taken from the calling frame
        and the stacklevel of 3 attributes the warning to the code that
        called the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, col):
        """Return the tuple (type, name, width, prec) for the column col

        Shared implementation of field_info and field_info_by_name. It
        doesn't issue a warning itself so that each of the public
        methods warns only once.
        """
        column = self.Column(col)
        return (column.type, column.name,
                getattr(column, "width", 0), getattr(column, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows (use NumRows instead)"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns (use NumColumns instead)
        """
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.

        Deprecated. Use the Column method instead.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info or None for unknown columns

        None is returned if the lookup of the column raises a KeyError.

        Deprecated. Use the HasColumn and Column methods instead.
        """
        self.__deprecation_warning()
        try:
            # Don't go through self.field_info here. That would issue a
            # second warning which would be attributed to this method
            # instead of to the code that called it.
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that the lookup raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return the value range of the column as ((min, None), (max, None))

        For an empty table, i.e. when ValueRange returns None, both the
        minimum and the maximum are None.

        Deprecated. Use the ValueRange method instead.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            # ValueRange returns None instead of a tuple for an empty
            # table, so there's nothing to unpack.
            return ((None, None), (None, None))
        minimum, maximum = value_range
        return ((minimum, None), (maximum, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the unique values (use UniqueValues instead)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return row r as a dict (use ReadRowAsDict instead)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Describe a single column of a DBFTable

    The public attributes of an instance are:

    name -- The column's name
    type -- The column's data type. One of FIELDTYPE_STRING,
            FIELDTYPE_INT or FIELDTYPE_DOUBLE
    index -- The position of the column in the table
    width -- The width of the data in the column
    prec -- The precision of the data. Only meaningful if the type is
            FIELDTYPE_DOUBLE
    """

    def __init__(self, name, type, width, prec, index):
        """Initialize the column description from the parameters"""
        # A DBFColumn is a plain record. The parameters are stored
        # unchanged under the attribute names documented on the class.
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec


class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file

    Rows are addressed by their record number in the file, columns
    either by their name or by their index. The column descriptions are
    read once when the table is instantiated.
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename for reading and read the column info

        The absolute version of filename is remembered (see FileName)
        and the title of the table is the basename of the file without
        the extension.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. Each column can be
        # looked up in column_map by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if the table has no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Start with the value of the first row and compare all the
        # other rows against it.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index

        The width is the one read from the DBF file when the table was
        instantiated.
        """
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file

        Afterwards self.dbf is None. Calling Destroy again on an already
        destroyed table does nothing.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.

        On the first call the DBF file is reopened for reading and
        writing, and that file object is used for all IO afterwards.
        """
        if not self._writable:
            # Open the read/write file first. If that raises an
            # exception the table keeps its working read-only file.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute filename of the DBF file

        This is the filename the DBFTable was instantiated with, made
        absolute with os.path.abspath.
        """
        return self.filename


class MemoryColumn:

    """Describe a single column of a MemoryTable

    The public attributes of an instance are:

    name -- The column's name
    type -- The column's data type
    index -- The position of the column in the table
    """

    def __init__(self, name, type, index):
        """Initialize the column description from the parameters"""
        # Plain record: the parameters are stored unchanged.
        self.index = index
        self.name, self.type = name, type

class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied. write_record modifies it in place.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. Each column can be
        # looked up in column_map by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if the table has no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places for this.

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        width = 0
        for value in values:
            length = len(template % value)
            if length > width:
                width = length

        return width

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names (or indices) of
        the columns. Only the columns actually contained in the
        dictionary are written, the other values of the row are not
        modified. A KeyError is raised for an unknown column and the row
        is left unchanged in that case.

        If it's a sequence, it replaces the entire row, so all columns
        must be present in the right order.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        if isinstance(values, dict):
            old_row = self.data[record]
            # Build the complete new row first so that the table isn't
            # modified at all if one of the keys is not a valid column.
            new_row = list(old_row)
            for name, value in values.items():
                new_row[self.column_map[name].index] = value
            # rows are usually tuples. Keep it that way.
            if isinstance(old_row, tuple):
                new_row = tuple(new_row)
            values = new_row
        self.data[record] = values


def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names
    so when writing an arbitrary Thuban table to a DBF file we may have
    to rename some of the columns making sure that they're unique in the
    DBF file too.

    Names that are already short enough will stay the same. Longer names
    will be truncated to 10 characters or if that isn't unique it will
    be truncated more and filled up with digits.

    The parameter names should be a list of the column names. The list
    is not modified. The return value will be a dictionary mapping the
    names in the input list to the names to use in the DBF file.

    A ValueError is raised if no unique name can be found for one of the
    columns.
    """
    # mapping from the original names in table to the names in the DBF
    # file
    name_map = {}

    # First, we keep all names that are already short enough. The longer
    # names are collected in a separate list, in their original order,
    # so that the caller's list doesn't have to be modified.
    long_names = []
    for name in names:
        if len(name) <= 10:
            name_map[name] = name
        else:
            long_names.append(name)

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in long_names:
        candidate = name[:10]
        num = 0
        while candidate in used:
            num += 1
            numstr = str(num)
            if len(numstr) >= 10:
                # This case should never happen in practice as tables with
                # 10^10 columns seem very unlikely :)
                raise ValueError("Can't find unique dbf column name")
            candidate = name[:10 - len(numstr)] + numstr

        name_map[name] = candidate
        used[candidate] = 1

    return name_map

def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows of the table are saved.
    Otherwise it must be a list of row indices and only those rows are
    saved, in the order given by the list.

    Column names are converted to names suitable for DBF files with
    _find_dbf_column_names.
    """
    # Map our field type constants to the dbflib constants. This is the
    # inverse of the module level dbflib_fieldtypes mapping, hence the
    # different name.
    dbf_types = {FIELDTYPE_STRING: dbflib.FTString,
                 FIELDTYPE_INT: dbflib.FTInteger,
                 FIELDTYPE_DOUBLE: dbflib.FTDouble}

    columns = table.Columns()
    name_map = _find_dbf_column_names([col.name for col in columns])

    dbf = dbflib.create(filename)
    try:
        # Initialise the header.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                # Not all column objects have a prec attribute (a
                # DBFColumn has one, a MemoryColumn doesn't). The
                # default of 12 matches the "%.12f" format that
                # MemoryTable.Width uses to determine the width.
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], dbf_types[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # The records in the new file are numbered consecutively,
        # regardless of the row indices in the source table.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        # Make sure the file is closed even if the table can't be
        # written completely.
        dbf.close()

def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows of the table are saved.
    Otherwise it must be a list of row indices and only those rows are
    saved, in the order given by the list.

    The first line of the file is a header with the column names
    prefixed with '#'. Each row is written as one line with the values
    separated by commas. A row for which the table returns an empty
    record is written as an empty line. Nothing is written if the table
    has no columns.

    NOTE: The values are not quoted or escaped, so values containing
    commas or newlines lead to ambiguous files.
    """
    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            outfile.write("#%s\n"
                          % ",".join(["%s" % col.name for col in columns]))

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                else:
                    # Build the line from scratch for every row so that
                    # an empty record can never repeat the line of the
                    # previous row.
                    line = ""
                outfile.write(line + "\n")
    finally:
        # Make sure the file is closed even if reading the table fails.
        outfile.close()

1	bh	590	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	bh	6	# Authors:
3			# Bernhard Herzog <[email protected]>
4	jan	806	# Jan-Oliver Wagner <[email protected]>
5	frank	1025	# Frank Koormann <[email protected]>
6	bh	6	#
7			# This program is free software under the GPL (>=v2)
8			# Read the file COPYING coming with Thuban for details.
9
10			"""
11			Classes for handling tables of data.
12			"""
13
14			__version__ = "$Revision$"
15
16	bh	998	import os
17	bh	839	import inspect
18			import warnings
19
20	jan	1019	from base import TitledObject
21
22	bh	6	import dbflib
23
24			# the field types supported by a Table instance.
25	jonathan	474	FIELDTYPE_INT = "int"
26			FIELDTYPE_STRING = "string"
27			FIELDTYPE_DOUBLE = "double"
28	bh	6
29
30			# map the dbflib constants for the field types to our constants
31			dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
32			dbflib.FTInteger: FIELDTYPE_INT,
33			dbflib.FTDouble: FIELDTYPE_DOUBLE}
34
35	jan	806
36	bh	818	class OldTableInterfaceMixin:
37	jan	806
38	bh	818	"""Mixin to implement the old table interface using the new one"""
39	jan	806
40	bh	839	def __deprecation_warning(self):
41			"""Issue a DeprecationWarning for code hat uses the old interface"""
42			callername = inspect.currentframe().f_back.f_code.co_name
43			warnings.warn("The %s method of the old table interface"
44			" is deprecated" % callername,
45			DeprecationWarning, stacklevel = 3)
46
47	bh	818	def record_count(self):
48	bh	839	self.__deprecation_warning()
49	bh	818	return self.NumRows()
50	jan	806
51			def field_count(self):
52	bh	839	self.__deprecation_warning()
53	bh	818	return self.NumColumns()
54	jan	806
55	bh	818	def field_info(self, field):
56			"""Return a tuple (type, name, width, prec) for the field no. field
57	jan	806
58	bh	818	type is the data type of the field, name the name, width the
59			field width in characters and prec the decimal precision. width
60			and prec will be zero if the information returned by the Column
61			method doesn't provide values for them.
62			"""
63	bh	839	self.__deprecation_warning()
64	bh	818	col = self.Column(field)
65			return (col.type, col.name,
66			getattr(col, "width", 0), getattr(col, "prec", 0))
67	jan	806
68	bh	818	def field_info_by_name(self, col):
69	bh	839	self.__deprecation_warning()
70	bh	818	try:
71			return self.field_info(col)
72			except KeyError:
73			# FIXME: It may be that field_info raises other exceptions
74			# when the name is not a valid column name.
75			return None
76	jan	806
77	bh	818	def field_range(self, fieldName):
78	bh	839	self.__deprecation_warning()
79	bh	818	min, max = self.ValueRange(fieldName)
80			return ((min, None), (max, None))
81	jan	806
82	bh	818	def GetUniqueValues(self, field):
83	bh	839	self.__deprecation_warning()
84	bh	818	return self.UniqueValues(field)
85	jan	806
86	bh	818	def read_record(self, r):
87	bh	839	self.__deprecation_warning()
88	bh	818	return self.ReadRowAsDict(r)
89	bh	6
90	bh	818
91
92			class DBFColumn:
93
94			"""Description of a column in a DBFTable
95
96			Instances have the following public attributes:
97
98			name -- Name of the column
99			type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
100			FIELDTYPE_DOUBLE)
101			index -- The index of the column
102			width -- the width of the data in the column
103			prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
104	bh	6	"""
105	bh	818
106			def __init__(self, name, type, width, prec, index):
107			self.name = name
108			self.type = type
109			self.width = width
110			self.prec = prec
111			self.index = index
112
113
114	jan	1019	class DBFTable(TitledObject, OldTableInterfaceMixin):
115	bh	818
116			"""
117	bh	765	Table interface for the data in a DBF file
118	bh	6	"""
119
120	bh	286	# Implementation strategy regarding writing to a DBF file:
121			#
122			# Most of the time Thuban only needs to read from a table and it is
123			# important that Thuban can work with read-only files. Therefore the
124			# DBF file is opened only for reading initially. Only when
125			# write_record is called we try to open the DBF file for writing as
126	bh	590	# well. If that succeeds the read/write DBF file will be used for
127			# all IO afterwards.
128	bh	286	#
129			# It's important to use the same DBF file object for both reading
130			# and writing to make sure that reading a records after writing
131			# returns the new values. With two separate objects this wouldn't
132			# work because a DBF file object buffers some data
133
134	bh	6	def __init__(self, filename):
135	bh	1599	self.filename = os.path.abspath(filename)
136	bh	1078
137			# Omit the extension in the title as it's not really needed and
138			# it can be confusing because dbflib removes extensions and
139			# appends some variations of '.dbf' before it tries to open the
140			# file. So the title could be e.g. myshapefile.shp when the real
141			# filename is myshapefile.dbf
142			title = os.path.splitext(os.path.basename(self.filename))[0]
143	jan	1019	TitledObject.__init__(self, title)
144	bh	1078
145	bh	284	self.dbf = dbflib.DBFFile(filename)
146	bh	6
147	bh	286	# If true, self.dbf is open for writing.
148			self._writable = 0
149
150	bh	818	# Create the column information objects
151			self.columns = []
152			self.column_map = {}
153			for i in range(self.NumColumns()):
154			ftype, name, width, prec = self.dbf.field_info(i)
155			ftype = dbflib_fieldtypes[ftype]
156			index = len(self.columns)
157			col = DBFColumn(name, ftype, width, prec, index)
158			self.columns.append(col)
159			self.column_map[name] = col
160			self.column_map[index] = col
161	bh	257
162	bh	818	def NumRows(self):
163			"""Return the number of rows in the table"""
164	bh	6	return self.dbf.record_count()
165
166	bh	818	def NumColumns(self):
167			"""Return the number of columns in the table"""
168	bh	6	return self.dbf.field_count()
169
170	bh	818	def Columns(self):
171			"""Return the table's colum definitions
172	bh	6
173	bh	818	The return value is a sequence of DBFColumn instances, one for
174			each column.
175	bh	6	"""
176	bh	818	return self.columns
177	bh	6
178	bh	818	def Column(self, col):
179			"""Return information about the column given by its name or index
180	jonathan	467
181	bh	818	The returned object is an instance of DBFColumn
182			"""
183			return self.column_map[col]
184	jonathan	467
185	bh	839	def HasColumn(self, col):
186			"""Return whether the table has a column with the given name or index
187			"""
188			return self.column_map.has_key(col)
189
190	bh	1662	def RowIdToOrdinal(self, gid):
191			"""Return the row ordinal given its id
192
193			Since for DBFTables the row id is the row number, return the
194			value unchanged.
195			"""
196			return gid
197
198			def RowOrdinalToId(self, num):
199			"""Return the rowid for given its ordinal
200
201			Since for DBFTables the row id is the row number, return the
202			value unchanged.
203			"""
204			return num
205
206			def ReadRowAsDict(self, row, row_is_ordinal = 0):
207			"""Return the entire row as a dictionary with column names as keys
208
209			The row_is_ordinal is ignored for DBF tables because the row id
210			is always the row number.
211			"""
212	bh	818	return self.dbf.read_record(row)
213	jonathan	467
214	bh	1662	def ReadValue(self, row, col, row_is_ordinal = 0):
215	bh	818	"""Return the value of the specified row and column
216	jonathan	628
217	bh	818	The col parameter may be the index of the column or its name.
218	bh	1662
219			The row_is_ordinal is ignored for DBF tables because the row id
220			is always the row number.
221	bh	818	"""
222	bh	1919	return self.dbf.read_attribute(row, self.column_map[col].index)
223	jonathan	628
224	bh	818	def ValueRange(self, col):
225			"""Return the minimum and maximum values of the values in the column
226	jonathan	628
227	bh	818	The return value is a tuple (min, max) unless the table is empty
228			in which case the return value is None.
229	jonathan	628	"""
230	bh	818	count = self.NumRows()
231	jonathan	628
232			if count == 0:
233			return None
234
235	bh	818	min = max = self.ReadValue(0, col)
236	jonathan	628	for i in range(1, count):
237	bh	818	value = self.ReadValue(i, col)
238			if value < min:
239			min = value
240			elif value > max:
241			max = value
242	jonathan	628
243	bh	818	return (min, max)
244	jonathan	628
245	bh	818	def UniqueValues(self, col):
246			"""Return a sorted list of all unique values in the column col"""
247			dict = {}
248	jonathan	628
249	bh	818	for i in range(self.NumRows()):
250			value = self.ReadValue(i, col)
251			dict[value] = 0
252	jonathan	628
253	bh	818	values = dict.keys()
254			values.sort()
255			return values
256	jonathan	628
257	bh	984	def Dependencies(self):
258			"""Return an empty sequence. The DBFTable doesn't depend on anything"""
259			return ()
260	jonathan	628
261	bh	818	# DBF specific interface parts.
262	jonathan	628
263	frank	1025	def Width(self, col):
264			"""Return column width"""
265			return self.column_map[col].width
266
267	bh	818	def Destroy(self):
268			self.dbf.close()
269			self.dbf = None
270	jonathan	628
271	bh	274	def write_record(self, record, values):
272			"""Write the values into the record
273
274			The values parameter may either be a dictionary or a sequence.
275
276			If it's a dictionary the keys must be the names of the fields
277			and their value must have a suitable type. Only the fields
278			actually contained in the dictionary are written. Fields for
279			which there's no item in the dict are not modified.
280
281			If it's a sequence, all fields must be present in the right
282			order.
283			"""
284	bh	286	if not self._writable:
285			new_dbf = dbflib.DBFFile(self.filename, "r+b")
286			self.dbf.close()
287			self.dbf = new_dbf
288			self._writable = 1
289			self.dbf.write_record(record, values)
290			self.dbf.commit()
291	jonathan	467
292	bh	994	def FileName(self):
293			"""Return the filename the DBFTable was instantiated with"""
294			return self.filename
295	bh	765
296
297	bh	818	class MemoryColumn:
298
299			def __init__(self, name, type, index):
300			self.name = name
301			self.type = type
302			self.index = index
303
304	jan	1019	class MemoryTable(TitledObject, OldTableInterfaceMixin):
305	bh	818
306			"""Very simple table implementation that operates on a list of tuples"""
307
308			def __init__(self, fields, data):
309			"""Initialize the MemoryTable
310
311			Parameters:
312			fields -- List of (name, field_type) pairs
313			data -- List of tuples, one for each row of data
314			"""
315			self.data = data
316	jan	1019	title = 'MemoryTable'
317			TitledObject.__init__(self, title)
318	bh	818
319			# Create the column information objects
320			self.columns = []
321			self.column_map = {}
322			for name, ftype in fields:
323			index = len(self.columns)
324			col = MemoryColumn(name, ftype, index)
325			self.columns.append(col)
326			self.column_map[name] = col
327			self.column_map[index] = col
328
329			def NumColumns(self):
330			"""Return the number of columns in the table"""
331			return len(self.columns)
332
333			def Column(self, col):
334			"""Return information about the column given by its name or index
335
336			The returned object is an instance of MemoryColumn.
337			"""
338			return self.column_map[col]
339
340			def Columns(self):
341			"""Return the table's colum definitions
342
343			The return value is a sequence of MemoryColumn instances, one
344			for each column.
345			"""
346			return self.columns
347
348	bh	839	def HasColumn(self, col):
349			"""Return whether the table has a column with the given name or index
350			"""
351			return self.column_map.has_key(col)
352
353	bh	818	def NumRows(self):
354			"""Return the number of rows in the table"""
355			return len(self.data)
356
357	bh	1662	def RowIdToOrdinal(self, gid):
358			"""Return the row ordinal given its id
359
360			Since for MemoryTables the row id is the row number, return the
361			value unchanged.
362			"""
363			return gid
364
365			def RowOrdinalToId(self, num):
366			"""Return the rowid for given its ordinal
367
368			Since for MemoryTables the row id is the row number, return the
369			value unchanged.
370			"""
371			return num
372
373			def ReadValue(self, row, col, row_is_ordinal = 0):
374	bh	818	"""Return the value of the specified row and column
375
376			The col parameter may be the index of the column or its name.
377	bh	1662
378			The row_is_ordinal is ignored for DBF tables because the row id
379			is always the row number.
380	bh	818	"""
381			return self.data[row][self.column_map[col].index]
382
383	bh	1662	def ReadRowAsDict(self, index, row_is_ordinal = 0):
384			"""Return the entire row as a dictionary with column names as keys
385
386			The row_is_ordinal is ignored for DBF tables because the row id
387			is always the row number.
388			"""
389	bh	818	return dict([(col.name, self.data[index][col.index])
390			for col in self.columns])
391
392			def ValueRange(self, col):
393			"""Return the minimum and maximum values of the values in the column
394
395			The return value is a tuple (min, max) unless the table is empty
396			in which case the return value is None.
397			"""
398
399			index = self.column_map[col].index
400			values = [row[index] for row in self.data]
401			if not values:
402			return None
403
404			return min(values), max(values)
405
def UniqueValues(self, col):
    """Return a sorted list of all unique values in the column col

    col can be either column index or name. The values are read row
    by row with ReadValue for all rows counted by NumRows. For a
    table without rows the result is an empty list.
    """
    # A dictionary is used as the set of values seen so far. It is
    # deliberately not called "dict" so the builtin stays usable.
    seen = {}
    for row in range(self.NumRows()):
        seen[self.ReadValue(row, col)] = 0

    # Copy the keys into a real list before sorting; sorting the
    # result of keys() in place only works where keys() returns a list.
    values = list(seen)
    values.sort()
    return values
420
def Width(self, col):
    """Return the maximum width of values in the column

    The return value is the maximum length of the string
    representation of the values in the column (represented by index
    or name). Doubles are formatted with 12 decimal places and
    integers as plain decimal numbers before their length is
    measured; all other values are formatted with %s.

    If the table has no rows the return value is None.
    """
    column = self.column_map[col]
    values = [row[column.index] for row in self.data]
    if not values:
        return None

    if column.type == FIELDTYPE_DOUBLE:
        fmt = "%.12f"
    elif column.type == FIELDTYPE_INT:
        fmt = "%d"
    else:
        fmt = "%s"

    # Wrap each value in a one-element tuple so that a value that is
    # itself a tuple is formatted as a whole instead of being unpacked
    # as the argument list of the % operator.
    return max([len(fmt % (value,)) for value in values])
448
def Dependencies(self):
    """Return the objects this table depends on

    A MemoryTable doesn't depend on anything, so the result is always
    an empty tuple.
    """
    return tuple()
453	bh	818
def write_record(self, record, values):
    """Replace the row with the number record by values

    values is stored in self.data exactly as given; nothing is
    checked or converted.
    """
    # TODO: Check for the correct length and perhaps also for correct
    # types in case values is a tuple. How to report problems?
    # TODO: Allow values to be a dictionary and write only the fields
    # that are specified.
    self.data[record] = values
460	frank	1025
461
462	bh	1371
463			def _find_dbf_column_names(names):
464			"""Determine the column names to use in a DBF file
465
466			DBF files have a length limit of 10 characters on the column names
467			so when writing an arbitrary Thuban table to a DBF file we may have
468			we may have to rename some of the columns making sure that they're
469			unique in the DBF file too.
470
471			Names that are already short enough will stay the same. Longer names
472			will be truncated to 10 characters or if that isn't unique it will
473			be truncated more and filled up with digits.
474
475			The parameter names should be a list of the column names. The return
476			value will be a dictionary mapping the names in the input list to
477			the names to use in the DBF file.
478			"""
479			# mapping from the original names in table to the names in the DBF
480			# file
481			name_map = {}
482
483			# First, we keep all names that are already short enough
484			for i in range(len(names) - 1, -1, -1):
485			if len(names[i]) <= 10:
486			name_map[names[i]] = names[i]
487			del names[i]
488
489			# dict used as a set of all names already used as DBF column names
490			used = name_map.copy()
491
492			# Go through all longer names. If the name truncated to 10
493			# characters is not used already, we use that. Otherwise we truncate
494			# it more and append numbers until we get an unused name
495			for name in names:
496			truncated = name[:10]
497			num = 0; numstr = ""
498			#print "truncated", truncated, num
499			while truncated in used and len(numstr) < 10:
500			num += 1
501			numstr = str(num)
502			truncated = name[:10 - len(numstr)] + numstr
503			#print "truncated", truncated, num
504			if len(numstr) >= 10:
505			# This case should never happen in practice as tables with
506			# 10^10 columns seem very unlikely :)
507			raise ValueError("Can't find unique dbf column name")
508
509			name_map[name] = truncated
510			used[truncated] = 1
511
512			return name_map
513
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    rows defaults to None, in which case all rows of the table are
    saved. Otherwise it must be a list of row indices and only those
    rows are saved, in the order given. The records in the dbf file
    are numbered consecutively starting with 0.

    Column names are mapped to names that are valid in a DBF file with
    _find_dbf_column_names. The dbf file is closed again even if
    writing the header or a record fails.
    """
    dbf = dbflib.create(filename)
    try:
        # Maps Thuban field types to dbflib field types, i.e. the
        # inverse direction of the module-level dbflib_fieldtypes.
        thuban_to_dbflib = {FIELDTYPE_STRING: dbflib.FTString,
                            FIELDTYPE_INT: dbflib.FTInteger,
                            FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Distinguish between DBFTable and others.
        for col in table.Columns():
            # NOTE(review): a table's Width method may return None when
            # the table has no rows -- confirm that add_field copes
            # with that.
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                # Columns that don't carry their own precision get 12
                # decimal places.
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], thuban_to_dbflib[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        recNum = 0
        for i in rows:
            # Rename the keys of the row to the DBF column names
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
551
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    rows defaults to None, in which case all rows of the table are
    saved. Otherwise it must be a list of row indices and only those
    rows are saved, in the order given.

    If the table has columns, the first line of the file is a header
    consisting of a '#' followed by the comma separated column names.
    Every row becomes one line with the values in column order,
    formatted with %s and separated by commas. Values are not quoted
    or escaped. The file is closed again even if writing fails.
    """
    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = ["%s" % col.name for col in columns]
            outfile.write("#%s\n" % ",".join(names))

        if rows is None:
            rows = range(table.NumRows())

        for i in rows:
            record = table.ReadRowAsDict(i)
            # Rows that come back empty produce no line at all
            if record:
                cells = ["%s" % record[col.name] for col in columns]
                outfile.write("%s\n" % ",".join(cells))
    finally:
        outfile.close()
580
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision