# Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# The field types supported by a Table instance. The column
# description objects of the tables in this module carry one of these
# values in their type attribute.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants.
# DBFTable.__init__ uses this mapping to translate the type code
# reported by the DBF file's field_info into a FIELDTYPE_* value.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method issues a DeprecationWarning and then delegates
    to a method of the new interface (NumRows, NumColumns, Column,
    ValueRange, UniqueValues, ReadRowAsDict). The class this mixin is
    combined with has to provide those methods.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The name of the deprecated method is taken from the frame of
        the direct caller, so this helper must be called directly from
        the deprecated method itself.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        # stacklevel 1 would be this helper and 2 the deprecated
        # method, so 3 attributes the warning to the code that called
        # the deprecated method.
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        width and prec are 0 if the object returned by Column has no
        such attributes. A KeyError raised by Column is passed through.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows, see NumRows"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns, see NumColumns"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None for unknown columns

        A column counts as unknown if the Column method raises a
        KeyError for it.
        """
        self.__deprecation_warning()
        # Deliberately not implemented by calling self.field_info: that
        # would issue a second warning which would be attributed to
        # this method instead of to the code using the old interface.
        try:
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return the value range as ((min, None), (max, None))

        The None items take the place of the record numbers that are
        not provided by ValueRange. If ValueRange returns None, which
        the table classes in this module do for tables without rows,
        the return value is None as well.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the unique values, see UniqueValues"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return the row r as a dictionary, see ReadRowAsDict"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Describe a single column of a DBFTable

    The public attributes of an instance are:

    name -- the name of the column
    type -- the type of the column; one of FIELDTYPE_STRING,
            FIELDTYPE_INT or FIELDTYPE_DOUBLE
    index -- the index of the column
    width -- the width of the data in the column
    prec -- the precision of the data; only valid if type is
            FIELDTYPE_DOUBLE
    """

    def __init__(self, name, type, width, prec, index):
        """Store all parameters in the attributes of the same name"""
        # Identification of the column
        self.name, self.index = name, index
        # Description of the data held in the column
        self.type, self.width, self.prec = type, width, prec


class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file

    Rows are addressed by their row number, which for DBF tables is
    both the row id and the row ordinal. Columns can be addressed by
    name or by index.
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called do we try to open the DBF file for writing
    # as well. If that succeeds the read/write DBF file will be used
    # for all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename for reading

        The absolute version of filename is remembered and returned by
        FileName. The title of the table is the basename of the file
        without its extension.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map finds a
        # column by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Read every value only once, values are fetched from the file.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting, keys() does
        # not return a sortable list in all Python versions.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file

        The table can't be used for IO afterwards. Calling Destroy
        again is harmless.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write file object first so that the
            # read-only one stays in place if opening fails.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute filename of the DBF file

        This is the absolute version of the filename the DBFTable was
        instantiated with.
        """
        return self.filename


class MemoryColumn:

    """Describe a single column of a MemoryTable

    The public attributes name, type and index hold the values given
    to the constructor.
    """

    def __init__(self, name, type, index):
        """Store all parameters in the attributes of the same name"""
        self.name, self.type, self.index = name, type, index

class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples

    Rows are addressed by their position in the list, which for
    MemoryTables is both the row id and the row ordinal. Columns can be
    addressed by name or by index.
    """

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied, write_record modifies it in place.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map finds a
        # column by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting, keys() does
        # not return a sortable list in all Python versions.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by
        index or name). Doubles are formatted with 12 decimal places
        for this. If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        # Choose the format the values are measured with
        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            length = len(template % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row number record with values

        values is stored as the new row without any checks, so it has
        to contain a value for every column in the right order.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values


def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names
    so when writing an arbitrary Thuban table to a DBF file we may have
    we may have to rename some of the columns making sure that they're
    unique in the DBF file too.

    Names that are already short enough will stay the same. Longer names
    will be truncated to 10 characters or if that isn't unique it will
    be truncated more and filled up with digits.

    The parameter names should be a list of the column names. The return
    value will be a dictionary mapping the names in the input list to
    the names to use in the DBF file.
    """
    # mapping from the original names in table to the names in the DBF
    # file
    name_map = {}

    # First, we keep all names that are already short enough
    for i in range(len(names) - 1, -1, -1):
        if len(names[i]) <= 10:
            name_map[names[i]] = names[i]
            del names[i]

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in names:
        truncated = name[:10]
        num = 0; numstr = ""
        #print "truncated", truncated, num
        while truncated in used and len(numstr) < 10:
            num += 1
            numstr = str(num)
            truncated = name[:10 - len(numstr)] + numstr
            #print "truncated", truncated, num
        if len(numstr) >= 10:
            # This case should never happen in practice as tables with
            # 10^10 columns seem very unlikely :)
            raise ValueError("Can't find unique dbf column name")

        name_map[name] = truncated
        used[truncated] = 1

    return name_map

def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    Parameters:
    table -- the table to export. It has to provide the methods
             Columns, Width, NumRows and ReadRowAsDict.
    filename -- name of the DBF file to create
    rows -- if None (the default) all rows are saved, otherwise it
            must be a list of the row indices to be saved to the file.

    Column names that are too long for a DBF file are renamed, see
    _find_dbf_column_names. The rows are numbered consecutively in the
    new file in the order in which they're written.
    """
    # Map our field type constants to the ones of dbflib, i.e. the
    # inverse of the module level dbflib_fieldtypes mapping.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    # Determine the names before the file is created so that no file
    # is left behind if no suitable names can be found.
    columns = table.Columns()
    name_map = _find_dbf_column_names([col.name for col in columns])

    dbf = dbflib.create(filename)
    try:
        # Initialise the header. Only columns of DBFTables have a prec
        # attribute, for all other tables doubles get a precision of 12.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        # Close the file even if one of the steps above failed
        dbf.close()

def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    Parameters:
    table -- the table to export. It has to provide the methods
             Columns, NumRows and ReadRowAsDict.
    filename -- name of the file to write
    rows -- if None (the default) all rows are saved, otherwise it
            must be a list of the row indices to be saved to the file.

    The first line of the file is a '#' followed by the column names.
    It is followed by one line per row. All items are separated by
    commas; the values are written without any quoting. An empty line
    is written for a row for which ReadRowAsDict returns an empty
    dictionary. If the table has no columns the file is left empty.
    """
    outfile = open(filename,"w")
    try:
        columns = table.Columns()
        if columns:
            header = ",".join(["%s" % col.name for col in columns])
            outfile.write("#" + header + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                # Start every row with a fresh line so that a row
                # without data can't repeat the line of the previous
                # row.
                if record:
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                else:
                    line = ""
                outfile.write(line + "\n")
    finally:
        # Close the file even if reading from the table failed
        outfile.close()

1	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	# Jan-Oliver Wagner <[email protected]>
5	# Frank Koormann <[email protected]>
6	#
7	# This program is free software under the GPL (>=v2)
8	# Read the file COPYING coming with Thuban for details.
9
10	"""
11	Classes for handling tables of data.
12	"""
13
14	__version__ = "$Revision$"
15
16	import os
17	import inspect
18	import warnings
19
20	from base import TitledObject
21
22	import dbflib
23
24	# the field types supported by a Table instance.
25	FIELDTYPE_INT = "int"
26	FIELDTYPE_STRING = "string"
27	FIELDTYPE_DOUBLE = "double"
28
29
30	# map the dbflib constants for the field types to our constants
31	dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
32	dbflib.FTInteger: FIELDTYPE_INT,
33	dbflib.FTDouble: FIELDTYPE_DOUBLE}
34
35
36	class OldTableInterfaceMixin:
37
38	"""Mixin to implement the old table interface using the new one"""
39
40	def __deprecation_warning(self):
41	"""Issue a DeprecationWarning for code hat uses the old interface"""
42	callername = inspect.currentframe().f_back.f_code.co_name
43	warnings.warn("The %s method of the old table interface"
44	" is deprecated" % callername,
45	DeprecationWarning, stacklevel = 3)
46
47	def record_count(self):
48	self.__deprecation_warning()
49	return self.NumRows()
50
51	def field_count(self):
52	self.__deprecation_warning()
53	return self.NumColumns()
54
55	def field_info(self, field):
56	"""Return a tuple (type, name, width, prec) for the field no. field
57
58	type is the data type of the field, name the name, width the
59	field width in characters and prec the decimal precision. width
60	and prec will be zero if the information returned by the Column
61	method doesn't provide values for them.
62	"""
63	self.__deprecation_warning()
64	col = self.Column(field)
65	return (col.type, col.name,
66	getattr(col, "width", 0), getattr(col, "prec", 0))
67
68	def field_info_by_name(self, col):
69	self.__deprecation_warning()
70	try:
71	return self.field_info(col)
72	except KeyError:
73	# FIXME: It may be that field_info raises other exceptions
74	# when the name is not a valid column name.
75	return None
76
77	def field_range(self, fieldName):
78	self.__deprecation_warning()
79	min, max = self.ValueRange(fieldName)
80	return ((min, None), (max, None))
81
82	def GetUniqueValues(self, field):
83	self.__deprecation_warning()
84	return self.UniqueValues(field)
85
86	def read_record(self, r):
87	self.__deprecation_warning()
88	return self.ReadRowAsDict(r)
89
90
91
92	class DBFColumn:
93
94	"""Description of a column in a DBFTable
95
96	Instances have the following public attributes:
97
98	name -- Name of the column
99	type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
100	FIELDTYPE_DOUBLE)
101	index -- The index of the column
102	width -- the width of the data in the column
103	prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
104	"""
105
106	def __init__(self, name, type, width, prec, index):
107	self.name = name
108	self.type = type
109	self.width = width
110	self.prec = prec
111	self.index = index
112
113
114	class DBFTable(TitledObject, OldTableInterfaceMixin):
115
116	"""
117	Table interface for the data in a DBF file
118	"""
119
120	# Implementation strategy regarding writing to a DBF file:
121	#
122	# Most of the time Thuban only needs to read from a table and it is
123	# important that Thuban can work with read-only files. Therefore the
124	# DBF file is opened only for reading initially. Only when
125	# write_record is called we try to open the DBF file for writing as
126	# well. If that succeeds the read/write DBF file will be used for
127	# all IO afterwards.
128	#
129	# It's important to use the same DBF file object for both reading
130	# and writing to make sure that reading a records after writing
131	# returns the new values. With two separate objects this wouldn't
132	# work because a DBF file object buffers some data
133
134	def __init__(self, filename):
135	self.filename = os.path.abspath(filename)
136
137	# Omit the extension in the title as it's not really needed and
138	# it can be confusing because dbflib removes extensions and
139	# appends some variations of '.dbf' before it tries to open the
140	# file. So the title could be e.g. myshapefile.shp when the real
141	# filename is myshapefile.dbf
142	title = os.path.splitext(os.path.basename(self.filename))[0]
143	TitledObject.__init__(self, title)
144
145	self.dbf = dbflib.DBFFile(filename)
146
147	# If true, self.dbf is open for writing.
148	self._writable = 0
149
150	# Create the column information objects
151	self.columns = []
152	self.column_map = {}
153	for i in range(self.NumColumns()):
154	ftype, name, width, prec = self.dbf.field_info(i)
155	ftype = dbflib_fieldtypes[ftype]
156	index = len(self.columns)
157	col = DBFColumn(name, ftype, width, prec, index)
158	self.columns.append(col)
159	self.column_map[name] = col
160	self.column_map[index] = col
161
162	def NumRows(self):
163	"""Return the number of rows in the table"""
164	return self.dbf.record_count()
165
166	def NumColumns(self):
167	"""Return the number of columns in the table"""
168	return self.dbf.field_count()
169
170	def Columns(self):
171	"""Return the table's colum definitions
172
173	The return value is a sequence of DBFColumn instances, one for
174	each column.
175	"""
176	return self.columns
177
178	def Column(self, col):
179	"""Return information about the column given by its name or index
180
181	The returned object is an instance of DBFColumn
182	"""
183	return self.column_map[col]
184
185	def HasColumn(self, col):
186	"""Return whether the table has a column with the given name or index
187	"""
188	return self.column_map.has_key(col)
189
190	def RowIdToOrdinal(self, gid):
191	"""Return the row ordinal given its id
192
193	Since for DBFTables the row id is the row number, return the
194	value unchanged.
195	"""
196	return gid
197
198	def RowOrdinalToId(self, num):
199	"""Return the rowid for given its ordinal
200
201	Since for DBFTables the row id is the row number, return the
202	value unchanged.
203	"""
204	return num
205
206	def ReadRowAsDict(self, row, row_is_ordinal = 0):
207	"""Return the entire row as a dictionary with column names as keys
208
209	The row_is_ordinal is ignored for DBF tables because the row id
210	is always the row number.
211	"""
212	return self.dbf.read_record(row)
213
214	def ReadValue(self, row, col, row_is_ordinal = 0):
215	"""Return the value of the specified row and column
216
217	The col parameter may be the index of the column or its name.
218
219	The row_is_ordinal is ignored for DBF tables because the row id
220	is always the row number.
221	"""
222	return self.dbf.read_attribute(row, self.column_map[col].index)
223
224	def ValueRange(self, col):
225	"""Return the minimum and maximum values of the values in the column
226
227	The return value is a tuple (min, max) unless the table is empty
228	in which case the return value is None.
229	"""
230	count = self.NumRows()
231
232	if count == 0:
233	return None
234
235	min = max = self.ReadValue(0, col)
236	for i in range(1, count):
237	value = self.ReadValue(i, col)
238	if value < min:
239	min = value
240	elif value > max:
241	max = value
242
243	return (min, max)
244
245	def UniqueValues(self, col):
246	"""Return a sorted list of all unique values in the column col"""
247	dict = {}
248
249	for i in range(self.NumRows()):
250	value = self.ReadValue(i, col)
251	dict[value] = 0
252
253	values = dict.keys()
254	values.sort()
255	return values
256
257	def Dependencies(self):
258	"""Return an empty sequence. The DBFTable doesn't depend on anything"""
259	return ()
260
261	# DBF specific interface parts.
262
263	def Width(self, col):
264	"""Return column width"""
265	return self.column_map[col].width
266
267	def Destroy(self):
268	self.dbf.close()
269	self.dbf = None
270
271	def write_record(self, record, values):
272	"""Write the values into the record
273
274	The values parameter may either be a dictionary or a sequence.
275
276	If it's a dictionary the keys must be the names of the fields
277	and their value must have a suitable type. Only the fields
278	actually contained in the dictionary are written. Fields for
279	which there's no item in the dict are not modified.
280
281	If it's a sequence, all fields must be present in the right
282	order.
283	"""
284	if not self._writable:
285	new_dbf = dbflib.DBFFile(self.filename, "r+b")
286	self.dbf.close()
287	self.dbf = new_dbf
288	self._writable = 1
289	self.dbf.write_record(record, values)
290	self.dbf.commit()
291
292	def FileName(self):
293	"""Return the filename the DBFTable was instantiated with"""
294	return self.filename
295
296
297	class MemoryColumn:
298
299	def __init__(self, name, type, index):
300	self.name = name
301	self.type = type
302	self.index = index
303
304	class MemoryTable(TitledObject, OldTableInterfaceMixin):
305
306	"""Very simple table implementation that operates on a list of tuples"""
307
308	def __init__(self, fields, data):
309	"""Initialize the MemoryTable
310
311	Parameters:
312	fields -- List of (name, field_type) pairs
313	data -- List of tuples, one for each row of data
314	"""
315	self.data = data
316	title = 'MemoryTable'
317	TitledObject.__init__(self, title)
318
319	# Create the column information objects
320	self.columns = []
321	self.column_map = {}
322	for name, ftype in fields:
323	index = len(self.columns)
324	col = MemoryColumn(name, ftype, index)
325	self.columns.append(col)
326	self.column_map[name] = col
327	self.column_map[index] = col
328
329	def NumColumns(self):
330	"""Return the number of columns in the table"""
331	return len(self.columns)
332
333	def Column(self, col):
334	"""Return information about the column given by its name or index
335
336	The returned object is an instance of MemoryColumn.
337	"""
338	return self.column_map[col]
339
340	def Columns(self):
341	"""Return the table's colum definitions
342
343	The return value is a sequence of MemoryColumn instances, one
344	for each column.
345	"""
346	return self.columns
347
348	def HasColumn(self, col):
349	"""Return whether the table has a column with the given name or index
350	"""
351	return self.column_map.has_key(col)
352
353	def NumRows(self):
354	"""Return the number of rows in the table"""
355	return len(self.data)
356
357	def RowIdToOrdinal(self, gid):
358	"""Return the row ordinal given its id
359
360	Since for MemoryTables the row id is the row number, return the
361	value unchanged.
362	"""
363	return gid
364
365	def RowOrdinalToId(self, num):
366	"""Return the rowid for given its ordinal
367
368	Since for MemoryTables the row id is the row number, return the
369	value unchanged.
370	"""
371	return num
372
373	def ReadValue(self, row, col, row_is_ordinal = 0):
374	"""Return the value of the specified row and column
375
376	The col parameter may be the index of the column or its name.
377
378	The row_is_ordinal is ignored for DBF tables because the row id
379	is always the row number.
380	"""
381	return self.data[row][self.column_map[col].index]
382
383	def ReadRowAsDict(self, index, row_is_ordinal = 0):
384	"""Return the entire row as a dictionary with column names as keys
385
386	The row_is_ordinal is ignored for DBF tables because the row id
387	is always the row number.
388	"""
389	return dict([(col.name, self.data[index][col.index])
390	for col in self.columns])
391
392	def ValueRange(self, col):
393	"""Return the minimum and maximum values of the values in the column
394
395	The return value is a tuple (min, max) unless the table is empty
396	in which case the return value is None.
397	"""
398
399	index = self.column_map[col].index
400	values = [row[index] for row in self.data]
401	if not values:
402	return None
403
404	return min(values), max(values)
405
406	def UniqueValues(self, col):
407	"""Return a sorted list of all unique values in the column col
408
409	col can be either column index or name.
410	"""
411	dict = {}
412
413	for i in range(self.NumRows()):
414	value = self.ReadValue(i, col)
415	dict[value] = 0
416
417	values = dict.keys()
418	values.sort()
419	return values
420
421	def Width(self, col):
422	"""Return the maximum width of values in the column
423
424	The return value is the the maximum length of string
425	representation of the values in the column (represented by index
426	or name).
427	"""
428	max = 0
429
430	type = self.column_map[col].type
431	index = self.column_map[col].index
432	values = [row[index] for row in self.data]
433	if not values:
434	return None
435
436	if type == FIELDTYPE_DOUBLE:
437	format = "%.12f"
438	elif type == FIELDTYPE_INT:
439	format = "%d"
440	else:
441	format = "%s"
442	for value in values:
443	l = len(format % value)
444	if l > max:
445	max = l
446
447	return max
448
449	def Dependencies(self):
450	"""Return an empty sequence. The MemoryTable doesn't depend on anything
451	"""
452	return ()
453
454	def write_record(self, record, values):
455	# TODO: Check for correct lenght and perhaps also
456	# for correct types in case values is a tuple. How to report problems?
457	# TODO: Allow values to be a dictionary and write the single
458	# fields that are specified.
459	self.data[record] = values
460
461
462
463	def _find_dbf_column_names(names):
464	"""Determine the column names to use in a DBF file
465
466	DBF files have a length limit of 10 characters on the column names
467	so when writing an arbitrary Thuban table to a DBF file we may have
468	we may have to rename some of the columns making sure that they're
469	unique in the DBF file too.
470
471	Names that are already short enough will stay the same. Longer names
472	will be truncated to 10 characters or if that isn't unique it will
473	be truncated more and filled up with digits.
474
475	The parameter names should be a list of the column names. The return
476	value will be a dictionary mapping the names in the input list to
477	the names to use in the DBF file.
478	"""
479	# mapping from the original names in table to the names in the DBF
480	# file
481	name_map = {}
482
483	# First, we keep all names that are already short enough
484	for i in range(len(names) - 1, -1, -1):
485	if len(names[i]) <= 10:
486	name_map[names[i]] = names[i]
487	del names[i]
488
489	# dict used as a set of all names already used as DBF column names
490	used = name_map.copy()
491
492	# Go through all longer names. If the name truncated to 10
493	# characters is not used already, we use that. Otherwise we truncate
494	# it more and append numbers until we get an unused name
495	for name in names:
496	truncated = name[:10]
497	num = 0; numstr = ""
498	#print "truncated", truncated, num
499	while truncated in used and len(numstr) < 10:
500	num += 1
501	numstr = str(num)
502	truncated = name[:10 - len(numstr)] + numstr
503	#print "truncated", truncated, num
504	if len(numstr) >= 10:
505	# This case should never happen in practice as tables with
506	# 10^10 columns seem very unlikely :)
507	raise ValueError("Can't find unique dbf column name")
508
509	name_map[name] = truncated
510	used[truncated] = 1
511
512	return name_map
513
514	def table_to_dbf(table, filename, rows = None):
515	"""Create the dbf file filename from the table.
516
517	If rows is not None (the default) then it must be a list of row
518	indices to be saved to the file, otherwise all rows are saved.
519	"""
520
521	dbf = dbflib.create(filename)
522
523	dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
524	FIELDTYPE_INT: dbflib.FTInteger,
525	FIELDTYPE_DOUBLE: dbflib.FTDouble}
526
527
528	name_map = _find_dbf_column_names([col.name for col in table.Columns()])
529
530	# Initialise the header. Distinguish between DBFTable and others.
531	for col in table.Columns():
532	width = table.Width(col.name)
533	if col.type == FIELDTYPE_DOUBLE:
534	prec = getattr(col, "prec", 12)
535	else:
536	prec = 0
537	dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
538	width, prec)
539
540	if rows is None:
541	rows = range(table.NumRows())
542
543	recNum = 0
544	for i in rows:
545	record = {}
546	for key, value in table.ReadRowAsDict(i).items():
547	record[name_map[key]] = value
548	dbf.write_record(recNum, record)
549	recNum += 1
550	dbf.close()
551
552	def table_to_csv(table, filename, rows = None):
553	"""Export table to csv file.
554
555	If rows is not None (the default) then it must be a list of row
556	indices to be saved to the file, otherwise all rows are saved.
557	"""
558
559	file = open(filename,"w")
560	columns = table.Columns()
561	if columns:
562	header = "#%s" % columns[0].name
563	for col in columns[1:]:
564	header = header + ",%s" % col.name
565	header = header + "\n"
566	file.write(header)
567
568	if rows is None:
569	rows = range(table.NumRows())
570
571	for i in rows:
572	record = table.ReadRowAsDict(i)
573	if len(record):
574	line = "%s" % record[columns[0].name]
575	for col in columns[1:]:
576	line = line + ",%s" % record[col.name]
577	line = line + "\n"
578	file.write(line)
579	file.close()
580
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision