Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# The column (field) types a table can report. These strings are the
# values stored in the "type" attribute of the column description
# objects (DBFColumn, MemoryColumn).
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translation from the dbflib field type constants to the FIELDTYPE_*
# constants above. DBFTable uses it to convert the type reported by
# dbflib for each column of a DBF file.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class DBFColumn:

    """Metadata record describing a single column of a DBFTable

    Public attributes:

    name -- the column's name
    type -- the column's type; one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE
    index -- the column's index
    width -- the width of the data in the column
    prec -- the precision of the data (only valid when
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # The values are stored verbatim; nothing is validated here.
        (self.name, self.type, self.index) = (name, type, index)
        (self.width, self.prec) = (width, prec)


class DBFTable(TitledObject):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename and build the column descriptions

        The absolute path of filename is remembered (see FileName) and
        the table's title is set to the basename of the file without
        its extension.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map allows
        # lookup of a column by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # "in" instead of has_key(): has_key() no longer exists in
        # Python 3 and "in" is what the rest of this module uses.
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The col parameter may be the index of the column or its name.

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Read the values one at a time so that the whole column never
        # has to be held in memory.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        The col parameter may be the index of the column or its name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # list() so that sorting also works where keys() doesn't
        # return a list (Python 3).
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it

        Calling Destroy more than once is harmless. Reading or writing
        the table is not possible afterwards.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write file object before closing the
            # read-only one so that self.dbf stays usable if opening
            # for writing raises an exception.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute filename of the DBF file

        This is the absolute path of the filename the DBFTable was
        instantiated with.
        """
        return self.filename


class MemoryColumn:

    """Metadata record describing a single column of a MemoryTable

    Public attributes:

    name -- the column's name
    type -- the column's type
    index -- the column's index
    """

    def __init__(self, name, type, index):
        # The values are stored verbatim; nothing is validated here.
        (self.name, self.type, self.index) = (name, type, index)

class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is not copied; write_record modifies it in place.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map allows
        # lookup of a column by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # "in" instead of has_key(): has_key() no longer exists in
        # Python 3 and "in" is what the rest of this module uses.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return dict([(col.name, self.data[index][col.index])
                      for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The col parameter may be the index of the column or its name.

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # list() so that sorting also works where keys() doesn't
        # return a list (Python 3).
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places, ints
        with "%d" and everything else with "%s".

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        index = column.index
        values = [row[index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            length = len(template % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        The values object is stored as the new row without any checks.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values


def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names
    so when writing an arbitrary Thuban table to a DBF file we may have
    we may have to rename some of the columns making sure that they're
    unique in the DBF file too.

    Names that are already short enough will stay the same. Longer names
    will be truncated to 10 characters or if that isn't unique it will
    be truncated more and filled up with digits.

    The parameter names should be a list of the column names. The return
    value will be a dictionary mapping the names in the input list to
    the names to use in the DBF file.
    """
    # mapping from the original names in table to the names in the DBF
    # file
    name_map = {}

    # First, we keep all names that are already short enough
    for i in range(len(names) - 1, -1, -1):
        if len(names[i]) <= 10:
            name_map[names[i]] = names[i]
            del names[i]

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in names:
        truncated = name[:10]
        num = 0; numstr = ""
        #print "truncated", truncated, num
        while truncated in used and len(numstr) < 10:
            num += 1
            numstr = str(num)
            truncated = name[:10 - len(numstr)] + numstr
            #print "truncated", truncated, num
        if len(numstr) >= 10:
            # This case should never happen in practice as tables with
            # 10^10 columns seem very unlikely :)
            raise ValueError("Can't find unique dbf column name")

        name_map[name] = truncated
        used[truncated] = 1

    return name_map

def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices; only those rows are saved, in the given
    order, and they are numbered consecutively from 0 in the new file.

    Column names that are too long for a DBF file are renamed as
    determined by _find_dbf_column_names.
    """

    dbf = dbflib.create(filename)

    # Make sure the new file is closed even if writing it fails halfway.
    try:
        # mapping from our field type constants to the dbflib constants
        # (the inverse of the module level dbflib_fieldtypes)
        to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                          FIELDTYPE_INT: dbflib.FTInteger,
                          FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Column objects without a prec
        # attribute get a precision of 12 for doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # recNum is the record number in the new file, which differs
        # from the row index i when only some of the rows are written.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()

def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices; only those rows are saved, in the given
    order.

    The first line of the file is a header consisting of a '#' followed
    by the comma separated column names. Each row is written as one
    line of comma separated values; the values are not quoted or
    escaped. A row for which the table returns an empty record is
    written as an empty line. If the table has no columns the file is
    created but left empty.
    """

    outfile = open(filename, "w")

    # Make sure the file is closed even if reading the table fails.
    try:
        columns = table.Columns()
        if columns:
            header = "#" + ",".join(["%s" % col.name for col in columns])
            outfile.write(header + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if len(record):
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                else:
                    # Start from an empty line for every row so that an
                    # empty record can neither fail on an unbound name
                    # nor repeat the text of the previous row.
                    line = ""
                outfile.write(line + "\n")
    finally:
        outfile.close()

1	bh	590	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	bh	6	# Authors:
3			# Bernhard Herzog <[email protected]>
4	jan	806	# Jan-Oliver Wagner <[email protected]>
5	frank	1025	# Frank Koormann <[email protected]>
6	bh	6	#
7			# This program is free software under the GPL (>=v2)
8			# Read the file COPYING coming with Thuban for details.
9
10			"""
11			Classes for handling tables of data.
12			"""
13
14			__version__ = "$Revision$"
15
16	bh	998	import os
17	bh	839	import inspect
18			import warnings
19
20	jan	1019	from base import TitledObject
21
22	bh	6	import dbflib
23
24			# the field types supported by a Table instance.
25	jonathan	474	FIELDTYPE_INT = "int"
26			FIELDTYPE_STRING = "string"
27			FIELDTYPE_DOUBLE = "double"
28	bh	6
29
30			# map the dbflib constants for the field types to our constants
31			dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
32			dbflib.FTInteger: FIELDTYPE_INT,
33			dbflib.FTDouble: FIELDTYPE_DOUBLE}
34
35	jan	806
36	bh	818	class DBFColumn:
37
38			"""Description of a column in a DBFTable
39
40			Instances have the following public attributes:
41
42			name -- Name of the column
43			type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
44			FIELDTYPE_DOUBLE)
45			index -- The index of the column
46			width -- the width of the data in the column
47			prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
48	bh	6	"""
49	bh	818
50			def __init__(self, name, type, width, prec, index):
51			self.name = name
52			self.type = type
53			self.width = width
54			self.prec = prec
55			self.index = index
56
57
58	bh	1961	class DBFTable(TitledObject):
59	bh	818
60			"""
61	bh	765	Table interface for the data in a DBF file
62	bh	6	"""
63
64	bh	286	# Implementation strategy regarding writing to a DBF file:
65			#
66			# Most of the time Thuban only needs to read from a table and it is
67			# important that Thuban can work with read-only files. Therefore the
68			# DBF file is opened only for reading initially. Only when
69			# write_record is called we try to open the DBF file for writing as
70	bh	590	# well. If that succeeds the read/write DBF file will be used for
71			# all IO afterwards.
72	bh	286	#
73			# It's important to use the same DBF file object for both reading
74			# and writing to make sure that reading a records after writing
75			# returns the new values. With two separate objects this wouldn't
76			# work because a DBF file object buffers some data
77
78	bh	6	def __init__(self, filename):
79	bh	1599	self.filename = os.path.abspath(filename)
80	bh	1078
81			# Omit the extension in the title as it's not really needed and
82			# it can be confusing because dbflib removes extensions and
83			# appends some variations of '.dbf' before it tries to open the
84			# file. So the title could be e.g. myshapefile.shp when the real
85			# filename is myshapefile.dbf
86			title = os.path.splitext(os.path.basename(self.filename))[0]
87	jan	1019	TitledObject.__init__(self, title)
88	bh	1078
89	bh	284	self.dbf = dbflib.DBFFile(filename)
90	bh	6
91	bh	286	# If true, self.dbf is open for writing.
92			self._writable = 0
93
94	bh	818	# Create the column information objects
95			self.columns = []
96			self.column_map = {}
97			for i in range(self.NumColumns()):
98			ftype, name, width, prec = self.dbf.field_info(i)
99			ftype = dbflib_fieldtypes[ftype]
100			index = len(self.columns)
101			col = DBFColumn(name, ftype, width, prec, index)
102			self.columns.append(col)
103			self.column_map[name] = col
104			self.column_map[index] = col
105	bh	257
106	bh	818	def NumRows(self):
107			"""Return the number of rows in the table"""
108	bh	6	return self.dbf.record_count()
109
110	bh	818	def NumColumns(self):
111			"""Return the number of columns in the table"""
112	bh	6	return self.dbf.field_count()
113
114	bh	818	def Columns(self):
115			"""Return the table's colum definitions
116	bh	6
117	bh	818	The return value is a sequence of DBFColumn instances, one for
118			each column.
119	bh	6	"""
120	bh	818	return self.columns
121	bh	6
122	bh	818	def Column(self, col):
123			"""Return information about the column given by its name or index
124	jonathan	467
125	bh	818	The returned object is an instance of DBFColumn
126			"""
127			return self.column_map[col]
128	jonathan	467
129	bh	839	def HasColumn(self, col):
130			"""Return whether the table has a column with the given name or index
131			"""
132			return self.column_map.has_key(col)
133
134	bh	1662	def RowIdToOrdinal(self, gid):
135			"""Return the row ordinal given its id
136
137			Since for DBFTables the row id is the row number, return the
138			value unchanged.
139			"""
140			return gid
141
142			def RowOrdinalToId(self, num):
143			"""Return the rowid for given its ordinal
144
145			Since for DBFTables the row id is the row number, return the
146			value unchanged.
147			"""
148			return num
149
150			def ReadRowAsDict(self, row, row_is_ordinal = 0):
151			"""Return the entire row as a dictionary with column names as keys
152
153			The row_is_ordinal is ignored for DBF tables because the row id
154			is always the row number.
155			"""
156	bh	818	return self.dbf.read_record(row)
157	jonathan	467
158	bh	1662	def ReadValue(self, row, col, row_is_ordinal = 0):
159	bh	818	"""Return the value of the specified row and column
160	jonathan	628
161	bh	818	The col parameter may be the index of the column or its name.
162	bh	1662
163			The row_is_ordinal is ignored for DBF tables because the row id
164			is always the row number.
165	bh	818	"""
166	bh	1919	return self.dbf.read_attribute(row, self.column_map[col].index)
167	jonathan	628
168	bh	818	def ValueRange(self, col):
169			"""Return the minimum and maximum values of the values in the column
170	jonathan	628
171	bh	818	The return value is a tuple (min, max) unless the table is empty
172			in which case the return value is None.
173	jonathan	628	"""
174	bh	818	count = self.NumRows()
175	jonathan	628
176			if count == 0:
177			return None
178
179	bh	818	min = max = self.ReadValue(0, col)
180	jonathan	628	for i in range(1, count):
181	bh	818	value = self.ReadValue(i, col)
182			if value < min:
183			min = value
184			elif value > max:
185			max = value
186	jonathan	628
187	bh	818	return (min, max)
188	jonathan	628
189	bh	818	def UniqueValues(self, col):
190			"""Return a sorted list of all unique values in the column col"""
191			dict = {}
192	jonathan	628
193	bh	818	for i in range(self.NumRows()):
194			value = self.ReadValue(i, col)
195			dict[value] = 0
196	jonathan	628
197	bh	818	values = dict.keys()
198			values.sort()
199			return values
200	jonathan	628
201	bh	984	def Dependencies(self):
202			"""Return an empty sequence. The DBFTable doesn't depend on anything"""
203			return ()
204	jonathan	628
205	bh	818	# DBF specific interface parts.
206	jonathan	628
207	frank	1025	def Width(self, col):
208			"""Return column width"""
209			return self.column_map[col].width
210
211	bh	818	def Destroy(self):
212			self.dbf.close()
213			self.dbf = None
214	jonathan	628
215	bh	274	def write_record(self, record, values):
216			"""Write the values into the record
217
218			The values parameter may either be a dictionary or a sequence.
219
220			If it's a dictionary the keys must be the names of the fields
221			and their value must have a suitable type. Only the fields
222			actually contained in the dictionary are written. Fields for
223			which there's no item in the dict are not modified.
224
225			If it's a sequence, all fields must be present in the right
226			order.
227			"""
228	bh	286	if not self._writable:
229			new_dbf = dbflib.DBFFile(self.filename, "r+b")
230			self.dbf.close()
231			self.dbf = new_dbf
232			self._writable = 1
233			self.dbf.write_record(record, values)
234			self.dbf.commit()
235	jonathan	467
236	bh	994	def FileName(self):
237			"""Return the filename the DBFTable was instantiated with"""
238			return self.filename
239	bh	765
240
241	bh	818	class MemoryColumn:
242
243			def __init__(self, name, type, index):
244			self.name = name
245			self.type = type
246			self.index = index
247
248	bh	1961	class MemoryTable(TitledObject):
249	bh	818
250			"""Very simple table implementation that operates on a list of tuples"""
251
252			def __init__(self, fields, data):
253			"""Initialize the MemoryTable
254
255			Parameters:
256			fields -- List of (name, field_type) pairs
257			data -- List of tuples, one for each row of data
258			"""
259			self.data = data
260	jan	1019	title = 'MemoryTable'
261			TitledObject.__init__(self, title)
262	bh	818
263			# Create the column information objects
264			self.columns = []
265			self.column_map = {}
266			for name, ftype in fields:
267			index = len(self.columns)
268			col = MemoryColumn(name, ftype, index)
269			self.columns.append(col)
270			self.column_map[name] = col
271			self.column_map[index] = col
272
273			def NumColumns(self):
274			"""Return the number of columns in the table"""
275			return len(self.columns)
276
277			def Column(self, col):
278			"""Return information about the column given by its name or index
279
280			The returned object is an instance of MemoryColumn.
281			"""
282			return self.column_map[col]
283
284			def Columns(self):
285			"""Return the table's colum definitions
286
287			The return value is a sequence of MemoryColumn instances, one
288			for each column.
289			"""
290			return self.columns
291
292	bh	839	def HasColumn(self, col):
293			"""Return whether the table has a column with the given name or index
294			"""
295			return self.column_map.has_key(col)
296
297	bh	818	def NumRows(self):
298			"""Return the number of rows in the table"""
299			return len(self.data)
300
301	bh	1662	def RowIdToOrdinal(self, gid):
302			"""Return the row ordinal given its id
303
304			Since for MemoryTables the row id is the row number, return the
305			value unchanged.
306			"""
307			return gid
308
309			def RowOrdinalToId(self, num):
310			"""Return the rowid for given its ordinal
311
312			Since for MemoryTables the row id is the row number, return the
313			value unchanged.
314			"""
315			return num
316
317			def ReadValue(self, row, col, row_is_ordinal = 0):
318	bh	818	"""Return the value of the specified row and column
319
320			The col parameter may be the index of the column or its name.
321	bh	1662
322			The row_is_ordinal is ignored for DBF tables because the row id
323			is always the row number.
324	bh	818	"""
325			return self.data[row][self.column_map[col].index]
326
327	bh	1662	def ReadRowAsDict(self, index, row_is_ordinal = 0):
328			"""Return the entire row as a dictionary with column names as keys
329
330			The row_is_ordinal is ignored for DBF tables because the row id
331			is always the row number.
332			"""
333	bh	818	return dict([(col.name, self.data[index][col.index])
334			for col in self.columns])
335
336			def ValueRange(self, col):
337			"""Return the minimum and maximum values of the values in the column
338
339			The return value is a tuple (min, max) unless the table is empty
340			in which case the return value is None.
341			"""
342
343			index = self.column_map[col].index
344			values = [row[index] for row in self.data]
345			if not values:
346			return None
347
348			return min(values), max(values)
349
350			def UniqueValues(self, col):
351	frank	1025	"""Return a sorted list of all unique values in the column col
352
353			col can be either column index or name.
354			"""
355	bh	818	dict = {}
356
357			for i in range(self.NumRows()):
358			value = self.ReadValue(i, col)
359			dict[value] = 0
360
361			values = dict.keys()
362			values.sort()
363			return values
364
365	frank	1025	def Width(self, col):
366			"""Return the maximum width of values in the column
367
368	bh	1043	The return value is the the maximum length of string
369			representation of the values in the column (represented by index
370			or name).
371			"""
372	frank	1025	max = 0
373	bh	1043
374	frank	1025	type = self.column_map[col].type
375			index = self.column_map[col].index
376			values = [row[index] for row in self.data]
377			if not values:
378			return None
379
380			if type == FIELDTYPE_DOUBLE:
381	bh	1043	format = "%.12f"
382	frank	1025	elif type == FIELDTYPE_INT:
383			format = "%d"
384			else:
385			format = "%s"
386			for value in values:
387			l = len(format % value)
388			if l > max:
389			max = l
390
391			return max
392
393	bh	984	def Dependencies(self):
394			"""Return an empty sequence. The MemoryTable doesn't depend on anything
395			"""
396			return ()
397	bh	818
398			def write_record(self, record, values):
399			# TODO: Check for correct lenght and perhaps also
400			# for correct types in case values is a tuple. How to report problems?
401			# TODO: Allow values to be a dictionary and write the single
402			# fields that are specified.
403			self.data[record] = values
404	frank	1025
405
406	bh	1371
407			def _find_dbf_column_names(names):
408			"""Determine the column names to use in a DBF file
409
410			DBF files have a length limit of 10 characters on the column names
411			so when writing an arbitrary Thuban table to a DBF file we may have
412			we may have to rename some of the columns making sure that they're
413			unique in the DBF file too.
414
415			Names that are already short enough will stay the same. Longer names
416			will be truncated to 10 characters or if that isn't unique it will
417			be truncated more and filled up with digits.
418
419			The parameter names should be a list of the column names. The return
420			value will be a dictionary mapping the names in the input list to
421			the names to use in the DBF file.
422			"""
423			# mapping from the original names in table to the names in the DBF
424			# file
425			name_map = {}
426
427			# First, we keep all names that are already short enough
428			for i in range(len(names) - 1, -1, -1):
429			if len(names[i]) <= 10:
430			name_map[names[i]] = names[i]
431			del names[i]
432
433			# dict used as a set of all names already used as DBF column names
434			used = name_map.copy()
435
436			# Go through all longer names. If the name truncated to 10
437			# characters is not used already, we use that. Otherwise we truncate
438			# it more and append numbers until we get an unused name
439			for name in names:
440			truncated = name[:10]
441			num = 0; numstr = ""
442			#print "truncated", truncated, num
443			while truncated in used and len(numstr) < 10:
444			num += 1
445			numstr = str(num)
446			truncated = name[:10 - len(numstr)] + numstr
447			#print "truncated", truncated, num
448			if len(numstr) >= 10:
449			# This case should never happen in practice as tables with
450			# 10^10 columns seem very unlikely :)
451			raise ValueError("Can't find unique dbf column name")
452
453			name_map[name] = truncated
454			used[truncated] = 1
455
456			return name_map
457
458	jonathan	1431	def table_to_dbf(table, filename, rows = None):
459			"""Create the dbf file filename from the table.
460
461			If rows is not None (the default) then it must be a list of row
462			indices to be saved to the file, otherwise all rows are saved.
463			"""
464
465	frank	1025	dbf = dbflib.create(filename)
466
467			dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
468			FIELDTYPE_INT: dbflib.FTInteger,
469			FIELDTYPE_DOUBLE: dbflib.FTDouble}
470
471	bh	1371
472			name_map = _find_dbf_column_names([col.name for col in table.Columns()])
473
474	frank	1025	# Initialise the header. Distinguish between DBFTable and others.
475			for col in table.Columns():
476			width = table.Width(col.name)
477	bh	1043	if col.type == FIELDTYPE_DOUBLE:
478			prec = getattr(col, "prec", 12)
479			else:
480			prec = 0
481	bh	1371	dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
482			width, prec)
483	frank	1025
484	jonathan	1431	if rows is None:
485			rows = range(table.NumRows())
486	jonathan	1389
487			recNum = 0
488	jonathan	1431	for i in rows:
489	bh	1371	record = {}
490			for key, value in table.ReadRowAsDict(i).items():
491			record[name_map[key]] = value
492	jonathan	1389	dbf.write_record(recNum, record)
493			recNum += 1
494	frank	1025	dbf.close()
495
496	jonathan	1431	def table_to_csv(table, filename, rows = None):
497			"""Export table to csv file.
498
499			If rows is not None (the default) then it must be a list of row
500			indices to be saved to the file, otherwise all rows are saved.
501			"""
502	frank	1025
503			file = open(filename,"w")
504			columns = table.Columns()
505			if columns:
506			header = "#%s" % columns[0].name
507			for col in columns[1:]:
508			header = header + ",%s" % col.name
509			header = header + "\n"
510			file.write(header)
511
512	jonathan	1431	if rows is None:
513			rows = range(table.NumRows())
514	jonathan	1389
515	jonathan	1431	for i in rows:
516	frank	1025	record = table.ReadRowAsDict(i)
517			if len(record):
518			line = "%s" % record[columns[0].name]
519			for col in columns[1:]:
520			line = line + ",%s" % record[col.name]
521			line = line + "\n"
522			file.write(line)
523			file.close()
524
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision