Thuban/Model/table.py

# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# The field types supported by a Table instance. The column objects of
# the tables in this module carry one of these strings in their type
# attribute.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Map the dbflib constants for the field types to our constants. This
# is the direction needed when reading: DBFTable uses it to translate
# the type reported by dbflib into one of the FIELDTYPE_* values.
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method issues a DeprecationWarning and then delegates
    to the corresponding method of the new interface (NumRows,
    NumColumns, Column, ValueRange, UniqueValues, ReadRowAsDict). The
    class this is mixed into has to provide those methods.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The name of the deprecated method is taken from the calling
        frame, so this has to be called directly from the deprecated
        method itself.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        # With stacklevel 3 the warning is attributed to the code that
        # called the deprecated method: level 1 is this helper and
        # level 2 the deprecated method.
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, column):
        """Return the old style (type, name, width, prec) tuple for column

        width and prec default to zero for column objects that do not
        have these attributes.
        """
        return (column.type, column.name,
                getattr(column, "width", 0), getattr(column, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows (see NumRows)"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns (see NumColumns)"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__column_info(self.Column(field))

    def field_info_by_name(self, col):
        """Deprecated. Like field_info but return None for unknown columns

        Only a KeyError raised by the Column method is treated as
        "unknown column".
        """
        self.__deprecation_warning()
        try:
            # Use Column directly instead of calling field_info. Going
            # through field_info would issue a second warning that
            # names field_info and points at this module instead of at
            # the caller.
            column = self.Column(col)
        except KeyError:
            # FIXME: It may be that Column raises other exceptions
            # when the name is not a valid column name.
            return None
        return self.__column_info(column)

    def field_range(self, fieldName):
        """Deprecated. Return the value range as ((min, None), (max, None))

        Return None if ValueRange returns None, which the table
        implementations do for an empty table.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            # Unpacking None below would fail with a TypeError.
            return None
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the unique values (see UniqueValues)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return row r as a dictionary (see ReadRowAsDict)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)


class DBFColumn:

    """Description of a single column of a DBFTable

    The constructor arguments are stored unchanged as public
    attributes:

    name -- Name of the column
    type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE)
    index -- The index of the column
    width -- The width of the data in the column
    prec -- The precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Identification of the column
        self.name, self.index = name, index
        # Description of the data stored in it
        self.type, self.width, self.prec = type, width, prec


class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Initialize the DBFTable from the DBF file filename

        The title of the table is the basename of filename without the
        extension. The column descriptions are built from the field
        information reported by dbflib.
        """
        self.filename = filename

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the column names and the column indices to the column objects.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. Raise a
        KeyError if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        # The record is keyed by column name, so translate col (which
        # may be an index) into the name first.
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting so that this
        # does not depend on keys() returning a list.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file. The table can't be used afterwards"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.

        On the first call the DBF file is reopened for reading and
        writing. The changes are committed after each call.
        """
        if not self._writable:
            # Open the read/write file before closing the read-only
            # one so that self.dbf stays usable if opening fails.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename


class MemoryColumn:

    """Description of a single column of a MemoryTable

    The constructor arguments are stored unchanged as the public
    attributes name, type and index.
    """

    def __init__(self, name, type, index):
        self.name, self.type, self.index = name, type, index

class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as is, it is not copied.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map maps both
        # the column names and the column indices to the column objects.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. Raise a
        KeyError if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                      for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting so that this
        # does not depend on keys() returning a list.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places for this.

        If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        index = column.index
        values = [row[index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        # values is not empty here, so max always has something to
        # work on.
        return max([len(template % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        values is stored as the new row without any checks.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values


def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names
    so when writing an arbitrary Thuban table to a DBF file we may have
    we may have to rename some of the columns making sure that they're
    unique in the DBF file too.

    Names that are already short enough will stay the same. Longer names
    will be truncated to 10 characters or if that isn't unique it will
    be truncated more and filled up with digits.

    The parameter names should be a list of the column names. The return
    value will be a dictionary mapping the names in the input list to
    the names to use in the DBF file.
    """
    # mapping from the original names in table to the names in the DBF
    # file
    name_map = {}

    # First, we keep all names that are already short enough
    for i in range(len(names) - 1, -1, -1):
        if len(names[i]) <= 10:
            name_map[names[i]] = names[i]
            del names[i]

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in names:
        truncated = name[:10]
        num = 0; numstr = ""
        #print "truncated", truncated, num
        while truncated in used and len(numstr) < 10:
            num += 1
            numstr = str(num)
            truncated = name[:10 - len(numstr)] + numstr
            #print "truncated", truncated, num
        if len(numstr) >= 10:
            # This case should never happen in practice as tables with
            # 10^10 columns seem very unlikely :)
            raise ValueError("Can't find unique dbf column name")

        name_map[name] = truncated
        used[truncated] = 1

    return name_map

def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    table may be any object providing the Columns, Width, NumRows and
    ReadRowAsDict methods of the table interface. Column names that are
    too long for a DBF file are renamed as determined by
    _find_dbf_column_names.

    The DBF file is closed even if writing the fields or records fails.
    """
    dbf = dbflib.create(filename)
    try:
        # Mapping from our field types to the dbflib constants, i.e.
        # the reverse of the module level dbflib_fieldtypes. It has its
        # own name so that it does not shadow the module level mapping.
        dbflib_constants = {FIELDTYPE_STRING: dbflib.FTString,
                            FIELDTYPE_INT: dbflib.FTInteger,
                            FIELDTYPE_DOUBLE: dbflib.FTDouble}

        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Column objects without a prec
        # attribute get a precision of 12 for doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], dbflib_constants[col.type],
                          width, prec)

        # Write the rows, translating the column names to the names
        # used in the DBF file.
        for i in range(table.NumRows()):
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(i, record)
    finally:
        dbf.close()

def table_to_csv(table, filename):
    """Export table to csv file.

    The first line of the file is a header with the column names,
    prefixed with '#'. It is followed by one line per row with the
    values in column order. Values are separated by commas and written
    as formatted by '%s', without any quoting.

    An empty line is written for a row whose record is empty. Nothing
    at all is written if the table has no columns.

    The file is closed even if writing fails.
    """
    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            header = ",".join(["%s" % col.name for col in columns])
            outfile.write("#" + header + "\n")

            for i in range(table.NumRows()):
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                else:
                    # Start from an empty line so that nothing of the
                    # previous row is written again.
                    line = ""
                outfile.write(line + "\n")
    finally:
        outfile.close()

1	# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2	# Authors:
3	# Bernhard Herzog <[email protected]>
4	# Jan-Oliver Wagner <[email protected]>
5	# Frank Koormann <[email protected]>
6	#
7	# This program is free software under the GPL (>=v2)
8	# Read the file COPYING coming with Thuban for details.
9
10	"""
11	Classes for handling tables of data.
12	"""
13
14	__version__ = "$Revision$"
15
16	import os
17	import inspect
18	import warnings
19
20	from base import TitledObject
21
22	import dbflib
23
24	# the field types supported by a Table instance.
25	FIELDTYPE_INT = "int"
26	FIELDTYPE_STRING = "string"
27	FIELDTYPE_DOUBLE = "double"
28
29
30	# map the dbflib constants for the field types to our constants
31	dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
32	dbflib.FTInteger: FIELDTYPE_INT,
33	dbflib.FTDouble: FIELDTYPE_DOUBLE}
34
35
36	class OldTableInterfaceMixin:
37
38	"""Mixin to implement the old table interface using the new one"""
39
40	def __deprecation_warning(self):
41	"""Issue a DeprecationWarning for code hat uses the old interface"""
42	callername = inspect.currentframe().f_back.f_code.co_name
43	warnings.warn("The %s method of the old table interface"
44	" is deprecated" % callername,
45	DeprecationWarning, stacklevel = 3)
46
47	def record_count(self):
48	self.__deprecation_warning()
49	return self.NumRows()
50
51	def field_count(self):
52	self.__deprecation_warning()
53	return self.NumColumns()
54
55	def field_info(self, field):
56	"""Return a tuple (type, name, width, prec) for the field no. field
57
58	type is the data type of the field, name the name, width the
59	field width in characters and prec the decimal precision. width
60	and prec will be zero if the information returned by the Column
61	method doesn't provide values for them.
62	"""
63	self.__deprecation_warning()
64	col = self.Column(field)
65	return (col.type, col.name,
66	getattr(col, "width", 0), getattr(col, "prec", 0))
67
68	def field_info_by_name(self, col):
69	self.__deprecation_warning()
70	try:
71	return self.field_info(col)
72	except KeyError:
73	# FIXME: It may be that field_info raises other exceptions
74	# when the name is not a valid column name.
75	return None
76
77	def field_range(self, fieldName):
78	self.__deprecation_warning()
79	min, max = self.ValueRange(fieldName)
80	return ((min, None), (max, None))
81
82	def GetUniqueValues(self, field):
83	self.__deprecation_warning()
84	return self.UniqueValues(field)
85
86	def read_record(self, r):
87	self.__deprecation_warning()
88	return self.ReadRowAsDict(r)
89
90
91
92	class DBFColumn:
93
94	"""Description of a column in a DBFTable
95
96	Instances have the following public attributes:
97
98	name -- Name of the column
99	type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
100	FIELDTYPE_DOUBLE)
101	index -- The index of the column
102	width -- the width of the data in the column
103	prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
104	"""
105
106	def __init__(self, name, type, width, prec, index):
107	self.name = name
108	self.type = type
109	self.width = width
110	self.prec = prec
111	self.index = index
112
113
114	class DBFTable(TitledObject, OldTableInterfaceMixin):
115
116	"""
117	Table interface for the data in a DBF file
118	"""
119
120	# Implementation strategy regarding writing to a DBF file:
121	#
122	# Most of the time Thuban only needs to read from a table and it is
123	# important that Thuban can work with read-only files. Therefore the
124	# DBF file is opened only for reading initially. Only when
125	# write_record is called we try to open the DBF file for writing as
126	# well. If that succeeds the read/write DBF file will be used for
127	# all IO afterwards.
128	#
129	# It's important to use the same DBF file object for both reading
130	# and writing to make sure that reading a records after writing
131	# returns the new values. With two separate objects this wouldn't
132	# work because a DBF file object buffers some data
133
134	def __init__(self, filename):
135	self.filename = filename
136
137	# Omit the extension in the title as it's not really needed and
138	# it can be confusing because dbflib removes extensions and
139	# appends some variations of '.dbf' before it tries to open the
140	# file. So the title could be e.g. myshapefile.shp when the real
141	# filename is myshapefile.dbf
142	title = os.path.splitext(os.path.basename(self.filename))[0]
143	TitledObject.__init__(self, title)
144
145	self.dbf = dbflib.DBFFile(filename)
146
147	# If true, self.dbf is open for writing.
148	self._writable = 0
149
150	# Create the column information objects
151	self.columns = []
152	self.column_map = {}
153	for i in range(self.NumColumns()):
154	ftype, name, width, prec = self.dbf.field_info(i)
155	ftype = dbflib_fieldtypes[ftype]
156	index = len(self.columns)
157	col = DBFColumn(name, ftype, width, prec, index)
158	self.columns.append(col)
159	self.column_map[name] = col
160	self.column_map[index] = col
161
162	def NumRows(self):
163	"""Return the number of rows in the table"""
164	return self.dbf.record_count()
165
166	def NumColumns(self):
167	"""Return the number of columns in the table"""
168	return self.dbf.field_count()
169
170	def Columns(self):
171	"""Return the table's colum definitions
172
173	The return value is a sequence of DBFColumn instances, one for
174	each column.
175	"""
176	return self.columns
177
178	def Column(self, col):
179	"""Return information about the column given by its name or index
180
181	The returned object is an instance of DBFColumn
182	"""
183	return self.column_map[col]
184
185	def HasColumn(self, col):
186	"""Return whether the table has a column with the given name or index
187	"""
188	return self.column_map.has_key(col)
189
190	def ReadRowAsDict(self, row):
191	"""Return the entire row as a dictionary with column names as keys"""
192	return self.dbf.read_record(row)
193
194	def ReadValue(self, row, col):
195	"""Return the value of the specified row and column
196
197	The col parameter may be the index of the column or its name.
198	"""
199	return self.dbf.read_record(row)[self.column_map[col].name]
200
201	def ValueRange(self, col):
202	"""Return the minimum and maximum values of the values in the column
203
204	The return value is a tuple (min, max) unless the table is empty
205	in which case the return value is None.
206	"""
207	count = self.NumRows()
208
209	if count == 0:
210	return None
211
212	min = max = self.ReadValue(0, col)
213	for i in range(1, count):
214	value = self.ReadValue(i, col)
215	if value < min:
216	min = value
217	elif value > max:
218	max = value
219
220	return (min, max)
221
222	def UniqueValues(self, col):
223	"""Return a sorted list of all unique values in the column col"""
224	dict = {}
225
226	for i in range(self.NumRows()):
227	value = self.ReadValue(i, col)
228	dict[value] = 0
229
230	values = dict.keys()
231	values.sort()
232	return values
233
234	def Dependencies(self):
235	"""Return an empty sequence. The DBFTable doesn't depend on anything"""
236	return ()
237
238	# DBF specific interface parts.
239
240	def Width(self, col):
241	"""Return column width"""
242	return self.column_map[col].width
243
244	def Destroy(self):
245	self.dbf.close()
246	self.dbf = None
247
248	def write_record(self, record, values):
249	"""Write the values into the record
250
251	The values parameter may either be a dictionary or a sequence.
252
253	If it's a dictionary the keys must be the names of the fields
254	and their value must have a suitable type. Only the fields
255	actually contained in the dictionary are written. Fields for
256	which there's no item in the dict are not modified.
257
258	If it's a sequence, all fields must be present in the right
259	order.
260	"""
261	if not self._writable:
262	new_dbf = dbflib.DBFFile(self.filename, "r+b")
263	self.dbf.close()
264	self.dbf = new_dbf
265	self._writable = 1
266	self.dbf.write_record(record, values)
267	self.dbf.commit()
268
269	def FileName(self):
270	"""Return the filename the DBFTable was instantiated with"""
271	return self.filename
272
273
274	class MemoryColumn:
275
276	def __init__(self, name, type, index):
277	self.name = name
278	self.type = type
279	self.index = index
280
281	class MemoryTable(TitledObject, OldTableInterfaceMixin):
282
283	"""Very simple table implementation that operates on a list of tuples"""
284
285	def __init__(self, fields, data):
286	"""Initialize the MemoryTable
287
288	Parameters:
289	fields -- List of (name, field_type) pairs
290	data -- List of tuples, one for each row of data
291	"""
292	self.data = data
293	title = 'MemoryTable'
294	TitledObject.__init__(self, title)
295
296	# Create the column information objects
297	self.columns = []
298	self.column_map = {}
299	for name, ftype in fields:
300	index = len(self.columns)
301	col = MemoryColumn(name, ftype, index)
302	self.columns.append(col)
303	self.column_map[name] = col
304	self.column_map[index] = col
305
306	def NumColumns(self):
307	"""Return the number of columns in the table"""
308	return len(self.columns)
309
310	def Column(self, col):
311	"""Return information about the column given by its name or index
312
313	The returned object is an instance of MemoryColumn.
314	"""
315	return self.column_map[col]
316
317	def Columns(self):
318	"""Return the table's colum definitions
319
320	The return value is a sequence of MemoryColumn instances, one
321	for each column.
322	"""
323	return self.columns
324
325	def HasColumn(self, col):
326	"""Return whether the table has a column with the given name or index
327	"""
328	return self.column_map.has_key(col)
329
330	def NumRows(self):
331	"""Return the number of rows in the table"""
332	return len(self.data)
333
334	def ReadValue(self, row, col):
335	"""Return the value of the specified row and column
336
337	The col parameter may be the index of the column or its name.
338	"""
339	return self.data[row][self.column_map[col].index]
340
341	def ReadRowAsDict(self, index):
342	"""Return the entire row as a dictionary with column names as keys"""
343	return dict([(col.name, self.data[index][col.index])
344	for col in self.columns])
345
346	def ValueRange(self, col):
347	"""Return the minimum and maximum values of the values in the column
348
349	The return value is a tuple (min, max) unless the table is empty
350	in which case the return value is None.
351	"""
352
353	index = self.column_map[col].index
354	values = [row[index] for row in self.data]
355	if not values:
356	return None
357
358	return min(values), max(values)
359
360	def UniqueValues(self, col):
361	"""Return a sorted list of all unique values in the column col
362
363	col can be either column index or name.
364	"""
365	dict = {}
366
367	for i in range(self.NumRows()):
368	value = self.ReadValue(i, col)
369	dict[value] = 0
370
371	values = dict.keys()
372	values.sort()
373	return values
374
375	def Width(self, col):
376	"""Return the maximum width of values in the column
377
378	The return value is the the maximum length of string
379	representation of the values in the column (represented by index
380	or name).
381	"""
382	max = 0
383
384	type = self.column_map[col].type
385	index = self.column_map[col].index
386	values = [row[index] for row in self.data]
387	if not values:
388	return None
389
390	if type == FIELDTYPE_DOUBLE:
391	format = "%.12f"
392	elif type == FIELDTYPE_INT:
393	format = "%d"
394	else:
395	format = "%s"
396	for value in values:
397	l = len(format % value)
398	if l > max:
399	max = l
400
401	return max
402
403	def Dependencies(self):
404	"""Return an empty sequence. The MemoryTable doesn't depend on anything
405	"""
406	return ()
407
408	def write_record(self, record, values):
409	# TODO: Check for correct lenght and perhaps also
410	# for correct types in case values is a tuple. How to report problems?
411	# TODO: Allow values to be a dictionary and write the single
412	# fields that are specified.
413	self.data[record] = values
414
415
416
417	def _find_dbf_column_names(names):
418	"""Determine the column names to use in a DBF file
419
420	DBF files have a length limit of 10 characters on the column names
421	so when writing an arbitrary Thuban table to a DBF file we may have
422	we may have to rename some of the columns making sure that they're
423	unique in the DBF file too.
424
425	Names that are already short enough will stay the same. Longer names
426	will be truncated to 10 characters or if that isn't unique it will
427	be truncated more and filled up with digits.
428
429	The parameter names should be a list of the column names. The return
430	value will be a dictionary mapping the names in the input list to
431	the names to use in the DBF file.
432	"""
433	# mapping from the original names in table to the names in the DBF
434	# file
435	name_map = {}
436
437	# First, we keep all names that are already short enough
438	for i in range(len(names) - 1, -1, -1):
439	if len(names[i]) <= 10:
440	name_map[names[i]] = names[i]
441	del names[i]
442
443	# dict used as a set of all names already used as DBF column names
444	used = name_map.copy()
445
446	# Go through all longer names. If the name truncated to 10
447	# characters is not used already, we use that. Otherwise we truncate
448	# it more and append numbers until we get an unused name
449	for name in names:
450	truncated = name[:10]
451	num = 0; numstr = ""
452	#print "truncated", truncated, num
453	while truncated in used and len(numstr) < 10:
454	num += 1
455	numstr = str(num)
456	truncated = name[:10 - len(numstr)] + numstr
457	#print "truncated", truncated, num
458	if len(numstr) >= 10:
459	# This case should never happen in practice as tables with
460	# 10^10 columns seem very unlikely :)
461	raise ValueError("Can't find unique dbf column name")
462
463	name_map[name] = truncated
464	used[truncated] = 1
465
466	return name_map
467
468	def table_to_dbf(table, filename):
469	"""Create the dbf file filename from the table"""
470	dbf = dbflib.create(filename)
471
472	dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
473	FIELDTYPE_INT: dbflib.FTInteger,
474	FIELDTYPE_DOUBLE: dbflib.FTDouble}
475
476
477	name_map = _find_dbf_column_names([col.name for col in table.Columns()])
478
479	# Initialise the header. Distinguish between DBFTable and others.
480	for col in table.Columns():
481	width = table.Width(col.name)
482	if col.type == FIELDTYPE_DOUBLE:
483	prec = getattr(col, "prec", 12)
484	else:
485	prec = 0
486	dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
487	width, prec)
488
489	for i in range(table.NumRows()):
490	record = {}
491	for key, value in table.ReadRowAsDict(i).items():
492	record[name_map[key]] = value
493	dbf.write_record(i, record)
494	dbf.close()
495
496	def table_to_csv(table, filename):
497	"""Export table to csv file."""
498
499	file = open(filename,"w")
500	columns = table.Columns()
501	if columns:
502	header = "#%s" % columns[0].name
503	for col in columns[1:]:
504	header = header + ",%s" % col.name
505	header = header + "\n"
506	file.write(header)
507
508	for i in range(table.NumRows()):
509	record = table.ReadRowAsDict(i)
510	if len(record):
511	line = "%s" % record[columns[0].name]
512	for col in columns[1:]:
513	line = line + ",%s" % record[col.name]
514	line = line + "\n"
515	file.write(line)
516	file.close()
517
Name	Value
svn:eol-style	native
svn:keywords	Author Date Id Revision