# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

import dbflib

# the field types supported by a Table instance.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# map the dbflib constants for the field types to our constants
dbflib_fieldtypes = {dbflib.FTString: FIELDTYPE_STRING,
                     dbflib.FTInteger: FIELDTYPE_INT,
                     dbflib.FTDouble: FIELDTYPE_DOUBLE}


class DBFColumn:

    """Description of a column in a DBFTable

    Instances have the following public attributes:

    name -- Name of the column
    type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE)
    index -- The index of the column
    width -- The width of the data in the column
    prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        self.name = name
        self.type = type
        self.width = width
        self.prec = prec
        self.index = index


class DBFTable(TitledObject):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called do we try to open the DBF file for writing
    # as well. If that succeeds, the read/write DBF file will be used
    # for all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data.

    def __init__(self, filename):
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.column_map.has_key(col)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal parameter is ignored for DBF tables because
        the row id is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal parameter is ignored for DBF tables because
        the row id is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        min = max = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < min:
                min = value
            elif value > max:
                max = value

        return (min, max)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        dict = {}

        for i in range(self.NumRows()):
            value = self.ReadValue(i, col)
            dict[value] = 0

        values = dict.keys()
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and the values must have suitable types. Only the fields
        actually contained in the dictionary are written; fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename


class MemoryColumn:

    def __init__(self, name, type, index):
        self.name = name
        self.type = type
        self.index = index

class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return self.column_map.has_key(col)

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal parameter is ignored for memory tables
        because the row id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal parameter is ignored for memory tables
        because the row id is always the row number.
        """
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        dict = {}

        for i in range(self.NumRows()):
            value = self.ReadValue(i, col)
            dict[value] = 0

        values = dict.keys()
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representations of the values in the column (represented by
        index or name).
        """
        max = 0

        type = self.column_map[col].type
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        if type == FIELDTYPE_DOUBLE:
            format = "%.12f"
        elif type == FIELDTYPE_INT:
            format = "%d"
        else:
            format = "%s"
        for value in values:
            l = len(format % value)
            if l > max:
                max = l

        return max

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values


def _find_dbf_column_names(names):
    """Determine the column names to use in a DBF file

    DBF files have a length limit of 10 characters on the column names,
    so when writing an arbitrary Thuban table to a DBF file we may have
    to rename some of the columns while making sure that they're unique
    in the DBF file too.

    Names that are already short enough stay the same. Longer names are
    truncated to 10 characters; if a truncated name isn't unique it is
    truncated further and padded with digits.

    The parameter names should be a list of the column names. The return
    value will be a dictionary mapping the names in the input list to
    the names to use in the DBF file.
    """
    # mapping from the original names in the table to the names in the
    # DBF file
    name_map = {}

    # First, we keep all names that are already short enough
    for i in range(len(names) - 1, -1, -1):
        if len(names[i]) <= 10:
            name_map[names[i]] = names[i]
            del names[i]

    # dict used as a set of all names already used as DBF column names
    used = name_map.copy()

    # Go through all longer names. If the name truncated to 10
    # characters is not used already, we use that. Otherwise we truncate
    # it more and append numbers until we get an unused name
    for name in names:
        truncated = name[:10]
        num = 0; numstr = ""
        #print "truncated", truncated, num
        while truncated in used and len(numstr) < 10:
            num += 1
            numstr = str(num)
            truncated = name[:10 - len(numstr)] + numstr
            #print "truncated", truncated, num
        if len(numstr) >= 10:
            # This case should never happen in practice as tables with
            # 10^10 columns seem very unlikely :)
            raise ValueError("Can't find unique dbf column name")

        name_map[name] = truncated
        used[truncated] = 1

    return name_map

def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is not None then it must be a list of row indices to be
    saved to the file; otherwise (the default) all rows are saved.
    """

    dbf = dbflib.create(filename)

    dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
                         FIELDTYPE_INT: dbflib.FTInteger,
                         FIELDTYPE_DOUBLE: dbflib.FTDouble}

    name_map = _find_dbf_column_names([col.name for col in table.Columns()])

    # Initialise the header. Distinguish between DBFTable and others.
    for col in table.Columns():
        width = table.Width(col.name)
        if col.type == FIELDTYPE_DOUBLE:
            prec = getattr(col, "prec", 12)
        else:
            prec = 0
        dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
                      width, prec)

    if rows is None:
        rows = range(table.NumRows())

    recNum = 0
    for i in rows:
        record = {}
        for key, value in table.ReadRowAsDict(i).items():
            record[name_map[key]] = value
        dbf.write_record(recNum, record)
        recNum += 1
    dbf.close()
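
# A minimal usage sketch for table_to_dbf (hypothetical output path), using
# the MemoryTable class defined above:
#
#   table = MemoryTable([("name", FIELDTYPE_STRING), ("value", FIELDTYPE_INT)],
#                       [("foo", 1), ("bar", 2)])
#   table_to_dbf(table, "/tmp/example.dbf")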

def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is not None then it must be a list of row indices to be
    saved to the file; otherwise (the default) all rows are saved.
    """

    file = open(filename, "w")
    columns = table.Columns()
    if columns:
        header = "#%s" % columns[0].name
        for col in columns[1:]:
            header = header + ",%s" % col.name
        header = header + "\n"
        file.write(header)

    if rows is None:
        rows = range(table.NumRows())

    for i in rows:
        record = table.ReadRowAsDict(i)
        if len(record):
            line = "%s" % record[columns[0].name]
            for col in columns[1:]:
                line = line + ",%s" % record[col.name]
            line = line + "\n"
            file.write(line)
    file.close()
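
# A minimal usage sketch for table_to_csv (hypothetical path), exporting only
# a subset of the rows:
#
#   table_to_csv(table, "/tmp/example.csv", rows=[0, 2])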