# Copyright (c) 2001, 2002, 2003 by Intevation GmbH
# Authors:
# Bernhard Herzog <[email protected]>
# Jan-Oliver Wagner <[email protected]>
# Frank Koormann <[email protected]>
#
# This program is free software under the GPL (>=v2)
# Read the file COPYING coming with Thuban for details.

"""
Classes for handling tables of data.
"""

__version__ = "$Revision$"

import os
import inspect
import warnings

from base import TitledObject

from Thuban import internal_from_unicode, unicode_from_internal

import dbflib

# Field types a Table instance can report for its columns.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translation from dbflib's field type constants to our constants.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
    }


class DBFColumn:

    """Description of a single column of a DBFTable

    Public instance attributes:

    name -- the column's name
    type -- the column's type; one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE
    index -- the position of the column in the table
    width -- width of the data stored in the column
    prec -- precision of the data (only meaningful when type is
            FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        self.index = index
        self.name = name
        self.type = type
        self.prec = prec
        self.width = width


class DBFTable(TitledObject):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a records after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename read-only and read its column info

        filename -- name of the DBF file; stored as an absolute path.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.open(filename, return_unicode = True)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects.  column_map maps both
        # the column name and the column index to the DBFColumn object.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            name = internal_from_unicode(name)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)

            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # `in` instead of the deprecated dict.has_key (removed in Py3)
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the rowid for given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Local names chosen so as not to shadow the min/max builtins
        minval = maxval = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < minval:
                minval = value
            elif value > maxval:
                maxval = value

        return (minval, maxval)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # A dict is used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # sorted() instead of dict.keys(); .sort() -- works on both
        # Python 2 and 3 and avoids shadowing the dict builtin
        return sorted(seen)

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the underlying DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Lazily reopen read/write and replace the read-only file
            # object so that subsequent reads see the new data (a DBF
            # file object buffers some data).
            new_dbf = dbflib.open(self.filename, "r+b", return_unicode = True)
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename


class MemoryColumn:

    """Description of a column of a MemoryTable

    Public instance attributes: name, type and index.
    """

    def __init__(self, name, type, index):
        self.index = index
        self.name = name
        self.type = type
class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects.  column_map maps both
        # the column name and the column index to the MemoryColumn.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # `in` instead of the deprecated dict.has_key (removed in Py3)
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the rowid for given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # A dict is used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # sorted() instead of dict.keys(); .sort() -- works on both
        # Python 2 and 3 and avoids shadowing the dict builtin
        return sorted(seen)

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (given by index or
        name).  Return None if the table has no rows.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        # Format chosen by column type; doubles use a fixed precision
        # of 12 to match what table_to_dbf writes.
        if column.type == FIELDTYPE_DOUBLE:
            format = "%.12f"
        elif column.type == FIELDTYPE_INT:
            format = "%d"
        else:
            format = "%s"
        # max() over the formatted lengths; `values` is non-empty here
        return max([len(format % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace row number record with the tuple values"""
        # TODO: Check for correct lenght and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values



def _find_dbf_column_names(names): |
412 |
"""Determine the column names to use in a DBF file |
413 |
|
414 |
DBF files have a length limit of 10 characters on the column names |
415 |
so when writing an arbitrary Thuban table to a DBF file we may have |
416 |
we may have to rename some of the columns making sure that they're |
417 |
unique in the DBF file too. |
418 |
|
419 |
Names that are already short enough will stay the same. Longer names |
420 |
will be truncated to 10 characters or if that isn't unique it will |
421 |
be truncated more and filled up with digits. |
422 |
|
423 |
The parameter names should be a list of the column names. The return |
424 |
value will be a dictionary mapping the names in the input list to |
425 |
the names to use in the DBF file. |
426 |
""" |
427 |
# mapping from the original names in table to the names in the DBF |
428 |
# file |
429 |
name_map = {} |
430 |
|
431 |
# First, we keep all names that are already short enough |
432 |
for i in range(len(names) - 1, -1, -1): |
433 |
if len(names[i]) <= 10: |
434 |
name_map[names[i]] = names[i] |
435 |
del names[i] |
436 |
|
437 |
# dict used as a set of all names already used as DBF column names |
438 |
used = name_map.copy() |
439 |
|
440 |
# Go through all longer names. If the name truncated to 10 |
441 |
# characters is not used already, we use that. Otherwise we truncate |
442 |
# it more and append numbers until we get an unused name |
443 |
for name in names: |
444 |
truncated = name[:10] |
445 |
num = 0; numstr = "" |
446 |
#print "truncated", truncated, num |
447 |
while truncated in used and len(numstr) < 10: |
448 |
num += 1 |
449 |
numstr = str(num) |
450 |
truncated = name[:10 - len(numstr)] + numstr |
451 |
#print "truncated", truncated, num |
452 |
if len(numstr) >= 10: |
453 |
# This case should never happen in practice as tables with |
454 |
# 10^10 columns seem very unlikely :) |
455 |
raise ValueError("Can't find unique dbf column name") |
456 |
|
457 |
name_map[name] = truncated |
458 |
used[truncated] = 1 |
459 |
|
460 |
return name_map |
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.
    """
    dbf = dbflib.create(filename, code_page = dbflib.LDID_ESRI_ANSI,
                        return_unicode = True)

    # Map our field type constants back to dbflib's constants.  Named
    # differently from the module-level dbflib_fieldtypes to avoid
    # shadowing it.
    dbf_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    name_map = _find_dbf_column_names([col.name for col in table.Columns()])

    # Initialise the header. Distinguish between DBFTable and others:
    # only DBFColumn objects carry a precision, so fall back to 12 for
    # doubles from other table types.
    for col in table.Columns():
        width = table.Width(col.name)
        if col.type == FIELDTYPE_DOUBLE:
            prec = getattr(col, "prec", 12)
        else:
            prec = 0
        dbf.add_field(unicode_from_internal(name_map[col.name]),
                      dbf_fieldtypes[col.type],
                      width, prec)

    if rows is None:
        rows = range(table.NumRows())

    # Records are written consecutively even when rows is a sparse
    # selection, hence the separate record counter via enumerate.
    for rec_num, i in enumerate(rows):
        record = {}
        for key, value in table.ReadRowAsDict(i).items():
            record[name_map[key]] = value
        dbf.write_record(rec_num, record)
    dbf.close()
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.

    The first line of the file is a '#'-prefixed header listing the
    column names; each following line holds one row, comma separated.
    """
    columns = table.Columns()
    # Avoid shadowing the `file` builtin; close the file even if
    # reading a row raises.
    out = open(filename, "w")
    try:
        if columns:
            out.write("#" + ",".join([col.name for col in columns]) + "\n")

        if rows is None:
            rows = range(table.NumRows())

        for i in rows:
            record = table.ReadRowAsDict(i)
            if len(record):
                out.write(",".join(["%s" % record[col.name]
                                    for col in columns]) + "\n")
    finally:
        out.close()