/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2734 - (hide annotations)
Thu Mar 1 12:42:59 2007 UTC (18 years ago) by bramz
File MIME type: text/x-python
File size: 16329 byte(s)
made a copy
1 bh 590 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 bh 6 # Authors:
3     # Bernhard Herzog <[email protected]>
4 jan 806 # Jan-Oliver Wagner <[email protected]>
5 frank 1025 # Frank Koormann <[email protected]>
6 bh 6 #
7     # This program is free software under the GPL (>=v2)
8     # Read the file COPYING coming with Thuban for details.
9    
10     """
11     Classes for handling tables of data.
12     """
13    
14     __version__ = "$Revision$"
15    
16 bh 998 import os
17 bh 839 import inspect
18     import warnings
19    
20 jan 1019 from base import TitledObject
21    
22 bh 6 import dbflib
23    
24     # the field types supported by a Table instance.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translation table from the field type constants used by dbflib to
# the FIELDTYPE_* constants defined just above.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34    
35 jan 806
class DBFColumn:

    """Describe a single column of a DBFTable.

    Public attributes of an instance:

        name -- the column's name
        type -- the column's type; one of FIELDTYPE_STRING,
                FIELDTYPE_INT or FIELDTYPE_DOUBLE
        index -- the position of the column in the table
        width -- the width of the data stored in the column
        prec -- the precision of the data (only meaningful when
                type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value holder: every argument is stored unchanged under
        # the attribute of the same name.
        self.name, self.type = name, type
        self.width, self.prec = width, prec
        self.index = index
56    
57    
class DBFTable(TitledObject):

    """Table interface for the data in a DBF file.

    Columns can be addressed either by name or by index; both kinds of
    keys are stored in self.column_map. The row id of a DBFTable is
    simply the row number.
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data.

    def __init__(self, filename):
        """Open the DBF file filename for reading and collect its columns"""
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map maps both
        # the column name and the column index to the same object.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if there is no such column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of dict.has_key, which no longer exists in
        # Python 3; the result is the same boolean.
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()
        if count == 0:
            return None

        # Seed both bounds with the first row and scan the remaining
        # rows. The locals deliberately do not shadow min() and max().
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # A dict is used as a set to drop duplicate values.
        seen = {}
        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting: on Python 3
        # keys() returns a view object without a sort method.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the underlying DBF file and drop the reference to it

        Calling Destroy more than once is harmless.
        """
        if self.dbf is not None:
            self.dbf.close()
            self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write file object first so that the
            # read-only one stays usable if opening for writing fails.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute path of the file the DBFTable was created for
        """
        return self.filename
239 bh 765
240    
class MemoryColumn:

    """Describe a single column of a MemoryTable.

    Public attributes of an instance:

        name -- the column's name
        type -- the column's field type
        index -- the position of the column in the table
    """

    def __init__(self, name, type, index):
        # Plain value holder: the arguments are stored unchanged.
        self.name, self.type, self.index = name, type, index
247    
class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples

    Columns can be addressed either by name or by index; both kinds of
    keys are stored in self.column_map. The row id of a MemoryTable is
    simply the row number, i.e. the index into the data list.
    """

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map maps both
        # the column name and the column index to the same object.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of dict.has_key, which no longer exists in
        # Python 3; the result is the same boolean.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # A dict is used as a set to drop duplicate values.
        seen = {}
        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting: on Python 3
        # keys() returns a view object without a sort method.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places for this
        purpose. If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        # Choose the %-format template that matches the column type.
        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            length = len(template % value)
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        The values object is stored as is; no checking is done.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
404 frank 1025
405    
406 bh 1371
407     def _find_dbf_column_names(names):
408     """Determine the column names to use in a DBF file
409    
410     DBF files have a length limit of 10 characters on the column names
411     so when writing an arbitrary Thuban table to a DBF file we may have
412     we may have to rename some of the columns making sure that they're
413     unique in the DBF file too.
414    
415     Names that are already short enough will stay the same. Longer names
416     will be truncated to 10 characters or if that isn't unique it will
417     be truncated more and filled up with digits.
418    
419     The parameter names should be a list of the column names. The return
420     value will be a dictionary mapping the names in the input list to
421     the names to use in the DBF file.
422     """
423     # mapping from the original names in table to the names in the DBF
424     # file
425     name_map = {}
426    
427     # First, we keep all names that are already short enough
428     for i in range(len(names) - 1, -1, -1):
429     if len(names[i]) <= 10:
430     name_map[names[i]] = names[i]
431     del names[i]
432    
433     # dict used as a set of all names already used as DBF column names
434     used = name_map.copy()
435    
436     # Go through all longer names. If the name truncated to 10
437     # characters is not used already, we use that. Otherwise we truncate
438     # it more and append numbers until we get an unused name
439     for name in names:
440     truncated = name[:10]
441     num = 0; numstr = ""
442     #print "truncated", truncated, num
443     while truncated in used and len(numstr) < 10:
444     num += 1
445     numstr = str(num)
446     truncated = name[:10 - len(numstr)] + numstr
447     #print "truncated", truncated, num
448     if len(numstr) >= 10:
449     # This case should never happen in practice as tables with
450     # 10^10 columns seem very unlikely :)
451     raise ValueError("Can't find unique dbf column name")
452    
453     name_map[name] = truncated
454     used[truncated] = 1
455    
456     return name_map
457    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file. The records in
    the new file are numbered consecutively starting at 0, in the order
    in which the rows are given.
    """
    # Mapping from our field type constants to the ones of dbflib, i.e.
    # the reverse direction of the module-level dbflib_fieldtypes.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    # Work out the DBF column names before the file is created so that
    # a failure here does not leave a file behind.
    name_map = _find_dbf_column_names([col.name for col in table.Columns()])

    dbf = dbflib.create(filename)
    try:
        # Initialise the header. Columns without a prec attribute get
        # a precision of 12 if they are doubles.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        for rec_num, i in enumerate(rows):
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(rec_num, record)
    finally:
        # Close the file even if writing failed half way through.
        dbf.close()
495    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    The first line written is a header consisting of a '#' followed by
    the comma separated column names. Each following line holds the
    comma separated values of one row.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    NOTE: The values are written as they are; values containing commas
    or newlines are neither quoted nor escaped.
    """
    outfile = open(filename, "w")
    try:
        names = [col.name for col in table.Columns()]
        if names:
            header = ",".join(["%s" % name for name in names])
            outfile.write("#" + header + "\n")

        if rows is None:
            rows = range(table.NumRows())

        for i in rows:
            record = table.ReadRowAsDict(i)
            # Empty records do not produce a line at all.
            if not record:
                continue
            line = ",".join(["%s" % record[name] for name in names])
            outfile.write(line + "\n")
    finally:
        # Close the file even if reading from the table failed.
        outfile.close()
524    

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26