/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Annotation of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory | Revision Log


Revision 1431 - (hide annotations)
Wed Jul 16 13:23:41 2003 UTC (21 years, 7 months ago) by jonathan
Original Path: trunk/thuban/Thuban/Model/table.py
File MIME type: text/x-python
File size: 16885 byte(s)
(table_to_dbf, table_to_csv): Renamed
        parameter records to rows and added docstrings.

1 bh 590 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 bh 6 # Authors:
3     # Bernhard Herzog <[email protected]>
4 jan 806 # Jan-Oliver Wagner <[email protected]>
5 frank 1025 # Frank Koormann <[email protected]>
6 bh 6 #
7     # This program is free software under the GPL (>=v2)
8     # Read the file COPYING coming with Thuban for details.
9    
10     """
11     Classes for handling tables of data.
12     """
13    
14     __version__ = "$Revision$"
15    
16 bh 998 import os
17 bh 839 import inspect
18     import warnings
19    
20 jan 1019 from base import TitledObject
21    
22 bh 6 import dbflib
23    
# The column data types a Table instance can report.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translate dbflib's field type constants into the FIELDTYPE_* constants
# defined above.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34    
35 jan 806
class OldTableInterfaceMixin:

    """Mixin mapping the old table interface onto the new one

    Every method emits a DeprecationWarning and then delegates to the
    corresponding new-style method (NumRows, NumColumns, Column,
    ValueRange, UniqueValues, ReadRowAsDict), which the class this
    mixin is combined with has to provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The name put into the message is that of the method that called
        this helper directly, so it must always be called straight from
        the deprecated method itself.
        """
        caller_frame = inspect.currentframe().f_back
        message = ("The %s method of the old table interface is deprecated"
                   % caller_frame.f_code.co_name)
        # stacklevel 3 attributes the warning to the code that called
        # the deprecated method, not to the method or this helper.
        warnings.warn(message, DeprecationWarning, stacklevel = 3)

    def record_count(self):
        """Deprecated. Return self.NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return self.NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type and name are taken from the object returned by the Column
        method. width (field width in characters) and prec (decimal
        precision) are zero if that object has no such attributes.
        """
        self.__deprecation_warning()
        column = self.Column(field)
        width = getattr(column, "width", 0)
        prec = getattr(column, "prec", 0)
        return (column.type, column.name, width, prec)

    def field_info_by_name(self, col):
        """Deprecated. Return self.field_info(col) or None on KeyError"""
        self.__deprecation_warning()
        try:
            info = self.field_info(col)
        except KeyError:
            # FIXME: It may be that field_info raises other exceptions
            # when the name is not a valid column name.
            return None
        return info

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) from ValueRange"""
        self.__deprecation_warning()
        lowest, highest = self.ValueRange(fieldName)
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return self.UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return self.ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89 bh 6
90 bh 818
91    
class DBFColumn:

    """Describe a single column of a DBFTable

    Public attributes of an instance:

        name -- the column's name
        type -- the column's type: FIELDTYPE_STRING, FIELDTYPE_INT or
                FIELDTYPE_DOUBLE
        index -- the column's index
        width -- the width of the data in the column
        prec -- the precision of the data (only meaningful when
                type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        """Store the given values unchanged as public attributes"""
        self.name, self.type = name, type
        self.width, self.prec = width, prec
        self.index = index
112    
113    
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename for reading and index its columns

        The title of the table is the base name of filename without
        its extension.
        """
        self.filename = filename

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. column_map lets a
        # column be looked up by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn. A KeyError is
        raised if col is neither a known name nor a known index.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of the has_key method, which no longer exists
        # in Python 3.
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the entire row as a dictionary with column names as keys"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.dbf.read_record(row)[self.column_map[col].name]

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dictionary used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting so that this
        # also works where keys() does not return a list.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by its name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write object first so that the read-only
            # one stays usable if opening for writing fails.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename the DBFTable was instantiated with"""
        return self.filename
272 bh 765
273    
class MemoryColumn:

    """Describe a single column of a MemoryTable

    Instances have the public attributes name, type and index, holding
    exactly the values given to the constructor.
    """

    def __init__(self, name, type, index):
        self.name, self.type, self.index = name, type, index
280    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data

        The data list is used as is, not copied. The title of the
        table is always 'MemoryTable'.
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. column_map lets a
        # column be looked up by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # 'in' instead of the has_key method, which no longer exists
        # in Python 3.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index):
        """Return the entire row as a dictionary with column names as keys"""
        return dict([(col.name, self.data[index][col.index])
                     for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dictionary used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting so that this
        # also works where keys() does not return a list.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with 12 decimal places for this
        purpose. If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            fmt = "%.12f"
        elif column.type == FIELDTYPE_INT:
            fmt = "%d"
        else:
            fmt = "%s"

        return max([len(fmt % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        values is stored as is; neither its length nor the types of
        its items are checked.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
414 frank 1025
415    
416 bh 1371
417     def _find_dbf_column_names(names):
418     """Determine the column names to use in a DBF file
419    
420     DBF files have a length limit of 10 characters on the column names
421     so when writing an arbitrary Thuban table to a DBF file we may have
422     we may have to rename some of the columns making sure that they're
423     unique in the DBF file too.
424    
425     Names that are already short enough will stay the same. Longer names
426     will be truncated to 10 characters or if that isn't unique it will
427     be truncated more and filled up with digits.
428    
429     The parameter names should be a list of the column names. The return
430     value will be a dictionary mapping the names in the input list to
431     the names to use in the DBF file.
432     """
433     # mapping from the original names in table to the names in the DBF
434     # file
435     name_map = {}
436    
437     # First, we keep all names that are already short enough
438     for i in range(len(names) - 1, -1, -1):
439     if len(names[i]) <= 10:
440     name_map[names[i]] = names[i]
441     del names[i]
442    
443     # dict used as a set of all names already used as DBF column names
444     used = name_map.copy()
445    
446     # Go through all longer names. If the name truncated to 10
447     # characters is not used already, we use that. Otherwise we truncate
448     # it more and append numbers until we get an unused name
449     for name in names:
450     truncated = name[:10]
451     num = 0; numstr = ""
452     #print "truncated", truncated, num
453     while truncated in used and len(numstr) < 10:
454     num += 1
455     numstr = str(num)
456     truncated = name[:10 - len(numstr)] + numstr
457     #print "truncated", truncated, num
458     if len(numstr) >= 10:
459     # This case should never happen in practice as tables with
460     # 10^10 columns seem very unlikely :)
461     raise ValueError("Can't find unique dbf column name")
462    
463     name_map[name] = truncated
464     used[truncated] = 1
465    
466     return name_map
467    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.

    The selected rows are written as consecutive records starting with
    record 0. Column names that are too long for a DBF file are
    replaced by the names chosen by _find_dbf_column_names.
    """
    # Map our field types to the dbflib constants. Deliberately not
    # called dbflib_fieldtypes, which is the name of the module level
    # mapping for the opposite direction.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    columns = table.Columns()
    name_map = _find_dbf_column_names([col.name for col in columns])

    dbf = dbflib.create(filename)
    try:
        # Initialise the header. Distinguish between DBFTable and others.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                # columns without a prec attribute get 12 decimal places
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        # Close the file even if reading the table or writing a record
        # failed so that the file handle is not leaked.
        dbf.close()
505    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.

    The first line of the file is a '#' followed by the comma separated
    column names. Each following line holds the comma separated values
    of one row in column order. Rows for which the table returns an
    empty dictionary are skipped. Nothing is written for a table
    without columns.

    NOTE: The values are written as is. Commas, quotes or newlines in a
    value are neither quoted nor escaped.
    """
    names = [col.name for col in table.Columns()]

    if rows is None:
        rows = range(table.NumRows())

    outfile = open(filename, "w")
    try:
        if names:
            outfile.write("#" + ",".join(names) + "\n")

            for i in rows:
                record = table.ReadRowAsDict(i)
                if not record:
                    continue
                # "%s" % (value,) formats like "%s" % value but is
                # also safe if a value happens to be a tuple.
                fields = ["%s" % (record[name],) for name in names]
                outfile.write(",".join(fields) + "\n")
    finally:
        # Close the file even if reading a row failed so that the file
        # handle is not leaked.
        outfile.close()
534    

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26