/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Contents of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1371 - (show annotations)
Fri Jul 4 18:19:16 2003 UTC (21 years, 8 months ago) by bh
Original Path: trunk/thuban/Thuban/Model/table.py
File MIME type: text/x-python
File size: 16435 byte(s)
* Thuban/Model/table.py (_find_dbf_column_names): New. Helper
function for table_to_dbf
(table_to_dbf): Deal with names longer than the 10 character limit

* test/test_dbf_table.py (TestTableToDBF.test_table_to_dbf): Add
doc-string
(TestTableToDBF.test_table_to_dbf_long_col_names): New test for
long column names

1 # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2 # Authors:
3 # Bernhard Herzog <[email protected]>
4 # Jan-Oliver Wagner <[email protected]>
5 # Frank Koormann <[email protected]>
6 #
7 # This program is free software under the GPL (>=v2)
8 # Read the file COPYING coming with Thuban for details.
9
10 """
11 Classes for handling tables of data.
12 """
13
14 __version__ = "$Revision$"
15
16 import os
17 import inspect
18 import warnings
19
20 from base import TitledObject
21
22 import dbflib
23
# Identifiers for the column data types a Table instance can hold.
FIELDTYPE_INT = "int"
FIELDTYPE_STRING = "string"
FIELDTYPE_DOUBLE = "double"


# Translation table from the field type constants defined by dbflib
# to the FIELDTYPE_* identifiers used by this module.
dbflib_fieldtypes = {
    dbflib.FTString: FIELDTYPE_STRING,
    dbflib.FTInteger: FIELDTYPE_INT,
    dbflib.FTDouble: FIELDTYPE_DOUBLE,
}
34
35
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every public method here issues a DeprecationWarning and then
    forwards to the corresponding method of the new interface
    (NumRows, NumColumns, Column, ValueRange, UniqueValues,
    ReadRowAsDict), which the class using the mixin has to provide.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        The name of the deprecated method is taken from the calling
        frame, so this must be called directly from the deprecated
        method itself. stacklevel = 3 attributes the warning to the
        code that called the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def __column_info(self, field):
        """Return the tuple (type, name, width, prec) for the column field

        Does not issue a deprecation warning, so that each public
        method can warn exactly once under its own name. Exceptions
        raised by the Column method are propagated unchanged.
        """
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def record_count(self):
        """Deprecated. Return the number of rows (see NumRows)"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return the number of columns (see NumColumns)"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        return self.__column_info(field)

    def field_info_by_name(self, col):
        """Return the same tuple as field_info or None for unknown columns

        None is returned when looking up the column raises a KeyError.
        """
        self.__deprecation_warning()
        try:
            # Use the non-warning helper instead of field_info. Going
            # through field_info would issue a second warning that
            # names the wrong method and points into this module
            # instead of at the caller.
            return self.__column_info(col)
        except KeyError:
            # FIXME: It may be that the column lookup raises other
            # exceptions when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Return ((min, None), (max, None)) for the values of the column

        The result of ValueRange is unpacked directly, so this raises a
        TypeError if ValueRange returns None instead of a pair.
        """
        self.__deprecation_warning()
        lowest, highest = self.ValueRange(fieldName)
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return the unique values (see UniqueValues)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return row r as a dictionary (see ReadRowAsDict)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89
90
91
class DBFColumn:

    """Description of a single column of a DBFTable

    Public attributes of an instance:

        name -- the column's name
        type -- the column's type, one of FIELDTYPE_STRING,
                FIELDTYPE_INT or FIELDTYPE_DOUBLE
        index -- the column's index
        width -- the width of the data in the column
        prec -- the precision of the data (only meaningful when
                type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value object: the arguments are stored unchanged.
        self.name, self.type = name, type
        self.width, self.prec = width, prec
        self.index = index
112
113
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table implementation that reads its data from a DBF file
    """

    # How writing to the DBF file is handled:
    #
    # Thuban mostly just reads tables and has to cope with read-only
    # files, so the DBF file is initially opened for reading only. The
    # first call to write_record tries to reopen the file for writing
    # too. When that works, the read/write DBF object replaces the
    # read-only one for all further IO.
    #
    # Reading and writing deliberately share one DBF file object. A
    # DBF file object buffers some data, so with two separate objects a
    # read following a write might not see the new values.

    def __init__(self, filename):
        self.filename = filename

        # The title is the file's base name without the extension.
        # dbflib strips extensions and tries some variations of '.dbf'
        # when opening the file, so the extension given here need not
        # be that of the file actually read (e.g. myshapefile.shp for
        # myshapefile.dbf) and would only be confusing in the title.
        basename = os.path.basename(self.filename)
        TitledObject.__init__(self, os.path.splitext(basename)[0])

        self.dbf = dbflib.DBFFile(filename)

        # True once self.dbf has been reopened for writing.
        self._writable = 0

        # Build the column descriptions. column_map allows lookup by
        # both column name and column index.
        self.columns = []
        self.column_map = {}
        for field_no in range(self.NumColumns()):
            raw_type, name, width, prec = self.dbf.field_info(field_no)
            position = len(self.columns)
            column = DBFColumn(name, dbflib_fieldtypes[raw_type],
                               width, prec, position)
            self.columns.append(column)
            self.column_map[name] = column
            self.column_map[position] = column

    def NumRows(self):
        """Return how many rows the table has"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return how many columns the table has"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The result is a sequence with one DBFColumn instance per
        column.
        """
        return self.columns

    def Column(self, col):
        """Return the DBFColumn describing the column col

        col may be the name or the index of the column.
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether there is a column with the given name or index
        """
        return col in self.column_map

    def ReadRowAsDict(self, row):
        """Return the whole row as a dictionary keyed by column name"""
        return self.dbf.read_record(row)

    def ReadValue(self, row, col):
        """Return the value stored in the given row and column

        col may be the name or the index of the column.
        """
        column = self.column_map[col]
        record = self.dbf.read_record(row)
        return record[column.name]

    def ValueRange(self, col):
        """Return the smallest and largest value found in the column

        The result is a tuple (min, max), or None if the table has no
        rows.
        """
        num_rows = self.NumRows()
        if num_rows == 0:
            return None

        lowest = highest = self.ReadValue(0, col)
        for row in range(1, num_rows):
            current = self.ReadValue(row, col)
            if current < lowest:
                lowest = current
            elif current > highest:
                highest = current

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of the distinct values in the column col"""
        # A dictionary serves as the set of values seen so far.
        seen = {}
        for row in range(self.NumRows()):
            seen[self.ReadValue(row, col)] = 0

        values = list(seen)
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return the width of the column given by name or index"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        values may be a dictionary or a sequence.

        For a dictionary the keys must be field names and the values
        must have a suitable type. Only the fields present in the
        dictionary are written, all other fields are left as they are.

        For a sequence, all fields must be present in the right order.
        """
        if not self._writable:
            # Open the read/write object first, so that the read-only
            # one is still in place if opening for writing fails.
            writable_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = writable_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the filename that was passed to the constructor"""
        return self.filename
272
273
class MemoryColumn:

    """Description of a single column of a MemoryTable

    Public attributes: name, type and index of the column.
    """

    def __init__(self, name, type, index):
        # Plain value object: the arguments are stored unchanged.
        self.name, self.type, self.index = name, type, index
280
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Minimal table implementation working on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        TitledObject.__init__(self, 'MemoryTable')

        # Build the column descriptions. column_map allows lookup by
        # both column name and column index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            position = len(self.columns)
            column = MemoryColumn(name, ftype, position)
            self.columns.append(column)
            self.column_map[name] = column
            self.column_map[position] = column

    def NumColumns(self):
        """Return how many columns the table has"""
        return len(self.columns)

    def Column(self, col):
        """Return the MemoryColumn describing the column col

        col may be the name or the index of the column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The result is a sequence with one MemoryColumn instance per
        column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether there is a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return how many rows the table has"""
        return len(self.data)

    def ReadValue(self, row, col):
        """Return the value stored in the given row and column

        col may be the name or the index of the column.
        """
        position = self.column_map[col].index
        return self.data[row][position]

    def ReadRowAsDict(self, index):
        """Return the whole row as a dictionary keyed by column name"""
        result = {}
        for column in self.columns:
            result[column.name] = self.data[index][column.index]
        return result

    def ValueRange(self, col):
        """Return the smallest and largest value found in the column

        The result is a tuple (min, max), or None if the table has no
        rows.
        """
        position = self.column_map[col].index
        column_values = [row[position] for row in self.data]
        if column_values:
            return min(column_values), max(column_values)
        return None

    def UniqueValues(self, col):
        """Return a sorted list of the distinct values in the column col

        col may be the name or the index of the column.
        """
        # A dictionary serves as the set of values seen so far.
        seen = {}
        for row in range(self.NumRows()):
            seen[self.ReadValue(row, col)] = 0

        values = list(seen)
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of the values in the column

        The width is the length of the longest string representation
        of a value in the column given by name or index. Doubles are
        formatted with 12 decimal places for this. The return value is
        None if the table has no rows.
        """
        ftype = self.column_map[col].type
        position = self.column_map[col].index
        column_values = [row[position] for row in self.data]
        if not column_values:
            return None

        if ftype == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif ftype == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        return max([len(template % value) for value in column_values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values"""
        # TODO: Check for correct length and perhaps also for correct
        # types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
414
415
416
417 def _find_dbf_column_names(names):
418 """Determine the column names to use in a DBF file
419
420 DBF files have a length limit of 10 characters on the column names
421 so when writing an arbitrary Thuban table to a DBF file we may have
422 we may have to rename some of the columns making sure that they're
423 unique in the DBF file too.
424
425 Names that are already short enough will stay the same. Longer names
426 will be truncated to 10 characters or if that isn't unique it will
427 be truncated more and filled up with digits.
428
429 The parameter names should be a list of the column names. The return
430 value will be a dictionary mapping the names in the input list to
431 the names to use in the DBF file.
432 """
433 # mapping from the original names in table to the names in the DBF
434 # file
435 name_map = {}
436
437 # First, we keep all names that are already short enough
438 for i in range(len(names) - 1, -1, -1):
439 if len(names[i]) <= 10:
440 name_map[names[i]] = names[i]
441 del names[i]
442
443 # dict used as a set of all names already used as DBF column names
444 used = name_map.copy()
445
446 # Go through all longer names. If the name truncated to 10
447 # characters is not used already, we use that. Otherwise we truncate
448 # it more and append numbers until we get an unused name
449 for name in names:
450 truncated = name[:10]
451 num = 0; numstr = ""
452 #print "truncated", truncated, num
453 while truncated in used and len(numstr) < 10:
454 num += 1
455 numstr = str(num)
456 truncated = name[:10 - len(numstr)] + numstr
457 #print "truncated", truncated, num
458 if len(numstr) >= 10:
459 # This case should never happen in practice as tables with
460 # 10^10 columns seem very unlikely :)
461 raise ValueError("Can't find unique dbf column name")
462
463 name_map[name] = truncated
464 used[truncated] = 1
465
466 return name_map
467
def table_to_dbf(table, filename):
    """Create the dbf file filename from the table

    table may be any object providing the Columns, Width, NumRows and
    ReadRowAsDict methods of the table interface. Column names longer
    than 10 characters are replaced by the unique short names
    determined by _find_dbf_column_names.

    The dbf file is closed even if an error occurs during the export.
    """
    # Map the FIELDTYPE_* constants to the dbflib constants. This is
    # the inverse of the module level dbflib_fieldtypes, hence the
    # different name.
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    dbf = dbflib.create(filename)
    try:
        name_map = _find_dbf_column_names([col.name
                                           for col in table.Columns()])

        # Initialise the header. Only double columns have a precision.
        # Column objects without a prec attribute get the default
        # precision 12.
        for col in table.Columns():
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                          width, prec)

        # Copy the rows, translating the column names to the DBF names.
        for i in range(table.NumRows()):
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(i, record)
    finally:
        dbf.close()
495
def table_to_csv(table, filename):
    """Export table to csv file.

    The first line of the file is a header consisting of '#' followed
    by the comma separated column names. Each non-empty row follows as
    one line of comma separated values in column order. Values are
    formatted with '%s' and are neither quoted nor escaped. Nothing is
    written for a table without columns.

    The file is closed even if an error occurs while reading the table,
    so that everything written up to that point reaches the file.
    """
    out = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            header = ",".join(["%s" % name for name in names])
            out.write("#" + header + "\n")

            for i in range(table.NumRows()):
                record = table.ReadRowAsDict(i)
                # Empty records are skipped.
                if record:
                    line = ",".join(["%s" % record[name] for name in names])
                    out.write(line + "\n")
    finally:
        out.close()
517

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26