/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 274 by bh, Thu Aug 22 16:25:46 2002 UTC revision 1961 by bh, Wed Nov 19 15:46:03 2003 UTC
# Line 1  Line 1 
1  # Copyright (c) 2001, 2002 by Intevation GmbH  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4    # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 11  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 24  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
 class Table:  
35    
36    class DBFColumn:
37    
38        """Description of a column in a DBFTable
39    
40        Instances have the following public attributes:
41    
42        name -- Name of the column
43        type -- Type of the column (one of FIELDTYPE_STRING, FIELDTYPE_INT or\
44                FIELDTYPE_DOUBLE)
45        index -- The index of the column
46        width -- the width of the data in the column
47        prec -- The precision of the data (only valid for type == FIELDTYPE_DOUBLE)
48      """      """
     Represent a table of data.  
49    
50      Currently this is basically just a wrapper around dbflib.      def __init__(self, name, type, width, prec, index):
51            self.name = name
52            self.type = type
53            self.width = width
54            self.prec = prec
55            self.index = index
56    
57    
58    class DBFTable(TitledObject):
59    
60        """
61        Table interface for the data in a DBF file
62      """      """
63    
64        # Implementation strategy regarding writing to a DBF file:
65        #
66        # Most of the time Thuban only needs to read from a table and it is
67        # important that Thuban can work with read-only files. Therefore the
68        # DBF file is opened only for reading initially. Only when
69        # write_record is called we try to open the DBF file for writing as
70        # well. If that succeeds the read/write DBF file will be used for
71        # all IO afterwards.
72        #
73        # It's important to use the same DBF file object for both reading
74        # and writing to make sure that reading a record after writing
75        # returns the new values. With two separate objects this wouldn't
76        # work because a DBF file object buffers some data
77    
78      def __init__(self, filename):      def __init__(self, filename):
79          self.filename = filename          self.filename = os.path.abspath(filename)
         self.dbf = dbflib.DBFFile(filename, "r+b")  
80    
81      def Destroy(self):          # Omit the extension in the title as it's not really needed and
82          self.dbf.close()          # it can be confusing because dbflib removes extensions and
83          self.dbf = None          # appends some variations of '.dbf' before it tries to open the
84            # file. So the title could be e.g. myshapefile.shp when the real
85            # filename is myshapefile.dbf
86            title = os.path.splitext(os.path.basename(self.filename))[0]
87            TitledObject.__init__(self, title)
88    
89            self.dbf = dbflib.DBFFile(filename)
90    
91            # If true, self.dbf is open for writing.
92            self._writable = 0
93    
94            # Create the column information objects
95            self.columns = []
96            self.column_map = {}
97            for i in range(self.NumColumns()):
98                ftype, name, width, prec = self.dbf.field_info(i)
99                ftype = dbflib_fieldtypes[ftype]
100                index = len(self.columns)
101                col = DBFColumn(name, ftype, width, prec, index)
102                self.columns.append(col)
103                self.column_map[name] = col
104                self.column_map[index] = col
105    
106      def record_count(self):      def NumRows(self):
107          """Return the number of records"""          """Return the number of rows in the table"""
108          return self.dbf.record_count()          return self.dbf.record_count()
109    
110      def field_count(self):      def NumColumns(self):
111          """Return the number of fields in a record"""          """Return the number of columns in the table"""
112          return self.dbf.field_count()          return self.dbf.field_count()
113    
114      def field_info(self, field):      def Columns(self):
115          """Return a tuple (type, name, width, prec) for the field no. field          """Return the table's column definitions
116    
117          type is the data type of the field, name the name, width the          The return value is a sequence of DBFColumn instances, one for
118          field width in characters and prec the decimal precision.          each column.
119          """          """
120          type, name, width, prec = self.dbf.field_info(field)          return self.columns
121          type = dbflib_fieldtypes[type]  
122          return type, name, width, prec      def Column(self, col):
123            """Return information about the column given by its name or index
124    
125            The returned object is an instance of DBFColumn
126            """
127            return self.column_map[col]
128    
129        def HasColumn(self, col):
130            """Return whether the table has a column with the given name or index
131            """
132            return self.column_map.has_key(col)
133    
134        def RowIdToOrdinal(self, gid):
135            """Return the row ordinal given its id
136    
137            Since for DBFTables the row id is the row number, return the
138            value unchanged.
139            """
140            return gid
141    
142        def RowOrdinalToId(self, num):
143            """Return the row id given its ordinal
144    
145            Since for DBFTables the row id is the row number, return the
146            value unchanged.
147            """
148            return num
149    
150        def ReadRowAsDict(self, row, row_is_ordinal = 0):
151            """Return the entire row as a dictionary with column names as keys
152    
153      def read_record(self, record):          The row_is_ordinal is ignored for DBF tables because the row id
154          """Return the record no. record as a dict mapping field names to values          is always the row number.
155          """          """
156          return self.dbf.read_record(record)          return self.dbf.read_record(row)
157    
158        def ReadValue(self, row, col, row_is_ordinal = 0):
159            """Return the value of the specified row and column
160    
161            The col parameter may be the index of the column or its name.
162    
163            The row_is_ordinal is ignored for DBF tables because the row id
164            is always the row number.
165            """
166            return self.dbf.read_attribute(row, self.column_map[col].index)
167    
168        def ValueRange(self, col):
169            """Return the minimum and maximum values of the values in the column
170    
171            The return value is a tuple (min, max) unless the table is empty
172            in which case the return value is None.
173            """
174            count = self.NumRows()
175    
176            if count == 0:
177                return None
178    
179            min = max = self.ReadValue(0, col)
180            for i in range(1, count):
181                value = self.ReadValue(i, col)
182                if value < min:
183                    min = value
184                elif value > max:
185                    max = value
186    
187            return (min, max)
188    
189        def UniqueValues(self, col):
190            """Return a sorted list of all unique values in the column col"""
191            dict = {}
192    
193            for i in range(self.NumRows()):
194                value = self.ReadValue(i, col)
195                dict[value] = 0
196    
197            values = dict.keys()
198            values.sort()
199            return values
200    
201        def Dependencies(self):
202            """Return an empty sequence. The DBFTable doesn't depend on anything"""
203            return ()
204    
205        # DBF specific interface parts.
206    
207        def Width(self, col):
208            """Return column width"""
209            return self.column_map[col].width
210    
211        def Destroy(self):
212            self.dbf.close()
213            self.dbf = None
214    
215      def write_record(self, record, values):      def write_record(self, record, values):
216          """Write the values into the record          """Write the values into the record
# Line 76  class Table: Line 225  class Table:
225          If it's a sequence, all fields must be present in the right          If it's a sequence, all fields must be present in the right
226          order.          order.
227          """          """
228            if not self._writable:
229                new_dbf = dbflib.DBFFile(self.filename, "r+b")
230                self.dbf.close()
231                self.dbf = new_dbf
232                self._writable = 1
233          self.dbf.write_record(record, values)          self.dbf.write_record(record, values)
234          self.dbf.commit()          self.dbf.commit()
235    
236        def FileName(self):
237            """Return the filename the DBFTable was instantiated with"""
238            return self.filename
239    
240    
class MemoryColumn:

    """Description of a column in a MemoryTable

    Instances have the following public attributes:

    name -- Name of the column
    type -- Type of the column, exactly as passed to the constructor
    index -- The index of the column
    """

    def __init__(self, name, type, index):
        # Plain value holder: the arguments are stored unchanged.
        self.name, self.type, self.index = name, type, index
247    
class MemoryTable(TitledObject):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. Every column is
        # registered in column_map under both its name and its index so
        # that either can be used to look it up.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn. A KeyError
        is raised if there is no such column.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        # "in" instead of has_key(): same result, and has_key() no
        # longer exists on Python 3 dictionaries.
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        row = self.data[index]
        return dict([(col.name, row[col.index]) for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}
        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list before sorting; on Python 3
        # keys() returns a view that has no sort() method.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). If the table has no rows the return value is None.
        """
        column = self.column_map[col]
        values = [row[column.index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        widest = 0
        for value in values:
            # Wrap the value in a 1-tuple so that a value which itself
            # is a tuple is formatted as one item.
            length = len(template % (value,))
            if length > widest:
                widest = length

        return widest

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        No checking of values is done; it is stored as the new row
        as is.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
404    
405    
406    
407    def _find_dbf_column_names(names):
408        """Determine the column names to use in a DBF file
409    
410        DBF files have a length limit of 10 characters on the column names
411        so when writing an arbitrary Thuban table to a DBF file we may have
412        we may have to rename some of the columns making sure that they're
413        unique in the DBF file too.
414    
415        Names that are already short enough will stay the same. Longer names
416        will be truncated to 10 characters or if that isn't unique it will
417        be truncated more and filled up with digits.
418    
419        The parameter names should be a list of the column names. The return
420        value will be a dictionary mapping the names in the input list to
421        the names to use in the DBF file.
422        """
423        # mapping from the original names in table to the names in the DBF
424        # file
425        name_map = {}
426    
427        # First, we keep all names that are already short enough
428        for i in range(len(names) - 1, -1, -1):
429            if len(names[i]) <= 10:
430                name_map[names[i]] = names[i]
431                del names[i]
432    
433        # dict used as a set of all names already used as DBF column names
434        used = name_map.copy()
435    
436        # Go through all longer names. If the name truncated to 10
437        # characters is not used already, we use that. Otherwise we truncate
438        # it more and append numbers until we get an unused name
439        for name in names:
440            truncated = name[:10]
441            num = 0; numstr = ""
442            #print "truncated", truncated, num
443            while truncated in used and len(numstr) < 10:
444                num += 1
445                numstr = str(num)
446                truncated = name[:10 - len(numstr)] + numstr
447                #print "truncated", truncated, num
448            if len(numstr) >= 10:
449                # This case should never happen in practice as tables with
450                # 10^10 columns seem very unlikely :)
451                raise ValueError("Can't find unique dbf column name")
452    
453            name_map[name] = truncated
454            used[truncated] = 1
455    
456        return name_map
457    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    The DBF file is closed again even if writing fails.
    """

    dbf = dbflib.create(filename)
    try:
        # Mapping from Thuban field types to dbflib field types. It has
        # its own name so that it does not hide the module-level
        # dbflib_fieldtypes dictionary, which maps the other way round.
        thuban_to_dbflib = {FIELDTYPE_STRING: dbflib.FTString,
                            FIELDTYPE_INT: dbflib.FTInteger,
                            FIELDTYPE_DOUBLE: dbflib.FTDouble}

        columns = table.Columns()
        name_map = _find_dbf_column_names([col.name for col in columns])

        # Initialise the header. Distinguish between DBFTable and others:
        # columns without a prec attribute get a precision of 12 if they
        # hold doubles.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], thuban_to_dbflib[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # Records in the new file are numbered consecutively from 0,
        # independent of the row indices in the source table.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
495    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows are saved. Otherwise it must
    be a list of row indices to be saved to the file.

    The first line of the file is a header of the form
    "#name1,name2,...". Values are written with "%s" formatting and are
    not quoted or escaped. If the table has no columns an empty file is
    created. A row for which ReadRowAsDict returns an empty dictionary
    is written as an empty line.
    """

    out = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            out.write("#" + ",".join(["%s" % col.name for col in columns])
                      + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[col.name]
                                     for col in columns])
                else:
                    # Start from an empty line so that neither an unbound
                    # name nor the previous row's text is written.
                    line = ""
                out.write(line + "\n")
    finally:
        # close the file even if reading the table or writing fails
        out.close()
524    

Legend:
Removed from v.274  
changed lines
  Added in v.1961

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26