/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 6 by bh, Tue Aug 28 15:41:52 2001 UTC revision 1919 by bh, Mon Nov 3 17:33:36 2003 UTC
# Line 1  Line 1 
1  # Copyright (c) 2001 by Intevation GmbH  # Copyright (c) 2001, 2002, 2003 by Intevation GmbH
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4    # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 11  Classes for handling tables of data. Line 13  Classes for handling tables of data.
13    
14  __version__ = "$Revision$"  __version__ = "$Revision$"
15    
16    import os
17    import inspect
18    import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 24  dbflib_fieldtypes = {dbflib.FTString: FI Line 32  dbflib_fieldtypes = {dbflib.FTString: FI
32                       dbflib.FTInteger: FIELDTYPE_INT,                       dbflib.FTInteger: FIELDTYPE_INT,
33                       dbflib.FTDouble: FIELDTYPE_DOUBLE}                       dbflib.FTDouble: FIELDTYPE_DOUBLE}
34    
 class Table:  
35    
class OldTableInterfaceMixin:

    """Mixin to implement the old table interface using the new one

    Every method issues a DeprecationWarning and then forwards to the
    corresponding method of the new interface (NumRows, NumColumns,
    Column, ValueRange, UniqueValues, ReadRowAsDict). Those methods have
    to be provided by the class this mixin is combined with.
    """

    def __deprecation_warning(self):
        """Issue a DeprecationWarning for code that uses the old interface

        This has to be called directly by the deprecated method: the
        method name in the message is read from the calling frame and
        stacklevel = 3 attributes the warning to the code that called
        the deprecated method.
        """
        callername = inspect.currentframe().f_back.f_code.co_name
        warnings.warn("The %s method of the old table interface"
                      " is deprecated" % callername,
                      DeprecationWarning, stacklevel = 3)

    def record_count(self):
        """Deprecated. Return self.NumRows()"""
        self.__deprecation_warning()
        return self.NumRows()

    def field_count(self):
        """Deprecated. Return self.NumColumns()"""
        self.__deprecation_warning()
        return self.NumColumns()

    def field_info(self, field):
        """Return a tuple (type, name, width, prec) for the field no. field

        type is the data type of the field, name the name, width the
        field width in characters and prec the decimal precision. width
        and prec will be zero if the information returned by the Column
        method doesn't provide values for them.
        """
        self.__deprecation_warning()
        col = self.Column(field)
        return (col.type, col.name,
                getattr(col, "width", 0), getattr(col, "prec", 0))

    def field_info_by_name(self, col):
        """Deprecated. Return self.field_info(col) or None on KeyError

        Note that field_info issues a deprecation warning of its own.
        """
        self.__deprecation_warning()
        try:
            return self.field_info(col)
        except KeyError:
            # FIXME: It may be that field_info raises other exceptions
            # when the name is not a valid column name.
            return None

    def field_range(self, fieldName):
        """Deprecated. Return ((min, None), (max, None)) for the column

        min and max are the values returned by self.ValueRange. If
        ValueRange returns None instead of a tuple (documented for
        empty tables) the result is ((None, None), (None, None)) rather
        than a TypeError from unpacking None.
        """
        self.__deprecation_warning()
        value_range = self.ValueRange(fieldName)
        if value_range is None:
            return ((None, None), (None, None))
        lowest, highest = value_range
        return ((lowest, None), (highest, None))

    def GetUniqueValues(self, field):
        """Deprecated. Return self.UniqueValues(field)"""
        self.__deprecation_warning()
        return self.UniqueValues(field)

    def read_record(self, r):
        """Deprecated. Return self.ReadRowAsDict(r)"""
        self.__deprecation_warning()
        return self.ReadRowAsDict(r)
89    
90    
91    
class DBFColumn:

    """Description of a single column of a DBFTable

    Public attributes of an instance:

    name -- the column's name
    type -- the column's type (one of FIELDTYPE_STRING, FIELDTYPE_INT
            or FIELDTYPE_DOUBLE)
    index -- the column's index
    width -- the width of the data in the column
    prec -- the precision of the data (only valid for
            type == FIELDTYPE_DOUBLE)
    """

    def __init__(self, name, type, width, prec, index):
        # Plain value holder: the arguments are stored unchanged.
        self.index = index
        self.name, self.type = name, type
        self.width, self.prec = width, prec
112    
113    
class DBFTable(TitledObject, OldTableInterfaceMixin):

    """
    Table interface for the data in a DBF file
    """

    # Implementation strategy regarding writing to a DBF file:
    #
    # Most of the time Thuban only needs to read from a table and it is
    # important that Thuban can work with read-only files. Therefore the
    # DBF file is opened only for reading initially. Only when
    # write_record is called we try to open the DBF file for writing as
    # well. If that succeeds the read/write DBF file will be used for
    # all IO afterwards.
    #
    # It's important to use the same DBF file object for both reading
    # and writing to make sure that reading a record after writing
    # returns the new values. With two separate objects this wouldn't
    # work because a DBF file object buffers some data

    def __init__(self, filename):
        """Open the DBF file filename for reading

        The absolute filename is remembered, the title is derived from
        the file's basename and one DBFColumn object is created for
        each field of the DBF file.
        """
        self.filename = os.path.abspath(filename)

        # Omit the extension in the title as it's not really needed and
        # it can be confusing because dbflib removes extensions and
        # appends some variations of '.dbf' before it tries to open the
        # file. So the title could be e.g. myshapefile.shp when the real
        # filename is myshapefile.dbf
        title = os.path.splitext(os.path.basename(self.filename))[0]
        TitledObject.__init__(self, title)

        self.dbf = dbflib.DBFFile(filename)

        # If true, self.dbf is open for writing.
        self._writable = 0

        # Create the column information objects. Each column can be
        # looked up in column_map by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for i in range(self.NumColumns()):
            ftype, name, width, prec = self.dbf.field_info(i)
            ftype = dbflib_fieldtypes[ftype]
            index = len(self.columns)
            col = DBFColumn(name, ftype, width, prec, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumRows(self):
        """Return the number of rows in the table"""
        return self.dbf.record_count()

    def NumColumns(self):
        """Return the number of columns in the table"""
        return self.dbf.field_count()

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of DBFColumn instances, one for
        each column.
        """
        return self.columns

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of DBFColumn
        """
        return self.column_map[col]

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for DBFTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadRowAsDict(self, row, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_record(row)

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for DBF tables because the row id
        is always the row number.
        """
        return self.dbf.read_attribute(row, self.column_map[col].index)

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """
        count = self.NumRows()

        if count == 0:
            return None

        # Start with the first row and widen the range row by row.
        lowest = highest = self.ReadValue(0, col)
        for i in range(1, count):
            value = self.ReadValue(i, col)
            if value < lowest:
                lowest = value
            elif value > highest:
                highest = value

        return (lowest, highest)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col"""
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list so that sorting in place works
        # regardless of what kind of object keys() returns.
        values = list(seen.keys())
        values.sort()
        return values

    def Dependencies(self):
        """Return an empty sequence. The DBFTable doesn't depend on anything"""
        return ()

    # DBF specific interface parts.

    def Width(self, col):
        """Return column width"""
        return self.column_map[col].width

    def Destroy(self):
        """Close the DBF file and drop the reference to it"""
        self.dbf.close()
        self.dbf = None

    def write_record(self, record, values):
        """Write the values into the record

        The values parameter may either be a dictionary or a sequence.

        If it's a dictionary the keys must be the names of the fields
        and their value must have a suitable type. Only the fields
        actually contained in the dictionary are written. Fields for
        which there's no item in the dict are not modified.

        If it's a sequence, all fields must be present in the right
        order.
        """
        if not self._writable:
            # Open the read/write file first. If that raises, the
            # read-only file object stays in place untouched.
            new_dbf = dbflib.DBFFile(self.filename, "r+b")
            self.dbf.close()
            self.dbf = new_dbf
            self._writable = 1
        self.dbf.write_record(record, values)
        self.dbf.commit()

    def FileName(self):
        """Return the absolute filename of the DBF file

        This is os.path.abspath applied to the filename the DBFTable
        was instantiated with.
        """
        return self.filename
295    
296    
class MemoryColumn:

    """Description of a column in a MemoryTable: name, type and index"""

    def __init__(self, name, type, index):
        # Plain value holder: the arguments are stored unchanged.
        self.index = index
        self.name, self.type = name, type
303    
class MemoryTable(TitledObject, OldTableInterfaceMixin):

    """Very simple table implementation that operates on a list of tuples"""

    def __init__(self, fields, data):
        """Initialize the MemoryTable

        Parameters:
        fields -- List of (name, field_type) pairs
        data -- List of tuples, one for each row of data
        """
        self.data = data
        title = 'MemoryTable'
        TitledObject.__init__(self, title)

        # Create the column information objects. Each column can be
        # looked up in column_map by its name as well as by its index.
        self.columns = []
        self.column_map = {}
        for name, ftype in fields:
            index = len(self.columns)
            col = MemoryColumn(name, ftype, index)
            self.columns.append(col)
            self.column_map[name] = col
            self.column_map[index] = col

    def NumColumns(self):
        """Return the number of columns in the table"""
        return len(self.columns)

    def Column(self, col):
        """Return information about the column given by its name or index

        The returned object is an instance of MemoryColumn.
        """
        return self.column_map[col]

    def Columns(self):
        """Return the table's column definitions

        The return value is a sequence of MemoryColumn instances, one
        for each column.
        """
        return self.columns

    def HasColumn(self, col):
        """Return whether the table has a column with the given name or index
        """
        return col in self.column_map

    def NumRows(self):
        """Return the number of rows in the table"""
        return len(self.data)

    def RowIdToOrdinal(self, gid):
        """Return the row ordinal given its id

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return gid

    def RowOrdinalToId(self, num):
        """Return the row id given its ordinal

        Since for MemoryTables the row id is the row number, return the
        value unchanged.
        """
        return num

    def ReadValue(self, row, col, row_is_ordinal = 0):
        """Return the value of the specified row and column

        The col parameter may be the index of the column or its name.

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return self.data[row][self.column_map[col].index]

    def ReadRowAsDict(self, index, row_is_ordinal = 0):
        """Return the entire row as a dictionary with column names as keys

        The row_is_ordinal is ignored for MemoryTables because the row
        id is always the row number.
        """
        return dict([(col.name, self.data[index][col.index])
                      for col in self.columns])

    def ValueRange(self, col):
        """Return the minimum and maximum values of the values in the column

        The return value is a tuple (min, max) unless the table is empty
        in which case the return value is None.
        """

        index = self.column_map[col].index
        values = [row[index] for row in self.data]
        if not values:
            return None

        return min(values), max(values)

    def UniqueValues(self, col):
        """Return a sorted list of all unique values in the column col

        col can be either column index or name.
        """
        # dict used as a set of the values seen so far
        seen = {}

        for i in range(self.NumRows()):
            seen[self.ReadValue(i, col)] = 0

        # Copy the keys into a real list so that sorting in place works
        # regardless of what kind of object keys() returns.
        values = list(seen.keys())
        values.sort()
        return values

    def Width(self, col):
        """Return the maximum width of values in the column

        The return value is the maximum length of the string
        representation of the values in the column (represented by index
        or name). Doubles are formatted with "%.12f", ints with "%d" and
        everything else with "%s". If the table has no rows the return
        value is None.
        """
        column = self.column_map[col]
        index = column.index
        values = [row[index] for row in self.data]
        if not values:
            return None

        if column.type == FIELDTYPE_DOUBLE:
            template = "%.12f"
        elif column.type == FIELDTYPE_INT:
            template = "%d"
        else:
            template = "%s"

        # values is not empty here, so max always has something to
        # work with.
        return max([len(template % value) for value in values])

    def Dependencies(self):
        """Return an empty sequence. The MemoryTable doesn't depend on anything
        """
        return ()

    def write_record(self, record, values):
        """Replace the row with index record by values

        values is stored as given; it is not checked or converted.
        """
        # TODO: Check for correct length and perhaps also
        # for correct types in case values is a tuple. How to report problems?
        # TODO: Allow values to be a dictionary and write the single
        # fields that are specified.
        self.data[record] = values
460    
461    
462    
463    def _find_dbf_column_names(names):
464        """Determine the column names to use in a DBF file
465    
466        DBF files have a length limit of 10 characters on the column names
467        so when writing an arbitrary Thuban table to a DBF file we may have
468        we may have to rename some of the columns making sure that they're
469        unique in the DBF file too.
470    
471        Names that are already short enough will stay the same. Longer names
472        will be truncated to 10 characters or if that isn't unique it will
473        be truncated more and filled up with digits.
474    
475        The parameter names should be a list of the column names. The return
476        value will be a dictionary mapping the names in the input list to
477        the names to use in the DBF file.
478        """
479        # mapping from the original names in table to the names in the DBF
480        # file
481        name_map = {}
482    
483        # First, we keep all names that are already short enough
484        for i in range(len(names) - 1, -1, -1):
485            if len(names[i]) <= 10:
486                name_map[names[i]] = names[i]
487                del names[i]
488    
489        # dict used as a set of all names already used as DBF column names
490        used = name_map.copy()
491    
492        # Go through all longer names. If the name truncated to 10
493        # characters is not used already, we use that. Otherwise we truncate
494        # it more and append numbers until we get an unused name
495        for name in names:
496            truncated = name[:10]
497            num = 0; numstr = ""
498            #print "truncated", truncated, num
499            while truncated in used and len(numstr) < 10:
500                num += 1
501                numstr = str(num)
502                truncated = name[:10 - len(numstr)] + numstr
503                #print "truncated", truncated, num
504            if len(numstr) >= 10:
505                # This case should never happen in practice as tables with
506                # 10^10 columns seem very unlikely :)
507                raise ValueError("Can't find unique dbf column name")
508    
509            name_map[name] = truncated
510            used[truncated] = 1
511    
512        return name_map
513    
def table_to_dbf(table, filename, rows = None):
    """Write the contents of table to a newly created dbf file filename.

    rows may be a list of the row indices to save. If it is None (the
    default) all rows of the table are saved.
    """
    dbf = dbflib.create(filename)

    # Mapping from Thuban's field types to the dbflib field types
    to_dbflib_type = {FIELDTYPE_STRING: dbflib.FTString,
                      FIELDTYPE_INT: dbflib.FTInteger,
                      FIELDTYPE_DOUBLE: dbflib.FTDouble}

    # DBF column names are limited in length, so the table's column
    # names may have to be replaced by shorter ones.
    name_map = _find_dbf_column_names([col.name for col in table.Columns()])

    # Initialise the header. Only doubles get a precision; columns
    # without a prec attribute fall back to 12.
    for col in table.Columns():
        width = table.Width(col.name)
        prec = 0
        if col.type == FIELDTYPE_DOUBLE:
            prec = getattr(col, "prec", 12)
        dbf.add_field(name_map[col.name], to_dbflib_type[col.type],
                      width, prec)

    if rows is None:
        rows = range(table.NumRows())

    # The records in the new file are numbered consecutively from 0,
    # independent of the row indices in the source table.
    next_record = 0
    for row in rows:
        record = dict([(name_map[key], value)
                       for key, value in table.ReadRowAsDict(row).items()])
        dbf.write_record(next_record, record)
        next_record += 1
    dbf.close()
551    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    The first line of the file is a header consisting of '#' followed
    by the comma separated column names. Every row is written as one
    line of comma separated values formatted with "%s". A row for which
    ReadRowAsDict returns an empty dictionary is written as an empty
    line. If the table has no columns the file is left empty.

    If rows is not None (the default) then it must be a list of row
    indices to be saved to the file, otherwise all rows are saved.

    NOTE: values are neither quoted nor escaped, so values containing
    commas or newlines are written as they are.
    """
    outfile = open(filename, "w")
    # Make sure the file is closed even if reading the table fails.
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            header = "#" + ",".join(["%s" % name for name in names])
            outfile.write(header + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[name] for name in names])
                else:
                    # Never reuse the line of the previous row.
                    line = ""
                outfile.write(line + "\n")
    finally:
        outfile.close()
580    

Legend:
Removed from v.6  
changed lines
  Added in v.1919

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26