/[thuban]/branches/WIP-pyshapelib-bramz/Thuban/Model/table.py
ViewVC logotype

Diff of /branches/WIP-pyshapelib-bramz/Thuban/Model/table.py

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 998 by bh, Thu May 22 19:29:39 2003 UTC revision 1662 by bh, Wed Aug 27 13:51:01 2003 UTC
# Line 2  Line 2 
2  # Authors:  # Authors:
3  # Bernhard Herzog <[email protected]>  # Bernhard Herzog <[email protected]>
4  # Jan-Oliver Wagner <[email protected]>  # Jan-Oliver Wagner <[email protected]>
5    # Frank Koormann <[email protected]>
6  #  #
7  # This program is free software under the GPL (>=v2)  # This program is free software under the GPL (>=v2)
8  # Read the file COPYING coming with Thuban for details.  # Read the file COPYING coming with Thuban for details.
# Line 16  import os Line 17  import os
17  import inspect  import inspect
18  import warnings  import warnings
19    
20    from base import TitledObject
21    
22  import dbflib  import dbflib
23    
24  # the field types supported by a Table instance.  # the field types supported by a Table instance.
# Line 108  class DBFColumn: Line 111  class DBFColumn:
111          self.index = index          self.index = index
112    
113    
114  class DBFTable(OldTableInterfaceMixin):  class DBFTable(TitledObject, OldTableInterfaceMixin):
115    
116      """      """
117      Table interface for the data in a DBF file      Table interface for the data in a DBF file
# Line 129  class DBFTable(OldTableInterfaceMixin): Line 132  class DBFTable(OldTableInterfaceMixin):
132      # work because a DBF file object buffers some data      # work because a DBF file object buffers some data
133    
134      def __init__(self, filename):      def __init__(self, filename):
135          self.filename = filename          self.filename = os.path.abspath(filename)
136    
137            # Omit the extension in the title as it's not really needed and
138            # it can be confusing because dbflib removes extensions and
139            # appends some variations of '.dbf' before it tries to open the
140            # file. So the title could be e.g. myshapefile.shp when the real
141            # filename is myshapefile.dbf
142            title = os.path.splitext(os.path.basename(self.filename))[0]
143            TitledObject.__init__(self, title)
144    
145          self.dbf = dbflib.DBFFile(filename)          self.dbf = dbflib.DBFFile(filename)
146    
147          # If true, self.dbf is open for writing.          # If true, self.dbf is open for writing.
# Line 147  class DBFTable(OldTableInterfaceMixin): Line 159  class DBFTable(OldTableInterfaceMixin):
159              self.column_map[name] = col              self.column_map[name] = col
160              self.column_map[index] = col              self.column_map[index] = col
161    
     def Title(self):  
         """Return the title of the table.  
   
         The title is simply the basename of the filename  
         """  
         return os.path.basename(self.filename)  
   
162      def NumRows(self):      def NumRows(self):
163          """Return the number of rows in the table"""          """Return the number of rows in the table"""
164          return self.dbf.record_count()          return self.dbf.record_count()
# Line 182  class DBFTable(OldTableInterfaceMixin): Line 187  class DBFTable(OldTableInterfaceMixin):
187          """          """
188          return self.column_map.has_key(col)          return self.column_map.has_key(col)
189    
190      def ReadRowAsDict(self, row):      def RowIdToOrdinal(self, gid):
191          """Return the entire row as a dictionary with column names as keys"""          """Return the row ordinal given its id
192    
193            Since for DBFTables the row id is the row number, return the
194            value unchanged.
195            """
196            return gid
197    
198        def RowOrdinalToId(self, num):
199            """Return the row id given its ordinal
200    
201            Since for DBFTables the row id is the row number, return the
202            value unchanged.
203            """
204            return num
205    
206        def ReadRowAsDict(self, row, row_is_ordinal = 0):
207            """Return the entire row as a dictionary with column names as keys
208    
209            The row_is_ordinal is ignored for DBF tables because the row id
210            is always the row number.
211            """
212          return self.dbf.read_record(row)          return self.dbf.read_record(row)
213    
214      def ReadValue(self, row, col):      def ReadValue(self, row, col, row_is_ordinal = 0):
215          """Return the value of the specified row and column          """Return the value of the specified row and column
216    
217          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
218    
219            The row_is_ordinal is ignored for DBF tables because the row id
220            is always the row number.
221          """          """
222          return self.dbf.read_record(row)[self.column_map[col].name]          return self.dbf.read_record(row)[self.column_map[col].name]
223    
# Line 232  class DBFTable(OldTableInterfaceMixin): Line 260  class DBFTable(OldTableInterfaceMixin):
260    
261      # DBF specific interface parts.      # DBF specific interface parts.
262    
263        def Width(self, col):
264            """Return column width"""
265            return self.column_map[col].width
266    
267      def Destroy(self):      def Destroy(self):
268          self.dbf.close()          self.dbf.close()
269          self.dbf = None          self.dbf = None
# Line 269  class MemoryColumn: Line 301  class MemoryColumn:
301          self.type = type          self.type = type
302          self.index = index          self.index = index
303    
304  class MemoryTable(OldTableInterfaceMixin):  class MemoryTable(TitledObject, OldTableInterfaceMixin):
305    
306      """Very simple table implementation that operates on a list of tuples"""      """Very simple table implementation that operates on a list of tuples"""
307    
# Line 281  class MemoryTable(OldTableInterfaceMixin Line 313  class MemoryTable(OldTableInterfaceMixin
313          data -- List of tuples, one for each row of data          data -- List of tuples, one for each row of data
314          """          """
315          self.data = data          self.data = data
316            title = 'MemoryTable'
317            TitledObject.__init__(self, title)
318    
319          # Create the column information objects          # Create the column information objects
320          self.columns = []          self.columns = []
# Line 292  class MemoryTable(OldTableInterfaceMixin Line 326  class MemoryTable(OldTableInterfaceMixin
326              self.column_map[name] = col              self.column_map[name] = col
327              self.column_map[index] = col              self.column_map[index] = col
328    
     def Title(self):  
         """Return 'MemoryTable'  
   
         Override in derived classes to have a more meaningful title.  
         """  
         return "MemoryTable"  
   
329      def NumColumns(self):      def NumColumns(self):
330          """Return the number of columns in the table"""          """Return the number of columns in the table"""
331          return len(self.columns)          return len(self.columns)
# Line 327  class MemoryTable(OldTableInterfaceMixin Line 354  class MemoryTable(OldTableInterfaceMixin
354          """Return the number of rows in the table"""          """Return the number of rows in the table"""
355          return len(self.data)          return len(self.data)
356    
357      def ReadValue(self, row, col):      def RowIdToOrdinal(self, gid):
358            """Return the row ordinal given its id
359    
360            Since for MemoryTables the row id is the row number, return the
361            value unchanged.
362            """
363            return gid
364    
365        def RowOrdinalToId(self, num):
366            """Return the row id given its ordinal
367    
368            Since for MemoryTables the row id is the row number, return the
369            value unchanged.
370            """
371            return num
372    
373        def ReadValue(self, row, col, row_is_ordinal = 0):
374          """Return the value of the specified row and column          """Return the value of the specified row and column
375    
376          The col parameter may be the index of the column or its name.          The col parameter may be the index of the column or its name.
377    
378            The row_is_ordinal is ignored for MemoryTables because the row id
379            is always the row number.
380          """          """
381          return self.data[row][self.column_map[col].index]          return self.data[row][self.column_map[col].index]
382    
383      def ReadRowAsDict(self, index):      def ReadRowAsDict(self, index, row_is_ordinal = 0):
384          """Return the entire row as a dictionary with column names as keys"""          """Return the entire row as a dictionary with column names as keys
385    
386            The row_is_ordinal is ignored for MemoryTables because the row id
387            is always the row number.
388            """
389          return dict([(col.name, self.data[index][col.index])          return dict([(col.name, self.data[index][col.index])
390                        for col in self.columns])                        for col in self.columns])
391    
# Line 354  class MemoryTable(OldTableInterfaceMixin Line 404  class MemoryTable(OldTableInterfaceMixin
404          return min(values), max(values)          return min(values), max(values)
405    
406      def UniqueValues(self, col):      def UniqueValues(self, col):
407          """Return a sorted list of all unique values in the column col"""          """Return a sorted list of all unique values in the column col
408    
409            col can be either column index or name.
410            """
411          dict = {}          dict = {}
412    
413          for i in range(self.NumRows()):          for i in range(self.NumRows()):
# Line 365  class MemoryTable(OldTableInterfaceMixin Line 418  class MemoryTable(OldTableInterfaceMixin
418          values.sort()          values.sort()
419          return values          return values
420    
421        def Width(self, col):
422            """Return the maximum width of values in the column
423    
424            The return value is the maximum length of the string
425            representation of the values in the column (represented by index
426            or name).
427            """
428            max = 0
429    
430            type  = self.column_map[col].type
431            index = self.column_map[col].index
432            values = [row[index] for row in self.data]
433            if not values:
434                return None
435    
436            if type == FIELDTYPE_DOUBLE:
437                format = "%.12f"
438            elif type == FIELDTYPE_INT:
439                format = "%d"
440            else:
441                format = "%s"
442            for value in values:
443                l = len(format % value)
444                if l > max:
445                    max = l
446    
447            return max
448    
449      def Dependencies(self):      def Dependencies(self):
450          """Return an empty sequence. The MemoryTable doesn't depend on anything          """Return an empty sequence. The MemoryTable doesn't depend on anything
451          """          """
# Line 376  class MemoryTable(OldTableInterfaceMixin Line 457  class MemoryTable(OldTableInterfaceMixin
457          # TODO: Allow values to be a dictionary and write the single          # TODO: Allow values to be a dictionary and write the single
458          # fields that are specified.          # fields that are specified.
459          self.data[record] = values          self.data[record] = values
460    
461    
462    
463    def _find_dbf_column_names(names):
464        """Determine the column names to use in a DBF file
465    
466        DBF files have a length limit of 10 characters on the column names
467        so when writing an arbitrary Thuban table to a DBF file we may have
468        we may have to rename some of the columns making sure that they're
469        unique in the DBF file too.
470    
471        Names that are already short enough will stay the same. Longer names
472        will be truncated to 10 characters or if that isn't unique it will
473        be truncated more and filled up with digits.
474    
475        The parameter names should be a list of the column names. The return
476        value will be a dictionary mapping the names in the input list to
477        the names to use in the DBF file.
478        """
479        # mapping from the original names in table to the names in the DBF
480        # file
481        name_map = {}
482    
483        # First, we keep all names that are already short enough
484        for i in range(len(names) - 1, -1, -1):
485            if len(names[i]) <= 10:
486                name_map[names[i]] = names[i]
487                del names[i]
488    
489        # dict used as a set of all names already used as DBF column names
490        used = name_map.copy()
491    
492        # Go through all longer names. If the name truncated to 10
493        # characters is not used already, we use that. Otherwise we truncate
494        # it more and append numbers until we get an unused name
495        for name in names:
496            truncated = name[:10]
497            num = 0; numstr = ""
498            #print "truncated", truncated, num
499            while truncated in used and len(numstr) < 10:
500                num += 1
501                numstr = str(num)
502                truncated = name[:10 - len(numstr)] + numstr
503                #print "truncated", truncated, num
504            if len(numstr) >= 10:
505                # This case should never happen in practice as tables with
506                # 10^10 columns seem very unlikely :)
507                raise ValueError("Can't find unique dbf column name")
508    
509            name_map[name] = truncated
510            used[truncated] = 1
511    
512        return name_map
513    
def table_to_dbf(table, filename, rows = None):
    """Create the dbf file filename from the table.

    If rows is None (the default) all rows of the table are saved.
    Otherwise rows must be a list of row indices and only those rows
    are written, in the given order, as consecutive records of the new
    file.

    The DBF file is closed even if writing the header or a record
    fails.
    """

    dbf = dbflib.create(filename)
    try:
        dbflib_fieldtypes = {FIELDTYPE_STRING: dbflib.FTString,
                             FIELDTYPE_INT: dbflib.FTInteger,
                             FIELDTYPE_DOUBLE: dbflib.FTDouble}

        columns = table.Columns()

        # Column names may have to be shortened to be usable in the DBF
        # file
        name_map = _find_dbf_column_names([col.name for col in columns])

        # Initialise the header. The field width is whatever the table
        # reports for the column. Only double columns get a precision:
        # the column's prec attribute if it has one, 12 otherwise.
        for col in columns:
            width = table.Width(col.name)
            if col.type == FIELDTYPE_DOUBLE:
                prec = getattr(col, "prec", 12)
            else:
                prec = 0
            dbf.add_field(name_map[col.name], dbflib_fieldtypes[col.type],
                          width, prec)

        if rows is None:
            rows = range(table.NumRows())

        # The record number in the DBF file counts the rows actually
        # written, which is not the same as the row index in the table
        # when only a subset of the rows is saved.
        recNum = 0
        for i in rows:
            record = {}
            for key, value in table.ReadRowAsDict(i).items():
                record[name_map[key]] = value
            dbf.write_record(recNum, record)
            recNum += 1
    finally:
        dbf.close()
551    
def table_to_csv(table, filename, rows = None):
    """Export table to csv file.

    If rows is None (the default) all rows of the table are saved.
    Otherwise rows must be a list of row indices and only those rows
    are written, in the given order.

    The first line of the file is a header with the comma separated
    column names, prefixed with '#'. Each row is written as one line of
    comma separated values in column order. A row for which the table
    returns an empty record is written as an empty line. If the table
    has no columns, an empty file is created.

    Values are written with '%s' formatting and are not quoted or
    escaped.
    """

    outfile = open(filename, "w")
    try:
        columns = table.Columns()
        if columns:
            names = [col.name for col in columns]
            header = "#" + ",".join(["%s" % name for name in names])
            outfile.write(header + "\n")

            if rows is None:
                rows = range(table.NumRows())

            for i in rows:
                record = table.ReadRowAsDict(i)
                if record:
                    line = ",".join(["%s" % record[name] for name in names])
                else:
                    # Start from a fresh empty line so that an empty
                    # record never reuses the previous row's text.
                    line = ""
                outfile.write(line + "\n")
    finally:
        # Make sure the file is closed even if reading a row fails
        outfile.close()
580    

Legend:
Removed from v.998  
changed lines
  Added in v.1662

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26