Package dap :: Package plugins :: Module csvfiles
[hide private]
[frames | no frames]

Source Code for Module dap.plugins.csvfiles

  1  """Plugin for CSV (comma separated values) files. 
  2   
  3  This plugin serves sequential data from a CSV file. It's a bit hackish and 
  4  abuses ``lambda`` and ``itertools``, but it works *very* nicely. The plugin 
  5  uses the ``buildfilter()`` function to create a filter from the constraint 
  6  expression, and applies it on-the-fly on the data as it is being read. 
  7  """ 
  8   
  9  __author__ = "Roberto De Almeida <rob@pydap.org>" 
 10   
 11  import sys 
 12  import os.path 
 13  import re 
 14  import csv 
 15  import itertools 
 16  import urllib 
 17   
 18  from dap import dtypes 
 19  from dap.responses.das import typeconvert 
 20  from dap.server import BaseHandler 
 21  from dap.exceptions import OpenFileError 
 22  from dap.helper import buildfilter, parse_querystring 
 23  from dap.util.safeeval import expr_eval 
 24   
 25  extensions = r"""^.*\.(csv|CSV)$""" 
 26   
 27   
def lazy_eval(s):
    """Try to evaluate an expression, falling back to the input string.

    ``s`` is handed to ``expr_eval()``. If the evaluation raises, ``s`` is
    returned unchanged.

        >>> lazy_eval("1")
        1
        >>> lazy_eval("None")
        'None'
    """
    try:
        return expr_eval(s)
    except Exception:
        # Best-effort conversion: a value that cannot be evaluated is kept
        # as the raw string. Catching ``Exception`` instead of using a bare
        # ``except`` lets KeyboardInterrupt/SystemExit propagate.
        return s
41 42
class Handler(BaseHandler):
    """Handler that serves a CSV file as a dataset holding one sequence."""

    def __init__(self, filepath, environ):
        """Handler constructor.

        ``filepath`` is the path of the CSV file to serve; its base name is
        kept in ``self.filename``. ``environ`` is stored on the instance
        as-is (it is not used anywhere else in this class).
        """
        self.filepath = filepath
        self.environ = environ
        # Only the file name is needed; the directory part is discarded.
        self.filename = os.path.basename(filepath)

        # Add dummy description.
        self.description = "Comma Separated Values from file %s." % self.filename

    def _parseconstraints(self, constraints=None):
        """Dataset builder.

        This method opens a CSV reader, extracts the variable names from
        the first line and peeks at the second line to infer the type of
        each column. The data is exposed lazily as a chain of iterators:
        every value goes through ``lazy_eval()``, rows are filtered with
        the function returned by ``buildfilter()`` when the constraint
        expression contains selection queries, columns are reduced to the
        requested variables, and the result is finally sliced.

        ``constraints`` is the constraint expression, passed straight to
        ``parse_querystring()``.

        Returns the dataset object, containing a single sequence whose
        ``data`` attribute is the lazy row iterator. The file is left open
        so the iterator can be consumed later; call ``close()`` when done.

        Raises ``OpenFileError`` if the file cannot be opened. A file with
        fewer than two rows (header plus one data row) makes
        ``reader.next()`` raise ``StopIteration``.
        """
        try:
            self._file = open(self.filepath)
        except (IOError, OSError):
            message = 'Unable to open file %s.' % self.filepath
            raise OpenFileError(message)
        reader = csv.reader(self._file)

        # Parse constraints.
        fields, queries = parse_querystring(constraints)

        # Build the dataset.
        dataset = dtypes.DatasetType(name=self.filename)
        dataset.attributes['filename'] = self.filename

        # Create sequence. The name is the file name without its last four
        # characters (the ".csv" extension), cut at the first underscore.
        name = self.filename[:-4].split('_', 1)[0]
        seq = dataset[name] = dtypes.SequenceType(name=name)

        # Read variables names.
        fieldnames = reader.next()
        ids = ['%s.%s' % (seq.name, n) for n in fieldnames]

        # Peek at the first data line to infer the type of each column.
        line = reader.next()
        types_ = [typeconvert[type(lazy_eval(value))] for value in line]

        # Get list of requested variables. An empty list means "everything".
        req_ids = []
        if seq.id not in fields.keys():
            # Check for shorthand notation: if the requested var is not in
            # the list of ids we prepend the sequence id to it, assuming
            # that it was requested using the shorthand notation syntax.
            for var in fields.keys():
                if var in ids:
                    req_ids.append(var)
                else:
                    req_ids.append('%s.%s' % (seq.id, var))

        # Add requested variables, remembering their column positions.
        indexes = []
        if req_ids:
            for id_ in req_ids:
                if id_ in ids:
                    i = ids.index(id_)
                    indexes.append(i)
                    name = fieldnames[i]
                    seq[name] = dtypes.BaseType(name=name, type=types_[i])
        else:
            for name, type_ in zip(fieldnames, types_):
                seq[name] = dtypes.BaseType(name=name, type=type_)

        def convert_row(row):
            # Convert every cell of a row, keeping unparsable cells as str.
            return [lazy_eval(cell) for cell in row]

        def select_columns(row):
            # Keep only the requested columns, in the requested order.
            return [row[i] for i in indexes]

        # Reinsert first data line.
        data = itertools.chain([line], reader)
        data = itertools.imap(convert_row, data)

        # Filter results. This runs on full rows, before column selection,
        # so the filter can refer to any column.
        if queries:
            data = itertools.ifilter(buildfilter(queries, ids), data)

        # Select only requested variables.
        if req_ids:
            data = itertools.imap(select_columns, data)

        # Apply stride to sequence?
        slice_ = fields.get(seq.id)
        if slice_:
            slice_ = slice_[0]
            data = itertools.islice(
                data,
                slice_.start or 0,
                slice_.stop or sys.maxint,
                slice_.step or 1)
        else:
            # Check stored variables. If more than one variable is selected,
            # and they have different slices, use the most restrictive
            # start, step and stop.
            #
            # Behaviour rev-eng'ed from http://test.opendap.org/dap/data/ff/1998-6-avhrr.dat
            slices = []
            for var in seq.walk():
                slice_ = fields.get(var.id)
                if slice_:
                    slices.append(slice_[0])
            if slices:
                start, step, stop = zip(*[
                    (s.start or 0, s.step or 1, s.stop or sys.maxint)
                    for s in slices])
                data = itertools.islice(data, max(start), min(stop), max(step))

        # Insert data directly into sequence.
        seq.data = data

        return dataset

    def close(self):
        """Close the CSV file, if one was opened by ``_parseconstraints()``."""
        if hasattr(self, '_file'):
            self._file.close()
155 156
def _test():
    """Run the doctests embedded in this module."""
    from doctest import testmod
    testmod()

# Executing the module as a script runs its doctests.
if __name__ == "__main__":
    _test()