"""Plugin for CSV (comma separated values) files.

This plugin serves sequential data from a CSV file. It's a bit hackish and
abuses ``lambda`` and ``itertools``, but it works *very* nicely. The plugin
uses the ``buildfilter()`` function to create a filter from the constraint
expression, and applies it on-the-fly to the data as it is being read.
"""
8
9 __author__ = "Roberto De Almeida <rob@pydap.org>"
10
11 import sys
12 import os.path
13 import re
14 import csv
15 import itertools
16 import urllib
17
18 from dap import dtypes
19 from dap.responses.das import typeconvert
20 from dap.server import BaseHandler
21 from dap.exceptions import OpenFileError
22 from dap.helper import buildfilter, parse_querystring
23 from dap.util.safeeval import expr_eval
24
# Regular expression matching any name that ends in ``.csv`` or ``.CSV``.
# NOTE(review): presumably consulted by the server to decide which files this
# plugin handles -- confirm against the plugin loader.
extensions = r"""^.*\.(csv|CSV)$"""
26
27
def lazy_eval(s):
    """Try to evaluate expression or fall back to string.

    The value is passed to ``expr_eval()``; if that raises, the original
    value is returned unchanged.

        >>> lazy_eval("1")
        1
        >>> lazy_eval("None")
        'None'

    NOTE(review): the ``def`` line is missing from the listing; the name and
    the single parameter ``s`` are taken from the doctest above and from the
    call sites in this module.
    """
    try:
        return expr_eval(s)
    except Exception:
        # Deliberate best effort: anything ``expr_eval`` rejects is served as
        # the raw value. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return s
41
42
class Handler(BaseHandler):
    """Serve a CSV file as a dataset containing a single sequence.

    NOTE(review): the ``class`` and ``def`` header lines are absent from the
    listing this block was rebuilt from. ``Handler(BaseHandler)``,
    ``__init__(self, filepath, environ)``, ``_parseconstraints(self,
    constraints=None)`` and ``close(self)`` are inferred from the method
    bodies and the module imports -- confirm them against the plugin API.
    """

    def __init__(self, filepath, environ):
        """Handler constructor.

        Stores ``filepath`` and ``environ`` on the instance, derives
        ``self.filename`` from the last path component and sets a dummy
        ``self.description``.
        """
        self.filepath = filepath
        self.environ = environ
        # Same value as ``os.path.split(filepath)[1]``; the directory part
        # was never used.
        self.filename = os.path.basename(filepath)

        self.description = "Comma Separated Values from file %s." % self.filename

    def _parseconstraints(self, constraints=None):
        """Dataset builder.

        This method opens a CSV reader, extracts the variable names from
        the first line and returns a dataset whose sequence iterates lazily
        over the data. Constraint expressions are handled by the
        ``buildfilter()`` function, and a second filter returns only data
        from the columns corresponding to the requested variables.

        Raises ``OpenFileError`` if the file cannot be opened, or if it does
        not contain a header line followed by at least one data line (the
        first data line is needed to infer the column types).
        """
        try:
            self._file = open(self.filepath)
        except (IOError, OSError):
            message = 'Unable to open file %s.' % self.filepath
            raise OpenFileError(message)
        reader = csv.reader(self._file)

        # Parse constraints.
        fields, queries = parse_querystring(constraints)

        # Build the dataset.
        dataset = dtypes.DatasetType(name=self.filename)
        dataset.attributes['filename'] = self.filename

        # Create the sequence, named after the file name minus its last four
        # characters (the extension), up to the first underscore.
        seq_name = self.filename[:-4].split('_', 1)[0]
        seq = dataset[seq_name] = dtypes.SequenceType(name=seq_name)

        # The header line gives the variable names; the first data line is
        # read eagerly so the column types can be inferred from it.
        try:
            fieldnames = reader.next()
            first_row = reader.next()
        except StopIteration:
            # Do not let StopIteration escape: report the unusable file.
            self._file.close()
            message = 'File %s has no header line or no data.' % self.filepath
            raise OpenFileError(message)

        ids = ['%s.%s' % (seq.name, n) for n in fieldnames]
        types_ = [typeconvert[type(lazy_eval(value))] for value in first_row]

        # Get the list of requested variables. Requesting the sequence itself
        # means "everything", which is encoded as an empty list.
        req_ids = []
        if seq.id not in fields.keys():
            for var in fields.keys():
                if var in ids:
                    req_ids.append(var)
                else:
                    # Shorthand notation: qualify the name with the sequence id.
                    req_ids.append('%s.%s' % (seq.id, var))

        # Add the requested variables to the sequence, remembering the column
        # index of each one; requested ids that match no column are skipped.
        indexes = []
        if req_ids:
            for id_ in req_ids:
                if id_ in ids:
                    i = ids.index(id_)
                    indexes.append(i)
                    seq[fieldnames[i]] = dtypes.BaseType(name=fieldnames[i], type=types_[i])
        else:
            for field, type_ in zip(fieldnames, types_):
                seq[field] = dtypes.BaseType(name=field, type=type_)

        # Reinsert the first data line and convert every value lazily.
        data = itertools.chain([first_row], reader)
        data = itertools.imap(lambda row: [lazy_eval(v) for v in row], data)

        # Filter rows according to the constraint expression.
        if queries:
            data = itertools.ifilter(buildfilter(queries, ids), data)

        # Select only the requested columns.
        if req_ids:
            data = itertools.imap(lambda row: [row[i] for i in indexes], data)

        # Apply the slice requested on the sequence itself, if any ...
        slice_ = fields.get(seq.id)
        if slice_:
            slice_ = slice_[0]
            data = itertools.islice(data, slice_.start or 0, slice_.stop or sys.maxint, slice_.step or 1)
        else:
            # ... otherwise combine the slices requested on the individual
            # variables: largest start, smallest stop, largest step.
            slices = []
            for var in seq.walk():
                var_slice = fields.get(var.id)
                if var_slice:
                    slices.append(var_slice[0])
            if slices:
                start = max([s.start or 0 for s in slices])
                stop = min([s.stop or sys.maxint for s in slices])
                step = max([s.step or 1 for s in slices])
                data = itertools.islice(data, start, stop, step)

        # Insert the (lazy) data directly into the sequence.
        seq.data = data

        return dataset

    def close(self):
        """Close the CSV file."""
        # ``_file`` only exists once the dataset builder has opened it.
        if hasattr(self, '_file'):
            self._file.close()
155
156
158 import doctest
159 doctest.testmod()
160
161 if __name__ == "__main__":
162 _test()
163