1 /*******************************************************************************
2 
3     Class for parsing streams of CSV data with handling of column headings. The
4     fields of the first row are parsed as the column headings. The user delegate
5     passed to the parse() methods receives the values of the fields in a row
6     together with the corresponding column headings, read from the first row.
7 
8     A second parse() method allows only certain columns in the CSV stream to be
9     processed.
10 
11     See ocean.text.csv.CSV for details on the basic format support of the
12     parser.
13 
14     Usage:
15 
16     ---
17 
18         import ocean.io.Stdout;
19         import ocean.io.device.File;
20 
21         scope file = new File("example.csv", File.ReadExisting);
22         scope csv = new HeadingsCSV;
23 
24         const include_headings = ["Criteria ID", "Country Code", "Canonical Name"];
25 
26         // Parse method allowing only certain columns to be passed to the
27         // delegate.
28         csv.parse(file, include_headings,
29         (HeadingsCSV.Field[] fields)
30         {
31             Stdout.format("Row=[");
32             foreach ( f; fields )
33             {
34                 Stdout.format("{}:{}, ", f.name, f.value);
35             }
36             Stdout.formatln("]");
37             return true; // tells CSV instance to continue parsing
38         });
39 
40     ---
41 
42     Copyright:
43         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
44         All rights reserved.
45 
46     License:
47         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
48         Alternatively, this file may be distributed under the terms of the Tango
49         3-Clause BSD License (see LICENSE_BSD.txt for details).
50 
51 *******************************************************************************/
52 
53 module ocean.text.csv.HeadingsCSV;
54 
55 
56 
57 
58 import ocean.meta.types.Qualifiers;
59 
60 import ocean.text.csv.CSV;
61 
62 import ocean.util.container.AppendBuffer;
63 import ocean.util.container.ConcatBuffer : SliceBuffer;
64 
65 import ocean.core.Array : contains, find;
66 
67 import ocean.io.model.IConduit;
68 
69 version (unittest) import ocean.core.Test;
70 
71 /*******************************************************************************
72 
73     CSV parser with special handling of column headings. Passes extracted
74     fields, one row at a time to a user-provided delegate, along with the column
75     heading of each field.
76 
77 *******************************************************************************/
78 
public class HeadingsCSV
{
    /***************************************************************************

        Signature of the user delegate which is called once per parsed data
        row. The delegate's return value is handed back to the internal CSV
        parser (in the usage example of this module, returning true means
        "continue parsing").

    ***************************************************************************/

    public alias bool delegate ( Field[] fields ) RowDg;


    /***************************************************************************

        A single field of a data row: the heading of the column the field
        belongs to, plus the field's value.

        The name is taken from the internal 'headings' buffer and the value is
        the slice received from the internal CSV parser. The list of Fields
        handed to the user's delegate is reset for every row, so a delegate
        which needs to keep any of this data should copy it.

    ***************************************************************************/

    public struct Field
    {
        cstring name;
        cstring value;
    }


    /***************************************************************************

        Plain CSV parser which does the actual row / field splitting.

    ***************************************************************************/

    private CSV csv;


    /***************************************************************************

        Column headings, in column order, as read from the first row of the
        stream being parsed.

    ***************************************************************************/

    private SliceBuffer!(char) headings;


    /***************************************************************************

        One flag per entry of 'headings' (same order), telling whether fields
        of that column are forwarded to the user's delegate. Only used by the
        parse() overload which accepts a list of headings to include.

    ***************************************************************************/

    private AppendBuffer!(bool) heading_included;


    /***************************************************************************

        Re-usable list of the Fields built from the row currently being
        processed. A slice of this list is what the user's delegate receives.

    ***************************************************************************/

    private AppendBuffer!(Field) fields;


    /***************************************************************************

        Constructor. Creates the internal parser and buffers.

    ***************************************************************************/

    public this ( )
    {
        this.csv = new CSV;
        this.headings = new SliceBuffer!(char);
        this.heading_included = new AppendBuffer!(bool);
        this.fields = new AppendBuffer!(Field);
    }


    /***************************************************************************

        Parses CSV data from the provided stream, until EOF is encountered.
        The first row is stored as the list of column headings; every
        following row is converted to a list of Fields and passed to the
        provided delegate.

        A row may contain more fields than the first row contained headings.
        Each such surplus field is still passed to the delegate, with its name
        set to "unknown".

        Params:
            stream = stream to read CSV data from
            row_dg = delegate to receive parsed rows

    ***************************************************************************/

    public void parse ( InputStream stream, scope RowDg row_dg )
    {
        this.headings.clear();

        bool headings_read = false;

        this.csv.parse(stream,
        ( cstring[] row_fields )
        {
            // The very first row only provides the column headings
            if ( !headings_read )
            {
                headings_read = true;
                this.storeHeadings(row_fields);
                return true;
            }

            // Every other row is a data row
            this.fields.length = 0;

            foreach ( column, value; row_fields )
            {
                cstring name;

                if ( column < this.headings.length )
                {
                    name = this.headings[column];
                }
                else
                {
                    name = "unknown";
                }

                this.fields ~= Field(name, value);
            }

            return row_dg(this.fields[]);
        });
    }


    /***************************************************************************

        Parses CSV data from the provided stream, until EOF is encountered.
        The first row is stored as the list of column headings; every
        following row is converted to a list of Fields and passed to the
        provided delegate.

        Only fields of columns whose heading is listed in include_headings are
        passed to the delegate, so columns which are of no interest can be
        ignored. Fields which have no corresponding heading (i.e. surplus
        fields of a row which is longer than the first row) are never passed
        to the delegate by this method.

        Params:
            stream = stream to read CSV data from
            include_headings = list of column headings to be included in the
                fields passed to the row delegate
            row_dg = delegate to receive parsed rows

    ***************************************************************************/

    public void parse ( InputStream stream, cstring[] include_headings,
        scope RowDg row_dg )
    {
        this.headings.clear();
        this.heading_included.length = 0;

        bool headings_read = false;

        this.csv.parse(stream,
        ( cstring[] row_fields )
        {
            // The very first row only provides the column headings
            if ( !headings_read )
            {
                headings_read = true;
                this.storeHeadings(row_fields);
                // TODO: duplicate headings?

                // Work out, once, which columns the user is interested in
                this.heading_included.length = this.headings.length;

                foreach ( column, ref wanted; this.heading_included[] )
                {
                    wanted =
                        !!include_headings.contains(this.headings[column]);
                }

                return true;
            }

            // Every other row is a data row
            this.fields.length = 0;

            foreach ( column, value; row_fields )
            {
                // Skip fields without a heading and fields of columns which
                // were not requested
                if ( column >= this.headings.length
                    || !this.heading_included[column] )
                {
                    continue;
                }

                this.fields ~= Field(this.headings[column], value);
            }

            return row_dg(this.fields[]);
        });
    }


    /***************************************************************************

        Appends the fields of a heading row to the internal list of headings.
        The list is not cleared by this method.

        Params:
            names = fields of the heading row, in column order

    ***************************************************************************/

    private void storeHeadings ( cstring[] names )
    {
        foreach ( name; names )
        {
            this.headings.add(name);
        }
    }
}
280 
281 
282 
283 /*******************************************************************************
284 
285     UnitTest
286 
287 *******************************************************************************/
288 
289 version (unittest)
290 {
291     import ocean.io.device.Array;
292 }
293 
unittest
{
    // Test helper: feeds a CSV string to a HeadingsCSV instance and compares
    // every row passed to the row delegate with a list of expected rows.
    class Tester
    {
        // Rows which the parser is expected to pass to rowDg, in order.
        private HeadingsCSV.Field[][] expected;

        // Number of rows received by rowDg during the current test.
        private size_t test_row;

        // Row delegate passed to the parser. Checks the received row against
        // the next expected row and always asks the parser to continue.
        bool rowDg ( HeadingsCSV.Field[] parsed_fields )
        {
            // More rows than expected is a test failure, not a range error
            .test(this.test_row < this.expected.length);

            auto expected_fields = this.expected[this.test_row++];

            // Without this check a row with too few fields would pass, as
            // the loop below only visits the fields actually received
            .test(parsed_fields.length == expected_fields.length);

            foreach ( i, f; parsed_fields )
            {
                .test(f.name == expected_fields[i].name);
                .test(f.value == expected_fields[i].value);
            }

            return true;
        }

        // Stores the expectations and resets the row counter before a parse.
        private void expect ( HeadingsCSV.Field[][] expected )
        {
            this.expected = expected;
            this.test_row = 0;
        }

        // Checks that the parser delivered every expected row. Without this
        // check a parser which never calls the delegate would pass.
        private void checkAllRowsReceived ( )
        {
            .test(this.test_row == this.expected.length);
        }

        // Parses str, passing all columns to the row delegate.
        void test ( HeadingsCSV csv, cstring str, HeadingsCSV.Field[][] expected )
        {
            this.expect(expected);

            scope array = new Array(1024);
            array.append(str);

            csv.parse(array, &this.rowDg);

            this.checkAllRowsReceived();
        }

        // Parses str, passing only the columns named in included_headings to
        // the row delegate.
        void test_inc ( HeadingsCSV csv, cstring str, cstring[] included_headings,
            HeadingsCSV.Field[][] expected )
        {
            this.expect(expected);

            scope array = new Array(1024);
            array.append(str);

            csv.parse(array, included_headings, &this.rowDg);

            this.checkAllRowsReceived();
        }
    }


    scope csv = new HeadingsCSV;
    scope tester = new Tester;

    // Headings + single row test
    tester.test(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows`,
       [[HeadingsCSV.Field("Heading1", "This"),
        HeadingsCSV.Field("Heading2", "Time"),
        HeadingsCSV.Field("Heading3", "With"),
        HeadingsCSV.Field("Heading4", "Two"),
        HeadingsCSV.Field("Heading5", "Rows")]]);

    // Headings + longer row test (surplus fields are named "unknown")
    tester.test(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows,But,Longer`,
       [[HeadingsCSV.Field("Heading1", "This"),
        HeadingsCSV.Field("Heading2", "Time"),
        HeadingsCSV.Field("Heading3", "With"),
        HeadingsCSV.Field("Heading4", "Two"),
        HeadingsCSV.Field("Heading5", "Rows"),
        HeadingsCSV.Field("unknown", "But"),
        HeadingsCSV.Field("unknown", "Longer")]]);

    // Headings + two rows test
    tester.test(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows
Yes,There,Are,Really,Three`,
       [[HeadingsCSV.Field("Heading1", "This"),
        HeadingsCSV.Field("Heading2", "Time"),
        HeadingsCSV.Field("Heading3", "With"),
        HeadingsCSV.Field("Heading4", "Two"),
        HeadingsCSV.Field("Heading5", "Rows")],
        [HeadingsCSV.Field("Heading1", "Yes"),
        HeadingsCSV.Field("Heading2", "There"),
        HeadingsCSV.Field("Heading3", "Are"),
        HeadingsCSV.Field("Heading4", "Really"),
        HeadingsCSV.Field("Heading5", "Three")]]);

    // Excluded headings
    tester.test_inc(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows
Yes,There,Are,Really,Three`,
        ["Heading2", "Heading4", "Heading5"],
       [[HeadingsCSV.Field("Heading2", "Time"),
        HeadingsCSV.Field("Heading4", "Two"),
        HeadingsCSV.Field("Heading5", "Rows")],
       [HeadingsCSV.Field("Heading2", "There"),
        HeadingsCSV.Field("Heading4", "Really"),
        HeadingsCSV.Field("Heading5", "Three")]]);

    // Excluded headings + long row (surplus fields are dropped)
    tester.test_inc(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows
Yes,There,Are,Really,Three,Some,Extra,Fields`,
        ["Heading2", "Heading4", "Heading5"],
       [[HeadingsCSV.Field("Heading2", "Time"),
        HeadingsCSV.Field("Heading4", "Two"),
        HeadingsCSV.Field("Heading5", "Rows")],
       [HeadingsCSV.Field("Heading2", "There"),
        HeadingsCSV.Field("Heading4", "Really"),
        HeadingsCSV.Field("Heading5", "Three")]]);
}
406