/*******************************************************************************

    Class for parsing streams of CSV data with handling of column headings. The
    fields of the first row are parsed as the column headings. The user delegate
    passed to the parse() methods receives the values of the fields in a row
    together with the corresponding column headings, read from the first row.

    A second parse() method allows only certain columns in the CSV stream to be
    processed.

    See ocean.text.csv.CSV for details on the basic format support of the
    parser.

    Usage:

    ---

        import ocean.io.Stdout;
        import ocean.io.device.File;

        scope file = new File("example.csv", File.ReadExisting);
        scope csv = new HeadingsCSV;

        const include_headings = ["Criteria ID", "Country Code", "Canonical Name"];

        // Parse method allowing only certain columns to be passed to the
        // delegate.
        csv.parse(file, include_headings,
            (HeadingsCSV.Field[] fields)
            {
                Stdout.format("Row=[");
                foreach ( f; fields )
                {
                    Stdout.format("{}:{}, ", f.name, f.value);
                }
                Stdout.formatln("]");
                return true; // tells CSV instance to continue parsing
            });

    ---

    Copyright:
        Copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
        Alternatively, this file may be distributed under the terms of the Tango
        3-Clause BSD License (see LICENSE_BSD.txt for details).

*******************************************************************************/

module ocean.text.csv.HeadingsCSV;




import ocean.meta.types.Qualifiers;

import ocean.text.csv.CSV;

import ocean.util.container.AppendBuffer;
import ocean.util.container.ConcatBuffer : SliceBuffer;

import ocean.core.Array : contains, find;

import ocean.io.model.IConduit;

version (unittest) import ocean.core.Test;

/*******************************************************************************

    CSV parser with special handling of column headings. Passes extracted
    fields, one row at a time, to a user-provided delegate, along with the
    column heading of each field.

*******************************************************************************/

public class HeadingsCSV
{
    /***************************************************************************

        Type of delegate which receives parsed CSV rows. The parse() methods
        stop parsing as soon as the delegate returns false.

    ***************************************************************************/

    public alias bool delegate ( Field[] fields ) RowDg;


    /***************************************************************************

        Struct containing the name and value of a field. Field names are sliced
        from the 'headings' array (see below). A list of Field structs is passed
        to the user's delegate which is passed to the parse method.

    ***************************************************************************/

    public struct Field
    {
        /// Column heading of the field (or "unknown", see parse()).
        cstring name;

        /// Value of the field in the current row.
        cstring value;
    }


    /***************************************************************************

        Internal simple CSV parser.

    ***************************************************************************/

    private CSV csv;


    /***************************************************************************

        List of heading names, read from the first CSV row.

    ***************************************************************************/

    private SliceBuffer!(char) headings;


    /***************************************************************************

        List of bools specifying whether each heading is to be passed to the
        user's delegate. (Used by the second parse() method.) The flags in this
        list are ordered the same as the column names in 'headings'.

    ***************************************************************************/

    private AppendBuffer!(bool) heading_included;


    /***************************************************************************

        List of Field structs extracted from the current row, to be passed to
        the user's delegate. The list is reused (emptied and refilled) for
        every row.

    ***************************************************************************/

    private AppendBuffer!(Field) fields;


    /***************************************************************************

        Constructor. Creates the internal parser and the reusable buffers.

    ***************************************************************************/

    public this ( )
    {
        this.csv = new CSV;
        this.headings = new SliceBuffer!(char);
        this.heading_included = new AppendBuffer!(bool);
        this.fields = new AppendBuffer!(Field);
    }

    /***************************************************************************

        Parses CSV data from the provided stream. Parsing ends when an EOF is
        encountered. As rows are extracted and parsed, they are passed to the
        provided delegate.

        The first row is consumed as the list of column headings and is not
        passed to the delegate.

        Note that if a row is read which has more fields than there are headings
        (i.e. fields in the first row of the CSV stream), then the name of each
        surplus field is set to "unknown".

        Params:
            stream = stream to read CSV data from
            row_dg = delegate to receive parsed rows; returning false stops
                the parsing

    ***************************************************************************/

    public void parse ( InputStream stream, scope RowDg row_dg )
    {
        this.headings.clear();

        size_t row;
        this.csv.parse(stream,
            ( cstring[] parsed_fields )
            {
                // First row (headings): only remember the column names
                if ( row++ == 0 )
                {
                    foreach ( f; parsed_fields )
                    {
                        this.headings.add(f);
                    }

                    return true;
                }

                // Subsequent rows: pair each value with its heading
                this.fields.length = 0;

                foreach ( i, f; parsed_fields )
                {
                    auto heading = i < this.headings.length
                        ? this.headings[i] : "unknown";
                    this.fields ~= Field(heading, f);
                }

                // The user's return value decides whether parsing continues
                return row_dg(this.fields[]);
            });
    }


    /***************************************************************************

        Parses CSV data from the provided stream. Parsing ends when an EOF is
        encountered. As rows are extracted and parsed, they are passed to the
        provided delegate.

        An additional parameter (include_headings) allows the user to specify
        which columns in the CSV stream are passed to the row delegate. In this
        way, unnecessary columns can be ignored.

        The first row is consumed as the list of column headings and is not
        passed to the delegate.

        Notes:
            * Fields of a row which lie beyond the last heading are never
              passed to the delegate (they have no heading which could be
              included).
            * Entries of include_headings which do not occur in the first row
              of the stream have no effect.
            * The delegate is called for every row after the first, so the
              array it receives is empty for a row without any included field.

        Params:
            stream = stream to read CSV data from
            include_headings = list of column headings to be included in the
                fields passed to the row delegate
            row_dg = delegate to receive parsed rows; returning false stops
                the parsing

    ***************************************************************************/

    public void parse ( InputStream stream, cstring[] include_headings,
        scope RowDg row_dg )
    {
        this.headings.clear();
        this.heading_included.length = 0;

        size_t row;
        this.csv.parse(stream,
            ( cstring[] parsed_fields )
            {
                // First row (headings): remember the column names and work
                // out, once, which of the columns were requested
                if ( row++ == 0 )
                {
                    foreach ( f; parsed_fields )
                    {
                        this.headings.add(f);
                    }
                    // TODO: duplicate headings?

                    this.heading_included.length = this.headings.length;

                    foreach ( i, ref included; this.heading_included[] )
                    {
                        included = !!include_headings.contains(this.headings[i]);
                    }

                    return true;
                }

                // Subsequent rows: keep only the values of included columns
                this.fields.length = 0;

                foreach ( i, f; parsed_fields )
                {
                    if ( i < this.headings.length && this.heading_included[i] )
                    {
                        this.fields ~= Field(this.headings[i], f);
                    }
                }

                // The user's return value decides whether parsing continues
                return row_dg(this.fields[]);
            });
    }
}



/*******************************************************************************

    UnitTest

*******************************************************************************/

version (unittest)
{
    import ocean.io.device.Array;
}

unittest
{
    class Tester
    {
        private HeadingsCSV.Field[][] expected;
        private size_t test_row;

        // Row delegate: compares the received row with the next expected one.
        bool rowDg ( HeadingsCSV.Field[] parsed_fields )
        {
            // More rows than expected is a test failure, not a range error
            .test(this.test_row < this.expected.length);

            auto expected_fields = this.expected[this.test_row++];

            // Without this check, a row with missing fields would pass
            .test(parsed_fields.length == expected_fields.length);

            foreach ( i, f; parsed_fields )
            {
                .test(f.name == expected_fields[i].name);
                .test(f.value == expected_fields[i].value);
            }

            return true;
        }

        // Parses str with all columns and checks the rows passed on.
        void test ( HeadingsCSV csv, cstring str, HeadingsCSV.Field[][] expected )
        {
            this.expected = expected;
            this.test_row = 0;

            scope array = new Array(1024);
            array.append(str);

            csv.parse(array, &this.rowDg);

            // Every expected row must have reached the delegate
            .test(this.test_row == this.expected.length);
        }

        // Parses str with a column filter and checks the rows passed on.
        void test_inc ( HeadingsCSV csv, cstring str, cstring[] included_headings,
            HeadingsCSV.Field[][] expected )
        {
            this.expected = expected;
            this.test_row = 0;

            scope array = new Array(1024);
            array.append(str);

            csv.parse(array, included_headings, &this.rowDg);

            // Every expected row must have reached the delegate
            .test(this.test_row == this.expected.length);
        }
    }


    scope csv = new HeadingsCSV;
    scope tester = new Tester;

    // Headings + single row test
    tester.test(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows`,
        [[HeadingsCSV.Field("Heading1", "This"),
          HeadingsCSV.Field("Heading2", "Time"),
          HeadingsCSV.Field("Heading3", "With"),
          HeadingsCSV.Field("Heading4", "Two"),
          HeadingsCSV.Field("Heading5", "Rows")]]);

    // Headings + longer row test
    tester.test(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows,But,Longer`,
        [[HeadingsCSV.Field("Heading1", "This"),
          HeadingsCSV.Field("Heading2", "Time"),
          HeadingsCSV.Field("Heading3", "With"),
          HeadingsCSV.Field("Heading4", "Two"),
          HeadingsCSV.Field("Heading5", "Rows"),
          HeadingsCSV.Field("unknown", "But"),
          HeadingsCSV.Field("unknown", "Longer")]]);

    // Headings + two rows test
    tester.test(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows
Yes,There,Are,Really,Three`,
        [[HeadingsCSV.Field("Heading1", "This"),
          HeadingsCSV.Field("Heading2", "Time"),
          HeadingsCSV.Field("Heading3", "With"),
          HeadingsCSV.Field("Heading4", "Two"),
          HeadingsCSV.Field("Heading5", "Rows")],
         [HeadingsCSV.Field("Heading1", "Yes"),
          HeadingsCSV.Field("Heading2", "There"),
          HeadingsCSV.Field("Heading3", "Are"),
          HeadingsCSV.Field("Heading4", "Really"),
          HeadingsCSV.Field("Heading5", "Three")]]);

    // Excluded headings
    tester.test_inc(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows
Yes,There,Are,Really,Three`,
        ["Heading2", "Heading4", "Heading5"],
        [[HeadingsCSV.Field("Heading2", "Time"),
          HeadingsCSV.Field("Heading4", "Two"),
          HeadingsCSV.Field("Heading5", "Rows")],
         [HeadingsCSV.Field("Heading2", "There"),
          HeadingsCSV.Field("Heading4", "Really"),
          HeadingsCSV.Field("Heading5", "Three")]]);

    // Excluded headings + long row
    tester.test_inc(csv,
`Heading1,Heading2,Heading3,Heading4,Heading5
This,Time,With,Two,Rows
Yes,There,Are,Really,Three,Some,Extra,Fields`,
        ["Heading2", "Heading4", "Heading5"],
        [[HeadingsCSV.Field("Heading2", "Time"),
          HeadingsCSV.Field("Heading4", "Two"),
          HeadingsCSV.Field("Heading5", "Rows")],
         [HeadingsCSV.Field("Heading2", "There"),
          HeadingsCSV.Field("Heading4", "Really"),
          HeadingsCSV.Field("Heading5", "Three")]]);
}