/*******************************************************************************

        Copyright:
            Copyright (c) 2006 Tango contributors.
            Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
            All rights reserved.

        License:
            Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
            See LICENSE_TANGO.txt for details.

        Version: Jan 2006: initial release

        Authors: Kris, Nthalk

*******************************************************************************/

module ocean.io.stream.Quotes;

import ocean.meta.types.Qualifiers;

import ocean.io.stream.Iterator;

version (unittest) import ocean.core.Test;

/*******************************************************************************

        Iterate over a set of delimited, optionally-quoted, text fields.

        Each field is exposed to the client as a slice of the original
        content, where the slice is transient. If you need to retain the
        exposed content, then you should .dup it appropriately.

        The content exposed via an iterator is supposed to be entirely
        read-only. All current iterators abide by this rule, but it is
        possible a user could mutate the content through a get() slice.
        To enforce the desired read-only aspect, the code would have to
        introduce redundant copying or the compiler would have to support
        read-only arrays.

        Quoted fields are returned with their surrounding quote characters
        intact; no unquoting or unescaping is performed by this class.

        Usage:
        ---
        auto f = new File ("my.csv");
        auto l = new Lines (f);
        auto b = new Array (0);
        auto q = new Quotes(",", b);

        foreach (line; l)
        {
            b.assign (line);
            foreach (index, field; q)
                Stdout (index, field);
            Stdout.newline;
        }
        ---

        See Iterator, Lines, Patterns, Delimiters.

*******************************************************************************/

class Quotes : Iterator
{
        /// Set of delimiter characters; any one of them ends a field when it
        /// appears outside of a quoted section.
        private cstring delim;

        /***********************************************************************

                Construct a quote-aware field iterator.

                Params:
                    delim  = characters to split on; each character is
                             checked individually against the input (the
                             string is used as a set, not as a multi-char
                             separator)
                    stream = input stream handed to the Iterator base class
                             constructor; defaults to null

        ***********************************************************************/

        this (cstring delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                This splits on delimiters only. If there is a quote, it
                suspends delimiter splitting until the quote is finished.

                A quoted section starts at a `"` or `'` character and ends
                at the next occurrence of that same character which is not
                preceded by an odd number of consecutive backslashes.
                Backslashes outside of a quoted section have no special
                meaning.

                Params:
                    data = the content to scan for the end of the next field

                Returns:
                    the result of found() for the slice preceding the first
                    unquoted delimiter, or notFound when no such delimiter
                    exists in data (both are inherited from Iterator)

        ***********************************************************************/

        protected override
        size_t scan (const(void)[] data)
        {
                // the quote character that opened the current quoted section,
                // or 0 when not inside a quoted section
                char quote = 0;

                // number of consecutive backslashes seen immediately before
                // the current character (only tracked inside quotes)
                int escape = 0;

                auto content = (cast(const(char)*) data.ptr) [0 .. data.length];

                foreach (i, c; content)
                {
                        // within a quote block?
                        if (quote)
                        {
                                if (c is '\\')
                                        ++escape;
                                else
                                {
                                        // matched the initial quote char, and
                                        // it is not backslash-escaped?
                                        if (c is quote && escape % 2 is 0)
                                                quote = 0;

                                        // any non-backslash ends the run
                                        escape = 0;
                                }
                        }
                        // begin a quote block?
                        else if (c is '"' || c is '\'')
                                quote = c;
                        // unquoted delimiter: the field ends just before it
                        else if (has (delim, c))
                                return found (set (content.ptr, 0, i));
                }

                return notFound;
        }
}


/*******************************************************************************

        Unittests

*******************************************************************************/

version (unittest)
{
        import ocean.io.device.Array;
        import ocean.text.Util;
}

unittest
{
        // fields as they are expected to come back, quotes and escapes intact
        istring[] expected = [
                `0`
                ,``
                ,``
                ,`"3"`
                ,`""`
                ,`5`
                ,`",6"`
                ,`"7,"`
                ,`8`
                ,`"9,\\\","`
                ,`10`
                ,`',11",'`
                ,`"12"`
        ];

        auto b = new Array (expected.join (",").dup);
        foreach (i, f; new Quotes(",", b))
        {
                test (i < expected.length, "uhoh: unexpected match");
                test (f == expected[i], "uhoh: bad match");
        }
}