1 /*******************************************************************************
2 
3         Copyright:
4             Copyright (c) 2006 Tango contributors.
5             Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
6             All rights reserved.
7 
8         License:
9             Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
10             See LICENSE_TANGO.txt for details.
11 
12         Version: Jan 2006: initial release
13 
14         Authors: Kris, Nthalk
15 
16 *******************************************************************************/
17 
18 module ocean.io.stream.Quotes;
19 
20 import ocean.meta.types.Qualifiers;
21 
22 import ocean.io.stream.Iterator;
23 
24 version (unittest) import ocean.core.Test;
25 
26 /*******************************************************************************
27 
28         Iterate over a set of delimited, optionally-quoted, text fields.
29 
30         Each field is exposed to the client as a slice of the original
31         content, where the slice is transient. If you need to retain the
32         exposed content, then you should .dup it appropriately.
33 
34         The content exposed via an iterator is supposed to be entirely
35         read-only. All current iterators abide by this rule, but it is
36         possible a user could mutate the content through a get() slice.
37         To enforce the desired read-only aspect, the code would have to
38         introduce redundant copying or the compiler would have to support
39         read-only arrays.
40 
41         Usage:
42         ---
43         auto f = new File ("my.csv");
44         auto l = new Lines (f);
45         auto b = new Array (0);
46         auto q = new Quotes(",", b);
47 
48         foreach (line; l)
49                 {
50                 b.assign (line);
                foreach (index, field; q)
52                          Stdout (index, field);
53                 Stdout.newline;
54                 }
55         ---
56 
57         See Iterator, Lines, Patterns, Delimiters.
58 
59 *******************************************************************************/
60 
61 class Quotes : Iterator
62 {
63         private cstring delim;
64 
65         /***********************************************************************
66 
67                 This splits on delimiters only. If there is a quote, it
68                 suspends delimiter splitting until the quote is finished.
69 
70         ***********************************************************************/
71 
72         this (cstring delim, InputStream stream = null)
73         {
74                 super (stream);
75                 this.delim = delim;
76         }
77 
78         /***********************************************************************
79 
80                 This splits on delimiters only. If there is a quote, it
81                 suspends delimiter splitting until the quote is finished.
82 
83         ***********************************************************************/
84 
85         protected override
86         size_t scan (const(void)[] data)
87         {
88                 char quote = 0;
89                 int  escape = 0;
90                 auto content = (cast(const(char)*) data.ptr) [0 .. data.length];
91 
92                 foreach (i, c; content)
93                          // within a quote block?
94                          if (quote)
95                             {
96                             if (c is '\\')
97                                 ++escape;
98                             else
99                                {
100                                // matched the initial quote char?
101                                if (c is quote && escape % 2 is 0)
102                                    quote = 0;
103                                escape = 0;
104                                }
105                             }
106                          else
107                             // begin a quote block?
108                             if (c is '"' || c is '\'')
109                                 quote = c;
110                             else
111                                if (has (delim, c))
112                                    return found (set (content.ptr, 0, i));
113                 return notFound;
114         }
115 }
116 
117 
118 /*******************************************************************************
119 
120     Unittests
121 
122 *******************************************************************************/
123 
124 version (unittest)
125 {
126     import ocean.io.device.Array;
127     import ocean.text.Util;
128 }
129 
130 unittest
131 {
132     istring[] expected = [
133         `0`
134         ,``
135         ,``
136         ,`"3"`
137         ,`""`
138         ,`5`
139         ,`",6"`
140         ,`"7,"`
141         ,`8`
142         ,`"9,\\\","`
143         ,`10`
144         ,`',11",'`
145         ,`"12"`
146     ];
147 
148     auto b = new Array (expected.join (",").dup);
149     foreach (i, f; new Quotes(",", b))
150     {
151         test (i < expected.length, "uhoh: unexpected match");
152         test (f == expected[i], "uhoh: bad match)");
153     }
154 }