/*******************************************************************************

    Copyright:
        Copyright (c) 2004 Kris Bell.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

    Version: Initial release: December 2005

    Authors: Kris Bell

*******************************************************************************/

module ocean.io.stream.Iterator;

import ocean.meta.types.Qualifiers;

import ocean.core.Verify;

import ocean.io.stream.Buffered;

package import ocean.io.device.Conduit : InputFilter, InputBuffer, InputStream;

/*******************************************************************************

    The base class for a set of stream iterators. These operate
    upon a buffered input stream, and are designed to deal with
    partial content. That is, stream iterators go to work the
    moment any data becomes available in the buffer. Contrast
    this behaviour with the ocean.text.Util iterators, which
    operate upon the extent of an array.

    There are two types of iterators supported: exclusive and
    inclusive. The former are the more common kind, where a token
    is delimited by elements that are considered foreign. Examples
    include space, comma, and end-of-line delineation. Inclusive
    tokens are just the opposite: they look for patterns in the
    text that should be part of the token itself - everything else
    is considered foreign. Currently ocean.io.stream includes the
    exclusive variety only.

    Each pattern is exposed to the client as a slice of the original
    content, where the slice is transient. If you need to retain the
    exposed content, then you should .dup it appropriately.

    The content provided to these iterators is intended to be fully
    read-only. All current tokenizers abide by this rule, but it is
    possible a user could mutate the content through a token slice.
    To enforce the desired read-only aspect, the code would have to
    introduce redundant copying or the compiler would have to support
    read-only arrays (now in D2).

    See Delimiters, Lines, Patterns, Quotes.
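
    A hypothetical usage sketch: iterate over the lines of a file and
    retain copies of them, since each exposed slice is transient and may
    be overwritten by the next iteration:
    ---
    auto lines = new Lines!(char) (new File("myfile"));
    char[][] copies;
    foreach (line; lines)
             copies ~= line.dup;
    ---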

*******************************************************************************/

class Iterator : InputFilter
{
    private InputBuffer source;

    protected cstring   slice,
                        delim;

    /***********************************************************************

        The pattern scanner, implemented via subclasses.
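
        A hypothetical sketch of a subclass scanner that splits on a
        single ';' character: the token is recorded via set() and the
        consumed extent reported via found(), while notFound() requests
        that more content be loaded and a rescan initiated:
        ---
        override protected size_t scan (const(void)[] data)
        {
            auto content = (cast(const(char)*) data.ptr) [0 .. data.length];
            foreach (i, c; content)
                     if (c is ';')
                         return found (set (content.ptr, 0, i));
            return notFound;
        }
        ---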

    ***********************************************************************/

    abstract protected size_t scan (const(void)[] data);

    /***********************************************************************

        Instantiate with a buffer.

    ***********************************************************************/

    this (InputStream stream = null)
    {
        super (stream);
        if (stream)
            set (stream);
    }

    /***********************************************************************

        Set the provided stream as the scanning source.

    ***********************************************************************/

    Iterator set (InputStream stream)
    {
        verify(stream !is null);
        source = BufferedInput.create (stream);
        super.source = source;
        return this;
    }

    /***********************************************************************

        Return the current token as a slice of the content.

    ***********************************************************************/

    final cstring get ()
    {
        return slice;
    }

    /**********************************************************************

        Iterate over the set of tokens. This should really
        provide read-only access to the tokens, but D does
        not support that at this time.

    **********************************************************************/

    int opApply (scope int delegate(ref cstring) dg)
    {
        bool more;
        int  result;

        do {
            more = consume;
            result = dg (slice);
        } while (more && !result);
        return result;
    }

    /**********************************************************************

        Iterate over a set of tokens, exposing a token count
        starting at zero.
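
        A hypothetical sketch that uses the token count to skip the
        first line of a file:
        ---
        foreach (i, line; new Lines!(char) (new File("myfile")))
                 if (i > 0)
                     Cout (line).newline;
        ---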

    **********************************************************************/

    int opApply (scope int delegate(ref int, ref cstring) dg)
    {
        bool more;
        int  result,
             tokens;

        do {
            more = consume;
            result = dg (tokens, slice);
            ++tokens;
        } while (more && !result);
        return result;
    }

    /**********************************************************************

        Iterate over a set of tokens and delimiters, exposing a
        token count starting at zero.
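
        A hypothetical sketch, assuming 'iterator' is a tokenizer whose
        scan() records the trailing delimiter via the four-argument
        set() overload:
        ---
        cstring[] tokens, delims;
        foreach (i, token, delim; iterator)
        {
            tokens ~= token.dup;
            delims ~= delim.dup;
        }
        ---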

    **********************************************************************/

    int opApply (scope int delegate(ref int, ref cstring, ref cstring) dg)
    {
        bool more;
        int  result,
             tokens;

        do {
            delim = null;
            more = consume;
            result = dg (tokens, slice, delim);
            ++tokens;
        } while (more && !result);
        return result;
    }

    /***********************************************************************

        Locate the next token. Returns the token if found, null
        otherwise. Null indicates an end of stream condition. To
        sweep a conduit for lines using method next():
        ---
        auto lines = new Lines!(char) (new File("myfile"));
        while (lines.next)
               Cout (lines.get).newline;
        ---

        Alternatively, we can extract one line from a conduit:
        ---
        auto line = (new Lines!(char) (new File("myfile"))).next;
        ---

        The difference between next() and foreach() is that the
        latter processes all tokens in one go, whereas the former
        processes in a piecemeal fashion. To wit:
        ---
        foreach (line; new Lines!(char) (new File("myfile")))
                 Cout (line).newline;
        ---

    ***********************************************************************/

    final cstring next ()
    {
        if (consume() || slice.length)
            return slice;
        return null;
    }

    /***********************************************************************

        Set the content of the current slice to the provided
        start and end points.

    ***********************************************************************/

    protected final size_t set (const(char)* content, size_t start, size_t end)
    {
        slice = content [start .. end];
        return end;
    }

    /***********************************************************************

        Set the content of the current slice to the provided
        start and end points, and delimiter to the segment
        between end & next (inclusive.)
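
        For example (a hypothetical sketch), a scan() implementation such
        as the one sketched above, having matched a single-character
        delimiter at index i, could record both the token and that
        delimiter with:
        ---
        return found (set (content.ptr, 0, i, i));
        ---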

    ***********************************************************************/

    protected final size_t set (const(char)* content, size_t start, size_t end, size_t next)
    {
        slice = content [start .. end];
        delim = content [end .. next+1];
        return end;
    }

    /***********************************************************************

        Called when a scanner fails to find a matching pattern.
        This may cause more content to be loaded, and a rescan
        initiated.

    ***********************************************************************/

    protected final size_t notFound ()
    {
        return Eof;
    }

    /***********************************************************************

        Invoked when a scanner matches a pattern. The provided
        value should be the index of the last element of the
        matching pattern, which is converted back to a void[]
        index.

    ***********************************************************************/

    protected final size_t found (size_t i)
    {
        return (i + 1);
    }

    /***********************************************************************

        See if set of characters holds a particular instance.

    ***********************************************************************/

    protected final bool has (cstring set, char match)
    {
        foreach (c; set)
            if (match is c)
                return true;
        return false;
    }

    /***********************************************************************

        Consume the next token and place it in 'slice'. Returns
        true when there are potentially more tokens.

    ***********************************************************************/

    private bool consume ()
    {
        if (source.next (&scan))
            return true;

        // consume trailing token
        source.reader ((const(void)[] arr)
        {
            slice = (cast(const(char)*) arr.ptr) [0 .. arr.length];
            return arr.length;
        });
        return false;
    }
}