1 /*******************************************************************************
2 
3         Copyright:
4             Copyright (C) 2008 Kris Bell,
5             Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
6             All rights reserved.
7 
8         License:
9             Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
10             See LICENSE_TANGO.txt for details.
11 
12         Version: July 2008: Initial release
13 
14         Authors: Kris
15 
16 *******************************************************************************/
17 
18 module ocean.text.json.JsonEscape;
19 
20 import ocean.meta.types.Qualifiers;
21 
22 import ocean.text.json.JsonParser;
23 
24 import Util = ocean.text.Util;
25 
26 import Utf = ocean.text.convert.Utf;
27 
28 version (unittest) import ocean.core.Test;
29 
30 /******************************************************************************
31 
32         Convert 'escaped' chars to normal ones. For example: \\ => \
33 
34         The provided output buffer should be at least as long as the
35         input string, or it will be allocated from the heap instead.
36 
        Always returns a slice of the output buffer holding the converted
        content; src is copied there even when it contains no escapes
39 
40 ******************************************************************************/
41 
TC[] unescape(T, TC = Unqual!(T)) (T[] src, TC[] dst = null)
{
        // number of elements of dst filled so far
        size_t used;

        // Sink handed to the delegate-based unescape(): copies each emitted
        // chunk to dst[used ..], enlarging dst first when the chunk would
        // not fit
        void sink (const(Unqual!(T))[] chunk)
        {
                auto end = used + chunk.length;

                if (end > dst.length)
                    dst.length = dst.length + chunk.length + 1024;

                dst[used .. end] = chunk;
                used = end;
        }

        unescape (src, &sink);

        // only the filled part of the (possibly reallocated) buffer
        return dst [0 .. used];
}
57 
unittest
{
    // an escaped backslash collapses to a single backslash
    auto unescaped = unescape("aaa\\\\b");
    test (unescaped == "aaa\\b");
}
63 
64 
65 /******************************************************************************
66 
67         Convert reserved chars to escaped ones. For example: \ => \\
68 
        A slice of the output buffer holding the escaped content is always
        returned, even when src contains no reserved chars. The output
        buffer will be expanded as necessary
72 
73 ******************************************************************************/
74 
TC[] escape(T, TC = Unqual!(T)) (T[] src, TC[] dst = null)
{
        // number of elements of dst filled so far
        size_t used;

        // Sink handed to the delegate-based escape(): copies each emitted
        // chunk to dst[used ..], enlarging dst first when the chunk would
        // not fit
        void sink (const(Unqual!(T))[] chunk)
        {
                auto end = used + chunk.length;

                if (end > dst.length)
                    dst.length = dst.length + chunk.length + 1024;

                dst[used .. end] = chunk;
                used = end;
        }

        escape (src, &sink);

        // only the filled part of the (possibly reallocated) buffer
        return dst [0 .. used];
}
90 
unittest
{
    // a lone backslash is doubled
    auto escaped = escape("aaa\\");
    test (escaped == "aaa\\\\");
}
96 
97 
98 /******************************************************************************
99 
100         Convert 'escaped' chars to normal ones. For example: \\ => \
101 
102         This variant does not require an interim workspace, and instead
103         emits directly via the provided delegate
104 
105 ******************************************************************************/
106 
void unescape(T, TC) (T[] src, scope void delegate(TC[]) emit)
{
        // Params:
        //   src  = text possibly containing JSON backslash escapes
        //   emit = sink called, in order, with each piece of the output
        //
        // Throws the shared invalid_escape exception on an unknown escape
        // character or a malformed \uXXXX sequence. A single trailing
        // backslash is passed through unchanged.

        static assert (is(Unqual!(T) == Unqual!(TC)));

        // Reads the four characters at p as hexadecimal digits into value.
        // Returns false as soon as one of them is not a hex digit.
        static bool hex4 (const(Unqual!(T))* p, out uint value)
        {
                foreach (i; 0 .. 4)
                        {
                        uint c = p[i];
                        uint digit;

                        if (c >= '0' && c <= '9')
                            digit = c - '0';
                        else if (c >= 'a' && c <= 'f')
                            digit = c - 'a' + 10;
                        else if (c >= 'A' && c <= 'F')
                            digit = c - 'A' + 10;
                        else
                            return false;

                        value = (value << 4) | digit;
                        }
                return true;
        }

        ptrdiff_t delta;
        auto s = src.ptr;
        auto len = src.length;
        enum : T { slash = '\\' }

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, slash, len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              // literal text in front of the backslash
              emit (s[0 .. delta]);
              len -= delta;
              s += delta;

              // bogus trailing '\'
              if (len < 2)
                 {
                 emit ("\\");
                 len = 0;
                 break;
                 }

              // translate \c
              switch (s[1])
                     {
                      case '\\':
                           emit ("\\");
                           break;

                      case '/':
                           emit ("/");
                           break;

                      case '"':
                           emit (`"`);
                           break;

                      case 'b':
                           emit ("\b");
                           break;

                      case 'f':
                           emit ("\f");
                           break;

                      case 'n':
                           emit ("\n");
                           break;

                      case 'r':
                           emit ("\r");
                           break;

                      case 't':
                           emit ("\t");
                           break;

                      case 'u':
                           {
                           uint hi, lo;

                           // need "\uXXXX" in full, with four hex digits
                           if (len < 6 || ! hex4 (s + 2, hi))
                               goto default;

                           dchar v = cast(dchar) hi;

                           // characters consumed here on top of the two
                           // ("\u") skipped after the switch
                           size_t used = 4;

                           // JSON writes code points above U+FFFF as a UTF-16
                           // surrogate pair "\uD8xx\uDCxx"; join the halves
                           // into one code point. A high surrogate that is
                           // not followed by a low one is left as it is.
                           if (hi >= 0xD800 && hi <= 0xDBFF && len >= 12
                               && s[6] == '\\' && s[7] == 'u'
                               && hex4 (s + 8, lo)
                               && lo >= 0xDC00 && lo <= 0xDFFF)
                              {
                              v = cast(dchar) (0x10000 + ((hi - 0xD800) << 10)
                                                       + (lo - 0xDC00));
                              used = 10;
                              }

                           Unqual!(T)[6] t = void;
                           emit (Utf.fromString32 ((&v)[0..1], t));
                           len -= used;
                           s += used;
                           }
                           break;

                      default:
                           throw invalid_escape;
                     }

              // step over the backslash and the escape character
              s += 2;
              len -= 2;
              } while ((delta = Util.indexOf (s, slash, len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           // no backslash at all: hand the input through untouched
           emit (src);
}
212 
213 
214 /******************************************************************************
215 
216         Convert reserved chars to escaped ones. For example: \ => \\
217 
218         This variant does not require an interim workspace, and instead
219         emits directly via the provided delegate
220 
221 ******************************************************************************/
222 
void escape(T, TC) (T[] src, scope void delegate(TC[]) emit)
{
        // Params:
        //   src  = text to escape
        //   emit = sink called, in order, with each piece of the output
        //
        // '"', '/' and '\' are prefixed with a backslash; \r \n \t \b \f get
        // their two-character form; any other character below U+0020 is
        // written as \u00XX, since JSON does not allow raw control
        // characters inside a string.

        static assert (is(Unqual!(TC) == Unqual!(T)));

        static immutable char[16] hex_digits = "0123456789abcdef";

        // "\c" replacement; the second element is filled in per match
        Unqual!(T)[2] patch = '\\';

        // "\u00XX" replacement; the last two elements are filled in per match
        Unqual!(T)[6] unicode = void;
        unicode[0] = '\\';
        unicode[1] = 'u';
        unicode[2] = '0';
        unicode[3] = '0';

        auto s = src.ptr;               // scan position
        auto t = s;                     // start of the pending literal run
        auto e = s + src.length;        // one past the last character

        while (s < e)
              {
              auto c = *s;

              switch (c)
                     {
                     case '"':
                     case '/':
                     case '\\':
                          patch[1] = c;
                          break;
                     case '\r':
                          patch[1] = 'r';
                          break;
                     case '\n':
                          patch[1] = 'n';
                          break;
                     case '\t':
                          patch[1] = 't';
                          break;
                     case '\b':
                          patch[1] = 'b';
                          break;
                     case '\f':
                          patch[1] = 'f';
                          break;
                     default:
                          if (c >= 0x20)
                             {
                             // ordinary character: stays in the literal run
                             ++s;
                             continue;
                             }

                          // control character without a short escape form
                          unicode[4] = hex_digits[c >> 4];
                          unicode[5] = hex_digits[c & 0x0F];
                          emit (t [0 .. s - t]);
                          emit (unicode[]);
                          t = ++s;
                          continue;
                     }

              // flush the literal run, then the two-character escape
              emit (t [0 .. s - t]);
              emit (patch[]);
              t = ++s;
              }

        // did we change anything? Copy tail also
        if (t is src.ptr)
            emit (src);
        else
           emit (t [0 .. e - t]);
}
271 
// Exception instance thrown by unescape() for an unrecognised or malformed
// escape sequence. It is created once in the module constructor below and the
// same object is thrown every time, rather than constructing a new exception
// at the throw site.
private Exception invalid_escape;

// Module constructor: creates the invalid_escape instance.
static this ( )
{
    invalid_escape = new Exception ("invalid escape");
}