1 /*******************************************************************************
2 
3     Serializer, to be used with the StructSerializer, which converts a struct
4     so that a php client can read it
5 
6     Serializer, to be used with the StructSerializer in
7     ocean.io.serialize.StructSerializer, which dumps a struct to a string.
8 
9     Usage example (in conjunction with ocean.io.serialize.StructSerializer):
10 
11     ---
12 
13         // Example struct to serialize
14         struct Data
15         {
16             struct Id
17             {
18                 char[] name;
19                 hash_t id;
20             }
21 
22             Id[] ids;
23             char[] name;
24             uint count;
25             float money;
26         }
27 
28         // Set up some data in a struct
29         Data data;
        data.ids = [Data.Id("hi", 23), Data.Id("hello", 17)];
31 
32         // Create serializer object
        scope ser = new PHPSerializer();
34 
35         // output buffer
36         ubyte[] output;
37 
38         // Dump struct to buffer via serializer
39         ser.serialize(output, data);
40 
41     ---
42 
43     Copyright:
44         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
45         All rights reserved.
46 
47     License:
48         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
49         Alternatively, this file may be distributed under the terms of the Tango
50         3-Clause BSD License (see LICENSE_BSD.txt for details).
51 
52 *******************************************************************************/
53 
54 module ocean.io.serialize.PHPSerializer;
55 
56 
57 
58 
59 import ocean.core.Array;
60 
61 import ocean.core.Verify;
62 
63 import ocean.io.serialize.StructSerializer;
64 
65 import ocean.core.ExceptionDefinitions;
66 
67 import ocean.math.Math : pow;
68 
69 version (UnitTestVerbose) import ocean.io.Stdout;
70 
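/*******************************************************************************

    Serializer class, to be plugged into the StructSerializer. Items are
    appended to the output buffer as raw bytes, except for ulong values, which
    are written as DPD encoded decimal numbers.

*******************************************************************************/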
75 
public class PHPSerializer
{
    /***************************************************************************

        Convenience method to serialize a struct.

        Forwards to StructSerializer, passing this object as the serializer
        which receives the individual fields.

        Params:
            T = type of struct to serialize
            output = buffer to append the serialized struct data to
            item = struct to serialize

    ***************************************************************************/

    public void serialize ( T ) ( ref ubyte[] output, ref T item )
    {
        StructSerializer!(true).serialize(&item, this, output);
    }


    /***************************************************************************

        Called at the start of struct serialization. This format has no
        header, so nothing is written.

        Params:
            output = buffer to append the serialized struct data to
            name = name of top-level object (unused)

    ***************************************************************************/

    public void open ( ref ubyte[] output, char[] name )
    {
    }


    /***************************************************************************

        Called at the end of struct serialization. This format has no
        trailer, so nothing is written.

        Params:
            output = buffer to append the serialized struct data to
            name = name of top-level object (unused)

    ***************************************************************************/

    public void close ( ref ubyte[] output, char[] name )
    {
    }


    /***************************************************************************

        Appends a named item to the output buffer.

        Usually the item is taken as it is, without any conversion: the
        T.sizeof bytes it occupies in memory are appended verbatim.

        ulongs are converted using the DPD algorithm, which is a compression
        algorithm for BCD (see writeBCD()).

        Unions are rejected at compile time.

        Note: the main method to use from the outside is the first serialize()
        method above. This method is for the use of the StructSerializer.

        Params:
            T = type of item
            output = buffer to append the serialized item to
            item = item to append
            name = name of item (unused, names are not part of the output)

    ***************************************************************************/

    public void serialize ( T ) ( ref ubyte[] output, ref T item, char[] name )
    {
        static assert ( ! is(T == union), "union unsupported" );

        static if ( is ( T == ulong ) )
        {
            writeBCD(item, output);
        }
        else
        {
            output ~= (cast(ubyte*) &item)[0 .. T.sizeof];
        }
    }

    /***************************************************************************

        Enum that represents the bits as they are described in the DPD paper,
        see http://web.archive.org/web/20070824053303/http://home.hetnet.nl/mr_1/81/jhm.bonten/computers/bitsandbytes/wordsizes/ibmpde.htm#dense

        a .. m are the offsets of the 12 BCD input bits (three digits of four
        bits each), p .. y are the offsets of the 10 DPD output bits. Both
        sequences start at offset 0.

    ***************************************************************************/

    private enum Bits
    {
        a=0,b,c,d,e,f,g,h,i,j,k,m,
        p=0,q,r,s,t,u,v,w,x,y
    }

    /***************************************************************************

        Writes the given number <num> as DPD encoded BCD number to the buffer
        <output>.

        The number is split into groups of three decimal digits, starting with
        the lowest-order digits. Each group is packed into 10 bits and the
        groups are appended one after another, most significant bit first.

        The first 4 bits are used to specify the length of the array in byte
        (including the byte which holds the length itself).

        Params:
            num = number to encode to DPD
            output = buffer to write encoded number to

    ***************************************************************************/

    private void writeBCD ( ulong num, ref ubyte[] output )
    {
        // starting with 4 bits for the length
        ubyte index = 4;

        ubyte[11] dpd = 0; // we need max 66.666.. bit
        ubyte[3] bcd; // convert three at a time

        // Sets the bit <offset> of the DPD group currently written (which
        // starts at bit position <index>) if <to> is non-zero. Bits are never
        // cleared, dpd starts out zeroed.
        void setBit ( Bits offset, ubyte to = true )
        {
            if ( to != 0)
                dpd[(index+offset)/8] |= 1 << 7-((index+offset) % 8);
        }

        // Returns 1 if the BCD bit <offset> of the current group is set,
        // 0 otherwise
        ubyte isSet ( ubyte offset )
        {
            // We divide by 4 because BCD only needs 4 bits and it was easier
            // to just use the first 4 bits of each element of a byte array
            // for the BCD encoding before converting it to DPD
            return !!(bcd[offset/4] & (1 << 3-(offset%4)));
        }

        do
        {
            bcd[] = 0;
            // convert three digits to BCD, the lowest-order digit of the
            // group ends up in bcd[2]
            for ( int c = 2; c >= 0 && num != 0; c-- )
            {
                // get and convert lowest-order number
                bcd[c] = cast(ubyte) ( num % 10 );
                num = num / 10;
            }

            // pack those three digits using dpd
            // see http://web.archive.org/web/20070824053303/http://home.hetnet.nl/mr_1/81/jhm.bonten/computers/bitsandbytes/wordsizes/ibmpde.htm#dense
            with ( Bits )
            {
                setBit(p, (isSet(a) & isSet(f) & isSet(i)) |
                          (isSet(a) & isSet(j)) |
                           isSet(b) );

                setBit(q, (isSet(a) & isSet(g) & isSet(i)) |
                          (isSet(a) & isSet(k)) |
                           isSet(c) );

                setBit(r, isSet(d));

                setBit(s, (invert(isSet(a)) & isSet(e) & isSet(j)) |
                          (isSet(f) & invert(isSet(i))) |
                          (invert(isSet(a)) & isSet(f)) |
                          (isSet(e) & isSet(i)) );

                setBit(t, (invert(isSet(a)) & isSet(e) & isSet(k)) |
                          (isSet(a) & isSet(i)) |
                           isSet(g) );

                setBit(u, isSet(h));

                setBit(v, isSet(a) | isSet(e) | isSet(i));

                setBit(w, (invert(isSet(e)) & isSet(j)) |
                          (isSet(e) & isSet(i)) |
                          isSet(a));

                setBit(x, (invert(isSet(a)) & isSet(k)) |
                          (isSet(a) & isSet(i)) |
                          isSet(e));

                setBit(y, isSet(m));
            }

            // 10 more bits used now
            index += 10;
        }
        while (num);

        // number of bytes needed to hold <index> bits, rounded up
        ubyte len = index/8 + (index % 8 == 0 ? 0 : 1);

        // the length has to fit into the 4 bits reserved for it
        verify(len <= 10, "unexpected DPD array length");

        // write length to the first 4 bits
        dpd[0] |= len << 4;

        output ~= dpd[0 .. len];
    }

    unittest
    {
        scope s = new PHPSerializer;
        ubyte[] output;

        // Reference decoder: converts a buffer written by writeBCD() back
        // into the number it encodes.
        ulong fromDPD ( ubyte[] input )
        {
            ubyte decimal_spot;
            ubyte[3] bcd;
            ulong ret = 0;

            short it;

            // length is in the first four bits
            auto len = cast(ubyte) (input[0] & 0b11110000) >> 4;

            // the first DPD group starts right after the 4 length bits
            ubyte initial = 4;

            // Returns 1 if the DPD bit <offset> of the group starting at bit
            // position <it> is set, 0 otherwise
            ubyte isSet ( Bits offset )
            {
                return !! (input[(it+offset)/8] &
                          (1 << 7-((it+offset)%8)));
            }

            // Sets the BCD bit <offset> of the current group if <doSet> is
            // non-zero
            void set ( ubyte offset, ubyte doSet )
            {
              //  version (UnitTestVerbose) Stdout.formatln("Set {}, {}", offset, doSet);
                if ( doSet != 0 ) bcd[offset/4] |= 1<< 3-offset%4;
            }

            for ( it = initial; it+10 <= len*8 ; it+=10 ) with ( Bits )
            {
              //  version (UnitTestVerbose) Stdout.formatln("It: {}", it);
                set(a, (invert(isSet(s)) & isSet(v) & isSet(w)) |
                       (isSet(t) & isSet(v) & isSet(w) & isSet(x)) |
                       (isSet(v) & isSet(w) & invert(isSet(x))));

                set(b, (isSet(p) & isSet(s) & isSet(x)) |
                       (isSet(p) & invert(isSet(w))) |
                       (isSet(p) & invert(isSet(v))));

                set(c, (isSet(q) & isSet(s) & isSet(x)) |
                       (isSet(q) & invert(isSet(w))) |
                       (isSet(q) & invert(isSet(v))));

                set(d, isSet(r));

                set(e, (isSet(t) & isSet(v) & invert(isSet(w)) & isSet(x)) |
                       (isSet(s) & isSet(v) & isSet(w) & isSet(x)) |
                       (invert(isSet(t)) & isSet(v) & isSet(x)));

                set(f, (isSet(p) & isSet(t) & isSet(v) & isSet(w) & isSet(x)) |
                       (isSet(s) & invert(isSet(x))) |
                       (isSet(s) & invert(isSet(v))));

                set(g, (isSet(q) & isSet(t) & isSet(w)) |
                       (isSet(t) & invert(isSet(x))) |
                       (isSet(t) & invert(isSet(v))));

                set(h, isSet(u));

                set(i, (isSet(t) & isSet(v) & isSet(w) & isSet(x)) |
                       (isSet(s) & isSet(v) & isSet(w) & isSet(x)) |
                       (isSet(v) & invert(isSet(w)) & invert(isSet(x))));

                set(j, (isSet(p) & invert(isSet(s)) & invert(isSet(t)) & isSet(w)) |
                       (isSet(s) & isSet(v) & invert(isSet(w)) & isSet(x)) |
                       (isSet(p) & isSet(w) & invert(isSet(x))) |
                       (invert(isSet(v)) & isSet(w)));

                set(k, (isSet(q) & invert(isSet(s)) & invert(isSet(t)) & isSet(v) & isSet(w)) |
                       (isSet(q) & isSet(v) & isSet(w) & invert(isSet(x))) |
                       (isSet(t) & isSet(v) & invert(isSet(w)) & isSet(x)) |
                       (invert(isSet(v)) & isSet(x)));

                set(m, isSet(y));

                // groups are stored lowest-order first and bcd[2] is the
                // lowest-order digit within a group
                ret += bcd[2] * pow(10UL, cast(ulong) decimal_spot++) +
                       bcd[1] * pow(10UL, cast(ulong) decimal_spot++) +
                       bcd[0] * pow(10UL, cast(ulong) decimal_spot++);

                version (UnitTestVerbose) Stdout.formatln("Decoded: {:b}, {}", bcd, bcd);
                bcd[] = 0;
            }

          // version (UnitTestVerbose) Stdout.formatln("Ret: {}", ret);

            return ret;
        }

        // Quick round trip check with a handful of numbers of different
        // lengths (one to five groups of three digits).
        foreach ( value; [0UL, 5, 999, 1000, 1095216660735] )
        {
            output.length = 0;
            s.writeBCD(value, output);

            assert(output.length != 0, "Output array has length 0");
            assert(fromDPD(output) == value, "De/En coding failed");
        }

      /*
        // This loop tests a broad range of numbers coming near the max of
        // ulong. Depending on the numbers used, it might take a bit
        // so it is commented.

        ulong last;
        for ( ulong i = 0; i <= ulong.max; i++ )
        {
            //if (i % 10000 == 0)
         //       version (UnitTestVerbose) Stdout.formatln("========== Testing {} (len: {})", i, output.length);

            output.length = 0;
            s.writeBCD(i, output);


            assert ( output.length != 0, "Output array has length 0" );
            assert(fromDPD(output) == i, "De/En coding failed");

            if ( i + i*0.2 > i )
                i+=i*0.2;

            if ( last > i )
                break;
            else
                last = i;
        }


        version (UnitTestVerbose) Stdout.formatln("Decoded: {}", fromDPD([161,201,156,126,149,35,78,177,5,64]));
        version (UnitTestVerbose) Stdout.formatln("Decoded: {}", fromDPD([126, 215, 96, 69, 133, 176, 4]));
*/
    }

    /***************************************************************************

        Called before a sub-struct is serialized. Nothing is written, the
        fields of the sub-struct simply follow in the output.

        Params:
            output = buffer to append the serialized struct data to
            name = name of struct item (unused)

    ***************************************************************************/

    public void openStruct ( ref ubyte[] output, char[] name )
    {
    }


    /***************************************************************************

        Called after a sub-struct is serialized. Nothing is written.

        Params:
            output = buffer to append the serialized struct data to
            name = name of struct item (unused)

    ***************************************************************************/

    public void closeStruct ( ref ubyte[] output, char[] name )
    {
    }


    /***************************************************************************

        Appends a named static array to the output buffer.

        Only the raw bytes of the elements are written; no length prefix is
        written.

        Params:
            T = base type of array
            output = buffer to append the serialized array to
            name = name of array item (unused)
            array = array to append

    ***************************************************************************/

    public void serializeStaticArray ( T ) ( ref ubyte[] output, char[] name, T[] array )
    {
        // the slice is taken from a ubyte pointer, so its bounds are in
        // bytes, not in elements
        output ~= (cast(ubyte*)array.ptr)[0 .. array.length * T.sizeof];
    }


    /***************************************************************************

        Appends a named array to the output buffer.

        The number of elements is written first, as uint, followed by the raw
        bytes of all elements. Arrays longer than uint.max can't be used.
        This is done because php doesn't support ulongs (only longs)

        Params:
            T = base type of array
            output = buffer to append the serialized array to
            name = name of array item (unused)
            array = array to append

    ***************************************************************************/

    public void serializeArray ( T ) ( ref ubyte[] output, char[] name, T[] array )
    {
        verify(array.length <= uint.max, "Array length doesn't fit into uint");
        uint len = cast(uint) array.length;

        output ~= (cast(ubyte*)&len)[0 .. uint.sizeof];

        // the slice is taken from a ubyte pointer, so its bounds are in
        // bytes, not in elements
        output ~= (cast(ubyte*)array.ptr)[0 .. array.length * T.sizeof];
    }


    /***************************************************************************

        Called before a struct array is serialized. Arrays of structs are not
        supported by this serializer.

        Params:
            T = base type of array
            output = buffer to append the serialized struct data to
            name = name of struct item
            array = array to append

        Throws:
            Exception, always

    ***************************************************************************/

    public void openStructArray ( T ) ( ref ubyte[] output, char[] name, T[] array )
    {
        throw new Exception("openStructArray unsupported");
    }


    /***************************************************************************

        Called after a struct array is serialized. Arrays of structs are not
        supported by this serializer.

        Params:
            T = base type of array
            output = buffer to append the serialized struct data to
            name = name of struct item
            array = array to append

        Throws:
            Exception, always

    ***************************************************************************/

    public void closeStructArray ( T ) ( ref ubyte[] output, char[] name, T[] array )
    {
        throw new Exception("closeStructArray unsupported");
    }
}
520 
/*******************************************************************************

    Utility to workaround deprecation for `~Enum.Field`: computes the bitwise
    complement through a plain int instead of applying `~` to the value
    directly.

    Params:
        num = value to complement

    Returns:
        num with every bit flipped

*******************************************************************************/

private int invert ( int num )
{
    // XOR with an all-ones mask flips every bit, which is exactly `~num`
    const int all_ones = -1;

    return num ^ all_ones;
}