1 /*******************************************************************************
2 
3     Serializer, to be used with the StructSerializer in
4     ocean.io.serialize.StructSerializer, which dumps a struct to a string.
5 
6     Usage example (in conjunction with ocean.io.serialize.StructSerializer):
7 
8     ---
9 
10         // Example struct to serialize
11         struct Data
12         {
13             struct Id
14             {
15                 cstring name;
16                 hash_t id;
17             }
18 
19             Id[] ids;
20             cstring name;
21             uint count;
22             float money;
23         }
24 
25         // Set up some data in a struct
26         Data data;
27         data.ids = [Data.Id("hi", 23), Data.Id("hello", 17)];
28 
29         // Create serializer object
30         scope ser = new StringStructSerializer!(char)();
31 
32         // A string buffer
33         char[] output;
34 
35         // Dump struct to buffer via serializer
36         ser.serialize(output, data);
37 
38     ---
39 
40     Copyright:
41         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
42         All rights reserved.
43 
44     License:
45         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
46         Alternatively, this file may be distributed under the terms of the Tango
47         3-Clause BSD License (see LICENSE_BSD.txt for details).
48 
49 *******************************************************************************/
50 
51 module ocean.io.serialize.StringStructSerializer;
52 
53 
54 
55 
56 import ocean.meta.types.Qualifiers;
57 
58 import ocean.core.Array;
59 
60 import ocean.io.serialize.StructSerializer;
61 
62 import ocean.text.convert.Formatter;
63 
64 import ocean.text.util.Time;
65 
66 import ocean.util.container.map.Set;
67 
68 import ocean.core.Exception;
69 
70 import ocean.meta.traits.Basic;
71 
72 /*******************************************************************************
73 
74     SerializerException
75 
76 *******************************************************************************/
77 
class SerializerException : Exception
{
    // Exception type raised by the serializer in this module when one of its
    // internal consistency checks fails. The mixin supplies the members of a
    // reusable exception (this module relies on its `enforce` method), so a
    // single pre-allocated instance can be thrown repeatedly.
    mixin ReusableExceptionImplementation!();
}
82 
83 /*******************************************************************************
84 
85     Reusable exception instance.
86 
87 *******************************************************************************/
88 
private SerializerException serializer_exception;

// Module constructor: creates the exception instance up front so that the
// serializer never has to allocate one at the point where a check fails.
static this ( )
{
    serializer_exception = new SerializerException;
}
95 
96 
97 /*******************************************************************************
98 
99     String struct serializer
100 
101     Params:
102         Char = character type of output string
103 
104 *******************************************************************************/
105 
public class StringStructSerializer ( Char )
{
    static assert(isCharType!(Char), typeof(this).stringof ~
        " - this class can only handle {char, wchar, dchar}, not " ~
        Char.stringof);


    /***************************************************************************

        Indentation size: number of spaces added per nesting level.

    ***************************************************************************/

    private static immutable indent_size = 3;


    /***************************************************************************

        Indentation level string - filled with spaces. Its length is always a
        multiple of indent_size; a length of 0 means that no struct is
        currently being serialized.

    ***************************************************************************/

    private Char[] indent;


    /***************************************************************************

        Format string for displaying an item of floating point type. Built
        once in the constructor from the requested number of decimal digits.

    ***************************************************************************/

    private cstring fp_format;


    /***************************************************************************

        Set of field names to be treated as timestamps. Refilled on every call
        to the convenience serialize() method.

    ***************************************************************************/

    private StandardHashingSet!(cstring) known_timestamp_fields;


    /***************************************************************************

        Flag that is set to true if single character fields in structs should be
        serialized into equivalent friendly string representations (applicable
        only if these fields contain whitespace or other unprintable
        characters).
        e.g. the newline character will be serialized to the string '\n' instead
        of to an actual new line.

    ***************************************************************************/

    private bool turn_ws_char_to_str;


    /***************************************************************************

        Temporary formatting buffer, reused by getCharAsString().

    ***************************************************************************/

    private mstring buf;


    /***************************************************************************

        Constructor, sets the maximum number of decimal digits to show for
        floating point types.

        Params:
            fp_dec_to_display = maximum number of decimal digits to show for
                                floating point types.

    ***************************************************************************/

    public this ( size_t fp_dec_to_display = 2 )
    {
        // Assemble "{}{} {} : {:.<N>}\n", where <N> is fp_dec_to_display
        mstring tmp = "{}{} {} : {:.".dup;
        sformat(tmp, "{}", fp_dec_to_display);
        tmp ~= "}\n";
        this.fp_format = tmp;
        this.known_timestamp_fields = new StandardHashingSet!(cstring)(128);
    }


    /***************************************************************************

        Convenience method to serialize a struct.

        If a field name of a struct matches one of the names in the
        timestamp_fields array and implicitly converts to `ulong`
        an ISO formatted string will be emitted in parentheses next to the
        value of the field (which is assumed to be a unix timestamp).

        Params:
            T                = type of item
            output           = string to serialize struct data to
            item             = item to append
            timestamp_fields = (optional) an array of timestamp field names
            turn_ws_char_to_str = true if individual whitespace or unprintable
                character fields should be serialized into a friendlier string
                representation, e.g. tab character into '\t' (defaults to false)

    ***************************************************************************/

    public void serialize ( T ) ( ref Char[] output, ref T item,
        cstring[] timestamp_fields = null, bool turn_ws_char_to_str = false )
    {
        this.turn_ws_char_to_str = turn_ws_char_to_str;

        // Field names from a previous call must not leak into this one
        this.known_timestamp_fields.clear();

        foreach (field_name; timestamp_fields)
        {
            this.known_timestamp_fields.put(field_name);
        }

        StructSerializer!(true).serialize(&item, this, output);
    }


    /***************************************************************************

        Called at the start of struct serialization - outputs the name of the
        top-level object.

        Params:
            output = string to serialize struct data to
            name = name of top-level object

        Throws:
            SerializerException if the indentation is not zero on entry

    ***************************************************************************/

    public void open ( ref Char[] output, cstring name )
    {
        .serializer_exception.enforce(this.indent.length == 0,
                "Non-zero indentation in open");

        sformat(output, "struct {}:\n", name);
        this.increaseIndent();
    }


    /***************************************************************************

        Called at the end of struct serialization. Nothing is written; only
        the indentation is reduced by one level.

        Params:
            output = string to serialize struct data to (unused)
            name = name of top-level object (unused)

    ***************************************************************************/

    public void close ( ref Char[] output, cstring name )
    {
        this.decreaseIndent();
    }


    /***************************************************************************

        Appends a named item to the output string.

        Note: the main method to use from the outside is the first serialize()
        method above. This method is for the use of the StructSerializer.

        Params:
            T = type of item
            output = string to serialize struct data to
            item = item to append
            name = name of item

        Throws:
            SerializerException if called while the indentation is zero

    ***************************************************************************/

    public void serialize ( T ) ( ref Char[] output, ref T item, cstring name )
    {
        .serializer_exception.enforce(this.indent.length > 0,
                "Incorrect indentation in serialize");

        // TODO: temporary support for unions by casting them to ubyte[]
        static if ( is(T == union) )
        {
            sformat(output, "{}union {} {} : {}\n", this.indent, T.stringof,
                name, (cast(ubyte*)&item)[0..item.sizeof]);
        }
        else static if ( isFloatingPointType!(T) )
        {
            sformat(output, this.fp_format, this.indent, T.stringof, name,
                item);
        }
        else static if ( is(T == char) )
        {
            // Individual character fields are handled in a special manner so
            // that friendly string representations can be generated for them if
            // necessary

            sformat(output, "{}{} {} : {}\n", this.indent, T.stringof, name,
                this.getCharAsString(item));
        }
        else
        {
            sformat(output, "{}{} {} : {}", this.indent, T.stringof, name,
                item);

            // The convertibility test has to be a `static if`: with a run-time
            // `if` the formatTime() call below would be compiled for every T
            // reaching this branch, including types that cannot be passed to
            // it.
            static if ( is(T : ulong) )
            {
                if ( name in this.known_timestamp_fields )
                {
                    Char[20] tmp;
                    sformat(output, " ({})\n", formatTime(item, tmp));
                }
                else
                {
                    sformat(output, "\n");
                }
            }
            else
            {
                sformat(output, "\n");
            }
        }
    }


    /***************************************************************************

        Called before a sub-struct is serialized.

        Params:
            output = string to serialize struct data to
            name = name of struct item

        Throws:
            SerializerException if called while the indentation is zero

    ***************************************************************************/

    public void openStruct ( ref Char[] output, cstring name )
    {
        .serializer_exception.enforce(this.indent.length > 0,
                "Incorrect indentation in openStruct");

        sformat(output, "{}struct {}:\n", this.indent, name);
        this.increaseIndent();
    }


    /***************************************************************************

        Called after a sub-struct is serialized. Nothing is written; only the
        indentation is reduced by one level.

        Params:
            output = string to serialize struct data to (unused)
            name = name of struct item (unused)

    ***************************************************************************/

    public void closeStruct ( ref Char[] output, cstring name )
    {
        this.decreaseIndent();
    }


    /***************************************************************************

        Appends a named array to the output string. An empty array is shown as
        `""` if its elements are characters and as `[]` otherwise.

        Params:
            T = base type of array
            output = string to serialize struct data to
            name = name of array item
            array = array to append

        Throws:
            SerializerException if called while the indentation is zero

    ***************************************************************************/

    public void serializeArray ( T ) ( ref Char[] output, cstring name,
        T[] array )
    {
        .serializer_exception.enforce(this.indent.length > 0,
            "Incorrect indentation in serializeArray");

        sformat(output, "{}{}[] {} (length {}):", this.indent, T.stringof, name,
            array.length);

        if ( array.length )
        {
            sformat(output, " {}", array);
        }
        else
        {
            static if ( isCharType!(T) )
            {
                sformat(output, ` ""`);
            }
            else
            {
                sformat(output, " []");
            }
        }

        sformat(output, "\n");
    }


    /***************************************************************************

        Called before a struct array is serialized. Writes the array header
        line and increases the indentation for the elements that follow.

        Params:
            T = base type of array
            output = string to serialize struct data to
            name = name of struct item
            array = array to append

        Throws:
            SerializerException if called while the indentation is zero

    ***************************************************************************/

    public void openStructArray ( T ) ( ref Char[] output, cstring name,
        T[] array )
    {
        .serializer_exception.enforce(this.indent.length > 0,
            "Incorrect indentation in openStructArray");

        sformat(output, "{}{}[] {} (length {}):\n", this.indent, T.stringof,
            name, array.length);
        this.increaseIndent();
    }


    /***************************************************************************

        Called after a struct array is serialized. Nothing is written; only
        the indentation is reduced by one level.

        Params:
            T = base type of array
            output = string to serialize struct data to (unused)
            name = name of struct item (unused)
            array = array to append (unused)

    ***************************************************************************/

    public void closeStructArray ( T ) ( ref Char[] output, cstring name,
        T[] array )
    {
        this.decreaseIndent();
    }


    /***************************************************************************

        Increases the indentation level by indent_size spaces.

    ***************************************************************************/

    private void increaseIndent ( )
    {
        this.indent.length = this.indent.length + indent_size;
        assumeSafeAppend(this.indent);
        this.indent[] = ' ';
    }


    /***************************************************************************

        Decreases the indentation level by indent_size spaces.

        Throws:
            SerializerException if the current indentation is shorter than one
            level

    ***************************************************************************/

    private void decreaseIndent ( )
    {
        .serializer_exception.enforce(this.indent.length >= indent_size,
                typeof(this).stringof ~ ".decreaseIndent - indentation cannot be decreased");

        this.indent.length = this.indent.length - indent_size;
        assumeSafeAppend(this.indent);
    }


    /***************************************************************************

        Gets the string equivalent of a character. For most characters, the
        string contains just the character itself; but in case of whitespace or
        other unprintable characters, a friendlier string representation is
        generated (provided the flag requesting this generation has been set).
        For example, the string '\n' will be generated for the newline
        character, '\t' for the tab character and so on.

        Params:
            c = character whose string equivalent is to be got

        Returns:
            string equivalent of the character; this is a slice of an internal
            buffer which is overwritten by the next call

    ***************************************************************************/

    private mstring getCharAsString ( char c )
    {
        this.buf.length = 0;
        assumeSafeAppend(this.buf);

        if ( !this.turn_ws_char_to_str )
        {
            sformat(this.buf, "{}", c);
            return this.buf;
        }

        // The set of characters to use for creating cases within the following
        // switch block. These are just whitespace or unprintable characters but
        // without their preceding backslashes.
        static immutable letters = ['0', 'a', 'b', 'f', 'n', 'r', 't', 'v'];

        switch ( c )
        {
            // An unset char field (char.init) is shown as an empty pair of
            // quotes
            case c.init:
                sformat(this.buf, "{}", "''");
                break;

            mixin(ctfeCreateCases(letters));

            default:
                sformat(this.buf, "{}", c);
                break;
        }

        return this.buf;
    }


    /***************************************************************************

        Creates a string containing all the necessary case statements to be
        mixed-in into the switch block that generates friendly string
        representations of whitespace or unprintable characters. This function
        is evaluated at compile-time.

        For each letter X the generated code is a `case '\X':` which formats
        the four characters '\X' (quotes included) into this.buf.

        Params:
            letters = string containing all the characters corresponding to the
                various case statements

        Returns:
            string containing all case statements to be mixed-in

    ***************************************************************************/

    private static istring ctfeCreateCases ( istring letters )
    {
        istring mixin_str;

        foreach ( c; letters )
        {
            mixin_str ~=
                `case '\` ~ c ~ `':` ~
                    `sformat(this.buf, "{}", "'\\` ~ c ~ `'");` ~
                    `break;`;
        }

        return mixin_str;
    }
}
555 
556 version (unittest)
557 {
558     import ocean.core.Test;
559     import ocean.meta.types.Typedef;
560     import core.stdc.time;
561 }
562 
unittest
{
    // An empty struct produces nothing but its header line.

    struct EmptyStruct
    {
    }

    EmptyStruct empty;
    mstring result;

    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, empty);

    test!("==")(result.length, 20);
    test!("==")(result, "struct EmptyStruct:\n");
}
581 
unittest
{
    // A plain struct holding one string field and one integer field.

    struct TextFragment
    {
        char[] text;
        int type;
    }

    auto fragment = TextFragment("eins".dup, 1);

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, fragment);

    test!("==")(result.length, 69);
    test!("==")(result, "struct TextFragment:\n" ~
                     "   char[] text (length 4): eins\n" ~
                     "   int type : 1\n");
}
606 
unittest
{
    // Timestamp fields: a field gets a formatted date appended only if its
    // name is listed AND its type converts to ulong.

    struct TextFragmentTime
    {
        char[] text;
        time_t time;        // not detected
        char[] lastseen;    // not detected (doesn't convert to ulong)
        time_t timestamp;   // detected
        time_t update_time; // detected
    }

    TextFragmentTime fragment;
    fragment.text = "eins".dup;
    fragment.time = 1456829726;

    cstring[] names = ["lastseen", "timestamp", "update_time"];

    mstring result;
    auto ser = new StringStructSerializer!(char);

    // First pass: with the list of timestamp field names
    ser.serialize(result, fragment, names);

    test!("==")(result.length, 207);
    test!("==")(result, "struct TextFragmentTime:\n" ~
                     "   char[] text (length 4): eins\n" ~
                     "   long time : 1456829726\n" ~
                     `   char[] lastseen (length 0): ""` ~ "\n" ~
                     "   long timestamp : 0 (1970-01-01 00:00:00)\n" ~
                     "   long update_time : 0 (1970-01-01 00:00:00)\n");

    // Second pass: same serializer and buffer, no timestamp field names
    result.length = 0;
    assumeSafeAppend(result);
    ser.serialize(result, fragment);

    test!("==")(result.length, 163);
    test!("==")(result, "struct TextFragmentTime:\n" ~
                     "   char[] text (length 4): eins\n" ~
                     "   long time : 1456829726\n" ~
                     `   char[] lastseen (length 0): ""` ~ "\n" ~
                     "   long timestamp : 0\n" ~
                     "   long update_time : 0\n");
}
650 
unittest
{
    // A struct whose only field is a two-dimensional array of structs.

    struct TextFragment
    {
        char[] text;
        int type;
    }

    struct MultiDimensionalArray
    {
        TextFragment[][] text_fragments;
    }

    MultiDimensionalArray mda;

    // Build the outer array one row at a time
    mda.text_fragments ~= [TextFragment("eins".dup, 1)];
    mda.text_fragments ~= [TextFragment("zwei".dup, 2),
        TextFragment("drei".dup, 3)];

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, mda);

    test!("==")(result.length, 461);
    test!("==")(result, "struct MultiDimensionalArray:\n" ~
                     "   TextFragment[][] text_fragments (length 2):\n" ~
                     "      TextFragment[] text_fragments (length 1):\n" ~
                     "         struct TextFragment:\n" ~
                     "            char[] text (length 4): eins\n" ~
                     "            int type : 1\n" ~
                     "      TextFragment[] text_fragments (length 2):\n" ~
                     "         struct TextFragment:\n" ~
                     "            char[] text (length 4): zwei\n" ~
                     "            int type : 2\n" ~
                     "         struct TextFragment:\n" ~
                     "            char[] text (length 4): drei\n" ~
                     "            int type : 3\n");
}
690 
unittest
{
    // A struct that contains another struct as a field.

    struct OuterStruct
    {
        int outer_a;
        struct InnerStruct
        {
            int inner_a;
        }
        InnerStruct s;
    }

    OuterStruct outer;
    outer.s.inner_a = 200;
    outer.outer_a = 100;

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, outer);

    test!("==")(result.length, 78);
    test!("==")(result, "struct OuterStruct:\n" ~
                     "   int outer_a : 100\n" ~
                     "   struct s:\n" ~
                     "      int inner_a : 200\n");
}
720 
unittest
{
    // Floating point fields, formatted with the default number of decimals.

    struct StructWithFloatingPoints
    {
        float a;
        double b;
        real c;
    }

    StructWithFloatingPoints fp;
    fp.b = 23.42;
    fp.a = 10.00;
    // fp.c is deliberately left at its initial value

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, fp);

    test!("==")(result.length, 85);
    test!("==")(result, "struct StructWithFloatingPoints:\n" ~
                     "   float a : 10\n" ~
                     "   double b : 23.42\n" ~
                     "   real c : nan\n");
}
747 
unittest
{
    // A union field is dumped as the raw bytes it occupies.

    struct StructWithUnion
    {
        union U
        {
            int a;
            char b;
            double c;
        }

        U u;
    }

    mstring result;
    auto ser = new StringStructSerializer!(char);

    StructWithUnion holder;

    // Write through the int member
    holder.u.a = 100;
    ser.serialize(result, holder);

    test!("==")(result.length, 66);
    test!("==")(result, "struct StructWithUnion:\n" ~
                     "   union U u : [100, 0, 0, 0, 0, 0, 0, 0]\n");

    // Write through the char member, then serialize into the emptied buffer
    holder.u.b = 'a';

    result.length = 0;
    assumeSafeAppend(result);
    ser.serialize(result, holder);

    test!("==")(result.length, 65);
    test!("==")(result, "struct StructWithUnion:\n" ~
                     "   union U u : [97, 0, 0, 0, 0, 0, 0, 0]\n");
}
786 
unittest
{
    // Individual char fields, with and without friendly representations of
    // whitespace / unprintable characters.

    struct StructWithChars
    {
        char c0;
        char c1;
        char c2;
        char c3;
        char c4;
        char c5;
        char c6;
        char c7;
        char c8;
        char c9;
    }

    auto chars = StructWithChars('g', 'k', '\0', '\a', '\b', '\f', '\n', '\r',
        '\t', '\v');

    mstring result;
    auto ser = new StringStructSerializer!(char);

    // Friendly representations disabled (the default): characters are
    // emitted verbatim
    ser.serialize(result, chars);

    test!("==")(result.length, 174);
    test!("==")(result, "struct StructWithChars:\n" ~
                     "   char c0 : g\n" ~
                     "   char c1 : k\n" ~
                     "   char c2 : \0\n" ~
                     "   char c3 : \a\n" ~
                     "   char c4 : \b\n" ~
                     "   char c5 : \f\n" ~
                     "   char c6 : \n\n" ~
                     "   char c7 : \r\n" ~
                     "   char c8 : \t\n" ~
                     "   char c9 : \v\n");

    // Friendly representations enabled: escape sequences are spelled out
    result.length = 0;
    assumeSafeAppend(result);
    ser.serialize(result, chars, [""], true);

    test!("==")(result.length, 198);
    test!("==")(result, "struct StructWithChars:\n" ~
                     "   char c0 : g\n" ~
                     "   char c1 : k\n" ~
                     "   char c2 : '\\0'\n" ~
                     "   char c3 : '\\a'\n" ~
                     "   char c4 : '\\b'\n" ~
                     "   char c5 : '\\f'\n" ~
                     "   char c6 : '\\n'\n" ~
                     "   char c7 : '\\r'\n" ~
                     "   char c8 : '\\t'\n" ~
                     "   char c9 : '\\v'\n");
}
854 
unittest
{
    // Integer arrays: one populated, one left empty.

    struct StructWithIntArrays
    {
        int[] a;
        int[] b;
    }

    StructWithIntArrays arrays;
    arrays.a = [10, 20, 30];

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, arrays);

    test!("==")(result.length, 90);
    test!("==")(result, "struct StructWithIntArrays:\n" ~
                     "   int[] a (length 3): [10, 20, 30]\n" ~
                     "   int[] b (length 0): []\n");
}
878 
unittest
{
    // A single field of a typedef'd type.

    mixin(Typedef!(hash_t, "AdskilletId"));

    struct StructWithTypedef
    {
        AdskilletId a;
    }

    StructWithTypedef wrapped;
    wrapped.a = cast(AdskilletId)1000;

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, wrapped);

    test!("==")(result.length, 50);
    test!("==")(result, "struct StructWithTypedef:\n" ~
                     "   AdskilletId a : 1000\n");
}
902 
unittest
{
    // An array whose elements are of a typedef'd type.

    mixin(Typedef!(hash_t, "AdskilletId"));

    struct StructWithTypedefArray
    {
        AdskilletId[] ids;
    }

    StructWithTypedefArray wrapped;
    wrapped.ids = new AdskilletId[](4);

    // Fill with consecutive values starting at 64
    for ( size_t i = 0; i < wrapped.ids.length; ++i )
    {
        wrapped.ids[i] = cast(AdskilletId)(64 + i);
    }

    mstring result;
    auto ser = new StringStructSerializer!(char);
    ser.serialize(result, wrapped);

    test!("==")(result, "struct StructWithTypedefArray:\n" ~
                     "   AdskilletId[] ids (length 4): [64, 65, 66, 67]\n");
}