1 /******************************************************************************
2 
3     Generates and reads headers of chunks of compressed data, containing the
4     data length, compression type and checksum
5 
6     There are two header versions:
7         1. the regular LzoHeader,
8         2. the Null header which has the same meaning as a Stop header.
9 
10     LzoHeader data layout if size_t has a width of 32-bit:
11         void[16] header
12 
13             header[0  ..  4] - length of chunk[4 .. $] (or compressed data
14                                length + header length - 4)
15             header[4 ..   8] - 32-bit CRC value of following header elements and
16                                compressed data (chunk[8 .. $]), calculated using
17                                lzo_crc32()
18             header[8  .. 12] - chunk/compression type code (signed integer)
19             header[12 .. 16] - length of uncompressed data (may be 0)
20 
21 
22     Null header data layout if size_t has a width of 32-bit:
23         void[4] null_header
24 
25             header[0  ..  4] - all bytes set to value 0
26 
27     Copyright:
28         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
29         All rights reserved.
30 
31     License:
32         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
33         Alternatively, this file may be distributed under the terms of the Tango
34         3-Clause BSD License (see LICENSE_BSD.txt for details).
35 
36  ******************************************************************************/
37 
38 module ocean.io.compress.lzo.LzoHeader;
39 
40 import ocean.io.compress.lzo.LzoCrc;
41 
42 import ocean.io.compress.CompressException;
43 
44 import ocean.core.Enforce: enforce;
45 
46 version (UnitTestVerbose) import ocean.io.Stdout : Stderr;
47 
48 /******************************************************************************
49 
50     LzoHeader structure
51 
52  ******************************************************************************/
53 
align (1) struct LzoHeader ( bool LengthInline = true )
{
    /**************************************************************************

        Length of the chunk excluding this length value

        The default value is that of a payload-less chunk (start/stop chunk).

        "length" is defined below

     **************************************************************************/

    size_t chunk_length = length - size_t.sizeof;

    /**************************************************************************

        CRC32 of following header elements and compressed data

     **************************************************************************/

    private uint crc32_;

    /**************************************************************************

        Chunk type (Type enumerator is defined below)

     **************************************************************************/

    Type type = Type.None;

    /**************************************************************************

        Length of uncompressed data

     **************************************************************************/

    size_t uncompressed_length = 0;

    /**************************************************************************

        Error message source constant

     **************************************************************************/

    enum ErrMsgSource = typeof (this).stringof;

    /**************************************************************************

        Header type enumerator

     **************************************************************************/

    enum Type : int
    {
        Stop  = 0,

        None,
        LZO1X,

        Start = -1,

    }

    /**************************************************************************

        Total data length of the members of this structure. With "align (1)"
        as structure definition attribute "length" equals the "sizeof" value
        since the member data are then packed without padding.
        Because this structure represents the LZO chunk header data
        elements, "length" must equal "sizeof" in order to generate the
        correct LZO chunk header by serializing an instance of this
        structure. Hence "length == sizeof" is checked at compile-time
        in write().

     **************************************************************************/

    public static size_t length ( )
    {
        return LzoHeader.sizeof;
    }

    //static assert (length == typeof (this).sizeof);

    /**************************************************************************

        Number of header bytes located at the beginning of a chunk passed to
        the methods of this structure: the complete header if LengthInline is
        true, or the header without the leading chunk length field otherwise.

     **************************************************************************/

    static if ( LengthInline )
    {
        enum read_length = length;
    }
    else
    {
        enum read_length = length - size_t.sizeof;
    }

    /**************************************************************************

        Writes the header to chunk[0 .. this.read_length].

        The chunk_length and CRC32 members of this instance are updated from
        chunk before the header is written; the CRC32 covers the header
        elements following the CRC32 field and chunk[this.read_length .. $].

        Params:
            chunk = chunk with the first this.read_length bytes reserved for
                    the header, followed by the payload

        Returns:
            chunk (passed through)

        Throws:
            CompressException if chunk is shorter than this.read_length

     **************************************************************************/

    void[] write ( void[] chunk )
    {
        static assert (typeof (this).sizeof == SizeofTuple!(typeof (this.tupleof)),
                       ErrMsgSource ~ ": Bad data alignment");

        enforce!(CompressException)(chunk.length >= read_length,
                                     ErrMsgSource ~ ": Chunk too short to write header");

        static if ( LengthInline )
        {
            this.chunk_length = chunk.length - this.chunk_length.sizeof;

            this.crc32_ = this.crc32(strip(chunk));

            *(cast (typeof (this)*) chunk.ptr) = this;
        }
        else
        {
            // Same convention as setHeader(): without an inline length field
            // the chunk length is the length of the chunk itself.
            this.chunk_length = chunk.length;

            this.crc32_ = this.crc32(strip(chunk));

            // The length field is not part of the chunk: copy only the
            // read_length bytes following it, so that neither the payload
            // nor memory beyond a minimum-sized chunk is overwritten.
            auto wire = (cast (ubyte*) &this)[size_t.sizeof .. this.length];

            (cast (ubyte[]) chunk)[0 .. read_length] = wire[];
        }

        return chunk;
    }

    /**************************************************************************

        Sets this instance to create a header for a chunk containing
        uncompressed data. Compression method is set to None.

        NOTE(review): payload.length is added to the current chunk_length
        value, so this appears to expect an instance whose chunk_length still
        holds the payload-less default -- confirm against callers.

        Params:
            payload = data to create header for

        Returns:
            this instance

     **************************************************************************/

    typeof (this)* uncompressed ( void[] payload )
    {
        this.type = Type.None;

        this.uncompressed_length = payload.length;

        this.chunk_length += payload.length;

        this.crc32_ = this.crc32(payload);

        return &this;
    }

    /**************************************************************************

        Sets this instance to create a Start header. Since a Start chunk has no
        payload, the returned data are a full Start chunk.

        Params:
            total_uncompressed_length = total uncompressed length of data
                                        contained in the following chunks

        Returns:
            this instance

     **************************************************************************/

    typeof (this)* start ( size_t total_uncompressed_length )
    {
        this = typeof (this).init;

        this.type = Type.Start;

        this.uncompressed_length = total_uncompressed_length;

        this.crc32_ = this.crc32();

        return &this;
    }

    /**************************************************************************

        Sets this instance to create a Stop header. Since a Stop chunk has no
        payload, the returned data are a full Stop chunk.

        Returns:
            this instance

     **************************************************************************/

    typeof (this)* stop ( )
    {
        this = typeof (this).init;

        this.type = Type.Stop;

        this.crc32_ = this.crc32();

        return &this;
    }

    /**************************************************************************

        Reads chunk which is expected to be a Start chunk or a Null chunk.

        After chunk has been read, this.type is either set to Start, if the
        provided chunk was a start chunk, or to Stop for a Null chunk.
        this.uncompressed_length reflects the total uncompressed size of the
        data in the chunks that will follow.

        Params:
            chunk = input chunk

        Returns:
            this instance

        Throws:
            CompressException if chunk is neither a Start chunk, as expected,
            nor a Stop or Null chunk, or if read() rejects the chunk

     **************************************************************************/

    typeof (this)* readStart ( void[] chunk )
    {
        this.read(chunk);

        enforce!(CompressException)(this.type == Type.Start || this.type == Type.Stop,
                                     ErrMsgSource ~ ": Not a Start header as expected");

        return &this;
    }

    /**************************************************************************

        Checks whether chunk is valid or not. If it is valid this object's data
        members are initialised with the chunk header info.

        A Null chunk is valid and sets this instance to a Stop header. Any
        other chunk must be at least this.read_length bytes long and carry a
        matching CRC32.

        NOTE(review): unlike read(), the chunk_length value is not compared
        with the actual chunk length here.

        Params:
            chunk = input chunk

        Returns:
            true if the chunk is valid

     **************************************************************************/

    bool tryRead ( void[] chunk )
    {
        // A Null chunk is always shorter than read_length, so it has to be
        // recognised before the minimum length check.
        if (isNullChunk(chunk))
        {
            this.stop();

            return true;
        }

        if (chunk.length < read_length)
        {
            return false;
        }

        this.setHeader(chunk);

        return this.crc32_ == this.crc32(strip(chunk));
    }

    /**************************************************************************

        Checks whether chunk is a Stop / Null chunk. The data members of this
        instance are set from the chunk provided.

        Params:
            chunk = input chunk

        Returns:
            true if chunk is a Stop or Null chunk

     **************************************************************************/

    bool tryReadStop ( void[] chunk )
    {
        return this.tryReadType(chunk, Type.Stop, true);
    }

    /**************************************************************************

        Checks whether chunk is a Start / Null chunk. The data members of this
        instance are set from the chunk provided.

        Params:
            chunk = input chunk

        Returns:
            true if chunk is a Start or Null chunk

     **************************************************************************/

    bool tryReadStart ( void[] chunk )
    {
        return this.tryReadType(chunk, Type.Start, true);
    }

    /**************************************************************************

        Returns the header data of this instance.

        NOTE(review): the slice always starts at the chunk length field and is
        this.read_length bytes long; with LengthInline = false it therefore
        does not extend over uncompressed_length. Use data_without_length()
        to obtain the header data following the length field.

        Returns:
            the first this.read_length bytes of this instance (slice, not a
            copy)

     **************************************************************************/

    void[] data ( )
    {
        return (cast (void*) &this)[0 .. this.read_length];
    }

    /**************************************************************************

        Returns the header data of this instance without the leading chunk
        length value.

        Returns:
            header data of this instance without the leading chunk length
            value (slice, not a copy)

     **************************************************************************/

    void[] data_without_length ( )
    {
        // Always extend to the end of the header: read_length is shorter
        // than length if LengthInline is false.
        return (cast (void*) &this)[size_t.sizeof .. this.length];
    }

    /**************************************************************************

        Reads the header from chunk and sets the members of this instance to
        the values contained in the header.

        Params:
            chunk = chunk with header (or Null chunk)

        Returns:
            payload from chunk (with header data stripped); null for a Null
            chunk

        Throws:
            CompressException if chunk is shorter than this.read_length, if
            the chunk length does not match the header or on CRC32 mismatch

     **************************************************************************/

    void[] read ( void[] chunk )
    {
        if (isNullChunk(chunk))
        {
            this.stop();

            return null;
        }

        // strip() enforces the minimum chunk length; do this before
        // setHeader() reads the header through chunk.ptr.
        void[] payload = strip(chunk);

        this.setHeader(chunk);

        enforce!(CompressException)(this.lengthValid(chunk),
                                     ErrMsgSource ~ ": Chunk length mismatch");

        enforce!(CompressException)(this.crc32_ == this.crc32(payload),
                                     ErrMsgSource ~ ": Chunk data corrupted (CRC32 mismatch)");

        return payload;
    }

    /**************************************************************************

        Checks whether the chunk_length member is correct for the given chunk.

        NOTE(review): the declared return type is size_t although the result
        is a boolean value (0 or 1); kept for interface compatibility.

        Params:
            chunk = chunk to check (with header)

        Returns:
            true if the length of chunk corresponds to the chunk_length member

     **************************************************************************/

    size_t lengthValid ( void[] chunk )
    {
        static if ( LengthInline )
        {
            return chunk.length == this.chunk_length + this.chunk_length.sizeof;
        }
        else
        {
            return chunk.length == this.chunk_length;
        }
    }

    /**************************************************************************

        Sets the internal data members from the given chunk.

        chunk must be at least this.read_length bytes long; this is not
        checked here.

        Params:
            chunk = data to read from

        Returns:
            this

     **************************************************************************/

    typeof (this)* setHeader ( void[] chunk )
    {
        static if ( LengthInline )
        {
            this = *cast (typeof (this)*) chunk.ptr;
        }
        else
        {
            // No length field in the chunk: take the length of the chunk
            // itself and read the remaining fields in declaration order.
            this.chunk_length = chunk.length;

            void* read_ptr = chunk.ptr;

            this.crc32_ = *(cast (typeof (this.crc32_)*) read_ptr);
            read_ptr += this.crc32_.sizeof;

            this.type = *(cast (typeof (this.type)*) read_ptr);
            read_ptr += this.type.sizeof;

            this.uncompressed_length = *(cast (typeof (this.uncompressed_length)*) read_ptr);
        }

        return &this;
    }

    /**************************************************************************

        Calculates the CRC32 value of the header elements after crc32_,
        optionally continued over payload.

        Params:
            payload = chunk payload to include in the CRC32; skipped if its
                      pointer is null

        Returns:
            resulting CRC32 value

     **************************************************************************/

    uint crc32 ( void[] payload = null )
    {
        auto header_tail = (cast (void*) &this)[this.crc32_.offsetof + this.crc32_.sizeof .. this.length];

        uint crc = LzoCrc.crc32(header_tail);

        if (payload)
        {
            crc = LzoCrc.crc32(crc, payload);
        }

        return crc;
    }

    /**************************************************************************

        Reads chunk which is expected to be of the specified type, or optionally
        a Null chunk; does not throw an exception if the chunk header is invalid
        or not of the specified types but returns false instead.

        After chunk has been read, this.type is either set to the specified
        type, if the provided chunk was of that type, or to Stop for a Null
        chunk.

        this.uncompressed_length reflects the total uncompressed size of the
        data in the chunks that will follow.

        Only payload-less chunks are accepted: apart from a Null chunk, the
        length of chunk must be exactly this.read_length.

        Params:
            chunk      = input chunk
            check_type = expected chunk type
            allow_null = true: accept a Null chunk as well

        Returns:
            true if chunk is of the specified type, as expected, or a Null
            chunk, or false otherwise

     **************************************************************************/

    private bool tryReadType ( void[] chunk, Type check_type, bool allow_null )
    {
        if (allow_null && isNullChunk(chunk))
        {
            this.stop();

            return true;
        }

        if (chunk.length != read_length)
        {
            return false;
        }

        this.setHeader(chunk);

        if (this.type != check_type)
        {
            return false;
        }

        static if ( LengthInline )
        {
            if (chunk.length != this.chunk_length + this.chunk_length.sizeof)
            {
                return false;
            }
        }

        return this.crc32_ == this.crc32();
    }

    /**************************************************************************

        Strips the header from chunk

        Params:
            chunk = chunk with header (must not be a Null chunk)

        Returns:
            chunk payload, that is, the chunk data without header (slice)

        Throws:
            CompressException if chunk.length is shorter than LzoHeader
            data

     **************************************************************************/

    static void[] strip ( void[] chunk )
    {
        enforce!(CompressException)(chunk.length >= read_length,
                                     ErrMsgSource ~ ": Chunk too short to strip header");

        return chunk[read_length .. $];
    }

    /**************************************************************************

        Checks whether chunk is a Null chunk, that is, it has a Null header. A
        Null header is defined as
                                                                             ---
            void[size_t.sizeof] null_header;
            null_header[] = 0;
                                                                             ---

        Since no payload can follow a Null header, a Null header is a
        complete chunk on its own, that is the Null chunk.

        If LengthInline is false, the length field is not part of the chunk,
        so the Null chunk is the empty chunk.

        Params:
            chunk = input chunk

        Returns:
            true if chunk is a Null chunk or false otherwise.

     **************************************************************************/

    static bool isNullChunk ( void[] chunk )
    {
        static if ( LengthInline )
        {
            return chunk.length == size_t.sizeof && *cast (size_t*) chunk.ptr == 0;
        }
        else
        {
            return chunk.length == 0;
        }
    }
}
609 
610     /******************************************************************************
611 
612         Calculates the sum of the sizes of the types of T
613 
614      ******************************************************************************/
615 
template SizeofTuple ( T ... )
{
    static if (T.length == 0)
    {
        // Empty type list: nothing to add up.
        static immutable size_t SizeofTuple = 0;
    }
    else
    {
        // Size of the first type plus the sum over the remaining types.
        static immutable SizeofTuple = T[0].sizeof + SizeofTuple!(T[1 .. $]);
    }
}
631 
632 /******************************************************************************
633 
634     Unit test
635 
636     Add -debug=GcDisabled to the compiler command line to disable the garbage
637     collector.
638 
639  ******************************************************************************/
640 
641 
642 version (UnitTest):
643 
644 import ocean.io.Stdout : Stderr;
645 
646 import ocean.time.StopWatch;
647 
648 import ocean.text.util.MetricPrefix;
649 
650 debug (GcDisabled) import ocean.core.Memory;
651 
652 import core.stdc.signal: signal, SIGINT;
653 
654 /******************************************************************************
655 
656 Terminator structure
657 
658 ******************************************************************************/
659 
struct Terminator
{
    /**************************************************************************

        Termination flag; set to true by terminate()

     **************************************************************************/

    static bool terminated = false;

    /**************************************************************************

        Signal handler with C linkage; raises the termination flag

        Params:
            code = signal code (not used)

     **************************************************************************/

    extern (C) static void terminate ( int code ) nothrow @nogc
    {
        terminated = true;
    }
}
683 
/******************************************************************************

    Generates Start and Stop headers, writes headers to chunks of 4 kB, 64 kB
    and 1 MB and reads them back, measuring the time taken by each step and
    verifying the results. With -debug=OceanPerformanceTest the same operations
    are repeated until SIGINT is received.

 ******************************************************************************/

unittest
{
    // Uncomment the next line to see UnitTest output
    // version = UnitTestVerbose;

    debug (GcDisabled)
    {
        pragma (msg, "LzoHeader unittest: garbage collector disabled");
        GC.disable();
    }

    StopWatch swatch;

    LzoHeader!() header;

    static immutable N = 1000;

    ubyte[header.sizeof][N] start_header_data, stop_header_data;

    static immutable C = 10;

    scope chunks4k  = new void[][](C, 4 * 1024);
    scope chunks64k = new void[][](C, 64 * 1024);
    scope chunks1M  = new void[][](C, 1024 * 1024);

    foreach (ref chunk; chunks4k)
    {
        (cast (char[]) chunk)[] = 'A';
    }

    foreach (ref chunk; chunks64k)
    {
        (cast (char[]) chunk)[] = 'B';
    }

    foreach (ref chunk; chunks1M)
    {
        (cast (char[]) chunk)[] = 'C';
    }

    swatch.start();

    for (uint i = 0; i < N; i++)
    {
        start_header_data[i][] = cast (ubyte[]) header.start(4 * 1024).data;
    }

    ulong us_start = swatch.microsec();

    for (uint i = 0; i < N; i++)
    {
        stop_header_data[i][] = cast (ubyte[]) header.stop().data;
    }

    ulong us_stop = swatch.microsec();

    for (uint i = 0; i < N; i++)
    {
        // Result stored in a variable so that the call itself is not part of
        // the assert expression.
        bool is_start = header.tryReadStart(start_header_data[i]);

        assert(is_start, "tryReadStart() rejected a generated Start header");
    }

    ulong us_try_read_start = swatch.microsec();

    for (uint i = 0; i < N; i++)
    {
        header.readStart(start_header_data[i]);

        assert(header.type == LzoHeader!().Type.Start,
               "readStart() did not decode the Start type");
        assert(header.uncompressed_length == 4 * 1024,
               "readStart() did not decode the uncompressed length");
    }

    ulong us_read_start = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        header.write(chunks4k[i]);
    }

    ulong us_write4k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        header.write(chunks64k[i]);
    }

    ulong us_write64k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        header.write(chunks1M[i]);
    }

    ulong us_write1M = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        void[] payload = header.read(chunks4k[i]);

        assert(payload.length == chunks4k[i].length - header.read_length,
               "read() returned a payload of unexpected length (4 kB chunk)");
    }

    ulong us_read4k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        void[] payload = header.read(chunks64k[i]);

        assert(payload.length == chunks64k[i].length - header.read_length,
               "read() returned a payload of unexpected length (64 kB chunk)");
    }

    ulong us_read64k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        void[] payload = header.read(chunks1M[i]);

        assert(payload.length == chunks1M[i].length - header.read_length,
               "read() returned a payload of unexpected length (1 MB chunk)");
    }

    ulong us_read1M = swatch.microsec();

    // Convert the cumulative stop watch readings into per-step durations.
    us_read1M         -= us_read64k;
    us_read64k        -= us_read4k;
    us_read4k         -= us_write1M;
    us_write1M        -= us_write64k;
    us_write64k       -= us_write4k;
    us_write4k        -= us_read_start;
    us_read_start     -= us_try_read_start;
    us_try_read_start -= us_stop;
    us_stop           -= us_start;

    // Untimed checks: Stop headers are recognised, and a chunk whose payload
    // was modified after write() is rejected.
    for (uint i = 0; i < N; i++)
    {
        bool is_stop = header.tryReadStop(stop_header_data[i]);

        assert(is_stop, "tryReadStop() rejected a generated Stop header");
    }

    {
        auto corrupted = cast (ubyte[]) chunks4k[0];

        corrupted[$ - 1] ^= 0xFF;

        bool accepted = header.tryRead(chunks4k[0]);

        assert(!accepted, "tryRead() accepted a corrupted chunk");

        bool thrown = false;

        try
        {
            header.read(chunks4k[0]);
        }
        catch (CompressException)
        {
            thrown = true;
        }

        assert(thrown, "read() did not throw for a corrupted chunk");

        // Restore the chunk for the optional performance loop below.
        corrupted[$ - 1] ^= 0xFF;
    }

    version (UnitTestVerbose)
        Stderr.formatln("LzoHeader unittest results:\n\t"
                 ~ "start():        1000 headers generated within {} ms\n\t"
                 ~ "stop():         1000 headers generated within {} ms\n\t"
                 ~ "tryReadStart(): 1000 headers checked   within {} ms\n\t"
                 ~ "readStart():    1000 headers checked   within {} ms\n\t"
                 ~ "\n\t"
                 ~ "write(): 10 chunks of  4 kB each written within {} ms\n\t"
                 ~ "write(): 10 chunks of 64 kB each written within {} ms\n\t"
                 ~ "write(): 10 chunks of  1 MB each written within {} ms\n\t"
                 ~ "\n\t"
                 ~ "read():  10 chunks of  4 kB each read    within {} ms\n\t"
                 ~ "read():  10 chunks of 64 kB each read    within {} ms\n\t"
                 ~ "read():  10 chunks of  1 MB each read    within {} ms\n"
                 ~ "\n"
                 ~ "LzoHeader unittest: Looping for memory leak detection; "
                 ~ "watch memory usage and press Ctrl+C to quit",
                   us_start          / 1000.f,
                   us_stop           / 1000.f,
                   us_try_read_start / 1000.f,
                   us_read_start     / 1000.f,
                   us_write4k        / 1000.f,
                   us_write64k       / 1000.f,
                   us_write1M        / 1000.f,
                   us_read4k         / 1000.f,
                   us_read64k        / 1000.f,
                   us_read1M         / 1000.f);

    // Ctrl+C raises Terminator.terminated; the previous handler is restored
    // when the test exits.
    auto prev_sigint_handler = signal(SIGINT, &Terminator.terminate);

    scope (exit) signal(SIGINT, prev_sigint_handler);

    debug ( OceanPerformanceTest ) while (!Terminator.terminated)
    {
        for (uint i = 0; i < N; i++)
        {
            start_header_data[i][] = cast (ubyte[]) header.start(4 * 1024).data;
            stop_header_data[i][] = cast (ubyte[]) header.stop().data;

            header.tryReadStart(start_header_data[i]);
            header.readStart(start_header_data[i]);
        }

        for (uint i = 0; i < C; i++)
        {
            header.write(chunks4k[i]);
            header.write(chunks64k[i]);
            header.write(chunks1M[i]);

            header.read(chunks4k[i]);
            header.read(chunks64k[i]);
            header.read(chunks1M[i]);
        }
    }

    version (UnitTestVerbose) Stderr.formatln("\n\nCompressionHeader unittest finished\n");
}