1 /******************************************************************************
2 
3     Generates and reads headers of chunks of compressed data, containing the
4     data length, compression type and checksum
5 
6     There are two header versions:
7         1. the regular LzoHeader,
8         2. the Null header which has the same meaning as a Stop header.
9 
10     LzoHeader data layout if size_t has a width of 32-bit:
11         void[16] header
12 
13             header[0  ..  4] - length of chunk[4 .. $] (or compressed data
14                                length + header length - 4)
15             header[4 ..   8] - 32-bit CRC value of following header elements and
16                                compressed data (chunk[8 .. $]), calculated using
17                                lzo_crc32()
18             header[8  .. 12] - chunk/compression type code (signed integer)
19             header[12 .. 16] - length of uncompressed data (may be 0)
20 
21 
22     Null header data layout if size_t has a width of 32-bit:
23         void[4] null_header
24 
25             header[0  ..  4] - all bytes set to value 0
26 
27     Copyright:
28         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
29         All rights reserved.
30 
31     License:
32         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
33         Alternatively, this file may be distributed under the terms of the Tango
34         3-Clause BSD License (see LICENSE_BSD.txt for details).
35 
36  ******************************************************************************/
37 
38 module ocean.io.compress.lzo.LzoHeader;
39 
40 import ocean.io.compress.lzo.LzoCrc;
41 
42 import ocean.io.compress.CompressException;
43 
44 import ocean.core.Enforce: enforce;
45 
46 version (UnitTestVerbose) import ocean.io.Stdout : Stderr;
47 
48 /******************************************************************************
49 
50     LzoHeader structure
51 
52  ******************************************************************************/
53 
align (1) struct LzoHeader ( bool LengthInline = true )
{
    /**************************************************************************

        Length of the chunk excluding this length value, that is, the number
        of bytes that follow the length field: remaining header elements plus
        payload.

        The default value is that of a payload-less chunk (start/stop chunk).

        "length" is defined below

     **************************************************************************/

    size_t chunk_length = length - size_t.sizeof;

    /**************************************************************************

        CRC32 of following header elements and compressed data

     **************************************************************************/

    private uint crc32_;

    /**************************************************************************

        Chunk type (Type enumerator is defined below)

     **************************************************************************/

    Type type = Type.None;

    /**************************************************************************

        Length of uncompressed data

     **************************************************************************/

    size_t uncompressed_length = 0;

    /**************************************************************************

        Error message source constant

     **************************************************************************/

    enum ErrMsgSource = typeof (this).stringof;

    /**************************************************************************

        Header type enumerator

     **************************************************************************/

    enum Type : int
    {
        Stop  = 0,

        None,
        LZO1X,

        Start = -1,

    }

    /**************************************************************************

        Total data length of the members of this structure. With "align (1)"
        as structure definition attribute "length" equals the "sizeof" value
        since the member data are then packed without padding.
        Because this structure represents the LZO chunk header data
        elements, "length" must equal "sizeof" in order to generate the
        correct LZO chunk header by serializing an instance of this
        structure. Hence "length == sizeof" is checked at compile-time
        in write().

        Returns:
            size of this structure in bytes

     **************************************************************************/

    public static size_t length ( )
    {
        return LzoHeader.sizeof;
    }

    //static assert (length == typeof (this).sizeof);

    /**************************************************************************

        Number of header bytes that are present at the beginning of a chunk:
        the full header if the chunk length is stored inline, or the header
        without the leading chunk length value otherwise.

     **************************************************************************/

    static if ( LengthInline )
    {
        enum read_length = length;
    }
    else
    {
        enum read_length = length - size_t.sizeof;
    }

    /**************************************************************************

        Updates chunk_length and the CRC32 value from chunk and writes the
        header to chunk[0 .. this.read_length]. Exactly this.read_length bytes
        of chunk are overwritten; the payload that follows is only read.

        Params:
            chunk = chunk buffer whose first this.read_length bytes are
                    reserved for the header and whose remaining bytes hold
                    the payload

        Returns:
            chunk (passed through)

        Throws:
            CompressException if chunk is shorter than this.read_length

     **************************************************************************/

    void[] write ( void[] chunk )
    {
        static assert ((this).sizeof == SizeofTuple!(typeof (this.tupleof)),
                       this.ErrMsgSource ~ ": Bad data alignment");

        enforce!(CompressException)(chunk.length >= this.read_length,
                                     this.ErrMsgSource ~ ": Chunk too short to write header");

        // chunk_length counts the bytes that follow the length field. If the
        // length is not stored inline, chunk does not contain that field, so
        // the whole chunk counts (this is what setHeader()/lengthValid()
        // expect as well).
        static if ( LengthInline )
        {
            this.chunk_length = chunk.length - this.chunk_length.sizeof;
        }
        else
        {
            this.chunk_length = chunk.length;
        }

        this.crc32_ = this.crc32(this.strip(chunk));

        // Copy exactly read_length bytes. Storing the whole struct here would
        // write past chunk[0 .. read_length] if the length is not inline.
        (cast (ubyte[]) chunk[0 .. this.read_length])[] = cast (ubyte[]) this.data();

        return chunk;
    }

    /**************************************************************************

        Sets this instance to create a header for a chunk containing
        uncompressed data. Compression method is set to None.

        chunk_length is set to the length of a payload-less chunk plus the
        payload length, independent of its previous value, so calling this
        method repeatedly on the same instance is safe.

        Params:
            payload = data to create header for

        Returns:
            this instance

     **************************************************************************/

    typeof(&this) uncompressed ( void[] payload )
    {
        this.type = Type.None;

        this.uncompressed_length = payload.length;

        // Assign instead of "+=": the previous chunk_length may stem from an
        // earlier call or from a header that was read before.
        this.chunk_length = (this.length - size_t.sizeof) + payload.length;

        this.crc32_ = this.crc32(payload);

        return &this;
    }

    /**************************************************************************

        Sets this instance to create a Start header. Since a Start chunk has no
        payload, the header data are a full Start chunk.

        Params:
            total_uncompressed_length = total uncompressed length of data
                                        contained in the following chunks

        Returns:
            this instance

     **************************************************************************/

    typeof(&this) start ( size_t total_uncompressed_length )
    {
        this = typeof (this).init;

        this.type = Type.Start;

        this.uncompressed_length = total_uncompressed_length;

        this.crc32_ = this.crc32();

        return &this;
    }

    /**************************************************************************

        Sets this instance to create a Stop header. Since a Stop chunk has no
        payload, the header data are a full Stop chunk.

        Returns:
            this instance

     **************************************************************************/

    typeof(&this) stop ( )
    {
        this = typeof (this).init;

        this.type = Type.Stop;

        this.crc32_ = this.crc32();

        return &this;
    }

    /**************************************************************************

        Reads chunk which is expected to be a Start chunk or a Null chunk.

        After chunk has been read, this.type is either set to Start, if the
        provided chunk was a start chunk, or to Stop for a Null chunk.
        this.uncompressed_length reflects the total uncompressed size of the
        data in the chunks that will follow.

        Params:
            chunk = input chunk

        Returns:
            this instance

        Throws:
            CompressException if chunk is neither a Start chunk, as expected,
            nor a Stop or Null chunk, or if read() rejects the chunk

     **************************************************************************/

    typeof(&this) readStart ( void[] chunk )
    {
        this.read(chunk);

        enforce!(CompressException)(this.type == Type.Start || this.type == Type.Stop,
                                     this.ErrMsgSource ~ ": Not a Start header as expected");

        return &this;
    }

    /**************************************************************************

        Checks whether chunk is valid or not. If chunk is long enough to hold
        a header, this object's data members are initialised with the chunk
        header info and the CRC32 value is verified.

        NOTE(review): because the length check comes first, the Null chunk
        branch below cannot be reached (a Null chunk is always shorter than
        this.read_length), and, unlike read(), chunk_length is not validated.
        Both are kept as they are to preserve the results callers currently
        get; confirm the intended behaviour before changing.

        Params:
            chunk = input chunk

        Returns:
            true if the chunk is valid

     **************************************************************************/

    bool tryRead ( void[] chunk )
    {
        if ( chunk.length >= this.read_length )
        {
            if (this.isNullChunk(chunk))
            {
                this.stop();
                return true;
            }
            else
            {
                this.setHeader(chunk);
                auto payload = this.strip(chunk);
                return this.crc32_ == this.crc32(payload);
            }
        }

        return false;
    }

    /**************************************************************************

        Checks whether chunk is a Stop / Null chunk. The data members of this
        instance are set from the chunk provided.

        Params:
            chunk = input chunk

        Returns:
            true if chunk is a Stop or Null chunk

     **************************************************************************/

    bool tryReadStop ( void[] chunk )
    {
        return tryReadType(chunk, Type.Stop, true);
    }

    /**************************************************************************

        Checks whether chunk is a Start / Null chunk. The data members of this
        instance are set from the chunk provided.

        Params:
            chunk = input chunk

        Returns:
            true if chunk is a Start or Null chunk

     **************************************************************************/

    bool tryReadStart ( void[] chunk )
    {
        return tryReadType(chunk, Type.Start, true);
    }

    /**************************************************************************

        Returns the header data of this instance as they appear at the
        beginning of a chunk: the full header if the chunk length is stored
        inline, or the header without the leading chunk length value
        otherwise. The returned slice references this instance and always has
        a length of this.read_length.

        Returns:
            header data of this instance

     **************************************************************************/

    void[] data ( )
    {
        static if ( LengthInline )
        {
            return (cast(void*) &this)[0 .. this.length];
        }
        else
        {
            // The chunk length is not part of the chunk: skip it rather than
            // cutting off the end of the header.
            return (cast(void*) &this)[size_t.sizeof .. this.length];
        }
    }

    /**************************************************************************

        Returns the header data of this instance without the leading chunk
        length value. The returned slice references this instance.

        Returns:
            header data of this instance without the leading chunk length value

     **************************************************************************/

    void[] data_without_length ( )
    {
        // Slice up to the end of the header; slicing to read_length would
        // cut off uncompressed_length if the length is not stored inline.
        return (cast(void*) &this)[size_t.sizeof .. this.length];
    }

    /**************************************************************************

        Reads the header from chunk and sets the members of this instance to
        the values contained in the header. For a Null chunk this instance is
        set to a Stop header instead.

        Params:
            chunk = chunk with header (or Null chunk)

        Returns:
            payload from chunk (with header data stripped) or null for a Null
            chunk

        Throws:
            CompressException if chunk is shorter than this.read_length, if
            the chunk length does not match the header or on CRC32 mismatch

     **************************************************************************/

    void[] read ( void[] chunk )
    {
        void[] payload = null;

        if (this.isNullChunk(chunk))
        {
            this.stop();
        }
        else
        {
            // strip() verifies that chunk is long enough; do this before
            // setHeader() reads the header bytes through a raw pointer.
            payload = this.strip(chunk);

            this.setHeader(chunk);

            enforce!(CompressException)(this.lengthValid(chunk),
                                         this.ErrMsgSource ~ ": Chunk length mismatch");

            enforce!(CompressException)(this.crc32_ == this.crc32(payload),
                                         this.ErrMsgSource ~ ": Chunk data corrupted (CRC32 mismatch)");
        }

        return payload;
    }

    /**************************************************************************

        Checks whether the chunk_length member is correct for the given chunk.

        Params:
            chunk = chunk to check (with header)

        Returns:
            non-zero (1) if the length of chunk corresponds to the
            chunk_length member or 0 otherwise

     **************************************************************************/

    size_t lengthValid ( void[] chunk )
    {
        static if ( LengthInline )
        {
            return chunk.length == this.chunk_length + this.chunk_length.sizeof;
        }
        else
        {
            return chunk.length == this.chunk_length;
        }
    }

    /**************************************************************************

        Sets the internal data members from the given chunk. Neither the
        chunk length nor the CRC32 value is verified.

        Params:
            chunk = data to read from, starting with the header

        Returns:
            this

        Throws:
            CompressException if chunk is shorter than this.read_length

     **************************************************************************/

    typeof(&this) setHeader ( void[] chunk )
    {
        // The header is read through raw pointers below, make sure all bytes
        // accessed are inside chunk.
        enforce!(CompressException)(chunk.length >= this.read_length,
                                     this.ErrMsgSource ~ ": Chunk too short to read header");

        static if ( LengthInline )
        {
            this = *cast(typeof(&this)) chunk.ptr;
        }
        else
        {
            // The chunk does not contain the length field: take the length
            // from the chunk itself and read the remaining header elements
            // one after another.
            this.chunk_length = chunk.length;

            void* read_ptr = chunk.ptr;
            this.crc32_ = *(cast(typeof(this.crc32_)*) read_ptr);
            read_ptr += this.crc32_.sizeof;
            this.type = *(cast(typeof(this.type)*) read_ptr);
            read_ptr += this.type.sizeof;
            this.uncompressed_length = *(cast(typeof(this.uncompressed_length)*) read_ptr);
        }

        return &this;
    }

    /**************************************************************************

        Calculates the CRC32 value of the header elements after crc32_ and,
        if provided, of payload.

        Params:
            payload = chunk payload to include in the calculation (optional)

        Returns:
            calculated CRC32 value

     **************************************************************************/

    uint crc32 ( void[] payload = null )
    {
        // Header elements following the crc32_ member
        auto tail = (cast (void*) &this)[this.crc32_.offsetof + this.crc32_.sizeof .. this.length];

        uint crc = LzoCrc.crc32(tail);

        if (payload)
        {
            crc = LzoCrc.crc32(crc, payload);
        }

        return crc;
    }

    /**************************************************************************

        Reads chunk which is expected to be of the specified type, or optionally
        a Null chunk; does not throw an exception if the chunk header is invalid
        or not of the specified types but returns false instead.

        After chunk has been read, this.type is either set to the specified
        type, if the provided chunk was of that type, or to Stop for a Null
        chunk.

        this.uncompressed_length reflects the total uncompressed size of the
        data in the chunks that will follow.

        Params:
            chunk      = input chunk
            check_type = expected chunk type
            allow_null = true: accept a Null chunk, too

        Returns:
            true if chunk is of the specified type, as expected, or a Null
            chunk, or false otherwise

     **************************************************************************/

    private bool tryReadType ( void[] chunk, Type check_type, bool allow_null )
    {
        bool validated = false;

        if (allow_null && this.isNullChunk(chunk))
        {
            this.stop();

            validated = true;
        }
        else if (chunk.length == this.read_length)
        {
            // Only payload-less chunks (header only) are considered here
            this.setHeader(chunk);

            if (this.type == check_type)
            {
                static if ( LengthInline )
                {
                    if (chunk.length == this.chunk_length + this.chunk_length.sizeof)
                    {
                        validated = this.crc32_ == this.crc32();
                    }
                }
                else
                {
                    validated = this.crc32_ == this.crc32();
                }
            }
        }

        return validated;
    }

    /**************************************************************************

        Strips the header from chunk

        Params:
            chunk = chunk with header (must not be a Null chunk)

        Returns:
            chunk payload, that is, the chunk data without header (slice)

        Throws:
            CompressException if chunk.length is shorter than read_length

     **************************************************************************/

    static void[] strip ( void[] chunk )
    {
        enforce!(CompressException)(chunk.length >= read_length,
                                     ErrMsgSource ~ ": Chunk too short to strip header");

        return chunk[read_length .. $];
    }

    /**************************************************************************

        Checks whether chunk is a Null chunk, that is, it has a Null header. A
        Null header is defined as
                                                                             ---
            void[size_t.sizeof] null_header;
            null_header[] = 0;
                                                                             ---

        Since no payload can follow a Null header, a Null header is a
        complete chunk on its own, that is the Null chunk. If the chunk length
        is not stored inline, the Null chunk is the empty chunk.

        Params:
            chunk = input chunk

        Returns:
            true if chunk is a Null chunk or false otherwise.

     **************************************************************************/

    static bool isNullChunk ( void[] chunk )
    {
        static if ( LengthInline )
        {
            return (chunk.length == size_t.sizeof)? !*cast (size_t*) chunk.ptr : false;
        }
        else
        {
            return chunk.length == 0;
        }
    }
}
609 
610     /******************************************************************************
611 
612         Calculates the sum of the sizes of the types of T
613 
614      ******************************************************************************/
615 
template SizeofTuple ( T ... )
{
    // Recursion: an empty type list sums up to 0, otherwise the result is
    // the size of the first type plus the sum over the remaining types.
    static if (T.length == 0)
    {
        static immutable size_t SizeofTuple = 0;
    }
    else
    {
        static immutable SizeofTuple = T[0].sizeof + SizeofTuple!(T[1 .. $]);
    }
}
631 
632 /******************************************************************************
633 
634     Unit test
635 
636     Add -debug=GcDisabled to the compiler command line to disable the garbage
637     collector.
638 
639  ******************************************************************************/
640 
641 
642 version (unittest):
643 
644 import ocean.time.StopWatch;
645 
646 import ocean.text.util.MetricPrefix;
647 
648 debug (GcDisabled) import ocean.core.Memory;
649 
650 import core.stdc.signal: signal, SIGINT;
651 
652 /******************************************************************************
653 
654 Terminator structure
655 
656 ******************************************************************************/
657 
struct Terminator
{
    /**************************************************************************

        Flag that is raised by terminate(); initially lowered

     **************************************************************************/

    static bool terminated = false;

    /**************************************************************************

        C signal handler: raises the termination flag

        Params:
            code = signal code (not used)

     **************************************************************************/

    extern (C) static void terminate ( int code ) nothrow @nogc
    {
        terminated = true;
    }
}
681 
unittest
{
    // Generates, writes and reads back headers, verifies the results and
    // measures the time taken by each step.
    //
    // Compile with -version=UnitTestVerbose to see the timing output and with
    // -debug=OceanPerformanceTest to additionally loop until Ctrl+C is pressed
    // (for memory leak detection).

    debug (GcDisabled)
    {
        pragma (msg, "LzoHeader unittest: garbage collector disabled");
        GC.disable();

        // Do not leave the collector disabled for the tests that follow
        scope (exit) GC.enable();
    }

    StopWatch swatch;

    LzoHeader!() header;

    static immutable N = 1000;

    ubyte[header.sizeof][N] start_header_data, stop_header_data;

    static immutable C = 10;

    scope chunks4k  = new void[][](C, 4 * 1024);
    scope chunks64k = new void[][](C, 64 * 1024);
    scope chunks1M  = new void[][](C, 1024 * 1024);

    foreach (ref chunk; chunks4k)
    {
        (cast (char[]) chunk)[] = 'A';
    }

    foreach (ref chunk; chunks64k)
    {
        (cast (char[]) chunk)[] = 'B';
    }

    foreach (ref chunk; chunks1M)
    {
        (cast (char[]) chunk)[] = 'C';
    }

    swatch.start();

    for (uint i = 0; i < N; i++)
    {
        start_header_data[i][] = cast (ubyte[]) header.start(4 * 1024).data;
    }

    ulong us_start = swatch.microsec();

    for (uint i = 0; i < N; i++)
    {
        stop_header_data[i][] = cast (ubyte[]) header.stop().data;
    }

    ulong us_stop = swatch.microsec();

    for (uint i = 0; i < N; i++)
    {
        bool is_start = header.tryReadStart(start_header_data[i]);

        assert(is_start, "LzoHeader unittest: Start header not recognised");
    }

    ulong us_try_read_start = swatch.microsec();

    for (uint i = 0; i < N; i++)
    {
        header.readStart(start_header_data[i]);

        assert(header.type == header.type.Start,
               "LzoHeader unittest: wrong type after reading a Start header");
        assert(header.uncompressed_length == 4 * 1024,
               "LzoHeader unittest: wrong uncompressed length after reading a Start header");
    }

    ulong us_read_start = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        header.write(chunks4k[i]);
    }

    ulong us_write4k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        header.write(chunks64k[i]);
    }

    ulong us_write64k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        header.write(chunks1M[i]);
    }

    ulong us_write1M = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        auto payload = header.read(chunks4k[i]);

        assert(payload.length == chunks4k[i].length - header.read_length,
               "LzoHeader unittest: wrong payload length (4 kB chunk)");
    }

    ulong us_read4k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        auto payload = header.read(chunks64k[i]);

        assert(payload.length == chunks64k[i].length - header.read_length,
               "LzoHeader unittest: wrong payload length (64 kB chunk)");
    }

    ulong us_read64k = swatch.microsec();

    for (uint i = 0; i < C; i++)
    {
        auto payload = header.read(chunks1M[i]);

        assert(payload.length == chunks1M[i].length - header.read_length,
               "LzoHeader unittest: wrong payload length (1 MB chunk)");
    }

    ulong us_read1M = swatch.microsec();

    // Turn the accumulated stop watch readings into per-step durations
    us_read1M         -= us_read64k;
    us_read64k        -= us_read4k;
    us_read4k         -= us_write1M;
    us_write1M        -= us_write64k;
    us_write64k       -= us_write4k;
    us_write4k        -= us_read_start;
    us_read_start     -= us_try_read_start;
    us_try_read_start -= us_stop;
    us_stop           -= us_start;

    // Checks outside of the timed sections: Stop headers and the Null chunk

    for (uint i = 0; i < N; i++)
    {
        bool is_stop = header.tryReadStop(stop_header_data[i]);

        assert(is_stop, "LzoHeader unittest: Stop header not recognised");
        assert(header.type == header.type.Stop,
               "LzoHeader unittest: wrong type after reading a Stop header");
    }

    size_t null_header = 0;
    void[] null_chunk  = (cast (void*) &null_header)[0 .. null_header.sizeof];

    assert(header.isNullChunk(null_chunk),
           "LzoHeader unittest: Null chunk not recognised");
    assert(header.read(null_chunk) is null,
           "LzoHeader unittest: Null chunk must not have a payload");
    assert(header.type == header.type.Stop,
           "LzoHeader unittest: Null chunk must be read as Stop");

    version (UnitTestVerbose)
        Stderr.formatln("LzoHeader unittest results:\n\t"
                 ~ "start():        1000 headers generated within {} ms\n\t"
                 ~ "stop():         1000 headers generated within {} ms\n\t"
                 ~ "tryReadStart(): 1000 headers checked   within {} ms\n\t"
                 ~ "readStart():    1000 headers checked   within {} ms\n\t"
                 ~ "\n\t"
                 ~ "write(): 10 chunks of  4 kB each written within {} ms\n\t"
                 ~ "write(): 10 chunks of 64 kB each written within {} ms\n\t"
                 ~ "write(): 10 chunks of  1 MB each written within {} ms\n\t"
                 ~ "\n\t"
                 ~ "read():  10 chunks of  4 kB each read    within {} ms\n\t"
                 ~ "read():  10 chunks of 64 kB each read    within {} ms\n\t"
                 ~ "read():  10 chunks of  1 MB each read    within {} ms\n",
                   us_start          / 1000.f,
                   us_stop           / 1000.f,
                   us_try_read_start / 1000.f,
                   us_read_start     / 1000.f,
                   us_write4k        / 1000.f,
                   us_write64k       / 1000.f,
                   us_write1M        / 1000.f,
                   us_read4k         / 1000.f,
                   us_read64k        / 1000.f,
                   us_read1M         / 1000.f);

    debug ( OceanPerformanceTest )
    {
        version (UnitTestVerbose)
            Stderr.formatln("LzoHeader unittest: Looping for memory leak detection; "
                     ~ "watch memory usage and press Ctrl+C to quit");

        // The SIGINT handler is only needed to leave the loop below, so it
        // is only installed when the loop is compiled in.
        auto prev_sigint_handler = signal(SIGINT, &Terminator.terminate);

        scope (exit) signal(SIGINT, prev_sigint_handler);

        while (!Terminator.terminated)
        {
            for (uint i = 0; i < N; i++)
            {
                start_header_data[i][] = cast (ubyte[]) header.start(4 * 1024).data;
                stop_header_data[i][] = cast (ubyte[]) header.stop().data;

                header.tryReadStart(start_header_data[i]);
                header.readStart(start_header_data[i]);
            }

            for (uint i = 0; i < C; i++)
            {
                header.write(chunks4k[i]);
                header.write(chunks64k[i]);
                header.write(chunks1M[i]);

                header.read(chunks4k[i]);
                header.read(chunks64k[i]);
                header.read(chunks1M[i]);
            }
        }
    }

    version (UnitTestVerbose) Stderr.formatln("\n\nLzoHeader unittest finished\n");
}