1 /*******************************************************************************
2 
    Converts between native and text representations of HTTP time
    values. Internally, time is represented as UTC with an epoch
    fixed at Jan 1st 1970. The text representation is formatted in
    accordance with RFC 1123, and the parser will accept any one of
    the RFC 1123, RFC 850, ISO-8601, DOS time, or asctime formats.
8 
9     See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html for
10     further detail.
11 
12     Applying the D "import alias" mechanism to this module is highly
13     recommended, in order to limit namespace pollution:
14 
15     ---
16         import TimeStamp = ocean.text.convert.TimeStamp;
17 
18         auto t = TimeStamp.parse ("Sun, 06 Nov 1994 08:49:37 GMT");
19     ---
20 
21     Copyright:
22         Copyright (c) 2004 Kris Bell.
23         Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
24         All rights reserved.
25 
26     License:
27         Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
28         See LICENSE_TANGO.txt for details.
29 
30     Version: Initial release: May 2005
31 
32     Authors: Kris
33 
34 *******************************************************************************/
35 
36 module ocean.text.convert.TimeStamp;
37 
38 import ocean.transition;
39 
40 import ocean.time.Time;
41 
42 import ocean.core.ExceptionDefinitions;
43 
44 import Util = ocean.text.Util;
45 
46 import ocean.time.chrono.Gregorian;
47 
48 import Integer = ocean.text.convert.Integer_tango;
49 
50 import ocean.core.Verify;
51 
52 version(UnitTest) import ocean.core.Test;
53 
/******************************************************************************

  Parses the whole of the provided input as a time stamp, trying each
  format known to parse().

  Params:
      src = text to convert

  Returns:
      the value produced by parse()

  Throws:
      IllegalArgumentException when parse() consumed fewer elements than
      src contains

  NOTE(review): parse() yields a Time whereas this function is declared
  to return ulong - confirm the conversion is valid before instantiating
  this template.

 ******************************************************************************/

ulong toTime(T) (T[] src)
{
    uint consumed;
    auto stamp = parse (src, &consumed);

    // everything was eaten by the parser: hand the result back
    if (consumed >= src.length)
        return stamp;

    throw new IllegalArgumentException ("unknown time format: "~src);
}
70 
/******************************************************************************

  Convenience wrapper around format(): renders the provided time into a
  temporary stack buffer and returns a freshly allocated copy of the
  resulting text.

  See format() for details

 ******************************************************************************/

char[] toString (Time time)
{
    char[32] scratch = void;
    auto     text    = format (scratch, time);

    // the slice refers to the stack buffer, so it must be copied
    return text.dup;
}
86 
/******************************************************************************

  RFC1123 formatted time

  Converts to the format "Sun, 06 Nov 1994 08:49:37 GMT", and
  returns a populated slice of the provided buffer. Note that
  RFC1123 format is always in absolute GMT time.

  This overload accepts any time-like value: it is cast to Time and
  forwarded to the Time overload of format().

  Throws an exception where the supplied time is invalid

 ******************************************************************************/

Const!(T)[] format(T, U=Time) (T[] output, U t)
{
    auto stamp = cast(Time) t;

    return format!(T) (output, stamp);
}
102 
/******************************************************************************

  RFC1123 formatted time :: "Sun, 06 Nov 1994 08:49:37 GMT"

  Params:
      output = destination buffer; must hold at least 29 elements
      t      = time to render

  Returns:
      the slice handed back by Util.layout() for the output buffer

  Throws:
      IllegalArgumentException where t is Time.max

 ******************************************************************************/

Const!(T)[] format(T) (T[] output, Time t)
{
    static Const!(T)[][] Days   = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
    static Const!(T)[][] Months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

    // renders one numeric field into its own slot of the scratch buffer
    Const!(T)[] digits (T[] slot, long number)
    {
        return Integer.formatter!(T) (slot, number, 'u', 0, 8);
    }

    verify (output.length >= 29);
    if (t is t.max)
        throw new IllegalArgumentException ("TimeStamp.format :: invalid Time argument");

    // break the time down into clock and calendar fields
    auto clock    = t.time;
    auto calendar = Gregorian.generic.toDate (t);

    // each field gets a disjoint slice, so the results do not overlap
    T[14] scratch = void;

    auto dayName   = Days[calendar.dow];
    auto day       = digits (scratch[0..2],   calendar.day);
    auto monthName = Months[calendar.month-1];
    auto year      = digits (scratch[2..6],   calendar.year);
    auto hours     = digits (scratch[6..8],   clock.hours);
    auto minutes   = digits (scratch[8..10],  clock.minutes);
    auto seconds   = digits (scratch[10..12], clock.seconds);

    // use the featherweight formatter ...
    return Util.layout (output, cast(Const!(T)[])"%0, %1 %2 %3 %4:%5:%6 GMT",
            dayName, day, monthName, year, hours, minutes, seconds);
}
134 
// format(): the 1970 epoch must render as the RFC 1123 text below, both
// into a dynamic array and into a static array of exactly 29 elements.
unittest
{
    static immutable STR_1970 = "Thu, 01 Jan 1970 00:00:00 GMT";
    // dynamically sized buffer
    mstring buf;
    buf.length = 29;
    test(format(buf, Time.epoch1970) == STR_1970);
    // statically sized buffer
    char[29] static_buf;
    test(format(static_buf, Time.epoch1970) == STR_1970);
}
144 
/******************************************************************************

  ISO-8601 format :: "2006-01-31T14:49:30Z"

  This overload accepts any time-like value: it is cast to Time and
  forwarded to the Time overload of format8601(), so that the result is
  ISO-8601 text rather than the RFC 1123 text produced by format().

  Throws an exception where the supplied time is invalid

 ******************************************************************************/

Const!(T)[] format8601(T, U=Time) (T[] output, U t)
{return format8601!(T)(output, cast(Time) t);}
155 
/******************************************************************************

  ISO-8601 format :: "2006-01-31T14:49:30Z"

  Params:
      output = destination buffer; must hold at least 29 elements, the
               same minimum that format() enforces
      t      = time to render

  Returns:
      the slice handed back by Util.layout() for the output buffer

  Throws:
      IllegalArgumentException where t is Time.max

 ******************************************************************************/

Const!(T)[] format8601(T) (T[] output, Time t)
{
    // renders one numeric field into its own slot of the scratch buffer
    Const!(T)[] convert (T[] tmp, long i)
    {
        return Integer.formatter!(T) (tmp, i, 'u', 0, 8);
    }

    verify (output.length >= 29);
    if (t is t.max)
        throw new IllegalArgumentException ("TimeStamp.format :: invalid Time argument");

    // convert time to field values
    auto time = t.time;
    auto date = Gregorian.generic.toDate (t);

    // use the featherweight formatter ... every placeholder is a plain
    // "%N" and the ':' separators are literal text
    T[14] tmp = void;
    return Util.layout (output, cast(Const!(T)[]) "%0-%1-%2T%3:%4:%5Z",
            convert (tmp[0..4], date.year),
            convert (tmp[4..6], date.month),
            convert (tmp[6..8], date.day),
            convert (tmp[8..10], time.hours),
            convert (tmp[10..12], time.minutes),
            convert (tmp[12..14], time.seconds)
            );
}
183 
// format8601(): the 1970 epoch must render as the ISO-8601 text below,
// both into a dynamic array and into a static array of 29 elements.
unittest
{
    static immutable STR_1970 = "1970-01-01T00:00:00Z";
    // dynamically sized buffer
    mstring buf;
    buf.length = 29;
    test(format8601(buf, Time.epoch1970) == STR_1970);
    // statically sized buffer
    char[29] static_buf;
    test(format8601(static_buf, Time.epoch1970) == STR_1970);
}
193 
/******************************************************************************

  Parse provided input and return a UTC epoch time. The RFC 1123,
  RFC 850, ISO-8601, DOS and asctime parsers are tried in that order
  and the first one to consume any input wins.

  Params:
      src = text to parse
      ate = optional; receives the count of elements parsed. It is left
            unchanged when no parser accepts the input

  Returns:
      the parsed time, or Time.max to indicate a parse failure

 ******************************************************************************/

Time parse(T) (T[] src, uint* ate = null)
{
    Time   stamp;
    size_t used = rfc1123 (src, stamp);

    // fall through the remaining formats until one of them matches
    if (used == 0)
        used = rfc850 (src, stamp);
    if (used == 0)
        used = iso8601 (src, stamp);
    if (used == 0)
        used = dostime (src, stamp);
    if (used == 0)
        used = asctime (src, stamp);

    if (used == 0)
        return Time.max;

    if (ate !is null)
        *ate = cast(uint) used;
    return stamp;
}
221 
222 
/******************************************************************************

  Parse provided input into its time-of-day and date fields. The
  RFC 1123, RFC 850, ISO-8601, DOS and asctime parsers are tried in that
  order and the first one to consume any input wins.

  Params:
      src  = text to parse
      tod  = receives the time-of-day fields
      date = receives the date fields
      ate  = optional; receives the count of elements parsed. It is left
             unchanged when no parser accepts the input

  Returns:
      true when one of the parsers accepted the input, false to indicate
      a parse failure

 ******************************************************************************/

bool parse(T) (T[] src, ref TimeOfDay tod, ref Date date, uint* ate = null)
{
    size_t len;

    if ((len = rfc1123 (src, tod, date)) > 0 ||
            (len = rfc850   (src, tod, date)) > 0 ||
            (len = iso8601  (src, tod, date)) > 0 ||
            (len = dostime  (src, tod, date)) > 0 ||
            (len = asctime (src, tod, date)) > 0)
    {
        // size_t does not implicitly narrow to uint on 64-bit targets
        if (ate)
            *ate = cast(uint) len;
        return true;
    }
    return false;
}
249 
/******************************************************************************

  RFC 822, updated by RFC 1123 :: "Sun, 06 Nov 1994 08:49:37 GMT"

  Parses into date and time-of-day fields and converts them to a Time
  via the generic Gregorian calendar. The value argument is written only
  when the parse succeeds.

  Returns the number of elements consumed by the parse; zero if
  the parse failed

 ******************************************************************************/

size_t rfc1123(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    auto consumed = rfc1123!(T)(src, tod, date);

    if (consumed != 0)
        value = Gregorian.generic.toTime(date, tod);

    return consumed;
}
269 
270 
/******************************************************************************

  RFC 822, updated by RFC 1123 :: "Sun, 06 Nov 1994 08:49:37 GMT"

  The leading day name must be one of the three-letter names known to
  parseShortDay(); its value is not stored. Day, month and year must all
  be non-zero. No element beyond the end of src is ever read, so
  truncated input simply fails to parse.

  Params:
      src  = text to parse
      tod  = receives hours, minutes and seconds
      date = receives day, month and year

  Returns the number of elements consumed by the parse; zero if
  the parse failed. The fields of tod and date may have been partially
  written when the parse fails.

 ******************************************************************************/

size_t rfc1123(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least n unread elements remain before e
    bool has (size_t n)
    {
        return p <= e && cast(size_t) (e - p) >= n;
    }

    // consume one expected character, without reading at or beyond e
    bool eat (char c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "06 Nov 1994"; parseMonth() slices three elements unchecked, hence
    // the has(3) guard in front of it
    bool dt ()
    {
        return ((date.day = parseInt(p, e)) > 0            &&
                eat (' ')                                   &&
                has (3) && (date.month = parseMonth(p)) > 0 &&
                eat (' ')                                   &&
                (date.year = parseInt(p, e)) > 0);
    }

    // parseShortDay() also slices three elements unchecked
    if (has (3) && parseShortDay(p) >= 0 &&
            eat (',')             &&
            eat (' ')             &&
            dt ()                 &&
            eat (' ')             &&
            time (tod, p, e)      &&
            eat (' ')             &&
            has (3) && p[0..3] == "GMT")
    {
        return cast(size_t) ((p+3) - src.ptr);
    }
    return 0;
}
307 
308 
/******************************************************************************

  RFC 850, obsoleted by RFC 1036 :: "Sunday, 06-Nov-94 08:49:37 GMT"

  Parses into date and time-of-day fields and converts them to a Time
  via the generic Gregorian calendar. The value argument is written only
  when the parse succeeds.

  Returns the number of elements consumed by the parse; zero if
  the parse failed

 ******************************************************************************/

size_t rfc850(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    auto consumed = rfc850!(T)(src, tod, date);

    // nothing matched: leave the caller's value alone
    if (consumed == 0)
        return consumed;

    value = Gregorian.generic.toTime (date, tod);
    return consumed;
}
328 
/******************************************************************************

  RFC 850, obsoleted by RFC 1036 :: "Sunday, 06-Nov-94 08:49:37 GMT"

  The leading day name must be one of the full names known to
  parseFullDay(); its value is not stored. Day and month must be
  non-zero. The year needs at least one digit, so the two-digit year
  "00" is accepted. Years below 70 are mapped to 20xx and years from
  70 to 99 to 19xx. No element beyond the end of src is ever read, so
  truncated input simply fails to parse.

  Params:
      src  = text to parse
      tod  = receives hours, minutes and seconds
      date = receives day, month and year

  Returns the number of elements consumed by the parse; zero if
  the parse failed. The fields of tod and date may have been partially
  written when the parse fails.

 ******************************************************************************/

size_t rfc850(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least n unread elements remain before e
    bool has (size_t n)
    {
        return p <= e && cast(size_t) (e - p) >= n;
    }

    // consume one expected character, without reading at or beyond e
    bool eat (char c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // the year succeeds when at least one digit was read: testing the
    // value against zero would reject the year "00"
    bool year ()
    {
        auto start = p;
        date.year = parseInt (p, e);
        return p > start;
    }

    // "06-Nov-94"; parseMonth() slices three elements unchecked, hence
    // the has(3) guard in front of it
    bool dt ()
    {
        return ((date.day = parseInt(p, e)) > 0            &&
                eat ('-')                                   &&
                has (3) && (date.month = parseMonth(p)) > 0 &&
                eat ('-')                                   &&
                year ());
    }

    // parseFullDay() slices up to nine elements ("Wednesday") unchecked,
    // and any complete stamp is longer than that. It reports failure
    // with an all-bits-set result, which an unsigned ">= 0" cannot detect
    if (has (9) && parseFullDay(p) != size_t.max &&
            eat (',')            &&
            eat (' ')            &&
            dt ()                &&
            eat (' ')            &&
            time (tod, p, e)     &&
            eat (' ')            &&
            has (3) && p[0..3] == "GMT")
    {
        // expand a two-digit year
        if (date.year < 70)
            date.year += 2000;
        else
            if (date.year < 100)
                date.year += 1900;

        return cast(size_t) ((p+3) - src.ptr);
    }
    return 0;
}
371 
372 
/******************************************************************************

  ANSI C's asctime() format :: "Sun Nov 6 08:49:37 1994"

  Parses into date and time-of-day fields and converts them to a Time
  via the generic Gregorian calendar. The value argument is written only
  when the parse succeeds.

  Returns the number of elements consumed by the parse; zero if
  the parse failed

 ******************************************************************************/

size_t asctime(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    auto consumed = asctime!(T)(src, tod, date);
    auto matched  = consumed != 0;

    if (matched)
        value = Gregorian.generic.toTime (date, tod);

    return consumed;
}
392 
/******************************************************************************

  ANSI C's asctime() format :: "Sun Nov 6 08:49:37 1994"

  The leading day name must be one of the three-letter names known to
  parseShortDay(); its value is not stored. The day of the month may be
  preceded by one extra space. Day, month and year must all be non-zero.
  No element beyond the end of src is ever read, so truncated input
  simply fails to parse.

  Params:
      src  = text to parse
      tod  = receives hours, minutes and seconds
      date = receives day, month and year

  Returns the number of elements consumed by the parse; zero if
  the parse failed. The fields of tod and date may have been partially
  written when the parse fails.

 ******************************************************************************/

size_t asctime(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least n unread elements remain before e
    bool has (size_t n)
    {
        return p <= e && cast(size_t) (e - p) >= n;
    }

    // consume one expected character, without reading at or beyond e
    bool eat (char c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "Nov 6" or "Nov  6"; parseMonth() slices three elements unchecked,
    // hence the has(3) guard in front of it
    bool dt ()
    {
        return (has (3) && (date.month = parseMonth(p)) > 0 &&
                eat (' ')                                    &&
                ((date.day = parseInt(p, e)) > 0
                 || (eat (' ') && (date.day = parseInt(p, e)) > 0)));
    }

    // parseShortDay() also slices three elements unchecked
    if (has (3) && parseShortDay(p) >= 0 &&
        eat (' ')             &&
        dt ()                 &&
        eat (' ')             &&
        time (tod, p, e)      &&
        eat (' ')             &&
        (date.year = parseInt (p, e)) > 0)
    {
        return cast(size_t) (p - src.ptr);
    }
    return 0;
}
427 
/******************************************************************************

  DOS time format :: "12-31-06 08:49AM"

  Parses into date and time-of-day fields and converts them to a Time
  via the generic Gregorian calendar. The value argument is written only
  when the parse succeeds.

  Returns the number of elements consumed by the parse; zero if
  the parse failed

 ******************************************************************************/

size_t dostime(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    auto consumed = dostime!(T)(src, tod, date);

    if (consumed > 0)
        value = Gregorian.generic.toTime(date, tod);

    return consumed;
}
447 
448 
/******************************************************************************

  DOS time format :: "12-31-06 08:49AM"

  Month, day and hour must be non-zero. The year and the minutes need
  at least one digit each, so "00" is accepted for both. The time must
  be followed by "AM" or "PM". The hour is converted from the 12-hour
  clock: 12AM becomes hour 0, 12PM stays hour 12, and every other PM
  hour has 12 added. Years below 70 are mapped to 20xx and years from
  70 to 99 to 19xx. No element beyond the end of src is ever read, so
  truncated input simply fails to parse.

  Params:
      src  = text to parse
      tod  = receives hours and minutes; other fields are not written
      date = receives month, day and year

  Returns the number of elements consumed by the parse; zero if
  the parse failed. The fields of tod and date may have been partially
  written when the parse fails.

 ******************************************************************************/

size_t dostime(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least n unread elements remain before e
    bool has (size_t n)
    {
        return p <= e && cast(size_t) (e - p) >= n;
    }

    // consume one expected character, without reading at or beyond e
    bool eat (char c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "12-31-06"; the year is accepted on digit count rather than value
    // so that "00" is legal
    bool dt ()
    {
        if (!((date.month = parseInt(p, e)) > 0 &&
              eat ('-')                          &&
              (date.day = parseInt(p, e)) > 0    &&
              eat ('-')))
            return false;

        auto start = p;
        date.year = parseInt (p, e);
        return p > start;
    }

    // "08:49"; the minutes are accepted on digit count rather than value
    // so that "00" is legal
    bool hhmm ()
    {
        if (!((tod.hours = parseInt(p, e)) > 0 && eat (':')))
            return false;

        auto start = p;
        tod.minutes = parseInt (p, e);
        return p > start;
    }

    if (dt ()                              &&
        eat (' ')                          &&
        hhmm ()                            &&
        has (2)                            &&
        (p[0] == 'A' || p[0] == 'P')       &&
        p[1] == 'M')
    {
        // 12-hour to 24-hour clock
        if (tod.hours == 12)
            tod.hours = 0;
        if (p[0] == 'P')
            tod.hours += 12;

        // expand a two-digit year
        if (date.year < 70)
            date.year += 2000;
        else
            if (date.year < 100)
                date.year += 1900;

        return cast(size_t) ((p+2) - src.ptr);
    }
    return 0;
}
491 
/******************************************************************************

  ISO-8601 format :: "2006-01-31 14:49:30,001"

  Parses into date and time-of-day fields and converts them to a Time
  via the generic Gregorian calendar. The value argument is written only
  when the parse succeeds.

  Returns the number of elements consumed by the parse; zero if
  the parse failed

  Quote from http://en.wikipedia.org/wiki/ISO_8601 (2009-09-01):
  "Decimal fractions may also be added to any of the three time elements.
  A decimal point, either a comma or a dot (without any preference as
  stated most recently in resolution 10 of the 22nd General Conference
  CGPM in 2003), is used as a separator between the time element and
  its fraction."

 ******************************************************************************/

size_t iso8601(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    size_t consumed = iso8601!(T)(src, tod, date);

    // nothing matched: leave the caller's value alone
    if (consumed == 0)
        return consumed;

    value = Gregorian.generic.toTime(date, tod);
    return consumed;
}
518 
/******************************************************************************

  ISO-8601 format :: "2006-01-31 14:49:30,001"

  Year, month and day must all be non-zero, and date and time must be
  separated by a single space. When input remains after the seconds it
  must start with a ',' or '.' fraction separator, after which the
  milliseconds are read; any other character is a parse failure. When
  the input ends after the seconds the milliseconds are set to zero.
  No element beyond the end of src is read by this function.

  Params:
      src  = text to parse
      tod  = receives hours, minutes, seconds and milliseconds
      date = receives year, month and day

  Returns the number of elements consumed by the parse; zero if
  the parse failed. The fields of tod and date may have been partially
  written when the parse fails.

  Quote from http://en.wikipedia.org/wiki/ISO_8601 (2009-09-01):
  "Decimal fractions may also be added to any of the three time elements.
  A decimal point, either a comma or a dot (without any preference as
  stated most recently in resolution 10 of the 22nd General Conference
  CGPM in 2003), is used as a separator between the time element and
  its fraction."

 ******************************************************************************/

size_t iso8601(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // consume one expected character, without reading at or beyond e
    bool eat (char c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "2006-01-31"
    bool dt ()
    {
        return ((date.year = parseInt(p, e)) > 0  &&
                eat ('-')                          &&
                (date.month = parseInt(p, e)) > 0  &&
                eat ('-')                          &&
                (date.day = parseInt(p, e)) > 0);
    }

    if (dt ()            &&
            eat (' ')        &&
            time (tod, p, e))
    {
        // Are there chars left? If yes, parse millis. If no, millis = 0.
        if (p < e)
        {
            // check fraction separator
            auto frac_sep = *p++;
            if (frac_sep is ',' || frac_sep is '.')
                // separator is ok: parse millis
                tod.millis = parseInt (p, e);
            else
                // wrong separator: error
                return 0;
        }
        else
            tod.millis = 0;

        return cast(size_t) (p - src.ptr);
    }
    return 0;
}
570 
571 
/******************************************************************************

  Parse a time field of the form "08:49:37"

  Each of the three numbers must consist of at least one digit; zero is
  a legal value. The ':' separators are only read while p is below e.

  Params:
      time = receives hours, minutes and seconds
      p    = read position; advanced past whatever was consumed
      e    = end of the input

  Returns:
      true when hours, minutes and seconds were all present. The fields
      of time may have been partially written when false is returned.

 ******************************************************************************/

private bool time(T) (ref TimeOfDay time, ref T* p, T* e)
{
    // consume a ':' separator, without reading at or beyond e
    bool colon ()
    {
        if (p < e && *p == ':')
        {
            ++p;
            return true;
        }
        return false;
    }

    // parseInt() yields zero both for "0" and for no digits at all, so
    // success is judged by whether the read position moved

    auto mark = p;
    time.hours = parseInt (p, e);
    if (p is mark || !colon ())
        return false;

    mark = p;
    time.minutes = parseInt (p, e);
    if (p is mark || !colon ())
        return false;

    mark = p;
    time.seconds = parseInt (p, e);
    return p !is mark;
}
586 
587 
/******************************************************************************

  Match a three-letter English month abbreviation ("Jan" .. "Dec") at
  the read position.

  The three elements at p are sliced without any bounds check.

  Params:
      p = read position; advanced by three on a match, untouched
          otherwise

  Returns:
      the month number, 1 for "Jan" through 12 for "Dec", or zero when
      nothing matched

 ******************************************************************************/

private int parseMonth(T) (ref T* p)
{
    int found = 0;

    switch (p[0..3])
    {
        case "Jan": found = 1;  break;
        case "Feb": found = 2;  break;
        case "Mar": found = 3;  break;
        case "Apr": found = 4;  break;
        case "May": found = 5;  break;
        case "Jun": found = 6;  break;
        case "Jul": found = 7;  break;
        case "Aug": found = 8;  break;
        case "Sep": found = 9;  break;
        case "Oct": found = 10; break;
        case "Nov": found = 11; break;
        case "Dec": found = 12; break;
        default:                break;
    }

    // only a recognised name is consumed
    if (found != 0)
        p += 3;
    return found;
}
642 
643 
/******************************************************************************

  Match a three-letter English day abbreviation ("Sun" .. "Sat") at the
  read position.

  The three elements at p are sliced without any bounds check.

  Params:
      p = read position; advanced by three on a match, untouched
          otherwise

  Returns:
      the day index, 0 for "Sun" through 6 for "Sat", or -1 when nothing
      matched

 ******************************************************************************/

private int parseShortDay(T) (ref T* p)
{
    int found = -1;

    switch (p[0..3])
    {
        case "Sun": found = 0; break;
        case "Mon": found = 1; break;
        case "Tue": found = 2; break;
        case "Wed": found = 3; break;
        case "Thu": found = 4; break;
        case "Fri": found = 5; break;
        case "Sat": found = 6; break;
        default:               break;
    }

    // only a recognised name is consumed
    if (found >= 0)
        p += 3;
    return found;
}
683 
684 
/******************************************************************************

  Match a full English day name from the input. Sunday is 0

  The candidate slice at p is taken without any bounds check, and the
  longest name ("Wednesday") spans nine elements: the caller must make
  sure that many elements are readable at p.

  Params:
      p = read position; advanced past the name on a match, untouched
          otherwise

  Returns:
      the day index, 0 for "Sunday" through 6 for "Saturday", or
      size_t.max when nothing matched. The result is unsigned, so a
      failure cannot be detected with a ">= 0" test.

 ******************************************************************************/

private size_t parseFullDay(T) (ref T* p)
{
    // const elements, so that the literals also initialise the table
    // when T itself is mutable
    static const(T)[][] days = [
        "Sunday",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
    ];

    foreach (size_t i, day; days)
        if (day == p[0..day.length])
        {
            p += day.length;
            return i;
        }
    return size_t.max;
}
711 
712 
/******************************************************************************

  Extract an unsigned decimal integer from the input

  Digits are accumulated until a non-digit or the end of the input is
  reached. No sign is accepted and no overflow check is made.

  Params:
      p = read position; advanced past every digit consumed
      e = end of the input; nothing at or beyond it is read

  Returns:
      the accumulated value, which is zero when no digit was present

 ******************************************************************************/

private static int parseInt(T) (ref T* p, T* e)
{
    int total = 0;

    for (; p < e; ++p)
    {
        auto c = *p;

        if (c < '0' || c > '9')
            break;
        total = total * 10 + c - '0';
    }
    return total;
}
727 
728 
/******************************************************************************

  Round trip: text is parsed with parse() and rendered again with
  format(), for an RFC 1123 stamp and for an asctime stamp.

 ******************************************************************************/

unittest
{
    char[30] tmp;
    Const!(char)[] s = "Sun, 06 Nov 1994 08:49:37 GMT";

    // an RFC 1123 stamp must survive parse + format unchanged
    auto time = parse (s);
    auto text = format (tmp, time);
    test (text == s);

    // the slice drops the final '8', leaving the year 2008 with one more
    // digit sitting in memory directly behind the end of the slice; the
    // expected year below is 2008, not 20088
    cstring garbageTest = "Wed Jun 11 17:22:07 20088";
    garbageTest = garbageTest[0..$-1];
    char[128] tmp2;

    // an asctime stamp is rendered back in RFC 1123 form
    time = parse(garbageTest);
    auto text2 = format(tmp2, time);
    test (text2 == "Wed, 11 Jun 2008 17:22:07 GMT");
}