1 /*******************************************************************************
2 
3     Converts between native and text representations of HTTP time
4     values. Internally, time is represented as UTC with an epoch
5     fixed at Jan 1st 1970. The text representation is formatted in
6     accordance with RFC 1123, and the parser will accept one of
7     RFC 1123, RFC 850, or asctime formats.
8 
9     See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html for
10     further detail.
11 
12     Applying the D "import alias" mechanism to this module is highly
13     recommended, in order to limit namespace pollution:
14 
15     ---
16         import TimeStamp = ocean.text.convert.TimeStamp;
17 
18         auto t = TimeStamp.parse ("Sun, 06 Nov 1994 08:49:37 GMT");
19     ---
20 
21     Copyright:
22         Copyright (c) 2004 Kris Bell.
23         Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
24         All rights reserved.
25 
26     License:
27         Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
28         See LICENSE_TANGO.txt for details.
29 
30     Version: Initial release: May 2005
31 
32     Authors: Kris
33 
34 *******************************************************************************/
35 
36 module ocean.text.convert.TimeStamp;
37 
38 import ocean.core.ExceptionDefinitions;
39 import ocean.core.Verify;
40 import ocean.meta.types.Qualifiers;
41 import ocean.time.Time;
42 import ocean.text.convert.Formatter;
43 import ocean.time.chrono.Gregorian;
44 
45 version (unittest) import ocean.core.Test;
46 
/******************************************************************************

  Parses the whole of `src` and returns the resulting UTC epoch time.

  Params:
      src = text holding a timestamp in any format understood by parse()

  Returns:
      the value produced by parse()

  Throws:
      IllegalArgumentException when `src` is not recognised, or when only
      a leading portion of it was consumed by the parser

 ******************************************************************************/

ulong toTime(T) (T[] src)
{
    uint len;

    auto x = parse (src, &len);

    // parse() reports failure through Time.max and leaves `len` untouched.
    // The length comparison alone misses empty input (0 < 0 is false), so
    // the sentinel is checked explicitly as well.
    if (x is Time.max || len < src.length)
        throw new IllegalArgumentException ("unknown time format: "~src);

    // NOTE(review): parse() yields a Time while this function is declared
    // to return ulong -- confirm the intended conversion.
    return x;
}
63 
/******************************************************************************

  Convenience wrapper around format(): renders `time` as text and
  returns the result in a newly allocated array.

  Params:
      time = the time to render

  Returns:
      a duplicate of the text produced by format()

  See format() for details

 ******************************************************************************/

char[] toString (Time time)
{
    // uninitialised scratch space handed to format() as its output buffer
    char[32] scratch = void;

    auto rendered = format (scratch, time);

    // duplicate the text so the result does not depend on the local buffer
    return rendered.dup;
}
79 
/******************************************************************************

  RFC1123 formatted time

  Overload for any value that can be cast to Time: the value is cast and
  handed on to the Time-based format().

  Converts to the format "Sun, 06 Nov 1994 08:49:37 GMT", and
  returns a populated slice of the provided buffer. Note that
  RFC1123 format is always in absolute GMT time, and a thirty-
  element buffer is sufficient for the produced output

  Throws an exception where the supplied time is invalid

 ******************************************************************************/

const(T)[] format(T, U=Time) (T[] output, U t)
{
    auto asTime = cast(Time) t;

    return format!(T)(output, asTime);
}
95 
/******************************************************************************

  Renders `t` in the form "Sun, 06 Nov 1994 08:49:37 GMT" into `output`.

  Params:
      output = destination buffer; verified to hold at least 29 elements
      t = the time to render

  Returns:
      the result of snformat() applied to `output`

  Throws:
      IllegalArgumentException if `t` is Time.max

 ******************************************************************************/

const(T)[] format(T) (T[] output, Time t)
{
    static immutable T[][] DayNames   = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
    static immutable T[][] MonthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

    verify(output.length >= 29);

    if (t is t.max)
    {
        throw new IllegalArgumentException("TimeStamp.format :: invalid Time argument");
    }

    // split the value into clock and calendar fields
    const clock = t.time;
    const ymd   = Gregorian.generic.toDate(t);

    // the month field is used one-based here, hence the -1 table index
    return snformat(output, "{}, {u2} {} {u4} {u2}:{u2}:{u2} GMT",
                    DayNames[ymd.dow], ymd.day, MonthNames[ymd.month-1], ymd.year,
                    clock.hours, clock.minutes, clock.seconds);
}
114 
unittest
{
    // The epoch must render identically into a dynamic and a static buffer.
    static immutable EXPECTED = "Thu, 01 Jan 1970 00:00:00 GMT";

    mstring heap_buf;
    heap_buf.length = 29;
    test(format(heap_buf, Time.epoch1970) == EXPECTED);

    char[29] stack_buf;
    test(format(stack_buf, Time.epoch1970) == EXPECTED);
}
124 
/******************************************************************************

  ISO-8601 format :: "2006-01-31T14:49:30Z"

  Overload for any value that can be cast to Time: the value is cast and
  handed on to the Time-based format8601().

  Throws an exception where the supplied time is invalid

 ******************************************************************************/

const(T)[] format8601(T, U=Time) (T[] output, U t)
{
    // Forward to the ISO-8601 formatter; forwarding to format() would
    // silently produce RFC 1123 text instead.
    return format8601!(T)(output, cast(Time) t);
}
135 
/******************************************************************************

  Renders `t` in the form "2006-01-31T14:49:30Z" into `output`.

  Params:
      output = destination buffer; verified to hold at least 29 elements
      t = the time to render

  Returns:
      the result of snformat() applied to `output`

  Throws:
      IllegalArgumentException if `t` is Time.max

 ******************************************************************************/

const(T)[] format8601(T) (T[] output, Time t)
{
    verify(output.length >= 29);

    if (t is t.max)
    {
        throw new IllegalArgumentException("TimeStamp.format :: invalid Time argument");
    }

    // split the value into clock and calendar fields
    const clock = t.time;
    const ymd   = Gregorian.generic.toDate(t);

    return snformat(output, "{u4}-{u2}-{u2}T{u2}:{u2}:{u2}Z",
                    ymd.year, ymd.month, ymd.day,
                    clock.hours, clock.minutes, clock.seconds);
}
150 
unittest
{
    // The epoch must render identically into a dynamic and a static buffer.
    static immutable EXPECTED = "1970-01-01T00:00:00Z";

    mstring heap_buf;
    heap_buf.length = 29;
    test(format8601(heap_buf, Time.epoch1970) == EXPECTED);

    char[29] stack_buf;
    test(format8601(stack_buf, Time.epoch1970) == EXPECTED);
}
160 
/******************************************************************************

  Parses `src` and returns a UTC epoch time. The formats are tried in
  this order: RFC 1123, RFC 850, ISO-8601, DOS, asctime; the first one
  that consumes any input wins.

  Params:
      src = text to parse
      ate = optional; receives the count of elements consumed. It is left
            unchanged when no format matches.

  Returns:
      the parsed time, or Time.max to indicate a parse failure

 ******************************************************************************/

Time parse(T) (T[] src, uint* ate = null)
{
    Time   result;
    size_t consumed = rfc1123 (src, result);

    // fall through the remaining formats until one of them consumes input
    if (consumed == 0)
        consumed = rfc850 (src, result);
    if (consumed == 0)
        consumed = iso8601 (src, result);
    if (consumed == 0)
        consumed = dostime (src, result);
    if (consumed == 0)
        consumed = asctime (src, result);

    if (consumed == 0)
        return Time.max;

    if (ate !is null)
        *ate = cast(int) consumed;

    return result;
}
188 
189 
/******************************************************************************

  Parses `src` into separate time-of-day and date fields. The formats
  are tried in this order: RFC 1123, RFC 850, ISO-8601, DOS, asctime;
  the first one that consumes any input wins.

  Params:
      src  = text to parse
      tod  = receives the time-of-day fields
      date = receives the date fields
      ate  = optional; receives the count of elements consumed. It is left
             unchanged when no format matches.

  Returns:
      true if one of the formats matched, false to indicate a parse failure

 ******************************************************************************/

bool parse(T) (T[] src, ref TimeOfDay tod, ref Date date, uint* ate = null)
{
    size_t len;

    if ((len = rfc1123 (src, tod, date)) > 0 ||
            (len = rfc850   (src, tod, date)) > 0 ||
            (len = iso8601  (src, tod, date)) > 0 ||
            (len = dostime  (src, tod, date)) > 0 ||
            (len = asctime (src, tod, date)) > 0)
    {
        // Explicit narrowing: size_t does not convert implicitly to uint
        // where size_t is 64 bits wide.
        if (ate)
            *ate = cast(uint) len;
        return true;
    }
    return false;
}
216 
/******************************************************************************

  RFC 822, updated by RFC 1123 :: "Sun, 06 Nov 1994 08:49:37 GMT"

  Parses into temporary date and time-of-day fields and, on success,
  converts them to a Time via the Gregorian calendar.

  Params:
      src   = text to parse
      value = receives the parsed time; left untouched when parsing fails

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed

 ******************************************************************************/

size_t rfc1123(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    immutable consumed = rfc1123!(T)(src, tod, date);

    // only a successful parse may overwrite the caller's value
    if (consumed != 0)
    {
        value = Gregorian.generic.toTime(date, tod);
    }
    return consumed;
}
236 
237 
/******************************************************************************

  RFC 822, updated by RFC 1123 :: "Sun, 06 Nov 1994 08:49:37 GMT"

  Every read performed directly by this function is checked against the
  end of `src`, so truncated or empty input yields a failed parse.

  Params:
      src  = text to parse
      tod  = receives the time-of-day fields
      date = receives the date fields

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed. `tod` and `date` may be partially written on failure.

 ******************************************************************************/

size_t rfc1123(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least `n` elements remain before `e`
    bool has (ptrdiff_t n)
    {
        return (e - p) >= n;
    }

    // consumes one element if it equals `c`; never reads at or beyond `e`
    bool eat (T c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "06 Nov 1994"
    bool dt ()
    {
        return ((date.day = parseInt(p, e)) > 0  &&
                eat (' ')                        &&
                has (3)                          && // parseMonth reads 3 elements
                (date.month = parseMonth(p)) > 0 &&
                eat (' ')                        &&
                (date.year = parseInt(p, e)) > 0);
    }

    if (has (3)                   && // parseShortDay reads 3 elements
            parseShortDay(p) >= 0 &&
            eat (',')             &&
            eat (' ')             &&
            dt ()                 &&
            eat (' ')             &&
            time (tod, p, e)      &&
            eat (' ')             &&
            has (3)               &&
            p[0..3] == "GMT")
    {
        return cast(size_t) ((p+3) - src.ptr);
    }
    return 0;
}
274 
275 
/******************************************************************************

  RFC 850, obsoleted by RFC 1036 :: "Sunday, 06-Nov-94 08:49:37 GMT"

  Parses into temporary date and time-of-day fields and, on success,
  converts them to a Time via the Gregorian calendar.

  Params:
      src   = text to parse
      value = receives the parsed time; left untouched when parsing fails

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed

 ******************************************************************************/

size_t rfc850(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    immutable consumed = rfc850!(T)(src, tod, date);

    // only a successful parse may overwrite the caller's value
    if (consumed != 0)
    {
        value = Gregorian.generic.toTime (date, tod);
    }
    return consumed;
}
295 
/******************************************************************************

  RFC 850, obsoleted by RFC 1036 :: "Sunday, 06-Nov-94 08:49:37 GMT"

  Years below 70 are mapped to 20xx and years from 70 to 99 to 19xx;
  larger values are taken as they are. Every read performed directly by
  this function is checked against the end of `src`.

  Params:
      src  = text to parse
      tod  = receives the time-of-day fields
      date = receives the date fields

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed. `tod` and `date` may be partially written on failure.

 ******************************************************************************/

size_t rfc850(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least `n` elements remain before `e`
    bool has (ptrdiff_t n)
    {
        return (e - p) >= n;
    }

    // consumes one element if it equals `c`; never reads at or beyond `e`
    bool eat (T c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "06-Nov-94"
    bool dt ()
    {
        if (!((date.day = parseInt(p, e)) > 0  &&
              eat ('-')                        &&
              has (3)                          && // parseMonth reads 3 elements
              (date.month = parseMonth(p)) > 0 &&
              eat ('-')))
        {
            return false;
        }

        // A two-digit year of "00" is legitimate (it maps to 2000), so
        // success is judged by whether a digit was consumed, not by value.
        auto start = p;
        date.year = parseInt(p, e);
        return p > start;
    }

    // parseFullDay compares against names of up to 9 elements ("Wednesday")
    // without knowing where the input ends. Any complete RFC 850 timestamp
    // is longer than that, so requiring 9 elements rejects nothing valid.
    // Its failure value is (size_t) -1, which a ">= 0" test cannot detect.
    if (has (9)                           &&
            parseFullDay(p) != size_t.max &&
            eat (',')                     &&
            eat (' ')                     &&
            dt ()                         &&
            eat (' ')                     &&
            time (tod, p, e)              &&
            eat (' ')                     &&
            has (3)                       &&
            p[0..3] == "GMT")
    {
        if (date.year < 70)
            date.year += 2000;
        else
            if (date.year < 100)
                date.year += 1900;

        return cast(size_t) ((p+3) - src.ptr);
    }
    return 0;
}
338 
339 
/******************************************************************************

  ANSI C's asctime() format :: "Sun Nov 6 08:49:37 1994"

  Parses into temporary date and time-of-day fields and, on success,
  converts them to a Time via the Gregorian calendar.

  Params:
      src   = text to parse
      value = receives the parsed time; left untouched when parsing fails

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed

 ******************************************************************************/

size_t asctime(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    immutable consumed = asctime!(T)(src, tod, date);

    // only a successful parse may overwrite the caller's value
    if (consumed != 0)
    {
        value = Gregorian.generic.toTime (date, tod);
    }
    return consumed;
}
359 
/******************************************************************************

  ANSI C's asctime() format :: "Sun Nov 6 08:49:37 1994"

  The day of month may be preceded by one extra space ("Nov  6"). Every
  read performed directly by this function is checked against the end of
  `src`, so truncated or empty input yields a failed parse.

  Params:
      src  = text to parse
      tod  = receives the time-of-day fields
      date = receives the date fields

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed. `tod` and `date` may be partially written on failure.

 ******************************************************************************/

size_t asctime(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // true when at least `n` elements remain before `e`
    bool has (ptrdiff_t n)
    {
        return (e - p) >= n;
    }

    // consumes one element if it equals `c`; never reads at or beyond `e`
    bool eat (T c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "Nov 6" or "Nov  6"
    bool dt ()
    {
        return (has (3)                          && // parseMonth reads 3 elements
                (date.month = parseMonth(p)) > 0 &&
                eat (' ')                        &&
                ((date.day = parseInt(p, e)) > 0
                 || (eat (' ') && (date.day = parseInt(p, e)) > 0)));
    }

    if (has (3)               && // parseShortDay reads 3 elements
        parseShortDay(p) >= 0 &&
        eat (' ')             &&
        dt ()                 &&
        eat (' ')             &&
        time (tod, p, e)      &&
        eat (' ')             &&
        (date.year = parseInt (p, e)) > 0)
    {
        return cast(size_t) (p - src.ptr);
    }
    return 0;
}
394 
/******************************************************************************

  DOS time format :: "12-31-06 08:49AM"

  Parses into temporary date and time-of-day fields and, on success,
  converts them to a Time via the Gregorian calendar.

  Params:
      src   = text to parse
      value = receives the parsed time; left untouched when parsing fails

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed

 ******************************************************************************/

size_t dostime(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    immutable consumed = dostime!(T)(src, tod, date);

    // only a successful parse may overwrite the caller's value
    if (consumed != 0)
    {
        value = Gregorian.generic.toTime(date, tod);
    }
    return consumed;
}
414 
415 
/******************************************************************************

  DOS time format :: "12-31-06 08:49AM"

  The date is month-day-year. Years below 70 are mapped to 20xx and years
  from 70 to 99 to 19xx. The clock is a 12-hour one: "12:xxAM" is hour 0
  and "12:xxPM" is hour 12. The time must be followed by "AM" or "PM".
  Every read is checked against the end of `src`.

  Params:
      src  = text to parse
      tod  = receives the hours and minutes
      date = receives the date fields

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed. `tod` and `date` may be partially written on failure.

 ******************************************************************************/

size_t dostime(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // consumes one element if it equals `c`; never reads at or beyond `e`
    bool eat (T c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // Parses a decimal number into `value`; false when no digit was present.
    // Used for fields where zero is a legitimate value.
    bool number (out int value)
    {
        auto start = p;
        value = parseInt(p, e);
        return p > start;
    }

    // "12-31-06"
    bool dt ()
    {
        int year;

        if ((date.month = parseInt(p, e)) > 0 &&
            eat ('-')                         &&
            (date.day = parseInt(p, e)) > 0   &&
            eat ('-')                         &&
            number (year))                       // "00" is the year 2000
        {
            date.year = year;
            return true;
        }
        return false;
    }

    int minutes;

    // A failed date must fail the whole parse: comparing the bool result
    // with ">= 0" would accept it unconditionally.
    if (dt ()                              &&
        eat (' ')                          &&
        (tod.hours = parseInt(p, e)) > 0   &&
        eat (':')                          &&
        number (minutes)                   && // ":00" is a valid minute
        (e - p) >= 2                       && // room for the AM/PM marker
        (p[0] == 'A' || p[0] == 'P')       &&
        p[1] == 'M')
    {
        tod.minutes = minutes;

        // 12-hour to 24-hour conversion
        if (tod.hours == 12)
            tod.hours = 0;
        if (p[0] == 'P')
            tod.hours += 12;

        if (date.year < 70)
            date.year += 2000;
        else
            if (date.year < 100)
                date.year += 1900;

        return cast(size_t) ((p+2) - src.ptr);
    }
    return 0;
}
458 
/******************************************************************************

  ISO-8601 format :: "2006-01-31 14:49:30,001"

  Parses into temporary date and time-of-day fields and, on success,
  converts them to a Time via the Gregorian calendar.

  Quote from http://en.wikipedia.org/wiki/ISO_8601 (2009-09-01):
  "Decimal fractions may also be added to any of the three time elements.
  A decimal point, either a comma or a dot (without any preference as
  stated most recently in resolution 10 of the 22nd General Conference
  CGPM in 2003), is used as a separator between the time element and
  its fraction."

  Params:
      src   = text to parse
      value = receives the parsed time; left untouched when parsing fails

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed

 ******************************************************************************/

size_t iso8601(T) (T[] src, ref Time value)
{
    Date      date;
    TimeOfDay tod;

    immutable size_t consumed = iso8601!(T)(src, tod, date);

    // only a successful parse may overwrite the caller's value
    if (consumed != 0)
    {
        value = Gregorian.generic.toTime(date, tod);
    }
    return consumed;
}
485 
/******************************************************************************

  ISO-8601 format :: "2006-01-31 14:49:30,001"

  The fraction is optional: when the input ends after the seconds, the
  milliseconds are set to zero. When anything follows the seconds it must
  be a ',' or '.' separator, after which the digits are stored as the
  milliseconds. Every read performed directly by this function is checked
  against the end of `src`.

  Quote from http://en.wikipedia.org/wiki/ISO_8601 (2009-09-01):
  "Decimal fractions may also be added to any of the three time elements.
  A decimal point, either a comma or a dot (without any preference as
  stated most recently in resolution 10 of the 22nd General Conference
  CGPM in 2003), is used as a separator between the time element and
  its fraction."

  Params:
      src  = text to parse
      tod  = receives the time-of-day fields
      date = receives the date fields

  Returns:
      the number of elements consumed by the parse; zero if the parse
      failed. `tod` and `date` may be partially written on failure.

 ******************************************************************************/

size_t iso8601(T) (T[] src, ref TimeOfDay tod, ref Date date)
{
    T* p = src.ptr;
    T* e = p + src.length;

    // consumes one element if it equals `c`; never reads at or beyond `e`
    bool eat (T c)
    {
        if (p < e && *p == c)
        {
            ++p;
            return true;
        }
        return false;
    }

    // "2006-01-31"
    bool dt ()
    {
        return ((date.year = parseInt(p, e)) > 0  &&
                eat ('-')                         &&
                (date.month = parseInt(p, e)) > 0 &&
                eat ('-')                         &&
                (date.day = parseInt(p, e)) > 0);
    }

    // A failed date must fail the whole parse: comparing the bool result
    // with ">= 0" would accept it unconditionally.
    if (dt ()           &&
            eat (' ')   &&
            time (tod, p, e))
    {
        // Are there chars left? If yes, parse millis. If no, millis = 0.
        // (The test must look at what remains, not at what was consumed.)
        if (p < e)
        {
            // wrong separator: error
            if (!eat (',') && !eat ('.'))
                return 0;

            // separator is ok: parse millis
            tod.millis = parseInt (p, e);
        }
        else
            tod.millis = 0;

        return cast(size_t) (p - src.ptr);
    }
    return 0;
}
537 
538 
/******************************************************************************

  Parse a time field of the form "08:49:37"

  Params:
      time = receives the hours, minutes and seconds
      p    = current read position; advanced past whatever was consumed
      e    = end of the input; nothing at or beyond it is read

  Returns:
      true if both ':' separators were found, false otherwise

 ******************************************************************************/

private bool time(T) (ref TimeOfDay time, ref T* p, T* e)
{
    // Each separator read is guarded by `p < e`, so input that ends right
    // after a number fails the parse instead of being read past its end.
    return ((time.hours = parseInt(p, e)) >= 0   &&
            p < e && *p++ == ':'                 &&
            (time.minutes = parseInt(p, e)) >= 0 &&
            p < e && *p++ == ':'                 &&
            (time.seconds = parseInt(p, e)) >= 0);
}
553 
554 
/******************************************************************************

  Match a three-letter month abbreviation ("Jan" .. "Dec") at `p`

  The comparison is case-sensitive and always inspects three elements
  starting at `p`.

  Params:
      p = current read position; advanced by three on a match

  Returns:
      the month number, 1 for "Jan" through 12 for "Dec", or 0 when
      nothing matched (in which case `p` is left unchanged)

 ******************************************************************************/

private int parseMonth(T) (ref T* p)
{
    static immutable T[][] names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

    auto candidate = p[0..3];

    foreach (index, name; names)
    {
        if (candidate == name)
        {
            p += 3;
            // table positions are zero-based, month numbers one-based
            return cast(int) index + 1;
        }
    }
    return 0;
}
609 
610 
/******************************************************************************

  Match a three-letter day abbreviation ("Sun" .. "Sat") at `p`

  The comparison is case-sensitive and always inspects three elements
  starting at `p`.

  Params:
      p = current read position; advanced by three on a match

  Returns:
      the day index, 0 for "Sun" through 6 for "Sat", or -1 when nothing
      matched (in which case `p` is left unchanged)

 ******************************************************************************/

private int parseShortDay(T) (ref T* p)
{
    static immutable T[][] names = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

    auto candidate = p[0..3];

    foreach (index, name; names)
    {
        if (candidate == name)
        {
            p += 3;
            return cast(int) index;
        }
    }
    return -1;
}
650 
651 
/******************************************************************************

  Match a full day name from the input. Sunday is 0

  Each candidate name is compared against as many elements at `p` as the
  name is long, so up to nine elements ("Wednesday") are inspected. No end
  pointer is available here; the caller has to make sure that many
  elements are readable.

  Params:
      p = current read position; advanced past the name on a match

  Returns:
      the day index, 0 for "Sunday" through 6 for "Saturday", or
      size_t.max when nothing matched (in which case `p` is left
      unchanged). The return type is unsigned, so a ">= 0" test on the
      result is always true; compare against size_t.max instead.

 ******************************************************************************/

private size_t parseFullDay(T) (ref T* p)
{
    // immutable, like the name tables in format(): a mutable T[][] cannot
    // be initialised from string literals when T is a mutable character type
    static immutable T[][] days = [
        "Sunday",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
    ];

    foreach (size_t i, day; days)
        if (day == p[0..day.length])
        {
            p += day.length;
            return i;
        }

    // same value the implicit conversion of -1 would give, spelled out
    return size_t.max;
}
678 
679 
/******************************************************************************

  Extract an unsigned decimal integer from the input

  Consumes consecutive characters in the range '0' .. '9', stopping at
  the first other character or at `e`, whichever comes first.

  Params:
      p = current read position; advanced past the digits consumed
      e = end of the input; nothing at or beyond it is read

  Returns:
      the accumulated value, or 0 when no digit was found (in which case
      `p` is left unchanged)

 ******************************************************************************/

private static int parseInt(T) (ref T* p, T* e)
{
    int total = 0;

    for (; p < e; ++p)
    {
        immutable ch = *p;

        if (ch < '0' || ch > '9')
            break;

        total = total * 10 + (ch - '0');
    }
    return total;
}
694 
695 
/******************************************************************************

  Round-trip and slice-boundary checks for parse() and format()

 ******************************************************************************/

unittest
{
    // RFC 1123 text -> Time -> RFC 1123 text must reproduce the input
    const(char)[] source = "Sun, 06 Nov 1994 08:49:37 GMT";
    char[30] small_buf;

    auto parsed   = parse (source);
    auto rendered = format (small_buf, parsed);
    test (rendered == source);

    // asctime input whose backing memory holds one more digit than the
    // slice handed to the parser; that digit must not become part of the year
    cstring backing = "Wed Jun 11 17:22:07 20088";
    cstring sliced  = backing[0..$-1];
    char[128] large_buf;

    parsed = parse(sliced);
    auto rendered2 = format(large_buf, parsed);
    test (rendered2 == "Wed, 11 Jun 2008 17:22:07 GMT");
}