1 // Written in the D programming language.
2 
3 /**
4  * $(RED Deprecated. It will be removed in February 2012.
5  *       Please use std.datetime instead.)
6  *
7  * dateparse module.
8  *
9  * Copyright: Copyright Digital Mars 2000 - 2009.
10  * License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
11  * Authors:   $(WEB digitalmars.com, Walter Bright)
12  * Source:    $(PHOBOSSRC std/_dateparse.d)
13  */
14 /*
15  *          Copyright Digital Mars 2000 - 2009.
16  * Distributed under the Boost Software License, Version 1.0.
17  *    (See accompanying file LICENSE_1_0.txt or copy at
18  *          http://www.boost.org/LICENSE_1_0.txt)
19  */
20 module undead.dateparse;
21 
22 private
23 {
24     import std.algorithm, std.string;
25     import core.stdc.stdlib;
26     import undead.date;
27 }
28 
29 //deprecated:
30 
31 //debug=dateparse;
32 
/// Thrown by DateParse.parse when the input text is not a recognizable date.
class DateParseError : Error
{
    /**
     * Params:
     *     s = the text that failed to parse; it is appended verbatim to the
     *         error message
     */
    this(string s)
    {
        enum prefix = "Invalid date string: ";
        super(prefix ~ s);
    }
}
40 
/**
 * Hand-written tokenizer and parser for free-form date/time strings such as
 * "Tue, 02 Apr 1996 02:04:57 GMT" or "1/1/1999 12:30 AM".
 *
 * The only public entry point is parse(); everything else is parser state
 * and helpers.
 */
struct DateParse
{
    /**
     * Parses the date/time text `s` and stores the result in `date`.
     *
     * All state from a previous call is discarded first, so one DateParse
     * instance can be reused for several strings.
     *
     * A year in the range 0..99 is interpreted as 1900 + year.
     *
     * Params:
     *     s    = text to parse
     *     date = receives the parsed fields. Fields the text does not
     *            mention keep the parser defaults: 0 for hour, minute,
     *            second, ms and weekday, int.min for tzcorrection.
     *
     * Throws:
     *     DateParseError if the text cannot be tokenized, if it lacks a
     *     year, month or day, or if any field is outside its valid range.
     */
    void parse(string s, out Date date)
    {
        debug(dateparse) import core.stdc.stdio : printf;

        this = DateParse.init;

        // Scratch space for lower-cased words; a word is never longer than
        // the input. Allocated on the heap because the slice is stored in a
        // member (it must not point into this function's stack frame) and
        // because the input length is not bounded.
        buffer = new char[s.length];

        // A D slice cannot be passed directly to C varargs: hand printf the
        // length and pointer separately.
        debug(dateparse)
            printf("DateParse.parse('%.*s')\n", cast(int) s.length, s.ptr);

        if (!parseString(s))
            throw new DateParseError(s);

        debug(dateparse)
            printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
                year, month, day,
                hours, minutes, seconds, ms,
                weekday, tzcorrection);

        // A time zone correction is optional, but when present it must be a
        // plausible hhmm value.
        const bool tzInvalid =
            tzcorrection != int.min &&
            (tzcorrection < -2300 || tzcorrection > 2300 ||
             tzcorrection % 10 != 0);

        // Compare against int.min explicitly: `year.init` is the default of
        // the *type* (0), not the field initializer, so it neither detects a
        // missing year nor allows the year 0.
        if (year == int.min ||
            month < 1 || month > 12 ||
            day < 1 || day > 31 ||
            hours < 0 || hours > 23 ||
            minutes < 0 || minutes > 59 ||
            seconds < 0 || seconds > 59 ||
            tzInvalid)
        {
            throw new DateParseError(s);
        }

        if (ampm)
        {
            // 12-hour clock: 12am is midnight, 12pm is noon.
            if (hours > 12)
                throw new DateParseError(s);
            if (hours < 12)
            {
                if (ampm == 2)          // P.M.
                    hours += 12;
            }
            else if (ampm == 1)         // 12 A.M.
            {
                hours = 0;
            }
        }

        if (year >= 0 && year <= 99)
            year += 1900;

        date.year = year;
        date.month = month;
        date.day = day;
        date.hour = hours;
        date.minute = minutes;
        date.second = seconds;
        date.ms = ms;
        date.weekday = weekday;
        date.tzcorrection = tzcorrection;
    }


private:
    int year = int.min; // int.min means "no year seen yet"
    int month;          // 1..12
    int day;            // 1..31
    int hours;          // 0..23
    int minutes;        // 0..59
    int seconds;        // 0..59
    int ms;             // 0..999 (never set by this parser)
    int weekday;        // 1..7, Sunday = 1; 0 when not specified
    int ampm;           // 0: not specified
                        // 1: AM
                        // 2: PM
    int tzcorrection = int.min; // time zone correction in hhmm form
                                // (e.g. 800, -530); int.min when not
                                // specified

    string s;           // text being parsed
    int si;             // index of the next unread character in s
    int number;         // value attached to the most recent token
    char[] buffer;      // scratch space for the current lower-cased word

    /// Token kinds produced by nextToken().
    enum DP : byte
    {
        err,
        weekday,
        month,
        number,
        end,
        colon,
        minus,
        slash,
        ampm,
        plus,
        tz,
        dst,
        dsttz,
    }

    /**
     * Reads the next token from `s`, starting at `si`.
     *
     * Blanks, commas and parenthesized comments are skipped. For DP.number
     * the value is left in `number`; for a recognized word `number` holds
     * the value from the keyword table.
     *
     * Returns: the token kind; DP.end at the end of the text, DP.err for
     *          anything unrecognized.
     */
    DP nextToken()
    {
        for (;;)
        {
            assert(si <= s.length);
            if (si == s.length)
                return DP.end;

            switch (s[si])
            {
                case ':':   si++; return DP.colon;
                case '+':   si++; return DP.plus;
                case '-':   si++; return DP.minus;
                case '/':   si++; return DP.slash;

                case '.':
                    version(DATE_DOT_DELIM)
                    {
                        // '.' separates date fields, like '/'
                        si++;
                        return DP.slash;
                    }
                    else
                    {
                        // a stray '.' is ignored
                        si++;
                        break;
                    }

                case ' ':
                case '\n':
                case '\r':
                case '\t':
                case ',':
                    si++;
                    break;

                case '(':
                    if (!skipComment())
                        return DP.err;  // unterminated comment
                    break;

                default:
                    if (s[si] >= '0' && s[si] <= '9')
                        return scanNumber();
                    return scanWord();
            }
        }
    }

    /**
     * Skips a possibly nested "( ... )" comment. On entry `si` indexes the
     * opening parenthesis; on success it indexes the character after the
     * matching closing one.
     *
     * Returns: false if the text ends before the comment is closed.
     */
    bool skipComment()
    {
        int nest = 1;
        while (nest != 0)
        {
            si++;
            if (si == s.length)
                return false;
            if (s[si] == '(')
                nest++;
            else if (s[si] == ')')
                nest--;
        }
        si++;                           // step past the closing ')'
        return true;
    }

    /**
     * Reads a run of decimal digits starting at `si` into `number`.
     *
     * Returns: DP.number, or DP.err if the value does not fit in an int
     *          (the whole digit run is still consumed).
     */
    DP scanNumber()
    {
        bool overflow = false;

        number = 0;
        while (si < s.length && s[si] >= '0' && s[si] <= '9')
        {
            const int digit = s[si] - '0';
            // Reject rather than silently wrap around.
            if (number > (int.max - digit) / 10)
                overflow = true;
            else
                number = number * 10 + digit;
            si++;
        }
        return overflow ? DP.err : DP.number;
    }

    /**
     * Reads a word (letters, with embedded or trailing '.'s ignored)
     * starting at `si`, lower-cases it into `buffer` and looks it up.
     *
     * Returns: the keyword's token kind, or DP.err if the word is unknown
     *          or the current character is not a letter.
     */
    DP scanWord()
    {
        int bi = 0;
        uint c = s[si];

        number = 0;
    bufloop:
        while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
        {
            if (c < 'a')                // upper case -> lower case
                c += cast(uint)'a' - cast(uint)'A';
            buffer[bi] = cast(char)c;
            bi++;
            do
            {
                si++;
                if (si == s.length)
                    break bufloop;
                c = s[si];
            } while (c == '.');         // so "G.M.T." reads as "gmt"
        }
        return classify(buffer[0 .. bi]);
    }

    /**
     * Looks up the lower-case word `buf` in the keyword table (month names,
     * weekday names, am/pm, time zone names).
     *
     * On a match, `number` is set to the keyword's value: month 1..12,
     * weekday 1..7, 1/2 for am/pm, or the zone's hhmm correction.
     *
     * Returns: the keyword's token kind, or DP.err if `buf` is not in the
     *          table.
     */
    DP classify(const(char)[] buf)
    {
        static struct DateID
        {
            string name;
            DP tok;
            short value;
        }

        static immutable DateID[] dateidtab =
        [
            {   "january",      DP.month,       1},
            {   "february",     DP.month,       2},
            {   "march",        DP.month,       3},
            {   "april",        DP.month,       4},
            {   "may",          DP.month,       5},
            {   "june",         DP.month,       6},
            {   "july",         DP.month,       7},
            {   "august",       DP.month,       8},
            {   "september",    DP.month,       9},
            {   "october",      DP.month,       10},
            {   "november",     DP.month,       11},
            {   "december",     DP.month,       12},
            {   "jan",          DP.month,       1},
            {   "feb",          DP.month,       2},
            {   "mar",          DP.month,       3},
            {   "apr",          DP.month,       4},
            {   "jun",          DP.month,       6},
            {   "jul",          DP.month,       7},
            {   "aug",          DP.month,       8},
            {   "sep",          DP.month,       9},
            {   "sept",         DP.month,       9},
            {   "oct",          DP.month,       10},
            {   "nov",          DP.month,       11},
            {   "dec",          DP.month,       12},

            {   "sunday",       DP.weekday,     1},
            {   "monday",       DP.weekday,     2},
            {   "tuesday",      DP.weekday,     3},
            {   "tues",         DP.weekday,     3},
            {   "wednesday",    DP.weekday,     4},
            {   "wednes",       DP.weekday,     4},
            {   "thursday",     DP.weekday,     5},
            {   "thur",         DP.weekday,     5},
            {   "thurs",        DP.weekday,     5},
            {   "friday",       DP.weekday,     6},
            {   "saturday",     DP.weekday,     7},

            {   "sun",          DP.weekday,     1},
            {   "mon",          DP.weekday,     2},
            {   "tue",          DP.weekday,     3},
            {   "wed",          DP.weekday,     4},
            {   "thu",          DP.weekday,     5},
            {   "fri",          DP.weekday,     6},
            {   "sat",          DP.weekday,     7},

            {   "am",           DP.ampm,                1},
            {   "pm",           DP.ampm,                2},

            {   "gmt",          DP.tz,          +000},
            {   "ut",           DP.tz,          +000},
            {   "utc",          DP.tz,          +000},
            {   "wet",          DP.tz,          +000},
            {   "z",            DP.tz,          +000},
            {   "wat",          DP.tz,          +100},
            {   "a",            DP.tz,          +100},
            {   "at",           DP.tz,          +200},
            {   "b",            DP.tz,          +200},
            {   "c",            DP.tz,          +300},
            {   "ast",          DP.tz,          +400},
            {   "d",            DP.tz,          +400},
            {   "est",          DP.tz,          +500},
            {   "e",            DP.tz,          +500},
            {   "cst",          DP.tz,          +600},
            {   "f",            DP.tz,          +600},
            {   "mst",          DP.tz,          +700},
            {   "g",            DP.tz,          +700},
            {   "pst",          DP.tz,          +800},
            {   "h",            DP.tz,          +800},
            {   "yst",          DP.tz,          +900},
            {   "i",            DP.tz,          +900},
            {   "ahst",         DP.tz,          +1000},
            {   "cat",          DP.tz,          +1000},
            {   "hst",          DP.tz,          +1000},
            {   "k",            DP.tz,          +1000},
            {   "nt",           DP.tz,          +1100},
            {   "l",            DP.tz,          +1100},
            {   "idlw",         DP.tz,          +1200},
            {   "m",            DP.tz,          +1200},

            {   "cet",          DP.tz,          -100},
            {   "fwt",          DP.tz,          -100},
            {   "met",          DP.tz,          -100},
            {   "mewt",         DP.tz,          -100},
            {   "swt",          DP.tz,          -100},
            {   "n",            DP.tz,          -100},
            {   "eet",          DP.tz,          -200},
            {   "o",            DP.tz,          -200},
            {   "bt",           DP.tz,          -300},
            {   "p",            DP.tz,          -300},
            {   "zp4",          DP.tz,          -400},
            {   "q",            DP.tz,          -400},
            {   "zp5",          DP.tz,          -500},
            {   "r",            DP.tz,          -500},
            {   "zp6",          DP.tz,          -600},
            {   "s",            DP.tz,          -600},
            {   "wast",         DP.tz,          -700},
            {   "t",            DP.tz,          -700},
            {   "cct",          DP.tz,          -800},
            {   "u",            DP.tz,          -800},
            {   "jst",          DP.tz,          -900},
            {   "v",            DP.tz,          -900},
            {   "east",         DP.tz,          -1000},
            {   "gst",          DP.tz,          -1000},
            {   "w",            DP.tz,          -1000},
            {   "x",            DP.tz,          -1100},
            {   "idle",         DP.tz,          -1200},
            {   "nzst",         DP.tz,          -1200},
            {   "nzt",          DP.tz,          -1200},
            {   "y",            DP.tz,          -1200},

            {   "bst",          DP.dsttz,       000},
            {   "adt",          DP.dsttz,       +400},
            {   "edt",          DP.dsttz,       +500},
            {   "cdt",          DP.dsttz,       +600},
            {   "mdt",          DP.dsttz,       +700},
            {   "pdt",          DP.dsttz,       +800},
            {   "ydt",          DP.dsttz,       +900},
            {   "hdt",          DP.dsttz,       +1000},
            {   "mest",         DP.dsttz,       -100},
            {   "mesz",         DP.dsttz,       -100},
            {   "sst",          DP.dsttz,       -100},
            {   "fst",          DP.dsttz,       -100},
            {   "wadt",         DP.dsttz,       -700},
            {   "eadt",         DP.dsttz,       -1000},
            {   "nzdt",         DP.dsttz,       -1200},

            {   "dst",          DP.dst,         0},
        ];

        // Linear search; the table is small.
        foreach (ref entry; dateidtab)
        {
            if (entry.name == buf)
            {
                number = entry.value;
                return entry.tok;
            }
        }
        return DP.err;
    }

    /**
     * Tokenizes `s` and fills in the date/time fields it describes. Range
     * checking of the fields is left to parse().
     *
     * Returns: 1 if the whole text was consumed, 0 on a syntax error.
     */
    int parseString(string s)
    {
        int n1;
        int dp;
        int sisave;

        this.s = s;
        si = 0;
        dp = nextToken();
        for (;;)
        {
            // Invariant: dp is the next token that has not been handled yet.
            switch (dp)
            {
            case DP.end:
                return 1;

            case DP.minus:
                break;                  // ignore spurious '-'

            case DP.weekday:
                weekday = number;
                break;

            case DP.month:              // month day, [year]
                month = number;
                dp = nextToken();
                if (dp == DP.number)
                {
                    day = number;
                    sisave = si;
                    dp = nextToken();
                    if (dp == DP.number)
                    {
                        n1 = number;
                        dp = nextToken();
                        if (dp == DP.colon)
                        {
                            // The number begins a time, not a year: rewind
                            // so it is read again as the hour.
                            si = sisave;
                            break;
                        }
                        year = n1;
                    }
                }
                continue;               // dp is still unhandled

            case DP.number:
                n1 = number;
                dp = nextToken();
                switch (dp)
                {
                case DP.end:
                    year = n1;
                    break;

                case DP.minus:
                case DP.slash:          // n1/ ? ? ?
                    dp = parseCalendarDate(n1);
                    if (dp == DP.err)
                        return 0;
                    break;

                case DP.colon:          // hh:mm [:ss] [am | pm]
                    dp = parseTimeOfDay(n1);
                    if (dp == DP.err)
                        return 0;
                    break;

                case DP.ampm:           // hh am | pm
                    hours = n1;
                    minutes = 0;
                    seconds = 0;
                    ampm = number;
                    // Consume the am/pm token; otherwise the outer switch
                    // sees it again and rejects the whole string.
                    dp = nextToken();
                    break;

                case DP.month:
                    day = n1;
                    month = number;
                    dp = nextToken();
                    if (dp == DP.number)
                    {   // day month year
                        year = number;
                        dp = nextToken();
                    }
                    break;

                default:
                    year = n1;
                    break;
                }
                continue;               // dp is still unhandled

            default:                    // DP.err, or a token not valid here
                return 0;
            }
            dp = nextToken();
        }
    }

    /**
     * Parses the remainder of a date that began with the number `n1`
     * followed by '-' or '/'.
     *
     * Accepted forms: day/month-name[/year], day/month-name year,
     * month/day, year/month/day and month/day/year. A three-number date
     * that fits both or neither of the last two forms is an error.
     *
     * Returns: the first token after the date, or DP.err.
     */
    int parseCalendarDate(int n1)
    {
        debug(dateparse) import core.stdc.stdio : printf;

        int n2;
        int n3;
        int dp;

        debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);
        dp = nextToken();
        if (dp == DP.month)     // day/month
        {
            day = n1;
            month = number;
            dp = nextToken();
            if (dp == DP.number)
            {   // day/month year
                year = number;
                dp = nextToken();
            }
            else if (dp == DP.minus || dp == DP.slash)
            {   // day/month/year
                dp = nextToken();
                if (dp != DP.number)
                    return DP.err;
                year = number;
                dp = nextToken();
            }
            return dp;
        }

        if (dp != DP.number)
            return DP.err;
        n2 = number;

        dp = nextToken();
        if (dp != DP.minus && dp != DP.slash)
        {   // must be month/day
            month = n1;
            day = n2;
            return dp;
        }

        dp = nextToken();
        if (dp != DP.number)
            return DP.err;
        n3 = number;
        dp = nextToken();

        const bool yearFirst =                  // year/month/day
            n1 > 12 ||
            ((n2 >= 1 && n2 <= 12) && (n3 >= 1 && n3 <= 31));
        const bool yearLast =                   // month/day/year
            ((n1 >= 1 && n1 <= 12) && (n2 >= 1 && n2 <= 31)) ||
            n3 > 31;

        if (yearFirst == yearLast)
            return DP.err;                      // ambiguous or impossible

        if (yearFirst)
        {
            year = n1;
            month = n2;
            day = n3;
        }
        else
        {
            month = n1;
            day = n2;
            year = n3;
        }
        return dp;
    }

    /**
     * Parses the remainder of a time that began with the hour `n1`
     * followed by ':'.
     *
     * Form: hh:mm[:ss] followed by at most one of: am/pm, a numeric offset
     * (+hhmm or -hhmm), a time zone name (a zero-offset name such as GMT
     * may carry a numeric offset; any name may be followed by "dst"), or a
     * daylight-saving zone name.
     *
     * Returns: the first token after the time, or DP.err.
     */
    int parseTimeOfDay(int n1)
    {
        int dp;

        // 12am is midnight
        // 12pm is noon

        hours = n1;
        dp = nextToken();
        if (dp != DP.number)
            return DP.err;
        minutes = number;

        dp = nextToken();
        if (dp == DP.colon)
        {
            dp = nextToken();
            if (dp != DP.number)
                return DP.err;
            seconds = number;
            dp = nextToken();
        }
        else
        {
            seconds = 0;
        }

        bool numericOffset = false;

        if (dp == DP.ampm)
        {
            ampm = number;
            dp = nextToken();
        }
        else if (dp == DP.plus || dp == DP.minus)
        {
            numericOffset = true;
        }
        else if (dp == DP.tz)
        {
            tzcorrection = number;
            dp = nextToken();
            // '+' and '-' leave `number` untouched, so it still holds the
            // zone's own correction here: only a zero-offset zone such as
            // "GMT" may be followed by a numeric offset ("GMT-0800").
            if (number == 0 && (dp == DP.plus || dp == DP.minus))
            {
                numericOffset = true;
            }
            else if (dp == DP.dst)
            {
                tzcorrection += 100;
                dp = nextToken();
            }
        }
        else if (dp == DP.dsttz)
        {
            tzcorrection = number;
            dp = nextToken();
        }

        if (numericOffset)
        {
            const int sign = (dp == DP.minus) ? -1 : 1;

            dp = nextToken();
            if (dp != DP.number)
                return DP.err;
            // The stored correction has the opposite sign of the written
            // offset: "-0800" yields 800.
            tzcorrection = -sign * number;
            dp = nextToken();
        }

        return dp;
    }

}
675 
// Exercises DateParse.parse on a set of representative date strings and
// checks every field of the resulting Date.
unittest
{
    // One input string together with the field values it must produce.
    static struct Expected
    {
        string text;
        int year, month, day;
        int hour, minute, second;
        int weekday;
        int tzcorrection;
    }

    static immutable Expected[] cases =
    [
        Expected("March 10, 1959 12:00 -800",
                 1959,  3, 10, 12,  0,  0, 0, 800),
        Expected("Tue Apr 02 02:04:57 GMT-0800 1996",
                 1996,  4,  2,  2,  4, 57, 3, 800),
        Expected("March 14, -1980 21:14:50",
                 1980,  3, 14, 21, 14, 50, 0, int.min),
        Expected("Tue Apr 02 02:04:57 1996",
                 1996,  4,  2,  2,  4, 57, 3, int.min),
        Expected("Tue, 02 Apr 1996 02:04:57 G.M.T.",
                 1996,  4,  2,  2,  4, 57, 3, 0),
        Expected("December 31, 3000",
                 3000, 12, 31,  0,  0,  0, 0, int.min),
        Expected("Wed, 31 Dec 1969 16:00:00 GMT",
                 1969, 12, 31, 16,  0,  0, 4, 0),
        Expected("1/1/1999 12:30 AM",
                 1999,  1,  1,  0, 30,  0, 0, int.min),
        Expected("Tue, 20 May 2003 15:38:58 +0530",
                 2003,  5, 20, 15, 38, 58, 3, -530),
    ];

    // The same parser and Date are reused for every case, as parse()
    // resets both on entry.
    DateParse dp;
    Date d;

    foreach (ref want; cases)
    {
        dp.parse(want.text, d);
        assert(d.year         == want.year);
        assert(d.month        == want.month);
        assert(d.day          == want.day);
        assert(d.hour         == want.hour);
        assert(d.minute       == want.minute);
        assert(d.second       == want.second);
        assert(d.ms           == 0);
        assert(d.weekday      == want.weekday);
        assert(d.tzcorrection == want.tzcorrection);
    }

    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
        d.year, d.month, d.day,
        d.hour, d.minute, d.second, d.ms,
        d.weekday, d.tzcorrection);
}