// Written in the D programming language.

/**
 * $(RED Deprecated. It will be removed in February 2012.
 *       Please use std.datetime instead.)
 *
 * dateparse module.
 *
 * Copyright: Copyright Digital Mars 2000 - 2009.
 * License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
 * Authors:   $(WEB digitalmars.com, Walter Bright)
 * Source:    $(PHOBOSSRC std/_dateparse.d)
 */
/*
 *          Copyright Digital Mars 2000 - 2009.
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
module undead.dateparse;

private
{
    import std.algorithm, std.string;
    import core.stdc.stdlib;
    import undead.date;
}

// printf is only used by the optional trace output below.
debug(dateparse) import core.stdc.stdio : printf;

//deprecated:

//debug=dateparse;

/**
 * Thrown by DateParse.parse when the input cannot be interpreted as a date.
 * The message is "Invalid date string: " followed by the offending input.
 */
class DateParseError : Error
{
    this(string s)
    {
        super("Invalid date string: " ~ s);
    }
}

/**
 * Free-form date/time string parser.
 *
 * A single call to parse() tokenizes the input, recognises month and weekday
 * names, am/pm markers, time zone abbreviations and numeric fields, and fills
 * in a Date.
 */
struct DateParse
{
    /**
     * Parses the date string `s` into `date`.
     *
     * All parser state is reset at the start of every call, so one DateParse
     * instance can be reused for any number of strings.
     *
     * Params:
     *   s    = the text to parse
     *   date = receives the parsed fields; weekday is 0 and tzcorrection is
     *          int.min when the input did not specify them
     *
     * Throws: DateParseError if the string cannot be tokenized, no year was
     *         found, or any field is out of range.
     */
    void parse(string s, out Date date)
    {
        this = DateParse.init;

        // Scratch space for lower-casing word tokens. It is heap allocated:
        // the length comes straight from the caller, so a stack allocation
        // (alloca) could overflow the stack on long input and would leave
        // `buffer` pointing at a dead frame once this function returns.
        buffer = new char[s.length];

        debug(dateparse) printf("DateParse.parse('%.*s')\n", cast(int) s.length, s.ptr);
        if (!parseString(s))
            throw new DateParseError(s);

        debug(dateparse)
            printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
                   year, month, day,
                   hours, minutes, seconds, ms,
                   weekday, tzcorrection);

        if (year == year.init ||                    // a year is mandatory
            (month < 1 || month > 12) ||
            (day < 1 || day > 31) ||
            (hours < 0 || hours > 23) ||
            (minutes < 0 || minutes > 59) ||
            (seconds < 0 || seconds > 59) ||
            (tzcorrection != int.min &&             // int.min: no zone given
             ((tzcorrection < -2300 || tzcorrection > 2300) ||
              (tzcorrection % 10))))
        {
            throw new DateParseError(s);
        }

        if (ampm)
        {
            // With an am/pm marker the hour must be on a 12-hour clock.
            if (hours > 12)
                throw new DateParseError(s);
            if (hours < 12)
            {
                if (ampm == 2)          // if P.M.
                    hours += 12;
            }
            else if (ampm == 1)         // if 12am
            {
                hours = 0;              // which is midnight
            }
        }

        // Two-digit years are taken to be in the 1900s.
        if (year >= 0 && year <= 99)
            year += 1900;

        date.year = year;
        date.month = month;
        date.day = day;
        date.hour = hours;
        date.minute = minutes;
        date.second = seconds;
        date.ms = ms;
        date.weekday = weekday;
        date.tzcorrection = tzcorrection;
    }


private:
    int year = int.min;         // our "nan" Date value
    int month;                  // 1..12
    int day;                    // 1..31
    int hours;                  // 0..23
    int minutes;                // 0..59
    int seconds;                // 0..59
    int ms;                     // 0..999 (never set by the parser, stays 0)
    int weekday;                // 1..7, 0: not specified
    int ampm;                   // 0: not specified
                                // 1: AM
                                // 2: PM
    int tzcorrection = int.min; // hhmm-encoded correction (e.g. 800, -530),
                                // int.min: not specified

    string s;                   // text being parsed
    int si;                     // index of the next unread character in s
    int number;                 // value attached to the most recent token
    char[] buffer;              // scratch for lower-cased word tokens

    /// Token kinds produced by nextToken().
    enum DP : byte
    {
        err,
        weekday,
        month,
        number,
        end,
        colon,
        minus,
        slash,
        ampm,
        plus,
        tz,
        dst,
        dsttz,
    }

    /**
     * Scans the next token from `s` starting at `si`.
     *
     * Blanks, commas and (possibly nested) parenthesised comments are
     * skipped. For DP.number, and for every word recognised by classify(),
     * the associated value is left in `number`.
     *
     * Returns: the token kind; DP.end at end of input; DP.err for an
     *          unterminated comment, an unrecognised word or character, or
     *          a run of digits that does not fit in an int.
     */
    DP nextToken()
    {
        int nest;
        uint c;
        int bi;
        DP result = DP.err;

        //printf("DateParse::nextToken()\n");
        for (;;)
        {
            assert(si <= s.length);
            if (si == s.length)
            {
                result = DP.end;
                goto Lret;
            }
            //printf("\ts[%d] = '%c'\n", si, s[si]);
            switch (s[si])
            {
                case ':':       result = DP.colon; goto ret_inc;
                case '+':       result = DP.plus;  goto ret_inc;
                case '-':       result = DP.minus; goto ret_inc;
                case '/':       result = DP.slash; goto ret_inc;
                case '.':
                    version(DATE_DOT_DELIM)
                    {
                        result = DP.slash;
                        goto ret_inc;
                    }
                    else
                    {
                        si++;
                        break;
                    }

                ret_inc:
                    si++;
                    goto Lret;

                case ' ':
                case '\n':
                case '\r':
                case '\t':
                case ',':
                    si++;
                    break;

                case '(':               // comment
                    nest = 1;
                    for (;;)
                    {
                        si++;
                        if (si == s.length)
                            goto Lret;          // error: unterminated comment
                        switch (s[si])
                        {
                            case '(':
                                nest++;
                                break;

                            case ')':
                                if (--nest == 0)
                                    goto Lendofcomment;
                                break;

                            default:
                                break;
                        }
                    }
                Lendofcomment:
                    si++;
                    break;

                default:
                    // First try to read a run of decimal digits.
                    number = 0;
                    for (;;)
                    {
                        if (si == s.length)
                            // c cannot be undefined here
                            break;
                        c = s[si];
                        if (!(c >= '0' && c <= '9'))
                            break;
                        immutable int digit = cast(int)(c - '0');
                        // Reject values that do not fit in an int instead of
                        // letting them wrap around to an unrelated number.
                        if (number > (int.max - digit) / 10)
                        {
                            result = DP.err;
                            goto Lret;
                        }
                        result = DP.number;
                        number = number * 10 + digit;
                        si++;
                    }
                    if (result == DP.number)
                        goto Lret;

                    // Otherwise collect a word, lower-cased and with any
                    // embedded '.'s dropped (so "G.M.T." reads as "gmt").
                    bi = 0;
            bufloop:
                    while (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z')
                    {
                        if (c < 'a')            // if upper case
                            c += cast(uint)'a' - cast(uint)'A'; // to lower case
                        buffer[bi] = cast(char)c;
                        bi++;
                        do
                        {
                            si++;
                            if (si == s.length)
                                break bufloop;
                            c = s[si];
                        } while (c == '.');     // ignore embedded '.'s
                    }
                    // An empty word (unexpected character) classifies as DP.err.
                    result = classify(buffer[0 .. bi]);
                    goto Lret;
            }
        }
    Lret:
        //printf("-DateParse::nextToken()\n");
        return result;
    }

    /**
     * Looks up a lower-cased word in the table of known names.
     *
     * On a match the entry's value (month number, weekday number, am/pm
     * code or time zone correction) is stored in `number`.
     *
     * Returns: the token kind of the matching entry, or DP.err if the word
     *          is not in the table.
     */
    DP classify(const(char)[] buf)
    {
        struct DateID
        {
            string name;
            DP tok;
            short value;
        }

        static immutable DateID[] dateidtab =
        [
            {   "january",      DP.month,       1},
            {   "february",     DP.month,       2},
            {   "march",        DP.month,       3},
            {   "april",        DP.month,       4},
            {   "may",          DP.month,       5},
            {   "june",         DP.month,       6},
            {   "july",         DP.month,       7},
            {   "august",       DP.month,       8},
            {   "september",    DP.month,       9},
            {   "october",      DP.month,       10},
            {   "november",     DP.month,       11},
            {   "december",     DP.month,       12},
            {   "jan",          DP.month,       1},
            {   "feb",          DP.month,       2},
            {   "mar",          DP.month,       3},
            {   "apr",          DP.month,       4},
            {   "jun",          DP.month,       6},
            {   "jul",          DP.month,       7},
            {   "aug",          DP.month,       8},
            {   "sep",          DP.month,       9},
            {   "sept",         DP.month,       9},
            {   "oct",          DP.month,       10},
            {   "nov",          DP.month,       11},
            {   "dec",          DP.month,       12},

            {   "sunday",       DP.weekday,     1},
            {   "monday",       DP.weekday,     2},
            {   "tuesday",      DP.weekday,     3},
            {   "tues",         DP.weekday,     3},
            {   "wednesday",    DP.weekday,     4},
            {   "wednes",       DP.weekday,     4},
            {   "thursday",     DP.weekday,     5},
            {   "thur",         DP.weekday,     5},
            {   "thurs",        DP.weekday,     5},
            {   "friday",       DP.weekday,     6},
            {   "saturday",     DP.weekday,     7},

            {   "sun",          DP.weekday,     1},
            {   "mon",          DP.weekday,     2},
            {   "tue",          DP.weekday,     3},
            {   "wed",          DP.weekday,     4},
            {   "thu",          DP.weekday,     5},
            {   "fri",          DP.weekday,     6},
            {   "sat",          DP.weekday,     7},

            {   "am",           DP.ampm,        1},
            {   "pm",           DP.ampm,        2},

            {   "gmt",          DP.tz,          +000},
            {   "ut",           DP.tz,          +000},
            {   "utc",          DP.tz,          +000},
            {   "wet",          DP.tz,          +000},
            {   "z",            DP.tz,          +000},
            {   "wat",          DP.tz,          +100},
            {   "a",            DP.tz,          +100},
            {   "at",           DP.tz,          +200},
            {   "b",            DP.tz,          +200},
            {   "c",            DP.tz,          +300},
            {   "ast",          DP.tz,          +400},
            {   "d",            DP.tz,          +400},
            {   "est",          DP.tz,          +500},
            {   "e",            DP.tz,          +500},
            {   "cst",          DP.tz,          +600},
            {   "f",            DP.tz,          +600},
            {   "mst",          DP.tz,          +700},
            {   "g",            DP.tz,          +700},
            {   "pst",          DP.tz,          +800},
            {   "h",            DP.tz,          +800},
            {   "yst",          DP.tz,          +900},
            {   "i",            DP.tz,          +900},
            {   "ahst",         DP.tz,          +1000},
            {   "cat",          DP.tz,          +1000},
            {   "hst",          DP.tz,          +1000},
            {   "k",            DP.tz,          +1000},
            {   "nt",           DP.tz,          +1100},
            {   "l",            DP.tz,          +1100},
            {   "idlw",         DP.tz,          +1200},
            {   "m",            DP.tz,          +1200},

            {   "cet",          DP.tz,          -100},
            {   "fwt",          DP.tz,          -100},
            {   "met",          DP.tz,          -100},
            {   "mewt",         DP.tz,          -100},
            {   "swt",          DP.tz,          -100},
            {   "n",            DP.tz,          -100},
            {   "eet",          DP.tz,          -200},
            {   "o",            DP.tz,          -200},
            {   "bt",           DP.tz,          -300},
            {   "p",            DP.tz,          -300},
            {   "zp4",          DP.tz,          -400},
            {   "q",            DP.tz,          -400},
            {   "zp5",          DP.tz,          -500},
            {   "r",            DP.tz,          -500},
            {   "zp6",          DP.tz,          -600},
            {   "s",            DP.tz,          -600},
            {   "wast",         DP.tz,          -700},
            {   "t",            DP.tz,          -700},
            {   "cct",          DP.tz,          -800},
            {   "u",            DP.tz,          -800},
            {   "jst",          DP.tz,          -900},
            {   "v",            DP.tz,          -900},
            {   "east",         DP.tz,          -1000},
            {   "gst",          DP.tz,          -1000},
            {   "w",            DP.tz,          -1000},
            {   "x",            DP.tz,          -1100},
            {   "idle",         DP.tz,          -1200},
            {   "nzst",         DP.tz,          -1200},
            {   "nzt",          DP.tz,          -1200},
            {   "y",            DP.tz,          -1200},

            {   "bst",          DP.dsttz,       000},
            {   "adt",          DP.dsttz,       +400},
            {   "edt",          DP.dsttz,       +500},
            {   "cdt",          DP.dsttz,       +600},
            {   "mdt",          DP.dsttz,       +700},
            {   "pdt",          DP.dsttz,       +800},
            {   "ydt",          DP.dsttz,       +900},
            {   "hdt",          DP.dsttz,       +1000},
            {   "mest",         DP.dsttz,       -100},
            {   "mesz",         DP.dsttz,       -100},
            {   "sst",          DP.dsttz,       -100},
            {   "fst",          DP.dsttz,       -100},
            {   "wadt",         DP.dsttz,       -700},
            {   "eadt",         DP.dsttz,       -1000},
            {   "nzdt",         DP.dsttz,       -1200},

            {   "dst",          DP.dst,         0},
        ];

        //message(DTEXT("DateParse::classify('%s')\n"), buf);

        // Do a linear search.  Yes, it would be faster with a binary
        // one.
        foreach (ref entry; dateidtab)
        {
            if (entry.name == buf)
            {
                number = entry.value;
                return entry.tok;
            }
        }
        return DP.err;
    }

    /**
     * Top-level parser: walks the token stream of `s` and stores every
     * recognised field in this struct.
     *
     * Returns: 1 when the end of input was reached without error,
     *          0 on any tokenizing or syntax error.
     */
    int parseString(string s)
    {
        int n1;
        int dp;
        int sisave;
        int result;

        //message(DTEXT("DateParse::parseString('%ls')\n"), s);
        this.s = s;
        si = 0;
        dp = nextToken();
        for (;;)
        {
            //message(DTEXT("\tdp = %d\n"), dp);
            switch (dp)
            {
                case DP.end:
                    result = 1;
                Lret:
                    return result;

                case DP.err:
                case_error:
                    //message(DTEXT("\terror\n"));
                default:
                    result = 0;
                    goto Lret;

                case DP.minus:
                    break;                      // ignore spurious '-'

                case DP.weekday:
                    weekday = number;
                    break;

                case DP.month:          // month day, [year]
                    month = number;
                    dp = nextToken();
                    if (dp == DP.number)
                    {
                        day = number;
                        sisave = si;
                        dp = nextToken();
                        if (dp == DP.number)
                        {
                            n1 = number;
                            dp = nextToken();
                            if (dp == DP.colon)
                            {   // back up, not a year: the number starts a
                                // time of day and is re-read below
                                si = sisave;
                            }
                            else
                            {
                                year = n1;
                                continue;
                            }
                            break;
                        }
                    }
                    continue;

                case DP.number:
                    // The meaning of a number depends on what follows it.
                    n1 = number;
                    dp = nextToken();
                    switch (dp)
                    {
                        case DP.end:
                            year = n1;
                            break;

                        case DP.minus:
                        case DP.slash:  // n1/ ? ? ?
                            dp = parseCalendarDate(n1);
                            if (dp == DP.err)
                                goto case_error;
                            break;

                        case DP.colon:  // hh:mm [:ss] [am | pm]
                            dp = parseTimeOfDay(n1);
                            if (dp == DP.err)
                                goto case_error;
                            break;

                        case DP.ampm:
                            hours = n1;
                            minutes = 0;
                            seconds = 0;
                            ampm = number;
                            break;

                        case DP.month:
                            day = n1;
                            month = number;
                            dp = nextToken();
                            if (dp == DP.number)
                            {   // day month year
                                year = number;
                                dp = nextToken();
                            }
                            break;

                        default:
                            year = n1;
                            break;
                    }
                    continue;
            }
            dp = nextToken();
        }
        // @@@ bug in the compiler: this is never reachable
        assert(0);
    }

    /**
     * Parses the rest of a '-' or '/' separated calendar date whose first
     * number, `n1`, and first separator have already been consumed.
     *
     * Accepted shapes are day/month-name[/year], year/month/day,
     * month/day/year and month/day.
     *
     * Returns: the token following the date, or DP.err on a syntax error or
     *          when a three-number date is ambiguous.
     */
    int parseCalendarDate(int n1)
    {
        int n2;
        int n3;
        int dp;

        debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);
        dp = nextToken();
        if (dp == DP.month)     // day/month
        {
            day = n1;
            month = number;
            dp = nextToken();
            if (dp == DP.number)
            {   // day/month year
                year = number;
                dp = nextToken();
            }
            else if (dp == DP.minus || dp == DP.slash)
            {   // day/month/year
                dp = nextToken();
                if (dp != DP.number)
                    goto case_error;
                year = number;
                dp = nextToken();
            }
            return dp;
        }
        if (dp != DP.number)
            goto case_error;
        n2 = number;
        //message(DTEXT("\tn2 = %d\n"), n2);
        dp = nextToken();
        if (dp == DP.minus || dp == DP.slash)
        {
            dp = nextToken();
            if (dp != DP.number)
                goto case_error;
            n3 = number;
            //message(DTEXT("\tn3 = %d\n"), n3);
            dp = nextToken();

            // case1: year/month/day
            // case2: month/day/year
            bool case1, case2;

            case1 = (n1 > 12) ||
                    ((n2 >= 1 && n2 <= 12) &&
                     (n3 >= 1 && n3 <= 31));
            case2 = ((n1 >= 1 && n1 <= 12) &&
                     (n2 >= 1 && n2 <= 31)) ||
                    (n3 > 31);
            // Exactly one interpretation must fit; both or neither is an error.
            if (case1 == case2)
                goto case_error;
            if (case1)
            {
                year = n1;
                month = n2;
                day = n3;
            }
            else
            {
                month = n1;
                day = n2;
                year = n3;
            }
        }
        else
        {   // must be month/day
            month = n1;
            day = n2;
        }
        return dp;

    case_error:
        return DP.err;
    }

    /**
     * Parses the rest of a time of day whose hour, `n1`, and first ':' have
     * already been consumed: mm [:ss] followed by an optional am/pm marker,
     * numeric offset or time zone name.
     *
     * Returns: the token following the time, or DP.err on a syntax error.
     */
    int parseTimeOfDay(int n1)
    {
        int dp;
        int sign;

        // 12am is midnight
        // 12pm is noon

        //message(DTEXT("DateParse::parseTimeOfDay(%d)\n"), n1);
        hours = n1;
        dp = nextToken();
        if (dp != DP.number)
            goto case_error;
        minutes = number;
        dp = nextToken();
        if (dp == DP.colon)
        {
            dp = nextToken();
            if (dp != DP.number)
                goto case_error;
            seconds = number;
            dp = nextToken();
        }
        else
            seconds = 0;

        if (dp == DP.ampm)
        {
            ampm = number;
            dp = nextToken();
        }
        else if (dp == DP.plus || dp == DP.minus)
        {
        Loffset:
            // A written offset is stored with the opposite sign:
            // "-0800" gives a tzcorrection of 800.
            sign = (dp == DP.minus) ? -1 : 1;
            dp = nextToken();
            if (dp != DP.number)
                goto case_error;
            tzcorrection = -sign * number;
            dp = nextToken();
        }
        else if (dp == DP.tz)
        {
            tzcorrection = number;
            dp = nextToken();
            // A zero-offset zone may carry an explicit offset ("GMT-0800").
            // `number` still holds the zone's value here because '+' and
            // '-' tokens do not modify it.
            if (number == 0 && (dp == DP.plus || dp == DP.minus))
                goto Loffset;
            if (dp == DP.dst)
            {
                tzcorrection += 100;
                dp = nextToken();
            }
        }
        else if (dp == DP.dsttz)
        {
            tzcorrection = number;
            dp = nextToken();
        }

        return dp;

    case_error:
        return DP.err;
    }

}

unittest
{
    DateParse dp;
    Date d;

    dp.parse("March 10, 1959 12:00 -800", d);
    assert(d.year         == 1959);
    assert(d.month        == 3);
    assert(d.day          == 10);
    assert(d.hour         == 12);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == 800);

    dp.parse("Tue Apr 02 02:04:57 GMT-0800 1996", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == 800);

    dp.parse("March 14, -1980 21:14:50", d);
    assert(d.year         == 1980);
    assert(d.month        == 3);
    assert(d.day          == 14);
    assert(d.hour         == 21);
    assert(d.minute       == 14);
    assert(d.second       == 50);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue Apr 02 02:04:57 1996", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue, 02 Apr 1996 02:04:57 G.M.T.", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == 0);

    dp.parse("December 31, 3000", d);
    assert(d.year         == 3000);
    assert(d.month        == 12);
    assert(d.day          == 31);
    assert(d.hour         == 0);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Wed, 31 Dec 1969 16:00:00 GMT", d);
    assert(d.year         == 1969);
    assert(d.month        == 12);
    assert(d.day          == 31);
    assert(d.hour         == 16);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 4);
    assert(d.tzcorrection == 0);

    dp.parse("1/1/1999 12:30 AM", d);
    assert(d.year         == 1999);
    assert(d.month        == 1);
    assert(d.day          == 1);
    assert(d.hour         == 0);
    assert(d.minute       == 30);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue, 20 May 2003 15:38:58 +0530", d);
    assert(d.year         == 2003);
    assert(d.month        == 5);
    assert(d.day          == 20);
    assert(d.hour         == 15);
    assert(d.minute       == 38);
    assert(d.second       == 58);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == -530);

    // A numeric field that does not fit in an int must be rejected rather
    // than wrapping around (4294968295 == 2^32 + 999 used to parse as 999).
    bool rejected = false;
    try
        dp.parse("March 10, 4294968295", d);
    catch (DateParseError e)
        rejected = true;
    assert(rejected);

    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
        d.year, d.month, d.day,
        d.hour, d.minute, d.second, d.ms,
        d.weekday, d.tzcorrection);
}