2 #ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__
3 #define DATE_TIME_FORMAT_DATE_PARSER_HPP__
5 /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
6 * Use, modification and distribution is subject to the
7 * Boost Software License, Version 1.0. (See accompanying
8 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
9 * Author: Jeff Garland, Bart Garst
14 #include "boost/lexical_cast.hpp"
15 #include "boost/date_time/string_parse_tree.hpp"
16 #include "boost/date_time/strings_from_facet.hpp"
17 #include "boost/date_time/special_values_parser.hpp"
22 #ifndef BOOST_NO_STDC_NAMESPACE
28 #ifdef BOOST_NO_STDC_NAMESPACE
34 namespace boost { namespace date_time {
36 //! Helper function for parsing fixed length strings into integers
37 /*! Will consume at most 'length' characters from stream, stopping at the
38 * first one that is neither a digit nor 'fill_char'. Consumed characters
39 * are transferred to the parse_match_result struct. Returns '-1' if no
40 * number can be parsed or incorrect number of digits in stream. */
41 template<typename int_type, typename charT>
44 fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
45 std::istreambuf_iterator<charT>& stream_end,
46 parse_match_result<charT>& mr,
48 const charT& fill_char)
50 //typedef std::basic_string<charT> string_type;
// NOTE(review): std::isdigit is called directly on a charT value; confirm
// this is well-defined for every charT used and for negative char values.
53 while (j < length && itr != stream_end &&
54 (std::isdigit(*itr) || *itr == fill_char)) {
55 if(*itr == fill_char) {
56 /* Since a fill_char can be anything, we convert it to a zero.
57 * lexical_cast will behave predictably when zero is used as fill. */
// Start from the failure sentinel; it is only overwritten by a successful cast.
66 int_type i = static_cast<int_type>(-1);
67 // mr.cache will hold leading zeros. size() tells us when input is too short.
68 if(mr.cache.size() < length) {
72 i = boost::lexical_cast<int_type>(mr.cache);
73 }catch(bad_lexical_cast&){
74 // we want to return -1 if the cast fails so nothing to do here
79 //! Helper function for parsing fixed length strings into integers
80 /*! Convenience overload that forwards to the five-argument
81 * fixed_string_to_int, passing '0' as the fill character. Consumed
82 * characters are transferred to the parse_match_result struct. Returns
83 * '-1' if no number can be parsed or incorrect number of digits in stream. */
84 template<typename int_type, typename charT>
87 fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
88 std::istreambuf_iterator<charT>& stream_end,
89 parse_match_result<charT>& mr,
92 return fixed_string_to_int<int_type, charT>(itr, stream_end, mr, length, '0');
95 //! Helper function for parsing varied length strings into integers
96 /*! Will consume at most 'max_length' characters from stream, stopping at
97 * the first character that is not a digit. Returns '-1' if no number can
98 * be parsed. Will not parse a number preceded by a '+' or '-'. */
99 template<typename int_type, typename charT>
102 var_string_to_int(std::istreambuf_iterator<charT>& itr,
103 const std::istreambuf_iterator<charT>& stream_end,
104 unsigned int max_length)
106 typedef std::basic_string<charT> string_type;
// Only digits are accepted, so a leading sign character ends the loop immediately.
109 while (itr != stream_end && (j < max_length) && std::isdigit(*itr)) {
// Start from the failure sentinel; it is replaced by the lexical_cast result.
114 int_type i = static_cast<int_type>(-1);
116 i = boost::lexical_cast<int_type>(s);
122 //! Class with generic date parsing using a format string
123 /*! The following is the set of recognized format specifiers
124 - %a - Short weekday name
125 - %A - Long weekday name
126 - %b - Abbreviated month name
127 - %B - Full month name
128 - %d - Day of the month as decimal 01 to 31
129 - %j - Day of year as decimal from 001 to 366
130 - %m - Month as a decimal 01 to 12
131 - %U - Week number 00 to 53 with first Sunday as the first day of week 1?
132 - %w - Weekday as decimal number 0 to 6 where Sunday == 0
133 - %W - Week number 00 to 53 where Monday is first day of week 1
134 - %x - facet default date representation
135 - %y - Year without the century - eg: 04 for 2004
136 - %Y - Year with century
138 The weekday specifiers (%a and %A) do not add to the date construction,
139 but they provide a way to skip over the weekday names for formats that
142 todo -- Another interesting feature that this approach could provide is
143 an option to fill in any missing fields with the current values
144 from the clock. So if you have %m-%d the parser would detect
145 the missing year value and fill it in using the clock.
147 todo -- What to do with the %x. %x in the classic facet is just bad...
150 template<class date_type, typename charT>
151 class format_date_parser
154 typedef std::basic_string<charT> string_type;
155 typedef std::basic_istringstream<charT> stringstream_type;
156 typedef std::istreambuf_iterator<charT> stream_itr_type;
157 typedef typename string_type::const_iterator const_itr;
158 typedef typename date_type::year_type year_type;
159 typedef typename date_type::month_type month_type;
160 typedef typename date_type::day_type day_type;
161 typedef typename date_type::duration_type duration_type;
162 typedef typename date_type::day_of_week_type day_of_week_type;
163 typedef typename date_type::day_of_year_type day_of_year_type;
164 typedef string_parse_tree<charT> parse_tree_type;
165 typedef typename parse_tree_type::parse_match_result_type match_results;
166 typedef std::vector<std::basic_string<charT> > input_collection_type;
168 // TODO sv_parser uses its default constructor - write the others
//! Constructs a parser from a format string and explicit name collections.
// The two month-name parse trees are built with a second argument of 1, while
// the weekday trees use the single-argument form.
// NOTE(review): presumably the 1 is the value given to the first name so that
// months match as 1-12 -- confirm against string_parse_tree.
170 format_date_parser(const string_type& format_str,
171 const input_collection_type& month_short_names,
172 const input_collection_type& month_long_names,
173 const input_collection_type& weekday_short_names,
174 const input_collection_type& weekday_long_names) :
175 m_format(format_str),
176 m_month_short_names(month_short_names, 1),
177 m_month_long_names(month_long_names, 1),
178 m_weekday_short_names(weekday_short_names),
179 m_weekday_long_names(weekday_long_names)
//! Constructs a parser from a format string, taking all names from a locale.
// Month and weekday names are obtained through gather_month_strings and
// gather_weekday_strings; the long-name trees pass an extra 'false' argument.
// NOTE(review): presumably 'false' selects the long (non-abbreviated) names --
// confirm against strings_from_facet.hpp.
182 format_date_parser(const string_type& format_str,
183 const std::locale& locale) :
184 m_format(format_str),
185 m_month_short_names(gather_month_strings<charT>(locale), 1),
186 m_month_long_names(gather_month_strings<charT>(locale, false), 1),
187 m_weekday_short_names(gather_weekday_strings<charT>(locale)),
188 m_weekday_long_names(gather_weekday_strings<charT>(locale, false))
//! Copy constructor: copies the format string and all four parse trees from fdp.
191 format_date_parser(const format_date_parser<date_type,charT>& fdp)
// Each member is copied by assignment.
193 this->m_format = fdp.m_format;
194 this->m_month_short_names = fdp.m_month_short_names;
195 this->m_month_long_names = fdp.m_month_long_names;
196 this->m_weekday_short_names = fdp.m_weekday_short_names;
197 this->m_weekday_long_names = fdp.m_weekday_long_names;
//! Getter overload of format(): takes no arguments and returns a string_type by value.
// NOTE(review): the body is not visible here; presumably it returns m_format.
200 string_type format() const
//! Setter overload of format(): replaces the stored format string with format_str.
205 void format(string_type format_str)
207 m_format = format_str;
//! Rebuilds the short month-name parse tree from month_names (second argument 1, as in the constructors).
210 void short_month_names(const input_collection_type& month_names)
212 m_month_short_names = parse_tree_type(month_names, 1);
//! Rebuilds the long month-name parse tree from month_names (second argument 1, as in the constructors).
214 void long_month_names(const input_collection_type& month_names)
216 m_month_long_names = parse_tree_type(month_names, 1);
//! Rebuilds the short weekday-name parse tree from weekday_names.
218 void short_weekday_names(const input_collection_type& weekday_names)
220 m_weekday_short_names = parse_tree_type(weekday_names);
//! Rebuilds the long weekday-name parse tree from weekday_names.
222 void long_weekday_names(const input_collection_type& weekday_names)
224 m_weekday_long_names = parse_tree_type(weekday_names);
//! Parses a date from a string using the supplied format string.
// Wraps 'value' in a string stream and delegates to the iterator-based
// parse_date overload, passing a default-constructed iterator as the end.
228 parse_date(const string_type& value,
229 const string_type& format_str,
230 const special_values_parser<date_type,charT>& sv_parser) const
232 stringstream_type ss(value);
233 stream_itr_type sitr(ss);
234 stream_itr_type stream_end;
235 return parse_date(sitr, stream_end, format_str, sv_parser);
//! Parses a date from the iterator range using the parser's stored format (m_format).
239 parse_date(std::istreambuf_iterator<charT>& sitr,
240 std::istreambuf_iterator<charT>& stream_end,
241 const special_values_parser<date_type,charT>& sv_parser) const
243 return parse_date(sitr, stream_end, m_format, sv_parser);
//! Parses a date from the character range [sitr, stream_end) using format_str.
246 /*! Of all the objects that the format_date_parser can parse, only a
247 * date can be a special value. Therefore, only parse_date checks
248 * for special_values. */
250 parse_date(std::istreambuf_iterator<charT>& sitr,
251 std::istreambuf_iterator<charT>& stream_end,
252 string_type format_str,
253 const special_values_parser<date_type,charT>& sv_parser) const
255 bool use_current_char = false;
257 // skip leading whitespace
// NOTE(review): *sitr is evaluated before the sitr != stream_end test, so an
// end-of-stream iterator is dereferenced on empty input -- confirm and
// consider swapping the two operands.
258 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
260 short year(0), month(0), day(0), day_of_year(0);// wkday(0);
261 /* Initialize the following to their minimum values. These intermediate
262 * objects are used so we get specific exceptions when part of the input
264 * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/
265 year_type t_year(1400);
266 month_type t_month(1);
268 day_of_week_type wkday(0);
// Walk the format string and the input stream together until either runs out.
271 const_itr itr(format_str.begin());
272 while (itr != format_str.end() && (sitr != stream_end)) {
274 if ( ++itr == format_str.end())
280 //this value is just throw away. It could be used for
281 //error checking potentially, but it isn't helpful in
282 //actually constructing the date - we just need to get it
284 match_results mr = m_weekday_short_names.match(sitr, stream_end);
285 if(mr.current_match == match_results::PARSE_ERROR) {
286 // check special_values
287 if(sv_parser.match(sitr, stream_end, mr)) {
288 return date_type(static_cast<special_values>(mr.current_match));
291 wkday = mr.current_match;
292 if (mr.has_remaining()) {
293 use_current_char = true;
299 //this value is just throw away. It could be used for
300 //error checking potentially, but it isn't helpful in
301 //actually constructing the date - we just need to get it
303 match_results mr = m_weekday_long_names.match(sitr, stream_end);
304 if(mr.current_match == match_results::PARSE_ERROR) {
305 // check special_values
306 if(sv_parser.match(sitr, stream_end, mr)) {
307 return date_type(static_cast<special_values>(mr.current_match));
310 wkday = mr.current_match;
311 if (mr.has_remaining()) {
312 use_current_char = true;
318 match_results mr = m_month_short_names.match(sitr, stream_end);
319 if(mr.current_match == match_results::PARSE_ERROR) {
320 // check special_values
321 if(sv_parser.match(sitr, stream_end, mr)) {
322 return date_type(static_cast<special_values>(mr.current_match));
325 t_month = month_type(mr.current_match);
326 if (mr.has_remaining()) {
327 use_current_char = true;
333 match_results mr = m_month_long_names.match(sitr, stream_end);
334 if(mr.current_match == match_results::PARSE_ERROR) {
335 // check special_values
336 if(sv_parser.match(sitr, stream_end, mr)) {
337 return date_type(static_cast<special_values>(mr.current_match));
340 t_month = month_type(mr.current_match);
341 if (mr.has_remaining()) {
342 use_current_char = true;
// Two-character day field, using the default '0' fill.
349 day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
351 if(sv_parser.match(sitr, stream_end, mr)) {
352 return date_type(static_cast<special_values>(mr.current_match));
355 t_day = day_type(day);
// Two-character day field where a space is accepted as the fill character.
361 day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2, ' ');
363 if(sv_parser.match(sitr, stream_end, mr)) {
364 return date_type(static_cast<special_values>(mr.current_match));
367 t_day = day_type(day);
// Three-character day-of-year field.
373 day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3);
374 if(day_of_year == -1) {
375 if(sv_parser.match(sitr, stream_end, mr)) {
376 return date_type(static_cast<special_values>(mr.current_match));
379 // these next two lines are so we get an exception with bad input
380 day_of_year_type t_day_of_year(1);
381 t_day_of_year = day_of_year_type(day_of_year);
// Two-character numeric month field.
387 month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
389 if(sv_parser.match(sitr, stream_end, mr)) {
390 return date_type(static_cast<special_values>(mr.current_match));
393 t_month = month_type(month);
// Four-character year field.
399 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
401 if(sv_parser.match(sitr, stream_end, mr)) {
402 return date_type(static_cast<special_values>(mr.current_match));
405 t_year = year_type(year);
// Two-character year field; the century is fixed at 2000 below.
411 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
413 if(sv_parser.match(sitr, stream_end, mr)) {
414 return date_type(static_cast<special_values>(mr.current_match));
417 year += 2000; //make 2 digit years in this century
418 t_year = year_type(year);
422 {} //ignore those we don't understand
427 else { // itr == '%', second consecutive
431 itr++; //advance past format specifier
433 else { //skip past chars in format and in buffer
435 if (use_current_char) {
436 use_current_char = false;
// A parsed day-of-year is used in preference to month/day: the result is
// Dec 31 of the preceding year advanced by day_of_year days.
444 if (day_of_year > 0) {
445 date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year
446 return d + duration_type(day_of_year);
449 return date_type(t_year, t_month, t_day); // exceptions were thrown earlier
450 // if input was no good
453 //! Throws bad_month if unable to parse
// Convenience overload: forwards to the parse_month overload that also takes
// a match_results argument.
455 parse_month(std::istreambuf_iterator<charT>& sitr,
456 std::istreambuf_iterator<charT>& stream_end,
457 string_type format_str) const
460 return parse_month(sitr, stream_end, format_str, mr);
463 //! Throws bad_month if unable to parse
// Parses a month from [sitr, stream_end) according to format_str; the result
// of the last name match is left in mr for the caller.
465 parse_month(std::istreambuf_iterator<charT>& sitr,
466 std::istreambuf_iterator<charT>& stream_end,
467 string_type format_str,
468 match_results& mr) const
470 bool use_current_char = false;
472 // skip leading whitespace
// NOTE(review): *sitr is evaluated before the sitr != stream_end test, so an
// end-of-stream iterator is dereferenced on empty input -- confirm and
// consider swapping the two operands.
473 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
477 const_itr itr(format_str.begin());
478 while (itr != format_str.end() && (sitr != stream_end)) {
480 if ( ++itr == format_str.end())
// Match against the abbreviated month names.
486 mr = m_month_short_names.match(sitr, stream_end);
487 month = mr.current_match;
488 if (mr.has_remaining()) {
489 use_current_char = true;
// Match against the full month names.
495 mr = m_month_long_names.match(sitr, stream_end);
496 month = mr.current_match;
497 if (mr.has_remaining()) {
498 use_current_char = true;
// Numeric month: reads up to two digits.
504 month = var_string_to_int<short, charT>(sitr, stream_end, 2);
505 // var_string_to_int returns -1 if parse failed. That will
506 // cause a bad_month exception to be thrown so we do nothing here
510 {} //ignore those we don't understand
515 else { // itr == '%', second consecutive
519 itr++; //advance past format specifier
521 else { //skip past chars in format and in buffer
523 if (use_current_char) {
524 use_current_char = false;
532 return month_type(month); // throws bad_month exception when values are zero
535 //! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse
537 parse_var_day_of_month(std::istreambuf_iterator<charT>& sitr,
538 std::istreambuf_iterator<charT>& stream_end) const
540 // skip leading whitespace
// NOTE(review): *sitr is evaluated before the sitr != stream_end test, so an
// end-of-stream iterator is dereferenced on empty input -- confirm and
// consider swapping the two operands.
541 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
// Reads up to two digits and converts the result to day_type.
543 return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
545 //! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse
547 parse_day_of_month(std::istreambuf_iterator<charT>& sitr,
548 std::istreambuf_iterator<charT>& stream_end) const
550 // skip leading whitespace
// NOTE(review): *sitr is evaluated before the sitr != stream_end test, so an
// end-of-stream iterator is dereferenced on empty input -- confirm and
// consider swapping the two operands.
551 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
553 //return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
// Uses the fixed-width helper with a width of two (default '0' fill).
555 return day_type(fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2));
//! Convenience overload: forwards to the parse_weekday overload that also
//! takes a match_results argument.
559 parse_weekday(std::istreambuf_iterator<charT>& sitr,
560 std::istreambuf_iterator<charT>& stream_end,
561 string_type format_str) const
564 return parse_weekday(sitr, stream_end, format_str, mr);
//! Parses a weekday from [sitr, stream_end) according to format_str and
//! returns it as a day_of_week_type; the last name match is left in mr.
567 parse_weekday(std::istreambuf_iterator<charT>& sitr,
568 std::istreambuf_iterator<charT>& stream_end,
569 string_type format_str,
570 match_results& mr) const
572 bool use_current_char = false;
574 // skip leading whitespace
// NOTE(review): *sitr is evaluated before the sitr != stream_end test, so an
// end-of-stream iterator is dereferenced on empty input -- confirm and
// consider swapping the two operands.
575 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
579 const_itr itr(format_str.begin());
580 while (itr != format_str.end() && (sitr != stream_end)) {
582 if ( ++itr == format_str.end())
588 // Match against the abbreviated weekday names. Unlike in
589 // parse_date, the matched value is not discarded here: it is
590 // stored in wkday and becomes this function's result.
592 mr = m_weekday_short_names.match(sitr, stream_end);
593 wkday = mr.current_match;
594 if (mr.has_remaining()) {
595 use_current_char = true;
601 // Match against the full weekday names. Unlike in
602 // parse_date, the matched value is not discarded here: it is
603 // stored in wkday and becomes this function's result.
605 mr = m_weekday_long_names.match(sitr, stream_end);
606 wkday = mr.current_match;
607 if (mr.has_remaining()) {
608 use_current_char = true;
614 // weekday as number 0-6, Sunday == 0
// NOTE(review): up to two digits are read even though the documented range
// is a single digit -- confirm the width is intentional.
615 wkday = var_string_to_int<short, charT>(sitr, stream_end, 2);
619 {} //ignore those we don't understand
624 else { // itr == '%', second consecutive
628 itr++; //advance past format specifier
630 else { //skip past chars in format and in buffer
632 if (use_current_char) {
633 use_current_char = false;
641 return day_of_week_type(wkday); // presumably throws if wkday is out of range for day_of_week_type
642 // NOTE(review): earlier wording named bad_day_of_month; verify the real exception type
645 //! throws bad_year if unable to parse
// Convenience overload: forwards to the parse_year overload that also takes
// a match_results argument.
647 parse_year(std::istreambuf_iterator<charT>& sitr,
648 std::istreambuf_iterator<charT>& stream_end,
649 string_type format_str) const
652 return parse_year(sitr, stream_end, format_str, mr);
655 //! throws bad_year if unable to parse
// Parses a year from [sitr, stream_end) according to format_str and returns
// it as a year_type.
657 parse_year(std::istreambuf_iterator<charT>& sitr,
658 std::istreambuf_iterator<charT>& stream_end,
659 string_type format_str,
660 match_results& mr) const
662 bool use_current_char = false;
664 // skip leading whitespace
// NOTE(review): *sitr is evaluated before the sitr != stream_end test, so an
// end-of-stream iterator is dereferenced on empty input -- confirm and
// consider swapping the two operands.
665 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
667 unsigned short year(0);
669 const_itr itr(format_str.begin());
670 while (itr != format_str.end() && (sitr != stream_end)) {
672 if ( ++itr == format_str.end())
679 // year from 4 digit string
680 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
685 // year from 2 digit string (no century)
// NOTE(review): the helper's -1 failure value is not checked before 2000 is
// added, so a failed two-digit parse does not reach year_type as the
// sentinel -- confirm whether a bad_year is still raised in that case.
686 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
687 year += 2000; //make 2 digit years in this century
691 {} //ignore those we don't understand
696 else { // itr == '%', second consecutive
700 itr++; //advance past format specifier
702 else { //skip past chars in format and in buffer
704 if (use_current_char) {
705 use_current_char = false;
713 return year_type(year); // throws bad_year exception when values are zero
// Format string used by the parse_date overload that takes no format argument.
718 string_type m_format;
// Parse tree built from the abbreviated month names.
719 parse_tree_type m_month_short_names;
// Parse tree built from the full month names.
720 parse_tree_type m_month_long_names;
// Parse tree built from the abbreviated weekday names.
721 parse_tree_type m_weekday_short_names;
// Parse tree built from the full weekday names.
722 parse_tree_type m_weekday_long_names;