]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef DATE_TIME_TZ_DB_BASE_HPP__ |
2 | #define DATE_TIME_TZ_DB_BASE_HPP__ | |
3 | ||
4 | /* Copyright (c) 2003-2005 CrystalClear Software, Inc. | |
5 | * Subject to the Boost Software License, Version 1.0. | |
6 | * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) | |
7 | * Author: Jeff Garland, Bart Garst | |
8 | * $Date$ | |
9 | */ | |
10 | ||
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/throw_exception.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/time_zone_names.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/time_parsing.hpp>
24 | ||
25 | namespace boost { | |
26 | namespace date_time { | |
27 | ||
//! Raised when the tz database is unable to find or open its data file
/*! The what() text is a fixed message, optionally followed by the file
 *  specification that could not be accessed. */
class data_not_accessible : public std::logic_error
{
public:
  //! Builds the exception with the generic message (no file named)
  data_not_accessible()
    : std::logic_error("Unable to locate or access the required datafile.")
  {}

  //! Builds the exception with a message that ends in \a filespec
  data_not_accessible(const std::string& filespec)
    : std::logic_error("Unable to locate or access the required datafile. Filespec: " + filespec)
  {}
};
39 | ||
//! Raised when a record in the data file does not have the expected number of fields
/*! The text handed to the constructor is forwarded unchanged to
 *  std::out_of_range and is what what() reports. */
class bad_field_count : public std::out_of_range
{
public:
  //! \param s description of the malformed record
  bad_field_count(const std::string& s)
    : std::out_of_range(s)
  {}
};
48 | ||
49 | //! Creates a database of time_zones from csv datafile | |
50 | /*! The csv file containing the zone_specs used by the | |
51 | * tz_db_base is intended to be customized by the | |
52 | * library user. When customizing this file (or creating your own) the | |
53 | * file must follow a specific format. | |
54 | * | |
55 | * The first line is expected to contain column headings and is therefore | |
56 | * not processed by the tz_db_base. | |
57 | * | |
58 | * Each record (line) must have eleven fields. Some of those fields can | |
59 | * be empty. Every field (even empty ones) must be enclosed in | |
60 | * double-quotes. | |
61 | * Ex: | |
62 | * @code | |
63 | * "America/Phoenix" <- string enclosed in quotes | |
64 | * "" <- empty field | |
65 | * @endcode | |
66 | * | |
67 | * Some fields represent a length of time. The format of these fields | |
68 | * must be: | |
69 | * @code | |
70 | * "{+|-}hh:mm[:ss]" <- length-of-time format | |
71 | * @endcode | |
72 | * Where the plus or minus is mandatory and the seconds are optional. | |
73 | * | |
74 | * Since some time zones do not use daylight savings it is not always | |
75 | * necessary for every field in a zone_spec to contain a value. All | |
76 | * zone_specs must have at least ID and GMT offset. Zones that use | |
77 | * daylight savings must have all fields filled except: | |
78 | * STD ABBR, STD NAME, DST NAME. You should take note | |
79 | * that DST ABBR is mandatory for zones that use daylight savings | |
80 | * (see field descriptions for further details). | |
81 | * | |
82 | * ******* Fields and their description/details ********* | |
83 | * | |
84 | * ID: | |
85 | * Contains the identifying string for the zone_spec. Any string will | |
86 | * do as long as it's unique. No two IDs can be the same. | |
87 | * | |
88 | * STD ABBR: | |
89 | * STD NAME: | |
90 | * DST ABBR: | |
91 | * DST NAME: | |
92 | * These four are all the names and abbreviations used by the time | |
93 | * zone being described. While any string will do in these fields, | |
94 | * care should be taken. These fields hold the strings that will be | |
95 | * used in the output of many of the local_time classes. | |
96 | * Ex: | |
97 | * @code | |
98 | * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); | |
99 | * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); | |
100 | * cout << ny_time.to_long_string() << endl; | |
101 | * // 2004-Aug-30 00:00:00 Eastern Daylight Time | |
102 | * cout << ny_time.to_short_string() << endl; | |
103 | * // 2004-Aug-30 00:00:00 EDT | |
104 | * @endcode | |
105 | * | |
106 | * NOTE: The exact format/function names may vary - see local_time | |
107 | * documentation for further details. | |
108 | * | |
109 | * GMT offset: | |
110 | * This is the number of hours added to utc to get the local time | |
111 | * before any daylight savings adjustments are made. Some examples | |
112 | * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. | |
113 | * The format must follow the length-of-time format described above. | |
114 | * | |
115 | * DST adjustment: | |
116 | * The amount of time added to gmt_offset when daylight savings is in | |
117 | * effect. The format must follow the length-of-time format described | |
118 | * above. | |
119 | * | |
120 | * DST Start Date rule: | |
121 | * This is a specially formatted string that describes the day of year | |
122 | * in which the transition takes place. It holds three fields of its own, | |
123 | * separated by semicolons. | |
124 | * The first field indicates the "nth" weekday of the month. The possible | |
125 | * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), | |
126 | * and -1 (last). | |
127 | * The second field indicates the day-of-week from 0-6 (Sun=0). | |
128 | * The third field indicates the month from 1-12 (Jan=1). | |
129 | * | |
130 | * Examples are: "-1;5;9"="Last Friday of September", | |
131 | * "2;1;3"="Second Monday of March" | |
132 | * | |
133 | * Start time: | |
134 | * Start time is the number of hours past midnight, on the day of the | |
135 | * start transition, the transition takes place. More simply put, the | |
136 | * time of day the transition is made (in 24 hours format). The format | |
137 | * must follow the length-of-time format described above with the | |
138 | * exception that it must always be positive. | |
139 | * | |
140 | * DST End date rule: | |
141 | * See DST Start date rule. The difference here is this is the day | |
142 | * daylight savings ends (transition to STD). | |
143 | * | |
144 | * End time: | |
145 | * Same as Start time. | |
146 | */ | |
147 | template<class time_zone_type, class rule_type> | |
148 | class tz_db_base { | |
149 | public: | |
150 | /* Having CharT as a template parameter created problems | |
151 | * with posix_time::duration_from_string. Templatizing | |
152 | * duration_from_string was not possible at this time, however, | |
153 | * it should be possible in the future (when poor compilers get | |
154 | * fixed or stop being used). | |
155 | * Since this class was designed to use CharT as a parameter it | |
156 | * is simply typedef'd here to ease converting in back to a | |
157 | * parameter the future */ | |
158 | typedef char char_type; | |
159 | ||
160 | typedef typename time_zone_type::base_type time_zone_base_type; | |
161 | typedef typename time_zone_type::time_duration_type time_duration_type; | |
162 | typedef time_zone_names_base<char_type> time_zone_names; | |
163 | typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; | |
164 | typedef std::basic_string<char_type> string_type; | |
165 | ||
166 | //! Constructs an empty database | |
167 | tz_db_base() {} | |
168 | ||
169 | //! Process csv data file, may throw exceptions | |
170 | /*! May throw bad_field_count exceptions */ | |
171 | void load_from_stream(std::istream &in) | |
172 | { | |
173 | std::string buff; | |
174 | while( std::getline(in, buff)) { | |
175 | parse_string(buff); | |
176 | } | |
177 | } | |
178 | ||
179 | //! Process csv data file, may throw exceptions | |
180 | /*! May throw data_not_accessible, or bad_field_count exceptions */ | |
181 | void load_from_file(const std::string& pathspec) | |
182 | { | |
183 | std::string buff; | |
184 | ||
185 | std::ifstream ifs(pathspec.c_str()); | |
186 | if(!ifs){ | |
187 | boost::throw_exception(data_not_accessible(pathspec)); | |
188 | } | |
189 | std::getline(ifs, buff); // first line is column headings | |
190 | this->load_from_stream(ifs); | |
191 | } | |
192 | ||
193 | //! returns true if record successfully added to map | |
194 | /*! Takes a region name in the form of "America/Phoenix", and a | |
195 | * time_zone object for that region. The id string must be a unique | |
196 | * name that does not already exist in the database. */ | |
197 | bool add_record(const string_type& region, | |
198 | boost::shared_ptr<time_zone_base_type> tz) | |
199 | { | |
200 | typename map_type::value_type p(region, tz); | |
201 | return (m_zone_map.insert(p)).second; | |
202 | } | |
203 | ||
204 | //! Returns a time_zone object built from the specs for the given region | |
205 | /*! Returns a time_zone object built from the specs for the given | |
206 | * region. If region does not exist a local_time::record_not_found | |
207 | * exception will be thrown */ | |
208 | boost::shared_ptr<time_zone_base_type> | |
209 | time_zone_from_region(const string_type& region) const | |
210 | { | |
211 | // get the record | |
212 | typename map_type::const_iterator record = m_zone_map.find(region); | |
213 | if(record == m_zone_map.end()){ | |
214 | return boost::shared_ptr<time_zone_base_type>(); //null pointer | |
215 | } | |
216 | return record->second; | |
217 | } | |
218 | ||
219 | //! Returns a vector of strings holding the time zone regions in the database | |
220 | std::vector<std::string> region_list() const | |
221 | { | |
222 | typedef std::vector<std::string> vector_type; | |
223 | vector_type regions; | |
224 | typename map_type::const_iterator itr = m_zone_map.begin(); | |
225 | while(itr != m_zone_map.end()) { | |
226 | regions.push_back(itr->first); | |
227 | ++itr; | |
228 | } | |
229 | return regions; | |
230 | } | |
231 | ||
232 | private: | |
233 | typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; | |
234 | map_type m_zone_map; | |
235 | ||
236 | // start and end rule are of the same type | |
237 | typedef typename rule_type::start_rule::week_num week_num; | |
238 | ||
239 | /* TODO: mechanisms need to be put in place to handle different | |
240 | * types of rule specs. parse_rules() only handles nth_kday | |
241 | * rule types. */ | |
242 | ||
243 | //! parses rule specs for transition day rules | |
244 | rule_type* parse_rules(const string_type& sr, const string_type& er) const | |
245 | { | |
246 | using namespace gregorian; | |
247 | // start and end rule are of the same type, | |
248 | // both are included here for readability | |
249 | typedef typename rule_type::start_rule start_rule; | |
250 | typedef typename rule_type::end_rule end_rule; | |
251 | ||
252 | // these are: [start|end] nth, day, month | |
253 | int s_nth = 0, s_d = 0, s_m = 0; | |
254 | int e_nth = 0, e_d = 0, e_m = 0; | |
255 | split_rule_spec(s_nth, s_d, s_m, sr); | |
256 | split_rule_spec(e_nth, e_d, e_m, er); | |
257 | ||
258 | typename start_rule::week_num s_wn, e_wn; | |
259 | s_wn = get_week_num(s_nth); | |
260 | e_wn = get_week_num(e_nth); | |
261 | ||
262 | ||
263 | return new rule_type(start_rule(s_wn, | |
264 | static_cast<unsigned short>(s_d), | |
265 | static_cast<unsigned short>(s_m)), | |
266 | end_rule(e_wn, | |
267 | static_cast<unsigned short>(e_d), | |
268 | static_cast<unsigned short>(e_m))); | |
269 | } | |
270 | //! helper function for parse_rules() | |
271 | week_num get_week_num(int nth) const | |
272 | { | |
273 | typedef typename rule_type::start_rule start_rule; | |
274 | switch(nth){ | |
275 | case 1: | |
276 | return start_rule::first; | |
277 | case 2: | |
278 | return start_rule::second; | |
279 | case 3: | |
280 | return start_rule::third; | |
281 | case 4: | |
282 | return start_rule::fourth; | |
283 | case 5: | |
284 | case -1: | |
285 | return start_rule::fifth; | |
286 | default: | |
287 | // shouldn't get here - add error handling later | |
288 | break; | |
289 | } | |
290 | return start_rule::fifth; // silence warnings | |
291 | } | |
292 | ||
293 | //! splits the [start|end]_date_rule string into 3 ints | |
294 | void split_rule_spec(int& nth, int& d, int& m, string_type rule) const | |
295 | { | |
296 | typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; | |
297 | typedef boost::tokenizer<char_separator_type, | |
298 | std::basic_string<char_type>::const_iterator, | |
299 | std::basic_string<char_type> > tokenizer; | |
300 | typedef boost::tokenizer<char_separator_type, | |
301 | std::basic_string<char_type>::const_iterator, | |
302 | std::basic_string<char_type> >::iterator tokenizer_iterator; | |
303 | ||
304 | const char_type sep_char[] = { ';', '\0'}; | |
305 | char_separator_type sep(sep_char); | |
306 | tokenizer tokens(rule, sep); // 3 fields | |
307 | ||
308 | if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) { | |
309 | std::ostringstream msg; | |
310 | msg << "Expecting 3 fields, got " | |
311 | << std::distance ( tokens.begin(), tokens.end ()) | |
312 | << " fields in line: " << rule; | |
313 | boost::throw_exception(bad_field_count(msg.str())); | |
314 | } | |
315 | ||
316 | tokenizer_iterator tok_iter = tokens.begin(); | |
317 | nth = std::atoi(tok_iter->c_str()); ++tok_iter; | |
318 | d = std::atoi(tok_iter->c_str()); ++tok_iter; | |
319 | m = std::atoi(tok_iter->c_str()); | |
320 | } | |
321 | ||
322 | ||
323 | //! Take a line from the csv, turn it into a time_zone_type. | |
324 | /*! Take a line from the csv, turn it into a time_zone_type, | |
325 | * and add it to the map. Zone_specs in csv file are expected to | |
326 | * have eleven fields that describe the time zone. Returns true if | |
327 | * zone_spec successfully added to database */ | |
328 | bool parse_string(string_type& s) | |
329 | { | |
330 | std::vector<string_type> result; | |
331 | typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; | |
332 | ||
333 | token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); | |
334 | ||
335 | token_iter_type end; | |
336 | while (i != end) { | |
337 | result.push_back(*i); | |
338 | i++; | |
339 | } | |
340 | ||
341 | enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, | |
342 | DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, | |
343 | END_TIME, FIELD_COUNT }; | |
344 | ||
345 | //take a shot at fixing gcc 4.x error | |
346 | const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); | |
347 | if (result.size() != expected_fields) { | |
348 | std::ostringstream msg; | |
349 | msg << "Expecting " << FIELD_COUNT << " fields, got " | |
350 | << result.size() << " fields in line: " << s; | |
351 | boost::throw_exception(bad_field_count(msg.str())); | |
352 | BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach | |
353 | } | |
354 | ||
355 | // initializations | |
356 | bool has_dst = true; | |
357 | if(result[DSTABBR] == std::string()){ | |
358 | has_dst = false; | |
359 | } | |
360 | ||
361 | ||
362 | // start building components of a time_zone | |
363 | time_zone_names names(result[STDNAME], result[STDABBR], | |
364 | result[DSTNAME], result[DSTABBR]); | |
365 | ||
366 | time_duration_type utc_offset = | |
367 | str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); | |
368 | ||
369 | dst_adjustment_offsets adjust(time_duration_type(0,0,0), | |
370 | time_duration_type(0,0,0), | |
371 | time_duration_type(0,0,0)); | |
372 | ||
373 | boost::shared_ptr<rule_type> rules; | |
374 | ||
375 | if(has_dst){ | |
376 | adjust = dst_adjustment_offsets( | |
377 | str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), | |
378 | str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), | |
379 | str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) | |
380 | ); | |
381 | ||
382 | rules = | |
383 | boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], | |
384 | result[END_DATE_RULE])); | |
385 | } | |
386 | string_type id(result[ID]); | |
387 | boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); | |
388 | return (add_record(id, zone)); | |
389 | ||
390 | } | |
391 | ||
392 | }; | |
393 | ||
394 | } } // namespace | |
395 | ||
396 | #endif // DATE_TIME_TZ_DB_BASE_HPP__ |