]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef DATE_TIME_TZ_DB_BASE_HPP__ |
2 | #define DATE_TIME_TZ_DB_BASE_HPP__ | |
3 | ||
4 | /* Copyright (c) 2003-2005 CrystalClear Software, Inc. | |
5 | * Subject to the Boost Software License, Version 1.0. | |
6 | * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) | |
7 | * Author: Jeff Garland, Bart Garst | |
8 | * $Date$ | |
9 | */ | |
10 | ||
11 | #include <map> | |
12 | #include <vector> | |
13 | #include <string> | |
14 | #include <sstream> | |
15 | #include <fstream> | |
16 | #include <stdexcept> | |
17 | #include <boost/tokenizer.hpp> | |
18 | #include <boost/shared_ptr.hpp> | |
19 | #include <boost/throw_exception.hpp> | |
20 | #include <boost/date_time/compiler_config.hpp> | |
21 | #include <boost/date_time/time_zone_names.hpp> | |
22 | #include <boost/date_time/time_zone_base.hpp> | |
23 | #include <boost/date_time/time_parsing.hpp> | |
11fdf7f2 | 24 | #include <boost/algorithm/string.hpp> |
7c673cae FG |
25 | |
26 | namespace boost { | |
27 | namespace date_time { | |
28 | ||
//! Exception thrown when the tz database cannot locate or open its data file
/*! Derives from std::logic_error. The what() text is a fixed message,
 *  optionally followed by the file specification that was requested. */
class data_not_accessible : public std::logic_error
{
public:
  //! Builds the exception carrying only the generic message
  data_not_accessible()
    : std::logic_error(std::string("Unable to locate or access the required datafile."))
  {}

  //! Builds the exception with the requested file specification appended
  data_not_accessible(const std::string& filespec)
    : std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: ") + filespec)
  {}
};
40 | ||
//! Exception thrown when a record in the data file has the wrong number of fields
/*! Derives from std::out_of_range; the text passed to the constructor
 *  is reported unchanged through what(). */
class bad_field_count : public std::out_of_range
{
public:
  //! \param s human-readable description of the malformed record
  bad_field_count(const std::string& s)
    : std::out_of_range(s)
  {
  }
};
49 | ||
50 | //! Creates a database of time_zones from csv datafile | |
51 | /*! The csv file containing the zone_specs used by the | |
52 | * tz_db_base is intended to be customized by the | |
53 | * library user. When customizing this file (or creating your own) the | |
54 | * file must follow a specific format. | |
55 | * | |
56 | * The first line is expected to contain column headings and is therefore | |
57 | * not processed by the tz_db_base. | |
58 | * | |
59 | * Each record (line) must have eleven fields. Some of those fields can | |
60 | * be empty. Every field (even empty ones) must be enclosed in | |
61 | * double-quotes. | |
62 | * Ex: | |
63 | * @code | |
64 | * "America/Phoenix" <- string enclosed in quotes | |
65 | * "" <- empty field | |
66 | * @endcode | |
67 | * | |
68 | * Some fields represent a length of time. The format of these fields | |
69 | * must be: | |
70 | * @code | |
71 | * "{+|-}hh:mm[:ss]" <- length-of-time format | |
72 | * @endcode | |
73 | * Where the plus or minus is mandatory and the seconds are optional. | |
74 | * | |
75 | * Since some time zones do not use daylight savings it is not always | |
76 | * necessary for every field in a zone_spec to contain a value. All | |
77 | * zone_specs must have at least ID and GMT offset. Zones that use | |
78 | * daylight savings must have all fields filled except: | |
79 | * STD ABBR, STD NAME, DST NAME. You should take note | |
80 | * that DST ABBR is mandatory for zones that use daylight savings | |
81 | * (see field descriptions for further details). | |
82 | * | |
83 | * ******* Fields and their description/details ********* | |
84 | * | |
85 | * ID: | |
86 | * Contains the identifying string for the zone_spec. Any string will | |
87 | * do as long as it's unique. No two IDs can be the same. | |
88 | * | |
89 | * STD ABBR: | |
90 | * STD NAME: | |
91 | * DST ABBR: | |
92 | * DST NAME: | |
93 | * These four are all the names and abbreviations used by the time | |
94 | * zone being described. While any string will do in these fields, | |
95 | * care should be taken. These fields hold the strings that will be | |
96 | * used in the output of many of the local_time classes. | |
97 | * Ex: | |
98 | * @code | |
99 | * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); | |
100 | * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); | |
101 | * cout << ny_time.to_long_string() << endl; | |
102 | * // 2004-Aug-30 00:00:00 Eastern Daylight Time | |
103 | * cout << ny_time.to_short_string() << endl; | |
104 | * // 2004-Aug-30 00:00:00 EDT | |
105 | * @endcode | |
106 | * | |
107 | * NOTE: The exact format/function names may vary - see local_time | |
108 | * documentation for further details. | |
109 | * | |
110 | * GMT offset: | |
111 | * This is the number of hours added to utc to get the local time | |
112 | * before any daylight savings adjustments are made. Some examples | |
113 | * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. | |
114 | * The format must follow the length-of-time format described above. | |
115 | * | |
116 | * DST adjustment: | |
117 | * The amount of time added to gmt_offset when daylight savings is in | |
118 | * effect. The format must follow the length-of-time format described | |
119 | * above. | |
120 | * | |
121 | * DST Start Date rule: | |
122 | * This is a specially formatted string that describes the day of year | |
123 | * in which the transition takes place. It holds three fields of its own, | |
124 | * separated by semicolons. | |
125 | * The first field indicates the "nth" weekday of the month. The possible | |
126 | * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), | |
127 | * and -1 (last). | |
128 | * The second field indicates the day-of-week from 0-6 (Sun=0). | |
129 | * The third field indicates the month from 1-12 (Jan=1). | |
130 | * | |
131 | * Examples are: "-1;5;9"="Last Friday of September", | |
132 | * "2;1;3"="Second Monday of March" | |
133 | * | |
134 | * Start time: | |
135 | * Start time is the number of hours past midnight, on the day of the | |
136 | * start transition, the transition takes place. More simply put, the | |
137 | * time of day the transition is made (in 24 hours format). The format | |
138 | * must follow the length-of-time format described above with the | |
139 | * exception that it must always be positive. | |
140 | * | |
141 | * DST End date rule: | |
142 | * See DST Start date rule. The difference here is this is the day | |
143 | * daylight savings ends (transition to STD). | |
144 | * | |
145 | * End time: | |
146 | * Same as Start time. | |
147 | */ | |
148 | template<class time_zone_type, class rule_type> | |
149 | class tz_db_base { | |
150 | public: | |
151 | /* Having CharT as a template parameter created problems | |
152 | * with posix_time::duration_from_string. Templatizing | |
153 | * duration_from_string was not possible at this time, however, | |
154 | * it should be possible in the future (when poor compilers get | |
155 | * fixed or stop being used). | |
156 | * Since this class was designed to use CharT as a parameter it | |
157 | * is simply typedef'd here to ease converting it back to a | |
158 | * parameter in the future */ | |
159 | typedef char char_type; | |
160 | ||
161 | typedef typename time_zone_type::base_type time_zone_base_type; | |
162 | typedef typename time_zone_type::time_duration_type time_duration_type; | |
163 | typedef time_zone_names_base<char_type> time_zone_names; | |
164 | typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; | |
165 | typedef std::basic_string<char_type> string_type; | |
166 | ||
167 | //! Constructs an empty database | |
168 | tz_db_base() {} | |
169 | ||
170 | //! Process csv data file, may throw exceptions | |
171 | /*! May throw bad_field_count exceptions */ | |
172 | void load_from_stream(std::istream &in) | |
173 | { | |
11fdf7f2 | 174 | std::string buff; |
7c673cae | 175 | while( std::getline(in, buff)) { |
11fdf7f2 | 176 | boost::trim_right(buff); |
7c673cae FG |
177 | parse_string(buff); |
178 | } | |
179 | } | |
180 | ||
181 | //! Process csv data file, may throw exceptions | |
182 | /*! May throw data_not_accessible, or bad_field_count exceptions */ | |
183 | void load_from_file(const std::string& pathspec) | |
184 | { | |
185 | std::string buff; | |
186 | ||
187 | std::ifstream ifs(pathspec.c_str()); | |
188 | if(!ifs){ | |
189 | boost::throw_exception(data_not_accessible(pathspec)); | |
190 | } | |
191 | std::getline(ifs, buff); // first line is column headings | |
192 | this->load_from_stream(ifs); | |
193 | } | |
194 | ||
195 | //! returns true if record successfully added to map | |
196 | /*! Takes a region name in the form of "America/Phoenix", and a | |
197 | * time_zone object for that region. The id string must be a unique | |
198 | * name that does not already exist in the database. */ | |
199 | bool add_record(const string_type& region, | |
200 | boost::shared_ptr<time_zone_base_type> tz) | |
201 | { | |
202 | typename map_type::value_type p(region, tz); | |
203 | return (m_zone_map.insert(p)).second; | |
204 | } | |
205 | ||
206 | //! Returns a time_zone object built from the specs for the given region | |
207 | /*! Returns a time_zone object built from the specs for the given | |
208 | * region. If region does not exist a local_time::record_not_found | |
209 | * exception will be thrown */ | |
210 | boost::shared_ptr<time_zone_base_type> | |
211 | time_zone_from_region(const string_type& region) const | |
212 | { | |
213 | // get the record | |
214 | typename map_type::const_iterator record = m_zone_map.find(region); | |
215 | if(record == m_zone_map.end()){ | |
216 | return boost::shared_ptr<time_zone_base_type>(); //null pointer | |
217 | } | |
218 | return record->second; | |
219 | } | |
220 | ||
221 | //! Returns a vector of strings holding the time zone regions in the database | |
222 | std::vector<std::string> region_list() const | |
223 | { | |
224 | typedef std::vector<std::string> vector_type; | |
225 | vector_type regions; | |
226 | typename map_type::const_iterator itr = m_zone_map.begin(); | |
227 | while(itr != m_zone_map.end()) { | |
228 | regions.push_back(itr->first); | |
229 | ++itr; | |
230 | } | |
231 | return regions; | |
232 | } | |
233 | ||
234 | private: | |
235 | typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; | |
236 | map_type m_zone_map; | |
237 | ||
238 | // start and end rule are of the same type | |
239 | typedef typename rule_type::start_rule::week_num week_num; | |
240 | ||
241 | /* TODO: mechanisms need to be put in place to handle different | |
242 | * types of rule specs. parse_rules() only handles nth_kday | |
243 | * rule types. */ | |
244 | ||
245 | //! parses rule specs for transition day rules | |
246 | rule_type* parse_rules(const string_type& sr, const string_type& er) const | |
247 | { | |
7c673cae FG |
248 | // start and end rule are of the same type, |
249 | // both are included here for readability | |
250 | typedef typename rule_type::start_rule start_rule; | |
251 | typedef typename rule_type::end_rule end_rule; | |
252 | ||
253 | // these are: [start|end] nth, day, month | |
254 | int s_nth = 0, s_d = 0, s_m = 0; | |
255 | int e_nth = 0, e_d = 0, e_m = 0; | |
256 | split_rule_spec(s_nth, s_d, s_m, sr); | |
257 | split_rule_spec(e_nth, e_d, e_m, er); | |
258 | ||
259 | typename start_rule::week_num s_wn, e_wn; | |
260 | s_wn = get_week_num(s_nth); | |
261 | e_wn = get_week_num(e_nth); | |
262 | ||
263 | ||
264 | return new rule_type(start_rule(s_wn, | |
265 | static_cast<unsigned short>(s_d), | |
266 | static_cast<unsigned short>(s_m)), | |
267 | end_rule(e_wn, | |
268 | static_cast<unsigned short>(e_d), | |
269 | static_cast<unsigned short>(e_m))); | |
270 | } | |
271 | //! helper function for parse_rules() | |
272 | week_num get_week_num(int nth) const | |
273 | { | |
274 | typedef typename rule_type::start_rule start_rule; | |
275 | switch(nth){ | |
276 | case 1: | |
277 | return start_rule::first; | |
278 | case 2: | |
279 | return start_rule::second; | |
280 | case 3: | |
281 | return start_rule::third; | |
282 | case 4: | |
283 | return start_rule::fourth; | |
284 | case 5: | |
285 | case -1: | |
286 | return start_rule::fifth; | |
287 | default: | |
288 | // shouldn't get here - add error handling later | |
289 | break; | |
290 | } | |
291 | return start_rule::fifth; // silence warnings | |
292 | } | |
293 | ||
294 | //! splits the [start|end]_date_rule string into 3 ints | |
295 | void split_rule_spec(int& nth, int& d, int& m, string_type rule) const | |
296 | { | |
297 | typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; | |
298 | typedef boost::tokenizer<char_separator_type, | |
299 | std::basic_string<char_type>::const_iterator, | |
300 | std::basic_string<char_type> > tokenizer; | |
301 | typedef boost::tokenizer<char_separator_type, | |
302 | std::basic_string<char_type>::const_iterator, | |
303 | std::basic_string<char_type> >::iterator tokenizer_iterator; | |
304 | ||
305 | const char_type sep_char[] = { ';', '\0'}; | |
306 | char_separator_type sep(sep_char); | |
307 | tokenizer tokens(rule, sep); // 3 fields | |
308 | ||
309 | if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) { | |
310 | std::ostringstream msg; | |
311 | msg << "Expecting 3 fields, got " | |
312 | << std::distance ( tokens.begin(), tokens.end ()) | |
313 | << " fields in line: " << rule; | |
314 | boost::throw_exception(bad_field_count(msg.str())); | |
315 | } | |
316 | ||
317 | tokenizer_iterator tok_iter = tokens.begin(); | |
318 | nth = std::atoi(tok_iter->c_str()); ++tok_iter; | |
319 | d = std::atoi(tok_iter->c_str()); ++tok_iter; | |
320 | m = std::atoi(tok_iter->c_str()); | |
321 | } | |
322 | ||
323 | ||
324 | //! Take a line from the csv, turn it into a time_zone_type. | |
325 | /*! Take a line from the csv, turn it into a time_zone_type, | |
326 | * and add it to the map. Zone_specs in csv file are expected to | |
327 | * have eleven fields that describe the time zone. Returns true if | |
328 | * zone_spec successfully added to database */ | |
329 | bool parse_string(string_type& s) | |
330 | { | |
331 | std::vector<string_type> result; | |
332 | typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; | |
333 | ||
334 | token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); | |
335 | ||
336 | token_iter_type end; | |
337 | while (i != end) { | |
338 | result.push_back(*i); | |
339 | i++; | |
340 | } | |
341 | ||
342 | enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, | |
343 | DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, | |
344 | END_TIME, FIELD_COUNT }; | |
345 | ||
346 | //take a shot at fixing gcc 4.x error | |
347 | const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); | |
348 | if (result.size() != expected_fields) { | |
349 | std::ostringstream msg; | |
350 | msg << "Expecting " << FIELD_COUNT << " fields, got " | |
351 | << result.size() << " fields in line: " << s; | |
352 | boost::throw_exception(bad_field_count(msg.str())); | |
353 | BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach | |
354 | } | |
355 | ||
356 | // initializations | |
357 | bool has_dst = true; | |
358 | if(result[DSTABBR] == std::string()){ | |
359 | has_dst = false; | |
360 | } | |
361 | ||
362 | ||
363 | // start building components of a time_zone | |
364 | time_zone_names names(result[STDNAME], result[STDABBR], | |
365 | result[DSTNAME], result[DSTABBR]); | |
366 | ||
367 | time_duration_type utc_offset = | |
368 | str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); | |
369 | ||
370 | dst_adjustment_offsets adjust(time_duration_type(0,0,0), | |
371 | time_duration_type(0,0,0), | |
372 | time_duration_type(0,0,0)); | |
373 | ||
374 | boost::shared_ptr<rule_type> rules; | |
375 | ||
376 | if(has_dst){ | |
377 | adjust = dst_adjustment_offsets( | |
378 | str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), | |
379 | str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), | |
380 | str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) | |
381 | ); | |
382 | ||
383 | rules = | |
384 | boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], | |
385 | result[END_DATE_RULE])); | |
386 | } | |
387 | string_type id(result[ID]); | |
388 | boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); | |
389 | return (add_record(id, zone)); | |
390 | ||
391 | } | |
392 | ||
393 | }; | |
394 | ||
395 | } } // namespace | |
396 | ||
397 | #endif // DATE_TIME_TZ_DB_BASE_HPP__ |