]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | #ifndef __S3SELECT_OPER__ |
2 | #define __S3SELECT_OPER__ | |
3 | ||
4 | #include <string> | |
5 | #include <iostream> | |
6 | #include <list> | |
7 | #include <map> | |
8 | #include <vector> | |
20effc67 TL |
9 | #include <algorithm> |
10 | #include <cstring> | |
11 | #include <cmath> | |
1e59de90 | 12 | #include <set> |
f67539c2 TL |
13 | |
14 | #include <boost/lexical_cast.hpp> | |
15 | #include <boost/date_time/posix_time/posix_time.hpp> | |
16 | #include <boost/bind.hpp> | |
20effc67 TL |
17 | #include "s3select_parquet_intrf.h" //NOTE: should include first (c++11 std::string_view) |
18 | ||
1e59de90 TL |
19 | |
20 | #if __has_include (<hs/hs.h>) && REGEX_HS | |
21 | #include <hs/hs.h> | |
22 | #elif __has_include (<re2/re2.h>) && REGEX_RE2 | |
23 | #include <re2/re2.h> | |
24 | #else | |
25 | #include <regex> | |
26 | #undef REGEX_HS | |
27 | #undef REGEX_RE2 | |
28 | #endif | |
29 | ||
f67539c2 TL |
30 | namespace bsc = BOOST_SPIRIT_CLASSIC_NS; |
31 | ||
32 | namespace s3selectEngine | |
33 | { | |
34 | ||
20effc67 TL |
35 | //=== stl allocator definition |
36 | //this allocator is fit for placement new (no calls to heap) | |
37 | ||
// Thrown by ChunkAllocator when a request does not fit into what is left of its pool.
class chunkalloc_out_of_mem
{
};

// STL-compatible allocator that carves storage out of a fixed-size buffer embedded in
// the allocator object itself (no heap calls).
//  - T       : element type.
//  - pool_sz : size of the embedded pool, in bytes.
// Storage is handed out linearly and deallocate() is a no-op, so memory is never reused;
// the caller is expected to know its memory limits in advance.
// NOTE(review): the returned pointers point into this allocator object, so they stay valid
// only as long as this very object lives - worth confirming for moved/swapped containers.
template <typename T, size_t pool_sz>
class ChunkAllocator : public std::allocator<T>
{
public:
  typedef size_t size_type;
  typedef T* pointer;
  size_t buffer_capacity; // number of pool bytes already handed out (offset of the next chunk)
  char* buffer_ptr;       // start of the pool, always &buffer[0]

  //only ONE pool,not allocated dynamically; main assumption, caller knows in advance its memory limitations.
  char buffer[pool_sz];

  template <typename _Tp1>
  struct rebind
  {
    typedef ChunkAllocator<_Tp1, pool_sz> other;
  };

  //==================================
  // Reserves storage for num_of_element objects of type T.
  // The offset of the following chunk is rounded up to a multiple of sizeof(char*).
  // Throws chunkalloc_out_of_mem when the (padded) request does not fit; in that case the
  // pool is left untouched, so smaller requests can still be served afterwards.
  inline T* _Allocate(size_t num_of_element, T*)
  {
    // invariant: buffer_capacity <= sizeof(buffer), an offset is committed only after it was validated
    const size_t remaining = sizeof(buffer) - buffer_capacity;

    // compare element counts instead of byte counts: sizeof(T) * num_of_element may wrap around
    if (num_of_element > remaining / sizeof(T))
    {
      throw chunkalloc_out_of_mem();
    }

    size_t next_offset = buffer_capacity + sizeof(T) * num_of_element;

    const size_t addr_alignment = (next_offset % sizeof(char*));
    next_offset += addr_alignment != 0 ? sizeof(char*) - addr_alignment : 0;

    if (next_offset > sizeof(buffer))
    {
      throw chunkalloc_out_of_mem();
    }

    pointer res = reinterpret_cast<pointer>(buffer_ptr + buffer_capacity);
    buffer_capacity = next_offset;

    return res;
  }

  //==================================
  // std::allocator interface; the hint is ignored.
  inline pointer allocate(size_type n, [[maybe_unused]] const void* hint = 0)
  {
    return (_Allocate(n, (pointer)0));
  }

  //==================================
  // Intentionally empty: chunks are never returned to the pool.
  inline void deallocate(pointer, size_type)
  {
  }

  //==================================
  ChunkAllocator() noexcept : std::allocator<T>()
  {
    // alloc from main-buffer
    buffer_capacity = 0;
    memset( &buffer[0], 0, sizeof(buffer));
    buffer_ptr = &buffer[0];
  }

  //==================================
  // A copy starts with its own, empty pool; nothing is copied from other's buffer.
  ChunkAllocator(const ChunkAllocator& other) noexcept : std::allocator<T>(other)
  {
    buffer_capacity = 0;
    buffer_ptr = &buffer[0];
  }

  //==================================
  ~ChunkAllocator() noexcept
  {
    //do nothing
  }
};
114 | ||
115 | class base_statement; | |
116 | //typedef std::vector<base_statement *> bs_stmt_vec_t; //without specific allocator | |
117 | ||
118 | //ChunkAllocator, prevent allocation from heap. | |
119 | typedef std::vector<base_statement*, ChunkAllocator<base_statement*, 256> > bs_stmt_vec_t; | |
120 | ||
// Exception type used across the s3select engine.
// Carries a human readable message and a severity level.
class base_s3select_exception : public std::exception
{

public:
  enum class s3select_exp_en_t
  {
    NONE,
    ERROR,
    FATAL
  } ;

private:
  s3select_exp_en_t m_severity;

public:
  std::string _msg;

  // Message only; severity defaults to NONE. A null pointer yields an empty message.
  explicit base_s3select_exception(const char* n) : m_severity(s3select_exp_en_t::NONE)
  {
    _msg.assign(n != nullptr ? n : "");
  }

  // Message and explicit severity. A null pointer yields an empty message.
  base_s3select_exception(const char* n, s3select_exp_en_t severity) : m_severity(severity)
  {
    _msg.assign(n != nullptr ? n : "");
  }

  base_s3select_exception(std::string n, s3select_exp_en_t severity) : m_severity(severity)
  {
    _msg = std::move(n);
  }

  // Returns the stored message; the pointer is valid as long as this object (and _msg) is unchanged.
  // Uses the standard noexcept specifier instead of a libstdc++ internal macro.
  const char* what() const noexcept override
  {
    return _msg.c_str();
  }

  s3select_exp_en_t severity() const
  {
    return m_severity;
  }

  virtual ~base_s3select_exception() = default;
};
162 | ||
163 | ||
f67539c2 TL |
164 | |
165 | class s3select_allocator //s3select is the "owner" | |
166 | { | |
167 | private: | |
168 | ||
169 | std::vector<char*> list_of_buff; | |
20effc67 | 170 | std::vector<char*> list_of_ptr; |
f67539c2 TL |
171 | u_int32_t m_idx; |
172 | ||
20effc67 | 173 | #define __S3_ALLOCATION_BUFF__ (24*1024) |
f67539c2 TL |
174 | void check_capacity(size_t sz) |
175 | { | |
176 | if (sz>__S3_ALLOCATION_BUFF__) | |
177 | { | |
178 | throw base_s3select_exception("requested size too big", base_s3select_exception::s3select_exp_en_t::FATAL); | |
179 | } | |
180 | ||
181 | if ((m_idx + sz) >= __S3_ALLOCATION_BUFF__) | |
182 | { | |
183 | list_of_buff.push_back((char*)malloc(__S3_ALLOCATION_BUFF__)); | |
184 | m_idx = 0; | |
185 | } | |
186 | } | |
187 | ||
188 | void inc(size_t sz) | |
189 | { | |
190 | m_idx += sz; | |
191 | m_idx += sizeof(char*) - (m_idx % sizeof(char*)); //alignment | |
192 | } | |
193 | ||
20effc67 TL |
194 | public: |
195 | s3select_allocator():m_idx(0) | |
196 | { | |
197 | list_of_buff.push_back((char*)malloc(__S3_ALLOCATION_BUFF__)); | |
198 | } | |
199 | ||
200 | void *alloc(size_t sz) | |
f67539c2 | 201 | { |
20effc67 TL |
202 | check_capacity(sz); |
203 | ||
204 | char* buff = list_of_buff.back(); | |
205 | ||
206 | u_int32_t idx = m_idx; | |
207 | ||
208 | inc(sz); | |
209 | ||
210 | return &buff[ idx ]; | |
211 | } | |
212 | ||
213 | void push_for_delete(void *p) | |
214 | {//in case of using S3SELECT_NO_PLACEMENT_NEW | |
215 | list_of_ptr.push_back((char*)p); | |
f67539c2 TL |
216 | } |
217 | ||
218 | virtual ~s3select_allocator() | |
219 | { | |
220 | for(auto b : list_of_buff) | |
221 | { | |
222 | free(b); | |
223 | } | |
f67539c2 | 224 | |
20effc67 TL |
225 | for(auto b : list_of_ptr) |
226 | {//in case of using S3SELECT_NO_PLACEMENT_NEW | |
227 | delete(b); | |
228 | } | |
f67539c2 TL |
229 | } |
230 | }; | |
231 | ||
// Placement new: every s3select object is constructed inside storage obtained from
// self->getAllocator()->alloc(), i.e. on a single (or a few) buffers; the objects are
// released by releasing those buffers. The expansion already ends with ';'.
#define S3SELECT_NEW(self, type , ... ) [=]() \
  { \
    void* s3select_storage = self->getAllocator()->alloc(sizeof(type)); \
    return new (s3select_storage) type(__VA_ARGS__); \
  }();
238 | ||
20effc67 TL |
// No placement new: an ordinary new, plus registration of the object with
// self->getAllocator()->push_for_delete() so that the AST nodes can be deleted later.
// (this variant exists for verifying the valgrind report on leaks)
#define S3SELECT_NO_PLACEMENT_NEW(self, type , ... ) [=]() \
  { \
    type* s3select_obj = new type(__VA_ARGS__); \
    self->getAllocator()->push_for_delete(s3select_obj); \
    return s3select_obj; \
  }();
f67539c2 | 247 | |
20effc67 TL |
// Table of the reserved words recognized by the engine (null / nan / true / false).
// Only the exact spellings listed in m_reserved_words are matched.
class s3select_reserved_word
{
public:

  enum class reserve_word_en_t
  {
    NA,
    S3S_NULL,//TODO check AWS definitions for reserved words, it is a long list; what about function names?
    S3S_NAN,
    S3S_TRUE,
    S3S_FALSE
  } ;

  using reserved_words = std::map<std::string,reserve_word_en_t>;

  const reserved_words m_reserved_words=
  {
    {"null",reserve_word_en_t::S3S_NULL},{"NULL",reserve_word_en_t::S3S_NULL},
    {"nan",reserve_word_en_t::S3S_NAN},{"NaN",reserve_word_en_t::S3S_NAN},
    {"true",reserve_word_en_t::S3S_TRUE},{"TRUE",reserve_word_en_t::S3S_TRUE},
    {"false",reserve_word_en_t::S3S_FALSE},{"FALSE",reserve_word_en_t::S3S_FALSE}
  };

  // Returns true when token is one of the spellings in m_reserved_words.
  bool is_reserved_word(const std::string & token) const
  {
    return m_reserved_words.find(token) != m_reserved_words.end() ;
  }

  // Returns the reserved-word id of token, or reserve_word_en_t::NA when it is not reserved.
  reserve_word_en_t get_reserved_word(const std::string & token) const
  {
    // single lookup instead of is_reserved_word() followed by a second find()
    const auto entry = m_reserved_words.find(token);

    return entry != m_reserved_words.end() ? entry->second : reserve_word_en_t::NA;
  }

};
289 | ||
class base_statement;

// Maps alias names to the base_statement they stand for.
// These routines are *NOT* intensive, they run once per query parse time.
class projection_alias
{
private:
  std::vector< std::pair<std::string, base_statement*> > alias_map;

public:
  // Direct access to the underlying (name, statement) list.
  std::vector< std::pair<std::string, base_statement*> >* get()
  {
    return &alias_map;
  }

  // Registers alias_name for bs.
  // Returns false (and changes nothing) when the name is already registered.
  bool insert_new_entry(const std::string& alias_name, base_statement* bs)
  {
    //purpose: only unique alias names.
    for(const auto& alias: alias_map) // by reference: no pair/string copy per iteration
    {
      if(alias.first == alias_name)
      {
        return false; //alias name already exist
      }
    }

    alias_map.emplace_back(alias_name, bs);

    return true;
  }

  // Returns the statement registered under alias_name, or nullptr when there is none.
  base_statement* search_alias(const std::string& alias_name)
  {
    for(const auto& alias: alias_map)
    {
      if(alias.first == alias_name)
      {
        return alias.second; //reference to execution node
      }
    }
    return nullptr;
  }
};
335 | ||
// Functor: arithmetic addition of two doubles.
struct binop_plus
{
  double operator()(double a, double b)
  {
    const double sum = a + b;
    return sum;
  }
};
343 | ||
// Functor: arithmetic subtraction (a minus b) of two doubles.
struct binop_minus
{
  double operator()(double a, double b)
  {
    const double difference = a - b;
    return difference;
  }
};
351 | ||
// Functor: arithmetic multiplication of two doubles.
struct binop_mult
{
  double operator()(double a, double b)
  {
    const double product = a * b;
    return product;
  }
};
f67539c2 TL |
359 | |
360 | struct binop_div | |
361 | { | |
362 | double operator()(double a, double b) | |
363 | { | |
20effc67 | 364 | if (b == 0) { |
1e59de90 | 365 | if( std::isnan(a)) { |
20effc67 TL |
366 | return a; |
367 | } else { | |
368 | throw base_s3select_exception("division by zero is not allowed"); | |
369 | } | |
370 | } else { | |
371 | return a / b; | |
372 | } | |
f67539c2 TL |
373 | } |
374 | }; | |
375 | ||
// Functor: a raised to the power of b.
struct binop_pow
{
  double operator()(double a, double b)
  {
    const double result = std::pow(a, b);
    return result;
  }
};
383 | ||
20effc67 TL |
384 | struct binop_modulo |
385 | { | |
386 | int64_t operator()(int64_t a, int64_t b) | |
387 | { | |
388 | if (b == 0) | |
389 | { | |
390 | throw base_s3select_exception("Mod zero is not allowed"); | |
391 | } else { | |
392 | return a % b; | |
393 | } | |
394 | } | |
395 | }; | |
396 | ||
397 | typedef std::tuple<boost::posix_time::ptime, boost::posix_time::time_duration, bool> timestamp_t; | |
398 | ||
class value;

// Flat list of non-owned value pointers.
class multi_values
{
public:
  std::vector<value*> values;

  // appends v (defined out of line, after class value is complete)
  void push_value(value* v);

  // drops all stored pointers; the pointed-to values are not touched
  void clear() { values.clear(); }

};
414 | ||
f67539c2 TL |
415 | class value |
416 | { | |
417 | ||
418 | public: | |
419 | typedef union | |
420 | { | |
421 | int64_t num; | |
20effc67 | 422 | char* str;//TODO consider string_view(save copy) |
f67539c2 | 423 | double dbl; |
20effc67 | 424 | timestamp_t* timestamp; |
1e59de90 | 425 | bool b; |
f67539c2 TL |
426 | } value_t; |
427 | ||
20effc67 TL |
428 | multi_values multiple_values; |
429 | ||
f67539c2 TL |
430 | private: |
431 | value_t __val; | |
1e59de90 TL |
432 | //JSON query has a unique structure, the variable-name reside on input. there are cases were it should be extracted. |
433 | std::vector<std::string> m_json_key; | |
434 | std::string m_to_string; | |
435 | //std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> m_to_string; | |
436 | std::string m_str_value; | |
437 | //std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> m_str_value; | |
f67539c2 TL |
438 | |
439 | public: | |
440 | enum class value_En_t | |
441 | { | |
442 | DECIMAL, | |
443 | FLOAT, | |
444 | STRING, | |
445 | TIMESTAMP, | |
20effc67 TL |
446 | S3NULL, |
447 | S3NAN, | |
448 | BOOL, | |
449 | MULTIPLE_VALUES, | |
f67539c2 TL |
450 | NA |
451 | } ; | |
452 | value_En_t type; | |
453 | ||
20effc67 | 454 | explicit value(int64_t n) : type(value_En_t::DECIMAL) |
f67539c2 TL |
455 | { |
456 | __val.num = n; | |
457 | } | |
20effc67 | 458 | explicit value(int n) : type(value_En_t::DECIMAL) |
f67539c2 TL |
459 | { |
460 | __val.num = n; | |
461 | } | |
1e59de90 | 462 | explicit value(bool b) : type(value_En_t::BOOL) |
f67539c2 TL |
463 | { |
464 | __val.num = (int64_t)b; | |
465 | } | |
20effc67 | 466 | explicit value(double d) : type(value_En_t::FLOAT) |
f67539c2 TL |
467 | { |
468 | __val.dbl = d; | |
469 | } | |
20effc67 | 470 | explicit value(timestamp_t* timestamp) : type(value_En_t::TIMESTAMP) |
f67539c2 TL |
471 | { |
472 | __val.timestamp = timestamp; | |
473 | } | |
474 | ||
20effc67 | 475 | explicit value(const char* s) : type(value_En_t::STRING) |
f67539c2 TL |
476 | { |
477 | m_str_value.assign(s); | |
478 | __val.str = m_str_value.data(); | |
479 | } | |
480 | ||
1e59de90 TL |
481 | explicit value(std::nullptr_t) : type(value_En_t::S3NULL) |
482 | {} | |
483 | ||
20effc67 TL |
484 | ~value() |
485 | {//TODO should be a part of the cleanup routine(__function::push_for_cleanup) | |
486 | multiple_values.values.clear(); | |
487 | } | |
488 | ||
f67539c2 TL |
489 | value():type(value_En_t::NA) |
490 | { | |
491 | __val.num=0; | |
492 | } | |
493 | ||
494 | bool is_number() const | |
495 | { | |
496 | if ((type == value_En_t::DECIMAL || type == value_En_t::FLOAT)) | |
497 | { | |
498 | return true; | |
499 | } | |
500 | ||
501 | return false; | |
502 | } | |
503 | ||
504 | bool is_string() const | |
505 | { | |
506 | return type == value_En_t::STRING; | |
507 | } | |
508 | bool is_timestamp() const | |
509 | { | |
510 | return type == value_En_t::TIMESTAMP; | |
511 | } | |
512 | ||
20effc67 TL |
513 | bool is_bool() const |
514 | { | |
515 | return type == value_En_t::BOOL; | |
516 | } | |
517 | ||
518 | bool is_null() const | |
519 | { | |
520 | return type == value_En_t::S3NULL; | |
521 | } | |
522 | ||
523 | bool is_nan() const | |
524 | { | |
525 | if (type == value_En_t::FLOAT) { | |
526 | return std::isnan(this->__val.dbl); | |
527 | } | |
528 | return type == value_En_t::S3NAN; | |
529 | } | |
530 | ||
531 | bool is_true() | |
532 | { | |
533 | return (i64()!=0 && !is_null()); | |
534 | } | |
535 | ||
536 | void set_nan() | |
537 | { | |
538 | __val.dbl = NAN; | |
539 | type = value_En_t::FLOAT; | |
540 | } | |
541 | ||
542 | void set_true() | |
543 | { | |
544 | __val.num = 1; | |
545 | type = value_En_t::BOOL; | |
546 | } | |
547 | ||
548 | void set_false() | |
549 | { | |
550 | __val.num = 0; | |
551 | type = value_En_t::BOOL; | |
552 | } | |
553 | ||
554 | void setnull() | |
555 | { | |
556 | type = value_En_t::S3NULL; | |
557 | } | |
f67539c2 | 558 | |
1e59de90 TL |
559 | void set_string_nocopy(char* str) |
560 | {//purpose: value does not own the string | |
561 | __val.str = str; | |
562 | type = value_En_t::STRING; | |
563 | } | |
564 | ||
565 | value_En_t _type() const { return type; } | |
566 | ||
567 | void set_json_key_path(std::vector<std::string>& key_path) | |
568 | { | |
569 | m_json_key = key_path; | |
570 | } | |
571 | ||
20effc67 | 572 | const char* to_string() //TODO very intensive , must improve this |
f67539c2 TL |
573 | { |
574 | ||
575 | if (type != value_En_t::STRING) | |
576 | { | |
577 | if (type == value_En_t::DECIMAL) | |
578 | { | |
579 | m_to_string.assign( boost::lexical_cast<std::string>(__val.num) ); | |
580 | } | |
20effc67 TL |
581 | if (type == value_En_t::BOOL) |
582 | { | |
583 | if(__val.num == 0) | |
584 | { | |
585 | m_to_string.assign("false"); | |
586 | } | |
587 | else | |
588 | { | |
589 | m_to_string.assign("true"); | |
590 | } | |
591 | } | |
f67539c2 TL |
592 | else if(type == value_En_t::FLOAT) |
593 | { | |
594 | m_to_string = boost::lexical_cast<std::string>(__val.dbl); | |
595 | } | |
20effc67 TL |
596 | else if (type == value_En_t::TIMESTAMP) |
597 | { | |
598 | boost::posix_time::ptime new_ptime; | |
599 | boost::posix_time::time_duration td; | |
600 | bool flag; | |
601 | ||
602 | std::tie(new_ptime, td, flag) = *__val.timestamp; | |
603 | ||
604 | if (flag) | |
605 | { | |
606 | m_to_string = to_iso_extended_string(new_ptime) + "Z"; | |
607 | } | |
608 | else | |
609 | { | |
610 | std::string tz_hour = std::to_string(std::abs(td.hours())); | |
611 | std::string tz_mint = std::to_string(std::abs(td.minutes())); | |
612 | std::string sign; | |
613 | if (td.is_negative()) | |
614 | sign = "-"; | |
615 | else | |
616 | sign = "+"; | |
617 | ||
618 | m_to_string = to_iso_extended_string(new_ptime) + sign + | |
619 | std::string(2 - tz_hour.length(), '0') + tz_hour + ":" | |
620 | + std::string(2 - tz_mint.length(), '0') + tz_mint; | |
621 | } | |
622 | } | |
623 | else if (type == value_En_t::S3NULL) | |
f67539c2 | 624 | { |
20effc67 | 625 | m_to_string.assign("null"); |
f67539c2 TL |
626 | } |
627 | } | |
628 | else | |
629 | { | |
630 | m_to_string.assign( __val.str ); | |
631 | } | |
632 | ||
1e59de90 TL |
633 | if(m_json_key.size()) |
634 | { | |
635 | std::string key_path; | |
636 | for(auto& p : m_json_key) | |
637 | {//TODO upon star-operation key-path assignment is very intensive | |
638 | key_path.append(p); | |
639 | key_path.append("."); | |
640 | } | |
641 | ||
642 | key_path.append(" : "); | |
643 | key_path.append(m_to_string); | |
644 | m_to_string = key_path; | |
645 | } | |
646 | ||
20effc67 | 647 | return m_to_string.c_str(); |
f67539c2 TL |
648 | } |
649 | ||
1e59de90 TL |
650 | value(const value& o) |
651 | { | |
652 | if(o.type == value_En_t::STRING) | |
653 | { | |
654 | if(o.m_str_value.size()) | |
655 | { | |
656 | m_str_value = o.m_str_value; | |
657 | __val.str = m_str_value.data(); | |
658 | } | |
659 | else if(o.__val.str) | |
660 | { | |
661 | __val.str = o.__val.str; | |
662 | } | |
663 | } | |
664 | else | |
665 | { | |
666 | this->__val = o.__val; | |
667 | } | |
668 | ||
669 | this->m_json_key = o.m_json_key; | |
670 | ||
671 | this->type = o.type; | |
672 | } | |
f67539c2 TL |
673 | |
674 | value& operator=(value& o) | |
675 | { | |
20effc67 | 676 | if(o.type == value_En_t::STRING) |
f67539c2 | 677 | { |
1e59de90 TL |
678 | if(o.m_str_value.size()) |
679 | { | |
680 | m_str_value = o.m_str_value; | |
681 | __val.str = m_str_value.data(); | |
682 | } | |
683 | else if(o.__val.str) | |
684 | { | |
685 | __val.str = o.__val.str; | |
686 | } | |
f67539c2 TL |
687 | } |
688 | else | |
689 | { | |
690 | this->__val = o.__val; | |
691 | } | |
692 | ||
693 | this->type = o.type; | |
694 | ||
1e59de90 TL |
695 | this->m_json_key = o.m_json_key; |
696 | ||
f67539c2 TL |
697 | return *this; |
698 | } | |
699 | ||
700 | value& operator=(const char* s) | |
701 | { | |
702 | m_str_value.assign(s); | |
703 | this->__val.str = m_str_value.data(); | |
704 | this->type = value_En_t::STRING; | |
705 | ||
706 | return *this; | |
707 | } | |
708 | ||
709 | value& operator=(int64_t i) | |
710 | { | |
711 | this->__val.num = i; | |
712 | this->type = value_En_t::DECIMAL; | |
713 | ||
714 | return *this; | |
715 | } | |
716 | ||
1e59de90 TL |
717 | value& operator=(int i) |
718 | { | |
719 | this->__val.num = i; | |
720 | this->type = value_En_t::DECIMAL; | |
721 | ||
722 | return *this; | |
723 | } | |
724 | ||
725 | value& operator=(unsigned i) | |
726 | { | |
727 | this->__val.num = i; | |
728 | this->type = value_En_t::DECIMAL; | |
729 | ||
730 | return *this; | |
731 | } | |
732 | ||
733 | value& operator=(uint64_t i) | |
734 | { | |
735 | this->__val.num = i; | |
736 | this->type = value_En_t::DECIMAL; | |
737 | ||
738 | return *this; | |
739 | } | |
740 | ||
f67539c2 TL |
741 | value& operator=(double d) |
742 | { | |
743 | this->__val.dbl = d; | |
744 | this->type = value_En_t::FLOAT; | |
745 | ||
746 | return *this; | |
747 | } | |
748 | ||
749 | value& operator=(bool b) | |
750 | { | |
751 | this->__val.num = (int64_t)b; | |
20effc67 | 752 | this->type = value_En_t::BOOL; |
f67539c2 TL |
753 | |
754 | return *this; | |
755 | } | |
756 | ||
20effc67 | 757 | value& operator=(timestamp_t* p) |
f67539c2 TL |
758 | { |
759 | this->__val.timestamp = p; | |
760 | this->type = value_En_t::TIMESTAMP; | |
761 | ||
762 | return *this; | |
763 | } | |
764 | ||
765 | int64_t i64() | |
766 | { | |
767 | return __val.num; | |
768 | } | |
769 | ||
770 | const char* str() | |
771 | { | |
772 | return __val.str; | |
773 | } | |
774 | ||
775 | double dbl() | |
776 | { | |
777 | return __val.dbl; | |
778 | } | |
779 | ||
1e59de90 TL |
780 | bool bl() |
781 | { | |
782 | return __val.b; | |
783 | } | |
784 | ||
20effc67 | 785 | timestamp_t* timestamp() const |
f67539c2 TL |
786 | { |
787 | return __val.timestamp; | |
788 | } | |
789 | ||
790 | bool operator<(const value& v)//basic compare operator , most itensive runtime operation | |
20effc67 | 791 | { |
f67539c2 TL |
792 | //TODO NA possible? |
793 | if (is_string() && v.is_string()) | |
794 | { | |
795 | return strcmp(__val.str, v.__val.str) < 0; | |
796 | } | |
797 | ||
798 | if (is_number() && v.is_number()) | |
799 | { | |
800 | ||
801 | if(type != v.type) //conversion //TODO find better way | |
802 | { | |
803 | if (type == value_En_t::DECIMAL) | |
804 | { | |
805 | return (double)__val.num < v.__val.dbl; | |
806 | } | |
807 | else | |
808 | { | |
809 | return __val.dbl < (double)v.__val.num; | |
810 | } | |
811 | } | |
812 | else //no conversion | |
813 | { | |
814 | if(type == value_En_t::DECIMAL) | |
815 | { | |
816 | return __val.num < v.__val.num; | |
817 | } | |
818 | else | |
819 | { | |
820 | return __val.dbl < v.__val.dbl; | |
821 | } | |
822 | ||
823 | } | |
824 | } | |
825 | ||
826 | if(is_timestamp() && v.is_timestamp()) | |
827 | { | |
828 | return *timestamp() < *(v.timestamp()); | |
829 | } | |
830 | ||
20effc67 TL |
831 | if(is_nan() || v.is_nan()) |
832 | { | |
833 | return false; | |
834 | } | |
835 | ||
f67539c2 TL |
836 | throw base_s3select_exception("operands not of the same type(numeric , string), while comparision"); |
837 | } | |
838 | ||
839 | bool operator>(const value& v) //basic compare operator , most itensive runtime operation | |
840 | { | |
841 | //TODO NA possible? | |
842 | if (is_string() && v.is_string()) | |
843 | { | |
844 | return strcmp(__val.str, v.__val.str) > 0; | |
845 | } | |
846 | ||
847 | if (is_number() && v.is_number()) | |
848 | { | |
849 | ||
850 | if(type != v.type) //conversion //TODO find better way | |
851 | { | |
852 | if (type == value_En_t::DECIMAL) | |
853 | { | |
854 | return (double)__val.num > v.__val.dbl; | |
855 | } | |
856 | else | |
857 | { | |
858 | return __val.dbl > (double)v.__val.num; | |
859 | } | |
860 | } | |
861 | else //no conversion | |
862 | { | |
863 | if(type == value_En_t::DECIMAL) | |
864 | { | |
865 | return __val.num > v.__val.num; | |
866 | } | |
867 | else | |
868 | { | |
869 | return __val.dbl > v.__val.dbl; | |
870 | } | |
871 | ||
872 | } | |
873 | } | |
874 | ||
875 | if(is_timestamp() && v.is_timestamp()) | |
876 | { | |
877 | return *timestamp() > *(v.timestamp()); | |
878 | } | |
879 | ||
20effc67 TL |
880 | if(is_nan() || v.is_nan()) |
881 | { | |
882 | return false; | |
883 | } | |
884 | ||
f67539c2 TL |
885 | throw base_s3select_exception("operands not of the same type(numeric , string), while comparision"); |
886 | } | |
887 | ||
1e59de90 | 888 | friend bool operator==(const value& lhs, const value& rhs) //basic compare operator , most itensive runtime operation |
f67539c2 TL |
889 | { |
890 | //TODO NA possible? | |
1e59de90 | 891 | if (lhs.is_string() && rhs.is_string()) |
f67539c2 | 892 | { |
1e59de90 | 893 | return strcmp(lhs.__val.str, rhs.__val.str) == 0; |
f67539c2 TL |
894 | } |
895 | ||
896 | ||
1e59de90 | 897 | if (lhs.is_number() && rhs.is_number()) |
f67539c2 TL |
898 | { |
899 | ||
1e59de90 | 900 | if(lhs.type != rhs.type) //conversion //TODO find better way |
f67539c2 | 901 | { |
1e59de90 | 902 | if (lhs.type == value_En_t::DECIMAL) |
f67539c2 | 903 | { |
1e59de90 | 904 | return (double)lhs.__val.num == rhs.__val.dbl; |
f67539c2 TL |
905 | } |
906 | else | |
907 | { | |
1e59de90 | 908 | return lhs.__val.dbl == (double)rhs.__val.num; |
f67539c2 TL |
909 | } |
910 | } | |
911 | else //no conversion | |
912 | { | |
1e59de90 | 913 | if(lhs.type == value_En_t::DECIMAL) |
f67539c2 | 914 | { |
1e59de90 | 915 | return lhs.__val.num == rhs.__val.num; |
f67539c2 TL |
916 | } |
917 | else | |
918 | { | |
1e59de90 | 919 | return lhs.__val.dbl == rhs.__val.dbl; |
f67539c2 TL |
920 | } |
921 | ||
922 | } | |
923 | } | |
924 | ||
1e59de90 | 925 | if(lhs.is_timestamp() && rhs.is_timestamp()) |
f67539c2 | 926 | { |
1e59de90 | 927 | return *(lhs.timestamp()) == *(rhs.timestamp()); |
f67539c2 TL |
928 | } |
929 | ||
20effc67 | 930 | if( |
1e59de90 | 931 | (lhs.is_bool() && rhs.is_bool()) |
20effc67 | 932 | || |
1e59de90 | 933 | (lhs.is_number() && rhs.is_bool()) |
20effc67 | 934 | || |
1e59de90 | 935 | (lhs.is_bool() && rhs.is_number()) |
20effc67 TL |
936 | ) |
937 | { | |
1e59de90 | 938 | return lhs.__val.num == rhs.__val.num; |
20effc67 TL |
939 | } |
940 | ||
1e59de90 | 941 | if (lhs.is_nan() || rhs.is_nan()) |
20effc67 TL |
942 | { |
943 | return false; | |
944 | } | |
945 | ||
f67539c2 TL |
946 | throw base_s3select_exception("operands not of the same type(numeric , string), while comparision"); |
947 | } | |
948 | bool operator<=(const value& v) | |
20effc67 TL |
949 | { |
950 | if (is_nan() || v.is_nan()) { | |
951 | return false; | |
952 | } else { | |
953 | return !(*this>v); | |
954 | } | |
955 | } | |
956 | ||
f67539c2 | 957 | bool operator>=(const value& v) |
20effc67 TL |
958 | { |
959 | if (is_nan() || v.is_nan()) { | |
960 | return false; | |
961 | } else { | |
962 | return !(*this<v); | |
963 | } | |
964 | } | |
965 | ||
f67539c2 | 966 | bool operator!=(const value& v) |
20effc67 TL |
967 | { |
968 | if (is_nan() || v.is_nan()) { | |
969 | return true; | |
970 | } else { | |
971 | return !(*this == v); | |
972 | } | |
f67539c2 | 973 | } |
20effc67 | 974 | |
f67539c2 TL |
975 | template<typename binop> //conversion rules for arithmetical binary operations |
976 | value& compute(value& l, const value& r) //left should be this, it contain the result | |
977 | { | |
978 | binop __op; | |
979 | ||
980 | if (l.is_string() || r.is_string()) | |
981 | { | |
982 | throw base_s3select_exception("illegal binary operation with string"); | |
983 | } | |
20effc67 TL |
984 | if (l.is_bool() || r.is_bool()) |
985 | { | |
986 | throw base_s3select_exception("illegal binary operation with bool type"); | |
987 | } | |
f67539c2 | 988 | |
20effc67 TL |
989 | if (l.is_number() && r.is_number()) |
990 | { | |
991 | if (l.type != r.type) | |
f67539c2 TL |
992 | { |
993 | //conversion | |
994 | ||
995 | if (l.type == value_En_t::DECIMAL) | |
996 | { | |
997 | l.__val.dbl = __op((double)l.__val.num, r.__val.dbl); | |
998 | l.type = value_En_t::FLOAT; | |
999 | } | |
1000 | else | |
1001 | { | |
1002 | l.__val.dbl = __op(l.__val.dbl, (double)r.__val.num); | |
1003 | l.type = value_En_t::FLOAT; | |
1004 | } | |
1005 | } | |
1006 | else | |
1007 | { | |
1008 | //no conversion | |
1009 | ||
1010 | if (l.type == value_En_t::DECIMAL) | |
1011 | { | |
1012 | l.__val.num = __op(l.__val.num, r.__val.num ); | |
1013 | l.type = value_En_t::DECIMAL; | |
1014 | } | |
1015 | else | |
1016 | { | |
1017 | l.__val.dbl = __op(l.__val.dbl, r.__val.dbl ); | |
1018 | l.type = value_En_t::FLOAT; | |
1019 | } | |
1020 | } | |
20effc67 TL |
1021 | } |
1022 | ||
1023 | if (l.is_null() || r.is_null()) | |
1024 | { | |
1025 | l.setnull(); | |
1026 | } else if(l.is_nan() || r.is_nan()) { | |
1027 | l.set_nan(); | |
1028 | } | |
f67539c2 TL |
1029 | |
1030 | return l; | |
1031 | } | |
1032 | ||
1033 | value& operator+(const value& v) | |
1034 | { | |
1035 | return compute<binop_plus>(*this, v); | |
1036 | } | |
1037 | ||
20effc67 TL |
1038 | value operator++(int) |
1039 | { | |
1040 | *this = *this + static_cast<value>(1); | |
1041 | return *this; | |
1042 | } | |
1043 | ||
f67539c2 TL |
1044 | value& operator-(const value& v) |
1045 | { | |
1046 | return compute<binop_minus>(*this, v); | |
1047 | } | |
1048 | ||
1049 | value& operator*(const value& v) | |
1050 | { | |
1051 | return compute<binop_mult>(*this, v); | |
1052 | } | |
20effc67 TL |
1053 | |
1054 | value& operator/(value& v) | |
f67539c2 | 1055 | { |
20effc67 TL |
1056 | if (v.is_null() || this->is_null()) { |
1057 | v.setnull(); | |
1058 | return v; | |
1059 | } else { | |
1060 | return compute<binop_div>(*this, v); | |
1061 | } | |
f67539c2 | 1062 | } |
20effc67 | 1063 | |
f67539c2 TL |
1064 | value& operator^(const value& v) |
1065 | { | |
1066 | return compute<binop_pow>(*this, v); | |
1067 | } | |
1068 | ||
20effc67 TL |
1069 | value & operator%(const value &v) |
1070 | { | |
1071 | if(v.type == value_En_t::DECIMAL) { | |
1072 | return compute<binop_modulo>(*this,v); | |
1073 | } else { | |
1074 | throw base_s3select_exception("wrong use of modulo operation!"); | |
1075 | } | |
1076 | } | |
f67539c2 TL |
1077 | }; |
1078 | ||
20effc67 | 1079 | void multi_values::push_value(value *v) |
f67539c2 | 1080 | { |
20effc67 TL |
1081 | //v could be single or multiple values |
1082 | if (v->type == value::value_En_t::MULTIPLE_VALUES) | |
1083 | { | |
1084 | for (auto sv : v->multiple_values.values) | |
1085 | { | |
1086 | values.push_back(sv); | |
1087 | } | |
1088 | } | |
1089 | else | |
1090 | { | |
1091 | values.push_back(v); | |
1092 | } | |
1093 | } | |
f67539c2 | 1094 | |
f67539c2 | 1095 | |
20effc67 TL |
1096 | class scratch_area |
1097 | { | |
1098 | ||
1099 | private: | |
20effc67 TL |
1100 | std::vector<value> *m_schema_values; //values got a type |
1101 | int m_upper_bound; | |
f67539c2 | 1102 | |
20effc67 TL |
1103 | std::vector<std::pair<std::string, int >> m_column_name_pos; |
1104 | bool parquet_type; | |
1105 | char str_buff[4096]; | |
1106 | uint16_t buff_loc; | |
1e59de90 TL |
1107 | int max_json_idx; |
1108 | timestamp_t tmstmp; | |
1109 | ||
f67539c2 | 1110 | public: |
20effc67 | 1111 | |
1e59de90 TL |
1112 | typedef std::pair<std::vector<std::string>,value> json_key_value_t; |
1113 | typedef std::vector< json_key_value_t > json_star_op_cont_t; | |
1114 | json_star_op_cont_t m_json_star_operation; | |
1115 | ||
1116 | scratch_area():m_upper_bound(-1),parquet_type(false),buff_loc(0),max_json_idx(-1) | |
1117 | {//TODO it should resize dynamicly | |
1118 | m_schema_values = new std::vector<value>(128,value(nullptr)); | |
f67539c2 | 1119 | } |
f67539c2 | 1120 | |
20effc67 | 1121 | ~scratch_area() |
f67539c2 | 1122 | { |
20effc67 | 1123 | delete m_schema_values; |
f67539c2 TL |
1124 | } |
1125 | ||
1e59de90 TL |
1126 | json_star_op_cont_t* get_star_operation_cont() |
1127 | { | |
1128 | return &m_json_star_operation; | |
1129 | } | |
1130 | ||
1131 | void clear_data() | |
1132 | { | |
1133 | m_json_star_operation.clear(); | |
1134 | for(int i=0;i<=max_json_idx;i++) | |
1135 | { | |
1136 | (*m_schema_values)[i].setnull(); | |
1137 | } | |
1138 | } | |
1139 | ||
20effc67 | 1140 | void set_column_pos(const char* n, int pos)//TODO use std::string |
f67539c2 | 1141 | { |
20effc67 | 1142 | m_column_name_pos.push_back( std::pair<const char*, int>(n, pos)); |
f67539c2 | 1143 | } |
20effc67 TL |
1144 | |
1145 | void update(std::vector<char*>& tokens, size_t num_of_tokens) | |
f67539c2 | 1146 | { |
20effc67 TL |
1147 | size_t i=0; |
1148 | for(auto s : tokens) | |
1149 | { | |
1150 | if (i>=num_of_tokens) | |
1151 | { | |
1152 | break; | |
1153 | } | |
1e59de90 TL |
1154 | //not copy the string content. |
1155 | (*m_schema_values)[i++].set_string_nocopy(s); | |
20effc67 TL |
1156 | } |
1157 | m_upper_bound = i; | |
1158 | ||
1159 | } | |
1160 | ||
1161 | int get_column_pos(const char* n) | |
1162 | { | |
1163 | //done only upon building the AST, not on "runtime" | |
1164 | ||
1165 | for( auto iter : m_column_name_pos) | |
1166 | { | |
1167 | if (!strcmp(iter.first.c_str(), n)) | |
1168 | { | |
1169 | return iter.second; | |
1170 | } | |
1171 | } | |
1172 | ||
1173 | return -1; | |
1174 | } | |
1175 | ||
1176 | void set_parquet_type() | |
1177 | { | |
1178 | parquet_type = true; | |
1179 | } | |
1180 | ||
1181 | void get_column_value(uint16_t column_pos, value &v) | |
1e59de90 TL |
1182 | {// TODO handle out of boundaries |
1183 | v = (*m_schema_values)[ column_pos ]; | |
20effc67 TL |
1184 | } |
1185 | ||
1e59de90 | 1186 | value* get_column_value(uint16_t column_pos) |
20effc67 | 1187 | { |
1e59de90 | 1188 | return &(*m_schema_values)[ column_pos ]; |
20effc67 | 1189 | } |
1e59de90 | 1190 | |
20effc67 TL |
1191 | int get_num_of_columns() |
1192 | { | |
1193 | return m_upper_bound; | |
1194 | } | |
1195 | ||
1e59de90 | 1196 | int update_json_varible(value v,int json_idx) |
20effc67 | 1197 | { |
1e59de90 TL |
1198 | if(json_idx>max_json_idx) |
1199 | { | |
1200 | max_json_idx = json_idx; | |
1201 | } | |
1202 | (*m_schema_values)[ json_idx ] = v; | |
1203 | ||
1204 | if(json_idx>m_upper_bound) | |
1205 | { | |
1206 | m_upper_bound = json_idx; | |
1207 | } | |
1208 | return 0; | |
20effc67 TL |
1209 | } |
1210 | ||
1211 | #ifdef _ARROW_EXIST | |
1e59de90 TL |
1212 | |
1213 | #define S3SELECT_MICROSEC (1000*1000) | |
1214 | #define S3SELECT_MILLISEX (1000) | |
1215 | ||
20effc67 TL |
1216 | int update(std::vector<parquet_file_parser::parquet_value_t> &parquet_row_value, parquet_file_parser::column_pos_t &column_positions) |
1217 | { | |
1218 | //TODO no need for copy , possible to save referece (its save last row for calculation) | |
1219 | ||
1220 | parquet_file_parser::column_pos_t::iterator column_pos_iter = column_positions.begin(); | |
1221 | m_upper_bound =0; | |
1222 | buff_loc=0; | |
1223 | ||
1224 | for(auto v : parquet_row_value) | |
1225 | { | |
1226 | //TODO (parquet_value_t) --> (value) , or better get it as value (i.e. parquet reader know class-value) | |
1227 | //TODO temporary | |
1228 | switch( v.type ) | |
1229 | { | |
1230 | case parquet_file_parser::parquet_type::INT32: | |
1e59de90 | 1231 | (*m_schema_values)[ *column_pos_iter ] = v.num; |
20effc67 TL |
1232 | break; |
1233 | ||
1234 | case parquet_file_parser::parquet_type::INT64: | |
1e59de90 | 1235 | (*m_schema_values)[ *column_pos_iter ] = v.num; |
20effc67 TL |
1236 | break; |
1237 | ||
1238 | case parquet_file_parser::parquet_type::DOUBLE: | |
1e59de90 | 1239 | (*m_schema_values)[ *column_pos_iter ] = v.dbl; |
20effc67 TL |
1240 | break; |
1241 | ||
1242 | case parquet_file_parser::parquet_type::STRING: | |
1243 | //TODO waste of CPU | |
1244 | //TODO value need to present string with char* and length | |
1245 | ||
1246 | memcpy(str_buff+buff_loc, v.str, v.str_len); | |
1247 | str_buff[buff_loc+v.str_len] = 0; | |
1248 | (*m_schema_values)[ *column_pos_iter ] = str_buff+buff_loc; | |
1249 | buff_loc += v.str_len+1; | |
1250 | break; | |
1251 | ||
1252 | case parquet_file_parser::parquet_type::PARQUET_NULL: | |
1253 | ||
1254 | (*m_schema_values)[ *column_pos_iter ].setnull(); | |
1255 | break; | |
1256 | ||
1e59de90 TL |
1257 | case parquet_file_parser::parquet_type::TIMESTAMP: //TODO milli-sec, micro-sec, nano-sec |
1258 | { | |
1259 | auto tm_sec = v.num/S3SELECT_MICROSEC; //TODO should use the correct unit | |
1260 | boost::posix_time::ptime new_ptime = boost::posix_time::from_time_t( tm_sec ); | |
1261 | boost::posix_time::time_duration td_zero((tm_sec/3600)%24,(tm_sec/60)%24,tm_sec%60); | |
1262 | tmstmp = std::make_tuple(new_ptime, td_zero, (char)'Z'); | |
1263 | (*m_schema_values)[ *column_pos_iter ] = &tmstmp; | |
1264 | } | |
1265 | break; | |
1266 | ||
20effc67 | 1267 | default: |
1e59de90 TL |
1268 | throw base_s3select_exception("wrong parquet type for conversion."); |
1269 | ||
1270 | //return -1;//TODO exception | |
20effc67 | 1271 | } |
1e59de90 | 1272 | m_upper_bound = *column_pos_iter+1; |
20effc67 TL |
1273 | column_pos_iter ++; |
1274 | } | |
1275 | return 0; | |
f67539c2 | 1276 | } |
20effc67 TL |
1277 | #endif // _ARROW_EXIST |
1278 | ||
1279 | }; | |
f67539c2 | 1280 | |
20effc67 TL |
1281 | class base_statement |
1282 | { | |
1283 | ||
1284 | protected: | |
1285 | ||
1286 | scratch_area* m_scratch; | |
1287 | projection_alias* m_aliases; | |
1288 | bool is_last_call; //valid only for aggregation functions | |
1289 | bool m_is_cache_result; | |
1290 | value m_alias_result; | |
1291 | base_statement* m_projection_alias; | |
1292 | int m_eval_stack_depth; | |
1293 | bool m_skip_non_aggregate_op; | |
1294 | value value_na; | |
1e59de90 TL |
1295 | //JSON queries has different syntax from other data-sources(Parquet,CSV) |
1296 | bool m_json_statement; | |
20effc67 TL |
1297 | |
1298 | public: | |
1299 | base_statement():m_scratch(nullptr), is_last_call(false), m_is_cache_result(false), | |
1e59de90 | 1300 | m_projection_alias(nullptr), m_eval_stack_depth(0), m_skip_non_aggregate_op(false),m_json_statement(false) {} |
20effc67 TL |
1301 | |
1302 | virtual value& eval() | |
1303 | { | |
1304 | //purpose: on aggregation flow to run only the correct subtree(aggregation subtree) | |
1305 | ||
1306 | if (m_skip_non_aggregate_op == false) | |
1307 | return eval_internal();//not skipping this node. | |
1308 | else | |
1309 | { | |
1310 | //skipping this node. | |
1311 | //in case execution should skip a node, it will traverse (left and right) | |
1312 | //and search for subtree to execute. | |
1313 | //example: sum( ... ) - sum( ... ) ; the minus operand is skipped while sum() operand is not. | |
1314 | if(left()) | |
1315 | left()->eval_internal(); | |
1316 | ||
1317 | if(right()) | |
1318 | right()->eval_internal(); | |
1319 | ||
1320 | } | |
1321 | ||
1322 | return value_na; | |
1323 | } | |
1324 | ||
1325 | virtual value& eval_internal() = 0; | |
1326 | ||
20effc67 TL |
1327 | public: |
1328 | virtual base_statement* left() const | |
1329 | { | |
1330 | return 0; | |
1331 | } | |
1332 | virtual base_statement* right() const | |
1333 | { | |
1334 | return 0; | |
1335 | } | |
1336 | virtual std::string print(int ident) =0;//TODO complete it, one option to use level parametr in interface , | |
1337 | virtual bool semantic() =0;//done once , post syntax , traverse all nodes and validate semantics. | |
1338 | ||
1e59de90 | 1339 | virtual void traverse_and_apply(scratch_area* sa, projection_alias* pa,bool json_statement) |
20effc67 TL |
1340 | { |
1341 | m_scratch = sa; | |
1342 | m_aliases = pa; | |
1e59de90 TL |
1343 | m_json_statement = json_statement; |
1344 | ||
20effc67 TL |
1345 | if (left()) |
1346 | { | |
1e59de90 | 1347 | left()->traverse_and_apply(m_scratch, m_aliases, json_statement); |
20effc67 TL |
1348 | } |
1349 | if (right()) | |
1350 | { | |
1e59de90 | 1351 | right()->traverse_and_apply(m_scratch, m_aliases, json_statement); |
20effc67 TL |
1352 | } |
1353 | } | |
1354 | ||
1355 | virtual void set_skip_non_aggregate(bool skip_non_aggregate_op) | |
1356 | { | |
1357 | m_skip_non_aggregate_op = skip_non_aggregate_op; | |
1358 | ||
1359 | if (left()) | |
1360 | { | |
1361 | left()->set_skip_non_aggregate(m_skip_non_aggregate_op); | |
1362 | } | |
1363 | if (right()) | |
1364 | { | |
1365 | right()->set_skip_non_aggregate(m_skip_non_aggregate_op); | |
1366 | } | |
1367 | } | |
1368 | ||
1369 | virtual bool is_aggregate() const | |
1370 | { | |
1371 | return false; | |
1372 | } | |
1373 | ||
1374 | virtual bool is_column() const | |
1375 | { | |
1376 | return false; | |
1377 | } | |
1378 | ||
1e59de90 TL |
1379 | virtual bool is_star_operation() const |
1380 | { | |
1381 | return false; | |
1382 | } | |
1383 | ||
20effc67 TL |
1384 | virtual void resolve_node() |
1385 | {//part of semantic analysis(TODO maybe semantic method should handle this) | |
1386 | if (left()) | |
1387 | { | |
1388 | left()->resolve_node(); | |
1389 | } | |
1390 | if (right()) | |
1391 | { | |
1392 | right()->resolve_node(); | |
1393 | } | |
1394 | } | |
1395 | ||
1e59de90 TL |
1396 | bool is_json_statement() |
1397 | { | |
1398 | return m_json_statement; | |
1399 | } | |
1400 | ||
20effc67 TL |
1401 | bool is_function() const; |
1402 | const base_statement* get_aggregate() const; | |
1403 | bool is_nested_aggregate(bool&) const; | |
1404 | bool is_column_reference() const; | |
1405 | bool mark_aggreagtion_subtree_to_execute(); | |
1e59de90 TL |
1406 | bool is_statement_contain_star_operation() const; |
1407 | void push_for_cleanup(std::set<base_statement*>&); | |
20effc67 TL |
1408 | |
1409 | #ifdef _ARROW_EXIST | |
1410 | void extract_columns(parquet_file_parser::column_pos_t &cols,const uint16_t max_columns); | |
1411 | #endif | |
f67539c2 TL |
1412 | |
1413 | virtual void set_last_call() | |
1414 | { | |
1415 | is_last_call = true; | |
1416 | if(left()) | |
1417 | { | |
1418 | left()->set_last_call(); | |
1419 | } | |
1420 | if(right()) | |
1421 | { | |
1422 | right()->set_last_call(); | |
1423 | } | |
1424 | } | |
1425 | ||
1426 | bool is_set_last_call() | |
1427 | { | |
1428 | return is_last_call; | |
1429 | } | |
1430 | ||
1431 | void invalidate_cache_result() | |
1432 | { | |
1433 | m_is_cache_result = false; | |
1434 | } | |
1435 | ||
1436 | bool is_result_cached() | |
1437 | { | |
1438 | return m_is_cache_result == true; | |
1439 | } | |
1440 | ||
1441 | void set_result_cache(value& eval_result) | |
1442 | { | |
1443 | m_alias_result = eval_result; | |
1444 | m_is_cache_result = true; | |
1445 | } | |
1446 | ||
1447 | void dec_call_stack_depth() | |
1448 | { | |
1449 | m_eval_stack_depth --; | |
1450 | } | |
1451 | ||
1452 | value& get_result_cache() | |
1453 | { | |
1454 | return m_alias_result; | |
1455 | } | |
1456 | ||
1457 | int& get_eval_call_depth() | |
1458 | { | |
1459 | m_eval_stack_depth++; | |
1460 | return m_eval_stack_depth; | |
1461 | } | |
1462 | ||
1463 | virtual ~base_statement() {} | |
1464 | ||
20effc67 TL |
1465 | void dtor() |
1466 | { | |
1467 | this->~base_statement(); | |
1468 | } | |
1469 | ||
1470 | scratch_area* getScratchArea() | |
1471 | { | |
1472 | return m_scratch; | |
1473 | } | |
1474 | ||
1475 | projection_alias* getAlias() | |
1476 | { | |
1477 | return m_aliases; | |
1478 | } | |
1479 | ||
f67539c2 TL |
1480 | }; |
1481 | ||
1482 | class variable : public base_statement | |
1483 | { | |
1484 | ||
1485 | public: | |
1486 | ||
1487 | enum class var_t | |
1488 | { | |
1489 | NA, | |
1e59de90 TL |
1490 | VARIABLE_NAME,//schema column (i.e. age , price , ...) |
1491 | COLUMN_VALUE, //concrete value (string,number,boolean) | |
1492 | JSON_VARIABLE,//a key-path reference | |
f67539c2 TL |
1493 | POS, // CSV column number (i.e. _1 , _2 ... ) |
1494 | STAR_OPERATION, //'*' | |
1495 | } ; | |
1496 | var_t m_var_type; | |
1497 | ||
1498 | private: | |
1499 | ||
1500 | std::string _name; | |
1501 | int column_pos; | |
1502 | value var_value; | |
1e59de90 | 1503 | int json_variable_idx; |
f67539c2 TL |
1504 | |
1505 | const int undefined_column_pos = -1; | |
1506 | const int column_alias = -2; | |
1507 | ||
1508 | public: | |
1e59de90 | 1509 | variable():m_var_type(var_t::NA), _name(""), column_pos(-1), json_variable_idx(-1) {} |
f67539c2 | 1510 | |
1e59de90 | 1511 | explicit variable(int64_t i) : m_var_type(var_t::COLUMN_VALUE), column_pos(-1), var_value(i), json_variable_idx(-1) {} |
f67539c2 | 1512 | |
1e59de90 | 1513 | explicit variable(double d) : m_var_type(var_t::COLUMN_VALUE), _name("#"), column_pos(-1), var_value(d), json_variable_idx(-1) {} |
f67539c2 | 1514 | |
1e59de90 | 1515 | explicit variable(int i) : m_var_type(var_t::COLUMN_VALUE), column_pos(-1), var_value(i), json_variable_idx(-1) {} |
f67539c2 | 1516 | |
1e59de90 TL |
1517 | explicit variable(const std::string& n) : m_var_type(var_t::VARIABLE_NAME), _name(n), column_pos(-1), json_variable_idx(-1) {} |
1518 | ||
1519 | explicit variable(const std::string& n, var_t tp, size_t json_idx) : m_var_type(var_t::NA) | |
1520 | {//only upon JSON use case | |
1521 | if(tp == variable::var_t::JSON_VARIABLE) | |
1522 | { | |
1523 | m_var_type = variable::var_t::JSON_VARIABLE; | |
1524 | json_variable_idx = static_cast<int>(json_idx); | |
1525 | _name = n;//"#"; debug | |
1526 | } | |
1527 | } | |
f67539c2 TL |
1528 | |
1529 | variable(const std::string& n, var_t tp) : m_var_type(var_t::NA) | |
1530 | { | |
1531 | if(tp == variable::var_t::POS) | |
1532 | { | |
1533 | _name = n; | |
1534 | m_var_type = tp; | |
1535 | int pos = atoi( n.c_str() + 1 ); //TODO >0 < (schema definition , semantic analysis) | |
1536 | column_pos = pos -1;// _1 is the first column ( zero position ) | |
1537 | } | |
1e59de90 | 1538 | else if (tp == variable::var_t::COLUMN_VALUE) |
f67539c2 TL |
1539 | { |
1540 | _name = "#"; | |
1541 | m_var_type = tp; | |
1542 | column_pos = -1; | |
1543 | var_value = n.c_str(); | |
1544 | } | |
1545 | else if (tp ==variable::var_t::STAR_OPERATION) | |
1546 | { | |
1547 | _name = "#"; | |
1548 | m_var_type = tp; | |
1549 | column_pos = -1; | |
1550 | } | |
1551 | } | |
1552 | ||
20effc67 TL |
1553 | explicit variable(s3select_reserved_word::reserve_word_en_t reserve_word) |
1554 | { | |
1555 | if (reserve_word == s3select_reserved_word::reserve_word_en_t::S3S_NULL) | |
1556 | { | |
1e59de90 | 1557 | m_var_type = variable::var_t::COLUMN_VALUE; |
20effc67 TL |
1558 | column_pos = undefined_column_pos; |
1559 | var_value.type = value::value_En_t::S3NULL;//TODO use set_null | |
1560 | } | |
1561 | else if (reserve_word == s3select_reserved_word::reserve_word_en_t::S3S_NAN) | |
1562 | { | |
1e59de90 | 1563 | m_var_type = variable::var_t::COLUMN_VALUE; |
20effc67 TL |
1564 | column_pos = undefined_column_pos; |
1565 | var_value.set_nan(); | |
1566 | } | |
1567 | else if (reserve_word == s3select_reserved_word::reserve_word_en_t::S3S_TRUE) | |
1568 | { | |
1e59de90 | 1569 | m_var_type = variable::var_t::COLUMN_VALUE; |
20effc67 TL |
1570 | column_pos = -1; |
1571 | var_value.set_true(); | |
1572 | } | |
1573 | else if (reserve_word == s3select_reserved_word::reserve_word_en_t::S3S_FALSE) | |
1574 | { | |
1e59de90 | 1575 | m_var_type = variable::var_t::COLUMN_VALUE; |
20effc67 TL |
1576 | column_pos = -1; |
1577 | var_value.set_false(); | |
1578 | } | |
1579 | else | |
1580 | { | |
1581 | _name = "#"; | |
1582 | m_var_type = var_t::NA; | |
1583 | column_pos = undefined_column_pos; | |
1584 | } | |
1585 | } | |
1586 | ||
f67539c2 TL |
1587 | void operator=(value& v) |
1588 | { | |
1589 | var_value = v; | |
1590 | } | |
1591 | ||
1592 | void set_value(const char* s) | |
1593 | { | |
1594 | var_value = s; | |
1595 | } | |
1596 | ||
1597 | void set_value(double d) | |
1598 | { | |
1599 | var_value = d; | |
1600 | } | |
1601 | ||
1602 | void set_value(int64_t i) | |
1603 | { | |
1604 | var_value = i; | |
1605 | } | |
1606 | ||
20effc67 | 1607 | void set_value(timestamp_t* p) |
f67539c2 TL |
1608 | { |
1609 | var_value = p; | |
1610 | } | |
1611 | ||
20effc67 TL |
1612 | void set_value(bool b) |
1613 | { | |
1e59de90 | 1614 | var_value = b; |
20effc67 TL |
1615 | var_value.type = value::value_En_t::BOOL; |
1616 | } | |
1617 | ||
1618 | void set_null() | |
1619 | { | |
1620 | var_value.setnull(); | |
1621 | } | |
1622 | ||
f67539c2 TL |
1623 | virtual ~variable() {} |
1624 | ||
20effc67 | 1625 | virtual bool is_column() const //is reference to column. |
f67539c2 | 1626 | { |
1e59de90 TL |
1627 | if(m_var_type == var_t::VARIABLE_NAME || m_var_type == var_t::POS || m_var_type == var_t::STAR_OPERATION) |
1628 | { | |
1629 | return true; | |
1630 | } | |
1631 | return false; | |
1632 | } | |
1633 | ||
1634 | virtual bool is_star_operation() const | |
1635 | { | |
1636 | if(m_var_type == var_t::STAR_OPERATION) | |
f67539c2 TL |
1637 | { |
1638 | return true; | |
1639 | } | |
1640 | return false; | |
1641 | } | |
1642 | ||
1643 | value& get_value() | |
1644 | { | |
1645 | return var_value; //TODO is it correct | |
1646 | } | |
20effc67 TL |
1647 | |
1648 | std::string get_name() | |
1649 | { | |
1650 | return _name; | |
1651 | } | |
1652 | ||
1653 | int get_column_pos() | |
1654 | { | |
1655 | return column_pos; | |
1656 | } | |
1657 | ||
f67539c2 TL |
1658 | virtual value::value_En_t get_value_type() |
1659 | { | |
1660 | return var_value.type; | |
1661 | } | |
1662 | ||
1e59de90 TL |
1663 | value& star_operation() |
1664 | {//purpose return content of all columns in a input stream | |
1665 | if(is_json_statement()) | |
1666 | return json_star_operation(); | |
f67539c2 | 1667 | |
1e59de90 TL |
1668 | var_value.multiple_values.clear(); |
1669 | for(int i=0; i<m_scratch->get_num_of_columns(); i++) | |
20effc67 | 1670 | { |
1e59de90 | 1671 | var_value.multiple_values.push_value( m_scratch->get_column_value(i) ); |
20effc67 | 1672 | } |
1e59de90 TL |
1673 | var_value.type = value::value_En_t::MULTIPLE_VALUES; |
1674 | return var_value; | |
1675 | } | |
20effc67 | 1676 | |
1e59de90 TL |
1677 | value& json_star_operation() |
1678 | {//purpose: per JSON star-operation it needs to get column-name(full-path) and its value | |
f67539c2 | 1679 | |
1e59de90 TL |
1680 | var_value.multiple_values.clear(); |
1681 | for(auto& key_value : *m_scratch->get_star_operation_cont()) | |
1682 | { | |
1683 | key_value.second.set_json_key_path(key_value.first); | |
1684 | var_value.multiple_values.push_value(&key_value.second); | |
f67539c2 TL |
1685 | } |
1686 | ||
20effc67 | 1687 | var_value.type = value::value_En_t::MULTIPLE_VALUES; |
f67539c2 | 1688 | |
f67539c2 TL |
1689 | return var_value; |
1690 | } | |
1691 | ||
20effc67 | 1692 | virtual value& eval_internal() |
f67539c2 | 1693 | { |
1e59de90 | 1694 | if (m_var_type == var_t::COLUMN_VALUE) |
f67539c2 TL |
1695 | { |
1696 | return var_value; // a literal,could be deciml / float / string | |
1697 | } | |
1698 | else if(m_var_type == var_t::STAR_OPERATION) | |
1699 | { | |
1700 | return star_operation(); | |
1701 | } | |
1e59de90 TL |
1702 | else if(m_var_type == var_t::JSON_VARIABLE && json_variable_idx >= 0) |
1703 | { | |
1704 | column_pos = json_variable_idx; //TODO handle column alias | |
1705 | } | |
f67539c2 TL |
1706 | else if (column_pos == undefined_column_pos) |
1707 | { | |
1708 | //done once , for the first time | |
1709 | column_pos = m_scratch->get_column_pos(_name.c_str()); | |
1710 | ||
1711 | if(column_pos>=0 && m_aliases->search_alias(_name.c_str())) | |
1712 | { | |
1713 | throw base_s3select_exception(std::string("multiple definition of column {") + _name + "} as schema-column and alias", base_s3select_exception::s3select_exp_en_t::FATAL); | |
1714 | } | |
1715 | ||
1716 | ||
1717 | if (column_pos == undefined_column_pos) | |
1718 | { | |
1719 | //not belong to schema , should exist in aliases | |
1720 | m_projection_alias = m_aliases->search_alias(_name.c_str()); | |
1721 | ||
1722 | //not enter this scope again | |
1723 | column_pos = column_alias; | |
1724 | if(m_projection_alias == 0) | |
1725 | { | |
1726 | throw base_s3select_exception(std::string("alias {")+_name+std::string("} or column not exist in schema"), base_s3select_exception::s3select_exp_en_t::FATAL); | |
1727 | } | |
1728 | } | |
1729 | ||
1730 | } | |
1731 | ||
1732 | if (m_projection_alias) | |
1733 | { | |
1734 | if (m_projection_alias->get_eval_call_depth()>2) | |
1735 | { | |
1736 | throw base_s3select_exception("number of calls exceed maximum size, probably a cyclic reference to alias", base_s3select_exception::s3select_exp_en_t::FATAL); | |
1737 | } | |
1738 | ||
1739 | if (m_projection_alias->is_result_cached() == false) | |
1740 | { | |
1741 | var_value = m_projection_alias->eval(); | |
1742 | m_projection_alias->set_result_cache(var_value); | |
1743 | } | |
1744 | else | |
1745 | { | |
1746 | var_value = m_projection_alias->get_result_cache(); | |
1747 | } | |
1748 | ||
1749 | m_projection_alias->dec_call_stack_depth(); | |
1750 | } | |
1751 | else | |
1752 | { | |
20effc67 TL |
1753 | m_scratch->get_column_value(column_pos,var_value); |
1754 | //in the case of successive column-delimiter {1,some_data,,3}=> third column is NULL | |
1755 | if (var_value.is_string() && (var_value.str()== 0 || (var_value.str() && *var_value.str()==0))) | |
1e59de90 | 1756 | var_value.setnull();//TODO is it correct for Parquet |
f67539c2 TL |
1757 | } |
1758 | ||
1759 | return var_value; | |
1760 | } | |
1761 | ||
1762 | virtual std::string print(int ident) | |
1763 | { | |
1764 | //std::string out = std::string(ident,' ') + std::string("var:") + std::to_string(var_value.__val.num); | |
1765 | //return out; | |
1766 | return std::string("#");//TBD | |
1767 | } | |
1768 | ||
1769 | virtual bool semantic() | |
1770 | { | |
1771 | return false; | |
1772 | } | |
1773 | ||
1774 | }; | |
1775 | ||
1776 | class arithmetic_operand : public base_statement | |
1777 | { | |
1778 | ||
1779 | public: | |
1780 | ||
1781 | enum class cmp_t {NA, EQ, LE, LT, GT, GE, NE} ; | |
1782 | ||
1783 | private: | |
1784 | base_statement* l; | |
1785 | base_statement* r; | |
1786 | ||
1787 | cmp_t _cmp; | |
1788 | value var_value; | |
20effc67 TL |
1789 | bool negation_result;//false: dont negate ; upon NOT operator(unary) its true |
1790 | ||
f67539c2 TL |
1791 | public: |
1792 | ||
1793 | virtual bool semantic() | |
1794 | { | |
1795 | return true; | |
1796 | } | |
1797 | ||
20effc67 | 1798 | base_statement* left() const override |
f67539c2 TL |
1799 | { |
1800 | return l; | |
1801 | } | |
20effc67 | 1802 | base_statement* right() const override |
f67539c2 TL |
1803 | { |
1804 | return r; | |
1805 | } | |
1806 | ||
1807 | virtual std::string print(int ident) | |
1808 | { | |
1809 | //std::string out = std::string(ident,' ') + "compare:" += std::to_string(_cmp) + "\n" + l->print(ident-5) +r->print(ident+5); | |
1810 | //return out; | |
1811 | return std::string("#");//TBD | |
1812 | } | |
1813 | ||
20effc67 | 1814 | virtual value& eval_internal() |
f67539c2 | 1815 | { |
20effc67 TL |
1816 | if ((l->eval()).is_null()) { |
1817 | var_value.setnull(); | |
1818 | return var_value; | |
1819 | } else if((r->eval()).is_null()) { | |
1820 | var_value.setnull(); | |
1821 | return var_value; | |
1822 | } | |
1823 | ||
f67539c2 TL |
1824 | switch (_cmp) |
1825 | { | |
1826 | case cmp_t::EQ: | |
20effc67 | 1827 | return var_value = bool( (l->eval() == r->eval()) ^ negation_result ); |
f67539c2 TL |
1828 | break; |
1829 | ||
1830 | case cmp_t::LE: | |
20effc67 | 1831 | return var_value = bool( (l->eval() <= r->eval()) ^ negation_result ); |
f67539c2 TL |
1832 | break; |
1833 | ||
1834 | case cmp_t::GE: | |
20effc67 | 1835 | return var_value = bool( (l->eval() >= r->eval()) ^ negation_result ); |
f67539c2 TL |
1836 | break; |
1837 | ||
1838 | case cmp_t::NE: | |
20effc67 | 1839 | return var_value = bool( (l->eval() != r->eval()) ^ negation_result ); |
f67539c2 TL |
1840 | break; |
1841 | ||
1842 | case cmp_t::GT: | |
20effc67 | 1843 | return var_value = bool( (l->eval() > r->eval()) ^ negation_result ); |
f67539c2 TL |
1844 | break; |
1845 | ||
1846 | case cmp_t::LT: | |
20effc67 | 1847 | return var_value = bool( (l->eval() < r->eval()) ^ negation_result ); |
f67539c2 TL |
1848 | break; |
1849 | ||
1850 | default: | |
1851 | throw base_s3select_exception("internal error"); | |
1852 | break; | |
1853 | } | |
1854 | } | |
1855 | ||
20effc67 TL |
1856 | arithmetic_operand(base_statement* _l, cmp_t c, base_statement* _r):l(_l), r(_r), _cmp(c),negation_result(false) {} |
1857 | ||
1858 | explicit arithmetic_operand(base_statement* p)//NOT operator | |
1859 | { | |
1860 | l = dynamic_cast<arithmetic_operand*>(p)->l; | |
1861 | r = dynamic_cast<arithmetic_operand*>(p)->r; | |
1862 | _cmp = dynamic_cast<arithmetic_operand*>(p)->_cmp; | |
1863 | // not( not ( logical expression )) == ( logical expression ); there is no limitation for number of NOT. | |
1864 | negation_result = ! dynamic_cast<arithmetic_operand*>(p)->negation_result; | |
1865 | } | |
f67539c2 TL |
1866 | |
1867 | virtual ~arithmetic_operand() {} | |
1868 | }; | |
1869 | ||
1870 | class logical_operand : public base_statement | |
1871 | { | |
1872 | ||
1873 | public: | |
1874 | ||
1875 | enum class oplog_t {AND, OR, NA}; | |
1876 | ||
1877 | private: | |
1878 | base_statement* l; | |
1879 | base_statement* r; | |
1880 | ||
1881 | oplog_t _oplog; | |
1882 | value var_value; | |
20effc67 | 1883 | bool negation_result;//false: dont negate ; upon NOT operator(unary) its true |
f67539c2 TL |
1884 | |
1885 | public: | |
1886 | ||
20effc67 | 1887 | base_statement* left() const override |
f67539c2 TL |
1888 | { |
1889 | return l; | |
1890 | } | |
20effc67 | 1891 | base_statement* right() const override |
f67539c2 TL |
1892 | { |
1893 | return r; | |
1894 | } | |
1895 | ||
1896 | virtual bool semantic() | |
1897 | { | |
1898 | return true; | |
1899 | } | |
1900 | ||
20effc67 TL |
1901 | logical_operand(base_statement* _l, oplog_t _o, base_statement* _r):l(_l), r(_r), _oplog(_o),negation_result(false) {} |
1902 | ||
1903 | explicit logical_operand(base_statement * p)//NOT operator | |
1904 | { | |
1905 | l = dynamic_cast<logical_operand*>(p)->l; | |
1906 | r = dynamic_cast<logical_operand*>(p)->r; | |
1907 | _oplog = dynamic_cast<logical_operand*>(p)->_oplog; | |
1908 | // not( not ( logical expression )) == ( logical expression ); there is no limitation for number of NOT. | |
1909 | negation_result = ! dynamic_cast<logical_operand*>(p)->negation_result; | |
1910 | } | |
f67539c2 TL |
1911 | |
1912 | virtual ~logical_operand() {} | |
1913 | ||
1914 | virtual std::string print(int ident) | |
1915 | { | |
1916 | //std::string out = std::string(ident, ' ') + "logical_operand:" += std::to_string(_oplog) + "\n" + l->print(ident - 5) + r->print(ident + 5); | |
1917 | //return out; | |
1918 | return std::string("#");//TBD | |
1919 | } | |
20effc67 | 1920 | virtual value& eval_internal() |
f67539c2 | 1921 | { |
20effc67 TL |
1922 | if (!l || !r) |
1923 | { | |
1924 | throw base_s3select_exception("missing operand for logical ", base_s3select_exception::s3select_exp_en_t::FATAL); | |
1925 | } | |
1926 | value a = l->eval(); | |
f67539c2 TL |
1927 | if (_oplog == oplog_t::AND) |
1928 | { | |
20effc67 TL |
1929 | if (!a.is_null() && a.i64() == false) { |
1930 | bool res = false ^ negation_result; | |
1931 | return var_value = res; | |
1932 | } | |
1933 | value b = r->eval(); | |
1934 | if(!b.is_null() && b.i64() == false) { | |
1935 | bool res = false ^ negation_result; | |
1936 | return var_value = res; | |
1937 | } else { | |
1938 | if (a.is_null() || b.is_null()) { | |
1939 | var_value.setnull(); | |
1940 | return var_value; | |
1941 | } else { | |
1942 | bool res = true ^ negation_result ; | |
1943 | return var_value =res; | |
1944 | } | |
1945 | } | |
f67539c2 TL |
1946 | } |
1947 | else | |
1948 | { | |
20effc67 TL |
1949 | if (a.is_true()) { |
1950 | bool res = true ^ negation_result; | |
1951 | return var_value = res; | |
1952 | } | |
1953 | value b = r->eval(); | |
1954 | if(b.is_true() == true) { | |
1955 | bool res = true ^ negation_result; | |
1956 | return var_value = res; | |
1957 | } else { | |
1958 | if (a.is_null() || b.is_null()) { | |
1959 | var_value.setnull(); | |
1960 | return var_value; | |
1961 | } else { | |
1962 | bool res = false ^ negation_result ; | |
1963 | return var_value =res; | |
1964 | } | |
f67539c2 | 1965 | } |
f67539c2 TL |
1966 | } |
1967 | } | |
f67539c2 TL |
1968 | }; |
1969 | ||
1970 | class mulldiv_operation : public base_statement | |
1971 | { | |
1972 | ||
1973 | public: | |
1974 | ||
20effc67 | 1975 | enum class muldiv_t {NA, MULL, DIV, POW, MOD} ; |
f67539c2 TL |
1976 | |
1977 | private: | |
1978 | base_statement* l; | |
1979 | base_statement* r; | |
1980 | ||
1981 | muldiv_t _mulldiv; | |
1982 | value var_value; | |
20effc67 | 1983 | value tmp_value; |
f67539c2 TL |
1984 | |
1985 | public: | |
1986 | ||
20effc67 | 1987 | base_statement* left() const override |
f67539c2 TL |
1988 | { |
1989 | return l; | |
1990 | } | |
20effc67 | 1991 | base_statement* right() const override |
f67539c2 TL |
1992 | { |
1993 | return r; | |
1994 | } | |
1995 | ||
1996 | virtual bool semantic() | |
1997 | { | |
1998 | return true; | |
1999 | } | |
2000 | ||
2001 | virtual std::string print(int ident) | |
2002 | { | |
2003 | //std::string out = std::string(ident, ' ') + "mulldiv_operation:" += std::to_string(_mulldiv) + "\n" + l->print(ident - 5) + r->print(ident + 5); | |
2004 | //return out; | |
2005 | return std::string("#");//TBD | |
2006 | } | |
2007 | ||
20effc67 | 2008 | virtual value& eval_internal() |
f67539c2 TL |
2009 | { |
2010 | switch (_mulldiv) | |
2011 | { | |
2012 | case muldiv_t::MULL: | |
20effc67 TL |
2013 | tmp_value = l->eval(); |
2014 | return var_value = tmp_value * r->eval(); | |
f67539c2 TL |
2015 | break; |
2016 | ||
2017 | case muldiv_t::DIV: | |
20effc67 TL |
2018 | tmp_value = l->eval(); |
2019 | return var_value = tmp_value / r->eval(); | |
f67539c2 TL |
2020 | break; |
2021 | ||
2022 | case muldiv_t::POW: | |
20effc67 TL |
2023 | tmp_value = l->eval(); |
2024 | return var_value = tmp_value ^ r->eval(); | |
2025 | break; | |
2026 | ||
2027 | case muldiv_t::MOD: | |
2028 | tmp_value = l->eval(); | |
2029 | return var_value = tmp_value % r->eval(); | |
f67539c2 TL |
2030 | break; |
2031 | ||
2032 | default: | |
2033 | throw base_s3select_exception("internal error"); | |
2034 | break; | |
2035 | } | |
2036 | } | |
2037 | ||
2038 | mulldiv_operation(base_statement* _l, muldiv_t c, base_statement* _r):l(_l), r(_r), _mulldiv(c) {} | |
2039 | ||
2040 | virtual ~mulldiv_operation() {} | |
2041 | }; | |
2042 | ||
2043 | class addsub_operation : public base_statement | |
2044 | { | |
2045 | ||
2046 | public: | |
2047 | ||
2048 | enum class addsub_op_t {ADD, SUB, NA}; | |
2049 | ||
2050 | private: | |
2051 | base_statement* l; | |
2052 | base_statement* r; | |
2053 | ||
2054 | addsub_op_t _op; | |
2055 | value var_value; | |
20effc67 | 2056 | value tmp_value; |
f67539c2 TL |
2057 | |
2058 | public: | |
2059 | ||
20effc67 | 2060 | base_statement* left() const override |
f67539c2 TL |
2061 | { |
2062 | return l; | |
2063 | } | |
20effc67 | 2064 | base_statement* right() const override |
f67539c2 TL |
2065 | { |
2066 | return r; | |
2067 | } | |
2068 | ||
2069 | virtual bool semantic() | |
2070 | { | |
2071 | return true; | |
2072 | } | |
2073 | ||
2074 | addsub_operation(base_statement* _l, addsub_op_t _o, base_statement* _r):l(_l), r(_r), _op(_o) {} | |
2075 | ||
2076 | virtual ~addsub_operation() {} | |
2077 | ||
2078 | virtual std::string print(int ident) | |
2079 | { | |
2080 | //std::string out = std::string(ident, ' ') + "addsub_operation:" += std::to_string(_op) + "\n" + l->print(ident - 5) + r->print(ident + 5); | |
2081 | return std::string("#");//TBD | |
2082 | } | |
2083 | ||
20effc67 | 2084 | virtual value& eval_internal() |
f67539c2 TL |
2085 | { |
2086 | if (_op == addsub_op_t::NA) // -num , +num , unary-operation on number | |
2087 | { | |
2088 | if (l) | |
2089 | { | |
2090 | return var_value = l->eval(); | |
2091 | } | |
2092 | else if (r) | |
2093 | { | |
2094 | return var_value = r->eval(); | |
2095 | } | |
2096 | } | |
2097 | else if (_op == addsub_op_t::ADD) | |
20effc67 TL |
2098 | {tmp_value=l->eval(); |
2099 | return var_value = (tmp_value + r->eval()); | |
f67539c2 TL |
2100 | } |
2101 | else | |
20effc67 TL |
2102 | {tmp_value=l->eval(); |
2103 | return var_value = (tmp_value - r->eval()); | |
f67539c2 TL |
2104 | } |
2105 | ||
2106 | return var_value; | |
2107 | } | |
2108 | }; | |
2109 | ||
20effc67 TL |
2110 | class negate_function_operation : public base_statement |
2111 | { | |
2112 | //purpose: some functions (between,like,in) are participating in where-clause as predicates; thus NOT unary-operator may operate on them. | |
2113 | ||
2114 | private: | |
2115 | ||
2116 | base_statement* function_to_negate; | |
2117 | value res; | |
2118 | ||
2119 | public: | |
2120 | ||
2121 | explicit negate_function_operation(base_statement *f):function_to_negate(f){} | |
2122 | ||
2123 | virtual std::string print(int ident) | |
2124 | { | |
2125 | return std::string("#");//TBD | |
2126 | } | |
2127 | ||
2128 | virtual bool semantic() | |
2129 | { | |
2130 | return true; | |
2131 | } | |
2132 | ||
2133 | base_statement* left() const override | |
2134 | { | |
2135 | return function_to_negate; | |
2136 | } | |
2137 | ||
2138 | virtual value& eval_internal() | |
2139 | { | |
2140 | res = function_to_negate->eval(); | |
2141 | ||
2142 | if (res.is_number() || res.is_bool())//TODO is integer type | |
2143 | { | |
2144 | if (res.is_true()) | |
2145 | { | |
2146 | res = (bool)0; | |
2147 | } | |
2148 | else | |
2149 | { | |
2150 | res = (bool)1; | |
2151 | } | |
2152 | } | |
2153 | ||
2154 | return res; | |
2155 | } | |
2156 | ||
2157 | }; | |
2158 | ||
f67539c2 TL |
2159 | class base_function |
2160 | { | |
2161 | ||
2162 | protected: | |
2163 | bool aggregate; | |
2164 | ||
2165 | public: | |
2166 | //TODO add semantic to base-function , it operate once on function creation | |
2167 | // validate semantic on creation instead on run-time | |
20effc67 | 2168 | virtual bool operator()(bs_stmt_vec_t* args, variable* result) = 0; |
f67539c2 | 2169 | base_function() : aggregate(false) {} |
20effc67 | 2170 | bool is_aggregate() const |
f67539c2 TL |
2171 | { |
2172 | return aggregate == true; | |
2173 | } | |
2174 | virtual void get_aggregate_result(variable*) {} | |
2175 | ||
20effc67 TL |
2176 | virtual ~base_function() = default; |
2177 | ||
2178 | virtual void dtor() | |
2179 | {//release function-body implementation | |
2180 | this->~base_function(); | |
2181 | } | |
2182 | ||
f67539c2 TL |
2183 | }; |
2184 | ||
20effc67 TL |
2185 | class base_date_extract : public base_function |
2186 | { | |
2187 | protected: | |
2188 | value val_timestamp; | |
2189 | boost::posix_time::ptime new_ptime; | |
2190 | boost::posix_time::time_duration td; | |
2191 | bool flag; | |
2192 | ||
2193 | public: | |
2194 | void param_validation(bs_stmt_vec_t*& args) | |
2195 | { | |
2196 | auto iter = args->begin(); | |
2197 | int args_size = args->size(); | |
2198 | ||
2199 | if (args_size < 1) | |
2200 | { | |
2201 | throw base_s3select_exception("to_timestamp should have 2 parameters"); | |
2202 | } | |
2203 | ||
2204 | base_statement* ts = *iter; | |
2205 | val_timestamp = ts->eval(); | |
2206 | if(val_timestamp.is_timestamp()== false) | |
2207 | { | |
2208 | throw base_s3select_exception("second parameter is not timestamp"); | |
2209 | } | |
2210 | ||
2211 | std::tie(new_ptime, td, flag) = *val_timestamp.timestamp(); | |
2212 | } | |
2213 | ||
2214 | }; | |
2215 | ||
2216 | class base_date_diff : public base_function | |
2217 | { | |
2218 | protected: | |
2219 | boost::posix_time::ptime ptime1; | |
2220 | boost::posix_time::ptime ptime2; | |
2221 | ||
2222 | public: | |
2223 | void param_validation(bs_stmt_vec_t*& args) | |
2224 | { | |
2225 | auto iter = args->begin(); | |
2226 | int args_size = args->size(); | |
2227 | ||
2228 | if (args_size < 2) | |
2229 | { | |
2230 | throw base_s3select_exception("datediff need 3 parameters"); | |
2231 | } | |
2232 | ||
2233 | base_statement* dt1_param = *iter; | |
2234 | value val_ts1 = dt1_param->eval(); | |
2235 | ||
2236 | if (val_ts1.is_timestamp() == false) | |
2237 | { | |
2238 | throw base_s3select_exception("second parameter should be timestamp"); | |
2239 | } | |
2240 | ||
2241 | iter++; | |
2242 | base_statement* dt2_param = *iter; | |
2243 | value val_ts2 = dt2_param->eval(); | |
2244 | ||
2245 | if (val_ts2.is_timestamp() == false) | |
2246 | { | |
2247 | throw base_s3select_exception("third parameter should be timestamp"); | |
2248 | } | |
2249 | ||
2250 | boost::posix_time::ptime ts1_ptime; | |
2251 | boost::posix_time::time_duration ts1_td; | |
2252 | boost::posix_time::ptime ts2_ptime; | |
2253 | boost::posix_time::time_duration ts2_td; | |
2254 | ||
2255 | std::tie(ts1_ptime, ts1_td, std::ignore) = *val_ts1.timestamp(); | |
2256 | std::tie(ts2_ptime, ts2_td, std::ignore) = *val_ts2.timestamp(); | |
2257 | ||
2258 | ptime1 = ts1_ptime + boost::posix_time::hours(ts1_td.hours() * -1); | |
2259 | ptime1 += boost::posix_time::minutes(ts1_td.minutes() * -1); | |
2260 | ptime2 = ts2_ptime + boost::posix_time::hours(ts2_td.hours() * -1); | |
2261 | ptime2 += boost::posix_time::minutes(ts2_td.minutes() * -1); | |
2262 | } | |
2263 | ||
2264 | }; | |
2265 | ||
2266 | class base_date_add : public base_function | |
2267 | { | |
2268 | protected: | |
2269 | value val_quantity; | |
2270 | boost::posix_time::ptime new_ptime; | |
2271 | boost::posix_time::time_duration td; | |
2272 | bool flag; | |
2273 | timestamp_t new_tmstmp; | |
2274 | ||
2275 | public: | |
2276 | void param_validation(bs_stmt_vec_t*& args) | |
2277 | { | |
2278 | auto iter = args->begin(); | |
2279 | int args_size = args->size(); | |
2280 | ||
2281 | if (args_size < 2) | |
2282 | { | |
2283 | throw base_s3select_exception("add_to_timestamp should have 3 parameters"); | |
2284 | } | |
2285 | ||
2286 | base_statement* quan = *iter; | |
2287 | val_quantity = quan->eval(); | |
2288 | ||
2289 | if (val_quantity.is_number() == false) | |
2290 | { | |
2291 | throw base_s3select_exception("second parameter should be number"); //TODO what about double? | |
2292 | } | |
2293 | ||
2294 | iter++; | |
2295 | base_statement* ts = *iter; | |
2296 | value val_ts = ts->eval(); | |
2297 | ||
2298 | if(val_ts.is_timestamp() == false) | |
2299 | { | |
2300 | throw base_s3select_exception("third parameter should be time-stamp"); | |
2301 | } | |
2302 | ||
2303 | std::tie(new_ptime, td, flag) = *val_ts.timestamp(); | |
2304 | } | |
2305 | ||
2306 | }; | |
2307 | ||
2308 | class base_time_to_string | |
2309 | { | |
2310 | protected: | |
2311 | std::vector<std::string> months = { "January", "February", "March","April", | |
2312 | "May", "June", "July", "August", "September", | |
2313 | "October", "November", "December"}; | |
2314 | public: | |
2315 | virtual std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) = 0; | |
2316 | virtual ~base_time_to_string() = default; | |
2317 | }; | |
2318 | ||
2319 | class derive_yyyy : public base_time_to_string | |
2320 | { | |
2321 | public: | |
2322 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2323 | { | |
2324 | int64_t yr = new_ptime.date().year(); | |
2325 | return std::string(param - 4, '0') + std::to_string(yr); | |
2326 | } | |
2327 | } yyyy_to_string; | |
2328 | ||
2329 | class derive_yy : public base_time_to_string | |
2330 | { | |
2331 | public: | |
2332 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2333 | { | |
2334 | int64_t yr = new_ptime.date().year(); | |
2335 | return std::string(2 - std::to_string(yr%100).length(), '0') + std::to_string(yr%100); | |
2336 | } | |
2337 | } yy_to_string; | |
2338 | ||
2339 | class derive_y : public base_time_to_string | |
2340 | { | |
2341 | public: | |
2342 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2343 | { | |
2344 | int64_t yr = new_ptime.date().year(); | |
2345 | return std::to_string(yr); | |
2346 | } | |
2347 | } y_to_string; | |
2348 | ||
2349 | class derive_mmmmm_month : public base_time_to_string | |
2350 | { | |
2351 | public: | |
2352 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2353 | { | |
2354 | int64_t mnth = new_ptime.date().month(); | |
2355 | return (months[mnth - 1]).substr(0, 1); | |
2356 | } | |
2357 | } mmmmm_month_to_string; | |
2358 | ||
2359 | class derive_mmmm_month : public base_time_to_string | |
2360 | { | |
2361 | public: | |
2362 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2363 | { | |
2364 | int64_t mnth = new_ptime.date().month(); | |
2365 | return months[mnth - 1]; | |
2366 | } | |
2367 | } mmmm_month_to_string; | |
2368 | ||
2369 | class derive_mmm_month : public base_time_to_string | |
2370 | { | |
2371 | public: | |
2372 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2373 | { | |
2374 | int64_t mnth = new_ptime.date().month(); | |
2375 | return (months[mnth - 1]).substr(0, 3); | |
2376 | } | |
2377 | } mmm_month_to_string; | |
2378 | ||
2379 | class derive_mm_month : public base_time_to_string | |
2380 | { | |
2381 | public: | |
2382 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2383 | { | |
2384 | int64_t mnth = new_ptime.date().month(); | |
2385 | std::string mnth_str = std::to_string(mnth); | |
2386 | return std::string(2 - mnth_str.length(), '0') + mnth_str; | |
2387 | } | |
2388 | } mm_month_to_string; | |
2389 | ||
2390 | class derive_m_month : public base_time_to_string | |
2391 | { | |
2392 | public: | |
2393 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2394 | { | |
2395 | int64_t mnth = new_ptime.date().month(); | |
2396 | return std::to_string(mnth); | |
2397 | } | |
2398 | } m_month_to_string; | |
2399 | ||
2400 | class derive_dd : public base_time_to_string | |
2401 | { | |
2402 | public: | |
2403 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2404 | { | |
2405 | std::string day = std::to_string(new_ptime.date().day()); | |
2406 | return std::string(2 - day.length(), '0') + day; | |
2407 | } | |
2408 | } dd_to_string; | |
2409 | ||
2410 | class derive_d : public base_time_to_string | |
2411 | { | |
2412 | public: | |
2413 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2414 | { | |
2415 | std::string day = std::to_string(new_ptime.date().day()); | |
2416 | return day; | |
2417 | } | |
2418 | } d_to_string; | |
2419 | ||
2420 | class derive_a : public base_time_to_string | |
2421 | { | |
2422 | public: | |
2423 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2424 | { | |
2425 | int64_t hr = new_ptime.time_of_day().hours(); | |
2426 | std::string meridiem = (hr < 12 ? "AM" : "PM"); | |
2427 | return meridiem; | |
2428 | } | |
2429 | } a_to_string; | |
2430 | ||
2431 | class derive_hh : public base_time_to_string | |
2432 | { | |
2433 | public: | |
2434 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2435 | { | |
2436 | int64_t hr = new_ptime.time_of_day().hours(); | |
2437 | std::string hr_12 = std::to_string(hr%12 == 0 ? 12 : hr%12); | |
2438 | return std::string(2 - hr_12.length(), '0') + hr_12; | |
2439 | } | |
2440 | } hh_to_string; | |
2441 | ||
2442 | class derive_h : public base_time_to_string | |
2443 | { | |
2444 | public: | |
2445 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2446 | { | |
2447 | int64_t hr = new_ptime.time_of_day().hours(); | |
2448 | std::string hr_12 = std::to_string(hr%12 == 0 ? 12 : hr%12); | |
2449 | return hr_12; | |
2450 | } | |
2451 | } h_to_string; | |
2452 | ||
2453 | class derive_h2 : public base_time_to_string | |
2454 | { | |
2455 | public: | |
2456 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2457 | { | |
2458 | int64_t hr = new_ptime.time_of_day().hours(); | |
2459 | std::string hr_24 = std::to_string(hr); | |
2460 | return std::string(2 - hr_24.length(), '0') + hr_24; | |
2461 | } | |
2462 | } h2_to_string; | |
2463 | ||
2464 | class derive_h1 : public base_time_to_string | |
2465 | { | |
2466 | public: | |
2467 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2468 | { | |
2469 | int64_t hr = new_ptime.time_of_day().hours(); | |
2470 | return std::to_string(hr); | |
2471 | } | |
2472 | } h1_to_string; | |
2473 | ||
2474 | class derive_mm : public base_time_to_string | |
2475 | { | |
2476 | public: | |
2477 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2478 | { | |
2479 | std::string mint = std::to_string(new_ptime.time_of_day().minutes()); | |
2480 | return std::string(2 - mint.length(), '0') + mint; | |
2481 | } | |
2482 | } mm_to_string; | |
2483 | ||
2484 | class derive_m : public base_time_to_string | |
2485 | { | |
2486 | public: | |
2487 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2488 | { | |
2489 | std::string mint = std::to_string(new_ptime.time_of_day().minutes()); | |
2490 | return mint; | |
2491 | } | |
2492 | } m_to_string; | |
2493 | ||
2494 | class derive_ss : public base_time_to_string | |
2495 | { | |
2496 | public: | |
2497 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2498 | { | |
2499 | std::string sec = std::to_string(new_ptime.time_of_day().seconds()); | |
2500 | return std::string(2 - sec.length(), '0') + sec; | |
2501 | } | |
2502 | } ss_to_string; | |
2503 | ||
2504 | class derive_s : public base_time_to_string | |
2505 | { | |
2506 | public: | |
2507 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2508 | { | |
2509 | std::string sec = std::to_string(new_ptime.time_of_day().seconds()); | |
2510 | return sec; | |
2511 | } | |
2512 | } s_to_string; | |
2513 | ||
2514 | class derive_frac_sec : public base_time_to_string | |
2515 | { | |
2516 | public: | |
2517 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2518 | { | |
2519 | std::string frac_seconds = std::to_string(new_ptime.time_of_day().fractional_seconds()); | |
2520 | #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG | |
2521 | frac_seconds = std::string(9 - frac_seconds.length(), '0') + frac_seconds; | |
2522 | #else | |
2523 | frac_seconds = std::string(6 - frac_seconds.length(), '0') + frac_seconds; | |
2524 | #endif | |
2525 | if (param >= frac_seconds.length()) | |
2526 | { | |
2527 | return frac_seconds + std::string(param - frac_seconds.length(), '0'); | |
2528 | } | |
2529 | else | |
2530 | { | |
2531 | return frac_seconds.substr(0, param); | |
2532 | } | |
2533 | } | |
2534 | } frac_sec_to_string; | |
2535 | ||
2536 | class derive_n : public base_time_to_string | |
2537 | { | |
2538 | public: | |
2539 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2540 | { | |
2541 | int frac_seconds = new_ptime.time_of_day().fractional_seconds(); | |
2542 | ||
2543 | if(frac_seconds == 0) | |
2544 | return std::to_string(frac_seconds); | |
2545 | else | |
2546 | { | |
2547 | #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG | |
2548 | return std::to_string(frac_seconds); | |
2549 | #else | |
2550 | return std::to_string(frac_seconds) + std::string(3, '0'); | |
2551 | #endif | |
2552 | } | |
2553 | } | |
2554 | } n_to_string; | |
2555 | ||
2556 | class derive_x1 : public base_time_to_string | |
2557 | { | |
2558 | public: | |
2559 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2560 | { | |
2561 | int tz_hour = td.hours(); | |
2562 | int tz_minute = td.minutes(); | |
2563 | if (tz_hour == 0 && tz_minute == 0) | |
2564 | { | |
2565 | return "Z"; | |
2566 | } | |
2567 | else if (tz_minute == 0) | |
2568 | { | |
2569 | std::string tz_hr = std::to_string(std::abs(tz_hour)); | |
2570 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr; | |
2571 | } | |
2572 | else | |
2573 | { | |
2574 | std::string tz_hr = std::to_string(std::abs(tz_hour)); | |
2575 | std::string tz_mn = std::to_string(std::abs(tz_minute)); | |
2576 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr + std::string(2 - tz_mn.length(), '0') + tz_mn; | |
2577 | } | |
2578 | } | |
2579 | } x1_to_string; | |
2580 | ||
2581 | class derive_x2 : public base_time_to_string | |
2582 | { | |
2583 | public: | |
2584 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2585 | { | |
2586 | int tz_hour = td.hours(); | |
2587 | int tz_minute = td.minutes(); | |
2588 | if (tz_hour == 0 && tz_minute == 0) | |
2589 | { | |
2590 | return "Z"; | |
2591 | } | |
2592 | else | |
2593 | { | |
2594 | std::string tz_hr = std::to_string(std::abs(tz_hour)); | |
2595 | std::string tz_mn = std::to_string(std::abs(tz_minute)); | |
2596 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr + std::string(2 - tz_mn.length(), '0') + tz_mn; | |
2597 | } | |
2598 | } | |
2599 | } x2_to_string; | |
2600 | ||
2601 | class derive_x3 : public base_time_to_string | |
2602 | { | |
2603 | public: | |
2604 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2605 | { | |
2606 | int tz_hour = td.hours(); | |
2607 | int tz_minute = td.minutes(); | |
2608 | if (tz_hour == 0 && tz_minute == 0) | |
2609 | { | |
2610 | return "Z"; | |
2611 | } | |
2612 | else | |
2613 | { | |
2614 | std::string tz_hr = std::to_string(std::abs(tz_hour)); | |
2615 | std::string tz_mn = std::to_string(std::abs(tz_minute)); | |
2616 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr + ":" + std::string(2 - tz_mn.length(), '0') + tz_mn; | |
2617 | } | |
2618 | } | |
2619 | } x3_to_string; | |
2620 | ||
2621 | class derive_x : public base_time_to_string | |
2622 | { | |
2623 | public: | |
2624 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2625 | { | |
2626 | int tz_minute = td.minutes(); | |
2627 | std::string tz_hr = std::to_string(std::abs(td.hours())); | |
2628 | if (tz_minute == 0) | |
2629 | { | |
2630 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr; | |
2631 | } | |
2632 | else | |
2633 | { | |
2634 | std::string tz_mn = std::to_string(std::abs(tz_minute)); | |
2635 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr + std::string(2 - tz_mn.length(), '0') + tz_mn; | |
2636 | } | |
2637 | } | |
2638 | } x_to_string; | |
2639 | ||
2640 | class derive_xx : public base_time_to_string | |
2641 | { | |
2642 | public: | |
2643 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2644 | { | |
2645 | std::string tz_hr = std::to_string(std::abs(td.hours())); | |
2646 | std::string tz_mn = std::to_string(std::abs(td.minutes())); | |
2647 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr + std::string(2 - tz_mn.length(), '0') + tz_mn; | |
2648 | } | |
2649 | } xx_to_string; | |
2650 | ||
2651 | class derive_xxx : public base_time_to_string | |
2652 | { | |
2653 | public: | |
2654 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2655 | { | |
2656 | std::string tz_hr = std::to_string(std::abs(td.hours())); | |
2657 | std::string tz_mn = std::to_string(std::abs(td.minutes())); | |
2658 | return (td.is_negative() ? "-" : "+") + std::string(2 - tz_hr.length(), '0') + tz_hr + ":" + std::string(2 - tz_mn.length(), '0') + tz_mn; | |
2659 | } | |
2660 | } xxx_to_string; | |
2661 | ||
2662 | class derive_delimiter : public base_time_to_string | |
2663 | { | |
2664 | public: | |
2665 | std::string print_time(boost::posix_time::ptime& new_ptime, boost::posix_time::time_duration& td, uint32_t param) | |
2666 | { | |
2667 | char ch = param; | |
2668 | return std::string(1, ch); | |
2669 | } | |
2670 | } delimiter_to_string; | |
2671 | ||
2672 | class base_timestamp_to_string : public base_function | |
2673 | { | |
2674 | protected: | |
2675 | boost::posix_time::ptime new_ptime; | |
2676 | boost::posix_time::time_duration td; | |
2677 | bool flag; | |
2678 | std::string format; | |
2679 | std::vector<char> m_metachar {'y', 'M', 'd', 'a', 'h', 'H', 'm', 's', 'S', 'n', 'X', 'x'}; | |
2680 | std::vector<std::string> m_metaword_vec {"yyy", "yy", "y", "MMMMM", "MMMM", "MMM", "MM", "M", | |
2681 | "dd", "d", "a", "hh", "h", "HH", "H", "mm", "m", "ss", "s", "n", | |
2682 | "XXXXX", "XXXX", "XXX", "XX", "X", "xxxxx", "xxxx", "xxx", "xx", | |
2683 | "x"}; | |
2684 | std::vector<base_time_to_string*> print_vector; | |
2685 | std::vector<uint32_t> para; | |
2686 | bool initialized = false; | |
2687 | ||
2688 | using to_string_lib_t = std::map<std::string,base_time_to_string* >; | |
2689 | ||
2690 | const to_string_lib_t time_to_string_functions = | |
2691 | { | |
2692 | {"yyyy+", &yyyy_to_string}, | |
2693 | {"yyy", &y_to_string}, | |
2694 | {"yy", &yy_to_string}, | |
2695 | {"y", &y_to_string}, | |
2696 | {"MMMMM", &mmmmm_month_to_string}, | |
2697 | {"MMMM", &mmmm_month_to_string}, | |
2698 | {"MMM", &mmm_month_to_string}, | |
2699 | {"MM", &mm_month_to_string}, | |
2700 | {"M", &m_month_to_string}, | |
2701 | {"dd", &dd_to_string }, | |
2702 | {"d", &d_to_string }, | |
2703 | {"a", &a_to_string }, | |
2704 | {"hh", &hh_to_string}, | |
2705 | {"h", &h_to_string}, | |
2706 | {"HH", &h2_to_string}, | |
2707 | {"H", &h1_to_string}, | |
2708 | {"mm", &mm_to_string}, | |
2709 | {"m", &m_to_string}, | |
2710 | {"ss", &ss_to_string}, | |
2711 | {"s", &s_to_string}, | |
2712 | {"S+", &frac_sec_to_string}, | |
2713 | {"n", &n_to_string}, | |
2714 | {"XXXXX", &x3_to_string}, | |
2715 | {"XXXX", &x2_to_string}, | |
2716 | {"XXX", &x3_to_string}, | |
2717 | {"XX", &x2_to_string}, | |
2718 | {"X", &x1_to_string}, | |
2719 | {"xxxxx", &xxx_to_string}, | |
2720 | {"xxxx", &xx_to_string}, | |
2721 | {"xxx", &xxx_to_string}, | |
2722 | {"xx", &xx_to_string}, | |
2723 | {"x", &x_to_string}, | |
2724 | {"delimiter", &delimiter_to_string} | |
2725 | }; | |
2726 | ||
2727 | public: | |
2728 | void param_validation(bs_stmt_vec_t*& args) | |
2729 | { | |
2730 | auto iter = args->begin(); | |
2731 | int args_size = args->size(); | |
2732 | ||
2733 | if (args_size < 2) | |
2734 | { | |
2735 | throw base_s3select_exception("to_string need 2 parameters"); | |
2736 | } | |
2737 | ||
2738 | base_statement* dt1_param = *iter; | |
2739 | value val_timestamp = dt1_param->eval(); | |
2740 | ||
2741 | if (val_timestamp.is_timestamp() == false) | |
2742 | { | |
2743 | throw base_s3select_exception("first parameter should be timestamp"); | |
2744 | } | |
2745 | ||
2746 | iter++; | |
2747 | base_statement* frmt = *iter; | |
2748 | value val_format = frmt->eval(); | |
2749 | ||
2750 | if (val_format.is_string() == false) | |
2751 | { | |
2752 | throw base_s3select_exception("second parameter should be string"); | |
2753 | } | |
2754 | ||
2755 | std::tie(new_ptime, td, flag) = *val_timestamp.timestamp(); | |
2756 | format = val_format.to_string(); | |
2757 | } | |
2758 | ||
2759 | uint32_t length_same_char_str(std::string str, char ch) | |
2760 | { | |
2761 | int i = 0; | |
2762 | while(str[i] == ch) | |
2763 | i++; | |
2764 | return i; | |
2765 | } | |
2766 | ||
2767 | void prepare_to_string_vector(std::vector<base_time_to_string*>& print_vector, std::vector<uint32_t>& para) | |
2768 | { | |
2769 | for (uint32_t i = 0; i < format.length(); i++) | |
2770 | { | |
2771 | if (std::find(m_metachar.begin(), m_metachar.end() , format[i]) != m_metachar.end()) | |
2772 | { | |
2773 | if (format.substr(i, 4).compare("yyyy") == 0) | |
2774 | { | |
2775 | uint32_t len = length_same_char_str(format.substr(i), 'y'); | |
2776 | auto it = time_to_string_functions.find("yyyy+"); | |
2777 | print_vector.push_back( it->second); | |
2778 | para.push_back(len); | |
2779 | i += len - 1; | |
2780 | continue; | |
2781 | } | |
2782 | else if (format[i] == 'S') | |
2783 | { | |
2784 | uint32_t len = length_same_char_str(format.substr(i), 'S'); | |
2785 | auto it = time_to_string_functions.find("S+"); | |
2786 | print_vector.push_back( it->second); | |
2787 | para.push_back(len); | |
2788 | i += len - 1; | |
2789 | continue; | |
2790 | } | |
2791 | ||
2792 | for (auto word : m_metaword_vec) | |
2793 | { | |
2794 | if (format.substr(i, word.length()).compare(word) == 0) | |
2795 | { | |
2796 | auto it = time_to_string_functions.find(word.c_str()); | |
2797 | print_vector.push_back( it->second); | |
2798 | para.push_back('\0'); | |
2799 | i += word.length() - 1; | |
2800 | break; | |
2801 | } | |
2802 | } | |
2803 | } | |
2804 | else | |
2805 | { | |
2806 | auto it = time_to_string_functions.find("delimiter"); | |
2807 | print_vector.push_back( it->second ); | |
2808 | para.push_back(format[i]); | |
2809 | } | |
2810 | } | |
2811 | } | |
2812 | ||
2813 | std::string execute_to_string(std::vector<base_time_to_string*>& print_vector, std::vector<uint32_t>& para) | |
2814 | { | |
2815 | std::string res; | |
2816 | int temp = 0; | |
2817 | for(auto p : print_vector) | |
2818 | { | |
2819 | res += p->print_time(new_ptime, td, para.at(temp)); | |
2820 | temp++; | |
2821 | } | |
2822 | return res; | |
2823 | } | |
2824 | ||
2825 | }; | |
f67539c2 | 2826 | |
1e59de90 TL |
2827 | |
2828 | class base_like : public base_function | |
2829 | { | |
2830 | protected: | |
2831 | value like_expr_val; | |
2832 | value escape_expr_val; | |
2833 | bool constant_state = false; | |
2834 | #if REGEX_HS | |
2835 | hs_database_t* compiled_regex; | |
2836 | hs_scratch_t *scratch = NULL; | |
2837 | bool res; | |
2838 | #elif REGEX_RE2 | |
2839 | std::unique_ptr<RE2> compiled_regex; | |
2840 | #else | |
2841 | std::regex compiled_regex; | |
2842 | #endif | |
2843 | ||
2844 | public: | |
2845 | void param_validation(base_statement* escape_expr, base_statement* like_expr) | |
2846 | { | |
2847 | escape_expr_val = escape_expr->eval(); | |
2848 | if (escape_expr_val.type != value::value_En_t::STRING) | |
2849 | { | |
2850 | throw base_s3select_exception("esacpe expression must be string"); | |
2851 | } | |
2852 | ||
2853 | like_expr_val = like_expr->eval(); | |
2854 | if (like_expr_val.type != value::value_En_t::STRING) | |
2855 | { | |
2856 | throw base_s3select_exception("like expression must be string"); | |
2857 | } | |
2858 | } | |
2859 | ||
2860 | std::vector<char> transform(const char* s, char escape) | |
2861 | { | |
2862 | enum state_expr_t {START, ESCAPE, START_STAR_CHAR, START_METACHAR, START_ANYCHAR, METACHAR, | |
2863 | STAR_CHAR, ANYCHAR, END }; | |
2864 | state_expr_t st{START}; | |
2865 | ||
2866 | const char *p = s; | |
2867 | size_t size = strlen(s); | |
2868 | size_t i = 0; | |
2869 | std::vector<char> v; | |
2870 | ||
2871 | while(*p) | |
2872 | { | |
2873 | switch (st) | |
2874 | { | |
2875 | case START: | |
2876 | if (*p == escape) | |
2877 | { | |
2878 | st = ESCAPE; | |
2879 | v.push_back('^'); | |
2880 | } | |
2881 | else if (*p == '%') | |
2882 | { | |
2883 | v.push_back('^'); | |
2884 | v.push_back('.'); | |
2885 | v.push_back('*'); | |
2886 | st = START_STAR_CHAR; | |
2887 | } | |
2888 | else if (*p == '_') | |
2889 | { | |
2890 | v.push_back('^'); | |
2891 | v.push_back('.'); | |
2892 | st=START_METACHAR; | |
2893 | } | |
2894 | else | |
2895 | { | |
2896 | v.push_back('^'); | |
2897 | v.push_back(*p); | |
2898 | st=START_ANYCHAR; | |
2899 | } | |
2900 | break; | |
2901 | ||
2902 | case START_STAR_CHAR: | |
2903 | if (*p == escape) | |
2904 | { | |
2905 | st = ESCAPE; | |
2906 | } | |
2907 | else if (*p == '%') | |
2908 | { | |
2909 | st = START_STAR_CHAR; | |
2910 | } | |
2911 | else if (*p == '_') | |
2912 | { | |
2913 | v.push_back('.'); | |
2914 | st = METACHAR; | |
2915 | } | |
2916 | else | |
2917 | { | |
2918 | v.push_back(*p); | |
2919 | st = ANYCHAR; | |
2920 | } | |
2921 | break; | |
2922 | ||
2923 | case START_METACHAR: | |
2924 | if (*p == escape) | |
2925 | { | |
2926 | st = ESCAPE; | |
2927 | } | |
2928 | else if(*p == '_') | |
2929 | { | |
2930 | v.push_back('.'); | |
2931 | st = METACHAR; | |
2932 | } | |
2933 | else if(*p == '%') | |
2934 | { | |
2935 | v.push_back('.'); | |
2936 | v.push_back('*'); | |
2937 | st = STAR_CHAR; | |
2938 | } | |
2939 | else | |
2940 | { | |
2941 | v.push_back(*p); | |
2942 | st = ANYCHAR; | |
2943 | } | |
2944 | break; | |
2945 | ||
2946 | case START_ANYCHAR: | |
2947 | if (*p == escape) | |
2948 | { | |
2949 | st = ESCAPE; | |
2950 | } | |
2951 | else if (*p == '_' && i == size-1) | |
2952 | { | |
2953 | v.push_back('.'); | |
2954 | v.push_back('$'); | |
2955 | st = END; | |
2956 | } | |
2957 | else if (*p == '_') | |
2958 | { | |
2959 | v.push_back('.'); | |
2960 | st = METACHAR; | |
2961 | } | |
2962 | else if (*p == '%' && i == size-1) | |
2963 | { | |
2964 | v.push_back('.'); | |
2965 | v.push_back('*'); | |
2966 | v.push_back('$'); | |
2967 | st = END; | |
2968 | } | |
2969 | else if (*p == '%') | |
2970 | { | |
2971 | v.push_back('.'); | |
2972 | v.push_back('*'); | |
2973 | st = STAR_CHAR; | |
2974 | } | |
2975 | else if (i == size-1) | |
2976 | { | |
2977 | v.push_back(*p); | |
2978 | v.push_back('$'); | |
2979 | st = END; | |
2980 | } | |
2981 | else | |
2982 | { | |
2983 | v.push_back(*p); | |
2984 | st = ANYCHAR; | |
2985 | } | |
2986 | break; | |
2987 | ||
2988 | case METACHAR: | |
2989 | if (*p == escape) | |
2990 | { | |
2991 | st = ESCAPE; | |
2992 | } | |
2993 | else if (*p == '_' && i == size-1) | |
2994 | { | |
2995 | v.push_back('.'); | |
2996 | v.push_back('$'); | |
2997 | st = END; | |
2998 | } | |
2999 | else if (*p == '_') | |
3000 | { | |
3001 | v.push_back('.'); | |
3002 | st = METACHAR; | |
3003 | } | |
3004 | else if (*p == '%' && i == size-1) | |
3005 | { | |
3006 | v.push_back('.'); | |
3007 | v.push_back('*'); | |
3008 | v.push_back('$'); | |
3009 | st = END; | |
3010 | } | |
3011 | else if (*p == '%') | |
3012 | { | |
3013 | v.push_back('.'); | |
3014 | v.push_back('*'); | |
3015 | st = STAR_CHAR; | |
3016 | } | |
3017 | else if (i == size-1) | |
3018 | { | |
3019 | v.push_back(*p); | |
3020 | v.push_back('$'); | |
3021 | st = END; | |
3022 | } | |
3023 | else | |
3024 | { | |
3025 | v.push_back(*p); | |
3026 | st = ANYCHAR; | |
3027 | } | |
3028 | break; | |
3029 | ||
3030 | case ANYCHAR: | |
3031 | if (*p == escape) | |
3032 | { | |
3033 | st = ESCAPE; | |
3034 | } | |
3035 | else if (*p == '_' && i == size-1) | |
3036 | { | |
3037 | v.push_back('.'); | |
3038 | v.push_back('$'); | |
3039 | st = END; | |
3040 | } | |
3041 | else if (*p == '_') | |
3042 | { | |
3043 | v.push_back('.'); | |
3044 | st = METACHAR; | |
3045 | } | |
3046 | else if (*p == '%' && i == size-1) | |
3047 | { | |
3048 | v.push_back('.'); | |
3049 | v.push_back('*'); | |
3050 | v.push_back('$'); | |
3051 | st = END; | |
3052 | } | |
3053 | else if (*p == '%') | |
3054 | { | |
3055 | v.push_back('.'); | |
3056 | v.push_back('*'); | |
3057 | st = STAR_CHAR; | |
3058 | } | |
3059 | else if (i == size-1) | |
3060 | { | |
3061 | v.push_back(*p); | |
3062 | v.push_back('$'); | |
3063 | st = END; | |
3064 | } | |
3065 | else | |
3066 | { | |
3067 | v.push_back(*p); | |
3068 | st = ANYCHAR; | |
3069 | } | |
3070 | break; | |
3071 | ||
3072 | case STAR_CHAR: | |
3073 | if (*p == escape) | |
3074 | { | |
3075 | st = ESCAPE; | |
3076 | } | |
3077 | else if (*p == '%' && i == size-1) | |
3078 | { | |
3079 | v.push_back('$'); | |
3080 | st = END; | |
3081 | } | |
3082 | else if (*p == '%') | |
3083 | { | |
3084 | st = STAR_CHAR; | |
3085 | } | |
3086 | else if (*p == '_' && i == size-1) | |
3087 | { | |
3088 | v.push_back('.'); | |
3089 | v.push_back('$'); | |
3090 | st = END; | |
3091 | } | |
3092 | else if (*p == '_') | |
3093 | { | |
3094 | v.push_back('.'); | |
3095 | st = METACHAR; | |
3096 | } | |
3097 | else if (i == size-1) | |
3098 | { | |
3099 | v.push_back(*p); | |
3100 | v.push_back('$'); | |
3101 | st = END; | |
3102 | } | |
3103 | else | |
3104 | { | |
3105 | v.push_back(*p); | |
3106 | st = ANYCHAR; | |
3107 | } | |
3108 | break; | |
3109 | ||
3110 | case ESCAPE: | |
3111 | if (i == size-1) | |
3112 | { | |
3113 | v.push_back(*p); | |
3114 | v.push_back('$'); | |
3115 | st = END; | |
3116 | } | |
3117 | else | |
3118 | { | |
3119 | v.push_back(*p); | |
3120 | st = ANYCHAR; | |
3121 | } | |
3122 | break; | |
3123 | ||
3124 | case END: | |
3125 | return v; | |
3126 | ||
3127 | default: | |
3128 | throw base_s3select_exception("missing state!"); | |
3129 | break; | |
3130 | } | |
3131 | p++; | |
3132 | i++; | |
3133 | } | |
3134 | return v; | |
3135 | } | |
3136 | ||
3137 | void compile(std::vector<char>& like_regex) | |
3138 | { | |
3139 | std::string like_as_regex_str(like_regex.begin(), like_regex.end()); | |
3140 | ||
3141 | #if REGEX_HS | |
3142 | std::string temp = "^" + like_as_regex_str + "\\z"; //for anchoring start and end | |
3143 | char* c_regex = &temp[0]; | |
3144 | hs_compile_error_t *compile_err; | |
3145 | if (hs_compile(c_regex, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &compiled_regex, | |
3146 | &compile_err) != HS_SUCCESS) | |
3147 | { | |
3148 | throw base_s3select_exception("ERROR: Unable to compile pattern."); | |
3149 | } | |
3150 | ||
3151 | if (hs_alloc_scratch(compiled_regex, &scratch) != HS_SUCCESS) | |
3152 | { | |
3153 | throw base_s3select_exception("ERROR: Unable to allocate scratch space."); | |
3154 | } | |
3155 | #elif REGEX_RE2 | |
3156 | compiled_regex = std::make_unique<RE2>(like_as_regex_str); | |
3157 | #else | |
3158 | compiled_regex = std::regex(like_as_regex_str); | |
3159 | #endif | |
3160 | } | |
3161 | ||
3162 | void match(value& main_expr_val, variable* result) | |
3163 | { | |
3164 | std::string content_str = main_expr_val.to_string(); | |
3165 | #if REGEX_HS | |
3166 | const char* content = content_str.c_str(); | |
3167 | res = false; | |
3168 | ||
3169 | if (hs_scan(compiled_regex, content, strlen(content), 0, scratch, eventHandler, &res) != | |
3170 | HS_SUCCESS) | |
3171 | { | |
3172 | throw base_s3select_exception("ERROR: Unable to scan input buffer. Exiting."); | |
3173 | } | |
3174 | ||
3175 | result->set_value(res); | |
3176 | #elif REGEX_RE2 | |
3177 | re2::StringPiece res[1]; | |
3178 | ||
3179 | if (compiled_regex->Match(content_str, 0, content_str.size(), RE2::ANCHOR_BOTH, res, 1)) | |
3180 | { | |
3181 | result->set_value(true); | |
3182 | } | |
3183 | else | |
3184 | { | |
3185 | result->set_value(false); | |
3186 | } | |
3187 | #else | |
3188 | if (std::regex_match(content_str, compiled_regex)) | |
3189 | { | |
3190 | result->set_value(true); | |
3191 | } | |
3192 | else | |
3193 | { | |
3194 | result->set_value(false); | |
3195 | } | |
3196 | #endif | |
3197 | } | |
3198 | ||
/// Match callback handed to hs_scan() by match().
///
/// Only `ctx` is used: it must point to a bool, which is set to true.
/// The remaining arguments are ignored.
///
/// @return always 0.
static int eventHandler(unsigned int id, unsigned long long from, unsigned long long to,
                        unsigned int flags, void* ctx)
{
  bool* const matched = static_cast<bool*>(ctx);
  *matched = true;
  return 0;
}
3205 | ||
3206 | }; | |
3207 | ||
f67539c2 TL |
3208 | };//namespace |
3209 | ||
3210 | #endif |