]> git.proxmox.com Git - ceph.git/blob - ceph/src/s3select/include/s3select_functions.h
5583e5536528499df7904fbfde7069eb38125d18
[ceph.git] / ceph / src / s3select / include / s3select_functions.h
1 #ifndef __S3SELECT_FUNCTIONS__
2 #define __S3SELECT_FUNCTIONS__
3
4
5 #include "s3select_oper.h"
6 #include <boost/algorithm/string.hpp>
7 #include <boost/algorithm/string/trim.hpp>
8 #include <boost/regex.hpp>
9 #include <algorithm>
10
11 using namespace std::string_literals;
12
13 #define BOOST_BIND_ACTION_PARAM( push_name ,param ) boost::bind( &push_name::operator(), g_ ## push_name , _1 ,_2, param)
14 namespace s3selectEngine
15 {
16
/// Returns 10 raised to the power @p n.
///
/// Implemented with plain multiplication instead of pow() so that the function
/// is a genuine constant expression on every compiler (pow() is not constexpr
/// in standard C++) and needs no <cmath>. Powers of ten up to 10^22 are exactly
/// representable in a double, so the result is exact for the exponents used by
/// the fractional-second push_* actions below.
constexpr double sec_scale(int n)
{
  const int magnitude = (n < 0) ? -n : n;
  double scale = 1.0;
  for (int i = 0; i < magnitude; ++i)
  {
    scale *= 10.0;
  }
  // negative exponents yield the reciprocal, matching pow(10, n)
  return (n < 0) ? 1.0 / scale : scale;
}
21
/// Parser semantic action: stores the first character of the matched range
/// starting at @p a into *n. The end pointer @p b is not used.
struct push_char
{
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    const char first = a[0];
    *n = first;
  }
};
static push_char g_push_char;
31
/// Parser semantic action: converts the two ASCII digits at a[0..1] into their
/// decimal value and stores it in *n. The end pointer @p b is not used.
struct push_2dig
{
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    int number = 0;
    for (int idx = 0; idx < 2; ++idx)
    {
      number = number * 10 + (a[idx] - 48); // 48 is ASCII '0'
    }
    *n = number;
  }
};
static push_2dig g_push_2dig;
41
/// Parser semantic action: converts the four ASCII digits at a[0..3] into their
/// decimal value and stores it in *n. The end pointer @p b is not used.
struct push_4dig
{
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    int number = 0;
    for (int idx = 0; idx < 4; ++idx)
    {
      number = number * 10 + (a[idx] - 48); // 48 is ASCII '0'
    }
    *n = number;
  }
};
static push_4dig g_push_4dig;
51
52 struct push_1fdig
53 {
54 void operator()(const char* a, const char* b, uint32_t* n) const
55 {
56 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
57 const double scale = sec_scale(9-1); //nano-sec
58 #else
59 const double scale = sec_scale(6-1); //micro-sec
60 #endif
61
62 *n = ((static_cast<char>(*a) - 48)) * scale;
63 }
64
65 };
66 static push_1fdig g_push_1fdig;
67
68 struct push_2fdig
69 {
70 void operator()(const char* a, const char* b, uint32_t* n) const
71 {
72 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
73 const double scale = sec_scale(9-2); //nano-sec
74 #else
75 const double scale = sec_scale(6-2); //micro-sec
76 #endif
77
78 *n = ((static_cast<char>(*a) - 48) * 10 + (static_cast<char>(*(a+1)) - 48)) * scale;
79 }
80
81 };
82 static push_2fdig g_push_2fdig;
83
84 struct push_3fdig
85 {
86 void operator()(const char* a, const char* b, uint32_t* n) const
87 {
88 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
89 const double scale = sec_scale(9-3); //nano-sec
90 #else
91 const double scale = sec_scale(6-3); //micro-sec
92 #endif
93
94 *n = ((static_cast<char>(*a) - 48) * 100 + (static_cast<char>(*(a+1)) - 48) * 10 + (static_cast<char>(*(a+2)) - 48)) * scale;
95 }
96
97 };
98 static push_3fdig g_push_3fdig;
99
100 struct push_4fdig
101 {
102 void operator()(const char* a, const char* b, uint32_t* n) const
103 {
104 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
105 const double scale = sec_scale(9-4); //nano-sec
106 #else
107 const double scale = sec_scale(6-4); //micro-sec
108 #endif
109
110 *n = ((static_cast<char>(*a) - 48) * 1000 + (static_cast<char>(*(a+1)) - 48) * 100 + (static_cast<char>(*(a+2)) - 48) * 10 + (static_cast<char>(*(a+3)) - 48)) * scale;
111 }
112
113 };
114 static push_4fdig g_push_4fdig;
115
116 struct push_5fdig
117 {
118 void operator()(const char* a, const char* b, uint32_t* n) const
119 {
120 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
121 const double scale = sec_scale(9-5); //nano-sec
122 #else
123 const double scale = sec_scale(6-5); //micro-sec
124 #endif
125
126 *n = ((static_cast<char>(*a) - 48) * 10000 + (static_cast<char>(*(a+1)) - 48) * 1000 + (static_cast<char>(*(a+2)) - 48) * 100 + (static_cast<char>(*(a+3)) - 48) * 10 + (static_cast<char>(*(a+4)) - 48)) * scale;
127 }
128
129 };
130 static push_5fdig g_push_5fdig;
131
132 struct push_6fdig
133 {
134 void operator()(const char* a, const char* b, uint32_t* n) const
135 {
136 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
137 const double scale = sec_scale(9-6); //nano-sec
138 #else
139 const double scale = sec_scale(6-6); //micro-sec
140 #endif
141
142 *n = ((static_cast<char>(*a) - 48) * 100000 + (static_cast<char>(*(a+1)) - 48) * 10000 + (static_cast<char>(*(a+2)) - 48) * 1000 + (static_cast<char>(*(a+3)) - 48) * 100 + (static_cast<char>(*(a+4)) - 48) * 10 + (static_cast<char>(*(a+5)) - 48)) * scale;
143 }
144
145 };
146 static push_6fdig g_push_6fdig;
147
148 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
149 struct push_7fdig
150 {
151 void operator()(const char* a, const char* b, uint32_t* n) const
152 {
153 const double scale = sec_scale(9-7); //nano-sec
154 *n = ((static_cast<char>(*a) - 48) * 1000000 + (static_cast<char>(*(a+1)) - 48) * 100000 + (static_cast<char>(*(a+2)) - 48) * 10000 + (static_cast<char>(*(a+3)) - 48) * 1000 + (static_cast<char>(*(a+4)) - 48) * 100 + (static_cast<char>(*(a+5)) - 48) * 10 + (static_cast<char>(*(a+6)) - 48)) * scale;
155 }
156
157 };
158 static push_7fdig g_push_7fdig;
159
160 struct push_8fdig
161 {
162 void operator()(const char* a, const char* b, uint32_t* n) const
163 {
164 const double scale = sec_scale(9-8); //nano-sec
165 *n = ((static_cast<char>(*a) - 48) * 10000000 + (static_cast<char>(*(a+1)) - 48) * 1000000 + (static_cast<char>(*(a+2)) - 48) * 100000 + (static_cast<char>(*(a+3)) - 48) * 10000 + (static_cast<char>(*(a+4)) - 48) * 1000 + (static_cast<char>(*(a+5)) - 48) * 100 + (static_cast<char>(*(a+6)) - 48) * 10 + (static_cast<char>(*(a+7)) - 48)) * scale;
166 }
167
168 };
169 static push_8fdig g_push_8fdig;
170
171 struct push_9fdig
172 {
173 void operator()(const char* a, const char* b, uint32_t* n) const
174 {
175 const double scale = sec_scale(9-9); //nano-sec
176 *n = ((static_cast<char>(*a) - 48) * 100000000 + (static_cast<char>(*(a+1)) - 48) * 10000000 + (static_cast<char>(*(a+2)) - 48) * 1000000 + (static_cast<char>(*(a+3)) - 48) * 100000 + (static_cast<char>(*(a+4)) - 48) * 10000 + (static_cast<char>(*(a+5)) - 48) * 1000 + (static_cast<char>(*(a+6)) - 48) * 100 + (static_cast<char>(*(a+7)) - 48) * 10 + (static_cast<char>(*(a+8)) - 48)) * scale;
177 }
178
179 };
180 static push_9fdig g_push_9fdig;
181 #endif
182
/// Identifiers of the functions known to the function library; each name in
/// s3select_functions::m_functions_library maps to one of these enumerators.
/// The enumerator order (and therefore the underlying values) is unchanged.
enum class s3select_func_En_t
{
  // arithmetic and aggregation
  ADD,
  SUM,
  AVG,
  MIN,
  MAX,
  COUNT,

  // conversions
  TO_INT,
  TO_FLOAT,
  TO_TIMESTAMP,
  TO_STRING_CONSTANT,
  TO_STRING_DYNAMIC,
  TO_BOOL,

  SUBSTR,

  // extract <part> from timestamp
  EXTRACT_YEAR,
  EXTRACT_MONTH,
  EXTRACT_DAY,
  EXTRACT_HOUR,
  EXTRACT_MINUTE,
  EXTRACT_SECOND,
  EXTRACT_WEEK,
  EXTRACT_TIMEZONE_HOUR,
  EXTRACT_TIMEZONE_MINUTE,

  // date_add
  DATE_ADD_YEAR,
  DATE_ADD_MONTH,
  DATE_ADD_DAY,
  DATE_ADD_HOUR,
  DATE_ADD_MINUTE,
  DATE_ADD_SECOND,

  // date_diff
  DATE_DIFF_YEAR,
  DATE_DIFF_MONTH,
  DATE_DIFF_DAY,
  DATE_DIFF_HOUR,
  DATE_DIFF_MINUTE,
  DATE_DIFF_SECOND,

  UTCNOW,

  // string helpers
  LENGTH,
  LOWER,
  UPPER,

  // predicates and conditionals
  NULLIF,
  BETWEEN,
  NOT_BETWEEN,
  IS_NULL,
  IS_NOT_NULL,
  IN,
  LIKE,
  VERSION,
  CASE_WHEN_ELSE,
  WHEN_THEN,
  WHEN_VALUE_THEN,
  COALESCE,
  STRING,

  // trim variants
  TRIM,
  LEADING,
  TRAILING
};
238
239
240 class s3select_functions
241 {
242
243 private:
244
245 using FunctionLibrary = std::map<std::string, s3select_func_En_t>;
246 s3select_allocator* m_s3select_allocator;
247 std::set<base_statement*>* m_ast_nodes_for_cleanup;
248
249 const FunctionLibrary m_functions_library =
250 {
251 {"add", s3select_func_En_t::ADD},
252 {"sum", s3select_func_En_t::SUM},
253 {"avg", s3select_func_En_t::AVG},
254 {"count", s3select_func_En_t::COUNT},
255 {"min", s3select_func_En_t::MIN},
256 {"max", s3select_func_En_t::MAX},
257 {"int", s3select_func_En_t::TO_INT},
258 {"float", s3select_func_En_t::TO_FLOAT},
259 {"substring", s3select_func_En_t::SUBSTR},
260 {"to_timestamp", s3select_func_En_t::TO_TIMESTAMP},
261 {"#to_string_constant#",s3select_func_En_t::TO_STRING_CONSTANT},
262 {"#to_string_dynamic#",s3select_func_En_t::TO_STRING_DYNAMIC},
263 {"to_bool", s3select_func_En_t::TO_BOOL},
264 {"#extract_year#", s3select_func_En_t::EXTRACT_YEAR},
265 {"#extract_month#", s3select_func_En_t::EXTRACT_MONTH},
266 {"#extract_day#", s3select_func_En_t::EXTRACT_DAY},
267 {"#extract_hour#", s3select_func_En_t::EXTRACT_HOUR},
268 {"#extract_minute#", s3select_func_En_t::EXTRACT_MINUTE},
269 {"#extract_second#", s3select_func_En_t::EXTRACT_SECOND},
270 {"#extract_week#", s3select_func_En_t::EXTRACT_WEEK},
271 {"#extract_timezone_hour#", s3select_func_En_t::EXTRACT_TIMEZONE_HOUR},
272 {"#extract_timezone_minute#", s3select_func_En_t::EXTRACT_TIMEZONE_MINUTE},
273 {"#dateadd_year#", s3select_func_En_t::DATE_ADD_YEAR},
274 {"#dateadd_month#", s3select_func_En_t::DATE_ADD_MONTH},
275 {"#dateadd_day#", s3select_func_En_t::DATE_ADD_DAY},
276 {"#dateadd_hour#", s3select_func_En_t::DATE_ADD_HOUR},
277 {"#dateadd_minute#", s3select_func_En_t::DATE_ADD_MINUTE},
278 {"#dateadd_second#", s3select_func_En_t::DATE_ADD_SECOND},
279 {"#datediff_year#", s3select_func_En_t::DATE_DIFF_YEAR},
280 {"#datediff_month#", s3select_func_En_t::DATE_DIFF_MONTH},
281 {"#datediff_day#", s3select_func_En_t::DATE_DIFF_DAY},
282 {"#datediff_hour#", s3select_func_En_t::DATE_DIFF_HOUR},
283 {"#datediff_minute#", s3select_func_En_t::DATE_DIFF_MINUTE},
284 {"#datediff_second#", s3select_func_En_t::DATE_DIFF_SECOND},
285 {"utcnow", s3select_func_En_t::UTCNOW},
286 {"character_length", s3select_func_En_t::LENGTH},
287 {"char_length", s3select_func_En_t::LENGTH},
288 {"lower", s3select_func_En_t::LOWER},
289 {"upper", s3select_func_En_t::UPPER},
290 {"nullif", s3select_func_En_t::NULLIF},
291 {"#between#", s3select_func_En_t::BETWEEN},
292 {"#not_between#", s3select_func_En_t::NOT_BETWEEN},
293 {"#is_null#", s3select_func_En_t::IS_NULL},
294 {"#is_not_null#", s3select_func_En_t::IS_NOT_NULL},
295 {"#in_predicate#", s3select_func_En_t::IN},
296 {"#like_predicate#", s3select_func_En_t::LIKE},
297 {"version", s3select_func_En_t::VERSION},
298 {"#when-then#", s3select_func_En_t::WHEN_THEN},
299 {"#when-value-then#", s3select_func_En_t::WHEN_VALUE_THEN},
300 {"#case-when-else#", s3select_func_En_t::CASE_WHEN_ELSE},
301 {"coalesce", s3select_func_En_t::COALESCE},
302 {"string", s3select_func_En_t::STRING},
303 {"#trim#", s3select_func_En_t::TRIM},
304 {"#leading#", s3select_func_En_t::LEADING},
305 {"#trailing#", s3select_func_En_t::TRAILING}
306 };
307
308 public:
309
310 base_function* create(std::string_view fn_name,const bs_stmt_vec_t&);
311
312 s3select_functions():m_s3select_allocator(nullptr),m_ast_nodes_for_cleanup(nullptr)
313 {
314 }
315
316
317 void setAllocator(s3select_allocator* alloc)
318 {
319 m_s3select_allocator = alloc;
320 }
321
322 void set_AST_nodes_for_cleanup(std::set<base_statement*>* ast_for_cleanup)
323 {
324 m_ast_nodes_for_cleanup = ast_for_cleanup;
325 }
326
327 s3select_allocator* getAllocator()
328 {
329 return m_s3select_allocator;
330 }
331
332 void clean();
333
334 };
335
336 class __function : public base_statement
337 {
338
339 private:
340 bs_stmt_vec_t arguments;
341 std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> name;
342 base_function* m_func_impl;
343 s3select_functions* m_s3select_functions;
344 variable m_result;
345 bool m_is_aggregate_function;
346
347 void _resolve_name()
348 {
349 if (m_func_impl)
350 {
351 return;
352 }
353
354 auto string_to_lower = [](std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> s)
355 {
356 std::transform(s.begin(),s.end(),s.begin(),[](unsigned char c){ return std::tolower(c); });
357 return s;
358 };
359
360 //the function name is converted into lowercase to enable case-insensitive
361 base_function* f = m_s3select_functions->create(string_to_lower(name),arguments);
362 if (!f)
363 {
364 throw base_s3select_exception("function not found", base_s3select_exception::s3select_exp_en_t::FATAL); //should abort query
365 }
366 m_func_impl = f;
367 m_is_aggregate_function= m_func_impl->is_aggregate();
368
369 }
370
371 public:
372
373 base_function* impl()
374 {
375 return m_func_impl;
376 }
377
378 void traverse_and_apply(scratch_area* sa, projection_alias* pa,bool json_statement) override
379 {
380 m_scratch = sa;
381 m_aliases = pa;
382 m_json_statement = json_statement;
383 for (base_statement* ba : arguments)
384 {
385 ba->traverse_and_apply(sa, pa, json_statement);
386 }
387 }
388
389 void set_last_call() override
390 {//it cover the use-case where aggregation function is an argument in non-aggregate function.
391 is_last_call = true;
392 for (auto& ba : arguments)
393 {
394 ba->set_last_call();
395 }
396 }
397
398 void set_skip_non_aggregate(bool skip_non_aggregate_op) override
399 {//it cover the use-case where aggregation function is an argument in non-aggregate function.
400 m_skip_non_aggregate_op = skip_non_aggregate_op;
401 for (auto& ba : arguments)
402 {
403 ba->set_skip_non_aggregate(m_skip_non_aggregate_op);
404 }
405 }
406
407 bool is_aggregate() const override
408 {
409 return m_is_aggregate_function;
410 }
411
412 bool semantic() override
413 {
414 return true;
415 }
416
417 __function(const char* fname, s3select_functions* s3f) : name(fname), m_func_impl(nullptr), m_s3select_functions(s3f),m_is_aggregate_function(false)
418 {}
419
420 value& eval() override
421 {
422 return eval_internal();
423 }
424
425 value& eval_internal() override
426 {
427
428 _resolve_name();//node is "resolved" (function is created) upon first call/first row.
429
430 if (is_last_call == false)
431 {//all rows prior to last row
432 if(m_skip_non_aggregate_op == false || is_aggregate() == true)
433 {
434 (*m_func_impl)(&arguments, &m_result);
435 }
436 else if(m_skip_non_aggregate_op == true)
437 {
438 for(auto& p : arguments)
439 {//evaluating the arguments (not the function itself, which is a non-aggregate function)
440 //i.e. in the following use case substring( , sum(),count() ) ; only sum() and count() are evaluated.
441 p->eval();
442 }
443 }
444 }
445 else
446 {//on the last row, the aggregate function is finalized,
447 //and non-aggregate function is evaluated with the result of aggregate function.
448 if(is_aggregate())
449 (*m_func_impl).get_aggregate_result(&m_result);
450 else
451 (*m_func_impl)(&arguments, &m_result);
452 }
453
454 return m_result.get_value();
455 }
456
457 void resolve_node() override
458 {
459 _resolve_name();
460
461 for (auto& arg : arguments)
462 {
463 arg->resolve_node();
464 }
465 }
466
467 std::string print(int ident) override
468 {
469 return std::string(0);
470 }
471
472 void push_argument(base_statement* arg)
473 {
474 arguments.push_back(arg);
475 }
476
477
478 bs_stmt_vec_t& get_arguments()
479 {
480 return arguments;
481 }
482
483 virtual ~__function() = default;
484 };
485
/*
    s3-select function definitions
*/
489 struct _fn_add : public base_function
490 {
491
492 value var_result;
493
494 bool operator()(bs_stmt_vec_t* args, variable* result) override
495 {
496 auto iter = args->begin();
497 base_statement* x = *iter;
498 iter++;
499 base_statement* y = *iter;
500
501 var_result = x->eval() + y->eval();
502
503 *result = var_result;
504
505 return true;
506 }
507 };
508
509 struct _fn_sum : public base_function
510 {
511
512 value sum;
513
514 _fn_sum() : sum(0)
515 {
516 aggregate = true;
517 }
518
519 bool operator()(bs_stmt_vec_t* args, variable* result) override
520 {
521 auto iter = args->begin();
522 base_statement* x = *iter;
523
524 try
525 {
526 sum = sum + x->eval();
527 }
528 catch (base_s3select_exception& e)
529 {
530 if (e.severity() == base_s3select_exception::s3select_exp_en_t::FATAL)
531 {
532 throw;
533 }
534 }
535
536 return true;
537 }
538
539 void get_aggregate_result(variable* result) override
540 {
541 *result = sum ;
542 }
543 };
544
545 struct _fn_count : public base_function
546 {
547
548 int64_t count;
549
550 _fn_count():count(0)
551 {
552 aggregate=true;
553 }
554
555 bool operator()(bs_stmt_vec_t* args, variable* result) override
556 {
557 if (args->size())
558 {// in case argument exist, should count only non-null.
559 auto iter = args->begin();
560 base_statement* x = *iter;
561
562 if(!x->eval().is_null())
563 {
564 count += 1;
565 }
566 }
567 else
568 {//in case of non-arguments // count()
569 count += 1;
570 }
571
572 return true;
573 }
574
575 void get_aggregate_result(variable* result) override
576 {
577 result->set_value(count);
578 }
579
580 };
581
582 struct _fn_avg : public base_function
583 {
584
585 value sum;
586 value count{0.0};
587
588 _fn_avg() : sum(0) { aggregate = true; }
589
590 bool operator()(bs_stmt_vec_t* args, variable *result) override
591 {
592 auto iter = args->begin();
593 base_statement *x = *iter;
594
595 try
596 {
597 sum = sum + x->eval();
598 count++;
599 }
600 catch (base_s3select_exception &e)
601 {
602 throw base_s3select_exception(e.what());
603 }
604
605 return true;
606 }
607
608 void get_aggregate_result(variable *result) override
609 {
610 if(count == static_cast<value>(0)) {
611 throw base_s3select_exception("count cannot be zero!");
612 } else {
613 *result = sum/count ;
614 }
615 }
616 };
617
618 struct _fn_min : public base_function
619 {
620
621 value min;
622
623 _fn_min():min(__INT64_MAX__)
624 {
625 aggregate=true;
626 }
627
628 bool operator()(bs_stmt_vec_t* args, variable* result) override
629 {
630 auto iter = args->begin();
631 base_statement* x = *iter;
632
633 if(min > x->eval())
634 {
635 min=x->eval();
636 }
637
638 return true;
639 }
640
641 void get_aggregate_result(variable* result) override
642 {
643 *result = min;
644 }
645
646 };
647
648 struct _fn_max : public base_function
649 {
650
651 value max;
652
653 _fn_max():max(-__INT64_MAX__)
654 {
655 aggregate=true;
656 }
657
658 bool operator()(bs_stmt_vec_t* args, variable* result) override
659 {
660 auto iter = args->begin();
661 base_statement* x = *iter;
662
663 if(max < x->eval())
664 {
665 max=x->eval();
666 }
667
668 return true;
669 }
670
671 void get_aggregate_result(variable* result) override
672 {
673 *result = max;
674 }
675
676 };
677
678 struct _fn_to_int : public base_function
679 {
680 value var_result;
681
682 bool operator()(bs_stmt_vec_t* args, variable* result) override
683 {
684 value v = (*args->begin())->eval();
685
686 switch (v.type) {
687
688 case value::value_En_t::STRING:
689 {
690 char* pend;
691 errno = 0;
692 int64_t i= strtol(v.str(), &pend, 10);
693 if (errno == ERANGE) {
694 throw base_s3select_exception("converted value would fall out of the range of the result type!");
695 }
696 if (pend == v.str()) {
697 // no number found
698 throw base_s3select_exception("text cannot be converted to a number");
699 }
700 if (*pend) {
701 throw base_s3select_exception("extra characters after the number");
702 }
703
704 var_result = i;
705 }
706 break;
707
708 case value::value_En_t::FLOAT:
709 var_result = static_cast<int64_t>(v.dbl());
710 break;
711
712 default:
713 var_result = v.i64();
714 break;
715 }
716
717 *result = var_result;
718 return true;
719 }
720
721 };
722
723 struct _fn_to_float : public base_function
724 {
725 value var_result;
726
727 bool operator()(bs_stmt_vec_t* args, variable* result) override
728 {
729 value v = (*args->begin())->eval();
730
731 switch (v.type) {
732
733 case value::value_En_t::STRING:
734 {
735 char* pend;
736 double d = strtod(v.str(), &pend);
737 if (errno == ERANGE) {
738 throw base_s3select_exception("converted value would fall out of the range of the result type!");
739 }
740 if (pend == v.str()) {
741 // no number found
742 throw base_s3select_exception("text cannot be converted to a number");
743 }
744 if (*pend) {
745 throw base_s3select_exception("extra characters after the number");
746 }
747
748 var_result = d;
749 }
750 break;
751
752 case value::value_En_t::FLOAT:
753 var_result = v.dbl();
754 break;
755
756 default:
757 var_result = v.i64();
758 break;
759 }
760
761 *result = var_result;
762 return true;
763 }
764
765 };
766
767 struct _fn_to_timestamp : public base_function
768 {
769 bsc::rule<> date_separator = bsc::ch_p("-");
770 bsc::rule<> time_separator = bsc::ch_p(":");
771 bsc::rule<> nano_sec_separator = bsc::ch_p(".");
772 bsc::rule<> delimiter = bsc::ch_p("T");
773 bsc::rule<> zero_timezone = bsc::ch_p("Z");
774 bsc::rule<> timezone_sign = bsc::ch_p("-") | bsc::ch_p("+");
775
776 uint32_t yr = 1700, mo = 1, dy = 1;
777 bsc::rule<> dig4 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
778 bsc::rule<> dig2 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p];
779
780 bsc::rule<> d_yyyy_dig = ((dig4[BOOST_BIND_ACTION_PARAM(push_4dig, &yr)]) >> *(delimiter));
781 bsc::rule<> d_yyyymmdd_dig = ((dig4[BOOST_BIND_ACTION_PARAM(push_4dig, &yr)]) >> *(date_separator)
782 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mo)]) >> *(date_separator)
783 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &dy)]) >> *(delimiter));
784
785 uint32_t hr = 0, mn = 0, sc = 0, frac_sec = 0, tz_hr = 0, tz_mn = 0, sign, tm_zone = '0';
786
787 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
788 bsc::rule<> fdig9 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
789 bsc::rule<> fdig8 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
790 bsc::rule<> fdig7 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
791 #endif
792
793 bsc::rule<> fdig6 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
794 bsc::rule<> fdig5 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
795 bsc::rule<> fdig4 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
796 bsc::rule<> fdig3 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
797 bsc::rule<> fdig2 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p];
798 bsc::rule<> fdig1 = bsc::lexeme_d[bsc::digit_p];
799
800 bsc::rule<> d_timezone_dig = ((timezone_sign[BOOST_BIND_ACTION_PARAM(push_char, &sign)]) >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &tz_hr)]) >> *(time_separator)
801 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &tz_mn)])) | (zero_timezone[BOOST_BIND_ACTION_PARAM(push_char, &tm_zone)]);
802
803 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
804 bsc::rule<> fraction_sec = (fdig9[BOOST_BIND_ACTION_PARAM(push_9fdig, &frac_sec)]) |
805 (fdig8[BOOST_BIND_ACTION_PARAM(push_8fdig, &frac_sec)]) |
806 (fdig7[BOOST_BIND_ACTION_PARAM(push_7fdig, &frac_sec)]) |
807 (fdig6[BOOST_BIND_ACTION_PARAM(push_6fdig, &frac_sec)]) |
808 (fdig5[BOOST_BIND_ACTION_PARAM(push_5fdig, &frac_sec)]) |
809 (fdig4[BOOST_BIND_ACTION_PARAM(push_4fdig, &frac_sec)]) |
810 (fdig3[BOOST_BIND_ACTION_PARAM(push_3fdig, &frac_sec)]) |
811 (fdig2[BOOST_BIND_ACTION_PARAM(push_2fdig, &frac_sec)]) |
812 (fdig1[BOOST_BIND_ACTION_PARAM(push_1fdig, &frac_sec)]);
813 #else
814 bsc::rule<> fraction_sec = (fdig6[BOOST_BIND_ACTION_PARAM(push_6fdig, &frac_sec)]) |
815 (fdig5[BOOST_BIND_ACTION_PARAM(push_5fdig, &frac_sec)]) |
816 (fdig4[BOOST_BIND_ACTION_PARAM(push_4fdig, &frac_sec)]) |
817 (fdig3[BOOST_BIND_ACTION_PARAM(push_3fdig, &frac_sec)]) |
818 (fdig2[BOOST_BIND_ACTION_PARAM(push_2fdig, &frac_sec)]) |
819 (fdig1[BOOST_BIND_ACTION_PARAM(push_1fdig, &frac_sec)]);
820 #endif
821
822 bsc::rule<> d_time_dig = ((dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &hr)]) >> *(time_separator)
823 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mn)]) >> *(time_separator)
824 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &sc)]) >> *(nano_sec_separator)
825 >> (fraction_sec) >> (d_timezone_dig)) |
826 ((dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &hr)]) >> *(time_separator)
827 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mn)]) >> *(time_separator)
828 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &sc)]) >> (d_timezone_dig)) |
829 ((dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &hr)]) >> *(time_separator)
830 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mn)]) >> (d_timezone_dig));
831
832 bsc::rule<> d_date_time = ((d_yyyymmdd_dig) >> (d_time_dig)) | (d_yyyymmdd_dig) | (d_yyyy_dig);
833
834 timestamp_t tmstmp;
835 value v_str;
836 int tz_hour, tz_min;
837
838 bool datetime_validation()
839 {
840 if (yr >= 1400 && yr <= 9999 && mo >= 1 && mo <= 12 && dy >= 1 && hr < 24 && mn < 60 && sc < 60 && tz_hour <= 14 && tz_hour >= -12 && tz_mn < 60)
841 {
842 if ( (tz_hour == -12 || tz_hour == 14) && tz_mn > 0)
843 return false;
844
845 switch (mo)
846 {
847 case 1:
848 case 3:
849 case 5:
850 case 7:
851 case 8:
852 case 10:
853 case 12:
854 if(dy <= 31)
855 {
856 return true;
857 }
858 break;
859 case 4:
860 case 6:
861 case 9:
862 case 11:
863 if(dy <= 30)
864 {
865 return true;
866 }
867 break;
868 case 2:
869 if(dy >= 28)
870 {
871 if(!(yr % 4) == 0 && dy > 28)
872 {
873 return false;
874 }
875 else if(!(yr % 100) == 0 && dy <= 29)
876 {
877 return true;
878 }
879 else if(!(yr % 400) == 0 && dy > 28)
880 {
881 return false;
882 }
883 else
884 {
885 return true;
886 }
887 }
888 else
889 {
890 return true;
891 }
892 break;
893 default:
894 return false;
895 break;
896 }
897 }
898 return false;
899 }
900
901 bool operator()(bs_stmt_vec_t* args, variable* result) override
902 {
903
904 hr = 0;
905 mn = 0;
906 sc = 0;
907 frac_sec = 0;
908 tz_hr = 0;
909 tz_mn = 0;
910 tm_zone = '0';
911
912 auto iter = args->begin();
913 int args_size = args->size();
914
915 if (args_size != 1)
916 {
917 throw base_s3select_exception("to_timestamp should have one parameter");
918 }
919
920 base_statement* str = *iter;
921
922 v_str = str->eval();
923
924 if (v_str.type != value::value_En_t::STRING)
925 {
926 throw base_s3select_exception("to_timestamp first argument must be string"); //can skip current row
927 }
928
929 bsc::parse_info<> info_dig = bsc::parse(v_str.str(), d_date_time);
930
931 tz_hour = tz_hr;
932 tz_min = tz_mn;
933 if ((char)sign == '-')
934 {
935 tz_hour *= -1;
936 tz_min *= -1;
937 }
938
939 if(datetime_validation()==false or !info_dig.full)
940 {
941 throw base_s3select_exception("input date-time is illegal");
942 }
943
944 boost::posix_time::ptime new_ptime;
945
946 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
947 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mo, dy),
948 boost::posix_time::hours(hr) +
949 boost::posix_time::minutes(mn) +
950 boost::posix_time::seconds(sc) +
951 boost::posix_time::nanoseconds(frac_sec));
952 #else
953 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mo, dy),
954 boost::posix_time::hours(hr) +
955 boost::posix_time::minutes(mn) +
956 boost::posix_time::seconds(sc) +
957 boost::posix_time::microseconds(frac_sec));
958 #endif
959
960 tmstmp = std::make_tuple(new_ptime, boost::posix_time::time_duration(tz_hour, tz_min, 0), (char)tm_zone == 'Z');
961
962 result->set_value(&tmstmp);
963
964 return true;
965 }
966
967 };
968
969 struct _fn_to_string_constant : public base_timestamp_to_string
970 {
971 bool operator()(bs_stmt_vec_t* args, variable* result) override
972 {
973 param_validation(args);
974
975 if (!initialized)
976 {
977 prepare_to_string_vector(print_vector, para);
978 initialized = true;
979 }
980
981 std::string result_ = execute_to_string(print_vector, para);
982
983 result->set_value(result_.c_str());
984 return true;
985 }
986 };
987
988 struct _fn_to_string_dynamic : public base_timestamp_to_string
989 {
990 bool operator()(bs_stmt_vec_t* args, variable* result) override
991 {
992 param_validation(args);
993
994 print_vector.clear();
995 para.clear();
996
997 prepare_to_string_vector(print_vector, para);
998
999 std::string result_ = execute_to_string(print_vector, para);
1000
1001 result->set_value(result_.c_str());
1002 return true;
1003 }
1004 };
1005
1006 struct _fn_extract_year_from_timestamp : public base_date_extract
1007 {
1008 bool operator()(bs_stmt_vec_t* args, variable* result) override
1009 {
1010 param_validation(args);
1011
1012 result->set_value( (int64_t)new_ptime.date().year());
1013 return true;
1014 }
1015 };
1016
1017 struct _fn_extract_month_from_timestamp : public base_date_extract
1018 {
1019 bool operator()(bs_stmt_vec_t* args, variable* result) override
1020 {
1021 param_validation(args);
1022
1023 result->set_value( (int64_t)new_ptime.date().month());
1024 return true;
1025 }
1026 };
1027
1028 struct _fn_extract_day_from_timestamp : public base_date_extract
1029 {
1030 bool operator()(bs_stmt_vec_t* args, variable* result) override
1031 {
1032 param_validation(args);
1033
1034 result->set_value( (int64_t)new_ptime.date().day());
1035 return true;
1036 }
1037 };
1038
1039 struct _fn_extract_hour_from_timestamp : public base_date_extract
1040 {
1041 bool operator()(bs_stmt_vec_t* args, variable* result) override
1042 {
1043 param_validation(args);
1044
1045 result->set_value( (int64_t)new_ptime.time_of_day().hours());
1046 return true;
1047 }
1048 };
1049
1050 struct _fn_extract_minute_from_timestamp : public base_date_extract
1051 {
1052 bool operator()(bs_stmt_vec_t* args, variable* result) override
1053 {
1054 param_validation(args);
1055
1056 result->set_value( (int64_t)new_ptime.time_of_day().minutes());
1057 return true;
1058 }
1059 };
1060
1061 struct _fn_extract_second_from_timestamp : public base_date_extract
1062 {
1063 bool operator()(bs_stmt_vec_t* args, variable* result) override
1064 {
1065 param_validation(args);
1066
1067 result->set_value( (int64_t)new_ptime.time_of_day().seconds());
1068 return true;
1069 }
1070 };
1071
1072 struct _fn_extract_week_from_timestamp : public base_date_extract
1073 {
1074 bool operator()(bs_stmt_vec_t* args, variable* result) override
1075 {
1076 param_validation(args);
1077
1078 result->set_value( (int64_t)new_ptime.date().week_number());
1079 return true;
1080 }
1081 };
1082
1083 struct _fn_extract_tz_hour_from_timestamp : public base_date_extract
1084 {
1085 bool operator()(bs_stmt_vec_t* args, variable* result) override
1086 {
1087 param_validation(args);
1088
1089 result->set_value((int64_t)td.hours());
1090 return true;
1091 }
1092 };
1093
1094 struct _fn_extract_tz_minute_from_timestamp : public base_date_extract
1095 {
1096 bool operator()(bs_stmt_vec_t* args, variable* result) override
1097 {
1098 param_validation(args);
1099
1100 result->set_value((int64_t)td.minutes());
1101 return true;
1102 }
1103 };
1104
1105 struct _fn_diff_year_timestamp : public base_date_diff
1106 {
1107 bool operator()(bs_stmt_vec_t* args, variable* result) override
1108 {
1109 param_validation(args);
1110
1111 int year1 = ptime1.date().year();
1112 int year2 = ptime2.date().year();
1113 boost::posix_time::time_duration time1 = boost::posix_time::time_duration(
1114 ptime1.time_of_day().hours(), ptime1.time_of_day().minutes(),
1115 ptime1.time_of_day().seconds());
1116 boost::posix_time::time_duration time2 = boost::posix_time::time_duration(
1117 ptime2.time_of_day().hours(), ptime2.time_of_day().minutes(),
1118 ptime2.time_of_day().seconds());
1119
1120 if (year2 > year1 && ((ptime2.date().day_of_year() < ptime1.date().day_of_year()) ||
1121 (ptime2.date().day_of_year() == ptime1.date().day_of_year() && time2 < time1)))
1122 {
1123 year2 -= 1;
1124 }
1125 else if (year2 < year1 && ((ptime2.date().day_of_year() > ptime1.date().day_of_year()) ||
1126 (ptime2.date().day_of_year() == ptime1.date().day_of_year() && time2 > time1)))
1127 {
1128 year2 += 1;
1129 }
1130
1131 int64_t yr = year2 - year1;
1132 result->set_value( yr );
1133 return true;
1134 }
1135 };
1136
1137 struct _fn_diff_month_timestamp : public base_date_diff
1138 {
1139 bool operator()(bs_stmt_vec_t* args, variable* result) override
1140 {
1141 param_validation(args);
1142
1143 int year1 = ptime1.date().year();
1144 int year2 = ptime2.date().year();
1145 int mon1 = ptime1.date().month();
1146 int mon2 = ptime2.date().month();
1147 boost::posix_time::time_duration time1 = boost::posix_time::time_duration(
1148 ptime1.time_of_day().hours(), ptime1.time_of_day().minutes(),
1149 ptime1.time_of_day().seconds());
1150 boost::posix_time::time_duration time2 = boost::posix_time::time_duration(
1151 ptime2.time_of_day().hours(), ptime2.time_of_day().minutes(),
1152 ptime2.time_of_day().seconds());
1153
1154 if (year2 > year1)
1155 {
1156 if (ptime2.date().day() < ptime1.date().day() || (ptime2.date().day() == ptime1.date().day() && time2 < time1))
1157 {
1158 mon2 -= 1;
1159 }
1160
1161 if (ptime2.date().month() < ptime1.date().month())
1162 {
1163 mon2 += 12;
1164 year2 -= 1;
1165 }
1166 }
1167 else if (year2 < year1)
1168 {
1169 if (ptime2.date().day() > ptime1.date().day() || (ptime2.date().day() == ptime1.date().day() && time2 > time1))
1170 {
1171 mon1 -= 1;
1172 }
1173
1174 if (ptime2.date().month() > ptime1.date().month())
1175 {
1176 mon1 += 12;
1177 year1 -= 1;
1178 }
1179 }
1180
1181 int64_t mon_diff = (year2 - year1) * 12 + mon2 - mon1;
1182
1183 result->set_value(mon_diff);
1184 return true;
1185 }
1186 };
1187
1188 struct _fn_diff_day_timestamp : public base_date_diff
1189 {
1190 bool operator()(bs_stmt_vec_t* args, variable* result) override
1191 {
1192 param_validation(args);
1193
1194 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1195 int total_seconds = (((td_res.hours() * 60) + td_res.minutes()) * 60) + td_res.seconds();
1196 int64_t days = total_seconds / (24 * 3600);
1197
1198 result->set_value(days);
1199 return true;
1200 }
1201 };
1202
1203 struct _fn_diff_hour_timestamp : public base_date_diff
1204 {
1205 bool operator()(bs_stmt_vec_t* args, variable* result) override
1206 {
1207 param_validation(args);
1208
1209 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1210 result->set_value((int64_t)td_res.hours());
1211 return true;
1212 }
1213 };
1214
1215 struct _fn_diff_minute_timestamp : public base_date_diff
1216 {
1217 bool operator()(bs_stmt_vec_t* args, variable* result) override
1218 {
1219 param_validation(args);
1220
1221 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1222 result->set_value((int64_t)((td_res.hours() * 60) + td_res.minutes()));
1223 return true;
1224 }
1225 };
1226
1227 struct _fn_diff_second_timestamp : public base_date_diff
1228 {
1229 bool operator()(bs_stmt_vec_t* args, variable* result) override
1230 {
1231 param_validation(args);
1232
1233 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1234 result->set_value((int64_t)((((td_res.hours() * 60) + td_res.minutes()) * 60) + td_res.seconds()));
1235 return true;
1236 }
1237 };
1238
1239 struct _fn_add_year_to_timestamp : public base_date_add
1240 {
1241 bool operator()(bs_stmt_vec_t* args, variable* result) override
1242 {
1243 param_validation(args);
1244
1245 new_ptime += boost::gregorian::years( val_quantity.i64() );
1246 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1247 result->set_value( &new_tmstmp );
1248 return true;
1249 }
1250 };
1251
1252 struct _fn_add_month_to_timestamp : public base_date_add
1253 {
1254 bool operator()(bs_stmt_vec_t* args, variable* result) override
1255 {
1256 param_validation(args);
1257
1258 int yr, mn, dy, quant;
1259 quant = val_quantity.i64();
1260 dy = new_ptime.date().day();
1261
1262 int temp = quant % 12;
1263 mn = new_ptime.date().month() + temp;
1264 temp = quant / 12;
1265 yr = new_ptime.date().year() + temp;
1266
1267 if (mn > 12)
1268 {
1269 yr += 1;
1270 temp = mn % 12;
1271 if (temp == 0)
1272 {
1273 temp = 12;
1274 }
1275 mn = temp;
1276 }
1277 else if (mn < 1)
1278 {
1279 yr -= 1;
1280 if (mn == 0)
1281 {
1282 mn = 12;
1283 }
1284 else
1285 {
1286 mn = 12 + mn;
1287 }
1288 }
1289
1290 if ((mn == 4 || mn == 6 || mn == 9 || mn == 11) && dy > 30)
1291 {
1292 dy = 30;
1293 }
1294 else if (mn == 2 && dy > 28)
1295 {
1296 if (!(yr % 4) == 0 || ((yr % 100) == 0 && !(yr % 400) == 0))
1297 {
1298 dy = 28;
1299 }
1300 else
1301 {
1302 dy = 29;
1303 }
1304 }
1305
1306 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
1307 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mn, dy),
1308 boost::posix_time::hours(new_ptime.time_of_day().hours()) +
1309 boost::posix_time::minutes(new_ptime.time_of_day().minutes()) +
1310 boost::posix_time::seconds(new_ptime.time_of_day().seconds()) +
1311 boost::posix_time::nanoseconds(new_ptime.time_of_day().fractional_seconds()));
1312 #else
1313 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mn, dy),
1314 boost::posix_time::hours(new_ptime.time_of_day().hours()) +
1315 boost::posix_time::minutes(new_ptime.time_of_day().minutes()) +
1316 boost::posix_time::seconds(new_ptime.time_of_day().seconds()) +
1317 boost::posix_time::microseconds(new_ptime.time_of_day().fractional_seconds()));
1318 #endif
1319
1320 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1321 result->set_value( &new_tmstmp );
1322 return true;
1323 }
1324 };
1325
1326 struct _fn_add_day_to_timestamp : public base_date_add
1327 {
1328 bool operator()(bs_stmt_vec_t* args, variable* result) override
1329 {
1330 param_validation(args);
1331
1332 new_ptime += boost::gregorian::days( val_quantity.i64() );
1333 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1334 result->set_value( &new_tmstmp );
1335 return true;
1336 }
1337 };
1338
1339 struct _fn_add_hour_to_timestamp : public base_date_add
1340 {
1341 bool operator()(bs_stmt_vec_t* args, variable* result) override
1342 {
1343 param_validation(args);
1344
1345 new_ptime += boost::posix_time::hours( val_quantity.i64() );
1346 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1347 result->set_value( &new_tmstmp );
1348 return true;
1349 }
1350 };
1351
1352 struct _fn_add_minute_to_timestamp : public base_date_add
1353 {
1354 bool operator()(bs_stmt_vec_t* args, variable* result) override
1355 {
1356 param_validation(args);
1357
1358 new_ptime += boost::posix_time::minutes( val_quantity.i64() );
1359 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1360 result->set_value( &new_tmstmp );
1361 return true;
1362 }
1363 };
1364
1365 struct _fn_add_second_to_timestamp : public base_date_add
1366 {
1367 bool operator()(bs_stmt_vec_t* args, variable* result) override
1368 {
1369 param_validation(args);
1370
1371 new_ptime += boost::posix_time::seconds( val_quantity.i64() );
1372 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1373 result->set_value( &new_tmstmp );
1374 return true;
1375 }
1376 };
1377
1378 struct _fn_utcnow : public base_function
1379 {
1380 timestamp_t now_timestamp;
1381
1382 bool operator()(bs_stmt_vec_t* args, variable* result) override
1383 {
1384 int args_size = args->size();
1385
1386 if (args_size != 0)
1387 {
1388 throw base_s3select_exception("utcnow does not expect any parameters");
1389 }
1390
1391 boost::posix_time::ptime now_ptime = boost::posix_time::ptime( boost::posix_time::second_clock::universal_time());
1392 now_timestamp = std::make_tuple(now_ptime, boost::posix_time::time_duration(0, 0, 0), false);
1393 result->set_value( &now_timestamp );
1394
1395 return true;
1396 }
1397 };
1398
1399 struct _fn_between : public base_function
1400 {
1401
1402 value res;
1403
1404 bool operator()(bs_stmt_vec_t* args, variable* result) override
1405 {
1406 int args_size = args->size();
1407
1408
1409 if (args_size != 3)
1410 {
1411 throw base_s3select_exception("between operates on 3 expressions");//TODO FATAL
1412 }
1413
1414 auto iter = args->begin();
1415
1416 base_statement* second_expr = *iter;
1417 iter++;
1418 base_statement* first_expr = *iter;
1419 iter++;
1420 base_statement* main_expr = *iter;
1421
1422 value second_expr_val = second_expr->eval();
1423 value first_expr_val = first_expr->eval();
1424 value main_expr_val = main_expr->eval();
1425
1426 if ((second_expr_val.type == first_expr_val.type && first_expr_val.type == main_expr_val.type) || (second_expr_val.is_number() && first_expr_val.is_number() && main_expr_val.is_number()))
1427 {
1428 if((main_expr_val >= first_expr_val) && (main_expr_val <= second_expr_val)) {
1429 result->set_value(true);
1430 } else {
1431 result->set_value(false);
1432 }
1433 }
1434 return true;
1435 }
1436 };
1437
1438 struct _fn_not_between : public base_function
1439 {
1440
1441 value res;
1442 _fn_between between_op;
1443
1444 bool operator()(bs_stmt_vec_t* args, variable* result) override
1445 {
1446 between_op(args,result);
1447
1448 if (result->get_value().is_true() == 0) {
1449 result->set_value(true);
1450 } else {
1451 result->set_value(false);
1452 }
1453 return true;
1454 }
1455 };
1456
1457 static char s3select_ver[10]="41.a";
1458
1459 struct _fn_version : public base_function
1460 {
1461 value val; //TODO use git to generate sha1
1462 bool operator()(bs_stmt_vec_t* args, variable* result) override
1463 {
1464 val = &s3select_ver[0];
1465 *result = val;
1466 return true;
1467 }
1468 };
1469
1470 struct _fn_isnull : public base_function
1471 {
1472
1473 value res;
1474
1475 bool operator()(bs_stmt_vec_t* args, variable* result) override
1476 {
1477 auto iter = args->begin();
1478 base_statement* expr = *iter;
1479 value expr_val = expr->eval();
1480 if ( expr_val.is_null()) {
1481 result->set_value(true);
1482 } else {
1483 result->set_value(false);
1484 }
1485 return true;
1486 }
1487 };
1488
1489 struct _fn_is_not_null : public base_function
1490 {
1491 value res;
1492 _fn_isnull isnull_op;
1493
1494 bool operator()(bs_stmt_vec_t* args, variable* result) override
1495 {
1496
1497 isnull_op(args,result);
1498
1499 if (result->get_value().is_true() == 0)
1500 result->set_value(true);
1501 else
1502 result->set_value(false);
1503
1504 return true;
1505 }
1506 };
1507
1508 struct _fn_in : public base_function
1509 {
1510
1511 value res;
1512
1513 bool operator()(bs_stmt_vec_t *args, variable *result) override
1514 {
1515 int args_size = static_cast<int>(args->size()-1);
1516 base_statement *main_expr = (*args)[args_size];
1517 value main_expr_val = main_expr->eval();
1518 args_size--;
1519 while (args_size>=0)
1520 {
1521 base_statement *expr = (*args)[args_size];
1522 value expr_val = expr->eval();
1523 args_size--;
1524 if ((expr_val.type == main_expr_val.type) || (expr_val.is_number() && main_expr_val.is_number()))
1525 {
1526 if (expr_val == main_expr_val)
1527 {
1528 result->set_value(true);
1529 return true;
1530 }
1531 }
1532 }
1533 result->set_value(false);
1534 return true;
1535 }
1536 };
1537
1538 struct _fn_like : public base_like
1539 {
1540 explicit _fn_like(base_statement* esc, base_statement* like_expr)
1541 {
1542 auto is_constant = [&](base_statement* bs) {
1543 if (dynamic_cast<variable*>(bs) && dynamic_cast<variable*>(bs)->m_var_type == variable::var_t::COLUMN_VALUE) {
1544 return true;
1545 } else {
1546 return false;
1547 }
1548 };
1549
1550 if (is_constant(esc) && is_constant(like_expr)) {
1551 constant_state = true;
1552 }
1553
1554 if(constant_state == true)
1555 {
1556 param_validation(esc, like_expr);
1557 std::vector<char> like_as_regex = transform(like_expr_val.str(), *escape_expr_val.str());
1558 compile(like_as_regex);
1559 }
1560 }
1561
1562 bool operator()(bs_stmt_vec_t* args, variable* result) override
1563 {
1564 auto iter = args->begin();
1565
1566 base_statement* escape_expr = *iter;
1567 iter++;
1568 base_statement* like_expr = *iter;
1569 iter++;
1570 base_statement* main_expr = *iter;
1571
1572 if (constant_state == false)
1573 {
1574 param_validation(escape_expr, like_expr);
1575 std::vector<char> like_as_regex = transform(like_expr_val.str(), *escape_expr_val.str());
1576 compile(like_as_regex);
1577 }
1578
1579 value main_expr_val = main_expr->eval();
1580 if (main_expr_val.type != value::value_En_t::STRING)
1581 {
1582 throw base_s3select_exception("main expression must be string");
1583 }
1584
1585 match(main_expr_val, result);
1586 return true;
1587 }
1588 };
1589
1590 struct _fn_substr : public base_function
1591 {
1592
1593 char buff[4096];// this buffer is persist for the query life time, it use for the results per row(only for the specific function call)
1594 //it prevent from intensive use of malloc/free (fragmentation).
1595 //should validate result length.
1596 //TODO may replace by std::string (dynamic) , or to replace with global allocator , in query scope.
1597 value v_str;
1598 value v_from;
1599 value v_to;
1600
1601 bool operator()(bs_stmt_vec_t* args, variable* result) override
1602 {
1603 auto iter = args->begin();
1604 int args_size = args->size();
1605
1606
1607 if (args_size<2)
1608 {
1609 throw base_s3select_exception("substr accept 2 arguments or 3");
1610 }
1611
1612 base_statement* str = *iter;
1613 iter++;
1614 base_statement* from = *iter;
1615 base_statement* to;
1616
1617 if (args_size == 3)
1618 {
1619 iter++;
1620 to = *iter;
1621 v_to = to->eval();
1622 if (!v_to.is_number())
1623 {
1624 throw base_s3select_exception("substr third argument must be number"); //can skip row
1625 }
1626 }
1627
1628 v_str = str->eval();
1629
1630 if(v_str.type != value::value_En_t::STRING)
1631 {
1632 throw base_s3select_exception("substr first argument must be string"); //can skip current row
1633 }
1634
1635 int str_length = strlen(v_str.str());
1636
1637 v_from = from->eval();
1638 if(!v_from.is_number())
1639 {
1640 throw base_s3select_exception("substr second argument must be number"); //can skip current row
1641 }
1642
1643 int64_t f;
1644 int64_t t;
1645
1646 if (v_from.type == value::value_En_t::FLOAT)
1647 {
1648 f=v_from.dbl();
1649 }
1650 else
1651 {
1652 f=v_from.i64();
1653 }
1654
1655 if (f <= 0 && args_size == 2)
1656 {
1657 f = 1;
1658 }
1659
1660 if (f>str_length)
1661 {
1662 result->set_value("");
1663 return true;
1664 }
1665
1666 if (str_length>(int)sizeof(buff))
1667 {
1668 throw base_s3select_exception("string too long for internal buffer"); //can skip row
1669 }
1670
1671 if (args_size == 3)
1672 {
1673 if (v_to.type == value::value_En_t::FLOAT)
1674 {
1675 t = v_to.dbl();
1676 }
1677 else
1678 {
1679 t = v_to.i64();
1680 }
1681
1682 if (f <= 0)
1683 {
1684 t = t + f - 1;
1685 f = 1;
1686 }
1687
1688 if (t<0)
1689 {
1690 t = 0;
1691 }
1692
1693 if (t > str_length)
1694 {
1695 t = str_length;
1696 }
1697
1698 if( (str_length-(f-1)-t) <0)
1699 {//in case the requested length is too long, reduce it to exact length.
1700 t = str_length-(f-1);
1701 }
1702
1703 strncpy(buff, v_str.str()+f-1, t);
1704 }
1705 else
1706 {
1707 strcpy(buff, v_str.str()+f-1);
1708 }
1709
1710 result->set_value(buff);
1711
1712 return true;
1713 }
1714 };
1715
1716 struct _fn_charlength : public base_function {
1717
1718 value v_str;
1719
1720 bool operator()(bs_stmt_vec_t* args, variable* result) override
1721 {
1722 auto iter = args->begin();
1723 base_statement* str = *iter;
1724 v_str = str->eval();
1725 if(v_str.type != value::value_En_t::STRING) {
1726 throw base_s3select_exception("content is not string!");
1727 } else {
1728 int64_t str_length = strlen(v_str.str());
1729 result->set_value(str_length);
1730 return true;
1731 }
1732 }
1733 };
1734
1735 struct _fn_lower : public base_function {
1736
1737 std::string buff;
1738 value v_str;
1739
1740 bool operator()(bs_stmt_vec_t* args, variable* result) override
1741 {
1742 auto iter = args->begin();
1743 base_statement* str = *iter;
1744 v_str = str->eval();
1745 if(v_str.type != value::value_En_t::STRING) {
1746 throw base_s3select_exception("content is not string");
1747 } else {
1748 buff = v_str.str();
1749 boost::algorithm::to_lower(buff);
1750 result->set_value(buff.c_str());
1751 return true;
1752 }
1753 }
1754 };
1755
1756 struct _fn_upper : public base_function {
1757
1758 std::string buff;
1759 value v_str;
1760
1761 bool operator()(bs_stmt_vec_t* args, variable* result) override
1762 {
1763 auto iter = args->begin();
1764 base_statement* str = *iter;
1765 v_str = str->eval();
1766 if(v_str.type != value::value_En_t::STRING) {
1767 throw base_s3select_exception("content is not string");
1768 } else {
1769 buff = v_str.str();
1770 boost::algorithm::to_upper(buff);
1771 result->set_value(buff.c_str());
1772 return true;
1773 }
1774 }
1775 };
1776
1777 struct _fn_nullif : public base_function {
1778
1779 value x;
1780 value y;
1781
1782 bool operator()(bs_stmt_vec_t* args, variable* result) override
1783 {
1784 auto iter = args->begin();
1785
1786 int args_size = args->size();
1787 if (args_size != 2)
1788 {
1789 throw base_s3select_exception("nullif accept only 2 arguments");
1790 }
1791 base_statement *first = *iter;
1792 x = first->eval();
1793 iter++;
1794 base_statement *second = *iter;
1795 y = second->eval();
1796 if (x.is_null() && y.is_null())
1797 {
1798 result->set_null();
1799 return true;
1800 }
1801 if (x.is_null())
1802 {
1803 result->set_null();
1804 return true;
1805 }
1806 if (!(x.is_number() && y.is_number())) {
1807 if (x.type != y.type) {
1808 *result = x;
1809 return true;
1810 }
1811 }
1812 if (x != y) {
1813 *result = x;
1814 } else {
1815 result->set_null();
1816 }
1817 return true;
1818 }
1819 };
1820
1821 struct _fn_when_then : public base_function {
1822
1823 value when_value;
1824
1825 bool operator()(bs_stmt_vec_t* args, variable* result) override
1826 {
1827 auto iter = args->begin();
1828
1829 base_statement* then_expr = *iter;
1830 iter ++;
1831
1832 base_statement* when_expr = *iter;
1833
1834 when_value = when_expr->eval();
1835
1836 if (when_value.is_true())//true
1837 {
1838 *result = then_expr->eval();
1839 return true;
1840 }
1841
1842 result->set_null();
1843
1844 return true;
1845 }
1846 };
1847
1848 struct _fn_when_value_then : public base_function {
1849
1850 value when_value;
1851 value case_value;
1852 value then_value;
1853
1854 bool operator()(bs_stmt_vec_t* args, variable* result) override
1855 {
1856 auto iter = args->begin();
1857
1858 base_statement* then_expr = *iter;
1859 iter++;
1860
1861 base_statement* when_expr = *iter;
1862 iter++;
1863
1864 base_statement* case_expr = *iter;
1865
1866 when_value = when_expr->eval();
1867 case_value = case_expr->eval();
1868 then_value = then_expr->eval();
1869
1870 if (case_value == when_value)
1871 {
1872 *result = then_value;
1873 return true;
1874 }
1875
1876 result->set_null();
1877 return true;
1878 }
1879 };
1880
1881 struct _fn_case_when_else : public base_function {
1882
1883 value when_then_value;
1884
1885 bool operator()(bs_stmt_vec_t* args, variable* result) override
1886 {
1887 base_statement* else_expr = *(args->begin());
1888
1889 size_t args_size = args->size() -1;
1890
1891 for(int ivec=args_size;ivec>0;ivec--)
1892 {
1893 when_then_value = (*args)[ivec]->eval();
1894
1895 if(!when_then_value.is_null())
1896 {
1897 *result = when_then_value;
1898 return true;
1899 }
1900
1901 }
1902
1903 *result = else_expr->eval();
1904 return true;
1905 }
1906 };
1907
1908 struct _fn_coalesce : public base_function
1909 {
1910
1911 value res;
1912
1913 bool operator()(bs_stmt_vec_t* args, variable* result) override
1914 {
1915 auto iter_begin = args->begin();
1916 int args_size = args->size();
1917 while (args_size >= 1)
1918 {
1919 base_statement* expr = *iter_begin;
1920 value expr_val = expr->eval();
1921 iter_begin++;
1922 if ( !(expr_val.is_null())) {
1923 *result = expr_val;
1924 return true;
1925 }
1926 args_size--;
1927 }
1928 result->set_null();
1929 return true;
1930 }
1931 };
1932
1933 struct _fn_string : public base_function
1934 {
1935
1936 value res;
1937
1938 bool operator()(bs_stmt_vec_t* args, variable* result) override
1939 {
1940 auto iter = args->begin();
1941
1942 base_statement* expr = *iter;
1943 value expr_val = expr->eval();
1944 result->set_value((expr_val.to_string()));
1945 return true;
1946 }
1947 };
1948
1949 struct _fn_to_bool : public base_function
1950 {
1951
1952 value func_arg;
1953
1954 bool operator()(bs_stmt_vec_t* args, variable* result) override
1955 {
1956 int64_t i=0;
1957 func_arg = (*args->begin())->eval();
1958
1959 if (func_arg.type == value::value_En_t::FLOAT)
1960 {
1961 i = func_arg.dbl();
1962 }
1963 else if (func_arg.type == value::value_En_t::DECIMAL || func_arg.type == value::value_En_t::BOOL)
1964 {
1965 i = func_arg.i64();
1966 }
1967 else
1968 {
1969 i = 0;
1970 }
1971 if (i == 0)
1972 {
1973 result->set_value(false);
1974 }
1975 else
1976 {
1977 result->set_value(true);
1978 }
1979 return true;
1980 }
1981 };
1982
1983 struct _fn_trim : public base_function {
1984
1985 std::string input_string;
1986 value v_remove;
1987 value v_input;
1988
1989 _fn_trim()
1990 {
1991 v_remove = " ";
1992 }
1993
1994 bool operator()(bs_stmt_vec_t* args, variable* result) override
1995 {
1996 auto iter = args->begin();
1997 int args_size = args->size();
1998 base_statement* str = *iter;
1999 v_input = str->eval();
2000 if(v_input.type != value::value_En_t::STRING) {
2001 throw base_s3select_exception("content is not string");
2002 }
2003 input_string = v_input.str();
2004 if (args_size == 2) {
2005 iter++;
2006 base_statement* next = *iter;
2007 v_remove = next->eval();
2008 }
2009 boost::trim_right_if(input_string,boost::is_any_of(v_remove.str()));
2010 boost::trim_left_if(input_string,boost::is_any_of(v_remove.str()));
2011 result->set_value(input_string.c_str());
2012 return true;
2013 }
2014 };
2015
2016 struct _fn_leading : public base_function {
2017
2018 std::string input_string;
2019 value v_remove;
2020 value v_input;
2021
2022 _fn_leading()
2023 {
2024 v_remove = " ";
2025 }
2026
2027 bool operator()(bs_stmt_vec_t* args, variable* result) override
2028 {
2029 auto iter = args->begin();
2030 int args_size = args->size();
2031 base_statement* str = *iter;
2032 v_input = str->eval();
2033 if(v_input.type != value::value_En_t::STRING) {
2034 throw base_s3select_exception("content is not string");
2035 }
2036 input_string = v_input.str();
2037 if (args_size == 2) {
2038 iter++;
2039 base_statement* next = *iter;
2040 v_remove = next->eval();
2041 }
2042 boost::trim_left_if(input_string,boost::is_any_of(v_remove.str()));
2043 result->set_value(input_string.c_str());
2044 return true;
2045 }
2046 };
2047
2048 struct _fn_trailing : public base_function {
2049
2050 std::string input_string;
2051 value v_remove;
2052 value v_input;
2053
2054 _fn_trailing()
2055 {
2056 v_remove = " ";
2057 }
2058
2059 bool operator()(bs_stmt_vec_t* args, variable* result) override
2060 {
2061 auto iter = args->begin();
2062 int args_size = args->size();
2063 base_statement* str = *iter;
2064 v_input = str->eval();
2065 if(v_input.type != value::value_En_t::STRING) {
2066 throw base_s3select_exception("content is not string");
2067 }
2068 input_string = v_input.str();
2069 if (args_size == 2) {
2070 iter++;
2071 base_statement* next = *iter;
2072 v_remove = next->eval();
2073 }
2074 boost::trim_right_if(input_string,boost::is_any_of(v_remove.str()));
2075 result->set_value(input_string.c_str());
2076 return true;
2077 }
2078 };
2079
2080 base_function* s3select_functions::create(std::string_view fn_name,const bs_stmt_vec_t &arguments)
2081 {
2082 const FunctionLibrary::const_iterator iter = m_functions_library.find(fn_name.data());
2083
2084 if (iter == m_functions_library.end())
2085 {
2086 std::string msg;
2087 msg = std::string{fn_name} + " " + " function not found";
2088 throw base_s3select_exception(msg, base_s3select_exception::s3select_exp_en_t::FATAL);
2089 }
2090
2091 switch (iter->second)
2092 {
2093 case s3select_func_En_t::ADD:
2094 return S3SELECT_NEW(this,_fn_add);
2095 break;
2096
2097 case s3select_func_En_t::SUM:
2098 return S3SELECT_NEW(this,_fn_sum);
2099 break;
2100
2101 case s3select_func_En_t::COUNT:
2102 return S3SELECT_NEW(this,_fn_count);
2103 break;
2104
2105 case s3select_func_En_t::MIN:
2106 return S3SELECT_NEW(this,_fn_min);
2107 break;
2108
2109 case s3select_func_En_t::MAX:
2110 return S3SELECT_NEW(this,_fn_max);
2111 break;
2112
2113 case s3select_func_En_t::TO_INT:
2114 return S3SELECT_NEW(this,_fn_to_int);
2115 break;
2116
2117 case s3select_func_En_t::TO_FLOAT:
2118 return S3SELECT_NEW(this,_fn_to_float);
2119 break;
2120
2121 case s3select_func_En_t::SUBSTR:
2122 return S3SELECT_NEW(this,_fn_substr);
2123 break;
2124
2125 case s3select_func_En_t::TO_TIMESTAMP:
2126 return S3SELECT_NEW(this,_fn_to_timestamp);
2127 break;
2128
2129 case s3select_func_En_t::TO_STRING_CONSTANT:
2130 return S3SELECT_NEW(this,_fn_to_string_constant);
2131 break;
2132
2133 case s3select_func_En_t::TO_STRING_DYNAMIC:
2134 return S3SELECT_NEW(this,_fn_to_string_dynamic);
2135 break;
2136
2137 case s3select_func_En_t::TO_BOOL:
2138 return S3SELECT_NEW(this,_fn_to_bool);
2139 break;
2140
2141 case s3select_func_En_t::EXTRACT_YEAR:
2142 return S3SELECT_NEW(this,_fn_extract_year_from_timestamp);
2143 break;
2144
2145 case s3select_func_En_t::EXTRACT_MONTH:
2146 return S3SELECT_NEW(this,_fn_extract_month_from_timestamp);
2147 break;
2148
2149 case s3select_func_En_t::EXTRACT_DAY:
2150 return S3SELECT_NEW(this,_fn_extract_day_from_timestamp);
2151 break;
2152
2153 case s3select_func_En_t::EXTRACT_HOUR:
2154 return S3SELECT_NEW(this,_fn_extract_hour_from_timestamp);
2155 break;
2156
2157 case s3select_func_En_t::EXTRACT_MINUTE:
2158 return S3SELECT_NEW(this,_fn_extract_minute_from_timestamp);
2159 break;
2160
2161 case s3select_func_En_t::EXTRACT_SECOND:
2162 return S3SELECT_NEW(this,_fn_extract_second_from_timestamp);
2163 break;
2164
2165 case s3select_func_En_t::EXTRACT_WEEK:
2166 return S3SELECT_NEW(this,_fn_extract_week_from_timestamp);
2167 break;
2168
2169 case s3select_func_En_t::EXTRACT_TIMEZONE_HOUR:
2170 return S3SELECT_NEW(this,_fn_extract_tz_hour_from_timestamp);
2171 break;
2172
2173 case s3select_func_En_t::EXTRACT_TIMEZONE_MINUTE:
2174 return S3SELECT_NEW(this,_fn_extract_tz_minute_from_timestamp);
2175 break;
2176
2177 case s3select_func_En_t::DATE_ADD_YEAR:
2178 return S3SELECT_NEW(this,_fn_add_year_to_timestamp);
2179 break;
2180
2181 case s3select_func_En_t::DATE_ADD_MONTH:
2182 return S3SELECT_NEW(this,_fn_add_month_to_timestamp);
2183 break;
2184
2185 case s3select_func_En_t::DATE_ADD_DAY:
2186 return S3SELECT_NEW(this,_fn_add_day_to_timestamp);
2187 break;
2188
2189 case s3select_func_En_t::DATE_ADD_HOUR:
2190 return S3SELECT_NEW(this,_fn_add_hour_to_timestamp);
2191 break;
2192
2193 case s3select_func_En_t::DATE_ADD_MINUTE:
2194 return S3SELECT_NEW(this,_fn_add_minute_to_timestamp);
2195 break;
2196
2197 case s3select_func_En_t::DATE_ADD_SECOND:
2198 return S3SELECT_NEW(this,_fn_add_second_to_timestamp);
2199 break;
2200
2201 case s3select_func_En_t::DATE_DIFF_YEAR:
2202 return S3SELECT_NEW(this,_fn_diff_year_timestamp);
2203 break;
2204
2205 case s3select_func_En_t::DATE_DIFF_MONTH:
2206 return S3SELECT_NEW(this,_fn_diff_month_timestamp);
2207 break;
2208
2209 case s3select_func_En_t::DATE_DIFF_DAY:
2210 return S3SELECT_NEW(this,_fn_diff_day_timestamp);
2211 break;
2212
2213 case s3select_func_En_t::DATE_DIFF_HOUR:
2214 return S3SELECT_NEW(this,_fn_diff_hour_timestamp);
2215 break;
2216
2217 case s3select_func_En_t::DATE_DIFF_MINUTE:
2218 return S3SELECT_NEW(this,_fn_diff_minute_timestamp);
2219 break;
2220
2221 case s3select_func_En_t::DATE_DIFF_SECOND:
2222 return S3SELECT_NEW(this,_fn_diff_second_timestamp);
2223 break;
2224
2225 case s3select_func_En_t::UTCNOW:
2226 return S3SELECT_NEW(this,_fn_utcnow);
2227 break;
2228
2229 case s3select_func_En_t::AVG:
2230 return S3SELECT_NEW(this,_fn_avg);
2231 break;
2232
2233 case s3select_func_En_t::LOWER:
2234 return S3SELECT_NEW(this,_fn_lower);
2235 break;
2236
2237 case s3select_func_En_t::UPPER:
2238 return S3SELECT_NEW(this,_fn_upper);
2239 break;
2240
2241 case s3select_func_En_t::LENGTH:
2242 return S3SELECT_NEW(this,_fn_charlength);
2243 break;
2244
2245 case s3select_func_En_t::BETWEEN:
2246 return S3SELECT_NEW(this,_fn_between);
2247 break;
2248
2249 case s3select_func_En_t::NOT_BETWEEN:
2250 return S3SELECT_NEW(this,_fn_not_between);
2251 break;
2252
2253 case s3select_func_En_t::IS_NULL:
2254 return S3SELECT_NEW(this,_fn_isnull);
2255 break;
2256
2257 case s3select_func_En_t::IS_NOT_NULL:
2258 return S3SELECT_NEW(this,_fn_is_not_null);
2259 break;
2260
2261 case s3select_func_En_t::IN:
2262 return S3SELECT_NEW(this,_fn_in);
2263 break;
2264
2265 case s3select_func_En_t::VERSION:
2266 return S3SELECT_NEW(this,_fn_version);
2267 break;
2268
2269 case s3select_func_En_t::NULLIF:
2270 return S3SELECT_NEW(this,_fn_nullif);
2271 break;
2272
2273 case s3select_func_En_t::LIKE:
2274 return S3SELECT_NEW(this,_fn_like,arguments[0],arguments[1]);
2275 break;
2276
2277 case s3select_func_En_t::COALESCE:
2278 return S3SELECT_NEW(this,_fn_coalesce);
2279 break;
2280
2281 case s3select_func_En_t::WHEN_THEN:
2282 return S3SELECT_NEW(this,_fn_when_then);
2283 break;
2284
2285 case s3select_func_En_t::WHEN_VALUE_THEN:
2286 return S3SELECT_NEW(this,_fn_when_value_then);
2287 break;
2288
2289 case s3select_func_En_t::CASE_WHEN_ELSE:
2290 return S3SELECT_NEW(this,_fn_case_when_else);
2291 break;
2292
2293 case s3select_func_En_t::STRING:
2294 return S3SELECT_NEW(this,_fn_string);
2295 break;
2296
2297 case s3select_func_En_t::TRIM:
2298 return S3SELECT_NEW(this,_fn_trim);
2299 break;
2300
2301 case s3select_func_En_t::LEADING:
2302 return S3SELECT_NEW(this,_fn_leading);
2303 break;
2304
2305 case s3select_func_En_t::TRAILING:
2306 return S3SELECT_NEW(this,_fn_trailing);
2307 break;
2308
2309 default:
2310 throw base_s3select_exception("internal error while resolving function-name");
2311 break;
2312 }
2313 }
2314
2315 bool base_statement::is_function() const
2316 {
2317 if (dynamic_cast<__function*>(const_cast<base_statement*>(this)))
2318 {
2319 return true;
2320 }
2321 else
2322 {
2323 return false;
2324 }
2325 }
2326
2327 const base_statement* base_statement::get_aggregate() const
2328 {
2329 //search for aggregation function in AST
2330 const base_statement* res = 0;
2331
2332 if (is_aggregate())
2333 {
2334 return this;
2335 }
2336
2337 if (left() && (res=left()->get_aggregate())!=0)
2338 {
2339 return res;
2340 }
2341
2342 if (right() && (res=right()->get_aggregate())!=0)
2343 {
2344 return res;
2345 }
2346
2347 if (is_function())
2348 {
2349 for (auto i : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2350 {
2351 const base_statement* b=i->get_aggregate();
2352 if (b)
2353 {
2354 return b;
2355 }
2356 }
2357 }
2358 return 0;
2359 }
2360
2361 bool base_statement::is_column_reference() const
2362 {
2363 if(is_column())
2364 return true;
2365
2366 if(left())
2367 return left()->is_column_reference();
2368
2369 if(right())
2370 return right()->is_column_reference();
2371
2372 if(is_function())
2373 {
2374 for(auto a : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2375 {
2376 if(a->is_column_reference())
2377 return true;
2378 }
2379 }
2380
2381 return false;
2382 }
2383
2384 bool base_statement::is_nested_aggregate(bool &aggr_flow) const
2385 {
2386 if (is_aggregate())
2387 {
2388 aggr_flow=true;
2389 for (auto& i : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2390 {
2391 if (i->get_aggregate() != nullptr)
2392 {
2393 return true;
2394 }
2395 }
2396 }
2397
2398 if(left() && left()->is_nested_aggregate(aggr_flow))
2399 return true;
2400
2401 if(right() && right()->is_nested_aggregate(aggr_flow))
2402 return true;
2403
2404 if (is_function())
2405 {
2406 for (auto& i : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2407 {
2408 if (i->get_aggregate() != nullptr)
2409 {
2410 return i->is_nested_aggregate(aggr_flow);
2411 }
2412 }
2413 }
2414
2415 return false;
2416 }
2417
2418 bool base_statement::is_statement_contain_star_operation() const
2419 {
2420 if(is_star_operation())
2421 return true;
2422
2423 if(left())
2424 return left()->is_statement_contain_star_operation();
2425
2426 if(right())
2427 return right()->is_statement_contain_star_operation();
2428
2429 if(is_function())
2430 {
2431 for(auto a : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2432 {
2433 if(a->is_star_operation())
2434 return true;
2435 }
2436 }
2437
2438 return false;
2439 }
2440
2441 bool base_statement::mark_aggreagtion_subtree_to_execute()
2442 {//purpase:: set aggregation subtree as runnable.
2443 //the function search for aggregation function, and mark its subtree {skip = false}
2444 if (is_aggregate())
2445 set_skip_non_aggregate(false);
2446
2447 if (left())
2448 left()->mark_aggreagtion_subtree_to_execute();
2449
2450 if(right())
2451 right()->mark_aggreagtion_subtree_to_execute();
2452
2453 if (is_function())
2454 {
2455 for (auto& i : dynamic_cast<__function*>(this)->get_arguments())
2456 {
2457 i->mark_aggreagtion_subtree_to_execute();
2458 }
2459 }
2460
2461 return true;
2462 }
2463
2464 void base_statement::push_for_cleanup(std::set<base_statement*>& ast_nodes_to_delete)//semantic loop on each projection
2465 {
2466 //placement new is releasing the main-buffer in which all AST nodes
2467 //allocating from it. meaning no calls to destructors.
2468 //the purpose of this routine is to traverse the AST in map all nodes for cleanup.
2469 //the cleanup method will trigger all destructors.
2470
2471 ast_nodes_to_delete.insert(this);
2472
2473 if (left())
2474 left()->push_for_cleanup(ast_nodes_to_delete);
2475
2476 if(right())
2477 right()->push_for_cleanup(ast_nodes_to_delete);
2478
2479 if (is_function())
2480 {
2481 for (auto& i : dynamic_cast<__function*>(this)->get_arguments())
2482 {
2483 i->push_for_cleanup(ast_nodes_to_delete);
2484 }
2485 }
2486 }
2487
2488 #ifdef _ARROW_EXIST
2489 void base_statement::extract_columns(parquet_file_parser::column_pos_t &cols,const uint16_t max_columns)
2490 {// purpose: to extract all column-ids from query
2491 if(is_column()) //column reference or column position
2492 {variable* v = dynamic_cast<variable*>(this);
2493 if(dynamic_cast<variable*>(this)->m_var_type == variable::var_t::VARIABLE_NAME)
2494 {//column reference
2495
2496 if (v->getScratchArea()->get_column_pos(v->get_name().c_str())>=0)
2497 {//column belong to schema
2498 cols.insert( v->getScratchArea()->get_column_pos(v->get_name().c_str() ));
2499 }else {
2500 if(v->getAlias()->search_alias(v->get_name()))
2501 {//column is an alias --> extract columns belong to alias
2502 //TODO cyclic alias to resolve
2503 v->getAlias()->search_alias(v->get_name())->extract_columns(cols,max_columns);
2504 }else {
2505 //column is not alias --> error
2506 std::stringstream ss;
2507 ss << "column " + v->get_name() + " is not part of schema nor an alias";
2508 throw base_s3select_exception(ss.str(),base_s3select_exception::s3select_exp_en_t::FATAL);
2509 }
2510 }
2511 }else if(v->m_var_type == variable::var_t::STAR_OPERATION)
2512 {
2513 for(uint16_t i=0;i<max_columns;i++)
2514 {//push all columns
2515 cols.insert( i );
2516 }
2517 }
2518 else {
2519 if (v->get_column_pos()>=max_columns)
2520 {
2521 std::stringstream ss;
2522 ss << "column " + std::to_string( v->get_column_pos()+1 ) + " exceed max number of columns";
2523 throw base_s3select_exception(ss.str(),base_s3select_exception::s3select_exp_en_t::FATAL);
2524 }
2525 cols.insert(v->get_column_pos());//push column positions
2526 }
2527 }else if(is_function())
2528 {
2529 __function* f = (dynamic_cast<__function*>(this));
2530 bs_stmt_vec_t args = f->get_arguments();
2531 for (auto prm : args)
2532 {//traverse function args
2533 prm->extract_columns(cols,max_columns);
2534 }
2535
2536 }
2537
2538 //keep traversing down the AST
2539 if(left())
2540 left()->extract_columns(cols,max_columns);
2541
2542 if(right())
2543 right()->extract_columns(cols,max_columns);
2544 }
2545 #endif //_ARROW_EXIST
2546
2547 } //namespace s3selectEngine
2548
2549 #endif