]> git.proxmox.com Git - ceph.git/blob - ceph/src/s3select/include/s3select_functions.h
8c507fca1b9f5aa4231f13c58a59e027281f0aa7
[ceph.git] / ceph / src / s3select / include / s3select_functions.h
1 #ifndef __S3SELECT_FUNCTIONS__
2 #define __S3SELECT_FUNCTIONS__
3
4
5 #include "s3select_oper.h"
6 #include <boost/algorithm/string.hpp>
7 #include <boost/algorithm/string/trim.hpp>
8 #include <boost/regex.hpp>
9 #include <algorithm>
10
11 using namespace std::string_literals;
12
13 #define BOOST_BIND_ACTION_PARAM( push_name ,param ) boost::bind( &push_name::operator(), g_ ## push_name , _1 ,_2, param)
14 namespace s3selectEngine
15 {
16
/**
 * Returns 10 raised to the power `n` as a double.
 *
 * Implemented with plain multiplication instead of pow(): pow() is not a
 * constexpr function in standard C++, so a constexpr wrapper around it is only
 * accepted by compilers that treat it as a builtin.
 *
 * @param n decimal exponent; negative values yield the reciprocal power.
 * @return 10^n
 */
constexpr double sec_scale(int n)
{
  const int magnitude = n < 0 ? -n : n;
  double power = 1.0;
  for (int i = 0; i < magnitude; ++i)
  {
    power *= 10.0;
  }
  return n < 0 ? 1.0 / power : power;
}
21
// Parser action: stores the first matched character into *n.
struct push_char
{
  // a: start of the matched text, b: its end (unused), n: destination.
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    const char matched = *a;
    *n = matched;
  }

};
static push_char g_push_char;
31
// Parser action: converts the two decimal digits at `a` into a number.
struct push_2dig
{
  // a: start of the matched text, b: its end (unused), n: destination.
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    int number = 0;
    for (int idx = 0; idx < 2; ++idx)
    {
      number = number * 10 + (static_cast<char>(a[idx]) - 48);
    }
    *n = number;
  }

};
static push_2dig g_push_2dig;
41
// Parser action: converts the four decimal digits at `a` into a number.
struct push_4dig
{
  // a: start of the matched text, b: its end (unused), n: destination.
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    int number = 0;
    for (int idx = 0; idx < 4; ++idx)
    {
      number = number * 10 + (static_cast<char>(a[idx]) - 48);
    }
    *n = number;
  }

};
static push_4dig g_push_4dig;
51
52 struct push_1fdig
53 {
54 void operator()(const char* a, const char* b, uint32_t* n) const
55 {
56 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
57 const double scale = sec_scale(9-1); //nano-sec
58 #else
59 const double scale = sec_scale(6-1); //micro-sec
60 #endif
61
62 *n = ((static_cast<char>(*a) - 48)) * scale;
63 }
64
65 };
66 static push_1fdig g_push_1fdig;
67
68 struct push_2fdig
69 {
70 void operator()(const char* a, const char* b, uint32_t* n) const
71 {
72 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
73 const double scale = sec_scale(9-2); //nano-sec
74 #else
75 const double scale = sec_scale(6-2); //micro-sec
76 #endif
77
78 *n = ((static_cast<char>(*a) - 48) * 10 + (static_cast<char>(*(a+1)) - 48)) * scale;
79 }
80
81 };
82 static push_2fdig g_push_2fdig;
83
84 struct push_3fdig
85 {
86 void operator()(const char* a, const char* b, uint32_t* n) const
87 {
88 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
89 const double scale = sec_scale(9-3); //nano-sec
90 #else
91 const double scale = sec_scale(6-3); //micro-sec
92 #endif
93
94 *n = ((static_cast<char>(*a) - 48) * 100 + (static_cast<char>(*(a+1)) - 48) * 10 + (static_cast<char>(*(a+2)) - 48)) * scale;
95 }
96
97 };
98 static push_3fdig g_push_3fdig;
99
100 struct push_4fdig
101 {
102 void operator()(const char* a, const char* b, uint32_t* n) const
103 {
104 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
105 const double scale = sec_scale(9-4); //nano-sec
106 #else
107 const double scale = sec_scale(6-4); //micro-sec
108 #endif
109
110 *n = ((static_cast<char>(*a) - 48) * 1000 + (static_cast<char>(*(a+1)) - 48) * 100 + (static_cast<char>(*(a+2)) - 48) * 10 + (static_cast<char>(*(a+3)) - 48)) * scale;
111 }
112
113 };
114 static push_4fdig g_push_4fdig;
115
116 struct push_5fdig
117 {
118 void operator()(const char* a, const char* b, uint32_t* n) const
119 {
120 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
121 const double scale = sec_scale(9-5); //nano-sec
122 #else
123 const double scale = sec_scale(6-5); //micro-sec
124 #endif
125
126 *n = ((static_cast<char>(*a) - 48) * 10000 + (static_cast<char>(*(a+1)) - 48) * 1000 + (static_cast<char>(*(a+2)) - 48) * 100 + (static_cast<char>(*(a+3)) - 48) * 10 + (static_cast<char>(*(a+4)) - 48)) * scale;
127 }
128
129 };
130 static push_5fdig g_push_5fdig;
131
132 struct push_6fdig
133 {
134 void operator()(const char* a, const char* b, uint32_t* n) const
135 {
136 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
137 const double scale = sec_scale(9-6); //nano-sec
138 #else
139 const double scale = sec_scale(6-6); //micro-sec
140 #endif
141
142 *n = ((static_cast<char>(*a) - 48) * 100000 + (static_cast<char>(*(a+1)) - 48) * 10000 + (static_cast<char>(*(a+2)) - 48) * 1000 + (static_cast<char>(*(a+3)) - 48) * 100 + (static_cast<char>(*(a+4)) - 48) * 10 + (static_cast<char>(*(a+5)) - 48)) * scale;
143 }
144
145 };
146 static push_6fdig g_push_6fdig;
147
148 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
149 struct push_7fdig
150 {
151 void operator()(const char* a, const char* b, uint32_t* n) const
152 {
153 const double scale = sec_scale(9-7); //nano-sec
154 *n = ((static_cast<char>(*a) - 48) * 1000000 + (static_cast<char>(*(a+1)) - 48) * 100000 + (static_cast<char>(*(a+2)) - 48) * 10000 + (static_cast<char>(*(a+3)) - 48) * 1000 + (static_cast<char>(*(a+4)) - 48) * 100 + (static_cast<char>(*(a+5)) - 48) * 10 + (static_cast<char>(*(a+6)) - 48)) * scale;
155 }
156
157 };
158 static push_7fdig g_push_7fdig;
159
160 struct push_8fdig
161 {
162 void operator()(const char* a, const char* b, uint32_t* n) const
163 {
164 const double scale = sec_scale(9-8); //nano-sec
165 *n = ((static_cast<char>(*a) - 48) * 10000000 + (static_cast<char>(*(a+1)) - 48) * 1000000 + (static_cast<char>(*(a+2)) - 48) * 100000 + (static_cast<char>(*(a+3)) - 48) * 10000 + (static_cast<char>(*(a+4)) - 48) * 1000 + (static_cast<char>(*(a+5)) - 48) * 100 + (static_cast<char>(*(a+6)) - 48) * 10 + (static_cast<char>(*(a+7)) - 48)) * scale;
166 }
167
168 };
169 static push_8fdig g_push_8fdig;
170
171 struct push_9fdig
172 {
173 void operator()(const char* a, const char* b, uint32_t* n) const
174 {
175 const double scale = sec_scale(9-9); //nano-sec
176 *n = ((static_cast<char>(*a) - 48) * 100000000 + (static_cast<char>(*(a+1)) - 48) * 10000000 + (static_cast<char>(*(a+2)) - 48) * 1000000 + (static_cast<char>(*(a+3)) - 48) * 100000 + (static_cast<char>(*(a+4)) - 48) * 10000 + (static_cast<char>(*(a+5)) - 48) * 1000 + (static_cast<char>(*(a+6)) - 48) * 100 + (static_cast<char>(*(a+7)) - 48) * 10 + (static_cast<char>(*(a+8)) - 48)) * scale;
177 }
178
179 };
180 static push_9fdig g_push_9fdig;
181 #endif
182
// Identifiers of the functions/operators known to the function library.
// The enumerator order is kept as-is, so the underlying values do not change.
enum class s3select_func_En_t
{
  // arithmetic and aggregation
  ADD,
  SUM,
  AVG,
  MIN,
  MAX,
  COUNT,

  // conversions
  TO_INT,
  TO_FLOAT,
  TO_TIMESTAMP,
  TO_STRING_CONSTANT,
  TO_STRING_DYNAMIC,
  TO_BOOL,
  SUBSTR,

  // extract(<part> from timestamp)
  EXTRACT_YEAR,
  EXTRACT_MONTH,
  EXTRACT_DAY,
  EXTRACT_HOUR,
  EXTRACT_MINUTE,
  EXTRACT_SECOND,
  EXTRACT_WEEK,
  EXTRACT_TIMEZONE_HOUR,
  EXTRACT_TIMEZONE_MINUTE,

  // date_add
  DATE_ADD_YEAR,
  DATE_ADD_MONTH,
  DATE_ADD_DAY,
  DATE_ADD_HOUR,
  DATE_ADD_MINUTE,
  DATE_ADD_SECOND,

  // date_diff
  DATE_DIFF_YEAR,
  DATE_DIFF_MONTH,
  DATE_DIFF_DAY,
  DATE_DIFF_HOUR,
  DATE_DIFF_MINUTE,
  DATE_DIFF_SECOND,

  UTCNOW,

  // string helpers
  LENGTH,
  LOWER,
  UPPER,

  // predicates and conditionals
  NULLIF,
  BETWEEN,
  NOT_BETWEEN,
  IS_NULL,
  IS_NOT_NULL,
  IN,
  LIKE,
  VERSION,
  CASE_WHEN_ELSE,
  WHEN_THEN,
  WHEN_VALUE_THEN,
  COALESCE,

  // string / trim
  STRING,
  TRIM,
  LEADING,
  TRAILING,

  // decimal handling
  DECIMAL_OPERATOR,
  CAST_TO_DECIMAL,

  ENGINE_VERSION
};
241
242
243 class s3select_functions
244 {
245
246 private:
247
248 using FunctionLibrary = std::map<std::string, s3select_func_En_t>;
249 s3select_allocator* m_s3select_allocator;
250 std::set<base_statement*>* m_ast_nodes_for_cleanup;
251
252 const FunctionLibrary m_functions_library =
253 {
254 {"add", s3select_func_En_t::ADD},
255 {"sum", s3select_func_En_t::SUM},
256 {"avg", s3select_func_En_t::AVG},
257 {"count", s3select_func_En_t::COUNT},
258 {"min", s3select_func_En_t::MIN},
259 {"max", s3select_func_En_t::MAX},
260 {"int", s3select_func_En_t::TO_INT},
261 {"float", s3select_func_En_t::TO_FLOAT},
262 {"substring", s3select_func_En_t::SUBSTR},
263 {"to_timestamp", s3select_func_En_t::TO_TIMESTAMP},
264 {"#to_string_constant#",s3select_func_En_t::TO_STRING_CONSTANT},
265 {"#to_string_dynamic#",s3select_func_En_t::TO_STRING_DYNAMIC},
266 {"to_bool", s3select_func_En_t::TO_BOOL},
267 {"#extract_year#", s3select_func_En_t::EXTRACT_YEAR},
268 {"#extract_month#", s3select_func_En_t::EXTRACT_MONTH},
269 {"#extract_day#", s3select_func_En_t::EXTRACT_DAY},
270 {"#extract_hour#", s3select_func_En_t::EXTRACT_HOUR},
271 {"#extract_minute#", s3select_func_En_t::EXTRACT_MINUTE},
272 {"#extract_second#", s3select_func_En_t::EXTRACT_SECOND},
273 {"#extract_week#", s3select_func_En_t::EXTRACT_WEEK},
274 {"#extract_timezone_hour#", s3select_func_En_t::EXTRACT_TIMEZONE_HOUR},
275 {"#extract_timezone_minute#", s3select_func_En_t::EXTRACT_TIMEZONE_MINUTE},
276 {"#dateadd_year#", s3select_func_En_t::DATE_ADD_YEAR},
277 {"#dateadd_month#", s3select_func_En_t::DATE_ADD_MONTH},
278 {"#dateadd_day#", s3select_func_En_t::DATE_ADD_DAY},
279 {"#dateadd_hour#", s3select_func_En_t::DATE_ADD_HOUR},
280 {"#dateadd_minute#", s3select_func_En_t::DATE_ADD_MINUTE},
281 {"#dateadd_second#", s3select_func_En_t::DATE_ADD_SECOND},
282 {"#datediff_year#", s3select_func_En_t::DATE_DIFF_YEAR},
283 {"#datediff_month#", s3select_func_En_t::DATE_DIFF_MONTH},
284 {"#datediff_day#", s3select_func_En_t::DATE_DIFF_DAY},
285 {"#datediff_hour#", s3select_func_En_t::DATE_DIFF_HOUR},
286 {"#datediff_minute#", s3select_func_En_t::DATE_DIFF_MINUTE},
287 {"#datediff_second#", s3select_func_En_t::DATE_DIFF_SECOND},
288 {"utcnow", s3select_func_En_t::UTCNOW},
289 {"character_length", s3select_func_En_t::LENGTH},
290 {"char_length", s3select_func_En_t::LENGTH},
291 {"lower", s3select_func_En_t::LOWER},
292 {"upper", s3select_func_En_t::UPPER},
293 {"nullif", s3select_func_En_t::NULLIF},
294 {"#between#", s3select_func_En_t::BETWEEN},
295 {"#not_between#", s3select_func_En_t::NOT_BETWEEN},
296 {"#is_null#", s3select_func_En_t::IS_NULL},
297 {"#is_not_null#", s3select_func_En_t::IS_NOT_NULL},
298 {"#in_predicate#", s3select_func_En_t::IN},
299 {"#like_predicate#", s3select_func_En_t::LIKE},
300 {"version", s3select_func_En_t::VERSION},
301 {"#when-then#", s3select_func_En_t::WHEN_THEN},
302 {"#when-value-then#", s3select_func_En_t::WHEN_VALUE_THEN},
303 {"#case-when-else#", s3select_func_En_t::CASE_WHEN_ELSE},
304 {"coalesce", s3select_func_En_t::COALESCE},
305 {"string", s3select_func_En_t::STRING},
306 {"#trim#", s3select_func_En_t::TRIM},
307 {"#leading#", s3select_func_En_t::LEADING},
308 {"#trailing#", s3select_func_En_t::TRAILING},
309 {"#decimal_operator#", s3select_func_En_t::DECIMAL_OPERATOR},
310 {"#cast_as_decimal#", s3select_func_En_t::CAST_TO_DECIMAL},
311 {"engine_version", s3select_func_En_t::ENGINE_VERSION}
312
313 };
314
315 public:
316
317 base_function* create(std::string_view fn_name,const bs_stmt_vec_t&);
318
319 s3select_functions():m_s3select_allocator(nullptr),m_ast_nodes_for_cleanup(nullptr)
320 {
321 }
322
323
324 void setAllocator(s3select_allocator* alloc)
325 {
326 m_s3select_allocator = alloc;
327 }
328
329 void set_AST_nodes_for_cleanup(std::set<base_statement*>* ast_for_cleanup)
330 {
331 m_ast_nodes_for_cleanup = ast_for_cleanup;
332 }
333
334 s3select_allocator* getAllocator()
335 {
336 return m_s3select_allocator;
337 }
338
339 void clean();
340
341 };
342
343 class __function : public base_statement
344 {
345
346 private:
347 bs_stmt_vec_t arguments;
348 std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> name;
349 base_function* m_func_impl;
350 s3select_functions* m_s3select_functions;
351 variable m_result;
352 bool m_is_aggregate_function;
353
354 void _resolve_name()
355 {
356 if (m_func_impl)
357 {
358 return;
359 }
360
361 auto string_to_lower = [](std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> s)
362 {
363 std::transform(s.begin(),s.end(),s.begin(),[](unsigned char c){ return std::tolower(c); });
364 return s;
365 };
366
367 //the function name is converted into lowercase to enable case-insensitive
368 base_function* f = m_s3select_functions->create(string_to_lower(name),arguments);
369 if (!f)
370 {
371 throw base_s3select_exception("function not found", base_s3select_exception::s3select_exp_en_t::FATAL); //should abort query
372 }
373 m_func_impl = f;
374 m_is_aggregate_function= m_func_impl->is_aggregate();
375 f->set_function_name(name.c_str());
376 }
377
378 public:
379
380 base_function* impl()
381 {
382 return m_func_impl;
383 }
384
385 void traverse_and_apply(scratch_area* sa, projection_alias* pa,bool json_statement) override
386 {
387 m_scratch = sa;
388 m_aliases = pa;
389 m_json_statement = json_statement;
390 for (base_statement* ba : arguments)
391 {
392 ba->traverse_and_apply(sa, pa, json_statement);
393 }
394 }
395
396 void set_last_call() override
397 {//it cover the use-case where aggregation function is an argument in non-aggregate function.
398 is_last_call = true;
399 for (auto& ba : arguments)
400 {
401 ba->set_last_call();
402 }
403 }
404
405 void set_skip_non_aggregate(bool skip_non_aggregate_op) override
406 {//it cover the use-case where aggregation function is an argument in non-aggregate function.
407 m_skip_non_aggregate_op = skip_non_aggregate_op;
408 for (auto& ba : arguments)
409 {
410 ba->set_skip_non_aggregate(m_skip_non_aggregate_op);
411 }
412 }
413
414 bool is_aggregate() const override
415 {
416 return m_is_aggregate_function;
417 }
418
419 bool semantic() override
420 {
421 return true;
422 }
423
424 __function(const char* fname, s3select_functions* s3f) : name(fname), m_func_impl(nullptr), m_s3select_functions(s3f),m_is_aggregate_function(false){set_operator_name(fname);}
425
426 value& eval() override
427 {
428 return eval_internal();
429 }
430
431 value& eval_internal() override
432 {
433
434 _resolve_name();//node is "resolved" (function is created) upon first call/first row.
435
436 if (is_last_call == false)
437 {//all rows prior to last row
438 if(m_skip_non_aggregate_op == false || is_aggregate() == true)
439 {
440 (*m_func_impl)(&arguments, &m_result);
441 }
442 else if(m_skip_non_aggregate_op == true)
443 {
444 for(auto& p : arguments)
445 {//evaluating the arguments (not the function itself, which is a non-aggregate function)
446 //i.e. in the following use case substring( , sum(),count() ) ; only sum() and count() are evaluated.
447 p->eval();
448 }
449 }
450 }
451 else
452 {//on the last row, the aggregate function is finalized,
453 //and non-aggregate function is evaluated with the result of aggregate function.
454 if(is_aggregate())
455 (*m_func_impl).get_aggregate_result(&m_result);
456 else
457 (*m_func_impl)(&arguments, &m_result);
458 }
459
460 return m_result.get_value();
461 }
462
463 void resolve_node() override
464 {
465 _resolve_name();
466
467 for (auto& arg : arguments)
468 {
469 arg->resolve_node();
470 }
471 }
472
473 std::string print(int ident) override
474 {
475 return std::string(0);
476 }
477
478 void push_argument(base_statement* arg)
479 {
480 arguments.push_back(arg);
481 }
482
483
484 bs_stmt_vec_t& get_arguments()
485 {
486 return arguments;
487 }
488
489 virtual ~__function() = default;
490 };
491
/*
 s3-select function definitions
*/
495 struct _fn_add : public base_function
496 {
497
498 value var_result;
499
500 bool operator()(bs_stmt_vec_t* args, variable* result) override
501 {
502 check_args_size(args,2);
503
504 auto iter = args->begin();
505 base_statement* x = *iter;
506 iter++;
507 base_statement* y = *iter;
508
509 var_result = x->eval() + y->eval();
510
511 *result = var_result;
512
513 return true;
514 }
515 };
516
517 struct _fn_sum : public base_function
518 {
519
520 value sum;
521
522 _fn_sum()
523 {
524 aggregate = true;
525 sum.setnull();
526 }
527
528 bool operator()(bs_stmt_vec_t* args, variable* result) override
529 {
530 check_args_size(args,1);
531
532 auto iter = args->begin();
533 base_statement* x = *iter;
534
535 try
536 {
537 if(sum.is_null())
538 {
539 sum = 0;
540 }
541 sum = sum + x->eval();
542 }
543 catch (base_s3select_exception& e)
544 {
545 if (e.severity() == base_s3select_exception::s3select_exp_en_t::FATAL)
546 {
547 throw;
548 }
549 }
550
551 return true;
552 }
553
554 void get_aggregate_result(variable* result) override
555 {
556 *result = sum ;
557 }
558 };
559
560 struct _fn_count : public base_function
561 {
562
563 int64_t count;
564
565 _fn_count():count(0)
566 {
567 aggregate=true;
568 }
569
570 bool operator()(bs_stmt_vec_t* args, variable* result) override
571 {
572 if (args->size())
573 {// in case argument exist, should count only non-null.
574 auto iter = args->begin();
575 base_statement* x = *iter;
576
577 if(!x->eval().is_null())
578 {
579 count += 1;
580 }
581 }
582 else
583 {//in case of non-arguments // count()
584 count += 1;
585 }
586
587 return true;
588 }
589
590 void get_aggregate_result(variable* result) override
591 {
592 result->set_value(count);
593 }
594
595 };
596
597 struct _fn_avg : public base_function
598 {
599
600 value sum;
601 value count{0.0};
602
603 _fn_avg() : sum(0) { aggregate = true; }
604
605 bool operator()(bs_stmt_vec_t* args, variable *result) override
606 {
607 check_args_size(args,1);
608
609 auto iter = args->begin();
610 base_statement *x = *iter;
611
612 try
613 {
614 sum = sum + x->eval();
615 count++;
616 }
617 catch (base_s3select_exception &e)
618 {
619 throw base_s3select_exception(e.what());
620 }
621
622 return true;
623 }
624
625 void get_aggregate_result(variable *result) override
626 {
627 if(count == static_cast<value>(0)) {
628 value v_null;
629 v_null.setnull();
630 *result=v_null;
631 } else {
632 *result = sum/count ;
633 }
634 }
635 };
636
637 struct _fn_min : public base_function
638 {
639
640 value min;
641
642 _fn_min()
643 {
644 aggregate=true;
645 min.setnull();
646 }
647
648 bool operator()(bs_stmt_vec_t* args, variable* result) override
649 {
650 check_args_size(args,1);
651
652 auto iter = args->begin();
653 base_statement* x = *iter;
654
655 if(min.is_null() || min > x->eval())
656 {
657 min=x->eval();
658 }
659
660 return true;
661 }
662
663 void get_aggregate_result(variable* result) override
664 {
665 *result = min;
666 }
667
668 };
669
670 struct _fn_max : public base_function
671 {
672
673 value max;
674
675 _fn_max()
676 {
677 aggregate=true;
678 max.setnull();
679 }
680
681 bool operator()(bs_stmt_vec_t* args, variable* result) override
682 {
683 check_args_size(args,1);
684
685 auto iter = args->begin();
686 base_statement* x = *iter;
687
688 if(max.is_null() || max < x->eval())
689 {
690 max=x->eval();
691 }
692
693 return true;
694 }
695
696 void get_aggregate_result(variable* result) override
697 {
698 *result = max;
699 }
700
701 };
702
703 struct _fn_to_int : public base_function
704 {
705 value var_result;
706
707 bool operator()(bs_stmt_vec_t* args, variable* result) override
708 {
709 check_args_size(args,1);
710
711 value v = (*args->begin())->eval();
712
713 switch (v.type) {
714
715 case value::value_En_t::STRING:
716 {
717 char* pend;
718 errno = 0;
719 int64_t i= strtol(v.str(), &pend, 10);
720 if (errno == ERANGE) {
721 throw base_s3select_exception("converted value would fall out of the range of the result type!");
722 }
723 if (pend == v.str()) {
724 // no number found
725 throw base_s3select_exception("text cannot be converted to a number");
726 }
727 if (*pend) {
728 throw base_s3select_exception("extra characters after the number");
729 }
730
731 var_result = i;
732 }
733 break;
734
735 case value::value_En_t::FLOAT:
736 var_result = static_cast<int64_t>(v.dbl());
737 break;
738
739 default:
740 var_result = v.i64();
741 break;
742 }
743
744 *result = var_result;
745 return true;
746 }
747
748 };
749
750 struct _fn_to_float : public base_function
751 {
752 value var_result;
753
754 bool operator()(bs_stmt_vec_t* args, variable* result) override
755 {
756 check_args_size(args,1);
757
758 value v = (*args->begin())->eval();
759
760 switch (v.type) {
761
762 case value::value_En_t::STRING:
763 {
764 char* pend;
765 double d = strtod(v.str(), &pend);
766 if (errno == ERANGE) {
767 throw base_s3select_exception("converted value would fall out of the range of the result type!");
768 }
769 if (pend == v.str()) {
770 // no number found
771 throw base_s3select_exception("text cannot be converted to a number");
772 }
773 if (*pend) {
774 throw base_s3select_exception("extra characters after the number");
775 }
776
777 var_result = d;
778 }
779 break;
780
781 case value::value_En_t::FLOAT:
782 var_result = v.dbl();
783 break;
784
785 default:
786 var_result = v.i64();
787 break;
788 }
789
790 *result = var_result;
791 return true;
792 }
793
794 };
795
796 struct _fn_to_timestamp : public base_function
797 {
798 bsc::rule<> date_separator = bsc::ch_p("-");
799 bsc::rule<> time_separator = bsc::ch_p(":");
800 bsc::rule<> nano_sec_separator = bsc::ch_p(".");
801 bsc::rule<> delimiter = bsc::ch_p("T");
802 bsc::rule<> zero_timezone = bsc::ch_p("Z");
803 bsc::rule<> timezone_sign = bsc::ch_p("-") | bsc::ch_p("+");
804
805 uint32_t yr = 1700, mo = 1, dy = 1;
806 bsc::rule<> dig4 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
807 bsc::rule<> dig2 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p];
808
809 bsc::rule<> d_yyyy_dig = ((dig4[BOOST_BIND_ACTION_PARAM(push_4dig, &yr)]) >> *(delimiter));
810 bsc::rule<> d_yyyymmdd_dig = ((dig4[BOOST_BIND_ACTION_PARAM(push_4dig, &yr)]) >> *(date_separator)
811 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mo)]) >> *(date_separator)
812 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &dy)]) >> *(delimiter));
813
814 uint32_t hr = 0, mn = 0, sc = 0, frac_sec = 0, tz_hr = 0, tz_mn = 0, sign = 0, tm_zone = '0';
815
816 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
817 bsc::rule<> fdig9 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
818 bsc::rule<> fdig8 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
819 bsc::rule<> fdig7 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
820 #endif
821
822 bsc::rule<> fdig6 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
823 bsc::rule<> fdig5 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
824 bsc::rule<> fdig4 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
825 bsc::rule<> fdig3 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p >> bsc::digit_p];
826 bsc::rule<> fdig2 = bsc::lexeme_d[bsc::digit_p >> bsc::digit_p];
827 bsc::rule<> fdig1 = bsc::lexeme_d[bsc::digit_p];
828
829 bsc::rule<> d_timezone_dig = ((timezone_sign[BOOST_BIND_ACTION_PARAM(push_char, &sign)]) >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &tz_hr)]) >> *(time_separator)
830 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &tz_mn)])) | (zero_timezone[BOOST_BIND_ACTION_PARAM(push_char, &tm_zone)]);
831
832 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
833 bsc::rule<> fraction_sec = (fdig9[BOOST_BIND_ACTION_PARAM(push_9fdig, &frac_sec)]) |
834 (fdig8[BOOST_BIND_ACTION_PARAM(push_8fdig, &frac_sec)]) |
835 (fdig7[BOOST_BIND_ACTION_PARAM(push_7fdig, &frac_sec)]) |
836 (fdig6[BOOST_BIND_ACTION_PARAM(push_6fdig, &frac_sec)]) |
837 (fdig5[BOOST_BIND_ACTION_PARAM(push_5fdig, &frac_sec)]) |
838 (fdig4[BOOST_BIND_ACTION_PARAM(push_4fdig, &frac_sec)]) |
839 (fdig3[BOOST_BIND_ACTION_PARAM(push_3fdig, &frac_sec)]) |
840 (fdig2[BOOST_BIND_ACTION_PARAM(push_2fdig, &frac_sec)]) |
841 (fdig1[BOOST_BIND_ACTION_PARAM(push_1fdig, &frac_sec)]);
842 #else
843 bsc::rule<> fraction_sec = (fdig6[BOOST_BIND_ACTION_PARAM(push_6fdig, &frac_sec)]) |
844 (fdig5[BOOST_BIND_ACTION_PARAM(push_5fdig, &frac_sec)]) |
845 (fdig4[BOOST_BIND_ACTION_PARAM(push_4fdig, &frac_sec)]) |
846 (fdig3[BOOST_BIND_ACTION_PARAM(push_3fdig, &frac_sec)]) |
847 (fdig2[BOOST_BIND_ACTION_PARAM(push_2fdig, &frac_sec)]) |
848 (fdig1[BOOST_BIND_ACTION_PARAM(push_1fdig, &frac_sec)]);
849 #endif
850
851 bsc::rule<> d_time_dig = ((dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &hr)]) >> *(time_separator)
852 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mn)]) >> *(time_separator)
853 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &sc)]) >> *(nano_sec_separator)
854 >> (fraction_sec) >> (d_timezone_dig)) |
855 ((dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &hr)]) >> *(time_separator)
856 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mn)]) >> *(time_separator)
857 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &sc)]) >> (d_timezone_dig)) |
858 ((dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &hr)]) >> *(time_separator)
859 >> (dig2[BOOST_BIND_ACTION_PARAM(push_2dig, &mn)]) >> (d_timezone_dig));
860
861 bsc::rule<> d_date_time = ((d_yyyymmdd_dig) >> (d_time_dig)) | (d_yyyymmdd_dig) | (d_yyyy_dig);
862
863 timestamp_t tmstmp;
864 value v_str;
865 int tz_hour, tz_min;
866
867 bool datetime_validation()
868 {
869 if (yr >= 1400 && yr <= 9999 && mo >= 1 && mo <= 12 && dy >= 1 && hr < 24 && mn < 60 && sc < 60 && tz_hour <= 14 && tz_hour >= -12 && tz_mn < 60)
870 {
871 if ( (tz_hour == -12 || tz_hour == 14) && tz_mn > 0)
872 return false;
873
874 switch (mo)
875 {
876 case 1:
877 case 3:
878 case 5:
879 case 7:
880 case 8:
881 case 10:
882 case 12:
883 if(dy <= 31)
884 {
885 return true;
886 }
887 break;
888 case 4:
889 case 6:
890 case 9:
891 case 11:
892 if(dy <= 30)
893 {
894 return true;
895 }
896 break;
897 case 2:
898 if(dy >= 28)
899 {
900 if(!(yr % 4) == 0 && dy > 28)
901 {
902 return false;
903 }
904 else if(!(yr % 100) == 0 && dy <= 29)
905 {
906 return true;
907 }
908 else if(!(yr % 400) == 0 && dy > 28)
909 {
910 return false;
911 }
912 else
913 {
914 return true;
915 }
916 }
917 else
918 {
919 return true;
920 }
921 break;
922 default:
923 return false;
924 break;
925 }
926 }
927 return false;
928 }
929
930 bool operator()(bs_stmt_vec_t* args, variable* result) override
931 {
932
933 hr = 0;
934 mn = 0;
935 sc = 0;
936 frac_sec = 0;
937 tz_hr = 0;
938 tz_mn = 0;
939 tm_zone = '0';
940
941 auto iter = args->begin();
942 int args_size = args->size();
943
944 if (args_size != 1)
945 {
946 throw base_s3select_exception("to_timestamp should have one parameter");
947 }
948
949 base_statement* str = *iter;
950
951 v_str = str->eval();
952
953 if (v_str.type != value::value_En_t::STRING)
954 {
955 throw base_s3select_exception("to_timestamp first argument must be string"); //can skip current row
956 }
957
958 bsc::parse_info<> info_dig = bsc::parse(v_str.str(), d_date_time);
959
960 tz_hour = tz_hr;
961 tz_min = tz_mn;
962 if ((char)sign == '-')
963 {
964 tz_hour *= -1;
965 tz_min *= -1;
966 }
967
968 if(datetime_validation()==false or !info_dig.full)
969 {
970 throw base_s3select_exception("input date-time is illegal");
971 }
972
973 boost::posix_time::ptime new_ptime;
974
975 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
976 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mo, dy),
977 boost::posix_time::hours(hr) +
978 boost::posix_time::minutes(mn) +
979 boost::posix_time::seconds(sc) +
980 boost::posix_time::nanoseconds(frac_sec));
981 #else
982 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mo, dy),
983 boost::posix_time::hours(hr) +
984 boost::posix_time::minutes(mn) +
985 boost::posix_time::seconds(sc) +
986 boost::posix_time::microseconds(frac_sec));
987 #endif
988
989 tmstmp = std::make_tuple(new_ptime, boost::posix_time::time_duration(tz_hour, tz_min, 0), (char)tm_zone == 'Z');
990
991 result->set_value(&tmstmp);
992
993 return true;
994 }
995
996 };
997
998 struct _fn_to_string_constant : public base_timestamp_to_string
999 {
1000 bool operator()(bs_stmt_vec_t* args, variable* result) override
1001 {
1002 param_validation(args);
1003
1004 if (!initialized)
1005 {
1006 prepare_to_string_vector(print_vector, para);
1007 initialized = true;
1008 }
1009
1010 std::string result_ = execute_to_string(print_vector, para);
1011
1012 result->set_value(result_.c_str());
1013 return true;
1014 }
1015 };
1016
1017 struct _fn_to_string_dynamic : public base_timestamp_to_string
1018 {
1019 bool operator()(bs_stmt_vec_t* args, variable* result) override
1020 {
1021 param_validation(args);
1022
1023 print_vector.clear();
1024 para.clear();
1025
1026 prepare_to_string_vector(print_vector, para);
1027
1028 std::string result_ = execute_to_string(print_vector, para);
1029
1030 result->set_value(result_.c_str());
1031 return true;
1032 }
1033 };
1034
1035 struct _fn_extract_year_from_timestamp : public base_date_extract
1036 {
1037 bool operator()(bs_stmt_vec_t* args, variable* result) override
1038 {
1039 param_validation(args);
1040
1041 result->set_value( (int64_t)new_ptime.date().year());
1042 return true;
1043 }
1044 };
1045
1046 struct _fn_extract_month_from_timestamp : public base_date_extract
1047 {
1048 bool operator()(bs_stmt_vec_t* args, variable* result) override
1049 {
1050 param_validation(args);
1051
1052 result->set_value( (int64_t)new_ptime.date().month());
1053 return true;
1054 }
1055 };
1056
1057 struct _fn_extract_day_from_timestamp : public base_date_extract
1058 {
1059 bool operator()(bs_stmt_vec_t* args, variable* result) override
1060 {
1061 param_validation(args);
1062
1063 result->set_value( (int64_t)new_ptime.date().day());
1064 return true;
1065 }
1066 };
1067
1068 struct _fn_extract_hour_from_timestamp : public base_date_extract
1069 {
1070 bool operator()(bs_stmt_vec_t* args, variable* result) override
1071 {
1072 param_validation(args);
1073
1074 result->set_value( (int64_t)new_ptime.time_of_day().hours());
1075 return true;
1076 }
1077 };
1078
1079 struct _fn_extract_minute_from_timestamp : public base_date_extract
1080 {
1081 bool operator()(bs_stmt_vec_t* args, variable* result) override
1082 {
1083 param_validation(args);
1084
1085 result->set_value( (int64_t)new_ptime.time_of_day().minutes());
1086 return true;
1087 }
1088 };
1089
1090 struct _fn_extract_second_from_timestamp : public base_date_extract
1091 {
1092 bool operator()(bs_stmt_vec_t* args, variable* result) override
1093 {
1094 param_validation(args);
1095
1096 result->set_value( (int64_t)new_ptime.time_of_day().seconds());
1097 return true;
1098 }
1099 };
1100
1101 struct _fn_extract_week_from_timestamp : public base_date_extract
1102 {
1103 bool operator()(bs_stmt_vec_t* args, variable* result) override
1104 {
1105 param_validation(args);
1106
1107 result->set_value( (int64_t)new_ptime.date().week_number());
1108 return true;
1109 }
1110 };
1111
1112 struct _fn_extract_tz_hour_from_timestamp : public base_date_extract
1113 {
1114 bool operator()(bs_stmt_vec_t* args, variable* result) override
1115 {
1116 param_validation(args);
1117
1118 result->set_value((int64_t)td.hours());
1119 return true;
1120 }
1121 };
1122
1123 struct _fn_extract_tz_minute_from_timestamp : public base_date_extract
1124 {
1125 bool operator()(bs_stmt_vec_t* args, variable* result) override
1126 {
1127 param_validation(args);
1128
1129 result->set_value((int64_t)td.minutes());
1130 return true;
1131 }
1132 };
1133
1134 struct _fn_diff_year_timestamp : public base_date_diff
1135 {
1136 bool operator()(bs_stmt_vec_t* args, variable* result) override
1137 {
1138 param_validation(args);
1139
1140 int year1 = ptime1.date().year();
1141 int year2 = ptime2.date().year();
1142 boost::posix_time::time_duration time1 = boost::posix_time::time_duration(
1143 ptime1.time_of_day().hours(), ptime1.time_of_day().minutes(),
1144 ptime1.time_of_day().seconds());
1145 boost::posix_time::time_duration time2 = boost::posix_time::time_duration(
1146 ptime2.time_of_day().hours(), ptime2.time_of_day().minutes(),
1147 ptime2.time_of_day().seconds());
1148
1149 if (year2 > year1 && ((ptime2.date().day_of_year() < ptime1.date().day_of_year()) ||
1150 (ptime2.date().day_of_year() == ptime1.date().day_of_year() && time2 < time1)))
1151 {
1152 year2 -= 1;
1153 }
1154 else if (year2 < year1 && ((ptime2.date().day_of_year() > ptime1.date().day_of_year()) ||
1155 (ptime2.date().day_of_year() == ptime1.date().day_of_year() && time2 > time1)))
1156 {
1157 year2 += 1;
1158 }
1159
1160 int64_t yr = year2 - year1;
1161 result->set_value( yr );
1162 return true;
1163 }
1164 };
1165
1166 struct _fn_diff_month_timestamp : public base_date_diff
1167 {
1168 bool operator()(bs_stmt_vec_t* args, variable* result) override
1169 {
1170 param_validation(args);
1171
1172 int year1 = ptime1.date().year();
1173 int year2 = ptime2.date().year();
1174 int mon1 = ptime1.date().month();
1175 int mon2 = ptime2.date().month();
1176 boost::posix_time::time_duration time1 = boost::posix_time::time_duration(
1177 ptime1.time_of_day().hours(), ptime1.time_of_day().minutes(),
1178 ptime1.time_of_day().seconds());
1179 boost::posix_time::time_duration time2 = boost::posix_time::time_duration(
1180 ptime2.time_of_day().hours(), ptime2.time_of_day().minutes(),
1181 ptime2.time_of_day().seconds());
1182
1183 if (year2 > year1)
1184 {
1185 if (ptime2.date().day() < ptime1.date().day() || (ptime2.date().day() == ptime1.date().day() && time2 < time1))
1186 {
1187 mon2 -= 1;
1188 }
1189
1190 if (ptime2.date().month() < ptime1.date().month())
1191 {
1192 mon2 += 12;
1193 year2 -= 1;
1194 }
1195 }
1196 else if (year2 < year1)
1197 {
1198 if (ptime2.date().day() > ptime1.date().day() || (ptime2.date().day() == ptime1.date().day() && time2 > time1))
1199 {
1200 mon1 -= 1;
1201 }
1202
1203 if (ptime2.date().month() > ptime1.date().month())
1204 {
1205 mon1 += 12;
1206 year1 -= 1;
1207 }
1208 }
1209
1210 int64_t mon_diff = (year2 - year1) * 12 + mon2 - mon1;
1211
1212 result->set_value(mon_diff);
1213 return true;
1214 }
1215 };
1216
1217 struct _fn_diff_day_timestamp : public base_date_diff
1218 {
1219 bool operator()(bs_stmt_vec_t* args, variable* result) override
1220 {
1221 param_validation(args);
1222
1223 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1224 int total_seconds = (((td_res.hours() * 60) + td_res.minutes()) * 60) + td_res.seconds();
1225 int64_t days = total_seconds / (24 * 3600);
1226
1227 result->set_value(days);
1228 return true;
1229 }
1230 };
1231
1232 struct _fn_diff_hour_timestamp : public base_date_diff
1233 {
1234 bool operator()(bs_stmt_vec_t* args, variable* result) override
1235 {
1236 param_validation(args);
1237
1238 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1239 result->set_value((int64_t)td_res.hours());
1240 return true;
1241 }
1242 };
1243
1244 struct _fn_diff_minute_timestamp : public base_date_diff
1245 {
1246 bool operator()(bs_stmt_vec_t* args, variable* result) override
1247 {
1248 param_validation(args);
1249
1250 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1251 result->set_value((int64_t)((td_res.hours() * 60) + td_res.minutes()));
1252 return true;
1253 }
1254 };
1255
1256 struct _fn_diff_second_timestamp : public base_date_diff
1257 {
1258 bool operator()(bs_stmt_vec_t* args, variable* result) override
1259 {
1260 param_validation(args);
1261
1262 boost::posix_time::time_duration td_res = ptime2 - ptime1;
1263 result->set_value((int64_t)((((td_res.hours() * 60) + td_res.minutes()) * 60) + td_res.seconds()));
1264 return true;
1265 }
1266 };
1267
1268 struct _fn_add_year_to_timestamp : public base_date_add
1269 {
1270 bool operator()(bs_stmt_vec_t* args, variable* result) override
1271 {
1272 param_validation(args);
1273
1274 new_ptime += boost::gregorian::years( val_quantity.i64() );
1275 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1276 result->set_value( &new_tmstmp );
1277 return true;
1278 }
1279 };
1280
1281 struct _fn_add_month_to_timestamp : public base_date_add
1282 {
1283 bool operator()(bs_stmt_vec_t* args, variable* result) override
1284 {
1285 param_validation(args);
1286
1287 int yr, mn, dy, quant;
1288 quant = val_quantity.i64();
1289 dy = new_ptime.date().day();
1290
1291 int temp = quant % 12;
1292 mn = new_ptime.date().month() + temp;
1293 temp = quant / 12;
1294 yr = new_ptime.date().year() + temp;
1295
1296 if (mn > 12)
1297 {
1298 yr += 1;
1299 temp = mn % 12;
1300 if (temp == 0)
1301 {
1302 temp = 12;
1303 }
1304 mn = temp;
1305 }
1306 else if (mn < 1)
1307 {
1308 yr -= 1;
1309 if (mn == 0)
1310 {
1311 mn = 12;
1312 }
1313 else
1314 {
1315 mn = 12 + mn;
1316 }
1317 }
1318
1319 if ((mn == 4 || mn == 6 || mn == 9 || mn == 11) && dy > 30)
1320 {
1321 dy = 30;
1322 }
1323 else if (mn == 2 && dy > 28)
1324 {
1325 if (!(yr % 4) == 0 || ((yr % 100) == 0 && !(yr % 400) == 0))
1326 {
1327 dy = 28;
1328 }
1329 else
1330 {
1331 dy = 29;
1332 }
1333 }
1334
1335 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
1336 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mn, dy),
1337 boost::posix_time::hours(new_ptime.time_of_day().hours()) +
1338 boost::posix_time::minutes(new_ptime.time_of_day().minutes()) +
1339 boost::posix_time::seconds(new_ptime.time_of_day().seconds()) +
1340 boost::posix_time::nanoseconds(new_ptime.time_of_day().fractional_seconds()));
1341 #else
1342 new_ptime = boost::posix_time::ptime(boost::gregorian::date(yr, mn, dy),
1343 boost::posix_time::hours(new_ptime.time_of_day().hours()) +
1344 boost::posix_time::minutes(new_ptime.time_of_day().minutes()) +
1345 boost::posix_time::seconds(new_ptime.time_of_day().seconds()) +
1346 boost::posix_time::microseconds(new_ptime.time_of_day().fractional_seconds()));
1347 #endif
1348
1349 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1350 result->set_value( &new_tmstmp );
1351 return true;
1352 }
1353 };
1354
1355 struct _fn_add_day_to_timestamp : public base_date_add
1356 {
1357 bool operator()(bs_stmt_vec_t* args, variable* result) override
1358 {
1359 param_validation(args);
1360
1361 new_ptime += boost::gregorian::days( val_quantity.i64() );
1362 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1363 result->set_value( &new_tmstmp );
1364 return true;
1365 }
1366 };
1367
1368 struct _fn_add_hour_to_timestamp : public base_date_add
1369 {
1370 bool operator()(bs_stmt_vec_t* args, variable* result) override
1371 {
1372 param_validation(args);
1373
1374 new_ptime += boost::posix_time::hours( val_quantity.i64() );
1375 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1376 result->set_value( &new_tmstmp );
1377 return true;
1378 }
1379 };
1380
1381 struct _fn_add_minute_to_timestamp : public base_date_add
1382 {
1383 bool operator()(bs_stmt_vec_t* args, variable* result) override
1384 {
1385 param_validation(args);
1386
1387 new_ptime += boost::posix_time::minutes( val_quantity.i64() );
1388 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1389 result->set_value( &new_tmstmp );
1390 return true;
1391 }
1392 };
1393
1394 struct _fn_add_second_to_timestamp : public base_date_add
1395 {
1396 bool operator()(bs_stmt_vec_t* args, variable* result) override
1397 {
1398 param_validation(args);
1399
1400 new_ptime += boost::posix_time::seconds( val_quantity.i64() );
1401 new_tmstmp = std::make_tuple(new_ptime, td, flag);
1402 result->set_value( &new_tmstmp );
1403 return true;
1404 }
1405 };
1406
1407 struct _fn_utcnow : public base_function
1408 {
1409 timestamp_t now_timestamp;
1410
1411 bool operator()(bs_stmt_vec_t* args, variable* result) override
1412 {
1413 int args_size = args->size();
1414
1415 if (args_size != 0)
1416 {
1417 throw base_s3select_exception("utcnow does not expect any parameters");
1418 }
1419
1420 boost::posix_time::ptime now_ptime = boost::posix_time::ptime( boost::posix_time::second_clock::universal_time());
1421 now_timestamp = std::make_tuple(now_ptime, boost::posix_time::time_duration(0, 0, 0), false);
1422 result->set_value( &now_timestamp );
1423
1424 return true;
1425 }
1426 };
1427
1428 struct _fn_between : public base_function
1429 {
1430
1431 value res;
1432
1433 bool operator()(bs_stmt_vec_t* args, variable* result) override
1434 {
1435 int args_size = args->size();
1436
1437
1438 if (args_size != 3)
1439 {
1440 throw base_s3select_exception("between operates on 3 expressions");//TODO FATAL
1441 }
1442
1443 auto iter = args->begin();
1444
1445 base_statement* second_expr = *iter;
1446 iter++;
1447 base_statement* first_expr = *iter;
1448 iter++;
1449 base_statement* main_expr = *iter;
1450
1451 value second_expr_val = second_expr->eval();
1452 value first_expr_val = first_expr->eval();
1453 value main_expr_val = main_expr->eval();
1454
1455 if ((second_expr_val.type == first_expr_val.type && first_expr_val.type == main_expr_val.type) || (second_expr_val.is_number() && first_expr_val.is_number() && main_expr_val.is_number()))
1456 {
1457 if((main_expr_val >= first_expr_val) && (main_expr_val <= second_expr_val)) {
1458 result->set_value(true);
1459 } else {
1460 result->set_value(false);
1461 }
1462 }
1463 return true;
1464 }
1465 };
1466
1467 struct _fn_not_between : public base_function
1468 {
1469
1470 value res;
1471 _fn_between between_op;
1472
1473 bool operator()(bs_stmt_vec_t* args, variable* result) override
1474 {
1475 between_op(args,result);
1476
1477 if (result->get_value().is_true() == 0) {
1478 result->set_value(true);
1479 } else {
1480 result->set_value(false);
1481 }
1482 return true;
1483 }
1484 };
1485
1486 static char s3select_ver[10]="41.a";
1487
1488 struct _fn_version : public base_function
1489 {
1490 value val; //TODO use git to generate sha1
1491 bool operator()(bs_stmt_vec_t* args, variable* result) override
1492 {
1493 val = &s3select_ver[0];
1494 *result = val;
1495 return true;
1496 }
1497 };
1498
1499 struct _fn_isnull : public base_function
1500 {
1501
1502 value res;
1503
1504 bool operator()(bs_stmt_vec_t* args, variable* result) override
1505 {
1506 check_args_size(args,1);
1507
1508 auto iter = args->begin();
1509 base_statement* expr = *iter;
1510 value expr_val = expr->eval();
1511 if ( expr_val.is_null()) {
1512 result->set_value(true);
1513 } else {
1514 result->set_value(false);
1515 }
1516 return true;
1517 }
1518 };
1519
1520 struct _fn_is_not_null : public base_function
1521 {
1522 value res;
1523 _fn_isnull isnull_op;
1524
1525 bool operator()(bs_stmt_vec_t* args, variable* result) override
1526 {
1527
1528 isnull_op(args,result);
1529
1530 if (result->get_value().is_true() == 0)
1531 result->set_value(true);
1532 else
1533 result->set_value(false);
1534
1535 return true;
1536 }
1537 };
1538
1539 struct _fn_in : public base_function
1540 {
1541
1542 value res;
1543
1544 bool operator()(bs_stmt_vec_t *args, variable *result) override
1545 {
1546 check_args_size(args,1);
1547
1548 int args_size = static_cast<int>(args->size()-1);
1549 base_statement *main_expr = (*args)[args_size];
1550 value main_expr_val = main_expr->eval();
1551 args_size--;
1552 while (args_size>=0)
1553 {
1554 base_statement *expr = (*args)[args_size];
1555 value expr_val = expr->eval();
1556 args_size--;
1557 if ((expr_val.type == main_expr_val.type) || (expr_val.is_number() && main_expr_val.is_number()))
1558 {
1559 if (expr_val == main_expr_val)
1560 {
1561 result->set_value(true);
1562 return true;
1563 }
1564 }
1565 }
1566 result->set_value(false);
1567 return true;
1568 }
1569 };
1570
1571 struct _fn_like : public base_like
1572 {
1573 explicit _fn_like(base_statement* esc, base_statement* like_expr)
1574 {
1575 auto is_constant = [&](base_statement* bs) {
1576 if (dynamic_cast<variable*>(bs) && dynamic_cast<variable*>(bs)->m_var_type == variable::var_t::COLUMN_VALUE) {
1577 return true;
1578 } else {
1579 return false;
1580 }
1581 };
1582
1583 if (is_constant(esc) && is_constant(like_expr)) {
1584 constant_state = true;
1585 }
1586
1587 if(constant_state == true)
1588 {
1589 param_validation(esc, like_expr);
1590 std::vector<char> like_as_regex = transform(like_expr_val.str(), *escape_expr_val.str());
1591 compile(like_as_regex);
1592 }
1593 }
1594
1595 bool operator()(bs_stmt_vec_t* args, variable* result) override
1596 {
1597 check_args_size(args,3);
1598
1599 auto iter = args->begin();
1600
1601 base_statement* escape_expr = *iter;
1602 iter++;
1603 base_statement* like_expr = *iter;
1604 iter++;
1605 base_statement* main_expr = *iter;
1606
1607 if (constant_state == false)
1608 {
1609 param_validation(escape_expr, like_expr);
1610 std::vector<char> like_as_regex = transform(like_expr_val.str(), *escape_expr_val.str());
1611 compile(like_as_regex);
1612 }
1613
1614 value main_expr_val = main_expr->eval();
1615 if (main_expr_val.type != value::value_En_t::STRING)
1616 {
1617 throw base_s3select_exception("main expression must be string");
1618 }
1619
1620 match(main_expr_val, result);
1621 return true;
1622 }
1623 };
1624
1625 struct _fn_substr : public base_function
1626 {
1627
1628 char buff[4096];// this buffer is persist for the query life time, it use for the results per row(only for the specific function call)
1629 //it prevent from intensive use of malloc/free (fragmentation).
1630 //should validate result length.
1631 //TODO may replace by std::string (dynamic) , or to replace with global allocator , in query scope.
1632 value v_str;
1633 value v_from;
1634 value v_to;
1635
1636 bool operator()(bs_stmt_vec_t* args, variable* result) override
1637 {
1638 auto iter = args->begin();
1639 int args_size = args->size();
1640
1641
1642 if (args_size<2)
1643 {
1644 throw base_s3select_exception("substr accept 2 arguments or 3");
1645 }
1646
1647 base_statement* str = *iter;
1648 iter++;
1649 base_statement* from = *iter;
1650 base_statement* to;
1651
1652 if (args_size == 3)
1653 {
1654 iter++;
1655 to = *iter;
1656 v_to = to->eval();
1657 if (!v_to.is_number())
1658 {
1659 throw base_s3select_exception("substr third argument must be number"); //can skip row
1660 }
1661 }
1662
1663 v_str = str->eval();
1664
1665 if(v_str.type != value::value_En_t::STRING)
1666 {
1667 throw base_s3select_exception("substr first argument must be string"); //can skip current row
1668 }
1669
1670 int str_length = strlen(v_str.str());
1671
1672 v_from = from->eval();
1673 if(!v_from.is_number())
1674 {
1675 throw base_s3select_exception("substr second argument must be number"); //can skip current row
1676 }
1677
1678 int64_t f;
1679 int64_t t;
1680
1681 if (v_from.type == value::value_En_t::FLOAT)
1682 {
1683 f=v_from.dbl();
1684 }
1685 else
1686 {
1687 f=v_from.i64();
1688 }
1689
1690 if (f <= 0 && args_size == 2)
1691 {
1692 f = 1;
1693 }
1694
1695 if (f>str_length)
1696 {
1697 result->set_value("");
1698 return true;
1699 }
1700
1701 if (str_length>(int)sizeof(buff))
1702 {
1703 throw base_s3select_exception("string too long for internal buffer"); //can skip row
1704 }
1705
1706 if (args_size == 3)
1707 {
1708 if (v_to.type == value::value_En_t::FLOAT)
1709 {
1710 t = v_to.dbl();
1711 }
1712 else
1713 {
1714 t = v_to.i64();
1715 }
1716
1717 if (f <= 0)
1718 {
1719 t = t + f - 1;
1720 f = 1;
1721 }
1722
1723 if (t<0)
1724 {
1725 t = 0;
1726 }
1727
1728 if (t > str_length)
1729 {
1730 t = str_length;
1731 }
1732
1733 if( (str_length-(f-1)-t) <0)
1734 {//in case the requested length is too long, reduce it to exact length.
1735 t = str_length-(f-1);
1736 }
1737
1738 strncpy(buff, v_str.str()+f-1, t);
1739 }
1740 else
1741 {
1742 strcpy(buff, v_str.str()+f-1);
1743 }
1744
1745 result->set_value(buff);
1746
1747 return true;
1748 }
1749 };
1750
1751 struct _fn_charlength : public base_function {
1752
1753 value v_str;
1754
1755 bool operator()(bs_stmt_vec_t* args, variable* result) override
1756 {
1757 check_args_size(args,1);
1758
1759 auto iter = args->begin();
1760 base_statement* str = *iter;
1761 v_str = str->eval();
1762 if(v_str.type != value::value_En_t::STRING) {
1763 throw base_s3select_exception("content is not string!");
1764 } else {
1765 int64_t str_length = strlen(v_str.str());
1766 result->set_value(str_length);
1767 return true;
1768 }
1769 }
1770 };
1771
1772 struct _fn_lower : public base_function {
1773
1774 std::string buff;
1775 value v_str;
1776
1777 bool operator()(bs_stmt_vec_t* args, variable* result) override
1778 {
1779 check_args_size(args,1);
1780
1781 auto iter = args->begin();
1782 base_statement* str = *iter;
1783 v_str = str->eval();
1784 if(v_str.type != value::value_En_t::STRING) {
1785 throw base_s3select_exception("content is not string");
1786 } else {
1787 buff = v_str.str();
1788 boost::algorithm::to_lower(buff);
1789 result->set_value(buff.c_str());
1790 return true;
1791 }
1792 }
1793 };
1794
1795 struct _fn_upper : public base_function {
1796
1797 std::string buff;
1798 value v_str;
1799
1800 bool operator()(bs_stmt_vec_t* args, variable* result) override
1801 {
1802 check_args_size(args,1);
1803
1804 auto iter = args->begin();
1805 base_statement* str = *iter;
1806 v_str = str->eval();
1807 if(v_str.type != value::value_En_t::STRING) {
1808 throw base_s3select_exception("content is not string");
1809 } else {
1810 buff = v_str.str();
1811 boost::algorithm::to_upper(buff);
1812 result->set_value(buff.c_str());
1813 return true;
1814 }
1815 }
1816 };
1817
1818 struct _fn_nullif : public base_function {
1819
1820 value x;
1821 value y;
1822
1823 bool operator()(bs_stmt_vec_t* args, variable* result) override
1824 {
1825 auto iter = args->begin();
1826
1827 int args_size = args->size();
1828 if (args_size != 2)
1829 {
1830 throw base_s3select_exception("nullif accept only 2 arguments");
1831 }
1832 base_statement *first = *iter;
1833 x = first->eval();
1834 iter++;
1835 base_statement *second = *iter;
1836 y = second->eval();
1837 if (x.is_null() && y.is_null())
1838 {
1839 result->set_null();
1840 return true;
1841 }
1842 if (x.is_null())
1843 {
1844 result->set_null();
1845 return true;
1846 }
1847 if (!(x.is_number() && y.is_number())) {
1848 if (x.type != y.type) {
1849 *result = x;
1850 return true;
1851 }
1852 }
1853 if (x != y) {
1854 *result = x;
1855 } else {
1856 result->set_null();
1857 }
1858 return true;
1859 }
1860 };
1861
1862 struct _fn_when_then : public base_function {
1863
1864 value when_value;
1865
1866 bool operator()(bs_stmt_vec_t* args, variable* result) override
1867 {
1868 check_args_size(args,2);
1869
1870 auto iter = args->begin();
1871
1872 base_statement* then_expr = *iter;
1873 iter ++;
1874
1875 base_statement* when_expr = *iter;
1876
1877 when_value = when_expr->eval();
1878
1879 if (when_value.is_true())//true
1880 {
1881 *result = then_expr->eval();
1882 return true;
1883 }
1884
1885 result->set_null();
1886
1887 return true;
1888 }
1889 };
1890
1891 struct _fn_when_value_then : public base_function {
1892
1893 value when_value;
1894 value case_value;
1895 value then_value;
1896
1897 bool operator()(bs_stmt_vec_t* args, variable* result) override
1898 {
1899 check_args_size(args,3);
1900
1901 auto iter = args->begin();
1902
1903 base_statement* then_expr = *iter;
1904 iter++;
1905
1906 base_statement* when_expr = *iter;
1907 iter++;
1908
1909 base_statement* case_expr = *iter;
1910
1911 when_value = when_expr->eval();
1912 case_value = case_expr->eval();
1913 then_value = then_expr->eval();
1914
1915 if (case_value == when_value)
1916 {
1917 *result = then_value;
1918 return true;
1919 }
1920
1921 result->set_null();
1922 return true;
1923 }
1924 };
1925
1926 struct _fn_case_when_else : public base_function {
1927
1928 value when_then_value;
1929
1930 bool operator()(bs_stmt_vec_t* args, variable* result) override
1931 {
1932 check_args_size(args,1);
1933
1934 base_statement* else_expr = *(args->begin());
1935
1936 size_t args_size = args->size() -1;
1937
1938 for(int ivec=args_size;ivec>0;ivec--)
1939 {
1940 when_then_value = (*args)[ivec]->eval();
1941
1942 if(!when_then_value.is_null())
1943 {
1944 *result = when_then_value;
1945 return true;
1946 }
1947
1948 }
1949
1950 *result = else_expr->eval();
1951 return true;
1952 }
1953 };
1954
1955 struct _fn_coalesce : public base_function
1956 {
1957
1958 value res;
1959
1960 bool operator()(bs_stmt_vec_t* args, variable* result) override
1961 {
1962 check_args_size(args,1);
1963
1964 auto iter_begin = args->begin();
1965 int args_size = args->size();
1966 while (args_size >= 1)
1967 {
1968 base_statement* expr = *iter_begin;
1969 value expr_val = expr->eval();
1970 iter_begin++;
1971 if ( !(expr_val.is_null())) {
1972 *result = expr_val;
1973 return true;
1974 }
1975 args_size--;
1976 }
1977 result->set_null();
1978 return true;
1979 }
1980 };
1981
1982 struct _fn_string : public base_function
1983 {
1984
1985 value res;
1986
1987 bool operator()(bs_stmt_vec_t* args, variable* result) override
1988 {
1989 check_args_size(args,1);
1990
1991 auto iter = args->begin();
1992
1993 base_statement* expr = *iter;
1994 value expr_val = expr->eval();
1995 result->set_value((expr_val.to_string()));
1996 return true;
1997 }
1998 };
1999
2000 struct _fn_to_bool : public base_function
2001 {
2002
2003 value func_arg;
2004
2005 bool operator()(bs_stmt_vec_t* args, variable* result) override
2006 {
2007 check_args_size(args,1);
2008
2009 int64_t i=0;
2010 func_arg = (*args->begin())->eval();
2011
2012 if (func_arg.type == value::value_En_t::FLOAT)
2013 {
2014 i = func_arg.dbl();
2015 }
2016 else if (func_arg.type == value::value_En_t::DECIMAL || func_arg.type == value::value_En_t::BOOL)
2017 {
2018 i = func_arg.i64();
2019 }
2020 else
2021 {
2022 i = 0;
2023 }
2024 if (i == 0)
2025 {
2026 result->set_value(false);
2027 }
2028 else
2029 {
2030 result->set_value(true);
2031 }
2032 return true;
2033 }
2034 };
2035
2036 struct _fn_trim : public base_function {
2037
2038 std::string input_string;
2039 value v_remove;
2040 value v_input;
2041
2042 _fn_trim()
2043 {
2044 v_remove = " ";
2045 }
2046
2047 bool operator()(bs_stmt_vec_t* args, variable* result) override
2048 {
2049 check_args_size(args,1);
2050
2051 auto iter = args->begin();
2052 int args_size = args->size();
2053 base_statement* str = *iter;
2054 v_input = str->eval();
2055 if(v_input.type != value::value_En_t::STRING) {
2056 throw base_s3select_exception("content is not string");
2057 }
2058 input_string = v_input.str();
2059 if (args_size == 2) {
2060 iter++;
2061 base_statement* next = *iter;
2062 v_remove = next->eval();
2063 }
2064 boost::trim_right_if(input_string,boost::is_any_of(v_remove.str()));
2065 boost::trim_left_if(input_string,boost::is_any_of(v_remove.str()));
2066 result->set_value(input_string.c_str());
2067 return true;
2068 }
2069 };
2070
2071 struct _fn_leading : public base_function {
2072
2073 std::string input_string;
2074 value v_remove;
2075 value v_input;
2076
2077 _fn_leading()
2078 {
2079 v_remove = " ";
2080 }
2081
2082 bool operator()(bs_stmt_vec_t* args, variable* result) override
2083 {
2084 check_args_size(args,1);
2085
2086 auto iter = args->begin();
2087 int args_size = args->size();
2088 base_statement* str = *iter;
2089 v_input = str->eval();
2090 if(v_input.type != value::value_En_t::STRING) {
2091 throw base_s3select_exception("content is not string");
2092 }
2093 input_string = v_input.str();
2094 if (args_size == 2) {
2095 iter++;
2096 base_statement* next = *iter;
2097 v_remove = next->eval();
2098 }
2099 boost::trim_left_if(input_string,boost::is_any_of(v_remove.str()));
2100 result->set_value(input_string.c_str());
2101 return true;
2102 }
2103 };
2104
2105 struct _fn_trailing : public base_function {
2106
2107 std::string input_string;
2108 value v_remove;
2109 value v_input;
2110
2111 _fn_trailing()
2112 {
2113 v_remove = " ";
2114 }
2115
2116 bool operator()(bs_stmt_vec_t* args, variable* result) override
2117 {
2118 check_args_size(args,1);
2119
2120 auto iter = args->begin();
2121 int args_size = args->size();
2122 base_statement* str = *iter;
2123 v_input = str->eval();
2124 if(v_input.type != value::value_En_t::STRING) {
2125 throw base_s3select_exception("content is not string");
2126 }
2127 input_string = v_input.str();
2128 if (args_size == 2) {
2129 iter++;
2130 base_statement* next = *iter;
2131 v_remove = next->eval();
2132 }
2133 boost::trim_right_if(input_string,boost::is_any_of(v_remove.str()));
2134 result->set_value(input_string.c_str());
2135 return true;
2136 }
2137 };
2138
2139 struct _fn_cast_to_decimal : public base_function {
2140
2141 int32_t precision=-1;
2142 int32_t scale=-1;
2143
2144 bool operator()(bs_stmt_vec_t* args, variable* result) override
2145 {
2146 //cast(expr as decimal(x,y))
2147 check_args_size(args,2);
2148
2149 base_statement* expr = (*args)[1];
2150 //expr_val should be float or integer
2151 //dynamic value for the decimal operator to get the precision and scale
2152
2153 _fn_to_float to_float;
2154 bs_stmt_vec_t args_vec;
2155 args_vec.push_back(expr);
2156 to_float(&args_vec,result);
2157
2158 if (precision == -1 || scale == -1){
2159 base_statement* decimal_expr = (*args)[0];
2160 decimal_expr->eval().get_precision_scale(&precision,&scale);
2161 }
2162
2163 result->set_precision_scale(&precision,&scale);
2164
2165 return true;
2166 }
2167 };
2168
2169 struct _fn_decimal_operator : public base_function {
2170
2171 int32_t precision=-1;
2172 int32_t scale=-1;
2173
2174 bool operator()(bs_stmt_vec_t* args, variable* result) override
2175 {
2176 //decimal(x,y) operator
2177 check_args_size(args,2);
2178
2179 auto iter = args->begin();
2180 base_statement* expr_precision = *iter;
2181 value expr_precision_val = expr_precision->eval();
2182
2183 iter++;
2184 base_statement* expr_scale = *iter;
2185 value expr_scale_val = expr_scale->eval();
2186
2187 precision = expr_precision_val.i64();
2188 scale = expr_scale_val.i64();
2189
2190 result->set_precision_scale(&precision,&scale);
2191
2192 return true;
2193 }
2194 };
2195
2196 struct _fn_engine_version : public base_function {
2197
2198 const char* version_description =R"(PR #137 :
2199 the change handle the use cases where the JSON input starts with an anonymous array/object this may cause wrong search result per the user request(SQL statement)
2200
2201 handle the use-case where the user requests a json-key-path that may point to a non-discrete value. i.e. array or an object.
2202 editorial changes.
2203
2204 fix for CSV flow, in the case of a "broken row" (upon processing stream of data)
2205
2206 null results upon aggregation functions on an empty group (no match for where clause).
2207 )";
2208
2209
2210 _fn_engine_version()
2211 {
2212 aggregate = true;
2213 }
2214
2215 bool operator()(bs_stmt_vec_t* args, variable* result) override
2216 {
2217 result->set_value(version_description);
2218 return true;
2219 }
2220 };
2221
2222 base_function* s3select_functions::create(std::string_view fn_name,const bs_stmt_vec_t &arguments)
2223 {
2224 const FunctionLibrary::const_iterator iter = m_functions_library.find(fn_name.data());
2225
2226 if (iter == m_functions_library.end())
2227 {
2228 std::string msg;
2229 msg = std::string{fn_name} + " " + " function not found";
2230 throw base_s3select_exception(msg, base_s3select_exception::s3select_exp_en_t::FATAL);
2231 }
2232
2233 switch (iter->second)
2234 {
2235 case s3select_func_En_t::ADD:
2236 return S3SELECT_NEW(this,_fn_add);
2237 break;
2238
2239 case s3select_func_En_t::SUM:
2240 return S3SELECT_NEW(this,_fn_sum);
2241 break;
2242
2243 case s3select_func_En_t::COUNT:
2244 return S3SELECT_NEW(this,_fn_count);
2245 break;
2246
2247 case s3select_func_En_t::MIN:
2248 return S3SELECT_NEW(this,_fn_min);
2249 break;
2250
2251 case s3select_func_En_t::MAX:
2252 return S3SELECT_NEW(this,_fn_max);
2253 break;
2254
2255 case s3select_func_En_t::TO_INT:
2256 return S3SELECT_NEW(this,_fn_to_int);
2257 break;
2258
2259 case s3select_func_En_t::TO_FLOAT:
2260 return S3SELECT_NEW(this,_fn_to_float);
2261 break;
2262
2263 case s3select_func_En_t::SUBSTR:
2264 return S3SELECT_NEW(this,_fn_substr);
2265 break;
2266
2267 case s3select_func_En_t::TO_TIMESTAMP:
2268 return S3SELECT_NEW(this,_fn_to_timestamp);
2269 break;
2270
2271 case s3select_func_En_t::TO_STRING_CONSTANT:
2272 return S3SELECT_NEW(this,_fn_to_string_constant);
2273 break;
2274
2275 case s3select_func_En_t::TO_STRING_DYNAMIC:
2276 return S3SELECT_NEW(this,_fn_to_string_dynamic);
2277 break;
2278
2279 case s3select_func_En_t::TO_BOOL:
2280 return S3SELECT_NEW(this,_fn_to_bool);
2281 break;
2282
2283 case s3select_func_En_t::EXTRACT_YEAR:
2284 return S3SELECT_NEW(this,_fn_extract_year_from_timestamp);
2285 break;
2286
2287 case s3select_func_En_t::EXTRACT_MONTH:
2288 return S3SELECT_NEW(this,_fn_extract_month_from_timestamp);
2289 break;
2290
2291 case s3select_func_En_t::EXTRACT_DAY:
2292 return S3SELECT_NEW(this,_fn_extract_day_from_timestamp);
2293 break;
2294
2295 case s3select_func_En_t::EXTRACT_HOUR:
2296 return S3SELECT_NEW(this,_fn_extract_hour_from_timestamp);
2297 break;
2298
2299 case s3select_func_En_t::EXTRACT_MINUTE:
2300 return S3SELECT_NEW(this,_fn_extract_minute_from_timestamp);
2301 break;
2302
2303 case s3select_func_En_t::EXTRACT_SECOND:
2304 return S3SELECT_NEW(this,_fn_extract_second_from_timestamp);
2305 break;
2306
2307 case s3select_func_En_t::EXTRACT_WEEK:
2308 return S3SELECT_NEW(this,_fn_extract_week_from_timestamp);
2309 break;
2310
2311 case s3select_func_En_t::EXTRACT_TIMEZONE_HOUR:
2312 return S3SELECT_NEW(this,_fn_extract_tz_hour_from_timestamp);
2313 break;
2314
2315 case s3select_func_En_t::EXTRACT_TIMEZONE_MINUTE:
2316 return S3SELECT_NEW(this,_fn_extract_tz_minute_from_timestamp);
2317 break;
2318
2319 case s3select_func_En_t::DATE_ADD_YEAR:
2320 return S3SELECT_NEW(this,_fn_add_year_to_timestamp);
2321 break;
2322
2323 case s3select_func_En_t::DATE_ADD_MONTH:
2324 return S3SELECT_NEW(this,_fn_add_month_to_timestamp);
2325 break;
2326
2327 case s3select_func_En_t::DATE_ADD_DAY:
2328 return S3SELECT_NEW(this,_fn_add_day_to_timestamp);
2329 break;
2330
2331 case s3select_func_En_t::DATE_ADD_HOUR:
2332 return S3SELECT_NEW(this,_fn_add_hour_to_timestamp);
2333 break;
2334
2335 case s3select_func_En_t::DATE_ADD_MINUTE:
2336 return S3SELECT_NEW(this,_fn_add_minute_to_timestamp);
2337 break;
2338
2339 case s3select_func_En_t::DATE_ADD_SECOND:
2340 return S3SELECT_NEW(this,_fn_add_second_to_timestamp);
2341 break;
2342
2343 case s3select_func_En_t::DATE_DIFF_YEAR:
2344 return S3SELECT_NEW(this,_fn_diff_year_timestamp);
2345 break;
2346
2347 case s3select_func_En_t::DATE_DIFF_MONTH:
2348 return S3SELECT_NEW(this,_fn_diff_month_timestamp);
2349 break;
2350
2351 case s3select_func_En_t::DATE_DIFF_DAY:
2352 return S3SELECT_NEW(this,_fn_diff_day_timestamp);
2353 break;
2354
2355 case s3select_func_En_t::DATE_DIFF_HOUR:
2356 return S3SELECT_NEW(this,_fn_diff_hour_timestamp);
2357 break;
2358
2359 case s3select_func_En_t::DATE_DIFF_MINUTE:
2360 return S3SELECT_NEW(this,_fn_diff_minute_timestamp);
2361 break;
2362
2363 case s3select_func_En_t::DATE_DIFF_SECOND:
2364 return S3SELECT_NEW(this,_fn_diff_second_timestamp);
2365 break;
2366
2367 case s3select_func_En_t::UTCNOW:
2368 return S3SELECT_NEW(this,_fn_utcnow);
2369 break;
2370
2371 case s3select_func_En_t::AVG:
2372 return S3SELECT_NEW(this,_fn_avg);
2373 break;
2374
2375 case s3select_func_En_t::LOWER:
2376 return S3SELECT_NEW(this,_fn_lower);
2377 break;
2378
2379 case s3select_func_En_t::UPPER:
2380 return S3SELECT_NEW(this,_fn_upper);
2381 break;
2382
2383 case s3select_func_En_t::LENGTH:
2384 return S3SELECT_NEW(this,_fn_charlength);
2385 break;
2386
2387 case s3select_func_En_t::BETWEEN:
2388 return S3SELECT_NEW(this,_fn_between);
2389 break;
2390
2391 case s3select_func_En_t::NOT_BETWEEN:
2392 return S3SELECT_NEW(this,_fn_not_between);
2393 break;
2394
2395 case s3select_func_En_t::IS_NULL:
2396 return S3SELECT_NEW(this,_fn_isnull);
2397 break;
2398
2399 case s3select_func_En_t::IS_NOT_NULL:
2400 return S3SELECT_NEW(this,_fn_is_not_null);
2401 break;
2402
2403 case s3select_func_En_t::IN:
2404 return S3SELECT_NEW(this,_fn_in);
2405 break;
2406
2407 case s3select_func_En_t::VERSION:
2408 return S3SELECT_NEW(this,_fn_version);
2409 break;
2410
2411 case s3select_func_En_t::NULLIF:
2412 return S3SELECT_NEW(this,_fn_nullif);
2413 break;
2414
2415 case s3select_func_En_t::LIKE:
2416 return S3SELECT_NEW(this,_fn_like,arguments[0],arguments[1]);
2417 break;
2418
2419 case s3select_func_En_t::COALESCE:
2420 return S3SELECT_NEW(this,_fn_coalesce);
2421 break;
2422
2423 case s3select_func_En_t::WHEN_THEN:
2424 return S3SELECT_NEW(this,_fn_when_then);
2425 break;
2426
2427 case s3select_func_En_t::WHEN_VALUE_THEN:
2428 return S3SELECT_NEW(this,_fn_when_value_then);
2429 break;
2430
2431 case s3select_func_En_t::CASE_WHEN_ELSE:
2432 return S3SELECT_NEW(this,_fn_case_when_else);
2433 break;
2434
2435 case s3select_func_En_t::STRING:
2436 return S3SELECT_NEW(this,_fn_string);
2437 break;
2438
2439 case s3select_func_En_t::TRIM:
2440 return S3SELECT_NEW(this,_fn_trim);
2441 break;
2442
2443 case s3select_func_En_t::LEADING:
2444 return S3SELECT_NEW(this,_fn_leading);
2445 break;
2446
2447 case s3select_func_En_t::TRAILING:
2448 return S3SELECT_NEW(this,_fn_trailing);
2449 break;
2450
2451 case s3select_func_En_t::DECIMAL_OPERATOR:
2452 return S3SELECT_NEW(this,_fn_decimal_operator);
2453 break;
2454
2455 case s3select_func_En_t::CAST_TO_DECIMAL:
2456 return S3SELECT_NEW(this,_fn_cast_to_decimal);
2457 break;
2458
2459 case s3select_func_En_t::ENGINE_VERSION:
2460 return S3SELECT_NEW(this,_fn_engine_version);
2461 break;
2462
2463 default:
2464 throw base_s3select_exception("internal error while resolving function-name");
2465 break;
2466 }
2467 }
2468
2469 bool base_statement::is_function() const
2470 {
2471 if (dynamic_cast<__function*>(const_cast<base_statement*>(this)))
2472 {
2473 return true;
2474 }
2475 else
2476 {
2477 return false;
2478 }
2479 }
2480
2481 const base_statement* base_statement::get_aggregate() const
2482 {
2483 //search for aggregation function in AST
2484 const base_statement* res = 0;
2485
2486 if (is_aggregate())
2487 {
2488 return this;
2489 }
2490
2491 if (left() && (res=left()->get_aggregate())!=0)
2492 {
2493 return res;
2494 }
2495
2496 if (right() && (res=right()->get_aggregate())!=0)
2497 {
2498 return res;
2499 }
2500
2501 if (is_function())
2502 {
2503 for (auto i : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2504 {
2505 const base_statement* b=i->get_aggregate();
2506 if (b)
2507 {
2508 return b;
2509 }
2510 }
2511 }
2512 return 0;
2513 }
2514
2515 bool base_statement::is_column_reference() const
2516 {
2517 if(is_column())
2518 return true;
2519
2520 if(left())
2521 return left()->is_column_reference();
2522
2523 if(right())
2524 return right()->is_column_reference();
2525
2526 if(is_function())
2527 {
2528 for(auto a : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2529 {
2530 if(a->is_column_reference())
2531 return true;
2532 }
2533 }
2534
2535 return false;
2536 }
2537
2538 bool base_statement::is_nested_aggregate(bool &aggr_flow) const
2539 {
2540 if (is_aggregate())
2541 {
2542 aggr_flow=true;
2543 for (auto& i : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2544 {
2545 if (i->get_aggregate() != nullptr)
2546 {
2547 return true;
2548 }
2549 }
2550 }
2551
2552 if(left() && left()->is_nested_aggregate(aggr_flow))
2553 return true;
2554
2555 if(right() && right()->is_nested_aggregate(aggr_flow))
2556 return true;
2557
2558 if (is_function())
2559 {
2560 for (auto& i : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2561 {
2562 if (i->get_aggregate() != nullptr)
2563 {
2564 return i->is_nested_aggregate(aggr_flow);
2565 }
2566 }
2567 }
2568
2569 return false;
2570 }
2571
2572 bool base_statement::is_statement_contain_star_operation() const
2573 {
2574 if(is_star_operation())
2575 return true;
2576
2577 if(left())
2578 return left()->is_statement_contain_star_operation();
2579
2580 if(right())
2581 return right()->is_statement_contain_star_operation();
2582
2583 if(is_function())
2584 {
2585 for(auto a : dynamic_cast<__function*>(const_cast<base_statement*>(this))->get_arguments())
2586 {
2587 if(a->is_star_operation())
2588 return true;
2589 }
2590 }
2591
2592 return false;
2593 }
2594
2595 bool base_statement::mark_aggreagtion_subtree_to_execute()
2596 {//purpase:: set aggregation subtree as runnable.
2597 //the function search for aggregation function, and mark its subtree {skip = false}
2598 if (is_aggregate())
2599 set_skip_non_aggregate(false);
2600
2601 if (left())
2602 left()->mark_aggreagtion_subtree_to_execute();
2603
2604 if(right())
2605 right()->mark_aggreagtion_subtree_to_execute();
2606
2607 if (is_function())
2608 {
2609 for (auto& i : dynamic_cast<__function*>(this)->get_arguments())
2610 {
2611 i->mark_aggreagtion_subtree_to_execute();
2612 }
2613 }
2614
2615 return true;
2616 }
2617
2618 void base_statement::push_for_cleanup(std::set<base_statement*>& ast_nodes_to_delete)//semantic loop on each projection
2619 {
2620 //placement new is releasing the main-buffer in which all AST nodes
2621 //allocating from it. meaning no calls to destructors.
2622 //the purpose of this routine is to traverse the AST in map all nodes for cleanup.
2623 //the cleanup method will trigger all destructors.
2624
2625 ast_nodes_to_delete.insert(this);
2626
2627 if (left())
2628 left()->push_for_cleanup(ast_nodes_to_delete);
2629
2630 if(right())
2631 right()->push_for_cleanup(ast_nodes_to_delete);
2632
2633 if (is_function())
2634 {
2635 for (auto& i : dynamic_cast<__function*>(this)->get_arguments())
2636 {
2637 i->push_for_cleanup(ast_nodes_to_delete);
2638 }
2639 }
2640 }
2641
2642 #ifdef _ARROW_EXIST
2643 void base_statement::extract_columns(parquet_file_parser::column_pos_t &cols,const uint16_t max_columns)
2644 {// purpose: to extract all column-ids from query
2645 if(is_column()) //column reference or column position
2646 {variable* v = dynamic_cast<variable*>(this);
2647 if(dynamic_cast<variable*>(this)->m_var_type == variable::var_t::VARIABLE_NAME)
2648 {//column reference
2649
2650 if (v->getScratchArea()->get_column_pos(v->get_name().c_str())>=0)
2651 {//column belong to schema
2652 cols.insert( v->getScratchArea()->get_column_pos(v->get_name().c_str() ));
2653 }else {
2654 if(v->getAlias()->search_alias(v->get_name()))
2655 {//column is an alias --> extract columns belong to alias
2656 //TODO cyclic alias to resolve
2657 v->getAlias()->search_alias(v->get_name())->extract_columns(cols,max_columns);
2658 }else {
2659 //column is not alias --> error
2660 std::stringstream ss;
2661 ss << "column " + v->get_name() + " is not part of schema nor an alias";
2662 throw base_s3select_exception(ss.str(),base_s3select_exception::s3select_exp_en_t::FATAL);
2663 }
2664 }
2665 }else if(v->m_var_type == variable::var_t::STAR_OPERATION)
2666 {
2667 for(uint16_t i=0;i<max_columns;i++)
2668 {//push all columns
2669 cols.insert( i );
2670 }
2671 }
2672 else {
2673 if (v->get_column_pos()>=max_columns)
2674 {
2675 std::stringstream ss;
2676 ss << "column " + std::to_string( v->get_column_pos()+1 ) + " exceed max number of columns";
2677 throw base_s3select_exception(ss.str(),base_s3select_exception::s3select_exp_en_t::FATAL);
2678 }
2679 cols.insert(v->get_column_pos());//push column positions
2680 }
2681 }else if(is_function())
2682 {
2683 __function* f = (dynamic_cast<__function*>(this));
2684 bs_stmt_vec_t args = f->get_arguments();
2685 for (auto prm : args)
2686 {//traverse function args
2687 prm->extract_columns(cols,max_columns);
2688 }
2689
2690 }
2691
2692 //keep traversing down the AST
2693 if(left())
2694 left()->extract_columns(cols,max_columns);
2695
2696 if(right())
2697 right()->extract_columns(cols,max_columns);
2698 }
2699 #endif //_ARROW_EXIST
2700
2701 } //namespace s3selectEngine
2702
2703 #endif