1 #ifndef __S3SELECT_FUNCTIONS__
2 #define __S3SELECT_FUNCTIONS__
5 #include "s3select_oper.h"
6 #include <boost/algorithm/string.hpp>
7 #include <boost/algorithm/string/trim.hpp>
8 #include <boost/regex.hpp>
11 using namespace std::string_literals
;
13 #define BOOST_BIND_ACTION_PARAM( push_name ,param ) boost::bind( &push_name::operator(), g_ ## push_name , _1 ,_2, param)
14 namespace s3selectEngine
17 constexpr double sec_scale(int n
)
24 void operator()(const char* a
, const char* b
, uint32_t* n
) const
30 static push_char g_push_char
;
// Semantic action that converts two consecutive ASCII decimal digits into a number.
// It is bound into parser rules through BOOST_BIND_ACTION_PARAM, which passes the
// matched range [a, b) and a caller-supplied destination pointer.
struct push_2dig
{
  // a : first character of the matched text; a[0] and a[1] are read.
  // b : end of the matched text (unused here).
  // n : receives the decoded value (tens digit * 10 + units digit).
  // No validation is done here: the characters are assumed to be decimal digits.
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    // '0' replaces the magic constant 48; the static_cast<char> on a char was a no-op.
    *n = (a[0] - '0') * 10 + (a[1] - '0');
  }
};
40 static push_2dig g_push_2dig
;
// Semantic action that converts four consecutive ASCII decimal digits into a number.
// It is bound into parser rules through BOOST_BIND_ACTION_PARAM, which passes the
// matched range [a, b) and a caller-supplied destination pointer.
struct push_4dig
{
  // a : first character of the matched text; a[0] .. a[3] are read.
  // b : end of the matched text (unused here).
  // n : receives the decoded value, a[0] being the most significant digit.
  // No validation is done here: the characters are assumed to be decimal digits.
  void operator()(const char* a, const char* b, uint32_t* n) const
  {
    // Horner form of d0*1000 + d1*100 + d2*10 + d3; '0' replaces the magic constant 48.
    *n = (((a[0] - '0') * 10 + (a[1] - '0')) * 10 + (a[2] - '0')) * 10 + (a[3] - '0');
  }
};
50 static push_4dig g_push_4dig
;
54 void operator()(const char* a
, const char* b
, uint32_t* n
) const
56 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
57 const double scale
= sec_scale(9-1); //nano-sec
59 const double scale
= sec_scale(6-1); //micro-sec
62 *n
= ((static_cast<char>(*a
) - 48)) * scale
;
66 static push_1fdig g_push_1fdig
;
70 void operator()(const char* a
, const char* b
, uint32_t* n
) const
72 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
73 const double scale
= sec_scale(9-2); //nano-sec
75 const double scale
= sec_scale(6-2); //micro-sec
78 *n
= ((static_cast<char>(*a
) - 48) * 10 + (static_cast<char>(*(a
+1)) - 48)) * scale
;
82 static push_2fdig g_push_2fdig
;
86 void operator()(const char* a
, const char* b
, uint32_t* n
) const
88 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
89 const double scale
= sec_scale(9-3); //nano-sec
91 const double scale
= sec_scale(6-3); //micro-sec
94 *n
= ((static_cast<char>(*a
) - 48) * 100 + (static_cast<char>(*(a
+1)) - 48) * 10 + (static_cast<char>(*(a
+2)) - 48)) * scale
;
98 static push_3fdig g_push_3fdig
;
102 void operator()(const char* a
, const char* b
, uint32_t* n
) const
104 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
105 const double scale
= sec_scale(9-4); //nano-sec
107 const double scale
= sec_scale(6-4); //micro-sec
110 *n
= ((static_cast<char>(*a
) - 48) * 1000 + (static_cast<char>(*(a
+1)) - 48) * 100 + (static_cast<char>(*(a
+2)) - 48) * 10 + (static_cast<char>(*(a
+3)) - 48)) * scale
;
114 static push_4fdig g_push_4fdig
;
118 void operator()(const char* a
, const char* b
, uint32_t* n
) const
120 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
121 const double scale
= sec_scale(9-5); //nano-sec
123 const double scale
= sec_scale(6-5); //micro-sec
126 *n
= ((static_cast<char>(*a
) - 48) * 10000 + (static_cast<char>(*(a
+1)) - 48) * 1000 + (static_cast<char>(*(a
+2)) - 48) * 100 + (static_cast<char>(*(a
+3)) - 48) * 10 + (static_cast<char>(*(a
+4)) - 48)) * scale
;
130 static push_5fdig g_push_5fdig
;
134 void operator()(const char* a
, const char* b
, uint32_t* n
) const
136 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
137 const double scale
= sec_scale(9-6); //nano-sec
139 const double scale
= sec_scale(6-6); //micro-sec
142 *n
= ((static_cast<char>(*a
) - 48) * 100000 + (static_cast<char>(*(a
+1)) - 48) * 10000 + (static_cast<char>(*(a
+2)) - 48) * 1000 + (static_cast<char>(*(a
+3)) - 48) * 100 + (static_cast<char>(*(a
+4)) - 48) * 10 + (static_cast<char>(*(a
+5)) - 48)) * scale
;
146 static push_6fdig g_push_6fdig
;
148 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
151 void operator()(const char* a
, const char* b
, uint32_t* n
) const
153 const double scale
= sec_scale(9-7); //nano-sec
154 *n
= ((static_cast<char>(*a
) - 48) * 1000000 + (static_cast<char>(*(a
+1)) - 48) * 100000 + (static_cast<char>(*(a
+2)) - 48) * 10000 + (static_cast<char>(*(a
+3)) - 48) * 1000 + (static_cast<char>(*(a
+4)) - 48) * 100 + (static_cast<char>(*(a
+5)) - 48) * 10 + (static_cast<char>(*(a
+6)) - 48)) * scale
;
158 static push_7fdig g_push_7fdig
;
162 void operator()(const char* a
, const char* b
, uint32_t* n
) const
164 const double scale
= sec_scale(9-8); //nano-sec
165 *n
= ((static_cast<char>(*a
) - 48) * 10000000 + (static_cast<char>(*(a
+1)) - 48) * 1000000 + (static_cast<char>(*(a
+2)) - 48) * 100000 + (static_cast<char>(*(a
+3)) - 48) * 10000 + (static_cast<char>(*(a
+4)) - 48) * 1000 + (static_cast<char>(*(a
+5)) - 48) * 100 + (static_cast<char>(*(a
+6)) - 48) * 10 + (static_cast<char>(*(a
+7)) - 48)) * scale
;
169 static push_8fdig g_push_8fdig
;
173 void operator()(const char* a
, const char* b
, uint32_t* n
) const
175 const double scale
= sec_scale(9-9); //nano-sec
176 *n
= ((static_cast<char>(*a
) - 48) * 100000000 + (static_cast<char>(*(a
+1)) - 48) * 10000000 + (static_cast<char>(*(a
+2)) - 48) * 1000000 + (static_cast<char>(*(a
+3)) - 48) * 100000 + (static_cast<char>(*(a
+4)) - 48) * 10000 + (static_cast<char>(*(a
+5)) - 48) * 1000 + (static_cast<char>(*(a
+6)) - 48) * 100 + (static_cast<char>(*(a
+7)) - 48) * 10 + (static_cast<char>(*(a
+8)) - 48)) * scale
;
180 static push_9fdig g_push_9fdig
;
183 enum class s3select_func_En_t
{ADD
,
203 EXTRACT_TIMEZONE_HOUR
,
204 EXTRACT_TIMEZONE_MINUTE
,
243 class s3select_functions
248 using FunctionLibrary
= std::map
<std::string
, s3select_func_En_t
>;
249 s3select_allocator
* m_s3select_allocator
;
250 std::set
<base_statement
*>* m_ast_nodes_for_cleanup
;
252 const FunctionLibrary m_functions_library
=
254 {"add", s3select_func_En_t::ADD
},
255 {"sum", s3select_func_En_t::SUM
},
256 {"avg", s3select_func_En_t::AVG
},
257 {"count", s3select_func_En_t::COUNT
},
258 {"min", s3select_func_En_t::MIN
},
259 {"max", s3select_func_En_t::MAX
},
260 {"int", s3select_func_En_t::TO_INT
},
261 {"float", s3select_func_En_t::TO_FLOAT
},
262 {"substring", s3select_func_En_t::SUBSTR
},
263 {"to_timestamp", s3select_func_En_t::TO_TIMESTAMP
},
264 {"#to_string_constant#",s3select_func_En_t::TO_STRING_CONSTANT
},
265 {"#to_string_dynamic#",s3select_func_En_t::TO_STRING_DYNAMIC
},
266 {"to_bool", s3select_func_En_t::TO_BOOL
},
267 {"#extract_year#", s3select_func_En_t::EXTRACT_YEAR
},
268 {"#extract_month#", s3select_func_En_t::EXTRACT_MONTH
},
269 {"#extract_day#", s3select_func_En_t::EXTRACT_DAY
},
270 {"#extract_hour#", s3select_func_En_t::EXTRACT_HOUR
},
271 {"#extract_minute#", s3select_func_En_t::EXTRACT_MINUTE
},
272 {"#extract_second#", s3select_func_En_t::EXTRACT_SECOND
},
273 {"#extract_week#", s3select_func_En_t::EXTRACT_WEEK
},
274 {"#extract_timezone_hour#", s3select_func_En_t::EXTRACT_TIMEZONE_HOUR
},
275 {"#extract_timezone_minute#", s3select_func_En_t::EXTRACT_TIMEZONE_MINUTE
},
276 {"#dateadd_year#", s3select_func_En_t::DATE_ADD_YEAR
},
277 {"#dateadd_month#", s3select_func_En_t::DATE_ADD_MONTH
},
278 {"#dateadd_day#", s3select_func_En_t::DATE_ADD_DAY
},
279 {"#dateadd_hour#", s3select_func_En_t::DATE_ADD_HOUR
},
280 {"#dateadd_minute#", s3select_func_En_t::DATE_ADD_MINUTE
},
281 {"#dateadd_second#", s3select_func_En_t::DATE_ADD_SECOND
},
282 {"#datediff_year#", s3select_func_En_t::DATE_DIFF_YEAR
},
283 {"#datediff_month#", s3select_func_En_t::DATE_DIFF_MONTH
},
284 {"#datediff_day#", s3select_func_En_t::DATE_DIFF_DAY
},
285 {"#datediff_hour#", s3select_func_En_t::DATE_DIFF_HOUR
},
286 {"#datediff_minute#", s3select_func_En_t::DATE_DIFF_MINUTE
},
287 {"#datediff_second#", s3select_func_En_t::DATE_DIFF_SECOND
},
288 {"utcnow", s3select_func_En_t::UTCNOW
},
289 {"character_length", s3select_func_En_t::LENGTH
},
290 {"char_length", s3select_func_En_t::LENGTH
},
291 {"lower", s3select_func_En_t::LOWER
},
292 {"upper", s3select_func_En_t::UPPER
},
293 {"nullif", s3select_func_En_t::NULLIF
},
294 {"#between#", s3select_func_En_t::BETWEEN
},
295 {"#not_between#", s3select_func_En_t::NOT_BETWEEN
},
296 {"#is_null#", s3select_func_En_t::IS_NULL
},
297 {"#is_not_null#", s3select_func_En_t::IS_NOT_NULL
},
298 {"#in_predicate#", s3select_func_En_t::IN
},
299 {"#like_predicate#", s3select_func_En_t::LIKE
},
300 {"version", s3select_func_En_t::VERSION
},
301 {"#when-then#", s3select_func_En_t::WHEN_THEN
},
302 {"#when-value-then#", s3select_func_En_t::WHEN_VALUE_THEN
},
303 {"#case-when-else#", s3select_func_En_t::CASE_WHEN_ELSE
},
304 {"coalesce", s3select_func_En_t::COALESCE
},
305 {"string", s3select_func_En_t::STRING
},
306 {"#trim#", s3select_func_En_t::TRIM
},
307 {"#leading#", s3select_func_En_t::LEADING
},
308 {"#trailing#", s3select_func_En_t::TRAILING
},
309 {"#decimal_operator#", s3select_func_En_t::DECIMAL_OPERATOR
},
310 {"#cast_as_decimal#", s3select_func_En_t::CAST_TO_DECIMAL
},
311 {"engine_version", s3select_func_En_t::ENGINE_VERSION
}
317 base_function
* create(std::string_view fn_name
,const bs_stmt_vec_t
&);
319 s3select_functions():m_s3select_allocator(nullptr),m_ast_nodes_for_cleanup(nullptr)
324 void setAllocator(s3select_allocator
* alloc
)
326 m_s3select_allocator
= alloc
;
329 void set_AST_nodes_for_cleanup(std::set
<base_statement
*>* ast_for_cleanup
)
331 m_ast_nodes_for_cleanup
= ast_for_cleanup
;
334 s3select_allocator
* getAllocator()
336 return m_s3select_allocator
;
343 class __function
: public base_statement
347 bs_stmt_vec_t arguments
;
348 std::basic_string
<char,std::char_traits
<char>,ChunkAllocator
<char,256>> name
;
349 base_function
* m_func_impl
;
350 s3select_functions
* m_s3select_functions
;
352 bool m_is_aggregate_function
;
361 auto string_to_lower
= [](std::basic_string
<char,std::char_traits
<char>,ChunkAllocator
<char,256>> s
)
363 std::transform(s
.begin(),s
.end(),s
.begin(),[](unsigned char c
){ return std::tolower(c
); });
367 //the function name is converted into lowercase to enable case-insensitive lookup
368 base_function
* f
= m_s3select_functions
->create(string_to_lower(name
),arguments
);
371 throw base_s3select_exception("function not found", base_s3select_exception::s3select_exp_en_t::FATAL
); //should abort query
374 m_is_aggregate_function
= m_func_impl
->is_aggregate();
375 f
->set_function_name(name
.c_str());
380 base_function
* impl()
385 void traverse_and_apply(scratch_area
* sa
, projection_alias
* pa
,bool json_statement
) override
389 m_json_statement
= json_statement
;
390 for (base_statement
* ba
: arguments
)
392 ba
->traverse_and_apply(sa
, pa
, json_statement
);
396 void set_last_call() override
397 {//it covers the use case where an aggregate function is an argument of a non-aggregate function.
399 for (auto& ba
: arguments
)
405 void set_skip_non_aggregate(bool skip_non_aggregate_op
) override
406 {//it covers the use case where an aggregate function is an argument of a non-aggregate function.
407 m_skip_non_aggregate_op
= skip_non_aggregate_op
;
408 for (auto& ba
: arguments
)
410 ba
->set_skip_non_aggregate(m_skip_non_aggregate_op
);
414 bool is_aggregate() const override
416 return m_is_aggregate_function
;
419 bool semantic() override
424 __function(const char* fname
, s3select_functions
* s3f
) : name(fname
), m_func_impl(nullptr), m_s3select_functions(s3f
),m_is_aggregate_function(false){set_operator_name(fname
);}
426 value
& eval() override
428 return eval_internal();
431 value
& eval_internal() override
434 _resolve_name();//node is "resolved" (function is created) upon first call/first row.
436 if (is_last_call
== false)
437 {//all rows prior to last row
438 if(m_skip_non_aggregate_op
== false || is_aggregate() == true)
440 (*m_func_impl
)(&arguments
, &m_result
);
442 else if(m_skip_non_aggregate_op
== true)
444 for(auto& p
: arguments
)
445 {//evaluating the arguments (not the function itself, which is a non-aggregate function)
446 //i.e. in the following use case substring( , sum(),count() ) ; only sum() and count() are evaluated.
452 {//on the last row, the aggregate function is finalized,
453 //and non-aggregate function is evaluated with the result of aggregate function.
455 (*m_func_impl
).get_aggregate_result(&m_result
);
457 (*m_func_impl
)(&arguments
, &m_result
);
460 return m_result
.get_value();
463 void resolve_node() override
467 for (auto& arg
: arguments
)
473 std::string
print(int ident
) override
475 return std::string(0);
478 void push_argument(base_statement
* arg
)
480 arguments
.push_back(arg
);
484 bs_stmt_vec_t
& get_arguments()
489 virtual ~__function() = default;
493 s3-select function definitions
495 struct _fn_add
: public base_function
500 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
502 check_args_size(args
,2);
504 auto iter
= args
->begin();
505 base_statement
* x
= *iter
;
507 base_statement
* y
= *iter
;
509 var_result
= x
->eval() + y
->eval();
511 *result
= var_result
;
517 struct _fn_sum
: public base_function
528 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
530 check_args_size(args
,1);
532 auto iter
= args
->begin();
533 base_statement
* x
= *iter
;
541 sum
= sum
+ x
->eval();
543 catch (base_s3select_exception
& e
)
545 if (e
.severity() == base_s3select_exception::s3select_exp_en_t::FATAL
)
554 void get_aggregate_result(variable
* result
) override
560 struct _fn_count
: public base_function
570 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
573 {// in case argument exist, should count only non-null.
574 auto iter
= args
->begin();
575 base_statement
* x
= *iter
;
577 if(!x
->eval().is_null())
583 {//in case of non-arguments // count()
590 void get_aggregate_result(variable
* result
) override
592 result
->set_value(count
);
597 struct _fn_avg
: public base_function
603 _fn_avg() : sum(0) { aggregate
= true; }
605 bool operator()(bs_stmt_vec_t
* args
, variable
*result
) override
607 check_args_size(args
,1);
609 auto iter
= args
->begin();
610 base_statement
*x
= *iter
;
614 sum
= sum
+ x
->eval();
617 catch (base_s3select_exception
&e
)
619 throw base_s3select_exception(e
.what());
625 void get_aggregate_result(variable
*result
) override
627 if(count
== static_cast<value
>(0)) {
632 *result
= sum
/count
;
637 struct _fn_min
: public base_function
648 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
650 check_args_size(args
,1);
652 auto iter
= args
->begin();
653 base_statement
* x
= *iter
;
655 if(min
.is_null() || min
> x
->eval())
663 void get_aggregate_result(variable
* result
) override
670 struct _fn_max
: public base_function
681 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
683 check_args_size(args
,1);
685 auto iter
= args
->begin();
686 base_statement
* x
= *iter
;
688 if(max
.is_null() || max
< x
->eval())
696 void get_aggregate_result(variable
* result
) override
703 struct _fn_to_int
: public base_function
707 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
709 check_args_size(args
,1);
711 value v
= (*args
->begin())->eval();
715 case value::value_En_t::STRING
:
719 int64_t i
= strtol(v
.str(), &pend
, 10);
720 if (errno
== ERANGE
) {
721 throw base_s3select_exception("converted value would fall out of the range of the result type!");
723 if (pend
== v
.str()) {
725 throw base_s3select_exception("text cannot be converted to a number");
728 throw base_s3select_exception("extra characters after the number");
735 case value::value_En_t::FLOAT
:
736 var_result
= static_cast<int64_t>(v
.dbl());
740 var_result
= v
.i64();
744 *result
= var_result
;
750 struct _fn_to_float
: public base_function
754 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
756 check_args_size(args
,1);
758 value v
= (*args
->begin())->eval();
762 case value::value_En_t::STRING
:
765 double d
= strtod(v
.str(), &pend
);
766 if (errno
== ERANGE
) {
767 throw base_s3select_exception("converted value would fall out of the range of the result type!");
769 if (pend
== v
.str()) {
771 throw base_s3select_exception("text cannot be converted to a number");
774 throw base_s3select_exception("extra characters after the number");
781 case value::value_En_t::FLOAT
:
782 var_result
= v
.dbl();
786 var_result
= v
.i64();
790 *result
= var_result
;
796 struct _fn_to_timestamp
: public base_function
798 bsc::rule
<> date_separator
= bsc::ch_p("-");
799 bsc::rule
<> time_separator
= bsc::ch_p(":");
800 bsc::rule
<> nano_sec_separator
= bsc::ch_p(".");
801 bsc::rule
<> delimiter
= bsc::ch_p("T");
802 bsc::rule
<> zero_timezone
= bsc::ch_p("Z");
803 bsc::rule
<> timezone_sign
= bsc::ch_p("-") | bsc::ch_p("+");
805 uint32_t yr
= 1700, mo
= 1, dy
= 1;
806 bsc::rule
<> dig4
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
807 bsc::rule
<> dig2
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
];
809 bsc::rule
<> d_yyyy_dig
= ((dig4
[BOOST_BIND_ACTION_PARAM(push_4dig
, &yr
)]) >> *(delimiter
));
810 bsc::rule
<> d_yyyymmdd_dig
= ((dig4
[BOOST_BIND_ACTION_PARAM(push_4dig
, &yr
)]) >> *(date_separator
)
811 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &mo
)]) >> *(date_separator
)
812 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &dy
)]) >> *(delimiter
));
814 uint32_t hr
= 0, mn
= 0, sc
= 0, frac_sec
= 0, tz_hr
= 0, tz_mn
= 0, sign
= 0, tm_zone
= '0';
816 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
817 bsc::rule
<> fdig9
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
818 bsc::rule
<> fdig8
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
819 bsc::rule
<> fdig7
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
822 bsc::rule
<> fdig6
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
823 bsc::rule
<> fdig5
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
824 bsc::rule
<> fdig4
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
825 bsc::rule
<> fdig3
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
>> bsc::digit_p
];
826 bsc::rule
<> fdig2
= bsc::lexeme_d
[bsc::digit_p
>> bsc::digit_p
];
827 bsc::rule
<> fdig1
= bsc::lexeme_d
[bsc::digit_p
];
829 bsc::rule
<> d_timezone_dig
= ((timezone_sign
[BOOST_BIND_ACTION_PARAM(push_char
, &sign
)]) >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &tz_hr
)]) >> *(time_separator
)
830 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &tz_mn
)])) | (zero_timezone
[BOOST_BIND_ACTION_PARAM(push_char
, &tm_zone
)]);
832 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
833 bsc::rule
<> fraction_sec
= (fdig9
[BOOST_BIND_ACTION_PARAM(push_9fdig
, &frac_sec
)]) |
834 (fdig8
[BOOST_BIND_ACTION_PARAM(push_8fdig
, &frac_sec
)]) |
835 (fdig7
[BOOST_BIND_ACTION_PARAM(push_7fdig
, &frac_sec
)]) |
836 (fdig6
[BOOST_BIND_ACTION_PARAM(push_6fdig
, &frac_sec
)]) |
837 (fdig5
[BOOST_BIND_ACTION_PARAM(push_5fdig
, &frac_sec
)]) |
838 (fdig4
[BOOST_BIND_ACTION_PARAM(push_4fdig
, &frac_sec
)]) |
839 (fdig3
[BOOST_BIND_ACTION_PARAM(push_3fdig
, &frac_sec
)]) |
840 (fdig2
[BOOST_BIND_ACTION_PARAM(push_2fdig
, &frac_sec
)]) |
841 (fdig1
[BOOST_BIND_ACTION_PARAM(push_1fdig
, &frac_sec
)]);
843 bsc::rule
<> fraction_sec
= (fdig6
[BOOST_BIND_ACTION_PARAM(push_6fdig
, &frac_sec
)]) |
844 (fdig5
[BOOST_BIND_ACTION_PARAM(push_5fdig
, &frac_sec
)]) |
845 (fdig4
[BOOST_BIND_ACTION_PARAM(push_4fdig
, &frac_sec
)]) |
846 (fdig3
[BOOST_BIND_ACTION_PARAM(push_3fdig
, &frac_sec
)]) |
847 (fdig2
[BOOST_BIND_ACTION_PARAM(push_2fdig
, &frac_sec
)]) |
848 (fdig1
[BOOST_BIND_ACTION_PARAM(push_1fdig
, &frac_sec
)]);
851 bsc::rule
<> d_time_dig
= ((dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &hr
)]) >> *(time_separator
)
852 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &mn
)]) >> *(time_separator
)
853 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &sc
)]) >> *(nano_sec_separator
)
854 >> (fraction_sec
) >> (d_timezone_dig
)) |
855 ((dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &hr
)]) >> *(time_separator
)
856 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &mn
)]) >> *(time_separator
)
857 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &sc
)]) >> (d_timezone_dig
)) |
858 ((dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &hr
)]) >> *(time_separator
)
859 >> (dig2
[BOOST_BIND_ACTION_PARAM(push_2dig
, &mn
)]) >> (d_timezone_dig
));
861 bsc::rule
<> d_date_time
= ((d_yyyymmdd_dig
) >> (d_time_dig
)) | (d_yyyymmdd_dig
) | (d_yyyy_dig
);
867 bool datetime_validation()
869 if (yr
>= 1400 && yr
<= 9999 && mo
>= 1 && mo
<= 12 && dy
>= 1 && hr
< 24 && mn
< 60 && sc
< 60 && tz_hour
<= 14 && tz_hour
>= -12 && tz_mn
< 60)
871 if ( (tz_hour
== -12 || tz_hour
== 14) && tz_mn
> 0)
900 if(!(yr
% 4) == 0 && dy
> 28)
904 else if(!(yr
% 100) == 0 && dy
<= 29)
908 else if(!(yr
% 400) == 0 && dy
> 28)
930 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
941 auto iter
= args
->begin();
942 int args_size
= args
->size();
946 throw base_s3select_exception("to_timestamp should have one parameter");
949 base_statement
* str
= *iter
;
953 if (v_str
.type
!= value::value_En_t::STRING
)
955 throw base_s3select_exception("to_timestamp first argument must be string"); //can skip current row
958 bsc::parse_info
<> info_dig
= bsc::parse(v_str
.str(), d_date_time
);
962 if ((char)sign
== '-')
968 if(datetime_validation()==false or !info_dig
.full
)
970 throw base_s3select_exception("input date-time is illegal");
973 boost::posix_time::ptime new_ptime
;
975 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
976 new_ptime
= boost::posix_time::ptime(boost::gregorian::date(yr
, mo
, dy
),
977 boost::posix_time::hours(hr
) +
978 boost::posix_time::minutes(mn
) +
979 boost::posix_time::seconds(sc
) +
980 boost::posix_time::nanoseconds(frac_sec
));
982 new_ptime
= boost::posix_time::ptime(boost::gregorian::date(yr
, mo
, dy
),
983 boost::posix_time::hours(hr
) +
984 boost::posix_time::minutes(mn
) +
985 boost::posix_time::seconds(sc
) +
986 boost::posix_time::microseconds(frac_sec
));
989 tmstmp
= std::make_tuple(new_ptime
, boost::posix_time::time_duration(tz_hour
, tz_min
, 0), (char)tm_zone
== 'Z');
991 result
->set_value(&tmstmp
);
998 struct _fn_to_string_constant
: public base_timestamp_to_string
1000 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1002 param_validation(args
);
1006 prepare_to_string_vector(print_vector
, para
);
1010 std::string result_
= execute_to_string(print_vector
, para
);
1012 result
->set_value(result_
.c_str());
1017 struct _fn_to_string_dynamic
: public base_timestamp_to_string
1019 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1021 param_validation(args
);
1023 print_vector
.clear();
1026 prepare_to_string_vector(print_vector
, para
);
1028 std::string result_
= execute_to_string(print_vector
, para
);
1030 result
->set_value(result_
.c_str());
1035 struct _fn_extract_year_from_timestamp
: public base_date_extract
1037 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1039 param_validation(args
);
1041 result
->set_value( (int64_t)new_ptime
.date().year());
1046 struct _fn_extract_month_from_timestamp
: public base_date_extract
1048 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1050 param_validation(args
);
1052 result
->set_value( (int64_t)new_ptime
.date().month());
1057 struct _fn_extract_day_from_timestamp
: public base_date_extract
1059 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1061 param_validation(args
);
1063 result
->set_value( (int64_t)new_ptime
.date().day());
1068 struct _fn_extract_hour_from_timestamp
: public base_date_extract
1070 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1072 param_validation(args
);
1074 result
->set_value( (int64_t)new_ptime
.time_of_day().hours());
1079 struct _fn_extract_minute_from_timestamp
: public base_date_extract
1081 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1083 param_validation(args
);
1085 result
->set_value( (int64_t)new_ptime
.time_of_day().minutes());
1090 struct _fn_extract_second_from_timestamp
: public base_date_extract
1092 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1094 param_validation(args
);
1096 result
->set_value( (int64_t)new_ptime
.time_of_day().seconds());
1101 struct _fn_extract_week_from_timestamp
: public base_date_extract
1103 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1105 param_validation(args
);
1107 result
->set_value( (int64_t)new_ptime
.date().week_number());
1112 struct _fn_extract_tz_hour_from_timestamp
: public base_date_extract
1114 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1116 param_validation(args
);
1118 result
->set_value((int64_t)td
.hours());
1123 struct _fn_extract_tz_minute_from_timestamp
: public base_date_extract
1125 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1127 param_validation(args
);
1129 result
->set_value((int64_t)td
.minutes());
1134 struct _fn_diff_year_timestamp
: public base_date_diff
1136 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1138 param_validation(args
);
1140 int year1
= ptime1
.date().year();
1141 int year2
= ptime2
.date().year();
1142 boost::posix_time::time_duration time1
= boost::posix_time::time_duration(
1143 ptime1
.time_of_day().hours(), ptime1
.time_of_day().minutes(),
1144 ptime1
.time_of_day().seconds());
1145 boost::posix_time::time_duration time2
= boost::posix_time::time_duration(
1146 ptime2
.time_of_day().hours(), ptime2
.time_of_day().minutes(),
1147 ptime2
.time_of_day().seconds());
1149 if (year2
> year1
&& ((ptime2
.date().day_of_year() < ptime1
.date().day_of_year()) ||
1150 (ptime2
.date().day_of_year() == ptime1
.date().day_of_year() && time2
< time1
)))
1154 else if (year2
< year1
&& ((ptime2
.date().day_of_year() > ptime1
.date().day_of_year()) ||
1155 (ptime2
.date().day_of_year() == ptime1
.date().day_of_year() && time2
> time1
)))
1160 int64_t yr
= year2
- year1
;
1161 result
->set_value( yr
);
1166 struct _fn_diff_month_timestamp
: public base_date_diff
1168 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1170 param_validation(args
);
1172 int year1
= ptime1
.date().year();
1173 int year2
= ptime2
.date().year();
1174 int mon1
= ptime1
.date().month();
1175 int mon2
= ptime2
.date().month();
1176 boost::posix_time::time_duration time1
= boost::posix_time::time_duration(
1177 ptime1
.time_of_day().hours(), ptime1
.time_of_day().minutes(),
1178 ptime1
.time_of_day().seconds());
1179 boost::posix_time::time_duration time2
= boost::posix_time::time_duration(
1180 ptime2
.time_of_day().hours(), ptime2
.time_of_day().minutes(),
1181 ptime2
.time_of_day().seconds());
1185 if (ptime2
.date().day() < ptime1
.date().day() || (ptime2
.date().day() == ptime1
.date().day() && time2
< time1
))
1190 if (ptime2
.date().month() < ptime1
.date().month())
1196 else if (year2
< year1
)
1198 if (ptime2
.date().day() > ptime1
.date().day() || (ptime2
.date().day() == ptime1
.date().day() && time2
> time1
))
1203 if (ptime2
.date().month() > ptime1
.date().month())
1210 int64_t mon_diff
= (year2
- year1
) * 12 + mon2
- mon1
;
1212 result
->set_value(mon_diff
);
1217 struct _fn_diff_day_timestamp
: public base_date_diff
1219 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1221 param_validation(args
);
1223 boost::posix_time::time_duration td_res
= ptime2
- ptime1
;
1224 int total_seconds
= (((td_res
.hours() * 60) + td_res
.minutes()) * 60) + td_res
.seconds();
1225 int64_t days
= total_seconds
/ (24 * 3600);
1227 result
->set_value(days
);
1232 struct _fn_diff_hour_timestamp
: public base_date_diff
1234 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1236 param_validation(args
);
1238 boost::posix_time::time_duration td_res
= ptime2
- ptime1
;
1239 result
->set_value((int64_t)td_res
.hours());
1244 struct _fn_diff_minute_timestamp
: public base_date_diff
1246 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1248 param_validation(args
);
1250 boost::posix_time::time_duration td_res
= ptime2
- ptime1
;
1251 result
->set_value((int64_t)((td_res
.hours() * 60) + td_res
.minutes()));
1256 struct _fn_diff_second_timestamp
: public base_date_diff
1258 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1260 param_validation(args
);
1262 boost::posix_time::time_duration td_res
= ptime2
- ptime1
;
1263 result
->set_value((int64_t)((((td_res
.hours() * 60) + td_res
.minutes()) * 60) + td_res
.seconds()));
1268 struct _fn_add_year_to_timestamp
: public base_date_add
1270 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1272 param_validation(args
);
1274 new_ptime
+= boost::gregorian::years( val_quantity
.i64() );
1275 new_tmstmp
= std::make_tuple(new_ptime
, td
, flag
);
1276 result
->set_value( &new_tmstmp
);
1281 struct _fn_add_month_to_timestamp
: public base_date_add
1283 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1285 param_validation(args
);
1287 int yr
, mn
, dy
, quant
;
1288 quant
= val_quantity
.i64();
1289 dy
= new_ptime
.date().day();
1291 int temp
= quant
% 12;
1292 mn
= new_ptime
.date().month() + temp
;
1294 yr
= new_ptime
.date().year() + temp
;
1319 if ((mn
== 4 || mn
== 6 || mn
== 9 || mn
== 11) && dy
> 30)
1323 else if (mn
== 2 && dy
> 28)
1325 if (!(yr
% 4) == 0 || ((yr
% 100) == 0 && !(yr
% 400) == 0))
1335 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
1336 new_ptime
= boost::posix_time::ptime(boost::gregorian::date(yr
, mn
, dy
),
1337 boost::posix_time::hours(new_ptime
.time_of_day().hours()) +
1338 boost::posix_time::minutes(new_ptime
.time_of_day().minutes()) +
1339 boost::posix_time::seconds(new_ptime
.time_of_day().seconds()) +
1340 boost::posix_time::nanoseconds(new_ptime
.time_of_day().fractional_seconds()));
1342 new_ptime
= boost::posix_time::ptime(boost::gregorian::date(yr
, mn
, dy
),
1343 boost::posix_time::hours(new_ptime
.time_of_day().hours()) +
1344 boost::posix_time::minutes(new_ptime
.time_of_day().minutes()) +
1345 boost::posix_time::seconds(new_ptime
.time_of_day().seconds()) +
1346 boost::posix_time::microseconds(new_ptime
.time_of_day().fractional_seconds()));
1349 new_tmstmp
= std::make_tuple(new_ptime
, td
, flag
);
1350 result
->set_value( &new_tmstmp
);
1355 struct _fn_add_day_to_timestamp
: public base_date_add
1357 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1359 param_validation(args
);
1361 new_ptime
+= boost::gregorian::days( val_quantity
.i64() );
1362 new_tmstmp
= std::make_tuple(new_ptime
, td
, flag
);
1363 result
->set_value( &new_tmstmp
);
1368 struct _fn_add_hour_to_timestamp
: public base_date_add
1370 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1372 param_validation(args
);
1374 new_ptime
+= boost::posix_time::hours( val_quantity
.i64() );
1375 new_tmstmp
= std::make_tuple(new_ptime
, td
, flag
);
1376 result
->set_value( &new_tmstmp
);
1381 struct _fn_add_minute_to_timestamp
: public base_date_add
1383 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1385 param_validation(args
);
1387 new_ptime
+= boost::posix_time::minutes( val_quantity
.i64() );
1388 new_tmstmp
= std::make_tuple(new_ptime
, td
, flag
);
1389 result
->set_value( &new_tmstmp
);
1394 struct _fn_add_second_to_timestamp
: public base_date_add
1396 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1398 param_validation(args
);
1400 new_ptime
+= boost::posix_time::seconds( val_quantity
.i64() );
1401 new_tmstmp
= std::make_tuple(new_ptime
, td
, flag
);
1402 result
->set_value( &new_tmstmp
);
1407 struct _fn_utcnow
: public base_function
1409 timestamp_t now_timestamp
;
1411 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1413 int args_size
= args
->size();
1417 throw base_s3select_exception("utcnow does not expect any parameters");
1420 boost::posix_time::ptime now_ptime
= boost::posix_time::ptime( boost::posix_time::second_clock::universal_time());
1421 now_timestamp
= std::make_tuple(now_ptime
, boost::posix_time::time_duration(0, 0, 0), false);
1422 result
->set_value( &now_timestamp
);
1428 struct _fn_between
: public base_function
1433 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1435 int args_size
= args
->size();
1440 throw base_s3select_exception("between operates on 3 expressions");//TODO FATAL
1443 auto iter
= args
->begin();
1445 base_statement
* second_expr
= *iter
;
1447 base_statement
* first_expr
= *iter
;
1449 base_statement
* main_expr
= *iter
;
1451 value second_expr_val
= second_expr
->eval();
1452 value first_expr_val
= first_expr
->eval();
1453 value main_expr_val
= main_expr
->eval();
1455 if ((second_expr_val
.type
== first_expr_val
.type
&& first_expr_val
.type
== main_expr_val
.type
) || (second_expr_val
.is_number() && first_expr_val
.is_number() && main_expr_val
.is_number()))
1457 if((main_expr_val
>= first_expr_val
) && (main_expr_val
<= second_expr_val
)) {
1458 result
->set_value(true);
1460 result
->set_value(false);
1467 struct _fn_not_between
: public base_function
1471 _fn_between between_op
;
1473 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1475 between_op(args
,result
);
1477 if (result
->get_value().is_true() == 0) {
1478 result
->set_value(true);
1480 result
->set_value(false);
1486 static char s3select_ver
[10]="41.a";
1488 struct _fn_version
: public base_function
1490 value val
; //TODO use git to generate sha1
1491 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1493 val
= &s3select_ver
[0];
1499 struct _fn_isnull
: public base_function
1504 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1506 check_args_size(args
,1);
1508 auto iter
= args
->begin();
1509 base_statement
* expr
= *iter
;
1510 value expr_val
= expr
->eval();
1511 if ( expr_val
.is_null()) {
1512 result
->set_value(true);
1514 result
->set_value(false);
1520 struct _fn_is_not_null
: public base_function
1523 _fn_isnull isnull_op
;
1525 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1528 isnull_op(args
,result
);
1530 if (result
->get_value().is_true() == 0)
1531 result
->set_value(true);
1533 result
->set_value(false);
1539 struct _fn_in
: public base_function
1544 bool operator()(bs_stmt_vec_t
*args
, variable
*result
) override
1546 check_args_size(args
,1);
1548 int args_size
= static_cast<int>(args
->size()-1);
1549 base_statement
*main_expr
= (*args
)[args_size
];
1550 value main_expr_val
= main_expr
->eval();
1552 while (args_size
>=0)
1554 base_statement
*expr
= (*args
)[args_size
];
1555 value expr_val
= expr
->eval();
1557 if ((expr_val
.type
== main_expr_val
.type
) || (expr_val
.is_number() && main_expr_val
.is_number()))
1559 if (expr_val
== main_expr_val
)
1561 result
->set_value(true);
1566 result
->set_value(false);
1571 struct _fn_like
: public base_like
1573 explicit _fn_like(base_statement
* esc
, base_statement
* like_expr
)
1575 auto is_constant
= [&](base_statement
* bs
) {
1576 if (dynamic_cast<variable
*>(bs
) && dynamic_cast<variable
*>(bs
)->m_var_type
== variable::var_t::COLUMN_VALUE
) {
1583 if (is_constant(esc
) && is_constant(like_expr
)) {
1584 constant_state
= true;
1587 if(constant_state
== true)
1589 param_validation(esc
, like_expr
);
1590 std::vector
<char> like_as_regex
= transform(like_expr_val
.str(), *escape_expr_val
.str());
1591 compile(like_as_regex
);
1595 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1597 check_args_size(args
,3);
1599 auto iter
= args
->begin();
1601 base_statement
* escape_expr
= *iter
;
1603 base_statement
* like_expr
= *iter
;
1605 base_statement
* main_expr
= *iter
;
1607 if (constant_state
== false)
1609 param_validation(escape_expr
, like_expr
);
1610 std::vector
<char> like_as_regex
= transform(like_expr_val
.str(), *escape_expr_val
.str());
1611 compile(like_as_regex
);
1614 value main_expr_val
= main_expr
->eval();
1615 if (main_expr_val
.type
!= value::value_En_t::STRING
)
1617 throw base_s3select_exception("main expression must be string");
1620 match(main_expr_val
, result
);
1625 struct _fn_substr
: public base_function
1628 char buff
[4096];// this buffer is persist for the query life time, it use for the results per row(only for the specific function call)
1629 //it prevent from intensive use of malloc/free (fragmentation).
1630 //should validate result length.
1631 //TODO may replace by std::string (dynamic) , or to replace with global allocator , in query scope.
1636 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1638 auto iter
= args
->begin();
1639 int args_size
= args
->size();
1644 throw base_s3select_exception("substr accept 2 arguments or 3");
1647 base_statement
* str
= *iter
;
1649 base_statement
* from
= *iter
;
1657 if (!v_to
.is_number())
1659 throw base_s3select_exception("substr third argument must be number"); //can skip row
1663 v_str
= str
->eval();
1665 if(v_str
.type
!= value::value_En_t::STRING
)
1667 throw base_s3select_exception("substr first argument must be string"); //can skip current row
1670 int str_length
= strlen(v_str
.str());
1672 v_from
= from
->eval();
1673 if(!v_from
.is_number())
1675 throw base_s3select_exception("substr second argument must be number"); //can skip current row
1681 if (v_from
.type
== value::value_En_t::FLOAT
)
1690 if (f
<= 0 && args_size
== 2)
1697 result
->set_value("");
1701 if (str_length
>(int)sizeof(buff
))
1703 throw base_s3select_exception("string too long for internal buffer"); //can skip row
1708 if (v_to
.type
== value::value_En_t::FLOAT
)
1733 if( (str_length
-(f
-1)-t
) <0)
1734 {//in case the requested length is too long, reduce it to exact length.
1735 t
= str_length
-(f
-1);
1738 strncpy(buff
, v_str
.str()+f
-1, t
);
1742 strcpy(buff
, v_str
.str()+f
-1);
1745 result
->set_value(buff
);
1751 struct _fn_charlength
: public base_function
{
1755 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1757 check_args_size(args
,1);
1759 auto iter
= args
->begin();
1760 base_statement
* str
= *iter
;
1761 v_str
= str
->eval();
1762 if(v_str
.type
!= value::value_En_t::STRING
) {
1763 throw base_s3select_exception("content is not string!");
1765 int64_t str_length
= strlen(v_str
.str());
1766 result
->set_value(str_length
);
1772 struct _fn_lower
: public base_function
{
1777 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1779 check_args_size(args
,1);
1781 auto iter
= args
->begin();
1782 base_statement
* str
= *iter
;
1783 v_str
= str
->eval();
1784 if(v_str
.type
!= value::value_En_t::STRING
) {
1785 throw base_s3select_exception("content is not string");
1788 boost::algorithm::to_lower(buff
);
1789 result
->set_value(buff
.c_str());
1795 struct _fn_upper
: public base_function
{
1800 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1802 check_args_size(args
,1);
1804 auto iter
= args
->begin();
1805 base_statement
* str
= *iter
;
1806 v_str
= str
->eval();
1807 if(v_str
.type
!= value::value_En_t::STRING
) {
1808 throw base_s3select_exception("content is not string");
1811 boost::algorithm::to_upper(buff
);
1812 result
->set_value(buff
.c_str());
1818 struct _fn_nullif
: public base_function
{
1823 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1825 auto iter
= args
->begin();
1827 int args_size
= args
->size();
1830 throw base_s3select_exception("nullif accept only 2 arguments");
1832 base_statement
*first
= *iter
;
1835 base_statement
*second
= *iter
;
1837 if (x
.is_null() && y
.is_null())
1847 if (!(x
.is_number() && y
.is_number())) {
1848 if (x
.type
!= y
.type
) {
1862 struct _fn_when_then
: public base_function
{
1866 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1868 check_args_size(args
,2);
1870 auto iter
= args
->begin();
1872 base_statement
* then_expr
= *iter
;
1875 base_statement
* when_expr
= *iter
;
1877 when_value
= when_expr
->eval();
1879 if (when_value
.is_true())//true
1881 *result
= then_expr
->eval();
1891 struct _fn_when_value_then
: public base_function
{
1897 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1899 check_args_size(args
,3);
1901 auto iter
= args
->begin();
1903 base_statement
* then_expr
= *iter
;
1906 base_statement
* when_expr
= *iter
;
1909 base_statement
* case_expr
= *iter
;
1911 when_value
= when_expr
->eval();
1912 case_value
= case_expr
->eval();
1913 then_value
= then_expr
->eval();
1915 if (case_value
== when_value
)
1917 *result
= then_value
;
1926 struct _fn_case_when_else
: public base_function
{
1928 value when_then_value
;
1930 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1932 check_args_size(args
,1);
1934 base_statement
* else_expr
= *(args
->begin());
1936 size_t args_size
= args
->size() -1;
1938 for(int ivec
=args_size
;ivec
>0;ivec
--)
1940 when_then_value
= (*args
)[ivec
]->eval();
1942 if(!when_then_value
.is_null())
1944 *result
= when_then_value
;
1950 *result
= else_expr
->eval();
1955 struct _fn_coalesce
: public base_function
1960 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1962 check_args_size(args
,1);
1964 auto iter_begin
= args
->begin();
1965 int args_size
= args
->size();
1966 while (args_size
>= 1)
1968 base_statement
* expr
= *iter_begin
;
1969 value expr_val
= expr
->eval();
1971 if ( !(expr_val
.is_null())) {
1982 struct _fn_string
: public base_function
1987 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
1989 check_args_size(args
,1);
1991 auto iter
= args
->begin();
1993 base_statement
* expr
= *iter
;
1994 value expr_val
= expr
->eval();
1995 result
->set_value((expr_val
.to_string()));
2000 struct _fn_to_bool
: public base_function
2005 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2007 check_args_size(args
,1);
2010 func_arg
= (*args
->begin())->eval();
2012 if (func_arg
.type
== value::value_En_t::FLOAT
)
2016 else if (func_arg
.type
== value::value_En_t::DECIMAL
|| func_arg
.type
== value::value_En_t::BOOL
)
2026 result
->set_value(false);
2030 result
->set_value(true);
2036 struct _fn_trim
: public base_function
{
2038 std::string input_string
;
2047 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2049 check_args_size(args
,1);
2051 auto iter
= args
->begin();
2052 int args_size
= args
->size();
2053 base_statement
* str
= *iter
;
2054 v_input
= str
->eval();
2055 if(v_input
.type
!= value::value_En_t::STRING
) {
2056 throw base_s3select_exception("content is not string");
2058 input_string
= v_input
.str();
2059 if (args_size
== 2) {
2061 base_statement
* next
= *iter
;
2062 v_remove
= next
->eval();
2064 boost::trim_right_if(input_string
,boost::is_any_of(v_remove
.str()));
2065 boost::trim_left_if(input_string
,boost::is_any_of(v_remove
.str()));
2066 result
->set_value(input_string
.c_str());
2071 struct _fn_leading
: public base_function
{
2073 std::string input_string
;
2082 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2084 check_args_size(args
,1);
2086 auto iter
= args
->begin();
2087 int args_size
= args
->size();
2088 base_statement
* str
= *iter
;
2089 v_input
= str
->eval();
2090 if(v_input
.type
!= value::value_En_t::STRING
) {
2091 throw base_s3select_exception("content is not string");
2093 input_string
= v_input
.str();
2094 if (args_size
== 2) {
2096 base_statement
* next
= *iter
;
2097 v_remove
= next
->eval();
2099 boost::trim_left_if(input_string
,boost::is_any_of(v_remove
.str()));
2100 result
->set_value(input_string
.c_str());
2105 struct _fn_trailing
: public base_function
{
2107 std::string input_string
;
2116 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2118 check_args_size(args
,1);
2120 auto iter
= args
->begin();
2121 int args_size
= args
->size();
2122 base_statement
* str
= *iter
;
2123 v_input
= str
->eval();
2124 if(v_input
.type
!= value::value_En_t::STRING
) {
2125 throw base_s3select_exception("content is not string");
2127 input_string
= v_input
.str();
2128 if (args_size
== 2) {
2130 base_statement
* next
= *iter
;
2131 v_remove
= next
->eval();
2133 boost::trim_right_if(input_string
,boost::is_any_of(v_remove
.str()));
2134 result
->set_value(input_string
.c_str());
2139 struct _fn_cast_to_decimal
: public base_function
{
2141 int32_t precision
=-1;
2144 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2146 //cast(expr as decimal(x,y))
2147 check_args_size(args
,2);
2149 base_statement
* expr
= (*args
)[1];
2150 //expr_val should be float or integer
2151 //dynamic value for the decimal operator to get the precision and scale
2153 _fn_to_float to_float
;
2154 bs_stmt_vec_t args_vec
;
2155 args_vec
.push_back(expr
);
2156 to_float(&args_vec
,result
);
2158 if (precision
== -1 || scale
== -1){
2159 base_statement
* decimal_expr
= (*args
)[0];
2160 decimal_expr
->eval().get_precision_scale(&precision
,&scale
);
2163 result
->set_precision_scale(&precision
,&scale
);
2169 struct _fn_decimal_operator
: public base_function
{
2171 int32_t precision
=-1;
2174 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2176 //decimal(x,y) operator
2177 check_args_size(args
,2);
2179 auto iter
= args
->begin();
2180 base_statement
* expr_precision
= *iter
;
2181 value expr_precision_val
= expr_precision
->eval();
2184 base_statement
* expr_scale
= *iter
;
2185 value expr_scale_val
= expr_scale
->eval();
2187 precision
= expr_precision_val
.i64();
2188 scale
= expr_scale_val
.i64();
2190 result
->set_precision_scale(&precision
,&scale
);
2196 struct _fn_engine_version
: public base_function
{
2198 const char* version_description
=R
"(PR #137 :
2199 the change handle the use cases where the JSON input starts with an anonymous array/object this may cause wrong search result per the user request(SQL statement)
2201 handle the use-case where the user requests a json-key-path that may point to a non-discrete value. i.e. array or an object.
2204 fix for CSV flow, in the case of a "broken row
" (upon processing stream of data)
2206 null results upon aggregation functions on an empty group (no match for where clause).
2210 _fn_engine_version()
2215 bool operator()(bs_stmt_vec_t
* args
, variable
* result
) override
2217 result
->set_value(version_description
);
2222 base_function
* s3select_functions::create(std::string_view fn_name
,const bs_stmt_vec_t
&arguments
)
2224 const FunctionLibrary::const_iterator iter
= m_functions_library
.find(fn_name
.data());
2226 if (iter
== m_functions_library
.end())
2229 msg
= std::string
{fn_name
} + " " + " function not found";
2230 throw base_s3select_exception(msg
, base_s3select_exception::s3select_exp_en_t::FATAL
);
2233 switch (iter
->second
)
2235 case s3select_func_En_t::ADD
:
2236 return S3SELECT_NEW(this,_fn_add
);
2239 case s3select_func_En_t::SUM
:
2240 return S3SELECT_NEW(this,_fn_sum
);
2243 case s3select_func_En_t::COUNT
:
2244 return S3SELECT_NEW(this,_fn_count
);
2247 case s3select_func_En_t::MIN
:
2248 return S3SELECT_NEW(this,_fn_min
);
2251 case s3select_func_En_t::MAX
:
2252 return S3SELECT_NEW(this,_fn_max
);
2255 case s3select_func_En_t::TO_INT
:
2256 return S3SELECT_NEW(this,_fn_to_int
);
2259 case s3select_func_En_t::TO_FLOAT
:
2260 return S3SELECT_NEW(this,_fn_to_float
);
2263 case s3select_func_En_t::SUBSTR
:
2264 return S3SELECT_NEW(this,_fn_substr
);
2267 case s3select_func_En_t::TO_TIMESTAMP
:
2268 return S3SELECT_NEW(this,_fn_to_timestamp
);
2271 case s3select_func_En_t::TO_STRING_CONSTANT
:
2272 return S3SELECT_NEW(this,_fn_to_string_constant
);
2275 case s3select_func_En_t::TO_STRING_DYNAMIC
:
2276 return S3SELECT_NEW(this,_fn_to_string_dynamic
);
2279 case s3select_func_En_t::TO_BOOL
:
2280 return S3SELECT_NEW(this,_fn_to_bool
);
2283 case s3select_func_En_t::EXTRACT_YEAR
:
2284 return S3SELECT_NEW(this,_fn_extract_year_from_timestamp
);
2287 case s3select_func_En_t::EXTRACT_MONTH
:
2288 return S3SELECT_NEW(this,_fn_extract_month_from_timestamp
);
2291 case s3select_func_En_t::EXTRACT_DAY
:
2292 return S3SELECT_NEW(this,_fn_extract_day_from_timestamp
);
2295 case s3select_func_En_t::EXTRACT_HOUR
:
2296 return S3SELECT_NEW(this,_fn_extract_hour_from_timestamp
);
2299 case s3select_func_En_t::EXTRACT_MINUTE
:
2300 return S3SELECT_NEW(this,_fn_extract_minute_from_timestamp
);
2303 case s3select_func_En_t::EXTRACT_SECOND
:
2304 return S3SELECT_NEW(this,_fn_extract_second_from_timestamp
);
2307 case s3select_func_En_t::EXTRACT_WEEK
:
2308 return S3SELECT_NEW(this,_fn_extract_week_from_timestamp
);
2311 case s3select_func_En_t::EXTRACT_TIMEZONE_HOUR
:
2312 return S3SELECT_NEW(this,_fn_extract_tz_hour_from_timestamp
);
2315 case s3select_func_En_t::EXTRACT_TIMEZONE_MINUTE
:
2316 return S3SELECT_NEW(this,_fn_extract_tz_minute_from_timestamp
);
2319 case s3select_func_En_t::DATE_ADD_YEAR
:
2320 return S3SELECT_NEW(this,_fn_add_year_to_timestamp
);
2323 case s3select_func_En_t::DATE_ADD_MONTH
:
2324 return S3SELECT_NEW(this,_fn_add_month_to_timestamp
);
2327 case s3select_func_En_t::DATE_ADD_DAY
:
2328 return S3SELECT_NEW(this,_fn_add_day_to_timestamp
);
2331 case s3select_func_En_t::DATE_ADD_HOUR
:
2332 return S3SELECT_NEW(this,_fn_add_hour_to_timestamp
);
2335 case s3select_func_En_t::DATE_ADD_MINUTE
:
2336 return S3SELECT_NEW(this,_fn_add_minute_to_timestamp
);
2339 case s3select_func_En_t::DATE_ADD_SECOND
:
2340 return S3SELECT_NEW(this,_fn_add_second_to_timestamp
);
2343 case s3select_func_En_t::DATE_DIFF_YEAR
:
2344 return S3SELECT_NEW(this,_fn_diff_year_timestamp
);
2347 case s3select_func_En_t::DATE_DIFF_MONTH
:
2348 return S3SELECT_NEW(this,_fn_diff_month_timestamp
);
2351 case s3select_func_En_t::DATE_DIFF_DAY
:
2352 return S3SELECT_NEW(this,_fn_diff_day_timestamp
);
2355 case s3select_func_En_t::DATE_DIFF_HOUR
:
2356 return S3SELECT_NEW(this,_fn_diff_hour_timestamp
);
2359 case s3select_func_En_t::DATE_DIFF_MINUTE
:
2360 return S3SELECT_NEW(this,_fn_diff_minute_timestamp
);
2363 case s3select_func_En_t::DATE_DIFF_SECOND
:
2364 return S3SELECT_NEW(this,_fn_diff_second_timestamp
);
2367 case s3select_func_En_t::UTCNOW
:
2368 return S3SELECT_NEW(this,_fn_utcnow
);
2371 case s3select_func_En_t::AVG
:
2372 return S3SELECT_NEW(this,_fn_avg
);
2375 case s3select_func_En_t::LOWER
:
2376 return S3SELECT_NEW(this,_fn_lower
);
2379 case s3select_func_En_t::UPPER
:
2380 return S3SELECT_NEW(this,_fn_upper
);
2383 case s3select_func_En_t::LENGTH
:
2384 return S3SELECT_NEW(this,_fn_charlength
);
2387 case s3select_func_En_t::BETWEEN
:
2388 return S3SELECT_NEW(this,_fn_between
);
2391 case s3select_func_En_t::NOT_BETWEEN
:
2392 return S3SELECT_NEW(this,_fn_not_between
);
2395 case s3select_func_En_t::IS_NULL
:
2396 return S3SELECT_NEW(this,_fn_isnull
);
2399 case s3select_func_En_t::IS_NOT_NULL
:
2400 return S3SELECT_NEW(this,_fn_is_not_null
);
2403 case s3select_func_En_t::IN
:
2404 return S3SELECT_NEW(this,_fn_in
);
2407 case s3select_func_En_t::VERSION
:
2408 return S3SELECT_NEW(this,_fn_version
);
2411 case s3select_func_En_t::NULLIF
:
2412 return S3SELECT_NEW(this,_fn_nullif
);
2415 case s3select_func_En_t::LIKE
:
2416 return S3SELECT_NEW(this,_fn_like
,arguments
[0],arguments
[1]);
2419 case s3select_func_En_t::COALESCE
:
2420 return S3SELECT_NEW(this,_fn_coalesce
);
2423 case s3select_func_En_t::WHEN_THEN
:
2424 return S3SELECT_NEW(this,_fn_when_then
);
2427 case s3select_func_En_t::WHEN_VALUE_THEN
:
2428 return S3SELECT_NEW(this,_fn_when_value_then
);
2431 case s3select_func_En_t::CASE_WHEN_ELSE
:
2432 return S3SELECT_NEW(this,_fn_case_when_else
);
2435 case s3select_func_En_t::STRING
:
2436 return S3SELECT_NEW(this,_fn_string
);
2439 case s3select_func_En_t::TRIM
:
2440 return S3SELECT_NEW(this,_fn_trim
);
2443 case s3select_func_En_t::LEADING
:
2444 return S3SELECT_NEW(this,_fn_leading
);
2447 case s3select_func_En_t::TRAILING
:
2448 return S3SELECT_NEW(this,_fn_trailing
);
2451 case s3select_func_En_t::DECIMAL_OPERATOR
:
2452 return S3SELECT_NEW(this,_fn_decimal_operator
);
2455 case s3select_func_En_t::CAST_TO_DECIMAL
:
2456 return S3SELECT_NEW(this,_fn_cast_to_decimal
);
2459 case s3select_func_En_t::ENGINE_VERSION
:
2460 return S3SELECT_NEW(this,_fn_engine_version
);
2464 throw base_s3select_exception("internal error while resolving function-name");
2469 bool base_statement::is_function() const
2471 if (dynamic_cast<__function
*>(const_cast<base_statement
*>(this)))
2481 const base_statement
* base_statement::get_aggregate() const
2483 //search for aggregation function in AST
2484 const base_statement
* res
= 0;
2491 if (left() && (res
=left()->get_aggregate())!=0)
2496 if (right() && (res
=right()->get_aggregate())!=0)
2503 for (auto i
: dynamic_cast<__function
*>(const_cast<base_statement
*>(this))->get_arguments())
2505 const base_statement
* b
=i
->get_aggregate();
2515 bool base_statement::is_column_reference() const
2521 return left()->is_column_reference();
2524 return right()->is_column_reference();
2528 for(auto a
: dynamic_cast<__function
*>(const_cast<base_statement
*>(this))->get_arguments())
2530 if(a
->is_column_reference())
2538 bool base_statement::is_nested_aggregate(bool &aggr_flow
) const
2543 for (auto& i
: dynamic_cast<__function
*>(const_cast<base_statement
*>(this))->get_arguments())
2545 if (i
->get_aggregate() != nullptr)
2552 if(left() && left()->is_nested_aggregate(aggr_flow
))
2555 if(right() && right()->is_nested_aggregate(aggr_flow
))
2560 for (auto& i
: dynamic_cast<__function
*>(const_cast<base_statement
*>(this))->get_arguments())
2562 if (i
->get_aggregate() != nullptr)
2564 return i
->is_nested_aggregate(aggr_flow
);
2572 bool base_statement::is_statement_contain_star_operation() const
2574 if(is_star_operation())
2578 return left()->is_statement_contain_star_operation();
2581 return right()->is_statement_contain_star_operation();
2585 for(auto a
: dynamic_cast<__function
*>(const_cast<base_statement
*>(this))->get_arguments())
2587 if(a
->is_star_operation())
2595 bool base_statement::mark_aggreagtion_subtree_to_execute()
2596 {//purpase:: set aggregation subtree as runnable.
2597 //the function search for aggregation function, and mark its subtree {skip = false}
2599 set_skip_non_aggregate(false);
2602 left()->mark_aggreagtion_subtree_to_execute();
2605 right()->mark_aggreagtion_subtree_to_execute();
2609 for (auto& i
: dynamic_cast<__function
*>(this)->get_arguments())
2611 i
->mark_aggreagtion_subtree_to_execute();
2618 void base_statement::push_for_cleanup(std::set
<base_statement
*>& ast_nodes_to_delete
)//semantic loop on each projection
2620 //placement new is releasing the main-buffer in which all AST nodes
2621 //allocating from it. meaning no calls to destructors.
2622 //the purpose of this routine is to traverse the AST in map all nodes for cleanup.
2623 //the cleanup method will trigger all destructors.
2625 ast_nodes_to_delete
.insert(this);
2628 left()->push_for_cleanup(ast_nodes_to_delete
);
2631 right()->push_for_cleanup(ast_nodes_to_delete
);
2635 for (auto& i
: dynamic_cast<__function
*>(this)->get_arguments())
2637 i
->push_for_cleanup(ast_nodes_to_delete
);
2643 void base_statement::extract_columns(parquet_file_parser::column_pos_t
&cols
,const uint16_t max_columns
)
2644 {// purpose: to extract all column-ids from query
2645 if(is_column()) //column reference or column position
2646 {variable
* v
= dynamic_cast<variable
*>(this);
2647 if(dynamic_cast<variable
*>(this)->m_var_type
== variable::var_t::VARIABLE_NAME
)
2650 if (v
->getScratchArea()->get_column_pos(v
->get_name().c_str())>=0)
2651 {//column belong to schema
2652 cols
.insert( v
->getScratchArea()->get_column_pos(v
->get_name().c_str() ));
2654 if(v
->getAlias()->search_alias(v
->get_name()))
2655 {//column is an alias --> extract columns belong to alias
2656 //TODO cyclic alias to resolve
2657 v
->getAlias()->search_alias(v
->get_name())->extract_columns(cols
,max_columns
);
2659 //column is not alias --> error
2660 std::stringstream ss
;
2661 ss
<< "column " + v
->get_name() + " is not part of schema nor an alias";
2662 throw base_s3select_exception(ss
.str(),base_s3select_exception::s3select_exp_en_t::FATAL
);
2665 }else if(v
->m_var_type
== variable::var_t::STAR_OPERATION
)
2667 for(uint16_t i
=0;i
<max_columns
;i
++)
2673 if (v
->get_column_pos()>=max_columns
)
2675 std::stringstream ss
;
2676 ss
<< "column " + std::to_string( v
->get_column_pos()+1 ) + " exceed max number of columns";
2677 throw base_s3select_exception(ss
.str(),base_s3select_exception::s3select_exp_en_t::FATAL
);
2679 cols
.insert(v
->get_column_pos());//push column positions
2681 }else if(is_function())
2683 __function
* f
= (dynamic_cast<__function
*>(this));
2684 bs_stmt_vec_t args
= f
->get_arguments();
2685 for (auto prm
: args
)
2686 {//traverse function args
2687 prm
->extract_columns(cols
,max_columns
);
2692 //keep traversing down the AST
2694 left()->extract_columns(cols
,max_columns
);
2697 right()->extract_columns(cols
,max_columns
);
2699 #endif //_ARROW_EXIST
2701 } //namespace s3selectEngine