1 #ifndef __S3SELECT_OPER__
2 #define __S3SELECT_OPER__
12 #include <boost/lexical_cast.hpp>
13 #include <boost/date_time/posix_time/posix_time.hpp>
14 #include <boost/bind.hpp>
15 namespace bsc
= BOOST_SPIRIT_CLASSIC_NS
;
17 namespace s3selectEngine
20 class base_s3select_exception
24 enum class s3select_exp_en_t
32 s3select_exp_en_t m_severity
;
36 base_s3select_exception(const char* n
) : m_severity(s3select_exp_en_t::NONE
)
40 base_s3select_exception(const char* n
, s3select_exp_en_t severity
) : m_severity(severity
)
44 base_s3select_exception(std::string n
, s3select_exp_en_t severity
) : m_severity(severity
)
49 virtual const char* what()
54 s3select_exp_en_t
severity()
59 virtual ~base_s3select_exception() {}
// pointer to a dynamically allocated buffer, which is used for placement new.
64 static __thread
char* _s3select_buff_ptr
=0;
66 class s3select_allocator
//s3select is the "owner"
70 std::vector
<char*> list_of_buff
;
74 #define __S3_ALLOCATION_BUFF__ (8*1024)
75 s3select_allocator():m_idx(0)
77 list_of_buff
.push_back((char*)malloc(__S3_ALLOCATION_BUFF__
));
80 void set_global_buff()
82 char* buff
= list_of_buff
.back();
83 _s3select_buff_ptr
= &buff
[ m_idx
];
86 void check_capacity(size_t sz
)
88 if (sz
>__S3_ALLOCATION_BUFF__
)
90 throw base_s3select_exception("requested size too big", base_s3select_exception::s3select_exp_en_t::FATAL
);
93 if ((m_idx
+ sz
) >= __S3_ALLOCATION_BUFF__
)
95 list_of_buff
.push_back((char*)malloc(__S3_ALLOCATION_BUFF__
));
103 m_idx
+= sizeof(char*) - (m_idx
% sizeof(char*)); //alignment
// not a must; it's for safety.
109 _s3select_buff_ptr
=0;
112 virtual ~s3select_allocator()
114 for(auto b
: list_of_buff
)
121 class __clt_allocator
124 s3select_allocator
* m_s3select_allocator
;
128 __clt_allocator():m_s3select_allocator(0) {}
130 void set(s3select_allocator
* a
)
132 m_s3select_allocator
= a
;
// placement new: all s3select objects are allocated on a single buffer (or a few buffers); those objects are deallocated by releasing the buffer.
137 #define S3SELECT_NEW( type , ... ) [=]() \
139 m_s3select_allocator->check_capacity(sizeof( type )); \
140 m_s3select_allocator->set_global_buff(); \
141 auto res=new (_s3select_buff_ptr) type(__VA_ARGS__); \
142 m_s3select_allocator->inc(sizeof( type )); \
143 m_s3select_allocator->zero(); \
151 std::vector
<std::string_view
> m_columns
{128};
154 std::vector
<std::pair
<std::string
, int >> m_column_name_pos
;
158 void set_column_pos(const char* n
, int pos
)//TODO use std::string
160 m_column_name_pos
.push_back( std::pair
<const char*, int>(n
, pos
));
163 void update(std::vector
<char*> tokens
, size_t num_of_tokens
)
168 if (i
>=num_of_tokens
)
179 int get_column_pos(const char* n
)
181 //done only upon building the AST , not on "runtime"
183 std::vector
<std::pair
<std::string
, int >>::iterator iter
;
185 for( auto iter
: m_column_name_pos
)
187 if (!strcmp(iter
.first
.c_str(), n
))
196 std::string_view
get_column_value(int column_pos
)
199 if ((column_pos
>= m_upper_bound
) || column_pos
< 0)
201 throw base_s3select_exception("column_position_is_wrong", base_s3select_exception::s3select_exp_en_t::ERROR
);
204 return m_columns
[column_pos
];
207 int get_num_of_columns()
209 return m_upper_bound
;
class base_statement;

// Purpose: mapping from alias name to base_statement*.
// These routines are *NOT* intensive; they work once per query, at parse time.
// NOTE(review): access specifiers are not visible in this listing; the map is kept
// private and the three routines public — confirm against the full file.
class projection_alias
{
private:
  std::vector<std::pair<std::string, base_statement*>> alias_map;

public:
  // Gives access to the underlying (alias name, statement) list.
  std::vector<std::pair<std::string, base_statement*>>* get()
  {
    return &alias_map;
  }

  // Registers alias_name -> bs. Alias names must be unique:
  // returns false (and stores nothing) when the name already exists, true otherwise.
  bool insert_new_entry(const std::string& alias_name, base_statement* bs)
  {
    // const reference: avoids copying each pair (and its string) per iteration.
    for (const auto& alias : alias_map)
    {
      if (alias.first == alias_name)
      {
        return false; //alias name already exists
      }
    }

    alias_map.emplace_back(alias_name, bs);
    return true;
  }

  // Returns the statement registered under alias_name, or a null pointer when
  // no such alias exists.
  base_statement* search_alias(const std::string& alias_name)
  {
    for (const auto& alias : alias_map)
    {
      if (alias.first == alias_name)
      {
        return alias.second; //reference to execution node
      }
    }

    return nullptr;
  }
};
261 double operator()(double a
, double b
)
269 double operator()(double a
, double b
)
277 double operator()(double a
, double b
)
285 double operator()(double a
, double b
)
293 double operator()(double a
, double b
)
306 char* str
;//TODO consider string_view
308 boost::posix_time::ptime
* timestamp
;
313 std::string m_to_string
;
314 std::string m_str_value
;
317 enum class value_En_t
327 value(int64_t n
) : type(value_En_t::DECIMAL
)
331 value(int n
) : type(value_En_t::DECIMAL
)
335 value(bool b
) : type(value_En_t::DECIMAL
)
337 __val
.num
= (int64_t)b
;
339 value(double d
) : type(value_En_t::FLOAT
)
343 value(boost::posix_time::ptime
* timestamp
) : type(value_En_t::TIMESTAMP
)
345 __val
.timestamp
= timestamp
;
348 value(const char* s
) : type(value_En_t::STRING
)
350 m_str_value
.assign(s
);
351 __val
.str
= m_str_value
.data();
354 value():type(value_En_t::NA
)
359 bool is_number() const
361 if ((type
== value_En_t::DECIMAL
|| type
== value_En_t::FLOAT
))
369 bool is_string() const
371 return type
== value_En_t::STRING
;
373 bool is_timestamp() const
375 return type
== value_En_t::TIMESTAMP
;
379 std::string
& to_string() //TODO very intensive , must improve this
382 if (type
!= value_En_t::STRING
)
384 if (type
== value_En_t::DECIMAL
)
386 m_to_string
.assign( boost::lexical_cast
<std::string
>(__val
.num
) );
388 else if(type
== value_En_t::FLOAT
)
390 m_to_string
= boost::lexical_cast
<std::string
>(__val
.dbl
);
394 m_to_string
= to_simple_string( *__val
.timestamp
);
399 m_to_string
.assign( __val
.str
);
406 value
& operator=(value
& o
)
408 if(this->type
== value_En_t::STRING
)
410 m_str_value
.assign(o
.str());
411 __val
.str
= m_str_value
.data();
415 this->__val
= o
.__val
;
423 value
& operator=(const char* s
)
425 m_str_value
.assign(s
);
426 this->__val
.str
= m_str_value
.data();
427 this->type
= value_En_t::STRING
;
432 value
& operator=(int64_t i
)
435 this->type
= value_En_t::DECIMAL
;
440 value
& operator=(double d
)
443 this->type
= value_En_t::FLOAT
;
448 value
& operator=(bool b
)
450 this->__val
.num
= (int64_t)b
;
451 this->type
= value_En_t::DECIMAL
;
456 value
& operator=(boost::posix_time::ptime
* p
)
458 this->__val
.timestamp
= p
;
459 this->type
= value_En_t::TIMESTAMP
;
479 boost::posix_time::ptime
* timestamp() const
481 return __val
.timestamp
;
484 bool operator<(const value
& v
)//basic compare operator , most itensive runtime operation
487 if (is_string() && v
.is_string())
489 return strcmp(__val
.str
, v
.__val
.str
) < 0;
492 if (is_number() && v
.is_number())
495 if(type
!= v
.type
) //conversion //TODO find better way
497 if (type
== value_En_t::DECIMAL
)
499 return (double)__val
.num
< v
.__val
.dbl
;
503 return __val
.dbl
< (double)v
.__val
.num
;
508 if(type
== value_En_t::DECIMAL
)
510 return __val
.num
< v
.__val
.num
;
514 return __val
.dbl
< v
.__val
.dbl
;
520 if(is_timestamp() && v
.is_timestamp())
522 return *timestamp() < *(v
.timestamp());
525 throw base_s3select_exception("operands not of the same type(numeric , string), while comparision");
528 bool operator>(const value
& v
) //basic compare operator , most itensive runtime operation
531 if (is_string() && v
.is_string())
533 return strcmp(__val
.str
, v
.__val
.str
) > 0;
536 if (is_number() && v
.is_number())
539 if(type
!= v
.type
) //conversion //TODO find better way
541 if (type
== value_En_t::DECIMAL
)
543 return (double)__val
.num
> v
.__val
.dbl
;
547 return __val
.dbl
> (double)v
.__val
.num
;
552 if(type
== value_En_t::DECIMAL
)
554 return __val
.num
> v
.__val
.num
;
558 return __val
.dbl
> v
.__val
.dbl
;
564 if(is_timestamp() && v
.is_timestamp())
566 return *timestamp() > *(v
.timestamp());
569 throw base_s3select_exception("operands not of the same type(numeric , string), while comparision");
572 bool operator==(const value
& v
) //basic compare operator , most itensive runtime operation
575 if (is_string() && v
.is_string())
577 return strcmp(__val
.str
, v
.__val
.str
) == 0;
581 if (is_number() && v
.is_number())
584 if(type
!= v
.type
) //conversion //TODO find better way
586 if (type
== value_En_t::DECIMAL
)
588 return (double)__val
.num
== v
.__val
.dbl
;
592 return __val
.dbl
== (double)v
.__val
.num
;
597 if(type
== value_En_t::DECIMAL
)
599 return __val
.num
== v
.__val
.num
;
603 return __val
.dbl
== v
.__val
.dbl
;
609 if(is_timestamp() && v
.is_timestamp())
611 return *timestamp() == *(v
.timestamp());
614 throw base_s3select_exception("operands not of the same type(numeric , string), while comparision");
616 bool operator<=(const value
& v
)
620 bool operator>=(const value
& v
)
624 bool operator!=(const value
& v
)
626 return !(*this == v
);
629 template<typename binop
> //conversion rules for arithmetical binary operations
630 value
& compute(value
& l
, const value
& r
) //left should be this, it contain the result
634 if (l
.is_string() || r
.is_string())
636 throw base_s3select_exception("illegal binary operation with string");
639 if (l
.type
!= r
.type
)
643 if (l
.type
== value_En_t::DECIMAL
)
645 l
.__val
.dbl
= __op((double)l
.__val
.num
, r
.__val
.dbl
);
646 l
.type
= value_En_t::FLOAT
;
650 l
.__val
.dbl
= __op(l
.__val
.dbl
, (double)r
.__val
.num
);
651 l
.type
= value_En_t::FLOAT
;
658 if (l
.type
== value_En_t::DECIMAL
)
660 l
.__val
.num
= __op(l
.__val
.num
, r
.__val
.num
);
661 l
.type
= value_En_t::DECIMAL
;
665 l
.__val
.dbl
= __op(l
.__val
.dbl
, r
.__val
.dbl
);
666 l
.type
= value_En_t::FLOAT
;
673 value
& operator+(const value
& v
)
675 return compute
<binop_plus
>(*this, v
);
678 value
& operator-(const value
& v
)
680 return compute
<binop_minus
>(*this, v
);
683 value
& operator*(const value
& v
)
685 return compute
<binop_mult
>(*this, v
);
688 value
& operator/(const value
& v
) // TODO handle division by zero
690 return compute
<binop_div
>(*this, v
);
693 value
& operator^(const value
& v
)
695 return compute
<binop_pow
>(*this, v
);
705 scratch_area
* m_scratch
;
706 projection_alias
* m_aliases
;
707 bool is_last_call
; //valid only for aggregation functions
708 bool m_is_cache_result
;
709 value m_alias_result
;
710 base_statement
* m_projection_alias
;
711 int m_eval_stack_depth
;
714 base_statement():m_scratch(0), is_last_call(false), m_is_cache_result(false), m_projection_alias(0), m_eval_stack_depth(0) {}
715 virtual value
& eval() =0;
716 virtual base_statement
* left()
720 virtual base_statement
* right()
724 virtual std::string
print(int ident
) =0;//TODO complete it, one option to use level parametr in interface ,
725 virtual bool semantic() =0;//done once , post syntax , traverse all nodes and validate semantics.
727 virtual void traverse_and_apply(scratch_area
* sa
, projection_alias
* pa
)
733 left()->traverse_and_apply(m_scratch
, m_aliases
);
737 right()->traverse_and_apply(m_scratch
, m_aliases
);
741 virtual bool is_aggregate()
745 virtual bool is_column()
751 bool is_aggregate_exist_in_expression(base_statement
* e
);//TODO obsolete ?
752 base_statement
* get_aggregate();
753 bool is_nested_aggregate(base_statement
* e
);
754 bool is_binop_aggregate_and_column(base_statement
* skip
);
756 virtual void set_last_call()
761 left()->set_last_call();
765 right()->set_last_call();
769 bool is_set_last_call()
774 void invalidate_cache_result()
776 m_is_cache_result
= false;
779 bool is_result_cached()
781 return m_is_cache_result
== true;
784 void set_result_cache(value
& eval_result
)
786 m_alias_result
= eval_result
;
787 m_is_cache_result
= true;
790 void dec_call_stack_depth()
792 m_eval_stack_depth
--;
795 value
& get_result_cache()
797 return m_alias_result
;
800 int& get_eval_call_depth()
802 m_eval_stack_depth
++;
803 return m_eval_stack_depth
;
806 virtual ~base_statement() {}
810 class variable
: public base_statement
818 VAR
,//schema column (i.e. age , price , ...)
819 COL_VALUE
, //concrete value
820 POS
, // CSV column number (i.e. _1 , _2 ... )
821 STAR_OPERATION
, //'*'
830 std::string m_star_op_result
;
831 char m_star_op_result_charc
[4096]; //TODO should be dynamic
833 const int undefined_column_pos
= -1;
834 const int column_alias
= -2;
837 variable():m_var_type(var_t::NA
), _name(""), column_pos(-1) {}
839 variable(int64_t i
) : m_var_type(var_t::COL_VALUE
), column_pos(-1), var_value(i
) {}
841 variable(double d
) : m_var_type(var_t::COL_VALUE
), _name("#"), column_pos(-1), var_value(d
) {}
843 variable(int i
) : m_var_type(var_t::COL_VALUE
), column_pos(-1), var_value(i
) {}
845 variable(const std::string
& n
) : m_var_type(var_t::VAR
), _name(n
), column_pos(-1) {}
847 variable(const std::string
& n
, var_t tp
) : m_var_type(var_t::NA
)
849 if(tp
== variable::var_t::POS
)
853 int pos
= atoi( n
.c_str() + 1 ); //TODO >0 < (schema definition , semantic analysis)
854 column_pos
= pos
-1;// _1 is the first column ( zero position )
856 else if (tp
== variable::var_t::COL_VALUE
)
861 var_value
= n
.c_str();
863 else if (tp
==variable::var_t::STAR_OPERATION
)
871 void operator=(value
& v
)
876 void set_value(const char* s
)
881 void set_value(double d
)
886 void set_value(int64_t i
)
891 void set_value(boost::posix_time::ptime
* p
)
896 virtual ~variable() {}
898 virtual bool is_column() //is reference to column.
900 if(m_var_type
== var_t::VAR
|| m_var_type
== var_t::POS
)
909 return var_value
; //TODO is it correct
911 virtual value::value_En_t
get_value_type()
913 return var_value
.type
;
917 value
& star_operation() //purpose return content of all columns in a input stream
923 int num_of_columns
= m_scratch
->get_num_of_columns();
924 for(i
=0; i
<num_of_columns
-1; i
++)
926 size_t len
= m_scratch
->get_column_value(i
).size();
927 if((pos
+len
)>sizeof(m_star_op_result_charc
))
929 throw base_s3select_exception("result line too long", base_s3select_exception::s3select_exp_en_t::FATAL
);
932 memcpy(&m_star_op_result_charc
[pos
], m_scratch
->get_column_value(i
).data(), len
);
934 m_star_op_result_charc
[ pos
] = ',';//TODO need for another abstraction (per file type)
939 size_t len
= m_scratch
->get_column_value(i
).size();
940 if((pos
+len
)>sizeof(m_star_op_result_charc
))
942 throw base_s3select_exception("result line too long", base_s3select_exception::s3select_exp_en_t::FATAL
);
945 memcpy(&m_star_op_result_charc
[pos
], m_scratch
->get_column_value(i
).data(), len
);
946 m_star_op_result_charc
[ pos
+ len
] = 0;
947 var_value
= (char*)&m_star_op_result_charc
[0];
951 virtual value
& eval()
953 if (m_var_type
== var_t::COL_VALUE
)
955 return var_value
; // a literal,could be deciml / float / string
957 else if(m_var_type
== var_t::STAR_OPERATION
)
959 return star_operation();
961 else if (column_pos
== undefined_column_pos
)
963 //done once , for the first time
964 column_pos
= m_scratch
->get_column_pos(_name
.c_str());
966 if(column_pos
>=0 && m_aliases
->search_alias(_name
.c_str()))
968 throw base_s3select_exception(std::string("multiple definition of column {") + _name
+ "} as schema-column and alias", base_s3select_exception::s3select_exp_en_t::FATAL
);
972 if (column_pos
== undefined_column_pos
)
974 //not belong to schema , should exist in aliases
975 m_projection_alias
= m_aliases
->search_alias(_name
.c_str());
977 //not enter this scope again
978 column_pos
= column_alias
;
979 if(m_projection_alias
== 0)
981 throw base_s3select_exception(std::string("alias {")+_name
+std::string("} or column not exist in schema"), base_s3select_exception::s3select_exp_en_t::FATAL
);
987 if (m_projection_alias
)
989 if (m_projection_alias
->get_eval_call_depth()>2)
991 throw base_s3select_exception("number of calls exceed maximum size, probably a cyclic reference to alias", base_s3select_exception::s3select_exp_en_t::FATAL
);
994 if (m_projection_alias
->is_result_cached() == false)
996 var_value
= m_projection_alias
->eval();
997 m_projection_alias
->set_result_cache(var_value
);
1001 var_value
= m_projection_alias
->get_result_cache();
1004 m_projection_alias
->dec_call_stack_depth();
1008 var_value
= (char*)m_scratch
->get_column_value(column_pos
).data(); //no allocation. returning pointer of allocated space
1014 virtual std::string
print(int ident
)
1016 //std::string out = std::string(ident,' ') + std::string("var:") + std::to_string(var_value.__val.num);
1018 return std::string("#");//TBD
1021 virtual bool semantic()
1028 class arithmetic_operand
: public base_statement
1033 enum class cmp_t
{NA
, EQ
, LE
, LT
, GT
, GE
, NE
} ;
1044 virtual bool semantic()
1049 virtual base_statement
* left()
1053 virtual base_statement
* right()
1058 virtual std::string
print(int ident
)
1060 //std::string out = std::string(ident,' ') + "compare:" += std::to_string(_cmp) + "\n" + l->print(ident-5) +r->print(ident+5);
1062 return std::string("#");//TBD
1065 virtual value
& eval()
1071 return var_value
= (l
->eval() == r
->eval());
1075 return var_value
= (l
->eval() <= r
->eval());
1079 return var_value
= (l
->eval() >= r
->eval());
1083 return var_value
= (l
->eval() != r
->eval());
1087 return var_value
= (l
->eval() > r
->eval());
1091 return var_value
= (l
->eval() < r
->eval());
1095 throw base_s3select_exception("internal error");
1100 arithmetic_operand(base_statement
* _l
, cmp_t c
, base_statement
* _r
):l(_l
), r(_r
), _cmp(c
) {}
1102 virtual ~arithmetic_operand() {}
1105 class logical_operand
: public base_statement
1110 enum class oplog_t
{AND
, OR
, NA
};
1121 virtual base_statement
* left()
1125 virtual base_statement
* right()
1130 virtual bool semantic()
1135 logical_operand(base_statement
* _l
, oplog_t _o
, base_statement
* _r
):l(_l
), r(_r
), _oplog(_o
) {}
1137 virtual ~logical_operand() {}
1139 virtual std::string
print(int ident
)
1141 //std::string out = std::string(ident, ' ') + "logical_operand:" += std::to_string(_oplog) + "\n" + l->print(ident - 5) + r->print(ident + 5);
1143 return std::string("#");//TBD
1145 virtual value
& eval()
1147 if (_oplog
== oplog_t::AND
)
1151 throw base_s3select_exception("missing operand for logical and", base_s3select_exception::s3select_exp_en_t::FATAL
);
1153 return var_value
= (l
->eval().i64() && r
->eval().i64());
1159 throw base_s3select_exception("missing operand for logical or", base_s3select_exception::s3select_exp_en_t::FATAL
);
1161 return var_value
= (l
->eval().i64() || r
->eval().i64());
1167 class mulldiv_operation
: public base_statement
1172 enum class muldiv_t
{NA
, MULL
, DIV
, POW
} ;
1183 virtual base_statement
* left()
1187 virtual base_statement
* right()
1192 virtual bool semantic()
1197 virtual std::string
print(int ident
)
1199 //std::string out = std::string(ident, ' ') + "mulldiv_operation:" += std::to_string(_mulldiv) + "\n" + l->print(ident - 5) + r->print(ident + 5);
1201 return std::string("#");//TBD
1204 virtual value
& eval()
1208 case muldiv_t::MULL
:
1209 return var_value
= l
->eval() * r
->eval();
1213 return var_value
= l
->eval() / r
->eval();
1217 return var_value
= l
->eval() ^ r
->eval();
1221 throw base_s3select_exception("internal error");
1226 mulldiv_operation(base_statement
* _l
, muldiv_t c
, base_statement
* _r
):l(_l
), r(_r
), _mulldiv(c
) {}
1228 virtual ~mulldiv_operation() {}
1231 class addsub_operation
: public base_statement
1236 enum class addsub_op_t
{ADD
, SUB
, NA
};
1247 virtual base_statement
* left()
1251 virtual base_statement
* right()
1256 virtual bool semantic()
1261 addsub_operation(base_statement
* _l
, addsub_op_t _o
, base_statement
* _r
):l(_l
), r(_r
), _op(_o
) {}
1263 virtual ~addsub_operation() {}
1265 virtual std::string
print(int ident
)
1267 //std::string out = std::string(ident, ' ') + "addsub_operation:" += std::to_string(_op) + "\n" + l->print(ident - 5) + r->print(ident + 5);
1268 return std::string("#");//TBD
1271 virtual value
& eval()
1273 if (_op
== addsub_op_t::NA
) // -num , +num , unary-operation on number
1277 return var_value
= l
->eval();
1281 return var_value
= r
->eval();
1284 else if (_op
== addsub_op_t::ADD
)
1286 return var_value
= (l
->eval() + r
->eval());
1290 return var_value
= (l
->eval() - r
->eval());
//TODO add a semantic check to base_function; it should operate once, on function creation,
// so that semantics are validated at creation time instead of at run-time
1306 virtual bool operator()(std::vector
<base_statement
*>* args
, variable
* result
) = 0;
1307 base_function() : aggregate(false) {}
1310 return aggregate
== true;
1312 virtual void get_aggregate_result(variable
*) {}
1314 virtual ~base_function() {}