//typedef std::vector<base_statement *> bs_stmt_vec_t; //without specific allocator
//ChunkAllocator, prevent allocation from heap.
-typedef std::vector<base_statement*, ChunkAllocator<base_statement*, 256> > bs_stmt_vec_t;
+typedef std::vector<base_statement*, ChunkAllocator<base_statement*, 4096> > bs_stmt_vec_t;
class base_s3select_exception : public std::exception
{
_msg = n;
}
- virtual const char* what() const _GLIBCXX_NOTHROW
+ virtual const char* what() const noexcept
{
return _msg.c_str();
}
std::string m_str_value;
//std::basic_string<char,std::char_traits<char>,ChunkAllocator<char,256>> m_str_value;
+ int32_t m_precision=-1;
+ int32_t m_scale=-1;
+
public:
enum class value_En_t
{
type = value_En_t::S3NULL;
}
+ void set_precision_scale(int32_t* precision, int32_t* scale)
+ {
+ m_precision = *precision;
+ m_scale = *scale;
+ }
+
+ void get_precision_scale(int32_t* precision, int32_t* scale)
+ {
+ *precision = m_precision;
+ *scale = m_scale;
+ }
+
void set_string_nocopy(char* str)
{//purpose: value does not own the string
__val.str = str;
}
else if(type == value_En_t::FLOAT)
{
- m_to_string = boost::lexical_cast<std::string>(__val.dbl);
+ if(m_precision != -1 && m_scale != -1)
+ {
+ std::stringstream ss;
+ ss << std::fixed << std::setprecision(m_scale) << __val.dbl;
+ m_to_string = ss.str();
+ }
+ else
+ {
+ m_to_string.assign( boost::lexical_cast<std::string>(__val.dbl) );
+ }
}
else if (type == value_En_t::TIMESTAMP)
{
if (lhs.is_nan() || rhs.is_nan())
{
return false;
- }
+ }
+// in the case of NULL on right-side or NULL on left-side, the result is false.
+ if(lhs.is_null() || rhs.is_null())
+ {
+ return false;
+ }
+
throw base_s3select_exception("operands not of the same type(numeric , string), while comparision");
}
bool operator<=(const value& v)
json_star_op_cont_t m_json_star_operation;
scratch_area():m_upper_bound(-1),parquet_type(false),buff_loc(0),max_json_idx(-1)
- {//TODO it should resize dynamicly
+ {
m_schema_values = new std::vector<value>(128,value(nullptr));
}
void update(std::vector<char*>& tokens, size_t num_of_tokens)
{
size_t i=0;
+ //increase the Vector::m_schema_values capacity(it should happen few times)
+ if ((*m_schema_values).capacity() < tokens.size())
+ {
+ (*m_schema_values).resize( tokens.size() * 2 );
+ }
+
for(auto s : tokens)
{
if (i>=num_of_tokens)
}
void get_column_value(uint16_t column_pos, value &v)
- {// TODO handle out of boundaries
+ {
+ if (column_pos > ((*m_schema_values).size()-1))
+ {
+ throw base_s3select_exception("accessing scratch buffer beyond its size");
+ }
+
v = (*m_schema_values)[ column_pos ];
}
value* get_column_value(uint16_t column_pos)
{
+ if (column_pos > ((*m_schema_values).size()-1))
+ {
+ throw base_s3select_exception("accessing scratch buffer beyond its size");
+ }
+
return &(*m_schema_values)[ column_pos ];
}
{
max_json_idx = json_idx;
}
+
+ //increase the Vector::m_schema_values capacity(it should happen few times)
+ if ((*m_schema_values).capacity() < static_cast<unsigned long long>(max_json_idx))
+ {
+ (*m_schema_values).resize(max_json_idx * 2);
+ }
+
(*m_schema_values)[ json_idx ] = v;
if(json_idx>m_upper_bound)
m_upper_bound =0;
buff_loc=0;
+ //increase the Vector::m_schema_values capacity(it should happen few times)
+ if ((*m_schema_values).capacity() < parquet_row_value.size())
+ {
+ (*m_schema_values).resize(parquet_row_value.size() * 2);
+ }
+
+ if (*column_pos_iter > ((*m_schema_values).size()-1))
+ {
+ throw base_s3select_exception("accessing scratch buffer beyond its size");
+ }
+
for(auto v : parquet_row_value)
{
//TODO (parquet_value_t) --> (value) , or better get it as value (i.e. parquet reader know class-value)
value value_na;
//JSON queries has different syntax from other data-sources(Parquet,CSV)
bool m_json_statement;
+ uint64_t number_of_calls = 0;
+ std::string operator_name;
public:
base_statement():m_scratch(nullptr), is_last_call(false), m_is_cache_result(false),
m_projection_alias(nullptr), m_eval_stack_depth(0), m_skip_non_aggregate_op(false),m_json_statement(false) {}
+ void set_operator_name(const char* op)
+ {
+#ifdef S3SELECT_PROF
+ operator_name = op;
+#endif
+ }
+
virtual value& eval()
{
+#ifdef S3SELECT_PROF
+ number_of_calls++;
+#endif
//purpose: on aggregation flow to run only the correct subtree(aggregation subtree)
if (m_skip_non_aggregate_op == false)
return m_eval_stack_depth;
}
- virtual ~base_statement() {}
+ virtual ~base_statement()
+{
+#ifdef S3SELECT_PROF
+std::cout<< operator_name << ":" << number_of_calls <<std::endl;
+#endif
+}
void dtor()
{
const int undefined_column_pos = -1;
const int column_alias = -2;
+ const char* this_operator_name = "variable";
public:
- variable():m_var_type(var_t::NA), _name(""), column_pos(-1), json_variable_idx(-1) {}
+ variable():m_var_type(var_t::NA), _name(""), column_pos(-1), json_variable_idx(-1){set_operator_name(this_operator_name);}
- explicit variable(int64_t i) : m_var_type(var_t::COLUMN_VALUE), column_pos(-1), var_value(i), json_variable_idx(-1) {}
+ explicit variable(int64_t i) : m_var_type(var_t::COLUMN_VALUE), column_pos(-1), var_value(i), json_variable_idx(-1){set_operator_name(this_operator_name);}
- explicit variable(double d) : m_var_type(var_t::COLUMN_VALUE), _name("#"), column_pos(-1), var_value(d), json_variable_idx(-1) {}
+ explicit variable(double d) : m_var_type(var_t::COLUMN_VALUE), _name("#"), column_pos(-1), var_value(d), json_variable_idx(-1){set_operator_name(this_operator_name);}
- explicit variable(int i) : m_var_type(var_t::COLUMN_VALUE), column_pos(-1), var_value(i), json_variable_idx(-1) {}
+ explicit variable(int i) : m_var_type(var_t::COLUMN_VALUE), column_pos(-1), var_value(i), json_variable_idx(-1){set_operator_name(this_operator_name);}
- explicit variable(const std::string& n) : m_var_type(var_t::VARIABLE_NAME), _name(n), column_pos(-1), json_variable_idx(-1) {}
+ explicit variable(const std::string& n) : m_var_type(var_t::VARIABLE_NAME), _name(n), column_pos(-1), json_variable_idx(-1){set_operator_name(this_operator_name);}
explicit variable(const std::string& n, var_t tp, size_t json_idx) : m_var_type(var_t::NA)
{//only upon JSON use case
+ set_operator_name(this_operator_name);
if(tp == variable::var_t::JSON_VARIABLE)
{
m_var_type = variable::var_t::JSON_VARIABLE;
variable(const std::string& n, var_t tp) : m_var_type(var_t::NA)
{
+ set_operator_name(this_operator_name);
if(tp == variable::var_t::POS)
{
_name = n;
explicit variable(s3select_reserved_word::reserve_word_en_t reserve_word)
{
+ set_operator_name(this_operator_name);
if (reserve_word == s3select_reserved_word::reserve_word_en_t::S3S_NULL)
{
m_var_type = variable::var_t::COLUMN_VALUE;
var_value.setnull();
}
+ void set_precision_scale(int32_t* p, int32_t* s)
+ {
+ var_value.set_precision_scale(p, s);
+ }
+
virtual ~variable() {}
virtual bool is_column() const //is reference to column.
{
m_scratch->get_column_value(column_pos,var_value);
//in the case of successive column-delimiter {1,some_data,,3}=> third column is NULL
- if (var_value.is_string() && (var_value.str()== 0 || (var_value.str() && *var_value.str()==0)))
+ if (var_value.is_string() && (var_value.str()== 0 || (var_value.str() && *var_value.str()==0))){
var_value.setnull();//TODO is it correct for Parquet
+ }
}
return var_value;
virtual value& eval_internal()
{
- if ((l->eval()).is_null()) {
+ value l_val = l->eval();
+ value r_val;
+ if (l_val.is_null()) {
var_value.setnull();
return var_value;
- } else if((r->eval()).is_null()) {
+ } else {r_val = r->eval();}
+ if(r_val.is_null()) {
var_value.setnull();
return var_value;
}
switch (_cmp)
{
case cmp_t::EQ:
- return var_value = bool( (l->eval() == r->eval()) ^ negation_result );
+ return var_value = bool( (l_val == r_val) ^ negation_result );
break;
case cmp_t::LE:
- return var_value = bool( (l->eval() <= r->eval()) ^ negation_result );
+ return var_value = bool( (l_val <= r_val) ^ negation_result );
break;
case cmp_t::GE:
- return var_value = bool( (l->eval() >= r->eval()) ^ negation_result );
+ return var_value = bool( (l_val >= r_val) ^ negation_result );
break;
case cmp_t::NE:
- return var_value = bool( (l->eval() != r->eval()) ^ negation_result );
+ return var_value = bool( (l_val != r_val) ^ negation_result );
break;
case cmp_t::GT:
- return var_value = bool( (l->eval() > r->eval()) ^ negation_result );
+ return var_value = bool( (l_val > r_val) ^ negation_result );
break;
case cmp_t::LT:
- return var_value = bool( (l->eval() < r->eval()) ^ negation_result );
+ return var_value = bool( (l_val < r_val) ^ negation_result );
break;
default:
}
}
- arithmetic_operand(base_statement* _l, cmp_t c, base_statement* _r):l(_l), r(_r), _cmp(c),negation_result(false) {}
+ arithmetic_operand(base_statement* _l, cmp_t c, base_statement* _r):l(_l), r(_r), _cmp(c),negation_result(false){set_operator_name("arithmetic_operand");}
explicit arithmetic_operand(base_statement* p)//NOT operator
{
return true;
}
- logical_operand(base_statement* _l, oplog_t _o, base_statement* _r):l(_l), r(_r), _oplog(_o),negation_result(false) {}
+ logical_operand(base_statement* _l, oplog_t _o, base_statement* _r):l(_l), r(_r), _oplog(_o),negation_result(false){set_operator_name("logical_operand");}
explicit logical_operand(base_statement * p)//NOT operator
{
}
}
- mulldiv_operation(base_statement* _l, muldiv_t c, base_statement* _r):l(_l), r(_r), _mulldiv(c) {}
+ mulldiv_operation(base_statement* _l, muldiv_t c, base_statement* _r):l(_l), r(_r), _mulldiv(c){set_operator_name("mulldiv_operation");}
virtual ~mulldiv_operation() {}
};
public:
- explicit negate_function_operation(base_statement *f):function_to_negate(f){}
+ explicit negate_function_operation(base_statement *f):function_to_negate(f){set_operator_name("negate_function_operation");}
virtual std::string print(int ident)
{
//TODO add semantic to base-function , it operate once on function creation
// validate semantic on creation instead on run-time
virtual bool operator()(bs_stmt_vec_t* args, variable* result) = 0;
+ std::string m_function_name;
base_function() : aggregate(false) {}
bool is_aggregate() const
{
this->~base_function();
}
+ void check_args_size(bs_stmt_vec_t* args, uint16_t required, const char* error_msg)
+ {//verify for atleast required parameters
+ if(args->size() < required)
+ {
+ throw base_s3select_exception(error_msg,base_s3select_exception::s3select_exp_en_t::FATAL);
+ }
+ }
+
+ void check_args_size(bs_stmt_vec_t* args,uint16_t required)
+ {
+ if(args->size() < required)
+ {
+ std::string error_msg = m_function_name + " requires for " + std::to_string(required) + " arguments";
+ throw base_s3select_exception(error_msg,base_s3select_exception::s3select_exp_en_t::FATAL);
+ }
+ }
+
+ void set_function_name(const char* name)
+ {
+ m_function_name.assign(name);
+ }
};
class base_date_extract : public base_function