]> git.proxmox.com Git - ceph.git/blob - ceph/src/s3select/test/s3select_test.cpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / s3select / test / s3select_test.cpp
1 /*
2 * /usr/include/boost/bind.hpp:36:1: note: ‘#pragma message: The practice of declaring the Bind placeholders (_1, _2, ...) in the global namespace is deprecated. Please use <boost/bind/bind.hpp> + using namespace boost::placeholders, or define BOOST_BIND_GLOBAL_PLACEHOLDERS to retain the current behavior.’
3 */
4 #define BOOST_BIND_GLOBAL_PLACEHOLDERS
5
6 #include "s3select.h"
7 #include "gtest/gtest.h"
8 #include <string>
9 #include <fstream>
10 #include <iomanip>
11 #include <algorithm>
12 #include "boost/date_time/gregorian/gregorian.hpp"
13 #include "boost/date_time/posix_time/posix_time.hpp"
14
15 using namespace s3selectEngine;
16
17 // parquet conversion
18 // ============================================================ //
19 #include <cassert>
20 #include <fstream>
21 #include <iostream>
22 #include <memory>
23
24 #ifdef _ARROW_EXIST
25
26 #include <arrow/io/file.h>
27 #include <arrow/util/logging.h>
28
29 #include <parquet/api/reader.h>
30 #include <parquet/api/writer.h>
31
32 using parquet::ConvertedType;
33 using parquet::Repetition;
34 using parquet::Type;
35 using parquet::schema::GroupNode;
36 using parquet::schema::PrimitiveNode;
37
38 #endif
39
40 constexpr int NUM_ROWS = 100000;
41 constexpr int64_t ROW_GROUP_SIZE = 1024 * 1024;
42 const char PARQUET_FILENAME[] = "/tmp/csv_converted.parquet";
43
44 class tokenize {
45
46 public:
47 const char *s;
48 std::string input;
49 const char *p;
50 bool last_token;
51
52 tokenize(std::string& in):s(0),input(in),p(input.c_str()),last_token(false)
53 {
54 };
55
56 void get_token(std::string& token)
57 {
58 if(!*p)
59 {
60 token = "";
61 last_token = true;
62 return;
63 }
64
65 s=p;
66 while(*p && *p != ',' && *p != '\n') p++;
67
68 token = std::string(s,p);
69 p++;
70 }
71
72 bool is_last()
73 {
74 return last_token == true;
75 }
76 };
77
78 #ifdef _ARROW_EXIST
79
80 static std::shared_ptr<GroupNode> column_string_2(uint32_t num_of_columns) {
81
82 parquet::schema::NodeVector fields;
83
84 for(uint32_t i=0;i<num_of_columns;i++)
85 {
86 std::string column_name = "column_" + std::to_string(i) ;
87 fields.push_back(PrimitiveNode::Make(column_name, Repetition::OPTIONAL, Type::BYTE_ARRAY,
88 ConvertedType::NONE));
89 }
90
91 return std::static_pointer_cast<GroupNode>(
92 GroupNode::Make("schema", Repetition::REQUIRED, fields));
93 }
94
95 int csv_to_parquet(std::string & csv_object)
96 {
97
98 auto csv_num_of_columns = std::count( csv_object.begin(),csv_object.begin() + csv_object.find('\n'),',')+1;
99 auto csv_num_of_rows = std::count(csv_object.begin(),csv_object.end(),'\n');
100
101 tokenize csv_tokens(csv_object);
102
103 try {
104 // Create a local file output stream instance.
105
106 using FileClass = ::arrow::io::FileOutputStream;
107 std::shared_ptr<FileClass> out_file;
108 PARQUET_ASSIGN_OR_THROW(out_file, FileClass::Open(PARQUET_FILENAME));
109
110 // Setup the parquet schema
111 std::shared_ptr<GroupNode> schema = column_string_2(csv_num_of_columns);
112
113 // Add writer properties
114 parquet::WriterProperties::Builder builder;
115 // builder.compression(parquet::Compression::SNAPPY);
116 std::shared_ptr<parquet::WriterProperties> props = builder.build();
117
118 // Create a ParquetFileWriter instance
119 std::shared_ptr<parquet::ParquetFileWriter> file_writer =
120 parquet::ParquetFileWriter::Open(out_file, schema, props);
121
122 // Append a BufferedRowGroup to keep the RowGroup open until a certain size
123 parquet::RowGroupWriter* rg_writer = file_writer->AppendBufferedRowGroup();
124
125 int num_columns = file_writer->num_columns();
126 std::vector<int64_t> buffered_values_estimate(num_columns, 0);
127
128 for (int i = 0; !csv_tokens.is_last() && i<csv_num_of_rows; i++) {
129 int64_t estimated_bytes = 0;
130 // Get the estimated size of the values that are not written to a page yet
131 for (int n = 0; n < num_columns; n++) {
132 estimated_bytes += buffered_values_estimate[n];
133 }
134
135 // We need to consider the compressed pages
136 // as well as the values that are not compressed yet
137 if ((rg_writer->total_bytes_written() + rg_writer->total_compressed_bytes() +
138 estimated_bytes) > ROW_GROUP_SIZE) {
139 rg_writer->Close();
140 std::fill(buffered_values_estimate.begin(), buffered_values_estimate.end(), 0);
141 rg_writer = file_writer->AppendBufferedRowGroup();
142 }
143
144
145 int col_id;
146 for(col_id=0;col_id<num_columns && !csv_tokens.is_last();col_id++)
147 {
148 // Write the byte-array column
149 parquet::ByteArrayWriter* ba_writer =
150 static_cast<parquet::ByteArrayWriter*>(rg_writer->column(col_id));
151 parquet::ByteArray ba_value;
152
153 std::string token;
154 csv_tokens.get_token(token);
155 if(token.size() == 0)
156 {//null column
157 int16_t definition_level = 0;
158 ba_writer->WriteBatch(1, &definition_level, nullptr, nullptr);
159 }
160 else
161 {
162 int16_t definition_level = 1;
163 ba_value.ptr = (uint8_t*)(token.data());
164 ba_value.len = token.size();
165 ba_writer->WriteBatch(1, &definition_level, nullptr, &ba_value);
166 }
167
168 buffered_values_estimate[col_id] = ba_writer->EstimatedBufferedValueBytes();
169
170
171 } //end-for columns
172
173 if(csv_tokens.is_last() && col_id<num_columns)
174 {
175 for(;col_id<num_columns;col_id++)
176 {
177 parquet::ByteArrayWriter* ba_writer =
178 static_cast<parquet::ByteArrayWriter*>(rg_writer->column(col_id));
179
180 int16_t definition_level = 0;
181 ba_writer->WriteBatch(1, &definition_level, nullptr, nullptr);
182
183 buffered_values_estimate[col_id] = ba_writer->EstimatedBufferedValueBytes();
184 }
185
186 }
187
188 } // end-for rows
189
190 // Close the RowGroupWriter
191 rg_writer->Close();
192 // Close the ParquetFileWriter
193 file_writer->Close();
194
195 // Write the bytes to file
196 DCHECK(out_file->Close().ok());
197
198 } catch (const std::exception& e) {
199 std::cerr << "Parquet write error: " << e.what() << std::endl;
200 return -1;
201 }
202
203 return 0;
204 }
205
206 int run_query_on_parquet_file(const char* input_query, const char* input_file, std::string &result)
207 {
208 int status;
209 s3select s3select_syntax;
210 result.clear();
211
212 status = s3select_syntax.parse_query(input_query);
213 if (status != 0)
214 {
215 std::cout << "failed to parse query " << s3select_syntax.get_error_description() << std::endl;
216 return -1;
217 }
218
219 FILE *fp;
220
221 fp=fopen(input_file,"r");
222
223 if(!fp){
224 std::cout << "can not open " << input_file << std::endl;
225 return -1;
226 }
227
228 std::function<int(void)> fp_get_size=[&]()
229 {
230 struct stat l_buf;
231 lstat(input_file,&l_buf);
232 return l_buf.st_size;
233 };
234
235 std::function<size_t(int64_t,int64_t,void*,optional_yield*)> fp_range_req=[&](int64_t start,int64_t length,void *buff,optional_yield*y)
236 {
237 fseek(fp,start,SEEK_SET);
238 fread(buff, length, 1, fp);
239 return length;
240 };
241
242 rgw_s3select_api rgw;
243 rgw.set_get_size_api(fp_get_size);
244 rgw.set_range_req_api(fp_range_req);
245
246 std::function<int(std::string&)> fp_s3select_result_format = [](std::string& result){return 0;};//append
247 std::function<int(std::string&)> fp_s3select_header_format = [](std::string& result){return 0;};//append
248
249 parquet_object parquet_processor(input_file,&s3select_syntax,&rgw);
250
251 //std::string result;
252
253 do
254 {
255 try
256 {
257 status = parquet_processor.run_s3select_on_object(result,fp_s3select_result_format,fp_s3select_header_format);
258 }
259 catch (base_s3select_exception &e)
260 {
261 if (e.severity() == base_s3select_exception::s3select_exp_en_t::FATAL) //abort query execution
262 {
263 return -1;
264 }
265 }
266
267 if (status < 0)
268 break;
269
270 } while (0);
271
272 return 0;
273 }// ============================================================ //
274 #else
275 int run_query_on_parquet_file(const char* input_query, const char* input_file, std::string &result)
276 {
277 return 0;
278 }
279 #endif //_ARROW_EXIST
280
281
282 std::string run_expression_in_C_prog(const char* expression)
283 {
284 //purpose: per use-case a c-file is generated, compiles , and finally executed.
285
286 // side note: its possible to do the following: cat test_hello.c | gcc -pipe -x c - -o /dev/stdout > ./1
287 // gcc can read and write from/to pipe (use pipe2()) i.e. not using file-system , BUT should also run gcc-output from memory
288
289 const int C_FILE_SIZE=(1024*1024);
290 std::string c_test_file = std::string("/tmp/test_s3.c");
291 std::string c_run_file = std::string("/tmp/s3test");
292
293 FILE* fp_c_file = fopen(c_test_file.c_str(), "w");
294
295 //contain return result
296 char result_buff[100];
297
298 char* prog_c = 0;
299
300 if(fp_c_file)
301 {
302 prog_c = (char*)malloc(C_FILE_SIZE);
303
304 size_t sz=sprintf(prog_c,"#include <stdio.h>\n \
305 #include <float.h>\n \
306 int main() \
307 {\
308 printf(\"%%.*e\\n\",DECIMAL_DIG,(double)(%s));\
309 } ", expression);
310
311 fwrite(prog_c, 1, sz, fp_c_file);
312 fclose(fp_c_file);
313 }
314
315 std::string gcc_and_run_cmd = std::string("gcc ") + c_test_file + " -o " + c_run_file + " -Wall && " + c_run_file;
316
317 FILE* fp_build = popen(gcc_and_run_cmd.c_str(), "r"); //TODO read stderr from pipe
318
319 if(!fp_build)
320 {
321 if(prog_c)
322 free(prog_c);
323
324 return std::string("#ERROR#");
325 }
326
327 fgets(result_buff, sizeof(result_buff), fp_build);
328
329 unlink(c_run_file.c_str());
330 unlink(c_test_file.c_str());
331 fclose(fp_build);
332
333 if(prog_c)
334 free(prog_c);
335
336 return std::string(result_buff);
337 }
338
339 #define OPER oper[ rand() % oper.size() ]
340
341 class gen_expr
342 {
343
344 private:
345
346 int open = 0;
347 std::string oper= {"+-+*/*"};
348
349 std::string gexpr()
350 {
351 return std::to_string(rand() % 1000) + ".0" + OPER + std::to_string(rand() % 1000) + ".0";
352 }
353
354 std::string g_openp()
355 {
356 if ((rand() % 3) == 0)
357 {
358 open++;
359 return std::string("(");
360 }
361 return std::string("");
362 }
363
364 std::string g_closep()
365 {
366 if ((rand() % 2) == 0 && open > 0)
367 {
368 open--;
369 return std::string(")");
370 }
371 return std::string("");
372 }
373
374 public:
375
376 std::string generate()
377 {
378 std::string exp = "";
379 open = 0;
380
381 for (int i = 0; i < 10; i++)
382 {
383 exp = (exp.size() > 0 ? exp + OPER : std::string("")) + g_openp() + gexpr() + OPER + gexpr() + g_closep();
384 }
385
386 if (open)
387 for (; open--;)
388 {
389 exp += ")";
390 }
391
392 return exp;
393 }
394 };
395
396 const std::string failure_sign("#failure#");
397
398 std::string string_to_quot(std::string& s, char quot = '"')
399 {
400 std::string result = "";
401 std::stringstream str_strm;
402 str_strm << s;
403 std::string temp_str;
404 int temp_int;
405 while(!str_strm.eof()) {
406 str_strm >> temp_str;
407 if(std::stringstream(temp_str) >> temp_int) {
408 std::stringstream s1;
409 s1 << temp_int;
410 result += quot + s1.str() + quot + "\n";
411 }
412 temp_str = "";
413 }
414 return result;
415 }
416
417 void parquet_csv_report_error(std::string a, std::string b)
418 {
419 #ifdef _ARROW_EXIST
420 ASSERT_EQ(a,b);
421 #else
422 ASSERT_EQ(0,0);
423 #endif
424 }
425
426 std::string run_s3select(std::string expression)
427 {//purpose: run query on single row and return result(single projections).
428 s3select s3select_syntax;
429
430 int status = s3select_syntax.parse_query(expression.c_str());
431
432 if(status)
433 return failure_sign;
434
435 std::string s3select_result;
436 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
437 std::string in = "1,1,1,1\n";
438 std::string csv_obj = in;
439 std::string parquet_result;
440
441 s3_csv_object.run_s3select_on_object(s3select_result, in.c_str(), in.size(), false, false, true);
442
443 s3select_result = s3select_result.substr(0, s3select_result.find_first_of(","));
444 s3select_result = s3select_result.substr(0, s3select_result.find_first_of("\n"));//remove last \n
445
446 #ifdef _ARROW_EXIST
447 csv_to_parquet(csv_obj);
448 run_query_on_parquet_file(expression.c_str(),PARQUET_FILENAME,parquet_result);
449 parquet_result = parquet_result.substr(0, parquet_result.find_first_of(","));
450 parquet_result = parquet_result.substr(0, parquet_result.find_first_of("\n"));//remove last \n
451
452 parquet_csv_report_error(parquet_result,s3select_result);
453 #endif
454
455 return s3select_result;
456 }
457
458 void run_s3select_test_opserialization(std::string expression,std::string input, char *row_delimiter, char *column_delimiter)
459 {//purpose: run query on multiple rows and return result(multiple projections).
460 s3select s3select_syntax;
461
462 int status = s3select_syntax.parse_query(expression.c_str());
463
464 if(status)
465 return;
466
467 std::string s3select_result;
468 csv_object::csv_defintions csv;
469 csv.redundant_column = false;
470
471 csv.output_row_delimiter = *row_delimiter;
472 csv.output_column_delimiter = *column_delimiter;
473
474 s3selectEngine::csv_object s3_csv_object(&s3select_syntax, csv);
475
476 s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(), false, false, true);
477
478 std::string s3select_result1 = s3select_result;
479
480 csv.row_delimiter = *row_delimiter;
481 csv.column_delimiter = *column_delimiter;
482 csv.output_row_delimiter = *row_delimiter;
483 csv.output_column_delimiter = *column_delimiter;
484 csv.redundant_column = false;
485 std::string s3select_result_second_phase;
486
487 s3selectEngine::csv_object s3_csv_object_second(&s3select_syntax, csv);
488
489 s3_csv_object_second.run_s3select_on_object(s3select_result_second_phase, s3select_result.c_str(), s3select_result.size(), false, false, true);
490
491 ASSERT_EQ(s3select_result_second_phase, s3select_result1);
492 }
493
494 std::string run_s3select_opserialization_quot(std::string expression,std::string input, bool quot_always = false, char quot_char = '"')
495 {//purpose: run query on multiple rows and return result(multiple projections).
496 s3select s3select_syntax;
497
498 int status = s3select_syntax.parse_query(expression.c_str());
499
500 if(status)
501 return failure_sign;
502
503 std::string s3select_result;
504 csv_object::csv_defintions csv;
505
506 csv.redundant_column = false;
507 csv.quote_fields_always = quot_always;
508 csv.output_quot_char = quot_char;
509
510 s3selectEngine::csv_object s3_csv_object(&s3select_syntax, csv);
511
512 s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(), false, false, true);
513
514 return s3select_result;
515 }
516
517 std::string run_s3select(std::string expression,std::string input)
518 {//purpose: run query on multiple rows and return result(multiple projections).
519 s3select s3select_syntax;
520 std::string parquet_input = input;
521
522
523 int status = s3select_syntax.parse_query(expression.c_str());
524
525 if(status)
526 return failure_sign;
527
528 std::string s3select_result;
529 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
530 s3_csv_object.m_csv_defintion.redundant_column = false;
531
532 s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(), false, false, true);
533
534 #ifdef _ARROW_EXIST
535 static int file_no = 1;
536 csv_to_parquet(parquet_input);
537 std::string parquet_result;
538 run_query_on_parquet_file(expression.c_str(),PARQUET_FILENAME,parquet_result);
539
540 if (strcmp(parquet_result.c_str(),s3select_result.c_str()))
541 {
542 std::cout << "failed on query " << expression << std::endl;
543
544 {
545 std::string buffer;
546
547 std::ifstream f(PARQUET_FILENAME);
548 f.seekg(0, std::ios::end);
549 buffer.resize(f.tellg());
550 f.seekg(0);
551 f.read(buffer.data(), buffer.size());
552
553 std::string fn = std::string("./parquet_copy") + std::to_string(file_no);
554 std::ofstream fw(fn.c_str());
555 fw.write(buffer.data(), buffer.size());
556
557 fn = std::string("./csv_copy") + std::to_string(file_no++);
558 std::ofstream fw2(fn.c_str());
559 fw2.write(parquet_input.data(), parquet_input.size());
560
561 }
562 }
563
564 parquet_csv_report_error(parquet_result,s3select_result);
565 #endif //_ARROW_EXIST
566
567 return s3select_result;
568 }
569
570 TEST(TestS3SElect, s3select_vs_C)
571 {
572 //purpose: validate correct processing of arithmetical expression, it is done by running the same expression
573 // in C program.
574 // the test validate that syntax and execution-tree (including precedence rules) are done correctly
575
576 for(int y=0; y<10; y++)
577 {
578 gen_expr g;
579 std::string exp = g.generate();
580 std::string c_result = run_expression_in_C_prog( exp.c_str() );
581
582 char* err=0;
583 double c_dbl_res = strtod(c_result.c_str(), &err);
584
585 std::string input_query = "select " + exp + " from stdin;" ;
586 std::string s3select_res = run_s3select(input_query);
587
588 double s3select_dbl_res = strtod(s3select_res.c_str(), &err);
589
590 //std::cout << exp << " " << s3select_dbl_res << " " << s3select_res << " " << c_dbl_res/s3select_dbl_res << std::endl;
591 //std::cout << exp << std::endl;
592
593 ASSERT_EQ(c_dbl_res, s3select_dbl_res);
594 }
595 }
596
597 TEST(TestS3SElect, ParseQuery)
598 {
599 //TODO syntax issues ?
600 //TODO error messeges ?
601
602 s3select s3select_syntax;
603
604 run_s3select(std::string("select (1+1) from stdin;"));
605
606 ASSERT_EQ(0, 0);
607 }
608
609 TEST(TestS3SElect, int_compare_operator)
610 {
611 value a10(10), b11(11), c10(10);
612
613 ASSERT_EQ( a10 < b11, true );
614 ASSERT_EQ( a10 > b11, false );
615 ASSERT_EQ( a10 >= c10, true );
616 ASSERT_EQ( a10 <= c10, true );
617 ASSERT_EQ( a10 != b11, true );
618 ASSERT_EQ( a10 == b11, false );
619 ASSERT_EQ( a10 == c10, true );
620 }
621
622 TEST(TestS3SElect, float_compare_operator)
623 {
624 value a10(10.1), b11(11.2), c10(10.1);
625
626 ASSERT_EQ( a10 < b11, true );
627 ASSERT_EQ( a10 > b11, false );
628 ASSERT_EQ( a10 >= c10, true );
629 ASSERT_EQ( a10 <= c10, true );
630 ASSERT_EQ( a10 != b11, true );
631 ASSERT_EQ( a10 == b11, false );
632 ASSERT_EQ( a10 == c10, true );
633
634 }
635
636 TEST(TestS3SElect, string_compare_operator)
637 {
638 value s1("abc"), s2("def"), s3("abc");
639
640 ASSERT_EQ( s1 < s2, true );
641 ASSERT_EQ( s1 > s2, false );
642 ASSERT_EQ( s1 <= s3, true );
643 ASSERT_EQ( s1 >= s3, true );
644 ASSERT_EQ( s1 != s2, true );
645 ASSERT_EQ( s1 == s3, true );
646 ASSERT_EQ( s1 == s2, false );
647 }
648
649 TEST(TestS3SElect, arithmetic_operator)
650 {
651 value a(1), b(2), c(3), d(4);
652
653 ASSERT_EQ( (a+b).i64(), 3 );
654
655 ASSERT_EQ( (value(0)-value(2)*value(4)).i64(), -8 );
656 ASSERT_EQ( (value(1.23)-value(0.1)*value(2)).dbl(), 1.03 );
657
658 a=int64_t(1); //a+b modify a
659 ASSERT_EQ( ( (a+b) * (c+d) ).i64(), 21 );
660 }
661
662 TEST(TestS3SElect, intnan_compare_operator)
663 {
664 value a10(10), b11(11), c10(10), d, e;
665 d.set_nan();
666 e.set_nan();
667 ASSERT_EQ( d > b11, false );
668 ASSERT_EQ( d >= c10, false );
669 ASSERT_EQ( d < a10, false );
670 ASSERT_EQ( d <= b11, false );
671 ASSERT_EQ( d != a10, true );
672 ASSERT_EQ( d != e, true );
673 ASSERT_EQ( d == a10, false );
674 }
675
676 TEST(TestS3SElect, floatnan_compare_operator)
677 {
678 value a10(10.1), b11(11.2), c10(10.1), d, e;
679 d.set_nan();
680 e.set_nan();
681 ASSERT_EQ( d > b11, false );
682 ASSERT_EQ( d >= c10, false );
683 ASSERT_EQ( d < a10, false );
684 ASSERT_EQ( d <= b11, false );
685 ASSERT_EQ( d != a10, true );
686 ASSERT_EQ( d != e, true );
687 ASSERT_EQ( d == a10, false );
688 }
689
690 TEST(TestS3SElect, null_arithmetic_operator)
691 {
692 const char *cnull = "null";
693 value a(7), d, e(0);
694 d.setnull();
695
696 ASSERT_EQ(*(a - d).to_string(), *cnull );
697 ASSERT_EQ(*(a * d).to_string(), *cnull );
698 ASSERT_EQ(*(a / d).to_string(), *cnull );
699 ASSERT_EQ(*(a / e).to_string(), *cnull );
700 ASSERT_EQ(*(d + a).to_string(), *cnull );
701 ASSERT_EQ(*(d - a).to_string(), *cnull );
702 ASSERT_EQ(*(d * a).to_string(), *cnull );
703 ASSERT_EQ(*(d / a).to_string(), *cnull );
704 ASSERT_EQ(*(e / a).to_string(), *cnull );
705 }
706
707 TEST(TestS3SElect, nan_arithmetic_operator)
708 {
709 value a(7), d, y(0);
710 d.set_nan();
711 float b = ((a + d).dbl() );
712 float c = ((a - d).dbl() );
713 float v = ((a * d).dbl() );
714 float w = ((a / d).dbl() );
715 float x = ((d / y).dbl() );
716 float r = ((d + a).dbl() );
717 float z = ((d - a).dbl() );
718 float u = ((d * a).dbl() );
719 float t = ((d / a).dbl() );
720 EXPECT_FALSE(b <= b);
721 EXPECT_FALSE(c <= c);
722 EXPECT_FALSE(v <= v);
723 EXPECT_FALSE(w <= w);
724 EXPECT_FALSE(x <= x);
725 EXPECT_FALSE(r <= r);
726 EXPECT_FALSE(z <= z);
727 EXPECT_FALSE(u <= u);
728 EXPECT_FALSE(t <= t);
729 }
730
731 TEST(TestS3selectFunctions, to_timestamp)
732 {
733 std::string timestamp = "2007T";
734 std::string out_timestamp = "2007-01-01T00:00:00+00:00";
735 std::string input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
736 auto s3select_res = run_s3select(input_query);
737 EXPECT_EQ(s3select_res, out_timestamp);
738
739 timestamp = "2007-09-17T";
740 out_timestamp = "2007-09-17T00:00:00+00:00";
741 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
742 s3select_res = run_s3select(input_query);
743 EXPECT_EQ(s3select_res, out_timestamp);
744
745 timestamp = "2007-09-17T17:56Z";
746 out_timestamp = "2007-09-17T17:56:00Z";
747 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
748 s3select_res = run_s3select(input_query);
749 EXPECT_EQ(s3select_res, out_timestamp);
750
751 timestamp = "2007-09-17T17:56:05Z";
752 out_timestamp = "2007-09-17T17:56:05Z";
753 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
754 s3select_res = run_s3select(input_query);
755 EXPECT_EQ(s3select_res, out_timestamp);
756
757 timestamp = "2007-09-17T17:56:05.234Z";
758 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
759 out_timestamp = "2007-09-17T17:56:05.234000000Z";
760 #else
761 out_timestamp = "2007-09-17T17:56:05.234000Z";
762 #endif
763 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
764 s3select_res = run_s3select(input_query);
765 EXPECT_EQ(s3select_res, out_timestamp);
766
767 timestamp = "2007-09-17T17:56+12:08";
768 out_timestamp = "2007-09-17T17:56:00+12:08";
769 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
770 s3select_res = run_s3select(input_query);
771 EXPECT_EQ(s3select_res, out_timestamp);
772
773 timestamp = "2007-09-17T17:56:05-05:30";
774 out_timestamp = "2007-09-17T17:56:05-05:30";
775 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
776 s3select_res = run_s3select(input_query);
777 EXPECT_EQ(s3select_res, out_timestamp);
778
779 timestamp = "2007-09-17T17:56:05.234+02:44";
780 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
781 out_timestamp = "2007-09-17T17:56:05.234000000+02:44";
782 #else
783 out_timestamp = "2007-09-17T17:56:05.234000+02:44";
784 #endif
785 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
786 s3select_res = run_s3select(input_query);
787 EXPECT_EQ(s3select_res, out_timestamp);
788
789 timestamp = "2007-09-17T17:56:05.00234+02:44";
790 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
791 out_timestamp = "2007-09-17T17:56:05.002340000+02:44";
792 #else
793 out_timestamp = "2007-09-17T17:56:05.002340+02:44";
794 #endif
795 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
796 s3select_res = run_s3select(input_query);
797 EXPECT_EQ(s3select_res, out_timestamp);
798
799 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
800 timestamp = "2007-09-17T17:56:05.012345678-00:45";
801 out_timestamp = "2007-09-17T17:56:05.012345678-00:45";
802 input_query = "select to_timestamp(\'" + timestamp + "\') from stdin;" ;
803 s3select_res = run_s3select(input_query);
804 EXPECT_EQ(s3select_res, out_timestamp);
805 #endif
806
807 }
808
809 TEST(TestS3selectFunctions, date_diff)
810 {
811 std::string input_query = "select date_diff(year, to_timestamp(\'2009-09-17T17:56:06.234Z\'), to_timestamp(\'2007-09-17T19:30:05.234Z\')) from stdin;" ;
812 auto s3select_res = run_s3select(input_query);
813 EXPECT_EQ(s3select_res, "-1");
814
815 input_query = "select date_diff(month, to_timestamp(\'2009-09-17T17:56:06.234Z\'), to_timestamp(\'2007-09-17T19:30:05.234Z\')) from stdin;" ;
816 s3select_res = run_s3select(input_query);
817 EXPECT_EQ(s3select_res, "-23");
818
819 input_query = "select date_diff(day, to_timestamp(\'2009-09-17T17:56:06.234Z\'), to_timestamp(\'2007-09-17T19:30:05.234Z\')) from stdin;" ;
820 s3select_res = run_s3select(input_query);
821 EXPECT_EQ(s3select_res, "-730");
822
823 input_query = "select date_diff(hour, to_timestamp(\'2007-09-17T17:56:06.234Z\'), to_timestamp(\'2009-09-17T19:30:05.234Z\')) from stdin;" ;
824 s3select_res = run_s3select(input_query);
825 EXPECT_EQ(s3select_res, "17545");
826
827 input_query = "select date_diff(hour, to_timestamp(\'2009-09-17T19:30:05.234Z\'), to_timestamp(\'2007-09-17T17:56:06.234Z\')) from stdin;" ;
828 s3select_res = run_s3select(input_query);
829 EXPECT_EQ(s3select_res, "-17545");
830
831 input_query = "select date_diff(minute, to_timestamp(\'2007-09-17T17:56:06.234Z\'), to_timestamp(\'2009-09-17T19:30:05.234Z\')) from stdin;" ;
832 s3select_res = run_s3select(input_query);
833 EXPECT_EQ(s3select_res, "1052733");
834
835 input_query = "select date_diff(minute, to_timestamp(\'2009-09-17T19:30:05.234Z\'), to_timestamp(\'2007-09-17T17:56:06.234Z\')) from stdin;" ;
836 s3select_res = run_s3select(input_query);
837 EXPECT_EQ(s3select_res, "-1052733");
838
839 input_query = "select date_diff(second, to_timestamp(\'2009-09-17T17:56:06.234Z\'), to_timestamp(\'2009-09-17T19:30:05.234Z\')) from stdin;" ;
840 s3select_res = run_s3select(input_query);
841 EXPECT_EQ(s3select_res, "5639");
842
843 input_query = "select date_diff(hour, to_timestamp(\'2007-09-17T17:56:06.234-03:45\'), to_timestamp(\'2007-09-17T17:56:06.234+13:30\')) from stdin;" ;
844 s3select_res = run_s3select(input_query);
845 EXPECT_EQ(s3select_res, "-17");
846
847 input_query = "select date_diff(hour, to_timestamp(\'2007-09-17T17:56:06.234+03:45\'), to_timestamp(\'2007-09-17T17:56:06.234+13:30\')) from stdin;" ;
848 s3select_res = run_s3select(input_query);
849 EXPECT_EQ(s3select_res, "-9");
850
851 input_query = "select date_diff(hour, to_timestamp(\'2007-09-17T17:56:06.234Z\'), to_timestamp(\'2007-09-17T17:56:06.234+13:30\')) from stdin;" ;
852 s3select_res = run_s3select(input_query);
853 EXPECT_EQ(s3select_res, "-13");
854
855 input_query = "select date_diff(hour, to_timestamp(\'2007-09-17T17:56:06.234+14:00\'), to_timestamp(\'2007-09-17T17:56:06.234Z\')) from stdin;" ;
856 s3select_res = run_s3select(input_query);
857 EXPECT_EQ(s3select_res, "14");
858
859 input_query = "select date_diff(minute, to_timestamp(\'2007-09-17T17:56:06.234-03:45\'), to_timestamp(\'2007-09-17T17:56:06.234+13:30\')) from stdin;" ;
860 s3select_res = run_s3select(input_query);
861 EXPECT_EQ(s3select_res, "-1035");
862
863 input_query = "select date_diff(minute, to_timestamp(\'2007-09-17T17:56:06.234+03:45\'), to_timestamp(\'2007-09-17T17:56:06.234+13:30\')) from stdin;" ;
864 s3select_res = run_s3select(input_query);
865 EXPECT_EQ(s3select_res, "-585");
866
867 input_query = "select date_diff(minute, to_timestamp(\'2007-09-17T17:56:06.234Z\'), to_timestamp(\'2007-09-17T17:56:06.234+13:30\')) from stdin;" ;
868 s3select_res = run_s3select(input_query);
869 EXPECT_EQ(s3select_res, "-810");
870
871 input_query = "select date_diff(minute, to_timestamp(\'2007-09-17T17:56:06.234+14:00\'), to_timestamp(\'2007-09-17T17:56:06.234Z\')) from stdin;" ;
872 s3select_res = run_s3select(input_query);
873 EXPECT_EQ(s3select_res, "840");
874
875 input_query = "select date_diff(hour, to_timestamp(\'2007-09-17T17:56:06.234+14:00\'), to_timestamp(\'2007-09-17T03:56:06.234Z\')) from stdin;" ;
876 s3select_res = run_s3select(input_query);
877 EXPECT_EQ(s3select_res, "0");
878 }
879
880 TEST(TestS3selectFunctions, date_add)
881 {
882 std::string input_query = "select date_add(year, 2, to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
883 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
884 std::string expected_res = "2011-09-17T17:56:06.234567000Z";
885 #else
886 std::string expected_res = "2011-09-17T17:56:06.234567Z";
887 #endif
888 auto s3select_res = run_s3select(input_query);
889 EXPECT_EQ(s3select_res, expected_res);
890
891 input_query = "select date_add(month, -5, to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
892 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
893 expected_res = "2009-04-17T17:56:06.234567000Z";
894 #else
895 expected_res = "2009-04-17T17:56:06.234567Z";
896 #endif
897 s3select_res = run_s3select(input_query);
898 EXPECT_EQ(s3select_res, expected_res);
899
900 input_query = "select date_add(day, 3, to_timestamp(\'2009-09-17T17:56:06.234567-09:15\')) from stdin;" ;
901 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
902 expected_res = "2009-09-20T17:56:06.234567000-09:15";
903 #else
904 expected_res = "2009-09-20T17:56:06.234567-09:15";
905 #endif
906 s3select_res = run_s3select(input_query);
907 EXPECT_EQ(s3select_res, expected_res);
908
909 input_query = "select date_add(hour, 1, to_timestamp(\'2007-09-17T17:56:06.234567Z\')) from stdin;" ;
910 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
911 expected_res = "2007-09-17T18:56:06.234567000Z";
912 #else
913 expected_res = "2007-09-17T18:56:06.234567Z";
914 #endif
915 s3select_res = run_s3select(input_query);
916 EXPECT_EQ(s3select_res, expected_res);
917
918 input_query = "select date_add(minute, 14, to_timestamp(\'2007-09-17T17:56:06.234567+11:00\')) from stdin;" ;
919 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
920 expected_res = "2007-09-17T18:10:06.234567000+11:00";
921 #else
922 expected_res = "2007-09-17T18:10:06.234567+11:00";
923 #endif
924 s3select_res = run_s3select(input_query);
925 EXPECT_EQ(s3select_res, expected_res);
926
927 input_query = "select date_add(second, -26, to_timestamp(\'2009-09-17T17:56:06.234567-00:30\')) from stdin;" ;
928 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
929 expected_res = "2009-09-17T17:55:40.234567000-00:30";
930 #else
931 expected_res = "2009-09-17T17:55:40.234567-00:30";
932 #endif
933 s3select_res = run_s3select(input_query);
934 EXPECT_EQ(s3select_res, expected_res);
935
936 input_query = "select date_add(month, 1,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
937 expected_res = "2007-10-17T17:57:06Z";
938 s3select_res = run_s3select(input_query);
939 EXPECT_EQ(s3select_res, expected_res);
940
941 input_query = "select date_add(month, 3,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
942 expected_res = "2007-12-17T17:57:06Z";
943 s3select_res = run_s3select(input_query);
944 EXPECT_EQ(s3select_res, expected_res);
945
946 input_query = "select date_add(month, 4,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
947 expected_res = "2008-01-17T17:57:06Z";
948 s3select_res = run_s3select(input_query);
949 EXPECT_EQ(s3select_res, expected_res);
950
951 input_query = "select date_add(month, 15,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
952 expected_res = "2008-12-17T17:57:06Z";
953 s3select_res = run_s3select(input_query);
954 EXPECT_EQ(s3select_res, expected_res);
955
956 input_query = "select date_add(month, -1,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
957 expected_res = "2007-08-17T17:57:06Z";
958 s3select_res = run_s3select(input_query);
959 EXPECT_EQ(s3select_res, expected_res);
960
961 input_query = "select date_add(month, -8,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
962 expected_res = "2007-01-17T17:57:06Z";
963 s3select_res = run_s3select(input_query);
964 EXPECT_EQ(s3select_res, expected_res);
965
966 input_query = "select date_add(month, -9,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
967 expected_res = "2006-12-17T17:57:06Z";
968 s3select_res = run_s3select(input_query);
969 EXPECT_EQ(s3select_res, expected_res);
970
971 input_query = "select date_add(month, -10,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
972 expected_res = "2006-11-17T17:57:06Z";
973 s3select_res = run_s3select(input_query);
974 EXPECT_EQ(s3select_res, expected_res);
975
976 input_query = "select date_add(month, -15,to_timestamp(\'2007-09-17T17:57:06Z\')) from stdin;" ;
977 expected_res = "2006-06-17T17:57:06Z";
978 s3select_res = run_s3select(input_query);
979 EXPECT_EQ(s3select_res, expected_res);
980 }
981
982 TEST(TestS3selectFunctions, extract)
983 {
984 std::string input_query = "select extract(year from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
985 auto s3select_res = run_s3select(input_query);
986 EXPECT_EQ(s3select_res, "2009");
987
988 input_query = "select extract(month from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
989 s3select_res = run_s3select(input_query);
990 EXPECT_EQ(s3select_res, "9");
991
992 input_query = "select extract(day from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
993 s3select_res = run_s3select(input_query);
994 EXPECT_EQ(s3select_res, "17");
995
996 input_query = "select extract(week from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
997 s3select_res = run_s3select(input_query);
998 EXPECT_EQ(s3select_res, "38");
999
1000 input_query = "select extract(hour from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
1001 s3select_res = run_s3select(input_query);
1002 EXPECT_EQ(s3select_res, "17");
1003
1004 input_query = "select extract(minute from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
1005 s3select_res = run_s3select(input_query);
1006 EXPECT_EQ(s3select_res, "56");
1007
1008 input_query = "select extract(second from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
1009 s3select_res = run_s3select(input_query);
1010 EXPECT_EQ(s3select_res, "6");
1011
1012 input_query = "select extract(timezone_hour from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
1013 s3select_res = run_s3select(input_query);
1014 EXPECT_EQ(s3select_res, "0");
1015
1016 input_query = "select extract(timezone_hour from to_timestamp(\'2009-09-17T17:56:06.234567-07:45\')) from stdin;" ;
1017 s3select_res = run_s3select(input_query);
1018 EXPECT_EQ(s3select_res, "-7");
1019
1020 input_query = "select extract(timezone_hour from to_timestamp(\'2009-09-17T17:56:06.234567+07:45\')) from stdin;" ;
1021 s3select_res = run_s3select(input_query);
1022 EXPECT_EQ(s3select_res, "7");
1023
1024 input_query = "select extract(timezone_minute from to_timestamp(\'2009-09-17T17:56:06.234567Z\')) from stdin;" ;
1025 s3select_res = run_s3select(input_query);
1026 EXPECT_EQ(s3select_res, "0");
1027
1028 input_query = "select extract(timezone_minute from to_timestamp(\'2009-09-17T17:56:06.234567-07:45\')) from stdin;" ;
1029 s3select_res = run_s3select(input_query);
1030 EXPECT_EQ(s3select_res, "-45");
1031
1032 input_query = "select extract(timezone_minute from to_timestamp(\'2009-09-17T17:56:06.234567+07:45\')) from stdin;" ;
1033 s3select_res = run_s3select(input_query);
1034 EXPECT_EQ(s3select_res, "45");
1035 }
1036
1037 TEST(TestS3selectFunctions, to_string)
1038 {
1039 std::string input_query = "select to_string(to_timestamp(\'2009-09-17T17:56:06.234567Z\'), \'yyyyMMdd-H:m:s\') from stdin;" ;
1040 auto s3select_res = run_s3select(input_query);
1041 EXPECT_EQ(s3select_res, "20090917-17:56:6");
1042
1043 input_query = "select to_string(to_timestamp(\'2009-03-17T17:56:06.234567Z\'), \'yydaMMMM h m s.n\') from stdin;" ;
1044 s3select_res = run_s3select(input_query);
1045 EXPECT_EQ(s3select_res, "0917PMMarch 5 56 6.234567000");
1046
1047 input_query = "select to_string(to_timestamp(\'2009-03-07T01:08:06.234567Z\'), \'yyyyyy yyyy yyy yy y MMMMM MMMM MMM MM M dd dTHH H hh h : mm m ss s SSSSSSSSSS SSSSSS SSS SS S n - a X XX XXX XXXX XXXXX x xx xxx xxxx xxxxx\') from stdin;" ;
1048 s3select_res = run_s3select(input_query);
1049 EXPECT_EQ(s3select_res, "002009 2009 2009 09 2009 M March Mar 03 3 07 7T01 1 01 1 : 08 8 06 6 2345670000 234567 234 23 2 234567000 - AM Z Z Z Z Z +00 +0000 +00:00 +0000 +00:00");
1050
1051 input_query = "select to_string(to_timestamp(\'2009-03-07T01:08:06.234567-04:25\'), \'X XX XXX XXXX XXXXX x xx xxx xxxx xxxxx\') from stdin;" ;
1052 s3select_res = run_s3select(input_query);
1053 EXPECT_EQ(s3select_res, "-0425 -0425 -04:25 -0425 -04:25 -0425 -0425 -04:25 -0425 -04:25");
1054
1055 input_query = "select to_string(to_timestamp(\'2009-03-07T01:08:06.234567+12:05\'), \'X XX XXX XXXX XXXXX x xx xxx xxxx xxxxx\') from stdin;" ;
1056 s3select_res = run_s3select(input_query);
1057 EXPECT_EQ(s3select_res, "+1205 +1205 +12:05 +1205 +12:05 +1205 +1205 +12:05 +1205 +12:05");
1058
1059 input_query = "select to_string(to_timestamp(\'2009-03-07T01:08:06.2345+00:00\'), \'n SSS SSSSSS SSSSSSSSS SSSSSSSSSSS\') from stdin;" ;
1060 s3select_res = run_s3select(input_query);
1061 EXPECT_EQ(s3select_res, "234500000 234 234500 234500000 23450000000");
1062
1063 input_query = "select to_string(to_timestamp(\'2009-03-07T01:08:06.002345Z\'), \'n SSS SSSSSS SSSSSSSSS SSSSSSSSSSS\') from stdin;" ;
1064 s3select_res = run_s3select(input_query);
1065 EXPECT_EQ(s3select_res, "2345000 002 002345 002345000 00234500000");
1066
1067 input_query = "select to_string(to_timestamp(\'2009-03-07T01:08:06Z\'), \'n SSS SSSSSS SSSSSSSSS SSSSSSSSSSS\') from stdin;" ;
1068 s3select_res = run_s3select(input_query);
1069 EXPECT_EQ(s3select_res, "0 000 000000 000000000 00000000000");
1070 }
1071
1072 TEST(TestS3selectFunctions, utcnow)
1073 {
1074 const boost::posix_time::ptime now(boost::posix_time::second_clock::universal_time());
1075 const std::string input_query = "select utcnow() from stdin;" ;
1076 auto s3select_res = run_s3select(input_query);
1077 const boost::posix_time::ptime res_now;
1078 ASSERT_EQ(s3select_res, boost::posix_time::to_iso_extended_string(now) + "+00:00");
1079 }
1080
1081 TEST(TestS3selectFunctions, add)
1082 {
1083 const std::string input_query = "select add(-5, 0.5) from stdin;" ;
1084 auto s3select_res = run_s3select(input_query);
1085 ASSERT_EQ(s3select_res, std::string("-4.5"));
1086 }
1087
1088 void generate_fix_columns_csv(std::string& out, size_t size) {
1089 std::stringstream ss;
1090 for (auto i = 0U; i < size; ++i) {
1091 ss << 1 << "," << 2 << "," << 3 << "," << 4 << "," << 5 << std::endl;
1092 }
1093 out = ss.str();
1094 }
1095
1096 void generate_rand_csv(std::string& out, size_t size) {
1097 // schema is: int, float, string, string
1098 std::stringstream ss;
1099 for (auto i = 0U; i < size; ++i) {
1100 ss << rand()%1000 << "," << rand()%1000 << "," << rand()%1000 << "," << "foo"+std::to_string(i) << "," << std::to_string(i)+"bar" << std::endl;
1101 }
1102 out = ss.str();
1103 }
1104
1105 void generate_csv(std::string& out, size_t size) {
1106 // schema is: int, float, string, string
1107 std::stringstream ss;
1108 for (auto i = 0U; i < size; ++i) {
1109 ss << i << "," << i/10.0 << "," << "foo"+std::to_string(i) << "," << std::to_string(i)+"bar" << std::endl;
1110 }
1111 out = ss.str();
1112 }
1113
1114 void generate_csv_escape(std::string& out, size_t size) {
1115 // schema is: int, float, string, string
1116 std::stringstream ss;
1117 for (auto i = 0U; i < size; ++i) {
1118 ss << "_ar" << "," << "aeio_" << "," << "foo"+std::to_string(i) << "," << std::to_string(i)+"bar" << std::endl;
1119 }
1120 out = ss.str();
1121 }
1122
1123 void generate_columns_csv(std::string& out, size_t size) {
1124 std::stringstream ss;
1125
1126 for (auto i = 0U; i < size; ++i) {
1127 ss << i << "," << i+1 << "," << i << "," << i << "," << i << "," << i << "," << i << "," << i << "," << i << "," << i << std::endl;
1128 }
1129 out = ss.str();
1130 }
1131
1132 void generate_rand_columns_csv(std::string& out, size_t size) {
1133 std::stringstream ss;
1134 auto r = [](){return rand()%1000;};
1135
1136 for (auto i = 0U; i < size; ++i) {
1137 ss << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << std::endl;
1138 }
1139 out = ss.str();
1140 }
1141
1142 void generate_rand_columns_csv_with_null(std::string& out, size_t size) {
1143 std::stringstream ss;
1144 auto r = [](){ int x=rand()%1000;if (x<100) return std::string(""); else return std::to_string(x);};
1145
1146 for (auto i = 0U; i < size; ++i) {
1147 ss << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << "," << r() << std::endl;
1148 }
1149 out = ss.str();
1150 }
1151
1152 void generate_csv_trim(std::string& out, size_t size) {
1153 // schema is: int, float, string, string
1154 std::stringstream ss;
1155 for (auto i = 0U; i < size; ++i) {
1156 ss << " aeiou " << "," << std::endl;
1157 }
1158 out = ss.str();
1159 }
1160
1161 void generate_csv_like(std::string& out, size_t size) {
1162 // schema is: int, float, string, string
1163 std::stringstream ss;
1164 for (auto i = 0U; i < size; ++i) {
1165 ss << "fooaeioubrs" << "," << std::endl;
1166 }
1167 out = ss.str();
1168 }
1169
1170 void generate_rand_columns_csv_datetime(std::string& out, size_t size) {
1171 std::stringstream ss;
1172 auto year = [](){return rand()%100 + 1900;};
1173 auto month = [](){return 1 + rand()%12;};
1174 auto day = [](){return 1 + rand()%28;};
1175 auto hours = [](){return rand()%24;};
1176 auto minutes = [](){return rand()%60;};
1177 auto seconds = [](){return rand()%60;};
1178
1179 for (auto i = 0U; i < size; ++i) {
1180 ss << year() << "-" << std::setw(2) << std::setfill('0')<< month() << "-" << std::setw(2) << std::setfill('0')<< day() << "T" <<std::setw(2) << std::setfill('0')<< hours() << ":" << std::setw(2) << std::setfill('0')<< minutes() << ":" << std::setw(2) << std::setfill('0')<<seconds() << "Z" << "," << std::endl;
1181 }
1182 out = ss.str();
1183 }
1184
1185 void generate_rand_csv_datetime_to_string(std::string& out, std::string& result, size_t size, bool const_frmt = true) {
1186 std::stringstream ss_out, ss_res;
1187 std::string format = "yyyysMMMMMdddSSSSSSSSSSSMMMM HHa:m -:-";
1188 std::string months[12] = {"January", "February", "March","April", "May", "June", "July", "August", "September", "October", "November", "December"};
1189 auto year = [](){return rand()%100 + 1900;};
1190 auto month = [](){return 1 + rand()%12;};
1191 auto day = [](){return 1 + rand()%28;};
1192 auto hours = [](){return rand()%24;};
1193 auto minutes = [](){return rand()%60;};
1194 auto seconds = [](){return rand()%60;};
1195 auto fracation_sec = [](){return rand()%1000000;};
1196
1197 for (auto i = 0U; i < size; ++i)
1198 {
1199 auto yr = year();
1200 auto mnth = month();
1201 auto dy = day();
1202 auto hr = hours();
1203 auto mint = minutes();
1204 auto sec = seconds();
1205 auto frac_sec = fracation_sec();
1206
1207 if (const_frmt)
1208 {
1209 ss_out << yr << "-" << std::setw(2) << std::setfill('0') << mnth << "-" << std::setw(2) << std::setfill('0') << dy << "T" <<std::setw(2) << std::setfill('0') << hr << ":" << std::setw(2) << std::setfill('0') << mint << ":" << std::setw(2) << std::setfill('0') <<sec << "." << frac_sec << "Z" << std::endl;
1210
1211 ss_res << yr << sec << months[mnth-1].substr(0, 1) << std::setw(2) << std::setfill('0') << dy << dy << frac_sec << std::string(11 - std::to_string(frac_sec).length(), '0') << months[mnth-1] << " " << std::setw(2) << std::setfill('0') << hr << (hr < 12 ? "AM" : "PM") << ":" << mint << " -:-" << std::endl;
1212 }
1213 else
1214 {
1215 switch(rand()%5)
1216 {
1217 case 0:
1218 format = "yyyysMMMMMdddSSSSSSSSSSSMMMM HHa:m -:-";
1219 ss_res << yr << sec << months[mnth-1].substr(0, 1) << std::setw(2) << std::setfill('0') << dy << dy << frac_sec << std::string(11 - std::to_string(frac_sec).length(), '0') << months[mnth-1] << " " << std::setw(2) << std::setfill('0') << hr << (hr < 12 ? "AM" : "PM") << ":" << mint << " -:-" << std::endl;
1220 break;
1221 case 1:
1222 format = "aMMhh";
1223 ss_res << (hr < 12 ? "AM" : "PM") << std::setw(2) << std::setfill('0') << mnth << std::setw(2) << std::setfill('0') << (hr%12 == 0 ? 12 : hr%12) << std::endl;
1224 break;
1225 case 2:
1226 format = "y M d ABCDEF";
1227 ss_res << yr << " " << mnth << " " << dy << " ABCDEF" << std::endl;
1228 break;
1229 case 3:
1230 format = "W h:MMMM";
1231 ss_res << "W " << (hr%12 == 0 ? 12 : hr%12) << ":" << months[mnth-1] << std::endl;
1232 break;
1233 case 4:
1234 format = "H:m:s";
1235 ss_res << hr << ":" << mint << ":" << sec << std::endl;
1236 break;
1237 }
1238
1239 ss_out << yr << "-" << std::setw(2) << std::setfill('0') << mnth << "-" << std::setw(2) << std::setfill('0') << dy << "T" <<std::setw(2) << std::setfill('0') << hr << ":" << std::setw(2) << std::setfill('0') << mint << ":" << std::setw(2) << std::setfill('0') <<sec << "." << frac_sec << "Z" << "," << format << std::endl;
1240 }
1241 }
1242 out = ss_out.str();
1243 result = ss_res.str();
1244 }
1245
1246 TEST(TestS3selectFunctions, sum)
1247 {
1248 std::string input;
1249 size_t size = 128;
1250 generate_columns_csv(input, size);
1251 const std::string input_query_1 = "select sum(int(_1)), sum(float(_2)) from stdin;";
1252
1253 std::string s3select_result_1 = run_s3select(input_query_1,input);
1254
1255 ASSERT_EQ(s3select_result_1,"8128,8256");
1256 }
1257
1258 TEST(TestS3selectFunctions, between)
1259 {
1260 std::string input;
1261 size_t size = 128;
1262 generate_rand_columns_csv(input, size);
1263 const std::string input_query_1 = "select count(0) from stdin where int(_1) between int(_2) and int(_3);";
1264
1265 std::string s3select_result_1 = run_s3select(input_query_1,input);
1266
1267 const std::string input_query_2 = "select count(0) from stdin where int(_1) >= int(_2) and int(_1) <= int(_3);";
1268
1269 std::string s3select_result_2 = run_s3select(input_query_1,input);
1270
1271 ASSERT_EQ(s3select_result_1,s3select_result_2);
1272 }
1273
1274 TEST(TestS3selectFunctions, count)
1275 {
1276 std::string input;
1277 size_t size = 128;
1278 generate_columns_csv(input, size);
1279 const std::string input_query_1 = "select count(*) from stdin;";
1280
1281 std::string s3select_result_1 = run_s3select(input_query_1,input);
1282
1283 ASSERT_EQ(s3select_result_1,"128");
1284 }
1285
1286 TEST(TestS3selectFunctions, min)
1287 {
1288 std::string input;
1289 size_t size = 128;
1290 generate_columns_csv(input, size);
1291 const std::string input_query_1 = "select min(int(_1)), min(float(_2)) from stdin;";
1292
1293 std::string s3select_result_1 = run_s3select(input_query_1,input);
1294
1295 ASSERT_EQ(s3select_result_1,"0,1");
1296 }
1297
1298 TEST(TestS3selectFunctions, max)
1299 {
1300 std::string input;
1301 size_t size = 128;
1302 generate_columns_csv(input, size);
1303 const std::string input_query_1 = "select max(int(_1)), max(float(_2)) from stdin;";
1304
1305 std::string s3select_result_1 = run_s3select(input_query_1,input);
1306
1307 ASSERT_EQ(s3select_result_1,"127,128");
1308 }
1309
1310 int count_string(std::string in,std::string substr)
1311 {
1312 int count = 0;
1313 size_t nPos = in.find(substr, 0); // first occurrence
1314 while(nPos != std::string::npos)
1315 {
1316 count++;
1317 nPos = in.find(substr, nPos + 1);
1318 }
1319
1320 return count;
1321 }
1322
1323 void test_single_column_single_row(const char* input_query,const char* expected_result,const char * error_description = 0)
1324 {
1325 s3select s3select_syntax;
1326 auto status = s3select_syntax.parse_query(input_query);
1327 if(strcmp(expected_result,"#failure#") == 0 && status != 0)
1328 {
1329 ASSERT_TRUE(true);
1330 return;
1331 }
1332
1333 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
1334 std::string s3select_result;
1335 std::string input;
1336 size_t size = 1;
1337 generate_csv(input, size);
1338
1339 #ifdef _ARROW_EXIST
1340 csv_to_parquet(input);
1341 std::string parquet_result;
1342 run_query_on_parquet_file(input_query,PARQUET_FILENAME,parquet_result);
1343 #endif
1344
1345 s3_csv_object.m_csv_defintion.redundant_column = false;
1346 status = s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(),
1347 false, // dont skip first line
1348 false, // dont skip last line
1349 true // aggregate call
1350 );
1351
1352 if(strcmp(expected_result,"#failure#") == 0)
1353 {
1354 if (status==0 && s3select_result.compare("#failure#")==0)
1355 {
1356 ASSERT_TRUE(false);
1357 }
1358 ASSERT_EQ(s3_csv_object.get_error_description(),error_description);
1359 return;
1360 }
1361
1362 ASSERT_EQ(status, 0);
1363 #ifdef _ARROW_EXIST
1364 parquet_csv_report_error(parquet_result,s3select_result);
1365 #endif
1366 ASSERT_EQ(s3select_result, std::string(expected_result));
1367 }
1368
1369 TEST(TestS3selectFunctions, syntax_1)
1370 {
1371 //where not not (1<11) is not null; syntax failure ; with parentheses it pass syntax i.e. /not (not (1<11)) is not null;/
1372 //where not 1<11 is null; syntax failure ; with parentheses it pass syntax i.e. not (1<11) is null;
1373 //where not (1); AST failure , expression result,any result implictly define true/false result
1374 //where not (1+1); AST failure
1375 //where not(not (1<11)) ; OK
1376 //where (not (1<11)) ; OK
1377 //where not (1<11) ; OK
1378 test_single_column_single_row("select count(*) from stdin where not (not (1<11)) is not null;","0");
1379 test_single_column_single_row("select count(*) from stdin where ((not (1<11)) is not null);","1");
1380 test_single_column_single_row("select count(*) from stdin where not(not (1<11));","1");
1381 test_single_column_single_row("select count(*) from stdin where not (1<11);","0");
1382 test_single_column_single_row("select count(*) from stdin where 1=1 or 2=2 and 4=4 and 2=4;","1");
1383 test_single_column_single_row("select count(*) from stdin where 2=2 and 4=4 and 2=4 or 1=1;","1");
1384 }
1385
1386 TEST(TestS3selectFunctions, binop_constant)
1387 {
1388 //bug-fix for expresion with constant value on the left side(the bug change the constant values between rows)
1389 s3select s3select_syntax;
1390 const std::string input_query = "select 10+1,20-12,2*3,128/2,29%5,2^10 from stdin;";
1391 auto status = s3select_syntax.parse_query(input_query.c_str());
1392 ASSERT_EQ(status, 0);
1393 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
1394 std::string s3select_result;
1395 std::string input;
1396 size_t size = 128;
1397 generate_csv(input, size);
1398 status = s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(),
1399 false, // dont skip first line
1400 false, // dont skip last line
1401 true // aggregate call
1402 );
1403 ASSERT_EQ(status, 0);
1404
1405 int count = count_string(s3select_result,"11,8,6,64,4,1024");
1406 ASSERT_EQ(count,size);
1407 }
1408
1409 TEST(TestS3selectOperator, add)
1410 {
1411 const std::string input_query = "select -5 + 0.5 + -0.25 from stdin;" ;
1412 auto s3select_res = run_s3select(input_query);
1413 ASSERT_EQ(s3select_res, std::string("-4.75"));
1414 }
1415
1416 TEST(TestS3selectOperator, sub)
1417 {
1418 const std::string input_query = "select -5 - 0.5 - -0.25 from stdin;" ;
1419 auto s3select_res = run_s3select(input_query);
1420 ASSERT_EQ(s3select_res, std::string("-5.25"));
1421 }
1422
1423 TEST(TestS3selectOperator, mul)
1424 {
1425 const std::string input_query = "select -5 * (0.5 - -0.25) from stdin;" ;
1426 auto s3select_res = run_s3select(input_query);
1427 ASSERT_EQ(s3select_res, std::string("-3.75"));
1428 }
1429
1430 TEST(TestS3selectOperator, div)
1431 {
1432 const std::string input_query = "select -5 / (0.5 - -0.25) from stdin;" ;
1433 auto s3select_res = run_s3select(input_query);
1434 ASSERT_EQ(s3select_res, std::string("-6.666666666666667"));
1435 }
1436
1437 TEST(TestS3selectOperator, pow)
1438 {
1439 const std::string input_query = "select 5 ^ (0.5 - -0.25) from stdin;" ;
1440 auto s3select_res = run_s3select(input_query);
1441 ASSERT_EQ(s3select_res, std::string("3.34370152488211"));
1442 }
1443
1444 TEST(TestS3selectOperator, not_operator)
1445 {
1446 const std::string input_query = "select \"true\" from stdin where not ( (1+4) = 2 ) and (not(1 > (5*6)));" ;
1447 auto s3select_res = run_s3select(input_query);
1448 ASSERT_EQ(s3select_res, std::string("true"));
1449 }
1450
1451 TEST(TestS3SElect, from_stdin)
1452 {
1453 s3select s3select_syntax;
1454 const std::string input_query = "select * from stdin;";
1455 auto status = s3select_syntax.parse_query(input_query.c_str());
1456 ASSERT_EQ(status, 0);
1457 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
1458 std::string s3select_result;
1459 std::string input;
1460 size_t size = 128;
1461 generate_csv(input, size);
1462 status = s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(),
1463 false, // dont skip first line
1464 false, // dont skip last line
1465 true // aggregate call
1466 );
1467 ASSERT_EQ(status, 0);
1468 }
1469
1470 TEST(TestS3SElect, from_valid_object)
1471 {
1472 s3select s3select_syntax;
1473 const std::string input_query = "select * from /objectname;";
1474 auto status = s3select_syntax.parse_query(input_query.c_str());
1475 ASSERT_EQ(status, 0);
1476 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
1477 std::string s3select_result;
1478 std::string input;
1479 size_t size = 128;
1480 generate_csv(input, size);
1481 status = s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(),
1482 false, // dont skip first line
1483 false, // dont skip last line
1484 true // aggregate call
1485 );
1486 ASSERT_EQ(status, 0);
1487 }
1488
1489 TEST(TestS3SElect, from_invalid_object)
1490 {
1491 s3select s3select_syntax;
1492 const std::string input_query = "select sum(1) from file.txt;";
1493 auto status = s3select_syntax.parse_query(input_query.c_str());
1494 ASSERT_EQ(status, -1);
1495 auto s3select_res = run_s3select(input_query);
1496 ASSERT_EQ(s3select_res,failure_sign);
1497 }
1498
1499 TEST(TestS3selectFunctions, avg)
1500 {
1501 std::string input;
1502 size_t size = 128;
1503 generate_columns_csv(input, size);
1504 const std::string input_query_1 = "select avg(int(_1)) from stdin;";
1505
1506 std::string s3select_result_1 = run_s3select(input_query_1,input);
1507
1508 ASSERT_EQ(s3select_result_1,"63.5");
1509 }
1510
1511 TEST(TestS3selectFunctions, avgzero)
1512 {
1513 s3select s3select_syntax;
1514 const std::string input_query = "select avg(int(_1)) from stdin;";
1515 auto status = s3select_syntax.parse_query(input_query.c_str());
1516 ASSERT_EQ(status, 0);
1517 s3selectEngine::csv_object s3_csv_object(&s3select_syntax);
1518 std::string s3select_result;
1519 std::string input;
1520 size_t size = 0;
1521 generate_csv(input, size);
1522 status = s3_csv_object.run_s3select_on_object(s3select_result, input.c_str(), input.size(),
1523 false, // dont skip first line
1524 false, // dont skip last line
1525 true // aggregate call
1526 );
1527 ASSERT_EQ(status, -1);
1528 ASSERT_EQ(s3select_result, std::string(""));
1529 }
1530
1531 TEST(TestS3selectFunctions, floatavg)
1532 {
1533 std::string input;
1534 size_t size = 128;
1535 generate_columns_csv(input, size);
1536
1537 const std::string input_query_1 = "select avg(float(_1)) from stdin;";
1538
1539 std::string s3select_result_1 = run_s3select(input_query_1,input);
1540
1541 ASSERT_EQ(s3select_result_1,"63.5");
1542 }
1543
1544 TEST(TestS3selectFunctions, case_when_condition_multiplerows)
1545 {
1546 std::string input;
1547 size_t size = 10000;
1548 generate_rand_columns_csv(input, size);
1549 const std::string input_query = "select case when cast(_3 as int)>99 and cast(_3 as int)<1000 then \"case_1_1\" else \"case_2_2\" end from s3object;";
1550
1551 std::string s3select_result = run_s3select(input_query,input);
1552
1553 const std::string input_query_2 = "select case when char_length(_3)=3 then \"case_1_1\" else \"case_2_2\" end from s3object;";
1554
1555 std::string s3select_result_2 = run_s3select(input_query_2,input);
1556
1557 ASSERT_EQ(s3select_result,s3select_result_2);
1558 }
1559
1560 TEST(TestS3selectFunctions, case_value_multiplerows)
1561 {
1562 std::string input;
1563 size_t size = 10000;
1564 generate_rand_columns_csv(input, size);
1565 const std::string input_query = "select case cast(_1 as int) when cast(_2 as int) then \"case_1_1\" else \"case_2_2\" end from s3object;";
1566
1567 std::string s3select_result = run_s3select(input_query,input);
1568
1569 const std::string input_query_2 = "select case when cast(_1 as int) = cast(_2 as int) then \"case_1_1\" else \"case_2_2\" end from s3object;";
1570
1571 std::string s3select_result_2 = run_s3select(input_query_2,input);
1572
1573 ASSERT_EQ(s3select_result,s3select_result_2);
1574 }
1575
1576 TEST(TestS3selectFunctions, nested_call_aggregate_with_non_aggregate )
1577 {
1578 std::string input;
1579 size_t size = 128;
1580
1581 generate_fix_columns_csv(input, size);
1582
1583 const std::string input_query = "select sum(cast(_1 as int)),max(cast(_3 as int)),substring('abcdefghijklm',(2-1)*3+sum(cast(_1 as int))/sum(cast(_1 as int))+1,(count() + count(0))/count(0)) from stdin;";
1584
1585 std::string s3select_result = run_s3select(input_query,input);
1586
1587 ASSERT_EQ(s3select_result,"128,3,ef");
1588 }
1589
1590 TEST(TestS3selectFunctions, cast_1 )
1591 {
1592 std::string input;
1593 size_t size = 10000;
1594 generate_rand_columns_csv(input, size);
1595 const std::string input_query = "select count(*) from s3object where cast(_3 as int)>99 and cast(_3 as int)<1000;";
1596
1597 std::string s3select_result = run_s3select(input_query,input);
1598
1599 const std::string input_query_2 = "select count(*) from s3object where char_length(_3)=3;";
1600
1601 std::string s3select_result_2 = run_s3select(input_query_2,input);
1602
1603 ASSERT_EQ(s3select_result,s3select_result_2);
1604 }
1605
1606 TEST(TestS3selectFunctions, null_column )
1607 {
1608 std::string input;
1609 size_t size = 10000;
1610
1611 generate_rand_columns_csv_with_null(input, size);
1612
1613 const std::string input_query = "select count(*) from s3object where _3 is null;";
1614
1615 std::string s3select_result = run_s3select(input_query,input);
1616
1617 ASSERT_NE(s3select_result,failure_sign);
1618
1619 const std::string input_query_2 = "select count(*) from s3object where nullif(_3,null) is null;";
1620
1621 std::string s3select_result_2 = run_s3select(input_query_2,input);
1622
1623 ASSERT_NE(s3select_result_2,failure_sign);
1624
1625 ASSERT_EQ(s3select_result,s3select_result_2);
1626 }
1627
1628 TEST(TestS3selectFunctions, count_operation)
1629 {
1630 std::string input;
1631 size_t size = 10000;
1632 generate_rand_columns_csv(input, size);
1633 const std::string input_query = "select count(*) from s3object;";
1634
1635 std::string s3select_result = run_s3select(input_query,input);
1636
1637 ASSERT_NE(s3select_result,failure_sign);
1638
1639 ASSERT_EQ(s3select_result,"10000");
1640 }
1641
1642 TEST(TestS3selectFunctions, nullif_expressions)
1643 {
1644 std::string input;
1645 size_t size = 10000;
1646 generate_rand_columns_csv(input, size);
1647 const std::string input_query_1 = "select count(*) from s3object where nullif(_1,_2) is null;";
1648
1649 std::string s3select_result_1 = run_s3select(input_query_1,input);
1650
1651 ASSERT_NE(s3select_result_1,failure_sign);
1652
1653 const std::string input_query_2 = "select count(*) from s3object where _1 = _2;";
1654
1655 std::string s3select_result_2 = run_s3select(input_query_2,input);
1656
1657 ASSERT_EQ(s3select_result_1, s3select_result_2);
1658
1659 const std::string input_query_3 = "select count(*) from s3object where not nullif(_1,_2) is null;";
1660
1661 std::string s3select_result_3 = run_s3select(input_query_3,input);
1662
1663 ASSERT_NE(s3select_result_3,failure_sign);
1664
1665 const std::string input_query_4 = "select count(*) from s3object where _1 != _2;";
1666
1667 std::string s3select_result_4 = run_s3select(input_query_4,input);
1668
1669 ASSERT_EQ(s3select_result_3, s3select_result_4);
1670
1671 const std::string input_query_5 = "select count(*) from s3object where nullif(_1,_2) = _1 ;";
1672
1673 std::string s3select_result_5 = run_s3select(input_query_5,input);
1674
1675 ASSERT_NE(s3select_result_5,failure_sign);
1676
1677 const std::string input_query_6 = "select count(*) from s3object where _1 != _2;";
1678
1679 std::string s3select_result_6 = run_s3select(input_query_6,input);
1680
1681 ASSERT_EQ(s3select_result_5, s3select_result_6);
1682 }
1683
1684 TEST(TestS3selectFunctions, lower_upper_expressions)
1685 {
1686 std::string input;
1687 size_t size = 1;
1688 generate_csv(input, size);
1689 const std::string input_query_1 = "select lower(\"AB12cd$$\") from s3object;";
1690
1691 std::string s3select_result_1 = run_s3select(input_query_1,input);
1692
1693 ASSERT_NE(s3select_result_1,failure_sign);
1694
1695 ASSERT_EQ(s3select_result_1, "ab12cd$$\n");
1696
1697 const std::string input_query_2 = "select upper(\"ab12CD$$\") from s3object;";
1698
1699 std::string s3select_result_2 = run_s3select(input_query_2,input);
1700
1701 ASSERT_NE(s3select_result_2,failure_sign);
1702
1703 ASSERT_EQ(s3select_result_2, "AB12CD$$\n");
1704 }
1705
1706 TEST(TestS3selectFunctions, in_expressions)
1707 {
1708 std::string input;
1709 size_t size = 10000;
1710 generate_rand_columns_csv(input, size);
1711 const std::string input_query_1 = "select int(_1) from s3object where int(_1) in(1);";
1712
1713 std::string s3select_result_1 = run_s3select(input_query_1,input);
1714
1715 ASSERT_NE(s3select_result_1,failure_sign);
1716
1717 const std::string input_query_2 = "select int(_1) from s3object where int(_1) = 1;";
1718
1719 std::string s3select_result_2 = run_s3select(input_query_2,input);
1720
1721 ASSERT_EQ(s3select_result_1, s3select_result_2);
1722
1723 const std::string input_query_3 = "select int(_1) from s3object where int(_1) in(1,0);";
1724
1725 std::string s3select_result_3 = run_s3select(input_query_3,input);
1726
1727 ASSERT_NE(s3select_result_3,failure_sign);
1728
1729 const std::string input_query_4 = "select int(_1) from s3object where int(_1) = 1 or int(_1) = 0;";
1730
1731 std::string s3select_result_4 = run_s3select(input_query_4,input);
1732
1733 ASSERT_EQ(s3select_result_3, s3select_result_4);
1734
1735 const std::string input_query_5 = "select int(_2) from s3object where int(_2) in(1,0,2);";
1736
1737 std::string s3select_result_5 = run_s3select(input_query_5,input);
1738
1739 ASSERT_NE(s3select_result_5,failure_sign);
1740
1741 const std::string input_query_6 = "select int(_2) from s3object where int(_2) = 1 or int(_2) = 0 or int(_2) = 2;";
1742
1743 std::string s3select_result_6 = run_s3select(input_query_6,input);
1744
1745 ASSERT_EQ(s3select_result_5, s3select_result_6);
1746
1747 const std::string input_query_7 = "select int(_2) from s3object where int(_2)*2 in(int(_3)*2,int(_4)*3,int(_5)*5);";
1748
1749 std::string s3select_result_7 = run_s3select(input_query_7,input);
1750
1751 ASSERT_NE(s3select_result_7,failure_sign);
1752
1753 const std::string input_query_8 = "select int(_2) from s3object where int(_2)*2 = int(_3)*2 or int(_2)*2 = int(_4)*3 or int(_2)*2 = int(_5)*5;";
1754
1755 std::string s3select_result_8 = run_s3select(input_query_8,input);
1756
1757 ASSERT_EQ(s3select_result_7, s3select_result_8);
1758
1759 const std::string input_query_9 = "select int(_1) from s3object where character_length(_1) = 2 and substring(_1,2,1) in (\"3\");";
1760
1761 std::string s3select_result_9 = run_s3select(input_query_9,input);
1762
1763 ASSERT_NE(s3select_result_9,failure_sign);
1764
1765 const std::string input_query_10 = "select int(_1) from s3object where _1 like \"_3\";";
1766
1767 std::string s3select_result_10 = run_s3select(input_query_10,input);
1768
1769 ASSERT_EQ(s3select_result_9, s3select_result_10);
1770 }
1771
1772 TEST(TestS3selectFunctions, test_coalesce_expressions)
1773 {
1774 std::string input;
1775 size_t size = 10000;
1776 generate_rand_columns_csv(input, size);
1777 const std::string input_query_1 = "select count(*) from s3object where char_length(_3)>2 and char_length(_4)>2 and cast(substring(_3,1,2) as int) = cast(substring(_4,1,2) as int);";
1778
1779 std::string s3select_result_1 = run_s3select(input_query_1,input);
1780
1781 ASSERT_NE(s3select_result_1,failure_sign);
1782
1783 const std::string input_query_2 = "select count(*) from s3object where cast(_3 as int)>99 and cast(_4 as int)>99 and coalesce(nullif(cast(substring(_3,1,2) as int),cast(substring(_4,1,2) as int)),7) = 7;";
1784
1785 std::string s3select_result_2 = run_s3select(input_query_2,input);
1786
1787 ASSERT_EQ(s3select_result_1, s3select_result_2);
1788
1789 const std::string input_query_3 = "select coalesce(nullif(_5,_5),nullif(_1,_1),_2) from s3object;";
1790
1791 std::string s3select_result_3 = run_s3select(input_query_3,input);
1792
1793 ASSERT_NE(s3select_result_3,failure_sign);
1794
1795 const std::string input_query_4 = "select coalesce(_2) from s3object;";
1796
1797 std::string s3select_result_4 = run_s3select(input_query_4,input);
1798
1799 ASSERT_EQ(s3select_result_3, s3select_result_4);
1800 }
1801
1802 TEST(TestS3selectFunctions, test_cast_expressions)
1803 {
1804 std::string input;
1805 size_t size = 10000;
1806 generate_rand_columns_csv(input, size);
1807 const std::string input_query_1 = "select count(*) from s3object where cast(_3 as int)>999;";
1808
1809 std::string s3select_result_1 = run_s3select(input_query_1,input);
1810
1811 ASSERT_NE(s3select_result_1,failure_sign);
1812
1813 const std::string input_query_2 = "select count(*) from s3object where char_length(_3)>3;";
1814
1815 std::string s3select_result_2 = run_s3select(input_query_2,input);
1816
1817 ASSERT_EQ(s3select_result_1, s3select_result_2);
1818
1819 const std::string input_query_3 = "select count(*) from s3object where char_length(_3)=3;";
1820
1821 std::string s3select_result_3 = run_s3select(input_query_3,input);
1822
1823 ASSERT_NE(s3select_result_3,failure_sign);
1824
1825 const std::string input_query_4 = "select count(*) from s3object where cast(_3 as int)>99 and cast(_3 as int)<1000;";
1826
1827 std::string s3select_result_4 = run_s3select(input_query_4,input);
1828
1829 ASSERT_EQ(s3select_result_3, s3select_result_4);
1830 }
1831
1832 TEST(TestS3selectFunctions, test_version)
1833 {
1834 std::string input;
1835 size_t size = 1;
1836 generate_rand_columns_csv(input, size);
1837 const std::string input_query_1 = "select version() from stdin;";
1838
1839 std::string s3select_result_1 = run_s3select(input_query_1,input);
1840
1841 ASSERT_NE(s3select_result_1,failure_sign);
1842
1843 ASSERT_EQ(s3select_result_1, "41.a\n");
1844 }
1845
1846 TEST(TestS3selectFunctions, multirow_datetime_to_string_constant)
1847 {
1848 std::string input, expected_res;
1849 std::string format = "yyyysMMMMMdddSSSSSSSSSSSMMMM HHa:m -:-";
1850 size_t size = 100;
1851
1852 generate_rand_csv_datetime_to_string(input, expected_res, size);
1853
1854 const std::string input_query = "select to_string(to_timestamp(_1), \'" + format + "\') from s3object;";
1855 std::string s3select_result = run_s3select(input_query, input);
1856 EXPECT_EQ(s3select_result, expected_res);
1857 }
1858
1859 TEST(TestS3selectFunctions, multirow_datetime_to_string_dynamic)
1860 {
1861 std::string input, expected_res;
1862 size_t size = 100;
1863
1864 generate_rand_csv_datetime_to_string(input, expected_res, size, false);
1865
1866 const std::string input_query = "select to_string(to_timestamp(_1), _2) from s3object;";
1867 std::string s3select_result = run_s3select(input_query, input);
1868 EXPECT_EQ(s3select_result, expected_res);
1869 }
1870
1871 TEST(TestS3selectFunctions, test_date_time_expressions)
1872 {
1873 std::string input;
1874 size_t size = 10000;
1875 generate_rand_columns_csv_datetime(input, size);
1876 const std::string input_query_1 = "select count(*) from s3object where extract(year from to_timestamp(_1)) > 1950 and extract(year from to_timestamp(_1)) < 1960;";
1877
1878 std::string s3select_result_1 = run_s3select(input_query_1,input);
1879
1880 ASSERT_NE(s3select_result_1,failure_sign);
1881
1882 const std::string input_query_2 = "select count(*) from s3object where int(substring(_1,1,4))>1950 and int(substring(_1,1,4))<1960;";
1883
1884 std::string s3select_result_2 = run_s3select(input_query_2,input);
1885
1886 ASSERT_EQ(s3select_result_1, s3select_result_2);
1887
1888 const std::string input_query_3 = "select count(*) from s3object where date_diff(month,to_timestamp(_1),date_add(month,2,to_timestamp(_1)) ) = 2;";
1889
1890 std::string s3select_result_3 = run_s3select(input_query_3,input);
1891
1892 ASSERT_NE(s3select_result_3,failure_sign);
1893
1894 const std::string input_query_4 = "select count(*) from s3object;";
1895
1896 std::string s3select_result_4 = run_s3select(input_query_4,input);
1897
1898 ASSERT_NE(s3select_result_4,failure_sign);
1899
1900 ASSERT_EQ(s3select_result_3, s3select_result_4);
1901
1902 const std::string input_query_5 = "select count(0) from stdin where date_diff(year,to_timestamp(_1),date_add(day, 366 ,to_timestamp(_1))) = 1;";
1903
1904 std::string s3select_result_5 = run_s3select(input_query_5,input);
1905
1906 ASSERT_EQ(s3select_result_5, s3select_result_4);
1907
1908 const std::string input_query_6 = "select count(0) from stdin where date_diff(hour,utcnow(),date_add(day,1,utcnow())) = 24;";
1909
1910 std::string s3select_result_6 = run_s3select(input_query_6,input);
1911
1912 ASSERT_EQ(s3select_result_6, s3select_result_4);
1913
1914 std::string input_query_7 = "select extract(year from to_timestamp(_1)) from stdin;";
1915 std::string s3select_result_7 = run_s3select(input_query_7, input);
1916 ASSERT_NE(s3select_result_7, failure_sign);
1917 std::string input_query_8 = "select substring(_1, 1, 4) from stdin;";
1918 std::string s3select_result_8 = run_s3select(input_query_8, input);
1919 ASSERT_NE(s3select_result_8, failure_sign);
1920 EXPECT_EQ(s3select_result_7, s3select_result_8);
1921
1922 std::string input_query_9 = "select to_timestamp(_1) from stdin where extract(month from to_timestamp(_1)) = 5;";
1923 std::string s3select_result_9 = run_s3select(input_query_9, input);
1924 ASSERT_NE(s3select_result_9, failure_sign);
1925 std::string input_query_10 = "select substring(_1, 1, char_length(_1)) from stdin where _1 like \'____-05%\';";
1926 std::string s3select_result_10 = run_s3select(input_query_10, input);
1927 ASSERT_NE(s3select_result_10, failure_sign);
1928 EXPECT_EQ(s3select_result_9, s3select_result_10);
1929
1930 std::string input_query_11 = "select _1 from stdin where extract(month from to_timestamp(_1)) = 5 or extract(month from to_timestamp(_1)) = 6;";
1931 std::string s3select_result_11 = run_s3select(input_query_11,input);
1932 ASSERT_NE(s3select_result_11, failure_sign);
1933 std::string input_query_12 = "select _1 from stdin where to_string(to_timestamp(_1), 'MMMM') in ('May', 'June');";
1934 std::string s3select_result_12 = run_s3select(input_query_12,input);
1935 ASSERT_NE(s3select_result_12, failure_sign);
1936 EXPECT_EQ(s3select_result_11, s3select_result_12);
1937
1938 std::string input_query_13 = "select to_string(to_timestamp(_1), 'y,M,H,m') from stdin where cast(to_string(to_timestamp(_1), 'd') as int) >= 1 and cast(to_string(to_timestamp(_1), 'd') as int) <= 10;";
1939 std::string s3select_result_13 = run_s3select(input_query_13, input);
1940 ASSERT_NE(s3select_result_13, failure_sign);
1941 std::string input_query_14 = "select extract(year from to_timestamp(_1)), extract(month from to_timestamp(_1)), extract(hour from to_timestamp(_1)), extract(minute from to_timestamp(_1)) from stdin where int(substring(_1, 9, 2)) between 1 and 10;";
1942 std::string s3select_result_14 = run_s3select(input_query_14, input);
1943 ASSERT_NE(s3select_result_14, failure_sign);
1944 EXPECT_EQ(s3select_result_13, s3select_result_14);
1945 }
1946
1947 TEST(TestS3selectFunctions, test_like_expressions)
1948 {
1949 std::string input, input1;
1950 size_t size = 10000;
1951 generate_csv(input, size);
1952 const std::string input_query_1 = "select count(*) from stdin where _4 like \"%ar\";";
1953
1954 std::string s3select_result_1 = run_s3select(input_query_1,input);
1955
1956 ASSERT_NE(s3select_result_1,failure_sign);
1957
1958 const std::string input_query_2 = "select count(*) from stdin where substring(_4,char_length(_4),1) = \"r\" and substring(_4,char_length(_4)-1,1) = \"a\";";
1959
1960 std::string s3select_result_2 = run_s3select(input_query_2,input);
1961
1962 ASSERT_EQ(s3select_result_1, s3select_result_2);
1963
1964 generate_csv_like(input1, size);
1965
1966 const std::string input_query_3 = "select count(*) from stdin where _1 like \"%aeio%\";";
1967
1968 std::string s3select_result_3 = run_s3select(input_query_3,input1);
1969
1970 ASSERT_NE(s3select_result_3,failure_sign);
1971
1972 const std::string input_query_4 = "select count(*) from stdin where substring(_1,4,4) = \"aeio\";";
1973
1974 std::string s3select_result_4 = run_s3select(input_query_4,input1);
1975
1976 ASSERT_EQ(s3select_result_3, s3select_result_4);
1977
1978 const std::string input_query_5 = "select count(*) from stdin where _1 like \"%r[r-s]\";";
1979
1980 std::string s3select_result_5 = run_s3select(input_query_5,input);
1981
1982 ASSERT_NE(s3select_result_5,failure_sign);
1983
1984 const std::string input_query_6 = "select count(*) from stdin where substring(_1,char_length(_1),1) between \"r\" and \"s\" and substring(_1,char_length(_1)-1,1) = \"r\";";
1985
1986 std::string s3select_result_6 = run_s3select(input_query_6,input);
1987
1988 ASSERT_EQ(s3select_result_5, s3select_result_6);
1989
1990 const std::string input_query_7 = "select count(*) from stdin where _1 like \"%br_\";";
1991
1992 std::string s3select_result_7 = run_s3select(input_query_7,input);
1993
1994 ASSERT_NE(s3select_result_7,failure_sign);
1995
1996 const std::string input_query_8 = "select count(*) from stdin where substring(_1,char_length(_1)-1,1) = \"r\" and substring(_1,char_length(_1)-2,1) = \"b\";";
1997
1998 std::string s3select_result_8 = run_s3select(input_query_8,input);
1999
2000 ASSERT_EQ(s3select_result_7, s3select_result_8);
2001
2002 const std::string input_query_9 = "select count(*) from stdin where _1 like \"f%s\";";
2003
2004 std::string s3select_result_9 = run_s3select(input_query_9,input);
2005
2006 ASSERT_NE(s3select_result_9,failure_sign);
2007
2008 const std::string input_query_10 = "select count(*) from stdin where substring(_1,char_length(_1),1) = \"s\" and substring(_1,1,1) = \"f\";";
2009
2010 std::string s3select_result_10 = run_s3select(input_query_10,input);
2011
2012 ASSERT_EQ(s3select_result_9, s3select_result_10);
2013 }
2014
2015 TEST(TestS3selectFunctions, test_when_then_else_expressions)
2016 {
2017 std::string input;
2018 size_t size = 10000;
2019 generate_rand_columns_csv(input, size);
2020 const std::string input_query_1 = "select case when cast(_1 as int)>100 and cast(_1 as int)<200 then \"a\" when cast(_1 as int)>200 and cast(_1 as int)<300 then \"b\" else \"c\" end from s3object;";
2021
2022 std::string s3select_result_1 = run_s3select(input_query_1,input);
2023
2024 ASSERT_NE(s3select_result_1,failure_sign);
2025
2026 int count1 = std::count(s3select_result_1.begin(), s3select_result_1.end(),'a') ;
2027 int count2 = std::count(s3select_result_1.begin(), s3select_result_1.end(), 'b');
2028 int count3 = std::count(s3select_result_1.begin(), s3select_result_1.end(), 'c');
2029
2030 const std::string input_query_2 = "select count(*) from s3object where cast(_1 as int)>100 and cast(_1 as int)<200;";
2031
2032 std::string s3select_result_2 = run_s3select(input_query_2,input);
2033
2034 ASSERT_NE(s3select_result_2,failure_sign);
2035
2036 ASSERT_EQ(stoi(s3select_result_2), count1);
2037
2038 const std::string input_query_3 = "select count(*) from s3object where cast(_1 as int)>200 and cast(_1 as int)<300;";
2039
2040 std::string s3select_result_3 = run_s3select(input_query_3,input);
2041
2042 ASSERT_NE(s3select_result_3,failure_sign);
2043
2044 ASSERT_EQ(stoi(s3select_result_3), count2);
2045
2046 const std::string input_query_4 = "select count(*) from s3object where cast(_1 as int)<=100 or cast(_1 as int)>=300 or cast(_1 as int)=200;";
2047
2048 std::string s3select_result_4 = run_s3select(input_query_4,input);
2049
2050 ASSERT_NE(s3select_result_4,failure_sign);
2051
2052 ASSERT_EQ(stoi(s3select_result_4), count3);
2053 }
2054
2055 TEST(TestS3selectFunctions, test_case_value_when_then_else_expressions)
2056 {
2057 std::string input;
2058 size_t size = 10000;
2059 generate_rand_columns_csv(input, size);
2060 const std::string input_query_1 = "select case cast(_1 as int) + 1 when 2 then \"a\" when 3 then \"b\" else \"c\" end from s3object;";
2061
2062 std::string s3select_result_1 = run_s3select(input_query_1,input);
2063
2064 ASSERT_NE(s3select_result_1,failure_sign);
2065
2066 int count1 = std::count(s3select_result_1.begin(), s3select_result_1.end(),'a') ;
2067 int count2 = std::count(s3select_result_1.begin(), s3select_result_1.end(), 'b');
2068 int count3 = std::count(s3select_result_1.begin(), s3select_result_1.end(), 'c');
2069
2070 const std::string input_query_2 = "select count(*) from s3object where cast(_1 as int) + 1 = 2;";
2071
2072 std::string s3select_result_2 = run_s3select(input_query_2,input);
2073
2074 ASSERT_NE(s3select_result_2,failure_sign);
2075
2076 ASSERT_EQ(stoi(s3select_result_2), count1);
2077
2078 const std::string input_query_3 = "select count(*) from s3object where cast(_1 as int) + 1 = 3;";
2079
2080 std::string s3select_result_3 = run_s3select(input_query_3,input);
2081
2082 ASSERT_NE(s3select_result_3,failure_sign);
2083
2084 ASSERT_EQ(stoi(s3select_result_3), count2);
2085
2086 const std::string input_query_4 = "select count(*) from s3object where cast(_1 as int) + 1 < 2 or cast(_1 as int) + 1 > 3;";
2087
2088 std::string s3select_result_4 = run_s3select(input_query_4,input);
2089
2090 ASSERT_NE(s3select_result_4,failure_sign);
2091
2092 ASSERT_EQ(stoi(s3select_result_4), count3);
2093 }
2094
2095 TEST(TestS3selectFunctions, test_trim_expressions)
2096 {
2097 std::string input;
2098 size_t size = 10000;
2099 generate_csv_trim(input, size);
2100 const std::string input_query_1 = "select count(*) from stdin where trim(_1) = \"aeiou\";";
2101
2102 std::string s3select_result_1 = run_s3select(input_query_1,input);
2103
2104 ASSERT_NE(s3select_result_1,failure_sign);
2105
2106 const std::string input_query_2 = "select count(*) from stdin where substring(_1 from 6 for 5) = \"aeiou\";";
2107
2108 std::string s3select_result_2 = run_s3select(input_query_2,input);
2109
2110 ASSERT_EQ(s3select_result_1, s3select_result_2);
2111
2112 const std::string input_query_3 = "select count(*) from stdin where trim(both from _1) = \"aeiou\";";
2113
2114 std::string s3select_result_3 = run_s3select(input_query_3,input);
2115
2116 ASSERT_NE(s3select_result_3,failure_sign);
2117
2118 const std::string input_query_4 = "select count(*) from stdin where substring(_1,6,5) = \"aeiou\";";
2119
2120 std::string s3select_result_4 = run_s3select(input_query_4,input);
2121
2122 ASSERT_EQ(s3select_result_3, s3select_result_4);
2123 }
2124
2125 TEST(TestS3selectFunctions, truefalse)
2126 {
2127 test_single_column_single_row("select 2 from s3object where true or false;","2\n");
2128 test_single_column_single_row("select 2 from s3object where true or true;","2\n");
2129 test_single_column_single_row("select 2 from s3object where null or true ;","2\n");
2130 test_single_column_single_row("select 2 from s3object where true and true;","2\n");
2131 test_single_column_single_row("select 2 from s3object where true = true ;","2\n");
2132 test_single_column_single_row("select 2 from stdin where 1<2 = true;","2\n");
2133 test_single_column_single_row("select 2 from stdin where 1=1 = true;","2\n");
2134 test_single_column_single_row("select 2 from stdin where false=false = true;","2\n");
2135 test_single_column_single_row("select 2 from s3object where false or true;","2\n");
2136 test_single_column_single_row("select true,false from s3object where false = false;","true,false\n");
2137 test_single_column_single_row("select count(*) from s3object where not (1>2) = true;","1");
2138 test_single_column_single_row("select count(*) from s3object where not (1>2) = (not false);","1");
2139 test_single_column_single_row("select (true or false) from s3object;","true\n");
2140 test_single_column_single_row("select (true and true) from s3object;","true\n");
2141 test_single_column_single_row("select (true and null) from s3object;","null\n");
2142 test_single_column_single_row("select (false or false) from s3object;","false\n");
2143 test_single_column_single_row("select (not true) from s3object;","false\n");
2144 test_single_column_single_row("select (not 1 > 2) from s3object;","true\n");
2145 test_single_column_single_row("select (not 1 > 2) as a1,cast(a1 as int)*4 from s3object;","true,4\n");
2146 test_single_column_single_row("select (1 > 2) from s3object;","false\n");
2147 test_single_column_single_row("select case when (nullif(3,3) is null) = true then \"case_1_1\" else \"case_2_2\" end, case when (\"a\" in (\"a\",\"b\")) = true then \"case_3_3\" else \"case_4_4\" end, case when 1>3 then \"case_5_5\" else \"case_6_6\" end from s3object where (3*3 = 9);","case_1_1,case_3_3,case_6_6\n");
2148 }
2149
2150 TEST(TestS3selectFunctions, boolcast)
2151 {
2152 test_single_column_single_row("select cast(5 as bool) from s3object;","true\n");
2153 test_single_column_single_row("select cast(0 as bool) from s3object;","false\n");
2154 test_single_column_single_row("select cast(true as bool) from s3object;","true\n");
2155 test_single_column_single_row("select cast('a' as bool) from s3object;","false\n");
2156 }
2157
2158 TEST(TestS3selectFunctions, floatcast)
2159 {
2160 test_single_column_single_row("select cast('1234a' as float) from s3object;","#failure#","extra characters after the number");
2161 test_single_column_single_row("select cast('a1234' as float) from s3object;","#failure#","text cannot be converted to a number");
2162 test_single_column_single_row("select cast('999e+999' as float) from s3object;","#failure#","converted value would fall out of the range of the result type!");
2163 }
2164
2165 TEST(TestS3selectFunctions, intcast)
2166 {
2167 test_single_column_single_row("select cast('1234a' as int) from s3object;","#failure#","extra characters after the number");
2168 test_single_column_single_row("select cast('a1234' as int) from s3object;","#failure#","text cannot be converted to a number");
2169 test_single_column_single_row("select cast('9223372036854775808' as int) from s3object;","#failure#","converted value would fall out of the range of the result type!");
2170 test_single_column_single_row("select cast('-9223372036854775809' as int) from s3object;","#failure#","converted value would fall out of the range of the result type!");
2171 }
2172
2173 TEST(TestS3selectFunctions, predicate_as_projection_column)
2174 {
2175 std::string input;
2176 size_t size = 10000;
2177 generate_rand_columns_csv(input, size);
2178 const std::string input_query = "select (int(_2) between int(_3) and int(_4)) from s3object where int(_2)>int(_3) and int(_2)<int(_4);";
2179
2180 std::string s3select_result = run_s3select(input_query,input);
2181
2182 ASSERT_NE(s3select_result,failure_sign);
2183
2184 auto count = std::count(s3select_result.begin(), s3select_result.end(), '0');
2185
2186 ASSERT_EQ(count,0);
2187
2188 const std::string input_query_1 = "select (nullif(_1,_2) is null) from s3object where _1 = _2;";
2189
2190 std::string s3select_result_1 = run_s3select(input_query_1,input);
2191
2192 ASSERT_NE(s3select_result_1,failure_sign);
2193
2194 auto count_1 = std::count(s3select_result_1.begin(), s3select_result_1.end(), '0');
2195
2196 ASSERT_EQ(count_1,0);
2197
2198 const std::string input_query_2 = "select (nullif(_1,_2) is not null) from s3object where _1 != _2;";
2199
2200 std::string s3select_result_2 = run_s3select(input_query_2,input);
2201
2202 ASSERT_NE(s3select_result_2,failure_sign);
2203
2204 auto count_2 = std::count(s3select_result_2.begin(), s3select_result_2.end(), '0');
2205
2206 ASSERT_EQ(count_2,0);
2207
2208 const std::string input_query_3 = "select (_1 like \"_3\") from s3object where character_length(_1) = 2 and substring(_1,2,1) in (\"3\");";
2209
2210 std::string s3select_result_3 = run_s3select(input_query_3,input);
2211
2212 ASSERT_NE(s3select_result_3,failure_sign);
2213
2214 auto count_3 = std::count(s3select_result_3.begin(), s3select_result_3.end(), '0');
2215
2216 ASSERT_EQ(count_3,0);
2217
2218 const std::string input_query_4 = "select (int(_1) in (1)) from s3object where int(_1) = 1;";
2219
2220 std::string s3select_result_4 = run_s3select(input_query_4,input);
2221
2222 ASSERT_NE(s3select_result_4,failure_sign);
2223
2224 auto count_4 = std::count(s3select_result_4.begin(), s3select_result_4.end(), '0');
2225
2226 ASSERT_EQ(count_4,0);
2227 }
2228
2229 TEST(TestS3selectFunctions, truefalse_multirows_expressions)
2230 {
2231 std::string input, input1;
2232 size_t size = 10000;
2233 generate_rand_columns_csv(input, size);
2234 const std::string input_query_1 = "select count(*) from s3object where cast(_3 as int)>999 = true;";
2235
2236 std::string s3select_result_1 = run_s3select(input_query_1,input);
2237
2238 ASSERT_NE(s3select_result_1,failure_sign);
2239
2240 const std::string input_query_2 = "select count(*) from s3object where char_length(_3)>3 = true;";
2241
2242 std::string s3select_result_2 = run_s3select(input_query_2,input);
2243
2244 ASSERT_EQ(s3select_result_1, s3select_result_2);
2245
2246 const std::string input_query_3 = "select count(*) from s3object where char_length(_3)=3 = true;";
2247
2248 std::string s3select_result_3 = run_s3select(input_query_3,input);
2249
2250 ASSERT_NE(s3select_result_3,failure_sign);
2251
2252 const std::string input_query_4 = "select count(*) from s3object where cast(_3 as int)>99 = true and cast(_3 as int)<1000 = true;";
2253
2254 std::string s3select_result_4 = run_s3select(input_query_4,input);
2255
2256 ASSERT_EQ(s3select_result_3, s3select_result_4);
2257
2258 generate_rand_columns_csv_with_null(input1, size);
2259
2260 const std::string input_query_5 = "select count(*) from s3object where (_3 is null) = true;";
2261
2262 std::string s3select_result_5 = run_s3select(input_query_5,input1);
2263
2264 ASSERT_NE(s3select_result_5,failure_sign);
2265
2266 const std::string input_query_6 = "select count(*) from s3object where (nullif(_3,null) is null) = true;";
2267
2268 std::string s3select_result_6 = run_s3select(input_query_6,input1);
2269
2270 ASSERT_NE(s3select_result_6,failure_sign);
2271
2272 ASSERT_EQ(s3select_result_5,s3select_result_6);
2273 }
2274
2275 TEST(TestS3selectFunctions, truefalse_date_time_expressions)
2276 {
2277 std::string input;
2278 size_t size = 10000;
2279 generate_rand_columns_csv_datetime(input, size);
2280 const std::string input_query_1 = "select count(*) from s3object where extract(year from to_timestamp(_1)) > 1950 = true and extract(year from to_timestamp(_1)) < 1960 = true;";
2281
2282 std::string s3select_result_1 = run_s3select(input_query_1,input);
2283
2284 ASSERT_NE(s3select_result_1,failure_sign);
2285
2286 const std::string input_query_2 = "select count(*) from s3object where int(substring(_1,1,4))>1950 = true and int(substring(_1,1,4))<1960 = true;";
2287
2288 std::string s3select_result_2 = run_s3select(input_query_2,input);
2289
2290 ASSERT_EQ(s3select_result_1, s3select_result_2);
2291 }
2292
2293 TEST(TestS3selectFunctions, truefalse_trim_expressions)
2294 {
2295 std::string input;
2296 size_t size = 10000;
2297 generate_csv_trim(input, size);
2298 const std::string input_query_1 = "select count(*) from stdin where trim(_1) = \"aeiou\" = true;";
2299
2300 std::string s3select_result_1 = run_s3select(input_query_1,input);
2301
2302 ASSERT_NE(s3select_result_1,failure_sign);
2303
2304 const std::string input_query_2 = "select count(*) from stdin where substring(_1 from 6 for 5) = \"aeiou\" = true;";
2305
2306 std::string s3select_result_2 = run_s3select(input_query_2,input);
2307
2308 ASSERT_EQ(s3select_result_1, s3select_result_2);
2309 }
2310
2311 TEST(TestS3selectFunctions, tuefalse_like_expressions)
2312 {
2313 std::string input, input1;
2314 size_t size = 10000;
2315 generate_csv(input, size);
2316 const std::string input_query_1 = "select count(*) from stdin where (_4 like \"%ar\") = true;";
2317
2318 std::string s3select_result_1 = run_s3select(input_query_1,input);
2319
2320 ASSERT_NE(s3select_result_1,failure_sign);
2321
2322 const std::string input_query_2 = "select count(*) from stdin where (substring(_4,char_length(_4),1) = \"r\") = true and (substring(_4,char_length(_4)-1,1) = \"a\") = true;";
2323
2324 std::string s3select_result_2 = run_s3select(input_query_2,input);
2325
2326 ASSERT_EQ(s3select_result_1, s3select_result_2);
2327
2328 generate_csv_like(input1, size);
2329
2330 const std::string input_query_3 = "select count(*) from stdin where (_1 like \"%aeio%\") = true;";
2331
2332 std::string s3select_result_3 = run_s3select(input_query_3,input1);
2333
2334 ASSERT_NE(s3select_result_3,failure_sign);
2335
2336 const std::string input_query_4 = "select count(*) from stdin where (substring(_1,4,4) = \"aeio\") = true;";
2337
2338 std::string s3select_result_4 = run_s3select(input_query_4,input1);
2339
2340 ASSERT_EQ(s3select_result_3, s3select_result_4);
2341
2342 const std::string input_query_5 = "select count(*) from stdin where (_1 like \"%r[r-s]\") = true;";
2343
2344 std::string s3select_result_5 = run_s3select(input_query_5,input);
2345
2346 ASSERT_NE(s3select_result_5,failure_sign);
2347
2348 const std::string input_query_6 = "select count(*) from stdin where (substring(_1,char_length(_1),1) between \"r\" and \"s\") = true and (substring(_1,char_length(_1)-1,1) = \"r\") = true;";
2349
2350 std::string s3select_result_6 = run_s3select(input_query_6,input);
2351
2352 ASSERT_EQ(s3select_result_5, s3select_result_6);
2353
2354 const std::string input_query_7 = "select count(*) from stdin where (_1 like \"%br_\") = true;";
2355
2356 std::string s3select_result_7 = run_s3select(input_query_7,input);
2357
2358 ASSERT_NE(s3select_result_7,failure_sign);
2359
2360 const std::string input_query_8 = "select count(*) from stdin where (substring(_1,char_length(_1)-1,1) = \"r\") = true and (substring(_1,char_length(_1)-2,1) = \"b\") = true;";
2361
2362 std::string s3select_result_8 = run_s3select(input_query_8,input);
2363
2364 ASSERT_EQ(s3select_result_7, s3select_result_8);
2365 }
2366
2367 TEST(TestS3selectFunctions, truefalse_coalesce_expressions)
2368 {
2369 std::string input;
2370 size_t size = 10000;
2371 generate_rand_columns_csv(input, size);
2372 const std::string input_query_1 = "select count(*) from s3object where char_length(_3)>2 and char_length(_4)>2 = true and cast(substring(_3,1,2) as int) = cast(substring(_4,1,2) as int) = true;";
2373
2374 std::string s3select_result_1 = run_s3select(input_query_1,input);
2375
2376 ASSERT_NE(s3select_result_1,failure_sign);
2377
2378 const std::string input_query_2 = "select count(*) from s3object where cast(_3 as int)>99 = true and cast(_4 as int)>99 = true and (coalesce(nullif(cast(substring(_3,1,2) as int),cast(substring(_4,1,2) as int)),7) = 7) = true;";
2379
2380 std::string s3select_result_2 = run_s3select(input_query_2,input);
2381
2382 ASSERT_EQ(s3select_result_1, s3select_result_2);
2383 }
2384
2385 TEST(TestS3selectFunctions, truefalse_in_expressions)
2386 {
2387 std::string input;
2388 size_t size = 10000;
2389 generate_rand_columns_csv(input, size);
2390 const std::string input_query_1 = "select int(_1) from s3object where (int(_1) in(1)) = true;";
2391
2392 std::string s3select_result_1 = run_s3select(input_query_1,input);
2393
2394 ASSERT_NE(s3select_result_1,failure_sign);
2395
2396 const std::string input_query_2 = "select int(_1) from s3object where int(_1) = 1 = true;";
2397
2398 std::string s3select_result_2 = run_s3select(input_query_2,input);
2399
2400 ASSERT_EQ(s3select_result_1, s3select_result_2);
2401
2402 const std::string input_query_7 = "select int(_2) from s3object where (int(_2)*2 in(int(_3)*2,int(_4)*3,int(_5)*5)) = true;";
2403
2404 std::string s3select_result_7 = run_s3select(input_query_7,input);
2405
2406 ASSERT_NE(s3select_result_7,failure_sign);
2407
2408 const std::string input_query_8 = "select int(_2) from s3object where int(_2)*2 = int(_3)*2 = true or int(_2)*2 = int(_4)*3 = true or int(_2)*2 = int(_5)*5 = true;";
2409
2410 std::string s3select_result_8 = run_s3select(input_query_8,input);
2411
2412 ASSERT_EQ(s3select_result_7, s3select_result_8);
2413
2414 const std::string input_query_9 = "select int(_1) from s3object where character_length(_1) = 2 = true and (substring(_1,2,1) in (\"3\")) = true;";
2415
2416 std::string s3select_result_9 = run_s3select(input_query_9,input);
2417
2418 ASSERT_NE(s3select_result_9,failure_sign);
2419
2420 const std::string input_query_10 = "select int(_1) from s3object where (_1 like \"_3\") = true;";
2421
2422 std::string s3select_result_10 = run_s3select(input_query_10,input);
2423
2424 ASSERT_EQ(s3select_result_9, s3select_result_10);
2425 }
2426
2427 TEST(TestS3selectFunctions, truefalse_alias_expressions)
2428 {
2429 std::string input;
2430 size_t size = 100;
2431 generate_rand_columns_csv(input, size);
2432 const std::string input_query_1 = "select (int(_1) > int(_2)) as a1 from s3object where a1 = true ;";
2433
2434 std::string s3select_result_1 = run_s3select(input_query_1,input);
2435
2436 ASSERT_NE(s3select_result_1,failure_sign);
2437
2438 const std::string input_query_2 = "select (int(_1) > int(_2)) from s3object where int(_1) > int(_2) = true;";
2439
2440 std::string s3select_result_2 = run_s3select(input_query_2,input);
2441
2442 ASSERT_EQ(s3select_result_1, s3select_result_2);
2443 }
2444 TEST(TestS3selectFunctions, charlength)
2445 {
2446 test_single_column_single_row( "select char_length(\"abcde\") from stdin;","5\n");
2447 }
2448
2449 TEST(TestS3selectFunctions, characterlength)
2450 {
2451 test_single_column_single_row( "select character_length(\"abcde\") from stdin;","5\n");
2452 }
2453
2454 TEST(TestS3selectFunctions, emptystring)
2455 {
2456 test_single_column_single_row( "select char_length(\"\") from stdin;","0\n");
2457 }
2458
2459 TEST(TestS3selectFunctions, lower)
2460 {
2461 test_single_column_single_row( "select lower(\"ABcD12#$e\") from stdin;","abcd12#$e\n");
2462 }
2463
2464 TEST(TestS3selectFunctions, upper)
2465 {
2466 test_single_column_single_row( "select upper(\"abCD12#$e\") from stdin;","ABCD12#$E\n");
2467 }
2468
2469 TEST(TestS3selectFunctions, mod)
2470 {
2471 test_single_column_single_row( "select 5%2 from stdin;","1\n");
2472 }
2473
2474 TEST(TestS3selectFunctions, modzero)
2475 {
2476 test_single_column_single_row( "select 0%2 from stdin;","0\n");
2477 }
2478
2479 TEST(TestS3selectFunctions, nullif)
2480 {
2481 test_single_column_single_row( "select nullif(5,3) from stdin;","5\n");
2482 }
2483
2484 TEST(TestS3selectFunctions, nullifeq)
2485 {
2486 test_single_column_single_row( "select nullif(5,5) from stdin;","null\n");
2487 }
2488
2489 TEST(TestS3selectFunctions, nullifnull)
2490 {
2491 test_single_column_single_row( "select nullif(null,null) from stdin;","null\n");
2492 }
2493
2494 TEST(TestS3selectFunctions, nullifintnull)
2495 {
2496 test_single_column_single_row( "select nullif(7, null) from stdin;","7\n");
2497 }
2498
2499 TEST(TestS3selectFunctions, nullifintstring)
2500 {
2501 test_single_column_single_row( "select nullif(5, \"hello\") from stdin;","5\n");
2502 }
2503
2504 TEST(TestS3selectFunctions, nullifstring)
2505 {
2506 test_single_column_single_row( "select nullif(\"james\",\"bond\") from stdin;","james\n");
2507 }
2508
2509 TEST(TestS3selectFunctions, nullifeqstring)
2510 {
2511 test_single_column_single_row( "select nullif(\"redhat\",\"redhat\") from stdin;","null\n");
2512 }
2513
2514 TEST(TestS3selectFunctions, nullifnumericeq)
2515 {
2516 test_single_column_single_row( "select nullif(1, 1.0) from stdin;","null\n");
2517 }
2518
2519 TEST(TestS3selectFunctions, nulladdition)
2520 {
2521 test_single_column_single_row( "select 1 + null from stdin;","null\n");
2522 }
2523
2524 TEST(TestS3selectFunctions, isnull)
2525 {
2526 test_single_column_single_row( "select \"true\" from stdin where nullif(1,1) is null;" ,"true\n");
2527 }
2528
2529 TEST(TestS3selectFunctions, isnullnot)
2530 {
2531 test_single_column_single_row( "select \"true\" from stdin where not nullif(1,2) is null;" ,"true\n");
2532 }
2533
2534 TEST(TestS3selectFunctions, isnull1)
2535 {
2536 test_single_column_single_row( "select \"true\" from stdin where 7 + null is null;" ,"true\n");
2537 }
2538
2539 TEST(TestS3selectFunctions, isnull2)
2540 {
2541 test_single_column_single_row( "select \"true\" from stdin where null + 7 is null;" ,"true\n");
2542 }
2543
2544 TEST(TestS3selectFunctions, isnull3)
2545 {
2546 test_single_column_single_row( "select \"true\" from stdin where (null > 1) is null;" ,"true\n");
2547 }
2548
2549 TEST(TestS3selectFunctions, isnull4)
2550 {
2551 test_single_column_single_row( "select \"true\" from stdin where (1 <= null) is null;" ,"true\n");
2552 }
2553
2554 TEST(TestS3selectFunctions, isnull5)
2555 {
2556 test_single_column_single_row( "select \"true\" from stdin where (null > 2 and 1 = 0) is not null;" ,"true\n");
2557 }
2558
2559 TEST(TestS3selectFunctions, isnull6)
2560 {
2561 test_single_column_single_row( "select \"true\" from stdin where (null>2 and 2>1) is null;" ,"true\n");
2562 }
2563
2564 TEST(TestS3selectFunctions, isnull7)
2565 {
2566 test_single_column_single_row( "select \"true\" from stdin where (null>2 or null<=3) is null;" ,"true\n");
2567 }
2568
2569 TEST(TestS3selectFunctions, isnull8)
2570 {
2571 test_single_column_single_row( "select \"true\" from stdin where (5<4 or null<=3) is null;" ,"true\n");
2572 }
2573
2574 TEST(TestS3selectFunctions, isnull9)
2575 {
2576 test_single_column_single_row( "select \"true\" from stdin where (null<=3 or 5<3) is null;" ,"true\n");
2577 }
2578
2579 TEST(TestS3selectFunctions, isnull10)
2580 {
2581 test_single_column_single_row( "select \"true\" from stdin where (null<=3 or 5>3) ;" ,"true\n");
2582 }
2583
2584 TEST(TestS3selectFunctions, nullnot)
2585 {
2586 test_single_column_single_row( "select \"true\" from stdin where not (null>0 and 7<3) ;" ,"true\n");
2587 }
2588
2589 TEST(TestS3selectFunctions, nullnot1)
2590 {
2591 test_single_column_single_row( "select \"true\" from stdin where not (null>0 or 4>3) and (7<1) ;" ,"true\n");
2592 }
2593
2594 TEST(TestS3selectFunctions, isnull11)
2595 {
2596 test_single_column_single_row( "select \"true\" from stdin where (5>3 or null<1) ;" ,"true\n");
2597 }
2598
2599 TEST(TestS3selectFunctions, likeop)
2600 {
2601 test_single_column_single_row( "select \"true\" from stdin where \"qwertyabcde\" like \"%abcde\";" ,"true\n");
2602 }
2603
2604 TEST(TestS3selectFunctions, likeopfalse)
2605 {
2606 test_single_column_single_row( "select \"true\" from stdin where not \"qwertybcde\" like \"%abcde\";" ,"true\n");
2607 }
2608
2609 TEST(TestS3selectFunctions, likeop1)
2610 {
2611 test_single_column_single_row( "select \"true\" from stdin where \"qwertyabcdeqwerty\" like \"%abcde%\";" ,"true\n");
2612 }
2613
2614 TEST(TestS3selectFunctions, likeop1false)
2615 {
2616 test_single_column_single_row( "select \"true\" from stdin where not \"qwertyabcdqwerty\" like \"%abcde%\";" ,"true\n");
2617 }
2618
2619 TEST(TestS3selectFunctions, likeop2)
2620 {
2621 test_single_column_single_row( "select \"true\" from stdin where \"abcdeqwerty\" like \"abcde%\";" ,"true\n");
2622 }
2623
2624 TEST(TestS3selectFunctions, likeop2false)
2625 {
2626 test_single_column_single_row( "select \"true\" from stdin where not \"abdeqwerty\" like \"abcde%\";" ,"true\n");
2627 }
2628
2629 TEST(TestS3selectFunctions, likeop6)
2630 {
2631 test_single_column_single_row( "select \"true\" from stdin where \"abqwertyde\" like \"ab%de\";" ,"true\n");
2632 }
2633
2634 TEST(TestS3selectFunctions, likeop3false)
2635 {
2636 test_single_column_single_row( "select \"true\" from stdin where not \"aabcde\" like \"_bcde\";" ,"true\n");
2637 }
2638
2639 TEST(TestS3selectFunctions, likeop3mix)
2640 {
2641 test_single_column_single_row( "select \"true\" from stdin where \"aabbccdef\" like \"_ab%\";" ,"true\n");
2642 }
2643
2644 TEST(TestS3selectFunctions, likeop4mix)
2645 {
2646 test_single_column_single_row( "select \"true\" from stdin where \"aabbccdef\" like \"%de_\";" ,"true\n");
2647 }
2648
2649 TEST(TestS3selectFunctions, likeop4)
2650 {
2651 test_single_column_single_row( "select \"true\" from stdin where \"abcde\" like \"abc_e\";" ,"true\n");
2652 }
2653
2654 TEST(TestS3selectFunctions, likeop4false)
2655 {
2656 test_single_column_single_row( "select \"true\" from stdin where not \"abcccddyddyde\" like \"abc_e\";" ,"true\n");
2657 }
2658
2659 TEST(TestS3selectFunctions, likeop5)
2660 {
2661 test_single_column_single_row( "select \"true\" from stdin where \"ebcde\" like \"[d-f]bcde\";" ,"true\n");
2662 }
2663
2664 TEST(TestS3selectFunctions, likeop5false)
2665 {
2666 test_single_column_single_row( "select \"true\" from stdin where not \"abcde\" like \"[d-f]bcde\";" ,"true\n");
2667 }
2668
2669 TEST(TestS3selectFunctions, likeopdynamic)
2670 {
2671 test_single_column_single_row( "select \"true\" from stdin where \"abcde\" like substring(\"abcdefg\",1,5);" ,"true\n");
2672 }
2673
2674 TEST(TestS3selectFunctions, likeop5not)
2675 {
2676 test_single_column_single_row( "select \"true\" from stdin where \"abcde\" like \"[^d-f]bcde\";" ,"true\n");
2677 }
2678
2679 TEST(TestS3selectFunctions, likeop7)
2680 {
2681 test_single_column_single_row( "select \"true\" from stdin where \"qwertyabcde\" like \"%%%%abcde\";" ,"true\n");
2682 }
2683
2684 TEST(TestS3selectFunctions, likeop8beginning)
2685 {
2686 test_single_column_single_row( "select \"true\" from stdin where \"abcde\" like \"[abc]%\";" ,"true\n");
2687 }
2688
2689 TEST(TestS3selectFunctions, likeop8false)
2690 {
2691 test_single_column_single_row( "select \"true\" from stdin where not \"dabc\" like \"[abc]%\";" ,"true\n");
2692 }
2693
2694 TEST(TestS3selectFunctions, likeop8end)
2695 {
2696 test_single_column_single_row( "select \"true\" from stdin where \"xyza\" like \"%[abc]\";" ,"true\n");
2697 }
2698
2699 TEST(TestS3selectFunctions, inoperator)
2700 {
2701 test_single_column_single_row( "select \"true\" from stdin where \"a\" in (\"b\", \"a\");" ,"true\n");
2702 }
2703
2704 TEST(TestS3selectFunctions, inoperatorfalse)
2705 {
2706 test_single_column_single_row( "select \"true\" from stdin where not \"a\" in (\"b\", \"c\");" ,"true\n");
2707 }
2708
2709 TEST(TestS3selectFunctions, inoperatormore)
2710 {
2711 test_single_column_single_row( "select \"true\" from stdin where \"a\" in (\"b\", \"a\", \"d\", \"e\", \"f\");" ,"true\n");
2712 }
2713
2714 TEST(TestS3selectFunctions, inoperatormixtype)
2715 {
2716 test_single_column_single_row( "select \"true\" from stdin where 10 in (5.0*2.0, 12+1, 9+1.2, 22/2, 12-3);" ,"true\n");
2717 }
2718
2719 TEST(TestS3selectFunctions, mix)
2720 {
2721 test_single_column_single_row( "select \"true\" from stdin where \"abcde\" like \"abc_e\" and 10 in (5.0*2.0, 12+1) and nullif(2,2) is null;" ,"true\n");
2722 }
2723
2724 TEST(TestS3selectFunctions, case_when_then_else)
2725 {
2726 test_single_column_single_row( "select case when (1+1+1*1=(2+1)*3) then \"case_1_1\" when ((4*3)=(12)) then \"case_1_2\" else \"case_else_1\" end , case when 1+1*7=(2+1)*3 then \"case_2_1\" when ((4*3)=(12)+1) then \"case_2_2\" else \"case_else_2\" end from stdin where (3*3=9);" ,"case_1_2,case_else_2\n");
2727 }
2728
2729 TEST(TestS3selectFunctions, simple_case_when)
2730 {
2731 test_single_column_single_row( "select case 2+1 when (3+4) then \"case_1_1\" when 3 then \"case_3\" else \"case_else_1\" end from stdin;","case_3\n");
2732 }
2733
2734 TEST(TestS3selectFunctions, nested_case)
2735 {
2736 test_single_column_single_row( "select case when ((3+4) = (7 *1)) then \"case_1_1\" else \"case_2_2\" end, case 1+3 when 2+3 then \"case_1_2\" else \"case_2_1\" end from stdin where (3*3 = 9);","case_1_1,case_2_1\n");
2737 }
2738
2739 TEST(TestS3selectFunctions, substr11)
2740 {
2741 test_single_column_single_row( "select substring(\"01234567890\",2*0+1,1.53*0+3) from stdin ;" ,"012\n");
2742 }
2743
2744 TEST(TestS3selectFunctions, substr12)
2745 {
2746 test_single_column_single_row( "select substring(\"01234567890\",2*0+1,1+2.0) from stdin ;" ,"012\n");
2747 }
2748
2749 TEST(TestS3selectFunctions, substr13)
2750 {
2751 test_single_column_single_row( "select substring(\"01234567890\",2.5*2+1,1+2) from stdin ;" ,"567\n");
2752 }
2753
2754 TEST(TestS3selectFunctions, substr14)
2755 {
2756 test_single_column_single_row( "select substring(\"123456789\",0) from stdin ;" ,"123456789\n");
2757 }
2758
2759 TEST(TestS3selectFunctions, substr15)
2760 {
2761 test_single_column_single_row( "select substring(\"123456789\",-4) from stdin ;" ,"123456789\n");
2762 }
2763
2764 TEST(TestS3selectFunctions, substr16)
2765 {
2766 test_single_column_single_row( "select substring(\"123456789\",0,100) from stdin ;" ,"123456789\n");
2767 }
2768
2769 TEST(TestS3selectFunctions, substr17)
2770 {
2771 test_single_column_single_row( "select substring(\"12345\",0,5) from stdin ;" ,"1234\n");
2772 }
2773
2774 TEST(TestS3selectFunctions, substr18)
2775 {
2776 test_single_column_single_row( "select substring(\"12345\",-1,5) from stdin ;" ,"123\n");
2777 }
2778
2779 TEST(TestS3selectFunctions, substr19)
2780 {
2781 test_single_column_single_row( "select substring(\"123456789\" from 0) from stdin ;" ,"123456789\n");
2782 }
2783
2784 TEST(TestS3selectFunctions, substr20)
2785 {
2786 test_single_column_single_row( "select substring(\"123456789\" from -4) from stdin ;" ,"123456789\n");
2787 }
2788
2789 TEST(TestS3selectFunctions, substr21)
2790 {
2791 test_single_column_single_row( "select substring(\"123456789\" from 0 for 100) from stdin ;" ,"123456789\n");
2792 }
2793
2794 TEST(TestS3selectFunctions, substr22)
2795 {
2796 test_single_column_single_row( "select \"true\" from stdin where 5 = cast(substring(\"523\",1,1) as int);" ,"true\n");
2797 }
2798
2799 TEST(TestS3selectFunctions, substr23)
2800 {
2801 test_single_column_single_row( "select \"true\" from stdin where cast(substring(\"523\",1,1) as int) > cast(substring(\"123\",1,1) as int) ;" ,"true\n");
2802 }
2803
2804 TEST(TestS3selectFunctions, coalesce)
2805 {
2806 test_single_column_single_row( "select coalesce(5,3) from stdin;","5\n");
2807 }
2808
2809 TEST(TestS3selectFunctions, coalesceallnull)
2810 {
2811 test_single_column_single_row( "select coalesce(nullif(5,5),nullif(1,1.0)) from stdin;","null\n");
2812 }
2813
2814 TEST(TestS3selectFunctions, coalesceanull)
2815 {
2816 test_single_column_single_row( "select coalesce(nullif(5,5),nullif(1,1.0),2) from stdin;","2\n");
2817 }
2818
2819 TEST(TestS3selectFunctions, coalescewhere)
2820 {
2821 test_single_column_single_row( "select \"true\" from stdin where coalesce(nullif(7.0,7),nullif(4,4.0),6) = 6;" ,"true\n");
2822 }
2823
2824 TEST(TestS3selectFunctions, castint)
2825 {
2826 test_single_column_single_row( "select cast(5.123 as int) from stdin ;" ,"5\n");
2827 }
2828
2829 TEST(TestS3selectFunctions, castfloat)
2830 {
2831 test_single_column_single_row( "select cast(1.234 as float) from stdin ;" ,"1.234\n");
2832 }
2833
2834 TEST(TestS3selectFunctions, castfloatoperation)
2835 {
2836 test_single_column_single_row( "select cast(1.234 as float) + cast(1.235 as float) from stdin ;" ,"2.4690000000000003\n");
2837 }
2838
2839 TEST(TestS3selectFunctions, caststring)
2840 {
2841 test_single_column_single_row( "select cast(1234 as string) from stdin ;" ,"1234\n");
2842 }
2843
2844 TEST(TestS3selectFunctions, caststring1)
2845 {
2846 test_single_column_single_row( "select cast('12hddd' as int) from stdin ;" ,"#failure#","extra characters after the number");
2847 }
2848
2849 TEST(TestS3selectFunctions, caststring2)
2850 {
2851 test_single_column_single_row( "select cast('124' as int) + 1 from stdin ;" ,"125\n");
2852 }
2853
2854 TEST(TestS3selectFunctions, castsubstr)
2855 {
2856 test_single_column_single_row( "select substring(cast(cast(\"1234567\" as int) as string),2,2) from stdin ;" ,"23\n");
2857 }
2858
2859 TEST(TestS3selectFunctions, casttimestamp)
2860 {
2861 test_single_column_single_row( "select cast('2010-01-15T13:30:10Z' as timestamp) from stdin ;" ,"2010-01-15T13:30:10Z\n");
2862 }
2863
2864 TEST(TestS3selectFunctions, castdateadd)
2865 {
2866 test_single_column_single_row( "select date_add(day, 2, cast('2010-01-15T13:30:10Z' as timestamp)) from stdin ;" ,"2010-01-17T13:30:10Z\n");
2867 }
2868
2869 TEST(TestS3selectFunctions, castdatediff)
2870 {
2871 test_single_column_single_row( "select date_diff(year,cast('2010-01-15T13:30:10Z' as timestamp), cast('2020-01-15T13:30:10Z' as timestamp)) from stdin ;" ,"10\n");
2872 }
2873
2874 TEST(TestS3selectFunctions, trim)
2875 {
2876 test_single_column_single_row( "select trim(\" \twelcome\t \") from stdin ;" ,"\twelcome\t\n");
2877 }
2878
2879 TEST(TestS3selectFunctions, trim1)
2880 {
2881 test_single_column_single_row( "select trim(\" foobar \") from stdin ;" ,"foobar\n");
2882 }
2883
2884 TEST(TestS3selectFunctions, trim2)
2885 {
2886 test_single_column_single_row( "select trim(trailing from \" foobar \") from stdin ;" ," foobar\n");
2887 }
2888
2889 TEST(TestS3selectFunctions, trim3)
2890 {
2891 test_single_column_single_row( "select trim(leading from \" foobar \") from stdin ;" ,"foobar \n");
2892 }
2893
2894 TEST(TestS3selectFunctions, trim4)
2895 {
2896 test_single_column_single_row( "select trim(both from \" foobar \") from stdin ;" ,"foobar\n");
2897 }
2898
2899 TEST(TestS3selectFunctions, trim5)
2900 {
2901 test_single_column_single_row( "select trim(from \" foobar \") from stdin ;" ,"foobar\n");
2902 }
2903
2904 TEST(TestS3selectFunctions, trim6)
2905 {
2906 test_single_column_single_row( "select trim(both \"12\" from \"1112211foobar22211122\") from stdin ;" ,"foobar\n");
2907 }
2908
2909 TEST(TestS3selectFunctions, trim7)
2910 {
2911 test_single_column_single_row( "select substring(trim(both from ' foobar '),2,3) from stdin ;" ,"oob\n");
2912 }
2913
2914 TEST(TestS3selectFunctions, trim8)
2915 {
2916 test_single_column_single_row( "select substring(trim(both '12' from '1112211foobar22211122'),1,6) from stdin ;" ,"foobar\n");
2917 }
2918
2919 TEST(TestS3selectFunctions, trim9)
2920 {
2921 test_single_column_single_row( "select cast(trim(both \"12\" from \"111221134567822211122\") as int) + 5 from stdin ;" ,"345683\n");
2922 }
2923
2924 TEST(TestS3selectFunctions, trimefalse)
2925 {
2926 test_single_column_single_row( "select cast(trim(both from \"12\" \"111221134567822211122\") as int) + 5 from stdin ;" ,"#failure#","");
2927 }
2928
2929 TEST(TestS3selectFunctions, trim10)
2930 {
2931 test_single_column_single_row( "select trim(trim(leading from \" foobar \")) from stdin ;" ,"foobar\n");
2932 }
2933
2934 TEST(TestS3selectFunctions, trim11)
2935 {
2936 test_single_column_single_row( "select trim(trailing from trim(leading from \" foobar \")) from stdin ;" ,"foobar\n");
2937 }
2938
2939 TEST(TestS3selectFunctions, likescape)
2940 {
2941 test_single_column_single_row("select \"true\" from stdin where \"abc_defgh\" like \"abc$_defgh\" escape \"$\";","true\n");
2942 test_single_column_single_row("select \"true\" from s3object where \"j_kerhai\" like \"j#_%\" escape \"#\";","true\n");
2943 test_single_column_single_row("select \"true\" from s3object where \"jok_ai\" like \"%#_ai\" escape \"#\";","true\n");
2944 test_single_column_single_row("select \"true\" from s3object where \"jo_aibc\" like \"%#_ai%\" escape \"#\";","true\n");
2945 test_single_column_single_row("select \"true\" from s3object where \"jok%abc\" like \"jok$%abc\" escape \"$\";","true\n");
2946 test_single_column_single_row("select \"true\" from s3object where \"ab%%a\" like \"ab$%%a\" escape \"$\";","true\n");
2947 test_single_column_single_row("select \"true\" from s3object where \"_a_\" like \"=_a=_\" escape \"=\";","true\n");
2948 test_single_column_single_row("select \"true\" from s3object where \"abc#efgh\" like \"abc##efgh\" escape \"#\";","true\n");
2949 test_single_column_single_row("select \"true\" from s3object where \"%abs%\" like \"#%abs#%\" escape \"#\";","true\n");
2950 test_single_column_single_row("select \"true\" from s3object where \"abc##efgh\" like \"abc####efgh\" escape \"#\";","true\n");
2951 }
2952
2953 TEST(TestS3selectFunctions, likescapedynamic)
2954 {
2955 test_single_column_single_row( "select \"true\" from s3object where \"abc#efgh\" like substring(\"abc##efghi\",1,9) escape \"#\";" ,"true\n");
2956 test_single_column_single_row( "select \"true\" from s3object where \"abcdefgh\" like substring(\"abcd%abc\",1,5);" ,"true\n");
2957 test_single_column_single_row( "select \"true\" from s3object where substring(\"abcde\",1,5) like \"abcd_\" ;" ,"true\n");
2958 test_single_column_single_row( "select \"true\" from s3object where substring(\"abcde\",1,5) like substring(\"abcd_ab\",1,5) ;" ,"true\n");
2959 }
2960
2961 TEST(TestS3selectFunctions, test_escape_expressions)
2962 {
2963 std::string input, input1;
2964 size_t size = 10000;
2965 generate_csv_escape(input, size);
2966 const std::string input_query_1 = "select count(*) from stdin where _1 like \"%_ar\" escape \"%\";";
2967
2968 std::string s3select_result_1 = run_s3select(input_query_1,input);
2969
2970 ASSERT_NE(s3select_result_1,failure_sign);
2971
2972 const std::string input_query_2 = "select count(*) from stdin where substring(_1,char_length(_1),1) = \"r\" and substring(_1,char_length(_1)-1,1) = \"a\" and substring(_1,char_length(_1)-2,1) = \"_\";";
2973
2974 std::string s3select_result_2 = run_s3select(input_query_2,input);
2975
2976 ASSERT_EQ(s3select_result_1, s3select_result_2);
2977
2978 const std::string input_query_3 = "select count(*) from stdin where _2 like \"%aeio$_\" escape \"$\";";
2979
2980 std::string s3select_result_3 = run_s3select(input_query_3,input);
2981
2982 ASSERT_NE(s3select_result_3,failure_sign);
2983
2984 const std::string input_query_4 = "select count(*) from stdin where substring(_2,1,5) = \"aeio_\";";
2985
2986 std::string s3select_result_4 = run_s3select(input_query_4,input);
2987
2988 ASSERT_EQ(s3select_result_3, s3select_result_4);
2989 }
2990
2991 void generate_csv_multirow(std::string& out) {
2992 // schema is: int, float, string, string
2993 std::stringstream ss;
2994 ss << "1,42926,7334,5.5,Brandise,Letsou,Brandise.Letsou@yopmail.com,worker,2020-10-26T11:21:30.397Z" << std::endl;
2995 ss << "2,21169,3648,9.0,Zaria,Weinreb,Zaria.Weinreb@yopmail.com,worker,2009-12-02T01:22:45.8327+09:45" << std::endl;
2996 ss << "3,35581,9091,2.1,Bibby,Primalia,Bibby.Primalia@yopmail.com,doctor,2001-02-27T23:18:23.446633-12:00" << std::endl;
2997 ss << "4,38388,7345,4.7,Damaris,Arley,Damaris.Arley@yopmail.com,firefighter,1995-08-24T01:40:00+12:30" << std::endl;
2998 ss << "5,42802,6464,7.0,Georgina,Georas,Georgina.Georas@yopmail.com,worker,2013-01-30T05:27:59.2Z" << std::endl;
2999 ss << "6,45582,5863,0.1,Kelly,Hamil,Kelly.Hamil@yopmail.com,police officer,1998-03-31T17:25-01:05" << std::endl;
3000 ss << "7,8548,7665,3.6,Claresta,Flita,Claresta.Flita@yopmail.com,doctor,2007-10-10T22:00:30Z" << std::endl;
3001 ss << "8,22633,528,5.3,Bibby,Virgin,Bibby.Virgin@yopmail.com,developer,2020-06-30T11:07:01.23323-00:30" << std::endl;
3002 ss << "9,38439,5645,2.8,Mahalia,Aldric,Mahalia.Aldric@yopmail.com,doctor,2019-04-20T20:21:22.23+05:15" << std::endl;
3003 ss << "10,6611,7287,1.0,Pamella,Sibyls,Pamella.Sibyls@yopmail.com,police officer,2000-09-13T14:41Z" << std::endl;
3004 out = ss.str();
3005 }
3006
3007 TEST(TestS3selectFunctions, nested_query_single_row_result)
3008 {
3009 std::string input_csv, input_query, expected_res;
3010 generate_csv_multirow(input_csv);
3011
3012 input_query = "select to_string(to_timestamp(\'2009-09-17T17:56:06.234567Z\'), substring(\' athmywopgss-nghjkl\', 3, 10)) from stdin;";
3013 expected_res = "t5562009wopg06";
3014 std::cout << "Running query: 1" << std::endl;
3015 auto s3select_res = run_s3select(input_query);
3016 EXPECT_EQ(s3select_res, expected_res);
3017
3018 input_query = "select to_timestamp(upper(\'2009-09-17t17:56:06.234567z\')) from stdin;";
3019 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
3020 expected_res = "2009-09-17T17:56:06.234567000Z";
3021 #else
3022 expected_res = "2009-09-17T17:56:06.234567Z";
3023 #endif
3024 std::cout << "Running query: 2" << std::endl;
3025 s3select_res = run_s3select(input_query);
3026 EXPECT_EQ(s3select_res, expected_res);
3027
3028 input_query = "select count(0) from stdin where extract( year from to_timestamp(_9)) < 2010;";
3029 expected_res = "6";
3030 std::cout << "Running query: 3" << std::endl;
3031 s3select_res = run_s3select(input_query, input_csv);
3032 EXPECT_EQ(s3select_res, expected_res);
3033
3034 }
3035
3036 TEST(TestS3selectFunctions, nested_query_multirow_result)
3037 {
3038 std::string input_csv, input_query, expected_res;
3039 generate_csv_multirow(input_csv);
3040
3041 input_query = "select to_string(to_timestamp(_9), substring(\' athmywopgssMMMMdXXXXX-nghjkl\', 2, 25)) from stdin;";
3042 expected_res = "AMt11212020wopg30October26Z-397000000g11\nAMt1222009wopg45December2+09:45-832700000g1\nPMt11182001wopg23February27-12:00-446633000g11\nAMt1401995wopg00August24+12:30-0g1\nAMt5272013wopg59January30Z-200000000g5\nPMt5251998wopg00March31-01:05-0g5\nPMt1002007wopg30October10Z-0g10\nAMt1172020wopg01June30-00:30-233230000g11\nPMt8212019wopg22April20+05:15-230000000g8\nPMt2412000wopg00September13Z-0g2\n";
3043 std::cout << "Running query: 1" << std::endl;
3044 auto s3select_res = run_s3select(input_query, input_csv);
3045 EXPECT_EQ(s3select_res, expected_res);
3046
3047 input_query = "select to_timestamp(upper(lower(_9))) from stdin;";
3048 #if BOOST_DATE_TIME_POSIX_TIME_STD_CONFIG
3049 expected_res = "2020-10-26T11:21:30.397000000Z\n2009-12-02T01:22:45.832700000+09:45\n2001-02-27T23:18:23.446633000-12:00\n1995-08-24T01:40:00+12:30\n2013-01-30T05:27:59.200000000Z\n1998-03-31T17:25:00-01:05\n2007-10-10T22:00:30Z\n2020-06-30T11:07:01.233230000-00:30\n2019-04-20T20:21:22.230000000+05:15\n2000-09-13T14:41:00Z\n";
3050 #else
3051 expected_res = "2020-10-26T11:21:30.397000Z\n2009-12-02T01:22:45.832700+09:45\n2001-02-27T23:18:23.446633-12:00\n1995-08-24T01:40:00+12:30\n2013-01-30T05:27:59.200000Z\n1998-03-31T17:25:00-01:05\n2007-10-10T22:00:30Z\n2020-06-30T11:07:01.233230-00:30\n2019-04-20T20:21:22.230000+05:15\n2000-09-13T14:41:00Z\n";
3052 #endif
3053 std::cout << "Running query: 2" << std::endl;
3054 s3select_res = run_s3select(input_query, input_csv);
3055 EXPECT_EQ(s3select_res, expected_res);
3056
3057 input_query = "select count(*) from s3object where extract( year from to_timestamp(_9)) > 2010;";
3058 expected_res = "4";
3059 std::cout << "Running query: 3" << std::endl;
3060 s3select_res = run_s3select(input_query, input_csv);
3061 EXPECT_EQ(s3select_res, expected_res);
3062
3063 input_query = "select _9 from s3object where extract( year from to_timestamp(_9)) > 2010;";
3064 expected_res = "2020-10-26T11:21:30.397Z\n2013-01-30T05:27:59.2Z\n2020-06-30T11:07:01.23323-00:30\n2019-04-20T20:21:22.23+05:15\n";
3065 std::cout << "Running query: 4" << std::endl;
3066 s3select_res = run_s3select(input_query, input_csv);
3067 EXPECT_EQ(s3select_res, expected_res);
3068 }
3069
3070 TEST(TestS3selectFunctions, opserialization_expressions)
3071 {
3072 std::string input;
3073 size_t size = 10;
3074 generate_rand_columns_csv(input, size);
3075
3076 char a[5] = {'@', '#', '$', '%'};
3077 char b[4] = {'!', '^', '&', '*'};
3078 char x = a[rand() % 4];
3079 char y = b[rand() % 4];
3080
3081 const std::string input_query = "select * from s3object ;";
3082
3083 run_s3select_test_opserialization(input_query, input, &x, &y);
3084
3085 const std::string input_query_1 = "select int(_1) from s3object where nullif(_1, _2) is not null;";
3086
3087 std::string s3select_result_1 = run_s3select_opserialization_quot(input_query_1,input, true);
3088
3089 const std::string input_query_2 = "select int(_1) from s3object where int(_1) != int(_2);";
3090
3091 std::string s3select_result_2 = run_s3select(input_query_2,input);
3092
3093 std::string s3select_result_2_final = string_to_quot(s3select_result_2);
3094
3095 ASSERT_EQ(s3select_result_1, s3select_result_2_final);
3096
3097 const std::string input_query_3 = "select int(_1) from s3object where int(_1) != int(_2);";
3098
3099 std::string s3select_result_3 = run_s3select_opserialization_quot(input_query_3,input);
3100
3101 ASSERT_NE(s3select_result_1, s3select_result_3);
3102
3103 const std::string input_query_4 = "select int(_1) from s3object where nullif(_1, _2) is not null;";
3104
3105 std::string s3select_result_4 = run_s3select_opserialization_quot(input_query_4,input, true, x);
3106
3107 const std::string input_query_5 = "select int(_1) from s3object where int(_1) != int(_2);";
3108
3109 std::string s3select_result_5 = run_s3select(input_query_5,input);
3110
3111 std::string s3select_result_5_final = string_to_quot(s3select_result_5, x);
3112
3113 ASSERT_EQ(s3select_result_4, s3select_result_5_final);
3114
3115 ASSERT_NE(s3select_result_4, s3select_result_1);
3116 }
3117
3118 TEST(TestS3selectFunctions, presto_syntax_alignments)
3119 {
3120 /*
3121 * the purpose of this test is to compare 2 queries with different syntax but with the same semantics
3122 * differences are case-insensitive, table-alias, semicolon at the end-of-statement
3123 */
3124
3125 std::string input;
3126 size_t size = 10000;
3127
3128 generate_rand_csv(input, size);
3129 std::string input_for_presto = input;
3130
3131 const std::string input_query = "select _1,_2 from s3object where _1 = _2;";
3132
3133 auto s3select_res = run_s3select(input_query, input);
3134
3135 const std::string input_presto_query = "Select t._1,t._2 fRom s3OBJECT t whEre _1 = _2";
3136
3137 auto s3select_presto_res = run_s3select(input_presto_query, input_for_presto);
3138
3139 ASSERT_EQ(s3select_res, s3select_presto_res);
3140
3141 }
3142