]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <gtest/gtest.h> | |
19 | #include <math.h> | |
20 | #include <time.h> | |
21 | ||
22 | #include "arrow/memory_pool.h" | |
23 | #include "gandiva/precompiled/time_constants.h" | |
24 | #include "gandiva/projector.h" | |
25 | #include "gandiva/tests/test_util.h" | |
26 | #include "gandiva/tree_expr_builder.h" | |
27 | ||
28 | namespace gandiva { | |
29 | ||
30 | using arrow::boolean; | |
31 | using arrow::date32; | |
32 | using arrow::date64; | |
33 | using arrow::float32; | |
34 | using arrow::int32; | |
35 | using arrow::int64; | |
36 | using arrow::timestamp; | |
37 | ||
38 | class TestProjector : public ::testing::Test { | |
39 | public: | |
40 | void SetUp() { pool_ = arrow::default_memory_pool(); } | |
41 | ||
42 | protected: | |
43 | arrow::MemoryPool* pool_; | |
44 | }; | |
45 | ||
46 | time_t Epoch() { | |
47 | // HACK: MSVC mktime() fails on UTC times before 1970-01-01 00:00:00. | |
48 | // But it first converts its argument from local time to UTC time, | |
49 | // so we ask for 1970-01-02 to avoid failing in timezones ahead of UTC. | |
50 | struct tm y1970; | |
51 | memset(&y1970, 0, sizeof(struct tm)); | |
52 | y1970.tm_year = 70; | |
53 | y1970.tm_mon = 0; | |
54 | y1970.tm_mday = 2; | |
55 | y1970.tm_hour = 0; | |
56 | y1970.tm_min = 0; | |
57 | y1970.tm_sec = 0; | |
58 | time_t epoch = mktime(&y1970); | |
59 | if (epoch == static_cast<time_t>(-1)) { | |
60 | ARROW_LOG(FATAL) << "mktime() failed"; | |
61 | } | |
62 | // Adjust for the 24h offset above. | |
63 | return epoch - 24 * 3600; | |
64 | } | |
65 | ||
// Converts a wall-clock time (hours, minutes, seconds, milliseconds) into the
// number of milliseconds elapsed since midnight.
int32_t MillisInDay(int32_t hh, int32_t mm, int32_t ss, int32_t millis) {
  // hours -> minutes -> seconds -> milliseconds, folded into one expression.
  return ((hh * 60 + mm) * 60 + ss) * 1000 + millis;
}
72 | ||
73 | int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr, | |
74 | int32_t min, int32_t sec, int32_t millis) { | |
75 | struct tm given_ts; | |
76 | memset(&given_ts, 0, sizeof(struct tm)); | |
77 | given_ts.tm_year = (yy - 1900); | |
78 | given_ts.tm_mon = (mm - 1); | |
79 | given_ts.tm_mday = dd; | |
80 | given_ts.tm_hour = hr; | |
81 | given_ts.tm_min = min; | |
82 | given_ts.tm_sec = sec; | |
83 | ||
84 | time_t ts = mktime(&given_ts); | |
85 | if (ts == static_cast<time_t>(-1)) { | |
86 | ARROW_LOG(FATAL) << "mktime() failed"; | |
87 | } | |
88 | // time_t is an arithmetic type on both POSIX and Windows, we can simply | |
89 | // subtract to get a duration in seconds. | |
90 | return static_cast<int64_t>(ts - base_line) * 1000 + millis; | |
91 | } | |
92 | ||
93 | int32_t DaysSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr, | |
94 | int32_t min, int32_t sec, int32_t millis) { | |
95 | struct tm given_ts; | |
96 | memset(&given_ts, 0, sizeof(struct tm)); | |
97 | given_ts.tm_year = (yy - 1900); | |
98 | given_ts.tm_mon = (mm - 1); | |
99 | given_ts.tm_mday = dd; | |
100 | given_ts.tm_hour = hr; | |
101 | given_ts.tm_min = min; | |
102 | given_ts.tm_sec = sec; | |
103 | ||
104 | time_t ts = mktime(&given_ts); | |
105 | if (ts == static_cast<time_t>(-1)) { | |
106 | ARROW_LOG(FATAL) << "mktime() failed"; | |
107 | } | |
108 | // time_t is an arithmetic type on both POSIX and Windows, we can simply | |
109 | // subtract to get a duration in seconds. | |
110 | return static_cast<int32_t>(((ts - base_line) * 1000 + millis) / MILLIS_IN_DAY); | |
111 | } | |
112 | ||
113 | TEST_F(TestProjector, TestIsNull) { | |
114 | auto d0 = field("d0", date64()); | |
115 | auto t0 = field("t0", time32(arrow::TimeUnit::MILLI)); | |
116 | auto schema = arrow::schema({d0, t0}); | |
117 | ||
118 | // output fields | |
119 | auto b0 = field("isnull", boolean()); | |
120 | ||
121 | // isnull and isnotnull | |
122 | auto isnull_expr = TreeExprBuilder::MakeExpression("isnull", {d0}, b0); | |
123 | auto isnotnull_expr = TreeExprBuilder::MakeExpression("isnotnull", {t0}, b0); | |
124 | ||
125 | std::shared_ptr<Projector> projector; | |
126 | auto status = Projector::Make(schema, {isnull_expr, isnotnull_expr}, | |
127 | TestConfiguration(), &projector); | |
128 | ASSERT_TRUE(status.ok()); | |
129 | ||
130 | int num_records = 4; | |
131 | std::vector<int64_t> d0_data = {0, 100, 0, 1000}; | |
132 | auto t0_data = {0, 100, 0, 1000}; | |
133 | auto validity = {false, true, false, true}; | |
134 | auto d0_array = | |
135 | MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), d0_data, validity); | |
136 | auto t0_array = MakeArrowTypeArray<arrow::Time32Type, int32_t>( | |
137 | time32(arrow::TimeUnit::MILLI), t0_data, validity); | |
138 | ||
139 | // expected output | |
140 | auto exp_isnull = | |
141 | MakeArrowArrayBool({true, false, true, false}, {true, true, true, true}); | |
142 | auto exp_isnotnull = MakeArrowArrayBool(validity, {true, true, true, true}); | |
143 | ||
144 | // prepare input record batch | |
145 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {d0_array, t0_array}); | |
146 | ||
147 | // Evaluate expression | |
148 | arrow::ArrayVector outputs; | |
149 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
150 | EXPECT_TRUE(status.ok()); | |
151 | ||
152 | // Validate results | |
153 | EXPECT_ARROW_ARRAY_EQUALS(exp_isnull, outputs.at(0)); | |
154 | EXPECT_ARROW_ARRAY_EQUALS(exp_isnotnull, outputs.at(1)); | |
155 | } | |
156 | ||
157 | TEST_F(TestProjector, TestDate32IsNull) { | |
158 | auto d0 = field("d0", date32()); | |
159 | auto schema = arrow::schema({d0}); | |
160 | ||
161 | // output fields | |
162 | auto b0 = field("isnull", boolean()); | |
163 | ||
164 | // isnull and isnotnull | |
165 | auto isnull_expr = TreeExprBuilder::MakeExpression("isnull", {d0}, b0); | |
166 | ||
167 | std::shared_ptr<Projector> projector; | |
168 | auto status = Projector::Make(schema, {isnull_expr}, TestConfiguration(), &projector); | |
169 | ASSERT_TRUE(status.ok()); | |
170 | ||
171 | int num_records = 4; | |
172 | std::vector<int32_t> d0_data = {0, 100, 0, 1000}; | |
173 | auto validity = {false, true, false, true}; | |
174 | auto d0_array = | |
175 | MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), d0_data, validity); | |
176 | ||
177 | // expected output | |
178 | auto exp_isnull = | |
179 | MakeArrowArrayBool({true, false, true, false}, {true, true, true, true}); | |
180 | ||
181 | // prepare input record batch | |
182 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {d0_array}); | |
183 | ||
184 | // Evaluate expression | |
185 | arrow::ArrayVector outputs; | |
186 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
187 | EXPECT_TRUE(status.ok()); | |
188 | ||
189 | // Validate results | |
190 | EXPECT_ARROW_ARRAY_EQUALS(exp_isnull, outputs.at(0)); | |
191 | } | |
192 | ||
193 | TEST_F(TestProjector, TestDateTime) { | |
194 | auto field0 = field("f0", date64()); | |
195 | auto field1 = field("f1", date32()); | |
196 | auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI)); | |
197 | auto schema = arrow::schema({field0, field1, field2}); | |
198 | ||
199 | // output fields | |
200 | auto field_year = field("yy", int64()); | |
201 | auto field_month = field("mm", int64()); | |
202 | auto field_day = field("dd", int64()); | |
203 | auto field_hour = field("hh", int64()); | |
204 | auto field_date64 = field("date64", date64()); | |
205 | ||
206 | // extract year and month from date | |
207 | auto date2year_expr = | |
208 | TreeExprBuilder::MakeExpression("extractYear", {field0}, field_year); | |
209 | auto date2month_expr = | |
210 | TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month); | |
211 | ||
212 | // extract year and month from date32, cast to date64 first | |
213 | auto node_f1 = TreeExprBuilder::MakeField(field1); | |
214 | auto date32_to_date64_func = | |
215 | TreeExprBuilder::MakeFunction("castDATE", {node_f1}, date64()); | |
216 | ||
217 | auto date64_2year_func = | |
218 | TreeExprBuilder::MakeFunction("extractYear", {date32_to_date64_func}, int64()); | |
219 | auto date64_2year_expr = TreeExprBuilder::MakeExpression(date64_2year_func, field_year); | |
220 | ||
221 | auto date64_2month_func = | |
222 | TreeExprBuilder::MakeFunction("extractMonth", {date32_to_date64_func}, int64()); | |
223 | auto date64_2month_expr = | |
224 | TreeExprBuilder::MakeExpression(date64_2month_func, field_month); | |
225 | ||
226 | // extract month and day from timestamp | |
227 | auto ts2month_expr = | |
228 | TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month); | |
229 | auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day); | |
230 | ||
231 | std::shared_ptr<Projector> projector; | |
232 | auto status = Projector::Make(schema, | |
233 | {date2year_expr, date2month_expr, date64_2year_expr, | |
234 | date64_2month_expr, ts2month_expr, ts2day_expr}, | |
235 | TestConfiguration(), &projector); | |
236 | ASSERT_TRUE(status.ok()); | |
237 | ||
238 | // Create a row-batch with some sample data | |
239 | time_t epoch = Epoch(); | |
240 | int num_records = 4; | |
241 | auto validity = {true, true, true, true}; | |
242 | std::vector<int64_t> field0_data = {MillisSince(epoch, 2000, 1, 1, 5, 0, 0, 0), | |
243 | MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0), | |
244 | MillisSince(epoch, 2015, 6, 30, 20, 0, 0, 0), | |
245 | MillisSince(epoch, 2015, 7, 1, 20, 0, 0, 0)}; | |
246 | auto array0 = | |
247 | MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), field0_data, validity); | |
248 | ||
249 | std::vector<int32_t> field1_data = {DaysSince(epoch, 2000, 1, 1, 5, 0, 0, 0), | |
250 | DaysSince(epoch, 1999, 12, 31, 5, 0, 0, 0), | |
251 | DaysSince(epoch, 2015, 6, 30, 20, 0, 0, 0), | |
252 | DaysSince(epoch, 2015, 7, 1, 20, 0, 0, 0)}; | |
253 | auto array1 = | |
254 | MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), field1_data, validity); | |
255 | ||
256 | std::vector<int64_t> field2_data = {MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0), | |
257 | MillisSince(epoch, 2000, 1, 2, 5, 0, 0, 0), | |
258 | MillisSince(epoch, 2015, 7, 1, 1, 0, 0, 0), | |
259 | MillisSince(epoch, 2015, 6, 29, 23, 0, 0, 0)}; | |
260 | ||
261 | auto array2 = MakeArrowTypeArray<arrow::TimestampType, int64_t>( | |
262 | arrow::timestamp(arrow::TimeUnit::MILLI), field2_data, validity); | |
263 | ||
264 | // expected output | |
265 | // date 2 year and date 2 month for date64 | |
266 | auto exp_yy_from_date64 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity); | |
267 | auto exp_mm_from_date64 = MakeArrowArrayInt64({1, 12, 6, 7}, validity); | |
268 | ||
269 | // date 2 year and date 2 month for date32 | |
270 | auto exp_yy_from_date32 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity); | |
271 | auto exp_mm_from_date32 = MakeArrowArrayInt64({1, 12, 6, 7}, validity); | |
272 | ||
273 | // ts 2 month and ts 2 day | |
274 | auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity); | |
275 | auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity); | |
276 | ||
277 | // prepare input record batch | |
278 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2}); | |
279 | ||
280 | // Evaluate expression | |
281 | arrow::ArrayVector outputs; | |
282 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
283 | EXPECT_TRUE(status.ok()); | |
284 | ||
285 | // Validate results | |
286 | EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date64, outputs.at(0)); | |
287 | EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date64, outputs.at(1)); | |
288 | EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date32, outputs.at(2)); | |
289 | EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date32, outputs.at(3)); | |
290 | EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(4)); | |
291 | EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(5)); | |
292 | } | |
293 | ||
294 | TEST_F(TestProjector, TestTime) { | |
295 | auto field0 = field("f0", time32(arrow::TimeUnit::MILLI)); | |
296 | auto schema = arrow::schema({field0}); | |
297 | ||
298 | auto field_min = field("mm", int64()); | |
299 | auto field_hour = field("hh", int64()); | |
300 | ||
301 | // extract day and hour from time32 | |
302 | auto time2min_expr = | |
303 | TreeExprBuilder::MakeExpression("extractMinute", {field0}, field_min); | |
304 | auto time2hour_expr = | |
305 | TreeExprBuilder::MakeExpression("extractHour", {field0}, field_hour); | |
306 | ||
307 | std::shared_ptr<Projector> projector; | |
308 | auto status = Projector::Make(schema, {time2min_expr, time2hour_expr}, | |
309 | TestConfiguration(), &projector); | |
310 | ASSERT_TRUE(status.ok()); | |
311 | ||
312 | // create input data | |
313 | int num_records = 4; | |
314 | auto validity = {true, true, true, true}; | |
315 | std::vector<int32_t> field_data = { | |
316 | MillisInDay(5, 35, 25, 0), // 5:35:25 | |
317 | MillisInDay(0, 59, 0, 0), // 0:59:12 | |
318 | MillisInDay(12, 30, 0, 0), // 12:30:0 | |
319 | MillisInDay(23, 0, 0, 0) // 23:0:0 | |
320 | }; | |
321 | auto array = MakeArrowTypeArray<arrow::Time32Type, int32_t>( | |
322 | time32(arrow::TimeUnit::MILLI), field_data, validity); | |
323 | ||
324 | // expected output | |
325 | auto exp_min = MakeArrowArrayInt64({35, 59, 30, 0}, validity); | |
326 | auto exp_hour = MakeArrowArrayInt64({5, 0, 12, 23}, validity); | |
327 | ||
328 | // prepare input record batch | |
329 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array}); | |
330 | ||
331 | // Evaluate expression | |
332 | arrow::ArrayVector outputs; | |
333 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
334 | EXPECT_TRUE(status.ok()); | |
335 | ||
336 | // Validate results | |
337 | EXPECT_ARROW_ARRAY_EQUALS(exp_min, outputs.at(0)); | |
338 | EXPECT_ARROW_ARRAY_EQUALS(exp_hour, outputs.at(1)); | |
339 | } | |
340 | ||
341 | TEST_F(TestProjector, TestTimestampDiff) { | |
342 | auto f0 = field("f0", timestamp(arrow::TimeUnit::MILLI)); | |
343 | auto f1 = field("f1", timestamp(arrow::TimeUnit::MILLI)); | |
344 | auto schema = arrow::schema({f0, f1}); | |
345 | ||
346 | // output fields | |
347 | auto diff_seconds = field("ss", int32()); | |
348 | ||
349 | // get diff | |
350 | auto diff_secs_expr = | |
351 | TreeExprBuilder::MakeExpression("timestampdiffSecond", {f0, f1}, diff_seconds); | |
352 | ||
353 | auto diff_mins_expr = | |
354 | TreeExprBuilder::MakeExpression("timestampdiffMinute", {f0, f1}, diff_seconds); | |
355 | ||
356 | auto diff_hours_expr = | |
357 | TreeExprBuilder::MakeExpression("timestampdiffHour", {f0, f1}, diff_seconds); | |
358 | ||
359 | auto diff_days_expr = | |
360 | TreeExprBuilder::MakeExpression("timestampdiffDay", {f0, f1}, diff_seconds); | |
361 | ||
362 | auto diff_days_expr_with_datediff_fn = | |
363 | TreeExprBuilder::MakeExpression("datediff", {f0, f1}, diff_seconds); | |
364 | ||
365 | auto diff_weeks_expr = | |
366 | TreeExprBuilder::MakeExpression("timestampdiffWeek", {f0, f1}, diff_seconds); | |
367 | ||
368 | auto diff_months_expr = | |
369 | TreeExprBuilder::MakeExpression("timestampdiffMonth", {f0, f1}, diff_seconds); | |
370 | ||
371 | auto diff_quarters_expr = | |
372 | TreeExprBuilder::MakeExpression("timestampdiffQuarter", {f0, f1}, diff_seconds); | |
373 | ||
374 | auto diff_years_expr = | |
375 | TreeExprBuilder::MakeExpression("timestampdiffYear", {f0, f1}, diff_seconds); | |
376 | ||
377 | std::shared_ptr<Projector> projector; | |
378 | auto exprs = {diff_secs_expr, | |
379 | diff_mins_expr, | |
380 | diff_hours_expr, | |
381 | diff_days_expr, | |
382 | diff_days_expr_with_datediff_fn, | |
383 | diff_weeks_expr, | |
384 | diff_months_expr, | |
385 | diff_quarters_expr, | |
386 | diff_years_expr}; | |
387 | auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector); | |
388 | ASSERT_TRUE(status.ok()); | |
389 | ||
390 | time_t epoch = Epoch(); | |
391 | ||
392 | // 2015-09-10T20:49:42.000 | |
393 | auto start_millis = MillisSince(epoch, 2015, 9, 10, 20, 49, 42, 0); | |
394 | // 2017-03-30T22:50:59.050 | |
395 | auto end_millis = MillisSince(epoch, 2017, 3, 30, 22, 50, 59, 50); | |
396 | std::vector<int64_t> f0_data = {start_millis, end_millis, | |
397 | // 2015-09-10T20:49:42.999 | |
398 | start_millis + 999, | |
399 | // 2015-09-10T20:49:42.999 | |
400 | MillisSince(epoch, 2015, 9, 10, 20, 49, 42, 999)}; | |
401 | std::vector<int64_t> f1_data = {end_millis, start_millis, | |
402 | // 2015-09-10T20:49:42.999 | |
403 | start_millis + 999, | |
404 | // 2015-09-9T21:49:42.999 (23 hours behind) | |
405 | MillisSince(epoch, 2015, 9, 9, 21, 49, 42, 999)}; | |
406 | ||
407 | int64_t num_records = f0_data.size(); | |
408 | std::vector<bool> validity(num_records, true); | |
409 | auto array0 = MakeArrowTypeArray<arrow::TimestampType, int64_t>( | |
410 | arrow::timestamp(arrow::TimeUnit::MILLI), f0_data, validity); | |
411 | auto array1 = MakeArrowTypeArray<arrow::TimestampType, int64_t>( | |
412 | arrow::timestamp(arrow::TimeUnit::MILLI), f1_data, validity); | |
413 | ||
414 | // expected output | |
415 | std::vector<ArrayPtr> exp_output; | |
416 | exp_output.push_back( | |
417 | MakeArrowArrayInt32({48996077, -48996077, 0, -23 * 3600}, validity)); | |
418 | exp_output.push_back(MakeArrowArrayInt32({816601, -816601, 0, -23 * 60}, validity)); | |
419 | exp_output.push_back(MakeArrowArrayInt32({13610, -13610, 0, -23}, validity)); | |
420 | exp_output.push_back(MakeArrowArrayInt32({567, -567, 0, 0}, validity)); | |
421 | exp_output.push_back(MakeArrowArrayInt32({567, -567, 0, 0}, validity)); | |
422 | exp_output.push_back(MakeArrowArrayInt32({81, -81, 0, 0}, validity)); | |
423 | exp_output.push_back(MakeArrowArrayInt32({18, -18, 0, 0}, validity)); | |
424 | exp_output.push_back(MakeArrowArrayInt32({6, -6, 0, 0}, validity)); | |
425 | exp_output.push_back(MakeArrowArrayInt32({1, -1, 0, 0}, validity)); | |
426 | ||
427 | // prepare input record batch | |
428 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); | |
429 | ||
430 | // Evaluate expression | |
431 | arrow::ArrayVector outputs; | |
432 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
433 | EXPECT_TRUE(status.ok()); | |
434 | ||
435 | // Validate results | |
436 | for (uint32_t i = 0; i < exp_output.size(); i++) { | |
437 | EXPECT_ARROW_ARRAY_EQUALS(exp_output.at(i), outputs.at(i)); | |
438 | } | |
439 | } | |
440 | ||
441 | TEST_F(TestProjector, TestTimestampDiffMonth) { | |
442 | auto f0 = field("f0", timestamp(arrow::TimeUnit::MILLI)); | |
443 | auto f1 = field("f1", timestamp(arrow::TimeUnit::MILLI)); | |
444 | auto schema = arrow::schema({f0, f1}); | |
445 | ||
446 | // output fields | |
447 | auto diff_seconds = field("ss", int32()); | |
448 | ||
449 | auto diff_months_expr = | |
450 | TreeExprBuilder::MakeExpression("timestampdiffMonth", {f0, f1}, diff_seconds); | |
451 | ||
452 | std::shared_ptr<Projector> projector; | |
453 | auto status = | |
454 | Projector::Make(schema, {diff_months_expr}, TestConfiguration(), &projector); | |
455 | std::cout << status.message(); | |
456 | ASSERT_TRUE(status.ok()); | |
457 | ||
458 | time_t epoch = Epoch(); | |
459 | ||
460 | // Create a row-batch with some sample data | |
461 | std::vector<int64_t> f0_data = {MillisSince(epoch, 2019, 1, 31, 0, 0, 0, 0), | |
462 | MillisSince(epoch, 2020, 1, 31, 0, 0, 0, 0), | |
463 | MillisSince(epoch, 2020, 1, 31, 0, 0, 0, 0), | |
464 | MillisSince(epoch, 2019, 3, 31, 0, 0, 0, 0), | |
465 | MillisSince(epoch, 2020, 3, 30, 0, 0, 0, 0), | |
466 | MillisSince(epoch, 2020, 5, 31, 0, 0, 0, 0)}; | |
467 | std::vector<int64_t> f1_data = {MillisSince(epoch, 2019, 2, 28, 0, 0, 0, 0), | |
468 | MillisSince(epoch, 2020, 2, 28, 0, 0, 0, 0), | |
469 | MillisSince(epoch, 2020, 2, 29, 0, 0, 0, 0), | |
470 | MillisSince(epoch, 2019, 4, 30, 0, 0, 0, 0), | |
471 | MillisSince(epoch, 2020, 2, 29, 0, 0, 0, 0), | |
472 | MillisSince(epoch, 2020, 9, 30, 0, 0, 0, 0)}; | |
473 | int64_t num_records = f0_data.size(); | |
474 | std::vector<bool> validity(num_records, true); | |
475 | ||
476 | auto array0 = MakeArrowTypeArray<arrow::TimestampType, int64_t>( | |
477 | arrow::timestamp(arrow::TimeUnit::MILLI), f0_data, validity); | |
478 | auto array1 = MakeArrowTypeArray<arrow::TimestampType, int64_t>( | |
479 | arrow::timestamp(arrow::TimeUnit::MILLI), f1_data, validity); | |
480 | ||
481 | // expected output | |
482 | std::vector<ArrayPtr> exp_output; | |
483 | exp_output.push_back(MakeArrowArrayInt32({1, 0, 1, 1, -1, 4}, validity)); | |
484 | ||
485 | // prepare input record batch | |
486 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); | |
487 | ||
488 | // Evaluate expression | |
489 | arrow::ArrayVector outputs; | |
490 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
491 | EXPECT_TRUE(status.ok()); | |
492 | ||
493 | // Validate results | |
494 | for (uint32_t i = 0; i < exp_output.size(); i++) { | |
495 | EXPECT_ARROW_ARRAY_EQUALS(exp_output.at(i), outputs.at(i)); | |
496 | } | |
497 | } | |
498 | ||
499 | TEST_F(TestProjector, TestMonthsBetween) { | |
500 | auto f0 = field("f0", arrow::date64()); | |
501 | auto f1 = field("f1", arrow::date64()); | |
502 | auto schema = arrow::schema({f0, f1}); | |
503 | ||
504 | // output fields | |
505 | auto output = field("out", arrow::float64()); | |
506 | ||
507 | auto months_between_expr = | |
508 | TreeExprBuilder::MakeExpression("months_between", {f0, f1}, output); | |
509 | ||
510 | std::shared_ptr<Projector> projector; | |
511 | auto status = | |
512 | Projector::Make(schema, {months_between_expr}, TestConfiguration(), &projector); | |
513 | std::cout << status.message(); | |
514 | ASSERT_TRUE(status.ok()); | |
515 | ||
516 | time_t epoch = Epoch(); | |
517 | ||
518 | // Create a row-batch with some sample data | |
519 | int num_records = 4; | |
520 | auto validity = {true, true, true, true}; | |
521 | std::vector<int64_t> f0_data = {MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0), | |
522 | MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0), | |
523 | MillisSince(epoch, 1995, 3, 31, 0, 0, 0, 0), | |
524 | MillisSince(epoch, 1996, 3, 31, 0, 0, 0, 0)}; | |
525 | ||
526 | auto array0 = | |
527 | MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data, validity); | |
528 | ||
529 | std::vector<int64_t> f1_data = {MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0), | |
530 | MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0), | |
531 | MillisSince(epoch, 1995, 2, 28, 0, 0, 0, 0), | |
532 | MillisSince(epoch, 1996, 2, 29, 0, 0, 0, 0)}; | |
533 | ||
534 | auto array1 = | |
535 | MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f1_data, validity); | |
536 | ||
537 | // expected output | |
538 | auto exp_output = MakeArrowArrayFloat64({1.0, -1.0, 1.0, 1.0}, validity); | |
539 | ||
540 | // prepare input record batch | |
541 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); | |
542 | ||
543 | // Evaluate expression | |
544 | arrow::ArrayVector outputs; | |
545 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
546 | EXPECT_TRUE(status.ok()); | |
547 | ||
548 | // Validate results | |
549 | EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0)); | |
550 | } | |
551 | ||
552 | TEST_F(TestProjector, TestLastDay) { | |
553 | auto f0 = field("f0", arrow::date64()); | |
554 | auto schema = arrow::schema({f0}); | |
555 | ||
556 | // output fields | |
557 | auto output = field("out", arrow::date64()); | |
558 | ||
559 | auto last_day_expr = TreeExprBuilder::MakeExpression("last_day", {f0}, output); | |
560 | ||
561 | std::shared_ptr<Projector> projector; | |
562 | auto status = Projector::Make(schema, {last_day_expr}, TestConfiguration(), &projector); | |
563 | std::cout << status.message(); | |
564 | ASSERT_TRUE(status.ok()); | |
565 | ||
566 | time_t epoch = Epoch(); | |
567 | ||
568 | // Create a row-batch with some sample data | |
569 | // Used a leap year as example. | |
570 | int num_records = 5; | |
571 | auto validity = {true, true, true, true, true}; | |
572 | std::vector<int64_t> f0_data = {MillisSince(epoch, 2016, 2, 3, 8, 20, 10, 34), | |
573 | MillisSince(epoch, 2016, 2, 29, 23, 59, 59, 59), | |
574 | MillisSince(epoch, 2016, 1, 30, 1, 15, 20, 0), | |
575 | MillisSince(epoch, 2017, 2, 3, 23, 15, 20, 0), | |
576 | MillisSince(epoch, 2015, 12, 30, 22, 50, 11, 0)}; | |
577 | ||
578 | auto array0 = | |
579 | MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data, validity); | |
580 | ||
581 | std::vector<int64_t> f0_output_data = {MillisSince(epoch, 2016, 2, 29, 0, 0, 0, 0), | |
582 | MillisSince(epoch, 2016, 2, 29, 0, 0, 0, 0), | |
583 | MillisSince(epoch, 2016, 1, 31, 0, 0, 0, 0), | |
584 | MillisSince(epoch, 2017, 2, 28, 0, 0, 0, 0), | |
585 | MillisSince(epoch, 2015, 12, 31, 0, 0, 0, 0)}; | |
586 | ||
587 | // expected output | |
588 | auto exp_output = MakeArrowArrayDate64(f0_output_data, validity); | |
589 | ||
590 | // prepare input record batch | |
591 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0}); | |
592 | ||
593 | // Evaluate expression | |
594 | arrow::ArrayVector outputs; | |
595 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
596 | EXPECT_TRUE(status.ok()); | |
597 | ||
598 | // Validate results | |
599 | EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0)); | |
600 | } | |
601 | ||
602 | } // namespace gandiva |