]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / kernels / scalar_temporal_binary.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cmath>
19#include <initializer_list>
20#include <sstream>
21
22#include "arrow/builder.h"
23#include "arrow/compute/api_scalar.h"
24#include "arrow/compute/kernels/common.h"
25#include "arrow/compute/kernels/temporal_internal.h"
26#include "arrow/util/checked_cast.h"
27#include "arrow/util/time.h"
28#include "arrow/vendored/datetime.h"
29
30namespace arrow {
31
32using internal::checked_cast;
33using internal::checked_pointer_cast;
34
35namespace compute {
36namespace internal {
37
38namespace {
39
40using arrow_vendored::date::days;
41using arrow_vendored::date::floor;
42using arrow_vendored::date::hh_mm_ss;
43using arrow_vendored::date::local_days;
44using arrow_vendored::date::local_time;
45using arrow_vendored::date::locate_zone;
46using arrow_vendored::date::sys_days;
47using arrow_vendored::date::sys_time;
48using arrow_vendored::date::time_zone;
49using arrow_vendored::date::trunc;
50using arrow_vendored::date::weekday;
51using arrow_vendored::date::weeks;
52using arrow_vendored::date::year_month_day;
53using arrow_vendored::date::year_month_weekday;
54using arrow_vendored::date::years;
55using arrow_vendored::date::zoned_time;
56using arrow_vendored::date::literals::dec;
57using arrow_vendored::date::literals::jan;
58using arrow_vendored::date::literals::last;
59using arrow_vendored::date::literals::mon;
60using arrow_vendored::date::literals::sun;
61using arrow_vendored::date::literals::thu;
62using arrow_vendored::date::literals::wed;
63using internal::applicator::ScalarBinaryNotNullStatefulEqualTypes;
64
65using DayOfWeekState = OptionsWrapper<DayOfWeekOptions>;
66using WeekState = OptionsWrapper<WeekOptions>;
67
68Status CheckTimezones(const ExecBatch& batch) {
69 const auto& timezone = GetInputTimezone(batch.values[0]);
70 for (int i = 1; i < batch.num_values(); i++) {
71 const auto& other_timezone = GetInputTimezone(batch.values[i]);
72 if (other_timezone != timezone) {
73 return Status::TypeError("Got differing time zone '", other_timezone,
74 "' for argument ", i + 1, "; expected '", timezone, "'");
75 }
76 }
77 return Status::OK();
78}
79
80template <template <typename...> class Op, typename Duration, typename InType,
81 typename OutType>
82struct TemporalBinary {
83 template <typename OptionsType, typename T = InType>
84 static enable_if_timestamp<T, Status> ExecWithOptions(KernelContext* ctx,
85 const OptionsType* options,
86 const ExecBatch& batch,
87 Datum* out) {
88 RETURN_NOT_OK(CheckTimezones(batch));
89
90 const auto& timezone = GetInputTimezone(batch.values[0]);
91 if (timezone.empty()) {
92 using ExecTemplate = Op<Duration, NonZonedLocalizer>;
93 auto op = ExecTemplate(options, NonZonedLocalizer());
94 applicator::ScalarBinaryNotNullStatefulEqualTypes<OutType, T, ExecTemplate> kernel{
95 op};
96 return kernel.Exec(ctx, batch, out);
97 } else {
98 ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
99 using ExecTemplate = Op<Duration, ZonedLocalizer>;
100 auto op = ExecTemplate(options, ZonedLocalizer{tz});
101 applicator::ScalarBinaryNotNullStatefulEqualTypes<OutType, T, ExecTemplate> kernel{
102 op};
103 return kernel.Exec(ctx, batch, out);
104 }
105 }
106
107 template <typename OptionsType, typename T = InType>
108 static enable_if_t<!is_timestamp_type<T>::value, Status> ExecWithOptions(
109 KernelContext* ctx, const OptionsType* options, const ExecBatch& batch,
110 Datum* out) {
111 using ExecTemplate = Op<Duration, NonZonedLocalizer>;
112 auto op = ExecTemplate(options, NonZonedLocalizer());
113 applicator::ScalarBinaryNotNullStatefulEqualTypes<OutType, T, ExecTemplate> kernel{
114 op};
115 return kernel.Exec(ctx, batch, out);
116 }
117
118 static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
119 const FunctionOptions* options = nullptr;
120 return ExecWithOptions(ctx, options, batch, out);
121 }
122};
123
124template <template <typename...> class Op, typename Duration, typename InType,
125 typename OutType>
126struct TemporalDayOfWeekBinary : public TemporalBinary<Op, Duration, InType, OutType> {
127 using Base = TemporalBinary<Op, Duration, InType, OutType>;
128
129 static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
130 const DayOfWeekOptions& options = DayOfWeekState::Get(ctx);
131 RETURN_NOT_OK(ValidateDayOfWeekOptions(options));
132 return Base::ExecWithOptions(ctx, &options, batch, out);
133 }
134};
135
136// ----------------------------------------------------------------------
137// Compute boundary crossings between two timestamps
138
139template <typename Duration, typename Localizer>
140struct YearsBetween {
141 YearsBetween(const FunctionOptions* options, Localizer&& localizer)
142 : localizer_(std::move(localizer)) {}
143
144 template <typename T, typename Arg0, typename Arg1>
145 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
146 year_month_day from(
147 floor<days>(localizer_.template ConvertTimePoint<Duration>(arg0)));
148 year_month_day to(floor<days>(localizer_.template ConvertTimePoint<Duration>(arg1)));
149 return static_cast<T>((to.year() - from.year()).count());
150 }
151
152 Localizer localizer_;
153};
154
155template <typename Duration, typename Localizer>
156struct QuartersBetween {
157 QuartersBetween(const FunctionOptions* options, Localizer&& localizer)
158 : localizer_(std::move(localizer)) {}
159
160 static int64_t GetQuarters(const year_month_day& ymd) {
161 return static_cast<int64_t>(static_cast<int32_t>(ymd.year())) * 4 + GetQuarter(ymd);
162 }
163
164 template <typename T, typename Arg0, typename Arg1>
165 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
166 year_month_day from_ymd(
167 floor<days>(localizer_.template ConvertTimePoint<Duration>(arg0)));
168 year_month_day to_ymd(
169 floor<days>(localizer_.template ConvertTimePoint<Duration>(arg1)));
170 int64_t from_quarters = GetQuarters(from_ymd);
171 int64_t to_quarters = GetQuarters(to_ymd);
172 return static_cast<T>(to_quarters - from_quarters);
173 }
174
175 Localizer localizer_;
176};
177
178template <typename Duration, typename Localizer>
179struct MonthsBetween {
180 MonthsBetween(const FunctionOptions* options, Localizer&& localizer)
181 : localizer_(std::move(localizer)) {}
182
183 template <typename T, typename Arg0, typename Arg1>
184 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
185 year_month_day from(
186 floor<days>(localizer_.template ConvertTimePoint<Duration>(arg0)));
187 year_month_day to(floor<days>(localizer_.template ConvertTimePoint<Duration>(arg1)));
188 return static_cast<T>((to.year() / to.month() - from.year() / from.month()).count());
189 }
190
191 Localizer localizer_;
192};
193
194template <typename Duration, typename Localizer>
195struct WeeksBetween {
196 using days_t = typename Localizer::days_t;
197
198 WeeksBetween(const DayOfWeekOptions* options, Localizer&& localizer)
199 : week_start_(options->week_start), localizer_(std::move(localizer)) {}
200
201 /// Adjust the day backwards to land on the start of the week.
202 days_t ToWeekStart(days_t point) const {
203 const weekday dow(point);
204 const weekday start_of_week(week_start_);
205 if (dow == start_of_week) return point;
206 const days delta = start_of_week - dow;
207 // delta is always positive and in [0, 6]
208 return point - days(7 - delta.count());
209 }
210
211 template <typename T, typename Arg0, typename Arg1>
212 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
213 auto from =
214 ToWeekStart(floor<days>(localizer_.template ConvertTimePoint<Duration>(arg0)));
215 auto to =
216 ToWeekStart(floor<days>(localizer_.template ConvertTimePoint<Duration>(arg1)));
217 return (to - from).count() / 7;
218 }
219
220 uint32_t week_start_;
221 Localizer localizer_;
222};
223
224template <typename Duration, typename Localizer>
225struct MonthDayNanoBetween {
226 MonthDayNanoBetween(const FunctionOptions* options, Localizer&& localizer)
227 : localizer_(std::move(localizer)) {}
228
229 template <typename T, typename Arg0, typename Arg1>
230 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
231 static_assert(std::is_same<T, MonthDayNanoIntervalType::MonthDayNanos>::value, "");
232 auto from = localizer_.template ConvertTimePoint<Duration>(arg0);
233 auto to = localizer_.template ConvertTimePoint<Duration>(arg1);
234 year_month_day from_ymd(floor<days>(from));
235 year_month_day to_ymd(floor<days>(to));
236 const int32_t num_months = static_cast<int32_t>(
237 (to_ymd.year() / to_ymd.month() - from_ymd.year() / from_ymd.month()).count());
238 const int32_t num_days = static_cast<int32_t>(static_cast<uint32_t>(to_ymd.day())) -
239 static_cast<int32_t>(static_cast<uint32_t>(from_ymd.day()));
240 auto from_time = static_cast<int64_t>(
241 std::chrono::duration_cast<std::chrono::nanoseconds>(from - floor<days>(from))
242 .count());
243 auto to_time = static_cast<int64_t>(
244 std::chrono::duration_cast<std::chrono::nanoseconds>(to - floor<days>(to))
245 .count());
246 const int64_t num_nanos = to_time - from_time;
247 return T{num_months, num_days, num_nanos};
248 }
249
250 Localizer localizer_;
251};
252
253template <typename Duration, typename Localizer>
254struct DayTimeBetween {
255 DayTimeBetween(const FunctionOptions* options, Localizer&& localizer)
256 : localizer_(std::move(localizer)) {}
257
258 template <typename T, typename Arg0, typename Arg1>
259 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
260 static_assert(std::is_same<T, DayTimeIntervalType::DayMilliseconds>::value, "");
261 auto from = localizer_.template ConvertTimePoint<Duration>(arg0);
262 auto to = localizer_.template ConvertTimePoint<Duration>(arg1);
263 const int32_t num_days =
264 static_cast<int32_t>((floor<days>(to) - floor<days>(from)).count());
265 auto from_time = static_cast<int32_t>(
266 std::chrono::duration_cast<std::chrono::milliseconds>(from - floor<days>(from))
267 .count());
268 auto to_time = static_cast<int32_t>(
269 std::chrono::duration_cast<std::chrono::milliseconds>(to - floor<days>(to))
270 .count());
271 const int32_t num_millis = to_time - from_time;
272 return DayTimeIntervalType::DayMilliseconds{num_days, num_millis};
273 }
274
275 Localizer localizer_;
276};
277
278template <typename Unit, typename Duration, typename Localizer>
279struct UnitsBetween {
280 UnitsBetween(const FunctionOptions* options, Localizer&& localizer)
281 : localizer_(std::move(localizer)) {}
282
283 template <typename T, typename Arg0, typename Arg1>
284 T Call(KernelContext*, Arg0 arg0, Arg1 arg1, Status*) const {
285 auto from = floor<Unit>(localizer_.template ConvertTimePoint<Duration>(arg0));
286 auto to = floor<Unit>(localizer_.template ConvertTimePoint<Duration>(arg1));
287 return static_cast<T>((to - from).count());
288 }
289
290 Localizer localizer_;
291};
292
293template <typename Duration, typename Localizer>
294using DaysBetween = UnitsBetween<days, Duration, Localizer>;
295
296template <typename Duration, typename Localizer>
297using HoursBetween = UnitsBetween<std::chrono::hours, Duration, Localizer>;
298
299template <typename Duration, typename Localizer>
300using MinutesBetween = UnitsBetween<std::chrono::minutes, Duration, Localizer>;
301
302template <typename Duration, typename Localizer>
303using SecondsBetween = UnitsBetween<std::chrono::seconds, Duration, Localizer>;
304
305template <typename Duration, typename Localizer>
306using MillisecondsBetween = UnitsBetween<std::chrono::milliseconds, Duration, Localizer>;
307
308template <typename Duration, typename Localizer>
309using MicrosecondsBetween = UnitsBetween<std::chrono::microseconds, Duration, Localizer>;
310
311template <typename Duration, typename Localizer>
312using NanosecondsBetween = UnitsBetween<std::chrono::nanoseconds, Duration, Localizer>;
313
314// ----------------------------------------------------------------------
315// Registration helpers
316
317template <template <typename...> class Op,
318 template <template <typename...> class OpExec, typename Duration,
319 typename InType, typename OutType, typename... Args>
320 class ExecTemplate,
321 typename OutType>
322struct BinaryTemporalFactory {
323 OutputType out_type;
324 KernelInit init;
325 std::shared_ptr<ScalarFunction> func;
326
327 template <typename... WithTypes>
328 static std::shared_ptr<ScalarFunction> Make(
329 std::string name, OutputType out_type, const FunctionDoc* doc,
330 const FunctionOptions* default_options = NULLPTR, KernelInit init = NULLPTR) {
331 DCHECK_NE(sizeof...(WithTypes), 0);
332 BinaryTemporalFactory self{
333 out_type, init,
334 std::make_shared<ScalarFunction>(name, Arity::Binary(), doc, default_options)};
335 AddTemporalKernels(&self, WithTypes{}...);
336 return self.func;
337 }
338
339 template <typename Duration, typename InType>
340 void AddKernel(InputType in_type) {
341 auto exec = ExecTemplate<Op, Duration, InType, OutType>::Exec;
342 DCHECK_OK(func->AddKernel({in_type, in_type}, out_type, std::move(exec), init));
343 }
344};
345
346const FunctionDoc years_between_doc{
347 "Compute the number of years between two timestamps",
348 ("Returns the number of year boundaries crossed from `start` to `end`.\n"
349 "That is, the difference is calculated as if the timestamps were\n"
350 "truncated to the year.\n"
351 "Null values emit null."),
352 {"start", "end"}};
353
354const FunctionDoc quarters_between_doc{
355 "Compute the number of quarters between two timestamps",
356 ("Returns the number of quarter start boundaries crossed from `start` to `end`.\n"
357 "That is, the difference is calculated as if the timestamps were\n"
358 "truncated to the quarter.\n"
359 "Null values emit null."),
360 {"start", "end"}};
361
362const FunctionDoc months_between_doc{
363 "Compute the number of months between two timestamps",
364 ("Returns the number of month boundaries crossed from `start` to `end`.\n"
365 "That is, the difference is calculated as if the timestamps were\n"
366 "truncated to the month.\n"
367 "Null values emit null."),
368 {"start", "end"}};
369
370const FunctionDoc month_day_nano_interval_between_doc{
371 "Compute the number of months, days and nanoseconds between two timestamps",
372 ("Returns the number of months, days, and nanoseconds from `start` to `end`.\n"
373 "That is, first the difference in months is computed as if both timestamps\n"
374 "were truncated to the months, then the difference between the days\n"
375 "is computed, and finally the difference between the times of the two\n"
376 "timestamps is computed as if both times were truncated to the nanosecond.\n"
377 "Null values return null."),
378 {"start", "end"}};
379
380const FunctionDoc weeks_between_doc{
381 "Compute the number of weeks between two timestamps",
382 ("Returns the number of week boundaries crossed from `start` to `end`.\n"
383 "That is, the difference is calculated as if the timestamps were\n"
384 "truncated to the week.\n"
385 "Null values emit null."),
386 {"start", "end"},
387 "DayOfWeekOptions"};
388
389const FunctionDoc day_time_interval_between_doc{
390 "Compute the number of days and milliseconds between two timestamps",
391 ("Returns the number of days and milliseconds from `start` to `end`.\n"
392 "That is, first the difference in days is computed as if both\n"
393 "timestamps were truncated to the day, then the difference between time times\n"
394 "of the two timestamps is computed as if both times were truncated to the\n"
395 "millisecond.\n"
396 "Null values return null."),
397 {"start", "end"}};
398
399const FunctionDoc days_between_doc{
400 "Compute the number of days between two timestamps",
401 ("Returns the number of day boundaries crossed from `start` to `end`.\n"
402 "That is, the difference is calculated as if the timestamps were\n"
403 "truncated to the day.\n"
404 "Null values emit null."),
405 {"start", "end"}};
406
407const FunctionDoc hours_between_doc{
408 "Compute the number of hours between two timestamps",
409 ("Returns the number of hour boundaries crossed from `start` to `end`.\n"
410 "That is, the difference is calculated as if the timestamps were\n"
411 "truncated to the hour.\n"
412 "Null values emit null."),
413 {"start", "end"}};
414
415const FunctionDoc minutes_between_doc{
416 "Compute the number of minute boundaries between two timestamps",
417 ("Returns the number of minute boundaries crossed from `start` to `end`.\n"
418 "That is, the difference is calculated as if the timestamps were\n"
419 "truncated to the minute.\n"
420 "Null values emit null."),
421 {"start", "end"}};
422
423const FunctionDoc seconds_between_doc{
424 "Compute the number of seconds between two timestamps",
425 ("Returns the number of second boundaries crossed from `start` to `end`.\n"
426 "That is, the difference is calculated as if the timestamps were\n"
427 "truncated to the second.\n"
428 "Null values emit null."),
429 {"start", "end"}};
430
431const FunctionDoc milliseconds_between_doc{
432 "Compute the number of millisecond boundaries between two timestamps",
433 ("Returns the number of millisecond boundaries crossed from `start` to `end`.\n"
434 "That is, the difference is calculated as if the timestamps were\n"
435 "truncated to the millisecond.\n"
436 "Null values emit null."),
437 {"start", "end"}};
438
439const FunctionDoc microseconds_between_doc{
440 "Compute the number of microseconds between two timestamps",
441 ("Returns the number of microsecond boundaries crossed from `start` to `end`.\n"
442 "That is, the difference is calculated as if the timestamps were\n"
443 "truncated to the microsecond.\n"
444 "Null values emit null."),
445 {"start", "end"}};
446
447const FunctionDoc nanoseconds_between_doc{
448 "Compute the number of nanoseconds between two timestamps",
449 ("Returns the number of nanosecond boundaries crossed from `start` to `end`.\n"
450 "That is, the difference is calculated as if the timestamps were\n"
451 "truncated to the nanosecond.\n"
452 "Null values emit null."),
453 {"start", "end"}};
454
455} // namespace
456
457void RegisterScalarTemporalBinary(FunctionRegistry* registry) {
458 // Temporal difference functions
459 auto years_between =
460 BinaryTemporalFactory<YearsBetween, TemporalBinary, Int64Type>::Make<
461 WithDates, WithTimestamps>("years_between", int64(), &years_between_doc);
462 DCHECK_OK(registry->AddFunction(std::move(years_between)));
463
464 auto quarters_between =
465 BinaryTemporalFactory<QuartersBetween, TemporalBinary, Int64Type>::Make<
466 WithDates, WithTimestamps>("quarters_between", int64(), &quarters_between_doc);
467 DCHECK_OK(registry->AddFunction(std::move(quarters_between)));
468
469 auto month_interval_between =
470 BinaryTemporalFactory<MonthsBetween, TemporalBinary, MonthIntervalType>::Make<
471 WithDates, WithTimestamps>("month_interval_between", month_interval(),
472 &months_between_doc);
473 DCHECK_OK(registry->AddFunction(std::move(month_interval_between)));
474
475 auto month_day_nano_interval_between =
476 BinaryTemporalFactory<MonthDayNanoBetween, TemporalBinary,
477 MonthDayNanoIntervalType>::Make<WithDates, WithTimes,
478 WithTimestamps>(
479 "month_day_nano_interval_between", month_day_nano_interval(),
480 &month_day_nano_interval_between_doc);
481 DCHECK_OK(registry->AddFunction(std::move(month_day_nano_interval_between)));
482
483 static const auto default_day_of_week_options = DayOfWeekOptions::Defaults();
484 auto weeks_between =
485 BinaryTemporalFactory<WeeksBetween, TemporalDayOfWeekBinary, Int64Type>::Make<
486 WithDates, WithTimestamps>("weeks_between", int64(), &weeks_between_doc,
487 &default_day_of_week_options, DayOfWeekState::Init);
488 DCHECK_OK(registry->AddFunction(std::move(weeks_between)));
489
490 auto day_time_interval_between =
491 BinaryTemporalFactory<DayTimeBetween, TemporalBinary, DayTimeIntervalType>::Make<
492 WithDates, WithTimes, WithTimestamps>("day_time_interval_between",
493 day_time_interval(),
494 &day_time_interval_between_doc);
495 DCHECK_OK(registry->AddFunction(std::move(day_time_interval_between)));
496
497 auto days_between =
498 BinaryTemporalFactory<DaysBetween, TemporalBinary, Int64Type>::Make<WithDates,
499 WithTimestamps>(
500 "days_between", int64(), &days_between_doc);
501 DCHECK_OK(registry->AddFunction(std::move(days_between)));
502
503 auto hours_between =
504 BinaryTemporalFactory<HoursBetween, TemporalBinary, Int64Type>::Make<
505 WithDates, WithTimes, WithTimestamps>("hours_between", int64(),
506 &hours_between_doc);
507 DCHECK_OK(registry->AddFunction(std::move(hours_between)));
508
509 auto minutes_between =
510 BinaryTemporalFactory<MinutesBetween, TemporalBinary, Int64Type>::Make<
511 WithDates, WithTimes, WithTimestamps>("minutes_between", int64(),
512 &minutes_between_doc);
513 DCHECK_OK(registry->AddFunction(std::move(minutes_between)));
514
515 auto seconds_between =
516 BinaryTemporalFactory<SecondsBetween, TemporalBinary, Int64Type>::Make<
517 WithDates, WithTimes, WithTimestamps>("seconds_between", int64(),
518 &seconds_between_doc);
519 DCHECK_OK(registry->AddFunction(std::move(seconds_between)));
520
521 auto milliseconds_between =
522 BinaryTemporalFactory<MillisecondsBetween, TemporalBinary, Int64Type>::Make<
523 WithDates, WithTimes, WithTimestamps>("milliseconds_between", int64(),
524 &milliseconds_between_doc);
525 DCHECK_OK(registry->AddFunction(std::move(milliseconds_between)));
526
527 auto microseconds_between =
528 BinaryTemporalFactory<MicrosecondsBetween, TemporalBinary, Int64Type>::Make<
529 WithDates, WithTimes, WithTimestamps>("microseconds_between", int64(),
530 &microseconds_between_doc);
531 DCHECK_OK(registry->AddFunction(std::move(microseconds_between)));
532
533 auto nanoseconds_between =
534 BinaryTemporalFactory<NanosecondsBetween, TemporalBinary, Int64Type>::Make<
535 WithDates, WithTimes, WithTimestamps>("nanoseconds_between", int64(),
536 &nanoseconds_between_doc);
537 DCHECK_OK(registry->AddFunction(std::move(nanoseconds_between)));
538}
539
540} // namespace internal
541} // namespace compute
542} // namespace arrow