]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/compute/api_scalar.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / api_scalar.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/compute/api_scalar.h"
19
20 #include <memory>
21 #include <sstream>
22 #include <string>
23
24 #include "arrow/array/array_base.h"
25 #include "arrow/compute/exec.h"
26 #include "arrow/compute/function_internal.h"
27 #include "arrow/compute/registry.h"
28 #include "arrow/status.h"
29 #include "arrow/type.h"
30 #include "arrow/util/checked_cast.h"
31 #include "arrow/util/logging.h"
32
33 namespace arrow {
34
35 namespace internal {
36 template <>
37 struct EnumTraits<compute::JoinOptions::NullHandlingBehavior>
38 : BasicEnumTraits<compute::JoinOptions::NullHandlingBehavior,
39 compute::JoinOptions::NullHandlingBehavior::EMIT_NULL,
40 compute::JoinOptions::NullHandlingBehavior::SKIP,
41 compute::JoinOptions::NullHandlingBehavior::REPLACE> {
42 static std::string name() { return "JoinOptions::NullHandlingBehavior"; }
43 static std::string value_name(compute::JoinOptions::NullHandlingBehavior value) {
44 switch (value) {
45 case compute::JoinOptions::NullHandlingBehavior::EMIT_NULL:
46 return "EMIT_NULL";
47 case compute::JoinOptions::NullHandlingBehavior::SKIP:
48 return "SKIP";
49 case compute::JoinOptions::NullHandlingBehavior::REPLACE:
50 return "REPLACE";
51 }
52 return "<INVALID>";
53 }
54 };
55
56 template <>
57 struct EnumTraits<TimeUnit::type>
58 : BasicEnumTraits<TimeUnit::type, TimeUnit::type::SECOND, TimeUnit::type::MILLI,
59 TimeUnit::type::MICRO, TimeUnit::type::NANO> {
60 static std::string name() { return "TimeUnit::type"; }
61 static std::string value_name(TimeUnit::type value) {
62 switch (value) {
63 case TimeUnit::type::SECOND:
64 return "SECOND";
65 case TimeUnit::type::MILLI:
66 return "MILLI";
67 case TimeUnit::type::MICRO:
68 return "MICRO";
69 case TimeUnit::type::NANO:
70 return "NANO";
71 }
72 return "<INVALID>";
73 }
74 };
75
76 template <>
77 struct EnumTraits<compute::CompareOperator>
78 : BasicEnumTraits<
79 compute::CompareOperator, compute::CompareOperator::EQUAL,
80 compute::CompareOperator::NOT_EQUAL, compute::CompareOperator::GREATER,
81 compute::CompareOperator::GREATER_EQUAL, compute::CompareOperator::LESS,
82 compute::CompareOperator::LESS_EQUAL> {
83 static std::string name() { return "compute::CompareOperator"; }
84 static std::string value_name(compute::CompareOperator value) {
85 switch (value) {
86 case compute::CompareOperator::EQUAL:
87 return "EQUAL";
88 case compute::CompareOperator::NOT_EQUAL:
89 return "NOT_EQUAL";
90 case compute::CompareOperator::GREATER:
91 return "GREATER";
92 case compute::CompareOperator::GREATER_EQUAL:
93 return "GREATER_EQUAL";
94 case compute::CompareOperator::LESS:
95 return "LESS";
96 case compute::CompareOperator::LESS_EQUAL:
97 return "LESS_EQUAL";
98 }
99 return "<INVALID>";
100 }
101 };
102 template <>
103 struct EnumTraits<compute::AssumeTimezoneOptions::Ambiguous>
104 : BasicEnumTraits<compute::AssumeTimezoneOptions::Ambiguous,
105 compute::AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_RAISE,
106 compute::AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_EARLIEST,
107 compute::AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_LATEST> {
108 static std::string name() { return "AssumeTimezoneOptions::Ambiguous"; }
109 static std::string value_name(compute::AssumeTimezoneOptions::Ambiguous value) {
110 switch (value) {
111 case compute::AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_RAISE:
112 return "AMBIGUOUS_RAISE";
113 case compute::AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_EARLIEST:
114 return "AMBIGUOUS_EARLIEST";
115 case compute::AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_LATEST:
116 return "AMBIGUOUS_LATEST";
117 }
118 return "<INVALID>";
119 }
120 };
121 template <>
122 struct EnumTraits<compute::AssumeTimezoneOptions::Nonexistent>
123 : BasicEnumTraits<compute::AssumeTimezoneOptions::Nonexistent,
124 compute::AssumeTimezoneOptions::Nonexistent::NONEXISTENT_RAISE,
125 compute::AssumeTimezoneOptions::Nonexistent::NONEXISTENT_EARLIEST,
126 compute::AssumeTimezoneOptions::Nonexistent::NONEXISTENT_LATEST> {
127 static std::string name() { return "AssumeTimezoneOptions::Nonexistent"; }
128 static std::string value_name(compute::AssumeTimezoneOptions::Nonexistent value) {
129 switch (value) {
130 case compute::AssumeTimezoneOptions::Nonexistent::NONEXISTENT_RAISE:
131 return "NONEXISTENT_RAISE";
132 case compute::AssumeTimezoneOptions::Nonexistent::NONEXISTENT_EARLIEST:
133 return "NONEXISTENT_EARLIEST";
134 case compute::AssumeTimezoneOptions::Nonexistent::NONEXISTENT_LATEST:
135 return "NONEXISTENT_LATEST";
136 }
137 return "<INVALID>";
138 }
139 };
140
141 template <>
142 struct EnumTraits<compute::RoundMode>
143 : BasicEnumTraits<compute::RoundMode, compute::RoundMode::DOWN,
144 compute::RoundMode::UP, compute::RoundMode::TOWARDS_ZERO,
145 compute::RoundMode::TOWARDS_INFINITY, compute::RoundMode::HALF_DOWN,
146 compute::RoundMode::HALF_UP, compute::RoundMode::HALF_TOWARDS_ZERO,
147 compute::RoundMode::HALF_TOWARDS_INFINITY,
148 compute::RoundMode::HALF_TO_EVEN, compute::RoundMode::HALF_TO_ODD> {
149 static std::string name() { return "compute::RoundMode"; }
150 static std::string value_name(compute::RoundMode value) {
151 switch (value) {
152 case compute::RoundMode::DOWN:
153 return "DOWN";
154 case compute::RoundMode::UP:
155 return "UP";
156 case compute::RoundMode::TOWARDS_ZERO:
157 return "TOWARDS_ZERO";
158 case compute::RoundMode::TOWARDS_INFINITY:
159 return "TOWARDS_INFINITY";
160 case compute::RoundMode::HALF_DOWN:
161 return "HALF_DOWN";
162 case compute::RoundMode::HALF_UP:
163 return "HALF_UP";
164 case compute::RoundMode::HALF_TOWARDS_ZERO:
165 return "HALF_TOWARDS_ZERO";
166 case compute::RoundMode::HALF_TOWARDS_INFINITY:
167 return "HALF_TOWARDS_INFINITY";
168 case compute::RoundMode::HALF_TO_EVEN:
169 return "HALF_TO_EVEN";
170 case compute::RoundMode::HALF_TO_ODD:
171 return "HALF_TO_ODD";
172 }
173 return "<INVALID>";
174 }
175 };
176 } // namespace internal
177
178 namespace compute {
179
180 // ----------------------------------------------------------------------
181 // Function options
182
183 using ::arrow::internal::checked_cast;
184
185 namespace internal {
186 namespace {
187 using ::arrow::internal::DataMember;
188 static auto kArithmeticOptionsType = GetFunctionOptionsType<ArithmeticOptions>(
189 DataMember("check_overflow", &ArithmeticOptions::check_overflow));
190 static auto kElementWiseAggregateOptionsType =
191 GetFunctionOptionsType<ElementWiseAggregateOptions>(
192 DataMember("skip_nulls", &ElementWiseAggregateOptions::skip_nulls));
193 static auto kRoundOptionsType = GetFunctionOptionsType<RoundOptions>(
194 DataMember("ndigits", &RoundOptions::ndigits),
195 DataMember("round_mode", &RoundOptions::round_mode));
196 static auto kRoundToMultipleOptionsType = GetFunctionOptionsType<RoundToMultipleOptions>(
197 DataMember("multiple", &RoundToMultipleOptions::multiple),
198 DataMember("round_mode", &RoundToMultipleOptions::round_mode));
199 static auto kJoinOptionsType = GetFunctionOptionsType<JoinOptions>(
200 DataMember("null_handling", &JoinOptions::null_handling),
201 DataMember("null_replacement", &JoinOptions::null_replacement));
202 static auto kMatchSubstringOptionsType = GetFunctionOptionsType<MatchSubstringOptions>(
203 DataMember("pattern", &MatchSubstringOptions::pattern),
204 DataMember("ignore_case", &MatchSubstringOptions::ignore_case));
205 static auto kSplitOptionsType = GetFunctionOptionsType<SplitOptions>(
206 DataMember("max_splits", &SplitOptions::max_splits),
207 DataMember("reverse", &SplitOptions::reverse));
208 static auto kSplitPatternOptionsType = GetFunctionOptionsType<SplitPatternOptions>(
209 DataMember("pattern", &SplitPatternOptions::pattern),
210 DataMember("max_splits", &SplitPatternOptions::max_splits),
211 DataMember("reverse", &SplitPatternOptions::reverse));
212 static auto kReplaceSliceOptionsType = GetFunctionOptionsType<ReplaceSliceOptions>(
213 DataMember("start", &ReplaceSliceOptions::start),
214 DataMember("stop", &ReplaceSliceOptions::stop),
215 DataMember("replacement", &ReplaceSliceOptions::replacement));
216 static auto kReplaceSubstringOptionsType =
217 GetFunctionOptionsType<ReplaceSubstringOptions>(
218 DataMember("pattern", &ReplaceSubstringOptions::pattern),
219 DataMember("replacement", &ReplaceSubstringOptions::replacement),
220 DataMember("max_replacements", &ReplaceSubstringOptions::max_replacements));
221 static auto kExtractRegexOptionsType = GetFunctionOptionsType<ExtractRegexOptions>(
222 DataMember("pattern", &ExtractRegexOptions::pattern));
223 static auto kSetLookupOptionsType = GetFunctionOptionsType<SetLookupOptions>(
224 DataMember("value_set", &SetLookupOptions::value_set),
225 DataMember("skip_nulls", &SetLookupOptions::skip_nulls));
226 static auto kStrptimeOptionsType = GetFunctionOptionsType<StrptimeOptions>(
227 DataMember("format", &StrptimeOptions::format),
228 DataMember("unit", &StrptimeOptions::unit));
229 static auto kStrftimeOptionsType = GetFunctionOptionsType<StrftimeOptions>(
230 DataMember("format", &StrftimeOptions::format));
231 static auto kAssumeTimezoneOptionsType = GetFunctionOptionsType<AssumeTimezoneOptions>(
232 DataMember("timezone", &AssumeTimezoneOptions::timezone),
233 DataMember("ambiguous", &AssumeTimezoneOptions::ambiguous),
234 DataMember("nonexistent", &AssumeTimezoneOptions::nonexistent));
235 static auto kPadOptionsType = GetFunctionOptionsType<PadOptions>(
236 DataMember("width", &PadOptions::width), DataMember("padding", &PadOptions::padding));
237 static auto kTrimOptionsType = GetFunctionOptionsType<TrimOptions>(
238 DataMember("characters", &TrimOptions::characters));
239 static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
240 DataMember("start", &SliceOptions::start), DataMember("stop", &SliceOptions::stop),
241 DataMember("step", &SliceOptions::step));
242 static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
243 DataMember("field_names", &MakeStructOptions::field_names),
244 DataMember("field_nullability", &MakeStructOptions::field_nullability),
245 DataMember("field_metadata", &MakeStructOptions::field_metadata));
246 static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
247 DataMember("count_from_zero", &DayOfWeekOptions::count_from_zero),
248 DataMember("week_start", &DayOfWeekOptions::week_start));
249 static auto kWeekOptionsType = GetFunctionOptionsType<WeekOptions>(
250 DataMember("week_starts_monday", &WeekOptions::week_starts_monday),
251 DataMember("count_from_zero", &WeekOptions::count_from_zero),
252 DataMember("first_week_is_fully_in_year", &WeekOptions::first_week_is_fully_in_year));
253 static auto kNullOptionsType = GetFunctionOptionsType<NullOptions>(
254 DataMember("nan_is_null", &NullOptions::nan_is_null));
255 } // namespace
256 } // namespace internal
257
258 ArithmeticOptions::ArithmeticOptions(bool check_overflow)
259 : FunctionOptions(internal::kArithmeticOptionsType), check_overflow(check_overflow) {}
260 constexpr char ArithmeticOptions::kTypeName[];
261
262 ElementWiseAggregateOptions::ElementWiseAggregateOptions(bool skip_nulls)
263 : FunctionOptions(internal::kElementWiseAggregateOptionsType),
264 skip_nulls(skip_nulls) {}
265 constexpr char ElementWiseAggregateOptions::kTypeName[];
266
267 RoundOptions::RoundOptions(int64_t ndigits, RoundMode round_mode)
268 : FunctionOptions(internal::kRoundOptionsType),
269 ndigits(ndigits),
270 round_mode(round_mode) {
271 static_assert(RoundMode::HALF_DOWN > RoundMode::DOWN &&
272 RoundMode::HALF_DOWN > RoundMode::UP &&
273 RoundMode::HALF_DOWN > RoundMode::TOWARDS_ZERO &&
274 RoundMode::HALF_DOWN > RoundMode::TOWARDS_INFINITY &&
275 RoundMode::HALF_DOWN < RoundMode::HALF_UP &&
276 RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_ZERO &&
277 RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_INFINITY &&
278 RoundMode::HALF_DOWN < RoundMode::HALF_TO_EVEN &&
279 RoundMode::HALF_DOWN < RoundMode::HALF_TO_ODD,
280 "Invalid order of round modes. Modes prefixed with HALF need to be "
281 "enumerated last with HALF_DOWN being the first among them.");
282 }
283 constexpr char RoundOptions::kTypeName[];
284
285 RoundToMultipleOptions::RoundToMultipleOptions(double multiple, RoundMode round_mode)
286 : RoundToMultipleOptions(std::make_shared<DoubleScalar>(multiple), round_mode) {}
287 RoundToMultipleOptions::RoundToMultipleOptions(std::shared_ptr<Scalar> multiple,
288 RoundMode round_mode)
289 : FunctionOptions(internal::kRoundToMultipleOptionsType),
290 multiple(std::move(multiple)),
291 round_mode(round_mode) {}
292 constexpr char RoundToMultipleOptions::kTypeName[];
293
294 JoinOptions::JoinOptions(NullHandlingBehavior null_handling, std::string null_replacement)
295 : FunctionOptions(internal::kJoinOptionsType),
296 null_handling(null_handling),
297 null_replacement(std::move(null_replacement)) {}
298 constexpr char JoinOptions::kTypeName[];
299
300 MatchSubstringOptions::MatchSubstringOptions(std::string pattern, bool ignore_case)
301 : FunctionOptions(internal::kMatchSubstringOptionsType),
302 pattern(std::move(pattern)),
303 ignore_case(ignore_case) {}
304 MatchSubstringOptions::MatchSubstringOptions() : MatchSubstringOptions("", false) {}
305 constexpr char MatchSubstringOptions::kTypeName[];
306
307 SplitOptions::SplitOptions(int64_t max_splits, bool reverse)
308 : FunctionOptions(internal::kSplitOptionsType),
309 max_splits(max_splits),
310 reverse(reverse) {}
311 constexpr char SplitOptions::kTypeName[];
312
313 SplitPatternOptions::SplitPatternOptions(std::string pattern, int64_t max_splits,
314 bool reverse)
315 : FunctionOptions(internal::kSplitPatternOptionsType),
316 pattern(std::move(pattern)),
317 max_splits(max_splits),
318 reverse(reverse) {}
319 SplitPatternOptions::SplitPatternOptions() : SplitPatternOptions("", -1, false) {}
320 constexpr char SplitPatternOptions::kTypeName[];
321
322 ReplaceSliceOptions::ReplaceSliceOptions(int64_t start, int64_t stop,
323 std::string replacement)
324 : FunctionOptions(internal::kReplaceSliceOptionsType),
325 start(start),
326 stop(stop),
327 replacement(std::move(replacement)) {}
328 ReplaceSliceOptions::ReplaceSliceOptions() : ReplaceSliceOptions(0, 0, "") {}
329 constexpr char ReplaceSliceOptions::kTypeName[];
330
331 ReplaceSubstringOptions::ReplaceSubstringOptions(std::string pattern,
332 std::string replacement,
333 int64_t max_replacements)
334 : FunctionOptions(internal::kReplaceSubstringOptionsType),
335 pattern(std::move(pattern)),
336 replacement(std::move(replacement)),
337 max_replacements(max_replacements) {}
338 ReplaceSubstringOptions::ReplaceSubstringOptions()
339 : ReplaceSubstringOptions("", "", -1) {}
340 constexpr char ReplaceSubstringOptions::kTypeName[];
341
342 ExtractRegexOptions::ExtractRegexOptions(std::string pattern)
343 : FunctionOptions(internal::kExtractRegexOptionsType), pattern(std::move(pattern)) {}
344 ExtractRegexOptions::ExtractRegexOptions() : ExtractRegexOptions("") {}
345 constexpr char ExtractRegexOptions::kTypeName[];
346
347 SetLookupOptions::SetLookupOptions(Datum value_set, bool skip_nulls)
348 : FunctionOptions(internal::kSetLookupOptionsType),
349 value_set(std::move(value_set)),
350 skip_nulls(skip_nulls) {}
351 SetLookupOptions::SetLookupOptions() : SetLookupOptions({}, false) {}
352 constexpr char SetLookupOptions::kTypeName[];
353
354 StrptimeOptions::StrptimeOptions(std::string format, TimeUnit::type unit)
355 : FunctionOptions(internal::kStrptimeOptionsType),
356 format(std::move(format)),
357 unit(unit) {}
358 StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::SECOND) {}
359 constexpr char StrptimeOptions::kTypeName[];
360
361 StrftimeOptions::StrftimeOptions(std::string format, std::string locale)
362 : FunctionOptions(internal::kStrftimeOptionsType),
363 format(std::move(format)),
364 locale(std::move(locale)) {}
365 StrftimeOptions::StrftimeOptions() : StrftimeOptions(kDefaultFormat) {}
366 constexpr char StrftimeOptions::kTypeName[];
367 constexpr const char* StrftimeOptions::kDefaultFormat;
368
369 AssumeTimezoneOptions::AssumeTimezoneOptions(std::string timezone, Ambiguous ambiguous,
370 Nonexistent nonexistent)
371 : FunctionOptions(internal::kAssumeTimezoneOptionsType),
372 timezone(std::move(timezone)),
373 ambiguous(ambiguous),
374 nonexistent(nonexistent) {}
375 AssumeTimezoneOptions::AssumeTimezoneOptions() : AssumeTimezoneOptions("UTC") {}
376 constexpr char AssumeTimezoneOptions::kTypeName[];
377
378 PadOptions::PadOptions(int64_t width, std::string padding)
379 : FunctionOptions(internal::kPadOptionsType),
380 width(width),
381 padding(std::move(padding)) {}
382 PadOptions::PadOptions() : PadOptions(0, " ") {}
383 constexpr char PadOptions::kTypeName[];
384
385 TrimOptions::TrimOptions(std::string characters)
386 : FunctionOptions(internal::kTrimOptionsType), characters(std::move(characters)) {}
387 TrimOptions::TrimOptions() : TrimOptions("") {}
388 constexpr char TrimOptions::kTypeName[];
389
390 SliceOptions::SliceOptions(int64_t start, int64_t stop, int64_t step)
391 : FunctionOptions(internal::kSliceOptionsType),
392 start(start),
393 stop(stop),
394 step(step) {}
395 SliceOptions::SliceOptions() : SliceOptions(0, 0, 1) {}
396 constexpr char SliceOptions::kTypeName[];
397
398 MakeStructOptions::MakeStructOptions(
399 std::vector<std::string> n, std::vector<bool> r,
400 std::vector<std::shared_ptr<const KeyValueMetadata>> m)
401 : FunctionOptions(internal::kMakeStructOptionsType),
402 field_names(std::move(n)),
403 field_nullability(std::move(r)),
404 field_metadata(std::move(m)) {}
405
406 MakeStructOptions::MakeStructOptions(std::vector<std::string> n)
407 : FunctionOptions(internal::kMakeStructOptionsType),
408 field_names(std::move(n)),
409 field_nullability(field_names.size(), true),
410 field_metadata(field_names.size(), NULLPTR) {}
411
412 MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector<std::string>()) {}
413 constexpr char MakeStructOptions::kTypeName[];
414
415 DayOfWeekOptions::DayOfWeekOptions(bool count_from_zero, uint32_t week_start)
416 : FunctionOptions(internal::kDayOfWeekOptionsType),
417 count_from_zero(count_from_zero),
418 week_start(week_start) {}
419 constexpr char DayOfWeekOptions::kTypeName[];
420
421 WeekOptions::WeekOptions(bool week_starts_monday, bool count_from_zero,
422 bool first_week_is_fully_in_year)
423 : FunctionOptions(internal::kWeekOptionsType),
424 week_starts_monday(week_starts_monday),
425 count_from_zero(count_from_zero),
426 first_week_is_fully_in_year(first_week_is_fully_in_year) {}
427 constexpr char WeekOptions::kTypeName[];
428
429 NullOptions::NullOptions(bool nan_is_null)
430 : FunctionOptions(internal::kNullOptionsType), nan_is_null(nan_is_null) {}
431 constexpr char NullOptions::kTypeName[];
432
433 namespace internal {
434 void RegisterScalarOptions(FunctionRegistry* registry) {
435 DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
436 DCHECK_OK(registry->AddFunctionOptionsType(kElementWiseAggregateOptionsType));
437 DCHECK_OK(registry->AddFunctionOptionsType(kRoundOptionsType));
438 DCHECK_OK(registry->AddFunctionOptionsType(kRoundToMultipleOptionsType));
439 DCHECK_OK(registry->AddFunctionOptionsType(kJoinOptionsType));
440 DCHECK_OK(registry->AddFunctionOptionsType(kMatchSubstringOptionsType));
441 DCHECK_OK(registry->AddFunctionOptionsType(kSplitOptionsType));
442 DCHECK_OK(registry->AddFunctionOptionsType(kSplitPatternOptionsType));
443 DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSliceOptionsType));
444 DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSubstringOptionsType));
445 DCHECK_OK(registry->AddFunctionOptionsType(kExtractRegexOptionsType));
446 DCHECK_OK(registry->AddFunctionOptionsType(kSetLookupOptionsType));
447 DCHECK_OK(registry->AddFunctionOptionsType(kStrptimeOptionsType));
448 DCHECK_OK(registry->AddFunctionOptionsType(kStrftimeOptionsType));
449 DCHECK_OK(registry->AddFunctionOptionsType(kAssumeTimezoneOptionsType));
450 DCHECK_OK(registry->AddFunctionOptionsType(kPadOptionsType));
451 DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
452 DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
453 DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
454 DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
455 DCHECK_OK(registry->AddFunctionOptionsType(kWeekOptionsType));
456 DCHECK_OK(registry->AddFunctionOptionsType(kNullOptionsType));
457 }
458 } // namespace internal
459
// Defines `Result<Datum> FUNC(const Datum& value, ExecContext* ctx)`, which
// forwards its single argument to CallFunction under the name KERNEL.
#define SCALAR_EAGER_UNARY(FUNC, KERNEL)                     \
  Result<Datum> FUNC(const Datum& value, ExecContext* ctx) { \
    return CallFunction(KERNEL, {value}, ctx);               \
  }
464
// Defines `Result<Datum> FUNC(const Datum& left, const Datum& right,
// ExecContext* ctx)`, which forwards both arguments, in order, to CallFunction
// under the name KERNEL.
#define SCALAR_EAGER_BINARY(FUNC, KERNEL)                                       \
  Result<Datum> FUNC(const Datum& left, const Datum& right, ExecContext* ctx) { \
    return CallFunction(KERNEL, {left, right}, ctx);                            \
  }
469
470 // ----------------------------------------------------------------------
471 // Arithmetic
472
// Defines a unary arithmetic entry point NAME. When options.check_overflow is
// set the call goes to REGISTRY_CHECKED_NAME, otherwise to REGISTRY_NAME. Only
// the flag is consulted; `options` itself is not forwarded to CallFunction.
#define SCALAR_ARITHMETIC_UNARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)           \
  Result<Datum> NAME(const Datum& arg, ArithmeticOptions options, ExecContext* ctx) { \
    if (options.check_overflow) {                                                     \
      return CallFunction(REGISTRY_CHECKED_NAME, {arg}, ctx);                         \
    }                                                                                 \
    return CallFunction(REGISTRY_NAME, {arg}, ctx);                                   \
  }
478
479 SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
480 SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
481 SCALAR_EAGER_UNARY(Sign, "sign")
482 SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
483 SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
484 SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
485 SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
486 SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
487 SCALAR_EAGER_UNARY(Atan, "atan")
488 SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
489 SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
490 SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
491 SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")
492
493 Result<Datum> Round(const Datum& arg, RoundOptions options, ExecContext* ctx) {
494 return CallFunction("round", {arg}, &options, ctx);
495 }
496
497 Result<Datum> RoundToMultiple(const Datum& arg, RoundToMultipleOptions options,
498 ExecContext* ctx) {
499 return CallFunction("round_to_multiple", {arg}, &options, ctx);
500 }
501
// Defines a binary arithmetic entry point NAME. When options.check_overflow is
// set the call goes to REGISTRY_CHECKED_NAME, otherwise to REGISTRY_NAME. Only
// the flag is consulted; `options` itself is not forwarded to CallFunction.
#define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)           \
  Result<Datum> NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \
                     ExecContext* ctx) {                                               \
    if (options.check_overflow) {                                                      \
      return CallFunction(REGISTRY_CHECKED_NAME, {left, right}, ctx);                  \
    }                                                                                  \
    return CallFunction(REGISTRY_NAME, {left, right}, ctx);                            \
  }
508
509 SCALAR_ARITHMETIC_BINARY(Add, "add", "add_checked")
510 SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked")
511 SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
512 SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
513 SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
514 SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked")
515 SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked")
516 SCALAR_ARITHMETIC_BINARY(Logb, "logb", "logb_checked")
517 SCALAR_EAGER_BINARY(Atan2, "atan2")
518 SCALAR_EAGER_UNARY(Floor, "floor")
519 SCALAR_EAGER_UNARY(Ceil, "ceil")
520 SCALAR_EAGER_UNARY(Trunc, "trunc")
521
522 Result<Datum> MaxElementWise(const std::vector<Datum>& args,
523 ElementWiseAggregateOptions options, ExecContext* ctx) {
524 return CallFunction("max_element_wise", args, &options, ctx);
525 }
526
527 Result<Datum> MinElementWise(const std::vector<Datum>& args,
528 ElementWiseAggregateOptions options, ExecContext* ctx) {
529 return CallFunction("min_element_wise", args, &options, ctx);
530 }
531
532 // ----------------------------------------------------------------------
533 // Set-related operations
534
535 static Result<Datum> ExecSetLookup(const std::string& func_name, const Datum& data,
536 const SetLookupOptions& options, ExecContext* ctx) {
537 if (!options.value_set.is_arraylike()) {
538 return Status::Invalid("Set lookup value set must be Array or ChunkedArray");
539 }
540 std::shared_ptr<DataType> data_type;
541 if (data.type()->id() == Type::DICTIONARY) {
542 data_type =
543 arrow::internal::checked_pointer_cast<DictionaryType>(data.type())->value_type();
544 } else {
545 data_type = data.type();
546 }
547
548 if (options.value_set.length() > 0 && !data_type->Equals(options.value_set.type())) {
549 std::stringstream ss;
550 ss << "Array type didn't match type of values set: " << data_type->ToString()
551 << " vs " << options.value_set.type()->ToString();
552 return Status::Invalid(ss.str());
553 }
554 return CallFunction(func_name, {data}, &options, ctx);
555 }
556
557 Result<Datum> IsIn(const Datum& values, const SetLookupOptions& options,
558 ExecContext* ctx) {
559 return ExecSetLookup("is_in", values, options, ctx);
560 }
561
562 Result<Datum> IsIn(const Datum& values, const Datum& value_set, ExecContext* ctx) {
563 return ExecSetLookup("is_in", values, SetLookupOptions{value_set}, ctx);
564 }
565
566 Result<Datum> IndexIn(const Datum& values, const SetLookupOptions& options,
567 ExecContext* ctx) {
568 return ExecSetLookup("index_in", values, options, ctx);
569 }
570
571 Result<Datum> IndexIn(const Datum& values, const Datum& value_set, ExecContext* ctx) {
572 return ExecSetLookup("index_in", values, SetLookupOptions{value_set}, ctx);
573 }
574
575 // ----------------------------------------------------------------------
576 // Boolean functions
577
578 SCALAR_EAGER_UNARY(Invert, "invert")
579 SCALAR_EAGER_BINARY(And, "and")
580 SCALAR_EAGER_BINARY(KleeneAnd, "and_kleene")
581 SCALAR_EAGER_BINARY(Or, "or")
582 SCALAR_EAGER_BINARY(KleeneOr, "or_kleene")
583 SCALAR_EAGER_BINARY(Xor, "xor")
584 SCALAR_EAGER_BINARY(AndNot, "and_not")
585 SCALAR_EAGER_BINARY(KleeneAndNot, "and_not_kleene")
586
587 // ----------------------------------------------------------------------
588
589 Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions options,
590 ExecContext* ctx) {
591 std::string func_name;
592 switch (options.op) {
593 case CompareOperator::EQUAL:
594 func_name = "equal";
595 break;
596 case CompareOperator::NOT_EQUAL:
597 func_name = "not_equal";
598 break;
599 case CompareOperator::GREATER:
600 func_name = "greater";
601 break;
602 case CompareOperator::GREATER_EQUAL:
603 func_name = "greater_equal";
604 break;
605 case CompareOperator::LESS:
606 func_name = "less";
607 break;
608 case CompareOperator::LESS_EQUAL:
609 func_name = "less_equal";
610 break;
611 }
612 return CallFunction(func_name, {left, right}, nullptr, ctx);
613 }
614
615 // ----------------------------------------------------------------------
616 // Validity functions
617
618 SCALAR_EAGER_UNARY(IsValid, "is_valid")
619 SCALAR_EAGER_UNARY(IsNan, "is_nan")
620
621 Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_false,
622 ExecContext* ctx) {
623 return CallFunction("if_else", {cond, if_true, if_false}, ctx);
624 }
625
626 Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
627 ExecContext* ctx) {
628 std::vector<Datum> args = {cond};
629 args.reserve(cases.size() + 1);
630 args.insert(args.end(), cases.begin(), cases.end());
631 return CallFunction("case_when", args, ctx);
632 }
633
634 Result<Datum> IsNull(const Datum& arg, NullOptions options, ExecContext* ctx) {
635 return CallFunction("is_null", {arg}, &options, ctx);
636 }
637
638 // ----------------------------------------------------------------------
639 // Temporal functions
640
641 SCALAR_EAGER_UNARY(Year, "year")
642 SCALAR_EAGER_UNARY(Month, "month")
643 SCALAR_EAGER_UNARY(Day, "day")
644 SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
645 SCALAR_EAGER_UNARY(ISOYear, "iso_year")
646 SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
647 SCALAR_EAGER_UNARY(USWeek, "us_week")
648 SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar")
649 SCALAR_EAGER_UNARY(Quarter, "quarter")
650 SCALAR_EAGER_UNARY(Hour, "hour")
651 SCALAR_EAGER_UNARY(Minute, "minute")
652 SCALAR_EAGER_UNARY(Second, "second")
653 SCALAR_EAGER_UNARY(Millisecond, "millisecond")
654 SCALAR_EAGER_UNARY(Microsecond, "microsecond")
655 SCALAR_EAGER_UNARY(Nanosecond, "nanosecond")
656 SCALAR_EAGER_UNARY(Subsecond, "subsecond")
657
658 Result<Datum> DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) {
659 return CallFunction("day_of_week", {arg}, &options, ctx);
660 }
661
662 Result<Datum> AssumeTimezone(const Datum& arg, AssumeTimezoneOptions options,
663 ExecContext* ctx) {
664 return CallFunction("assume_timezone", {arg}, &options, ctx);
665 }
666
667 Result<Datum> Week(const Datum& arg, WeekOptions options, ExecContext* ctx) {
668 return CallFunction("week", {arg}, &options, ctx);
669 }
670
671 Result<Datum> Strftime(const Datum& arg, StrftimeOptions options, ExecContext* ctx) {
672 return CallFunction("strftime", {arg}, &options, ctx);
673 }
674
675 } // namespace compute
676 } // namespace arrow