]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/compute/kernels/scalar_validity.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / kernels / scalar_validity.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cmath>
19
20#include "arrow/compute/api_scalar.h"
21#include "arrow/compute/kernels/common.h"
22
23#include "arrow/util/bit_util.h"
24#include "arrow/util/bitmap_ops.h"
25
26namespace arrow {
27
28using internal::CopyBitmap;
29using internal::InvertBitmap;
30
31namespace compute {
32namespace internal {
33namespace {
34
35struct IsValidOperator {
36 static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
37 checked_cast<BooleanScalar*>(out)->value = in.is_valid;
38 return Status::OK();
39 }
40
41 static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
42 DCHECK_EQ(out->offset, 0);
43 DCHECK_LE(out->length, arr.length);
44 if (arr.MayHaveNulls()) {
45 // Input has nulls => output is the null (validity) bitmap.
46 // To avoid copying the null bitmap, slice from the starting byte offset
47 // and set the offset to the remaining bit offset.
48 out->offset = arr.offset % 8;
49 out->buffers[1] =
50 arr.offset == 0 ? arr.buffers[0]
51 : SliceBuffer(arr.buffers[0], arr.offset / 8,
52 BitUtil::BytesForBits(out->length + out->offset));
53 return Status::OK();
54 }
55
56 // Input has no nulls => output is entirely true.
57 ARROW_ASSIGN_OR_RAISE(out->buffers[1],
58 ctx->AllocateBitmap(out->length + out->offset));
59 BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length, true);
60 return Status::OK();
61 }
62};
63
64struct IsFiniteOperator {
65 template <typename OutType, typename InType>
66 static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
67 return std::isfinite(value);
68 }
69};
70
71struct IsInfOperator {
72 template <typename OutType, typename InType>
73 static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
74 return std::isinf(value);
75 }
76};
77
// Kernel state wrapper around NullOptions. IsNullOperator reads the
// `nan_is_null` flag through NanOptionsState::Get(ctx), and
// NanOptionsState::Init is passed as the kernel init function for "is_null".
using NanOptionsState = OptionsWrapper<NullOptions>;
79
80struct IsNullOperator {
81 static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
82 const auto& options = NanOptionsState::Get(ctx);
83 bool* out_value = &checked_cast<BooleanScalar*>(out)->value;
84
85 if (in.is_valid) {
86 if (options.nan_is_null && is_floating(in.type->id())) {
87 switch (in.type->id()) {
88 case Type::FLOAT:
89 *out_value = std::isnan(internal::UnboxScalar<FloatType>::Unbox(in));
90 break;
91 case Type::DOUBLE:
92 *out_value = std::isnan(internal::UnboxScalar<DoubleType>::Unbox(in));
93 break;
94 default:
95 return Status::NotImplemented("NaN detection not implemented for type ",
96 in.type->ToString());
97 }
98 } else {
99 *out_value = false;
100 }
101 } else {
102 *out_value = true;
103 }
104
105 return Status::OK();
106 }
107
108 template <typename T>
109 static void SetNanBits(const ArrayData& arr, uint8_t* out_bitmap, int64_t out_offset) {
110 const T* data = arr.GetValues<T>(1);
111 for (int64_t i = 0; i < arr.length; ++i) {
112 if (std::isnan(data[i])) {
113 BitUtil::SetBit(out_bitmap, i + out_offset);
114 }
115 }
116 }
117
118 static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
119 const auto& options = NanOptionsState::Get(ctx);
120
121 uint8_t* out_bitmap = out->buffers[1]->mutable_data();
122 if (arr.GetNullCount() > 0) {
123 // Input has nulls => output is the inverted null (validity) bitmap.
124 InvertBitmap(arr.buffers[0]->data(), arr.offset, arr.length, out_bitmap,
125 out->offset);
126 } else {
127 // Input has no nulls => output is entirely false.
128 BitUtil::SetBitsTo(out_bitmap, out->offset, out->length, false);
129 }
130
131 if (is_floating(arr.type->id()) && options.nan_is_null) {
132 switch (arr.type->id()) {
133 case Type::FLOAT:
134 SetNanBits<float>(arr, out_bitmap, out->offset);
135 break;
136 case Type::DOUBLE:
137 SetNanBits<double>(arr, out_bitmap, out->offset);
138 break;
139 default:
140 return Status::NotImplemented("NaN detection not implemented for type ",
141 arr.type->ToString());
142 }
143 }
144 return Status::OK();
145 }
146};
147
148struct IsNanOperator {
149 template <typename OutType, typename InType>
150 static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
151 return std::isnan(value);
152 }
153};
154
155void MakeFunction(std::string name, const FunctionDoc* doc,
156 std::vector<InputType> in_types, OutputType out_type,
157 ArrayKernelExec exec, FunctionRegistry* registry,
158 MemAllocation::type mem_allocation, bool can_write_into_slices,
159 const FunctionOptions* default_options = NULLPTR,
160 KernelInit init = NULLPTR) {
161 Arity arity{static_cast<int>(in_types.size())};
162 auto func = std::make_shared<ScalarFunction>(name, arity, doc, default_options);
163
164 ScalarKernel kernel(std::move(in_types), out_type, exec, init);
165 kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
166 kernel.can_write_into_slices = can_write_into_slices;
167 kernel.mem_allocation = mem_allocation;
168
169 DCHECK_OK(func->AddKernel(std::move(kernel)));
170 DCHECK_OK(registry->AddFunction(std::move(func)));
171}
172
173template <typename InType, typename Op>
174void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
175 DCHECK_OK(func->AddKernel({ty}, boolean(),
176 applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
177}
178
179std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
180 const FunctionDoc* doc) {
181 auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
182
183 AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
184 AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
185
186 return func;
187}
188
189std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
190 const FunctionDoc* doc) {
191 auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
192
193 AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
194 AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
195
196 return func;
197}
198
199std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
200 const FunctionDoc* doc) {
201 auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
202
203 AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
204 AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
205
206 return func;
207}
208
209Status IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
210 const Datum& arg0 = batch[0];
211 if (arg0.type()->id() == Type::NA) {
212 auto false_value = std::make_shared<BooleanScalar>(false);
213 if (arg0.kind() == Datum::SCALAR) {
214 out->value = false_value;
215 } else {
216 std::shared_ptr<Array> false_values;
217 RETURN_NOT_OK(MakeArrayFromScalar(*false_value, out->length(), ctx->memory_pool())
218 .Value(&false_values));
219 out->value = false_values->data();
220 }
221 return Status::OK();
222 } else {
223 return applicator::SimpleUnary<IsValidOperator>(ctx, batch, out);
224 }
225}
226
227Status IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
228 const Datum& arg0 = batch[0];
229 if (arg0.type()->id() == Type::NA) {
230 if (arg0.kind() == Datum::SCALAR) {
231 out->value = std::make_shared<BooleanScalar>(true);
232 } else {
233 // Data is preallocated
234 ArrayData* out_arr = out->mutable_array();
235 BitUtil::SetBitsTo(out_arr->buffers[1]->mutable_data(), out_arr->offset,
236 out_arr->length, true);
237 }
238 return Status::OK();
239 } else {
240 return applicator::SimpleUnary<IsNullOperator>(ctx, batch, out);
241 }
242}
243
244const FunctionDoc is_valid_doc(
245 "Return true if non-null",
246 ("For each input value, emit true iff the value is valid (non-null)."), {"values"});
247
248const FunctionDoc is_finite_doc(
249 "Return true if value is finite",
250 ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."),
251 {"values"});
252
253const FunctionDoc is_inf_doc(
254 "Return true if infinity",
255 ("For each input value, emit true iff the value is infinite (inf or -inf)."),
256 {"values"});
257
258const FunctionDoc is_null_doc(
259 "Return true if null (and optionally NaN)",
260 ("For each input value, emit true iff the value is null.\n"
261 "True may also be emitted for NaN values by setting the `nan_is_null` flag."),
262 {"values"}, "NullOptions");
263
264const FunctionDoc is_nan_doc("Return true if NaN",
265 ("For each input value, emit true iff the value is NaN."),
266 {"values"});
267
268} // namespace
269
270void RegisterScalarValidity(FunctionRegistry* registry) {
271 static auto kNullOptions = NullOptions::Defaults();
272 MakeFunction("is_valid", &is_valid_doc, {ValueDescr::ANY}, boolean(), IsValidExec,
273 registry, MemAllocation::NO_PREALLOCATE, /*can_write_into_slices=*/false);
274
275 MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec,
276 registry, MemAllocation::PREALLOCATE,
277 /*can_write_into_slices=*/true, &kNullOptions, NanOptionsState::Init);
278
279 DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
280 DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
281 DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
282}
283
284} // namespace internal
285} // namespace compute
286} // namespace arrow