]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <cmath> | |
19 | ||
20 | #include "arrow/compute/api_scalar.h" | |
21 | #include "arrow/compute/kernels/common.h" | |
22 | ||
23 | #include "arrow/util/bit_util.h" | |
24 | #include "arrow/util/bitmap_ops.h" | |
25 | ||
26 | namespace arrow { | |
27 | ||
28 | using internal::CopyBitmap; | |
29 | using internal::InvertBitmap; | |
30 | ||
31 | namespace compute { | |
32 | namespace internal { | |
33 | namespace { | |
34 | ||
35 | struct IsValidOperator { | |
36 | static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { | |
37 | checked_cast<BooleanScalar*>(out)->value = in.is_valid; | |
38 | return Status::OK(); | |
39 | } | |
40 | ||
41 | static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) { | |
42 | DCHECK_EQ(out->offset, 0); | |
43 | DCHECK_LE(out->length, arr.length); | |
44 | if (arr.MayHaveNulls()) { | |
45 | // Input has nulls => output is the null (validity) bitmap. | |
46 | // To avoid copying the null bitmap, slice from the starting byte offset | |
47 | // and set the offset to the remaining bit offset. | |
48 | out->offset = arr.offset % 8; | |
49 | out->buffers[1] = | |
50 | arr.offset == 0 ? arr.buffers[0] | |
51 | : SliceBuffer(arr.buffers[0], arr.offset / 8, | |
52 | BitUtil::BytesForBits(out->length + out->offset)); | |
53 | return Status::OK(); | |
54 | } | |
55 | ||
56 | // Input has no nulls => output is entirely true. | |
57 | ARROW_ASSIGN_OR_RAISE(out->buffers[1], | |
58 | ctx->AllocateBitmap(out->length + out->offset)); | |
59 | BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length, true); | |
60 | return Status::OK(); | |
61 | } | |
62 | }; | |
63 | ||
64 | struct IsFiniteOperator { | |
65 | template <typename OutType, typename InType> | |
66 | static constexpr OutType Call(KernelContext*, const InType& value, Status*) { | |
67 | return std::isfinite(value); | |
68 | } | |
69 | }; | |
70 | ||
71 | struct IsInfOperator { | |
72 | template <typename OutType, typename InType> | |
73 | static constexpr OutType Call(KernelContext*, const InType& value, Status*) { | |
74 | return std::isinf(value); | |
75 | } | |
76 | }; | |
77 | ||
78 | using NanOptionsState = OptionsWrapper<NullOptions>; | |
79 | ||
80 | struct IsNullOperator { | |
81 | static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { | |
82 | const auto& options = NanOptionsState::Get(ctx); | |
83 | bool* out_value = &checked_cast<BooleanScalar*>(out)->value; | |
84 | ||
85 | if (in.is_valid) { | |
86 | if (options.nan_is_null && is_floating(in.type->id())) { | |
87 | switch (in.type->id()) { | |
88 | case Type::FLOAT: | |
89 | *out_value = std::isnan(internal::UnboxScalar<FloatType>::Unbox(in)); | |
90 | break; | |
91 | case Type::DOUBLE: | |
92 | *out_value = std::isnan(internal::UnboxScalar<DoubleType>::Unbox(in)); | |
93 | break; | |
94 | default: | |
95 | return Status::NotImplemented("NaN detection not implemented for type ", | |
96 | in.type->ToString()); | |
97 | } | |
98 | } else { | |
99 | *out_value = false; | |
100 | } | |
101 | } else { | |
102 | *out_value = true; | |
103 | } | |
104 | ||
105 | return Status::OK(); | |
106 | } | |
107 | ||
108 | template <typename T> | |
109 | static void SetNanBits(const ArrayData& arr, uint8_t* out_bitmap, int64_t out_offset) { | |
110 | const T* data = arr.GetValues<T>(1); | |
111 | for (int64_t i = 0; i < arr.length; ++i) { | |
112 | if (std::isnan(data[i])) { | |
113 | BitUtil::SetBit(out_bitmap, i + out_offset); | |
114 | } | |
115 | } | |
116 | } | |
117 | ||
118 | static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) { | |
119 | const auto& options = NanOptionsState::Get(ctx); | |
120 | ||
121 | uint8_t* out_bitmap = out->buffers[1]->mutable_data(); | |
122 | if (arr.GetNullCount() > 0) { | |
123 | // Input has nulls => output is the inverted null (validity) bitmap. | |
124 | InvertBitmap(arr.buffers[0]->data(), arr.offset, arr.length, out_bitmap, | |
125 | out->offset); | |
126 | } else { | |
127 | // Input has no nulls => output is entirely false. | |
128 | BitUtil::SetBitsTo(out_bitmap, out->offset, out->length, false); | |
129 | } | |
130 | ||
131 | if (is_floating(arr.type->id()) && options.nan_is_null) { | |
132 | switch (arr.type->id()) { | |
133 | case Type::FLOAT: | |
134 | SetNanBits<float>(arr, out_bitmap, out->offset); | |
135 | break; | |
136 | case Type::DOUBLE: | |
137 | SetNanBits<double>(arr, out_bitmap, out->offset); | |
138 | break; | |
139 | default: | |
140 | return Status::NotImplemented("NaN detection not implemented for type ", | |
141 | arr.type->ToString()); | |
142 | } | |
143 | } | |
144 | return Status::OK(); | |
145 | } | |
146 | }; | |
147 | ||
148 | struct IsNanOperator { | |
149 | template <typename OutType, typename InType> | |
150 | static constexpr OutType Call(KernelContext*, const InType& value, Status*) { | |
151 | return std::isnan(value); | |
152 | } | |
153 | }; | |
154 | ||
155 | void MakeFunction(std::string name, const FunctionDoc* doc, | |
156 | std::vector<InputType> in_types, OutputType out_type, | |
157 | ArrayKernelExec exec, FunctionRegistry* registry, | |
158 | MemAllocation::type mem_allocation, bool can_write_into_slices, | |
159 | const FunctionOptions* default_options = NULLPTR, | |
160 | KernelInit init = NULLPTR) { | |
161 | Arity arity{static_cast<int>(in_types.size())}; | |
162 | auto func = std::make_shared<ScalarFunction>(name, arity, doc, default_options); | |
163 | ||
164 | ScalarKernel kernel(std::move(in_types), out_type, exec, init); | |
165 | kernel.null_handling = NullHandling::OUTPUT_NOT_NULL; | |
166 | kernel.can_write_into_slices = can_write_into_slices; | |
167 | kernel.mem_allocation = mem_allocation; | |
168 | ||
169 | DCHECK_OK(func->AddKernel(std::move(kernel))); | |
170 | DCHECK_OK(registry->AddFunction(std::move(func))); | |
171 | } | |
172 | ||
173 | template <typename InType, typename Op> | |
174 | void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) { | |
175 | DCHECK_OK(func->AddKernel({ty}, boolean(), | |
176 | applicator::ScalarUnary<BooleanType, InType, Op>::Exec)); | |
177 | } | |
178 | ||
179 | std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name, | |
180 | const FunctionDoc* doc) { | |
181 | auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc); | |
182 | ||
183 | AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get()); | |
184 | AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get()); | |
185 | ||
186 | return func; | |
187 | } | |
188 | ||
189 | std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name, | |
190 | const FunctionDoc* doc) { | |
191 | auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc); | |
192 | ||
193 | AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get()); | |
194 | AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get()); | |
195 | ||
196 | return func; | |
197 | } | |
198 | ||
199 | std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name, | |
200 | const FunctionDoc* doc) { | |
201 | auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc); | |
202 | ||
203 | AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get()); | |
204 | AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get()); | |
205 | ||
206 | return func; | |
207 | } | |
208 | ||
209 | Status IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { | |
210 | const Datum& arg0 = batch[0]; | |
211 | if (arg0.type()->id() == Type::NA) { | |
212 | auto false_value = std::make_shared<BooleanScalar>(false); | |
213 | if (arg0.kind() == Datum::SCALAR) { | |
214 | out->value = false_value; | |
215 | } else { | |
216 | std::shared_ptr<Array> false_values; | |
217 | RETURN_NOT_OK(MakeArrayFromScalar(*false_value, out->length(), ctx->memory_pool()) | |
218 | .Value(&false_values)); | |
219 | out->value = false_values->data(); | |
220 | } | |
221 | return Status::OK(); | |
222 | } else { | |
223 | return applicator::SimpleUnary<IsValidOperator>(ctx, batch, out); | |
224 | } | |
225 | } | |
226 | ||
227 | Status IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { | |
228 | const Datum& arg0 = batch[0]; | |
229 | if (arg0.type()->id() == Type::NA) { | |
230 | if (arg0.kind() == Datum::SCALAR) { | |
231 | out->value = std::make_shared<BooleanScalar>(true); | |
232 | } else { | |
233 | // Data is preallocated | |
234 | ArrayData* out_arr = out->mutable_array(); | |
235 | BitUtil::SetBitsTo(out_arr->buffers[1]->mutable_data(), out_arr->offset, | |
236 | out_arr->length, true); | |
237 | } | |
238 | return Status::OK(); | |
239 | } else { | |
240 | return applicator::SimpleUnary<IsNullOperator>(ctx, batch, out); | |
241 | } | |
242 | } | |
243 | ||
244 | const FunctionDoc is_valid_doc( | |
245 | "Return true if non-null", | |
246 | ("For each input value, emit true iff the value is valid (non-null)."), {"values"}); | |
247 | ||
248 | const FunctionDoc is_finite_doc( | |
249 | "Return true if value is finite", | |
250 | ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."), | |
251 | {"values"}); | |
252 | ||
253 | const FunctionDoc is_inf_doc( | |
254 | "Return true if infinity", | |
255 | ("For each input value, emit true iff the value is infinite (inf or -inf)."), | |
256 | {"values"}); | |
257 | ||
258 | const FunctionDoc is_null_doc( | |
259 | "Return true if null (and optionally NaN)", | |
260 | ("For each input value, emit true iff the value is null.\n" | |
261 | "True may also be emitted for NaN values by setting the `nan_is_null` flag."), | |
262 | {"values"}, "NullOptions"); | |
263 | ||
264 | const FunctionDoc is_nan_doc("Return true if NaN", | |
265 | ("For each input value, emit true iff the value is NaN."), | |
266 | {"values"}); | |
267 | ||
268 | } // namespace | |
269 | ||
270 | void RegisterScalarValidity(FunctionRegistry* registry) { | |
271 | static auto kNullOptions = NullOptions::Defaults(); | |
272 | MakeFunction("is_valid", &is_valid_doc, {ValueDescr::ANY}, boolean(), IsValidExec, | |
273 | registry, MemAllocation::NO_PREALLOCATE, /*can_write_into_slices=*/false); | |
274 | ||
275 | MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec, | |
276 | registry, MemAllocation::PREALLOCATE, | |
277 | /*can_write_into_slices=*/true, &kNullOptions, NanOptionsState::Init); | |
278 | ||
279 | DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc))); | |
280 | DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc))); | |
281 | DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc))); | |
282 | } | |
283 | ||
284 | } // namespace internal | |
285 | } // namespace compute | |
286 | } // namespace arrow |