1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
20 #include "arrow/compute/kernels/common.h"
21 #include "arrow/util/bit_util.h"
22 #include "arrow/util/bitmap.h"
23 #include "arrow/util/bitmap_ops.h"
27 using internal::Bitmap
;
33 template <typename ComputeWord
>
34 void ComputeKleene(ComputeWord
&& compute_word
, KernelContext
* ctx
, const ArrayData
& left
,
35 const ArrayData
& right
, ArrayData
* out
) {
36 DCHECK(left
.null_count
!= 0 || right
.null_count
!= 0)
37 << "ComputeKleene is unnecessarily expensive for the non-null case";
39 Bitmap left_valid_bm
{left
.buffers
[0], left
.offset
, left
.length
};
40 Bitmap left_data_bm
{left
.buffers
[1], left
.offset
, left
.length
};
42 Bitmap right_valid_bm
{right
.buffers
[0], right
.offset
, right
.length
};
43 Bitmap right_data_bm
{right
.buffers
[1], right
.offset
, right
.length
};
45 std::array
<Bitmap
, 2> out_bms
{Bitmap(out
->buffers
[0], out
->offset
, out
->length
),
46 Bitmap(out
->buffers
[1], out
->offset
, out
->length
)};
48 auto apply
= [&](uint64_t left_valid
, uint64_t left_data
, uint64_t right_valid
,
49 uint64_t right_data
, uint64_t* out_validity
, uint64_t* out_data
) {
50 auto left_true
= left_valid
& left_data
;
51 auto left_false
= left_valid
& ~left_data
;
53 auto right_true
= right_valid
& right_data
;
54 auto right_false
= right_valid
& ~right_data
;
56 compute_word(left_true
, left_false
, right_true
, right_false
, out_validity
, out_data
);
59 if (right
.null_count
== 0) {
60 std::array
<Bitmap
, 3> in_bms
{left_valid_bm
, left_data_bm
, right_data_bm
};
61 Bitmap::VisitWordsAndWrite(
63 [&](const std::array
<uint64_t, 3>& in
, std::array
<uint64_t, 2>* out
) {
64 apply(in
[0], in
[1], ~uint64_t(0), in
[2], &(out
->at(0)), &(out
->at(1)));
69 if (left
.null_count
== 0) {
70 std::array
<Bitmap
, 3> in_bms
{left_data_bm
, right_valid_bm
, right_data_bm
};
71 Bitmap::VisitWordsAndWrite(
73 [&](const std::array
<uint64_t, 3>& in
, std::array
<uint64_t, 2>* out
) {
74 apply(~uint64_t(0), in
[0], in
[1], in
[2], &(out
->at(0)), &(out
->at(1)));
79 DCHECK(left
.null_count
!= 0 && right
.null_count
!= 0);
80 std::array
<Bitmap
, 4> in_bms
{left_valid_bm
, left_data_bm
, right_valid_bm
,
82 Bitmap::VisitWordsAndWrite(
84 [&](const std::array
<uint64_t, 4>& in
, std::array
<uint64_t, 2>* out
) {
85 apply(in
[0], in
[1], in
[2], in
[3], &(out
->at(0)), &(out
->at(1)));
89 inline BooleanScalar
InvertScalar(const Scalar
& in
) {
90 return in
.is_valid
? BooleanScalar(!checked_cast
<const BooleanScalar
&>(in
).value
)
94 inline Bitmap
GetBitmap(const ArrayData
& arr
, int index
) {
95 return Bitmap
{arr
.buffers
[index
], arr
.offset
, arr
.length
};
99 static Status
Call(KernelContext
* ctx
, const Scalar
& in
, Scalar
* out
) {
100 *checked_cast
<BooleanScalar
*>(out
) = InvertScalar(in
);
104 static Status
Call(KernelContext
* ctx
, const ArrayData
& in
, ArrayData
* out
) {
105 GetBitmap(*out
, 1).CopyFromInverted(GetBitmap(in
, 1));
110 template <typename Op
>
112 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const ArrayData
& right
,
114 return Op::Call(ctx
, right
, left
, out
);
118 struct AndOp
: Commutative
<AndOp
> {
119 using Commutative
<AndOp
>::Call
;
121 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
123 if (left
.is_valid
&& right
.is_valid
) {
124 checked_cast
<BooleanScalar
*>(out
)->value
=
125 checked_cast
<const BooleanScalar
&>(left
).value
&&
126 checked_cast
<const BooleanScalar
&>(right
).value
;
131 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
133 if (right
.is_valid
) {
134 checked_cast
<const BooleanScalar
&>(right
).value
135 ? GetBitmap(*out
, 1).CopyFrom(GetBitmap(left
, 1))
136 : GetBitmap(*out
, 1).SetBitsTo(false);
141 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
143 ::arrow::internal::BitmapAnd(left
.buffers
[1]->data(), left
.offset
,
144 right
.buffers
[1]->data(), right
.offset
, right
.length
,
145 out
->offset
, out
->buffers
[1]->mutable_data());
150 struct KleeneAndOp
: Commutative
<KleeneAndOp
> {
151 using Commutative
<KleeneAndOp
>::Call
;
153 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
155 bool left_true
= left
.is_valid
&& checked_cast
<const BooleanScalar
&>(left
).value
;
156 bool left_false
= left
.is_valid
&& !checked_cast
<const BooleanScalar
&>(left
).value
;
158 bool right_true
= right
.is_valid
&& checked_cast
<const BooleanScalar
&>(right
).value
;
159 bool right_false
= right
.is_valid
&& !checked_cast
<const BooleanScalar
&>(right
).value
;
161 checked_cast
<BooleanScalar
*>(out
)->value
= left_true
&& right_true
;
162 out
->is_valid
= left_false
|| right_false
|| (left_true
&& right_true
);
166 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
168 bool right_true
= right
.is_valid
&& checked_cast
<const BooleanScalar
&>(right
).value
;
169 bool right_false
= right
.is_valid
&& !checked_cast
<const BooleanScalar
&>(right
).value
;
173 out
->buffers
[0] = nullptr;
174 GetBitmap(*out
, 1).SetBitsTo(false); // all false case
179 if (left
.GetNullCount() == 0) {
181 out
->buffers
[0] = nullptr;
183 GetBitmap(*out
, 0).CopyFrom(GetBitmap(left
, 0));
185 GetBitmap(*out
, 1).CopyFrom(GetBitmap(left
, 1));
189 // scalar was null: out[i] is valid iff left[i] was false
190 if (left
.GetNullCount() == 0) {
191 ::arrow::internal::InvertBitmap(left
.buffers
[1]->data(), left
.offset
, left
.length
,
192 out
->buffers
[0]->mutable_data(), out
->offset
);
194 ::arrow::internal::BitmapAndNot(left
.buffers
[0]->data(), left
.offset
,
195 left
.buffers
[1]->data(), left
.offset
, left
.length
,
196 out
->offset
, out
->buffers
[0]->mutable_data());
198 ::arrow::internal::CopyBitmap(left
.buffers
[1]->data(), left
.offset
, left
.length
,
199 out
->buffers
[1]->mutable_data(), out
->offset
);
203 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
205 if (left
.GetNullCount() == 0 && right
.GetNullCount() == 0) {
207 // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
208 BitUtil::SetBitmap(out
->buffers
[0]->mutable_data(), out
->offset
, out
->length
);
209 return AndOp::Call(ctx
, left
, right
, out
);
211 auto compute_word
= [](uint64_t left_true
, uint64_t left_false
, uint64_t right_true
,
212 uint64_t right_false
, uint64_t* out_valid
,
213 uint64_t* out_data
) {
214 *out_data
= left_true
& right_true
;
215 *out_valid
= left_false
| right_false
| (left_true
& right_true
);
217 ComputeKleene(compute_word
, ctx
, left
, right
, out
);
222 struct OrOp
: Commutative
<OrOp
> {
223 using Commutative
<OrOp
>::Call
;
225 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
227 if (left
.is_valid
&& right
.is_valid
) {
228 checked_cast
<BooleanScalar
*>(out
)->value
=
229 checked_cast
<const BooleanScalar
&>(left
).value
||
230 checked_cast
<const BooleanScalar
&>(right
).value
;
235 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
237 if (right
.is_valid
) {
238 checked_cast
<const BooleanScalar
&>(right
).value
239 ? GetBitmap(*out
, 1).SetBitsTo(true)
240 : GetBitmap(*out
, 1).CopyFrom(GetBitmap(left
, 1));
245 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
247 ::arrow::internal::BitmapOr(left
.buffers
[1]->data(), left
.offset
,
248 right
.buffers
[1]->data(), right
.offset
, right
.length
,
249 out
->offset
, out
->buffers
[1]->mutable_data());
254 struct KleeneOrOp
: Commutative
<KleeneOrOp
> {
255 using Commutative
<KleeneOrOp
>::Call
;
257 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
259 bool left_true
= left
.is_valid
&& checked_cast
<const BooleanScalar
&>(left
).value
;
260 bool left_false
= left
.is_valid
&& !checked_cast
<const BooleanScalar
&>(left
).value
;
262 bool right_true
= right
.is_valid
&& checked_cast
<const BooleanScalar
&>(right
).value
;
263 bool right_false
= right
.is_valid
&& !checked_cast
<const BooleanScalar
&>(right
).value
;
265 checked_cast
<BooleanScalar
*>(out
)->value
= left_true
|| right_true
;
266 out
->is_valid
= left_true
|| right_true
|| (left_false
&& right_false
);
270 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
272 bool right_true
= right
.is_valid
&& checked_cast
<const BooleanScalar
&>(right
).value
;
273 bool right_false
= right
.is_valid
&& !checked_cast
<const BooleanScalar
&>(right
).value
;
277 out
->buffers
[0] = nullptr;
278 GetBitmap(*out
, 1).SetBitsTo(true); // all true case
283 if (left
.GetNullCount() == 0) {
285 out
->buffers
[0] = nullptr;
287 GetBitmap(*out
, 0).CopyFrom(GetBitmap(left
, 0));
289 GetBitmap(*out
, 1).CopyFrom(GetBitmap(left
, 1));
293 // scalar was null: out[i] is valid iff left[i] was true
294 if (left
.GetNullCount() == 0) {
295 ::arrow::internal::CopyBitmap(left
.buffers
[1]->data(), left
.offset
, left
.length
,
296 out
->buffers
[0]->mutable_data(), out
->offset
);
298 ::arrow::internal::BitmapAnd(left
.buffers
[0]->data(), left
.offset
,
299 left
.buffers
[1]->data(), left
.offset
, left
.length
,
300 out
->offset
, out
->buffers
[0]->mutable_data());
302 ::arrow::internal::CopyBitmap(left
.buffers
[1]->data(), left
.offset
, left
.length
,
303 out
->buffers
[1]->mutable_data(), out
->offset
);
307 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
309 if (left
.GetNullCount() == 0 && right
.GetNullCount() == 0) {
311 // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
312 BitUtil::SetBitmap(out
->buffers
[0]->mutable_data(), out
->offset
, out
->length
);
313 return OrOp::Call(ctx
, left
, right
, out
);
316 static auto compute_word
= [](uint64_t left_true
, uint64_t left_false
,
317 uint64_t right_true
, uint64_t right_false
,
318 uint64_t* out_valid
, uint64_t* out_data
) {
319 *out_data
= left_true
| right_true
;
320 *out_valid
= left_true
| right_true
| (left_false
& right_false
);
323 ComputeKleene(compute_word
, ctx
, left
, right
, out
);
328 struct XorOp
: Commutative
<XorOp
> {
329 using Commutative
<XorOp
>::Call
;
331 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
333 if (left
.is_valid
&& right
.is_valid
) {
334 checked_cast
<BooleanScalar
*>(out
)->value
=
335 checked_cast
<const BooleanScalar
&>(left
).value
^
336 checked_cast
<const BooleanScalar
&>(right
).value
;
341 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
343 if (right
.is_valid
) {
344 checked_cast
<const BooleanScalar
&>(right
).value
345 ? GetBitmap(*out
, 1).CopyFromInverted(GetBitmap(left
, 1))
346 : GetBitmap(*out
, 1).CopyFrom(GetBitmap(left
, 1));
351 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
353 ::arrow::internal::BitmapXor(left
.buffers
[1]->data(), left
.offset
,
354 right
.buffers
[1]->data(), right
.offset
, right
.length
,
355 out
->offset
, out
->buffers
[1]->mutable_data());
361 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
363 return AndOp::Call(ctx
, left
, InvertScalar(right
), out
);
366 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const ArrayData
& right
,
369 checked_cast
<const BooleanScalar
&>(left
).value
370 ? GetBitmap(*out
, 1).CopyFromInverted(GetBitmap(right
, 1))
371 : GetBitmap(*out
, 1).SetBitsTo(false);
376 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
378 return AndOp::Call(ctx
, left
, InvertScalar(right
), out
);
381 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
383 ::arrow::internal::BitmapAndNot(left
.buffers
[1]->data(), left
.offset
,
384 right
.buffers
[1]->data(), right
.offset
, right
.length
,
385 out
->offset
, out
->buffers
[1]->mutable_data());
390 struct KleeneAndNotOp
{
391 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const Scalar
& right
,
393 return KleeneAndOp::Call(ctx
, left
, InvertScalar(right
), out
);
396 static Status
Call(KernelContext
* ctx
, const Scalar
& left
, const ArrayData
& right
,
398 bool left_true
= left
.is_valid
&& checked_cast
<const BooleanScalar
&>(left
).value
;
399 bool left_false
= left
.is_valid
&& !checked_cast
<const BooleanScalar
&>(left
).value
;
403 out
->buffers
[0] = nullptr;
404 GetBitmap(*out
, 1).SetBitsTo(false); // all false case
409 if (right
.GetNullCount() == 0) {
411 out
->buffers
[0] = nullptr;
413 GetBitmap(*out
, 0).CopyFrom(GetBitmap(right
, 0));
415 GetBitmap(*out
, 1).CopyFromInverted(GetBitmap(right
, 1));
419 // scalar was null: out[i] is valid iff right[i] was true
420 if (right
.GetNullCount() == 0) {
421 ::arrow::internal::CopyBitmap(right
.buffers
[1]->data(), right
.offset
, right
.length
,
422 out
->buffers
[0]->mutable_data(), out
->offset
);
424 ::arrow::internal::BitmapAnd(right
.buffers
[0]->data(), right
.offset
,
425 right
.buffers
[1]->data(), right
.offset
, right
.length
,
426 out
->offset
, out
->buffers
[0]->mutable_data());
428 ::arrow::internal::InvertBitmap(right
.buffers
[1]->data(), right
.offset
, right
.length
,
429 out
->buffers
[1]->mutable_data(), out
->offset
);
433 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const Scalar
& right
,
435 return KleeneAndOp::Call(ctx
, left
, InvertScalar(right
), out
);
438 static Status
Call(KernelContext
* ctx
, const ArrayData
& left
, const ArrayData
& right
,
440 if (left
.GetNullCount() == 0 && right
.GetNullCount() == 0) {
442 // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
443 BitUtil::SetBitmap(out
->buffers
[0]->mutable_data(), out
->offset
, out
->length
);
444 return AndNotOp::Call(ctx
, left
, right
, out
);
447 static auto compute_word
= [](uint64_t left_true
, uint64_t left_false
,
448 uint64_t right_true
, uint64_t right_false
,
449 uint64_t* out_valid
, uint64_t* out_data
) {
450 *out_data
= left_true
& right_false
;
451 *out_valid
= left_false
| right_true
| (left_true
& right_false
);
454 ComputeKleene(compute_word
, ctx
, left
, right
, out
);
459 void MakeFunction(const std::string
& name
, int arity
, ArrayKernelExec exec
,
460 const FunctionDoc
* doc
, FunctionRegistry
* registry
,
461 NullHandling::type null_handling
= NullHandling::INTERSECTION
) {
462 auto func
= std::make_shared
<ScalarFunction
>(name
, Arity(arity
), doc
);
464 // Scalar arguments not yet supported
465 std::vector
<InputType
> in_types(arity
, InputType(boolean()));
466 ScalarKernel
kernel(std::move(in_types
), boolean(), exec
);
467 kernel
.null_handling
= null_handling
;
469 DCHECK_OK(func
->AddKernel(kernel
));
470 DCHECK_OK(registry
->AddFunction(std::move(func
)));
473 const FunctionDoc invert_doc
{"Invert boolean values", "", {"values"}};
475 const FunctionDoc and_doc
{
476 "Logical 'and' boolean values",
477 ("When a null is encountered in either input, a null is output.\n"
478 "For a different null behavior, see function \"and_kleene\"."),
481 const FunctionDoc and_not_doc
{
482 "Logical 'and not' boolean values",
483 ("When a null is encountered in either input, a null is output.\n"
484 "For a different null behavior, see function \"and_not_kleene\"."),
487 const FunctionDoc or_doc
{
488 "Logical 'or' boolean values",
489 ("When a null is encountered in either input, a null is output.\n"
490 "For a different null behavior, see function \"or_kleene\"."),
493 const FunctionDoc xor_doc
{
494 "Logical 'xor' boolean values",
495 ("When a null is encountered in either input, a null is output."),
498 const FunctionDoc and_kleene_doc
{
499 "Logical 'and' boolean values (Kleene logic)",
500 ("This function behaves as follows with nulls:\n\n"
501 "- true and null = null\n"
502 "- null and true = null\n"
503 "- false and null = false\n"
504 "- null and false = false\n"
505 "- null and null = null\n"
507 "In other words, in this context a null value really means \"unknown\",\n"
508 "and an unknown value 'and' false is always false.\n"
509 "For a different null behavior, see function \"and\"."),
512 const FunctionDoc and_not_kleene_doc
{
513 "Logical 'and not' boolean values (Kleene logic)",
514 ("This function behaves as follows with nulls:\n\n"
515 "- true and null = null\n"
516 "- null and false = null\n"
517 "- false and null = false\n"
518 "- null and true = false\n"
519 "- null and null = null\n"
521 "In other words, in this context a null value really means \"unknown\",\n"
522 "and an unknown value 'and not' true is always false, as is false\n"
523 "'and not' an unknown value.\n"
524 "For a different null behavior, see function \"and_not\"."),
527 const FunctionDoc or_kleene_doc
{
528 "Logical 'or' boolean values (Kleene logic)",
529 ("This function behaves as follows with nulls:\n\n"
530 "- true or null = true\n"
531 "- null and true = true\n"
532 "- false and null = null\n"
533 "- null and false = null\n"
534 "- null and null = null\n"
536 "In other words, in this context a null value really means \"unknown\",\n"
537 "and an unknown value 'or' true is always true.\n"
538 "For a different null behavior, see function \"and\"."),
545 void RegisterScalarBoolean(FunctionRegistry
* registry
) {
546 // These functions can write into sliced output bitmaps
547 MakeFunction("invert", 1, applicator::SimpleUnary
<InvertOp
>, &invert_doc
, registry
);
548 MakeFunction("and", 2, applicator::SimpleBinary
<AndOp
>, &and_doc
, registry
);
549 MakeFunction("and_not", 2, applicator::SimpleBinary
<AndNotOp
>, &and_not_doc
, registry
);
550 MakeFunction("or", 2, applicator::SimpleBinary
<OrOp
>, &or_doc
, registry
);
551 MakeFunction("xor", 2, applicator::SimpleBinary
<XorOp
>, &xor_doc
, registry
);
553 MakeFunction("and_kleene", 2, applicator::SimpleBinary
<KleeneAndOp
>, &and_kleene_doc
,
554 registry
, NullHandling::COMPUTED_PREALLOCATE
);
555 MakeFunction("and_not_kleene", 2, applicator::SimpleBinary
<KleeneAndNotOp
>,
556 &and_not_kleene_doc
, registry
, NullHandling::COMPUTED_PREALLOCATE
);
557 MakeFunction("or_kleene", 2, applicator::SimpleBinary
<KleeneOrOp
>, &or_kleene_doc
,
558 registry
, NullHandling::COMPUTED_PREALLOCATE
);
561 } // namespace internal
562 } // namespace compute