]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/compute/kernels/scalar_boolean.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / kernels / scalar_boolean.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <array>
19
20 #include "arrow/compute/kernels/common.h"
21 #include "arrow/util/bit_util.h"
22 #include "arrow/util/bitmap.h"
23 #include "arrow/util/bitmap_ops.h"
24
25 namespace arrow {
26
27 using internal::Bitmap;
28
29 namespace compute {
30
31 namespace {
32
33 template <typename ComputeWord>
34 void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayData& left,
35 const ArrayData& right, ArrayData* out) {
36 DCHECK(left.null_count != 0 || right.null_count != 0)
37 << "ComputeKleene is unnecessarily expensive for the non-null case";
38
39 Bitmap left_valid_bm{left.buffers[0], left.offset, left.length};
40 Bitmap left_data_bm{left.buffers[1], left.offset, left.length};
41
42 Bitmap right_valid_bm{right.buffers[0], right.offset, right.length};
43 Bitmap right_data_bm{right.buffers[1], right.offset, right.length};
44
45 std::array<Bitmap, 2> out_bms{Bitmap(out->buffers[0], out->offset, out->length),
46 Bitmap(out->buffers[1], out->offset, out->length)};
47
48 auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid,
49 uint64_t right_data, uint64_t* out_validity, uint64_t* out_data) {
50 auto left_true = left_valid & left_data;
51 auto left_false = left_valid & ~left_data;
52
53 auto right_true = right_valid & right_data;
54 auto right_false = right_valid & ~right_data;
55
56 compute_word(left_true, left_false, right_true, right_false, out_validity, out_data);
57 };
58
59 if (right.null_count == 0) {
60 std::array<Bitmap, 3> in_bms{left_valid_bm, left_data_bm, right_data_bm};
61 Bitmap::VisitWordsAndWrite(
62 in_bms, &out_bms,
63 [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
64 apply(in[0], in[1], ~uint64_t(0), in[2], &(out->at(0)), &(out->at(1)));
65 });
66 return;
67 }
68
69 if (left.null_count == 0) {
70 std::array<Bitmap, 3> in_bms{left_data_bm, right_valid_bm, right_data_bm};
71 Bitmap::VisitWordsAndWrite(
72 in_bms, &out_bms,
73 [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
74 apply(~uint64_t(0), in[0], in[1], in[2], &(out->at(0)), &(out->at(1)));
75 });
76 return;
77 }
78
79 DCHECK(left.null_count != 0 && right.null_count != 0);
80 std::array<Bitmap, 4> in_bms{left_valid_bm, left_data_bm, right_valid_bm,
81 right_data_bm};
82 Bitmap::VisitWordsAndWrite(
83 in_bms, &out_bms,
84 [&](const std::array<uint64_t, 4>& in, std::array<uint64_t, 2>* out) {
85 apply(in[0], in[1], in[2], in[3], &(out->at(0)), &(out->at(1)));
86 });
87 }
88
89 inline BooleanScalar InvertScalar(const Scalar& in) {
90 return in.is_valid ? BooleanScalar(!checked_cast<const BooleanScalar&>(in).value)
91 : BooleanScalar();
92 }
93
94 inline Bitmap GetBitmap(const ArrayData& arr, int index) {
95 return Bitmap{arr.buffers[index], arr.offset, arr.length};
96 }
97
98 struct InvertOp {
99 static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
100 *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
101 return Status::OK();
102 }
103
104 static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
105 GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
106 return Status::OK();
107 }
108 };
109
110 template <typename Op>
111 struct Commutative {
112 static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
113 ArrayData* out) {
114 return Op::Call(ctx, right, left, out);
115 }
116 };
117
118 struct AndOp : Commutative<AndOp> {
119 using Commutative<AndOp>::Call;
120
121 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
122 Scalar* out) {
123 if (left.is_valid && right.is_valid) {
124 checked_cast<BooleanScalar*>(out)->value =
125 checked_cast<const BooleanScalar&>(left).value &&
126 checked_cast<const BooleanScalar&>(right).value;
127 }
128 return Status::OK();
129 }
130
131 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
132 ArrayData* out) {
133 if (right.is_valid) {
134 checked_cast<const BooleanScalar&>(right).value
135 ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))
136 : GetBitmap(*out, 1).SetBitsTo(false);
137 }
138 return Status::OK();
139 }
140
141 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
142 ArrayData* out) {
143 ::arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset,
144 right.buffers[1]->data(), right.offset, right.length,
145 out->offset, out->buffers[1]->mutable_data());
146 return Status::OK();
147 }
148 };
149
150 struct KleeneAndOp : Commutative<KleeneAndOp> {
151 using Commutative<KleeneAndOp>::Call;
152
153 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
154 Scalar* out) {
155 bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
156 bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
157
158 bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
159 bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
160
161 checked_cast<BooleanScalar*>(out)->value = left_true && right_true;
162 out->is_valid = left_false || right_false || (left_true && right_true);
163 return Status::OK();
164 }
165
166 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
167 ArrayData* out) {
168 bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
169 bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
170
171 if (right_false) {
172 out->null_count = 0;
173 out->buffers[0] = nullptr;
174 GetBitmap(*out, 1).SetBitsTo(false); // all false case
175 return Status::OK();
176 }
177
178 if (right_true) {
179 if (left.GetNullCount() == 0) {
180 out->null_count = 0;
181 out->buffers[0] = nullptr;
182 } else {
183 GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
184 }
185 GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
186 return Status::OK();
187 }
188
189 // scalar was null: out[i] is valid iff left[i] was false
190 if (left.GetNullCount() == 0) {
191 ::arrow::internal::InvertBitmap(left.buffers[1]->data(), left.offset, left.length,
192 out->buffers[0]->mutable_data(), out->offset);
193 } else {
194 ::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
195 left.buffers[1]->data(), left.offset, left.length,
196 out->offset, out->buffers[0]->mutable_data());
197 }
198 ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
199 out->buffers[1]->mutable_data(), out->offset);
200 return Status::OK();
201 }
202
203 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
204 ArrayData* out) {
205 if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
206 out->null_count = 0;
207 // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
208 BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
209 return AndOp::Call(ctx, left, right, out);
210 }
211 auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
212 uint64_t right_false, uint64_t* out_valid,
213 uint64_t* out_data) {
214 *out_data = left_true & right_true;
215 *out_valid = left_false | right_false | (left_true & right_true);
216 };
217 ComputeKleene(compute_word, ctx, left, right, out);
218 return Status::OK();
219 }
220 };
221
222 struct OrOp : Commutative<OrOp> {
223 using Commutative<OrOp>::Call;
224
225 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
226 Scalar* out) {
227 if (left.is_valid && right.is_valid) {
228 checked_cast<BooleanScalar*>(out)->value =
229 checked_cast<const BooleanScalar&>(left).value ||
230 checked_cast<const BooleanScalar&>(right).value;
231 }
232 return Status::OK();
233 }
234
235 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
236 ArrayData* out) {
237 if (right.is_valid) {
238 checked_cast<const BooleanScalar&>(right).value
239 ? GetBitmap(*out, 1).SetBitsTo(true)
240 : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
241 }
242 return Status::OK();
243 }
244
245 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
246 ArrayData* out) {
247 ::arrow::internal::BitmapOr(left.buffers[1]->data(), left.offset,
248 right.buffers[1]->data(), right.offset, right.length,
249 out->offset, out->buffers[1]->mutable_data());
250 return Status::OK();
251 }
252 };
253
254 struct KleeneOrOp : Commutative<KleeneOrOp> {
255 using Commutative<KleeneOrOp>::Call;
256
257 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
258 Scalar* out) {
259 bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
260 bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
261
262 bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
263 bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
264
265 checked_cast<BooleanScalar*>(out)->value = left_true || right_true;
266 out->is_valid = left_true || right_true || (left_false && right_false);
267 return Status::OK();
268 }
269
270 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
271 ArrayData* out) {
272 bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
273 bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
274
275 if (right_true) {
276 out->null_count = 0;
277 out->buffers[0] = nullptr;
278 GetBitmap(*out, 1).SetBitsTo(true); // all true case
279 return Status::OK();
280 }
281
282 if (right_false) {
283 if (left.GetNullCount() == 0) {
284 out->null_count = 0;
285 out->buffers[0] = nullptr;
286 } else {
287 GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
288 }
289 GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
290 return Status::OK();
291 }
292
293 // scalar was null: out[i] is valid iff left[i] was true
294 if (left.GetNullCount() == 0) {
295 ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
296 out->buffers[0]->mutable_data(), out->offset);
297 } else {
298 ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
299 left.buffers[1]->data(), left.offset, left.length,
300 out->offset, out->buffers[0]->mutable_data());
301 }
302 ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
303 out->buffers[1]->mutable_data(), out->offset);
304 return Status::OK();
305 }
306
307 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
308 ArrayData* out) {
309 if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
310 out->null_count = 0;
311 // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
312 BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
313 return OrOp::Call(ctx, left, right, out);
314 }
315
316 static auto compute_word = [](uint64_t left_true, uint64_t left_false,
317 uint64_t right_true, uint64_t right_false,
318 uint64_t* out_valid, uint64_t* out_data) {
319 *out_data = left_true | right_true;
320 *out_valid = left_true | right_true | (left_false & right_false);
321 };
322
323 ComputeKleene(compute_word, ctx, left, right, out);
324 return Status::OK();
325 }
326 };
327
328 struct XorOp : Commutative<XorOp> {
329 using Commutative<XorOp>::Call;
330
331 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
332 Scalar* out) {
333 if (left.is_valid && right.is_valid) {
334 checked_cast<BooleanScalar*>(out)->value =
335 checked_cast<const BooleanScalar&>(left).value ^
336 checked_cast<const BooleanScalar&>(right).value;
337 }
338 return Status::OK();
339 }
340
341 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
342 ArrayData* out) {
343 if (right.is_valid) {
344 checked_cast<const BooleanScalar&>(right).value
345 ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(left, 1))
346 : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
347 }
348 return Status::OK();
349 }
350
351 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
352 ArrayData* out) {
353 ::arrow::internal::BitmapXor(left.buffers[1]->data(), left.offset,
354 right.buffers[1]->data(), right.offset, right.length,
355 out->offset, out->buffers[1]->mutable_data());
356 return Status::OK();
357 }
358 };
359
360 struct AndNotOp {
361 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
362 Scalar* out) {
363 return AndOp::Call(ctx, left, InvertScalar(right), out);
364 }
365
366 static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
367 ArrayData* out) {
368 if (left.is_valid) {
369 checked_cast<const BooleanScalar&>(left).value
370 ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
371 : GetBitmap(*out, 1).SetBitsTo(false);
372 }
373 return Status::OK();
374 }
375
376 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
377 ArrayData* out) {
378 return AndOp::Call(ctx, left, InvertScalar(right), out);
379 }
380
381 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
382 ArrayData* out) {
383 ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
384 right.buffers[1]->data(), right.offset, right.length,
385 out->offset, out->buffers[1]->mutable_data());
386 return Status::OK();
387 }
388 };
389
390 struct KleeneAndNotOp {
391 static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
392 Scalar* out) {
393 return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
394 }
395
396 static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
397 ArrayData* out) {
398 bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
399 bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
400
401 if (left_false) {
402 out->null_count = 0;
403 out->buffers[0] = nullptr;
404 GetBitmap(*out, 1).SetBitsTo(false); // all false case
405 return Status::OK();
406 }
407
408 if (left_true) {
409 if (right.GetNullCount() == 0) {
410 out->null_count = 0;
411 out->buffers[0] = nullptr;
412 } else {
413 GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
414 }
415 GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
416 return Status::OK();
417 }
418
419 // scalar was null: out[i] is valid iff right[i] was true
420 if (right.GetNullCount() == 0) {
421 ::arrow::internal::CopyBitmap(right.buffers[1]->data(), right.offset, right.length,
422 out->buffers[0]->mutable_data(), out->offset);
423 } else {
424 ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
425 right.buffers[1]->data(), right.offset, right.length,
426 out->offset, out->buffers[0]->mutable_data());
427 }
428 ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
429 out->buffers[1]->mutable_data(), out->offset);
430 return Status::OK();
431 }
432
433 static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
434 ArrayData* out) {
435 return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
436 }
437
438 static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
439 ArrayData* out) {
440 if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
441 out->null_count = 0;
442 // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
443 BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
444 return AndNotOp::Call(ctx, left, right, out);
445 }
446
447 static auto compute_word = [](uint64_t left_true, uint64_t left_false,
448 uint64_t right_true, uint64_t right_false,
449 uint64_t* out_valid, uint64_t* out_data) {
450 *out_data = left_true & right_false;
451 *out_valid = left_false | right_true | (left_true & right_false);
452 };
453
454 ComputeKleene(compute_word, ctx, left, right, out);
455 return Status::OK();
456 }
457 };
458
459 void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec,
460 const FunctionDoc* doc, FunctionRegistry* registry,
461 NullHandling::type null_handling = NullHandling::INTERSECTION) {
462 auto func = std::make_shared<ScalarFunction>(name, Arity(arity), doc);
463
464 // Scalar arguments not yet supported
465 std::vector<InputType> in_types(arity, InputType(boolean()));
466 ScalarKernel kernel(std::move(in_types), boolean(), exec);
467 kernel.null_handling = null_handling;
468
469 DCHECK_OK(func->AddKernel(kernel));
470 DCHECK_OK(registry->AddFunction(std::move(func)));
471 }
472
473 const FunctionDoc invert_doc{"Invert boolean values", "", {"values"}};
474
475 const FunctionDoc and_doc{
476 "Logical 'and' boolean values",
477 ("When a null is encountered in either input, a null is output.\n"
478 "For a different null behavior, see function \"and_kleene\"."),
479 {"x", "y"}};
480
481 const FunctionDoc and_not_doc{
482 "Logical 'and not' boolean values",
483 ("When a null is encountered in either input, a null is output.\n"
484 "For a different null behavior, see function \"and_not_kleene\"."),
485 {"x", "y"}};
486
487 const FunctionDoc or_doc{
488 "Logical 'or' boolean values",
489 ("When a null is encountered in either input, a null is output.\n"
490 "For a different null behavior, see function \"or_kleene\"."),
491 {"x", "y"}};
492
493 const FunctionDoc xor_doc{
494 "Logical 'xor' boolean values",
495 ("When a null is encountered in either input, a null is output."),
496 {"x", "y"}};
497
498 const FunctionDoc and_kleene_doc{
499 "Logical 'and' boolean values (Kleene logic)",
500 ("This function behaves as follows with nulls:\n\n"
501 "- true and null = null\n"
502 "- null and true = null\n"
503 "- false and null = false\n"
504 "- null and false = false\n"
505 "- null and null = null\n"
506 "\n"
507 "In other words, in this context a null value really means \"unknown\",\n"
508 "and an unknown value 'and' false is always false.\n"
509 "For a different null behavior, see function \"and\"."),
510 {"x", "y"}};
511
512 const FunctionDoc and_not_kleene_doc{
513 "Logical 'and not' boolean values (Kleene logic)",
514 ("This function behaves as follows with nulls:\n\n"
515 "- true and null = null\n"
516 "- null and false = null\n"
517 "- false and null = false\n"
518 "- null and true = false\n"
519 "- null and null = null\n"
520 "\n"
521 "In other words, in this context a null value really means \"unknown\",\n"
522 "and an unknown value 'and not' true is always false, as is false\n"
523 "'and not' an unknown value.\n"
524 "For a different null behavior, see function \"and_not\"."),
525 {"x", "y"}};
526
527 const FunctionDoc or_kleene_doc{
528 "Logical 'or' boolean values (Kleene logic)",
529 ("This function behaves as follows with nulls:\n\n"
530 "- true or null = true\n"
531 "- null and true = true\n"
532 "- false and null = null\n"
533 "- null and false = null\n"
534 "- null and null = null\n"
535 "\n"
536 "In other words, in this context a null value really means \"unknown\",\n"
537 "and an unknown value 'or' true is always true.\n"
538 "For a different null behavior, see function \"and\"."),
539 {"x", "y"}};
540
541 } // namespace
542
543 namespace internal {
544
545 void RegisterScalarBoolean(FunctionRegistry* registry) {
546 // These functions can write into sliced output bitmaps
547 MakeFunction("invert", 1, applicator::SimpleUnary<InvertOp>, &invert_doc, registry);
548 MakeFunction("and", 2, applicator::SimpleBinary<AndOp>, &and_doc, registry);
549 MakeFunction("and_not", 2, applicator::SimpleBinary<AndNotOp>, &and_not_doc, registry);
550 MakeFunction("or", 2, applicator::SimpleBinary<OrOp>, &or_doc, registry);
551 MakeFunction("xor", 2, applicator::SimpleBinary<XorOp>, &xor_doc, registry);
552
553 MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAndOp>, &and_kleene_doc,
554 registry, NullHandling::COMPUTED_PREALLOCATE);
555 MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNotOp>,
556 &and_not_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE);
557 MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOrOp>, &or_kleene_doc,
558 registry, NullHandling::COMPUTED_PREALLOCATE);
559 }
560
561 } // namespace internal
562 } // namespace compute
563 } // namespace arrow