]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/util/spaced.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / util / spaced.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <cassert>
21 #include <cstdint>
22 #include <cstring>
23
24 #include "arrow/util/bit_run_reader.h"
25
26 namespace arrow {
27 namespace util {
28 namespace internal {
29
30 /// \brief Compress the buffer to spaced, excluding the null entries.
31 ///
32 /// \param[in] src the source buffer
33 /// \param[in] num_values the size of source buffer
34 /// \param[in] valid_bits bitmap data indicating position of valid slots
35 /// \param[in] valid_bits_offset offset into valid_bits
36 /// \param[out] output the output buffer spaced
37 /// \return The size of spaced buffer.
38 template <typename T>
39 inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
40 int64_t valid_bits_offset, T* output) {
41 int num_valid_values = 0;
42
43 arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
44 while (true) {
45 const auto run = reader.NextRun();
46 if (run.length == 0) {
47 break;
48 }
49 std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
50 num_valid_values += static_cast<int32_t>(run.length);
51 }
52
53 return num_valid_values;
54 }
55
56 /// \brief Relocate values in buffer into positions of non-null values as indicated by
57 /// a validity bitmap.
58 ///
59 /// \param[in, out] buffer the in-place buffer
60 /// \param[in] num_values total size of buffer including null slots
61 /// \param[in] null_count number of null slots
62 /// \param[in] valid_bits bitmap data indicating position of valid slots
63 /// \param[in] valid_bits_offset offset into valid_bits
64 /// \return The number of values expanded, including nulls.
65 template <typename T>
66 inline int SpacedExpand(T* buffer, int num_values, int null_count,
67 const uint8_t* valid_bits, int64_t valid_bits_offset) {
68 // Point to end as we add the spacing from the back.
69 int idx_decode = num_values - null_count;
70
71 // Depending on the number of nulls, some of the value slots in buffer may
72 // be uninitialized, and this will cause valgrind warnings / potentially UB
73 std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
74 if (idx_decode == 0) {
75 // All nulls, nothing more to do
76 return num_values;
77 }
78
79 arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
80 num_values);
81 while (true) {
82 const auto run = reader.NextRun();
83 if (run.length == 0) {
84 break;
85 }
86 idx_decode -= static_cast<int32_t>(run.length);
87 assert(idx_decode >= 0);
88 std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
89 }
90
91 // Otherwise caller gave an incorrect null_count
92 assert(idx_decode == 0);
93 return num_values;
94 }
95
96 } // namespace internal
97 } // namespace util
98 } // namespace arrow