]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #pragma once | |
19 | ||
20 | #include <cassert> | |
21 | #include <cstdint> | |
22 | #include <cstring> | |
23 | ||
24 | #include "arrow/util/bit_run_reader.h" | |
25 | ||
26 | namespace arrow { | |
27 | namespace util { | |
28 | namespace internal { | |
29 | ||
30 | /// \brief Compress the buffer to spaced, excluding the null entries. | |
31 | /// | |
32 | /// \param[in] src the source buffer | |
33 | /// \param[in] num_values the size of source buffer | |
34 | /// \param[in] valid_bits bitmap data indicating position of valid slots | |
35 | /// \param[in] valid_bits_offset offset into valid_bits | |
36 | /// \param[out] output the output buffer spaced | |
37 | /// \return The size of spaced buffer. | |
38 | template <typename T> | |
39 | inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits, | |
40 | int64_t valid_bits_offset, T* output) { | |
41 | int num_valid_values = 0; | |
42 | ||
43 | arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values); | |
44 | while (true) { | |
45 | const auto run = reader.NextRun(); | |
46 | if (run.length == 0) { | |
47 | break; | |
48 | } | |
49 | std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T)); | |
50 | num_valid_values += static_cast<int32_t>(run.length); | |
51 | } | |
52 | ||
53 | return num_valid_values; | |
54 | } | |
55 | ||
56 | /// \brief Relocate values in buffer into positions of non-null values as indicated by | |
57 | /// a validity bitmap. | |
58 | /// | |
59 | /// \param[in, out] buffer the in-place buffer | |
60 | /// \param[in] num_values total size of buffer including null slots | |
61 | /// \param[in] null_count number of null slots | |
62 | /// \param[in] valid_bits bitmap data indicating position of valid slots | |
63 | /// \param[in] valid_bits_offset offset into valid_bits | |
64 | /// \return The number of values expanded, including nulls. | |
65 | template <typename T> | |
66 | inline int SpacedExpand(T* buffer, int num_values, int null_count, | |
67 | const uint8_t* valid_bits, int64_t valid_bits_offset) { | |
68 | // Point to end as we add the spacing from the back. | |
69 | int idx_decode = num_values - null_count; | |
70 | ||
71 | // Depending on the number of nulls, some of the value slots in buffer may | |
72 | // be uninitialized, and this will cause valgrind warnings / potentially UB | |
73 | std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T)); | |
74 | if (idx_decode == 0) { | |
75 | // All nulls, nothing more to do | |
76 | return num_values; | |
77 | } | |
78 | ||
79 | arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset, | |
80 | num_values); | |
81 | while (true) { | |
82 | const auto run = reader.NextRun(); | |
83 | if (run.length == 0) { | |
84 | break; | |
85 | } | |
86 | idx_decode -= static_cast<int32_t>(run.length); | |
87 | assert(idx_decode >= 0); | |
88 | std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T)); | |
89 | } | |
90 | ||
91 | // Otherwise caller gave an incorrect null_count | |
92 | assert(idx_decode == 0); | |
93 | return num_values; | |
94 | } | |
95 | ||
96 | } // namespace internal | |
97 | } // namespace util | |
98 | } // namespace arrow |