]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/array/builder_dict.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / builder_dict.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/array/builder_dict.h"
19
20#include <cstdint>
21#include <utility>
22
23#include "arrow/array/dict_internal.h"
24#include "arrow/status.h"
25#include "arrow/type.h"
26#include "arrow/type_traits.h"
27#include "arrow/util/checked_cast.h"
28#include "arrow/util/hashing.h"
29#include "arrow/util/logging.h"
30#include "arrow/visitor_inline.h"
31
32namespace arrow {
33
34// ----------------------------------------------------------------------
35// DictionaryBuilder
36
37namespace internal {
38
39class DictionaryMemoTable::DictionaryMemoTableImpl {
40 // Type-dependent visitor for memo table initialization
41 struct MemoTableInitializer {
42 std::shared_ptr<DataType> value_type_;
43 MemoryPool* pool_;
44 std::unique_ptr<MemoTable>* memo_table_;
45
46 template <typename T>
47 enable_if_no_memoize<T, Status> Visit(const T&) {
48 return Status::NotImplemented("Initialization of ", value_type_->ToString(),
49 " memo table is not implemented");
50 }
51
52 template <typename T>
53 enable_if_memoize<T, Status> Visit(const T&) {
54 using MemoTable = typename DictionaryTraits<T>::MemoTableType;
55 memo_table_->reset(new MemoTable(pool_, 0));
56 return Status::OK();
57 }
58 };
59
60 // Type-dependent visitor for memo table insertion
61 struct ArrayValuesInserter {
62 DictionaryMemoTableImpl* impl_;
63 const Array& values_;
64
65 template <typename T>
66 Status Visit(const T& type) {
67 using ArrayType = typename TypeTraits<T>::ArrayType;
68 return InsertValues(type, checked_cast<const ArrayType&>(values_));
69 }
70
71 private:
72 template <typename T, typename ArrayType>
73 enable_if_no_memoize<T, Status> InsertValues(const T& type, const ArrayType&) {
74 return Status::NotImplemented("Inserting array values of ", type,
75 " is not implemented");
76 }
77
78 template <typename T, typename ArrayType>
79 enable_if_memoize<T, Status> InsertValues(const T&, const ArrayType& array) {
80 if (array.null_count() > 0) {
81 return Status::Invalid("Cannot insert dictionary values containing nulls");
82 }
83 for (int64_t i = 0; i < array.length(); ++i) {
84 int32_t unused_memo_index;
85 RETURN_NOT_OK(impl_->GetOrInsert<T>(array.GetView(i), &unused_memo_index));
86 }
87 return Status::OK();
88 }
89 };
90
91 // Type-dependent visitor for building ArrayData from memo table
92 struct ArrayDataGetter {
93 std::shared_ptr<DataType> value_type_;
94 MemoTable* memo_table_;
95 MemoryPool* pool_;
96 int64_t start_offset_;
97 std::shared_ptr<ArrayData>* out_;
98
99 template <typename T>
100 enable_if_no_memoize<T, Status> Visit(const T&) {
101 return Status::NotImplemented("Getting array data of ", value_type_,
102 " is not implemented");
103 }
104
105 template <typename T>
106 enable_if_memoize<T, Status> Visit(const T&) {
107 using ConcreteMemoTable = typename DictionaryTraits<T>::MemoTableType;
108 auto memo_table = checked_cast<ConcreteMemoTable*>(memo_table_);
109 return DictionaryTraits<T>::GetDictionaryArrayData(pool_, value_type_, *memo_table,
110 start_offset_, out_);
111 }
112 };
113
114 public:
115 DictionaryMemoTableImpl(MemoryPool* pool, std::shared_ptr<DataType> type)
116 : pool_(pool), type_(std::move(type)), memo_table_(nullptr) {
117 MemoTableInitializer visitor{type_, pool_, &memo_table_};
118 ARROW_CHECK_OK(VisitTypeInline(*type_, &visitor));
119 }
120
121 Status InsertValues(const Array& array) {
122 if (!array.type()->Equals(*type_)) {
123 return Status::Invalid("Array value type does not match memo type: ",
124 array.type()->ToString());
125 }
126 ArrayValuesInserter visitor{this, array};
127 return VisitTypeInline(*array.type(), &visitor);
128 }
129
130 template <typename PhysicalType,
131 typename CType = typename DictionaryValue<PhysicalType>::type>
132 Status GetOrInsert(CType value, int32_t* out) {
133 using ConcreteMemoTable = typename DictionaryTraits<PhysicalType>::MemoTableType;
134 return checked_cast<ConcreteMemoTable*>(memo_table_.get())->GetOrInsert(value, out);
135 }
136
137 Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out) {
138 ArrayDataGetter visitor{type_, memo_table_.get(), pool_, start_offset, out};
139 return VisitTypeInline(*type_, &visitor);
140 }
141
142 int32_t size() const { return memo_table_->size(); }
143
144 private:
145 MemoryPool* pool_;
146 std::shared_ptr<DataType> type_;
147 std::unique_ptr<MemoTable> memo_table_;
148};
149
150DictionaryMemoTable::DictionaryMemoTable(MemoryPool* pool,
151 const std::shared_ptr<DataType>& type)
152 : impl_(new DictionaryMemoTableImpl(pool, type)) {}
153
154DictionaryMemoTable::DictionaryMemoTable(MemoryPool* pool,
155 const std::shared_ptr<Array>& dictionary)
156 : impl_(new DictionaryMemoTableImpl(pool, dictionary->type())) {
157 ARROW_CHECK_OK(impl_->InsertValues(*dictionary));
158}
159
// Defaulted here, after DictionaryMemoTableImpl has been fully defined in this
// file. NOTE(review): presumably required because impl_ is a smart pointer to
// a type that is only forward-declared in the header; confirm against
// builder_dict.h.
DictionaryMemoTable::~DictionaryMemoTable() = default;
161
162#define GET_OR_INSERT(ARROW_TYPE) \
163 Status DictionaryMemoTable::GetOrInsert( \
164 const ARROW_TYPE*, typename ARROW_TYPE::c_type value, int32_t* out) { \
165 return impl_->GetOrInsert<ARROW_TYPE>(value, out); \
166 }
167
168GET_OR_INSERT(BooleanType)
169GET_OR_INSERT(Int8Type)
170GET_OR_INSERT(Int16Type)
171GET_OR_INSERT(Int32Type)
172GET_OR_INSERT(Int64Type)
173GET_OR_INSERT(UInt8Type)
174GET_OR_INSERT(UInt16Type)
175GET_OR_INSERT(UInt32Type)
176GET_OR_INSERT(UInt64Type)
177GET_OR_INSERT(FloatType)
178GET_OR_INSERT(DoubleType)
179GET_OR_INSERT(DurationType);
180GET_OR_INSERT(TimestampType);
181GET_OR_INSERT(Date32Type);
182GET_OR_INSERT(Date64Type);
183GET_OR_INSERT(Time32Type);
184GET_OR_INSERT(Time64Type);
185GET_OR_INSERT(MonthDayNanoIntervalType);
186GET_OR_INSERT(DayTimeIntervalType);
187GET_OR_INSERT(MonthIntervalType);
188
189#undef GET_OR_INSERT
190
191Status DictionaryMemoTable::GetOrInsert(const BinaryType*, util::string_view value,
192 int32_t* out) {
193 return impl_->GetOrInsert<BinaryType>(value, out);
194}
195
196Status DictionaryMemoTable::GetOrInsert(const LargeBinaryType*, util::string_view value,
197 int32_t* out) {
198 return impl_->GetOrInsert<LargeBinaryType>(value, out);
199}
200
201Status DictionaryMemoTable::GetArrayData(int64_t start_offset,
202 std::shared_ptr<ArrayData>* out) {
203 return impl_->GetArrayData(start_offset, out);
204}
205
206Status DictionaryMemoTable::InsertValues(const Array& array) {
207 return impl_->InsertValues(array);
208}
209
210int32_t DictionaryMemoTable::size() const { return impl_->size(); }
211
212} // namespace internal
213} // namespace arrow