]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/r/src/arrow_types.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / r / src / arrow_types.h
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <cpp11/R.hpp>
21
22#include "./arrow_cpp11.h"
23
24#if defined(ARROW_R_WITH_ARROW)
25
26#include <arrow/buffer.h> // for RBuffer definition below
27#include <arrow/result.h>
28#include <arrow/status.h>
29
30#include <limits>
31#include <memory>
32#include <utility>
33
34// forward declaration-only headers
35#include <arrow/c/abi.h>
36#include <arrow/compute/type_fwd.h>
37#include <arrow/csv/type_fwd.h>
38
39#if defined(ARROW_R_WITH_DATASET)
40#include <arrow/dataset/type_fwd.h>
41#endif
42
43#include <arrow/filesystem/type_fwd.h>
44#include <arrow/io/type_fwd.h>
45#include <arrow/ipc/type_fwd.h>
46
47#if defined(ARROW_R_WITH_JSON)
48#include <arrow/json/type_fwd.h>
49#endif
50
51#include <arrow/type_fwd.h>
52#include <arrow/util/type_fwd.h>
53
54namespace arrow {
55namespace compute {
56
57class ExecPlan;
58class ExecNode;
59
60} // namespace compute
61} // namespace arrow
62
63#if defined(ARROW_R_WITH_PARQUET)
64#include <parquet/type_fwd.h>
65#endif
66
67#if defined(ARROW_R_WITH_DATASET)
68namespace ds = ::arrow::dataset;
69#endif
70
71namespace compute = ::arrow::compute;
72namespace fs = ::arrow::fs;
73
74std::shared_ptr<arrow::RecordBatch> RecordBatch__from_arrays(SEXP, SEXP);
75arrow::MemoryPool* gc_memory_pool();
76arrow::compute::ExecContext* gc_context();
77
78#if (R_VERSION < R_Version(3, 5, 0))
79#define LOGICAL_RO(x) ((const int*)LOGICAL(x))
80#define INTEGER_RO(x) ((const int*)INTEGER(x))
81#define REAL_RO(x) ((const double*)REAL(x))
82#define COMPLEX_RO(x) ((const Rcomplex*)COMPLEX(x))
83#define STRING_PTR_RO(x) ((const SEXP*)STRING_PTR(x))
84#define RAW_RO(x) ((const Rbyte*)RAW(x))
85#define DATAPTR_RO(x) ((const void*)STRING_PTR(x))
86#define DATAPTR(x) (void*)STRING_PTR(x)
87#endif
88
89#define VECTOR_PTR_RO(x) ((const SEXP*)DATAPTR_RO(x))
90
91namespace arrow {
92
93static inline void StopIfNotOk(const Status& status) {
94 if (!status.ok()) {
95 // ARROW-13039: be careful not to interpret our error message as a %-format string
96 std::string s = status.ToString();
97 cpp11::stop("%s", s.c_str());
98 }
99}
100
// Passes result.status() to StopIfNotOk(), then returns result.ValueOrDie().
// `result` is forwarded, so its value category is preserved for ValueOrDie().
template <typename R>
auto ValueOrStop(R&& result) -> decltype(std::forward<R>(result).ValueOrDie()) {
  const auto& status = result.status();
  StopIfNotOk(status);
  return std::forward<R>(result).ValueOrDie();
}
106
107namespace r {
108class RTasks;
109
110std::shared_ptr<arrow::DataType> InferArrowType(SEXP x);
111std::shared_ptr<arrow::Array> vec_to_arrow__reuse_memory(SEXP x);
112bool can_reuse_memory(SEXP x, const std::shared_ptr<arrow::DataType>& type);
113
114Status count_fields(SEXP lst, int* out);
115
116void inspect(SEXP obj);
117std::shared_ptr<arrow::Array> vec_to_arrow(SEXP x,
118 const std::shared_ptr<arrow::DataType>& type,
119 bool type_inferred);
120
121// the integer64 sentinel
122constexpr int64_t NA_INT64 = std::numeric_limits<int64_t>::min();
123
124template <typename RVector>
125class RBuffer : public MutableBuffer {
126 public:
127 explicit RBuffer(RVector vec)
128 : MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)),
129 vec.size() * sizeof(typename RVector::value_type),
130 arrow::CPUDevice::memory_manager(gc_memory_pool())),
131 vec_(vec) {}
132
133 private:
134 // vec_ holds the memory
135 RVector vec_;
136};
137
138std::shared_ptr<arrow::DataType> InferArrowTypeFromFactor(SEXP);
139
140void validate_slice_offset(R_xlen_t offset, int64_t len);
141
142void validate_slice_length(R_xlen_t length, int64_t available);
143
144void validate_index(int i, int len);
145
146template <typename Lambda>
147void TraverseDots(cpp11::list dots, int num_fields, Lambda lambda) {
148 cpp11::strings names(dots.attr(R_NamesSymbol));
149
150 for (R_xlen_t i = 0, j = 0; j < num_fields; i++) {
151 auto name_i = names[i];
152
153 if (name_i.size() == 0) {
154 cpp11::list x_i = dots[i];
155 cpp11::strings names_x_i(x_i.attr(R_NamesSymbol));
156 R_xlen_t n_i = x_i.size();
157 for (R_xlen_t k = 0; k < n_i; k++, j++) {
158 lambda(j, x_i[k], names_x_i[k]);
159 }
160 } else {
161 lambda(j, dots[i], name_i);
162 j++;
163 }
164 }
165}
166
167inline cpp11::writable::list FlattenDots(cpp11::list dots, int num_fields) {
168 std::vector<SEXP> out(num_fields);
169 auto set = [&](int j, SEXP x, cpp11::r_string) { out[j] = x; };
170 TraverseDots(dots, num_fields, set);
171
172 return cpp11::writable::list(out.begin(), out.end());
173}
174
175arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields,
176 std::shared_ptr<arrow::Schema>& schema);
177
178arrow::Status AddMetadataFromDots(SEXP lst, int num_fields,
179 std::shared_ptr<arrow::Schema>& schema);
180
181namespace altrep {
182
183#if defined(HAS_ALTREP)
184void Init_Altrep_classes(DllInfo* dll);
185#endif
186
187SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array);
188
189} // namespace altrep
190
191} // namespace r
192} // namespace arrow
193
194namespace cpp11 {
195
196template <typename T>
197struct r6_class_name {
198 static const char* get(const std::shared_ptr<T>& ptr) {
199 static const std::string name = arrow::util::nameof<T>(/*strip_namespace=*/true);
200 return name.c_str();
201 }
202};
203
// Overrides of default R6 class names:
// R6_CLASS_NAME(TYPE, R6_NAME) declares a specialization r6_class_name<TYPE>
// whose get() ignores its argument and returns R6_NAME.
#define R6_CLASS_NAME(TYPE, R6_NAME)                                          \
  template <>                                                                 \
  struct r6_class_name<TYPE> {                                                \
    static const char* get(const std::shared_ptr<TYPE>&) { return R6_NAME; }  \
  }
210
211R6_CLASS_NAME(arrow::csv::ReadOptions, "CsvReadOptions");
212R6_CLASS_NAME(arrow::csv::ParseOptions, "CsvParseOptions");
213R6_CLASS_NAME(arrow::csv::ConvertOptions, "CsvConvertOptions");
214R6_CLASS_NAME(arrow::csv::TableReader, "CsvTableReader");
215R6_CLASS_NAME(arrow::csv::WriteOptions, "CsvWriteOptions");
216
217#if defined(ARROW_R_WITH_PARQUET)
218R6_CLASS_NAME(parquet::ArrowReaderProperties, "ParquetArrowReaderProperties");
219R6_CLASS_NAME(parquet::ArrowWriterProperties, "ParquetArrowWriterProperties");
220R6_CLASS_NAME(parquet::WriterProperties, "ParquetWriterProperties");
221R6_CLASS_NAME(parquet::arrow::FileReader, "ParquetFileReader");
222R6_CLASS_NAME(parquet::WriterPropertiesBuilder, "ParquetWriterPropertiesBuilder");
223R6_CLASS_NAME(parquet::arrow::FileWriter, "ParquetFileWriter");
224#endif
225
226R6_CLASS_NAME(arrow::ipc::feather::Reader, "FeatherReader");
227
228#if defined(ARROW_R_WITH_JSON)
229R6_CLASS_NAME(arrow::json::ReadOptions, "JsonReadOptions");
230R6_CLASS_NAME(arrow::json::ParseOptions, "JsonParseOptions");
231R6_CLASS_NAME(arrow::json::TableReader, "JsonTableReader");
232#endif
233
234#undef R6_CLASS_NAME
235
236// Declarations of discriminated base classes.
237// Definitions reside in corresponding .cpp files.
238template <>
239struct r6_class_name<fs::FileSystem> {
240 static const char* get(const std::shared_ptr<fs::FileSystem>&);
241};
242
243template <>
244struct r6_class_name<arrow::Array> {
245 static const char* get(const std::shared_ptr<arrow::Array>&);
246};
247
248template <>
249struct r6_class_name<arrow::Scalar> {
250 static const char* get(const std::shared_ptr<arrow::Scalar>&);
251};
252
253template <>
254struct r6_class_name<arrow::DataType> {
255 static const char* get(const std::shared_ptr<arrow::DataType>&);
256};
257
258#if defined(ARROW_R_WITH_DATASET)
259
260template <>
261struct r6_class_name<ds::Dataset> {
262 static const char* get(const std::shared_ptr<ds::Dataset>&);
263};
264
265template <>
266struct r6_class_name<ds::FileFormat> {
267 static const char* get(const std::shared_ptr<ds::FileFormat>&);
268};
269
270#endif
271
272} // namespace cpp11
273
274#endif