]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #pragma once | |
19 | ||
20 | #include <cpp11/R.hpp> | |
21 | ||
22 | #include "./arrow_cpp11.h" | |
23 | ||
24 | #if defined(ARROW_R_WITH_ARROW) | |
25 | ||
26 | #include <arrow/buffer.h> // for RBuffer definition below | |
27 | #include <arrow/result.h> | |
28 | #include <arrow/status.h> | |
29 | ||
30 | #include <limits> | |
31 | #include <memory> | |
32 | #include <utility> | |
33 | ||
34 | // forward declaration-only headers | |
35 | #include <arrow/c/abi.h> | |
36 | #include <arrow/compute/type_fwd.h> | |
37 | #include <arrow/csv/type_fwd.h> | |
38 | ||
39 | #if defined(ARROW_R_WITH_DATASET) | |
40 | #include <arrow/dataset/type_fwd.h> | |
41 | #endif | |
42 | ||
43 | #include <arrow/filesystem/type_fwd.h> | |
44 | #include <arrow/io/type_fwd.h> | |
45 | #include <arrow/ipc/type_fwd.h> | |
46 | ||
47 | #if defined(ARROW_R_WITH_JSON) | |
48 | #include <arrow/json/type_fwd.h> | |
49 | #endif | |
50 | ||
51 | #include <arrow/type_fwd.h> | |
52 | #include <arrow/util/type_fwd.h> | |
53 | ||
54 | namespace arrow { | |
55 | namespace compute { | |
56 | ||
57 | class ExecPlan; | |
58 | class ExecNode; | |
59 | ||
60 | } // namespace compute | |
61 | } // namespace arrow | |
62 | ||
63 | #if defined(ARROW_R_WITH_PARQUET) | |
64 | #include <parquet/type_fwd.h> | |
65 | #endif | |
66 | ||
67 | #if defined(ARROW_R_WITH_DATASET) | |
68 | namespace ds = ::arrow::dataset; | |
69 | #endif | |
70 | ||
71 | namespace compute = ::arrow::compute; | |
72 | namespace fs = ::arrow::fs; | |
73 | ||
74 | std::shared_ptr<arrow::RecordBatch> RecordBatch__from_arrays(SEXP, SEXP); | |
75 | arrow::MemoryPool* gc_memory_pool(); | |
76 | arrow::compute::ExecContext* gc_context(); | |
77 | ||
78 | #if (R_VERSION < R_Version(3, 5, 0)) | |
79 | #define LOGICAL_RO(x) ((const int*)LOGICAL(x)) | |
80 | #define INTEGER_RO(x) ((const int*)INTEGER(x)) | |
81 | #define REAL_RO(x) ((const double*)REAL(x)) | |
82 | #define COMPLEX_RO(x) ((const Rcomplex*)COMPLEX(x)) | |
83 | #define STRING_PTR_RO(x) ((const SEXP*)STRING_PTR(x)) | |
84 | #define RAW_RO(x) ((const Rbyte*)RAW(x)) | |
85 | #define DATAPTR_RO(x) ((const void*)STRING_PTR(x)) | |
86 | #define DATAPTR(x) (void*)STRING_PTR(x) | |
87 | #endif | |
88 | ||
89 | #define VECTOR_PTR_RO(x) ((const SEXP*)DATAPTR_RO(x)) | |
90 | ||
91 | namespace arrow { | |
92 | ||
93 | static inline void StopIfNotOk(const Status& status) { | |
94 | if (!status.ok()) { | |
95 | // ARROW-13039: be careful not to interpret our error message as a %-format string | |
96 | std::string s = status.ToString(); | |
97 | cpp11::stop("%s", s.c_str()); | |
98 | } | |
99 | } | |
100 | ||
// Unwraps a result-like object: its status is first handed to StopIfNotOk,
// and ValueOrDie() is only reached once that call has returned. The value
// category of `result` is forwarded to ValueOrDie().
template <typename R>
auto ValueOrStop(R&& result) -> decltype(std::forward<R>(result).ValueOrDie()) {
  const auto& outcome = result.status();
  StopIfNotOk(outcome);
  return std::forward<R>(result).ValueOrDie();
}
106 | ||
107 | namespace r { | |
108 | class RTasks; | |
109 | ||
110 | std::shared_ptr<arrow::DataType> InferArrowType(SEXP x); | |
111 | std::shared_ptr<arrow::Array> vec_to_arrow__reuse_memory(SEXP x); | |
112 | bool can_reuse_memory(SEXP x, const std::shared_ptr<arrow::DataType>& type); | |
113 | ||
114 | Status count_fields(SEXP lst, int* out); | |
115 | ||
116 | void inspect(SEXP obj); | |
117 | std::shared_ptr<arrow::Array> vec_to_arrow(SEXP x, | |
118 | const std::shared_ptr<arrow::DataType>& type, | |
119 | bool type_inferred); | |
120 | ||
// Sentinel used for integer64 values: the smallest representable int64_t.
constexpr int64_t NA_INT64 = std::numeric_limits<int64_t>::lowest();
123 | ||
124 | template <typename RVector> | |
125 | class RBuffer : public MutableBuffer { | |
126 | public: | |
127 | explicit RBuffer(RVector vec) | |
128 | : MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)), | |
129 | vec.size() * sizeof(typename RVector::value_type), | |
130 | arrow::CPUDevice::memory_manager(gc_memory_pool())), | |
131 | vec_(vec) {} | |
132 | ||
133 | private: | |
134 | // vec_ holds the memory | |
135 | RVector vec_; | |
136 | }; | |
137 | ||
138 | std::shared_ptr<arrow::DataType> InferArrowTypeFromFactor(SEXP); | |
139 | ||
140 | void validate_slice_offset(R_xlen_t offset, int64_t len); | |
141 | ||
142 | void validate_slice_length(R_xlen_t length, int64_t available); | |
143 | ||
144 | void validate_index(int i, int len); | |
145 | ||
146 | template <typename Lambda> | |
147 | void TraverseDots(cpp11::list dots, int num_fields, Lambda lambda) { | |
148 | cpp11::strings names(dots.attr(R_NamesSymbol)); | |
149 | ||
150 | for (R_xlen_t i = 0, j = 0; j < num_fields; i++) { | |
151 | auto name_i = names[i]; | |
152 | ||
153 | if (name_i.size() == 0) { | |
154 | cpp11::list x_i = dots[i]; | |
155 | cpp11::strings names_x_i(x_i.attr(R_NamesSymbol)); | |
156 | R_xlen_t n_i = x_i.size(); | |
157 | for (R_xlen_t k = 0; k < n_i; k++, j++) { | |
158 | lambda(j, x_i[k], names_x_i[k]); | |
159 | } | |
160 | } else { | |
161 | lambda(j, dots[i], name_i); | |
162 | j++; | |
163 | } | |
164 | } | |
165 | } | |
166 | ||
167 | inline cpp11::writable::list FlattenDots(cpp11::list dots, int num_fields) { | |
168 | std::vector<SEXP> out(num_fields); | |
169 | auto set = [&](int j, SEXP x, cpp11::r_string) { out[j] = x; }; | |
170 | TraverseDots(dots, num_fields, set); | |
171 | ||
172 | return cpp11::writable::list(out.begin(), out.end()); | |
173 | } | |
174 | ||
175 | arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields, | |
176 | std::shared_ptr<arrow::Schema>& schema); | |
177 | ||
178 | arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, | |
179 | std::shared_ptr<arrow::Schema>& schema); | |
180 | ||
181 | namespace altrep { | |
182 | ||
183 | #if defined(HAS_ALTREP) | |
184 | void Init_Altrep_classes(DllInfo* dll); | |
185 | #endif | |
186 | ||
187 | SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array); | |
188 | ||
189 | } // namespace altrep | |
190 | ||
191 | } // namespace r | |
192 | } // namespace arrow | |
193 | ||
194 | namespace cpp11 { | |
195 | ||
196 | template <typename T> | |
197 | struct r6_class_name { | |
198 | static const char* get(const std::shared_ptr<T>& ptr) { | |
199 | static const std::string name = arrow::util::nameof<T>(/*strip_namespace=*/true); | |
200 | return name.c_str(); | |
201 | } | |
202 | }; | |
203 | ||
204 | // Overrides of default R6 class names: | |
205 | #define R6_CLASS_NAME(CLASS, NAME) \ | |
206 | template <> \ | |
207 | struct r6_class_name<CLASS> { \ | |
208 | static const char* get(const std::shared_ptr<CLASS>&) { return NAME; } \ | |
209 | } | |
210 | ||
211 | R6_CLASS_NAME(arrow::csv::ReadOptions, "CsvReadOptions"); | |
212 | R6_CLASS_NAME(arrow::csv::ParseOptions, "CsvParseOptions"); | |
213 | R6_CLASS_NAME(arrow::csv::ConvertOptions, "CsvConvertOptions"); | |
214 | R6_CLASS_NAME(arrow::csv::TableReader, "CsvTableReader"); | |
215 | R6_CLASS_NAME(arrow::csv::WriteOptions, "CsvWriteOptions"); | |
216 | ||
217 | #if defined(ARROW_R_WITH_PARQUET) | |
218 | R6_CLASS_NAME(parquet::ArrowReaderProperties, "ParquetArrowReaderProperties"); | |
219 | R6_CLASS_NAME(parquet::ArrowWriterProperties, "ParquetArrowWriterProperties"); | |
220 | R6_CLASS_NAME(parquet::WriterProperties, "ParquetWriterProperties"); | |
221 | R6_CLASS_NAME(parquet::arrow::FileReader, "ParquetFileReader"); | |
222 | R6_CLASS_NAME(parquet::WriterPropertiesBuilder, "ParquetWriterPropertiesBuilder"); | |
223 | R6_CLASS_NAME(parquet::arrow::FileWriter, "ParquetFileWriter"); | |
224 | #endif | |
225 | ||
226 | R6_CLASS_NAME(arrow::ipc::feather::Reader, "FeatherReader"); | |
227 | ||
228 | #if defined(ARROW_R_WITH_JSON) | |
229 | R6_CLASS_NAME(arrow::json::ReadOptions, "JsonReadOptions"); | |
230 | R6_CLASS_NAME(arrow::json::ParseOptions, "JsonParseOptions"); | |
231 | R6_CLASS_NAME(arrow::json::TableReader, "JsonTableReader"); | |
232 | #endif | |
233 | ||
234 | #undef R6_CLASS_NAME | |
235 | ||
// Declarations of discriminated base classes.
// Definitions reside in corresponding .cpp files.
//
// Each specialization below only declares get(); unlike the overrides
// generated by R6_CLASS_NAME, no name is fixed in this header.
// NOTE(review): presumably each get() inspects the object behind the pointer
// to choose the R6 name of its concrete subclass -- confirm in the .cpp files.

// R6 class name lookup for filesystem objects.
template <>
struct r6_class_name<fs::FileSystem> {
  static const char* get(const std::shared_ptr<fs::FileSystem>&);
};

// R6 class name lookup for arrays.
template <>
struct r6_class_name<arrow::Array> {
  static const char* get(const std::shared_ptr<arrow::Array>&);
};

// R6 class name lookup for scalars.
template <>
struct r6_class_name<arrow::Scalar> {
  static const char* get(const std::shared_ptr<arrow::Scalar>&);
};

// R6 class name lookup for data types.
template <>
struct r6_class_name<arrow::DataType> {
  static const char* get(const std::shared_ptr<arrow::DataType>&);
};

// The dataset specializations exist only when dataset support is compiled in.
#if defined(ARROW_R_WITH_DATASET)

// R6 class name lookup for datasets.
template <>
struct r6_class_name<ds::Dataset> {
  static const char* get(const std::shared_ptr<ds::Dataset>&);
};

// R6 class name lookup for dataset file formats.
template <>
struct r6_class_name<ds::FileFormat> {
  static const char* get(const std::shared_ptr<ds::FileFormat>&);
};

#endif
271 | ||
272 | } // namespace cpp11 | |
273 | ||
274 | #endif |