]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "./arrow_types.h" | |
19 | ||
20 | #if defined(ARROW_R_WITH_ARROW) | |
21 | ||
22 | #include <arrow/array.h> | |
23 | #include <arrow/chunked_array.h> | |
24 | #include <arrow/compute/api.h> | |
25 | #include <arrow/util/bitmap_reader.h> | |
26 | ||
27 | #include <cpp11/altrep.hpp> | |
28 | #include <cpp11/declarations.hpp> | |
29 | #if defined(HAS_ALTREP) | |
30 | ||
31 | #if R_VERSION < R_Version(3, 6, 0) | |
32 | ||
33 | // workaround because R's <R_ext/Altrep.h> not so conveniently uses `class` | |
34 | // as a variable name, and C++ is not happy about that | |
35 | // | |
36 | // SEXP R_new_altrep(R_altrep_class_t class, SEXP data1, SEXP data2); | |
37 | // | |
38 | #define class klass | |
39 | ||
40 | // Because functions declared in <R_ext/Altrep.h> have C linkage | |
41 | extern "C" { | |
42 | #include <R_ext/Altrep.h> | |
43 | } | |
44 | ||
45 | // undo the workaround | |
46 | #undef class | |
47 | ||
48 | #else | |
49 | #include <R_ext/Altrep.h> | |
50 | #endif | |
51 | ||
52 | #include "./r_task_group.h" | |
53 | ||
54 | namespace arrow { | |
55 | namespace r { | |
56 | namespace altrep { | |
57 | ||
58 | namespace { | |
59 | template <typename c_type> | |
60 | R_xlen_t Standard_Get_region(SEXP data2, R_xlen_t i, R_xlen_t n, c_type* buf); | |
61 | ||
62 | template <> | |
63 | R_xlen_t Standard_Get_region<double>(SEXP data2, R_xlen_t i, R_xlen_t n, double* buf) { | |
64 | return REAL_GET_REGION(data2, i, n, buf); | |
65 | } | |
66 | ||
67 | template <> | |
68 | R_xlen_t Standard_Get_region<int>(SEXP data2, R_xlen_t i, R_xlen_t n, int* buf) { | |
69 | return INTEGER_GET_REGION(data2, i, n, buf); | |
70 | } | |
71 | ||
72 | void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; } | |
73 | using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>; | |
74 | ||
75 | // the Array that is being wrapped by the altrep object | |
76 | static const std::shared_ptr<Array>& GetArray(SEXP alt) { | |
77 | return *Pointer(R_altrep_data1(alt)); | |
78 | } | |
79 | ||
80 | // base class for all altrep vectors | |
81 | // | |
82 | // data1: the Array as an external pointer. | |
83 | // data2: starts as NULL, and becomes a standard R vector with the same | |
84 | // data if necessary: if materialization is needed, e.g. if we need | |
85 | // to access its data pointer, with DATAPTR(). | |
86 | template <typename Impl> | |
87 | struct AltrepVectorBase { | |
88 | // store the Array as an external pointer in data1, mark as immutable | |
89 | static SEXP Make(const std::shared_ptr<Array>& array) { | |
90 | SEXP alt = R_new_altrep(Impl::class_t, Pointer(new std::shared_ptr<Array>(array)), | |
91 | R_NilValue); | |
92 | MARK_NOT_MUTABLE(alt); | |
93 | ||
94 | return alt; | |
95 | } | |
96 | ||
97 | // Is the vector materialized, i.e. does the data2 slot contain a | |
98 | // standard R vector with the same data as the array. | |
99 | static bool IsMaterialized(SEXP alt) { return !Rf_isNull(R_altrep_data2(alt)); } | |
100 | ||
101 | static R_xlen_t Length(SEXP alt) { return GetArray(alt)->length(); } | |
102 | ||
103 | static int No_NA(SEXP alt) { return GetArray(alt)->null_count() == 0; } | |
104 | ||
105 | static int Is_sorted(SEXP alt) { return UNKNOWN_SORTEDNESS; } | |
106 | ||
107 | // What gets printed on .Internal(inspect(<the altrep object>)) | |
108 | static Rboolean Inspect(SEXP alt, int pre, int deep, int pvec, | |
109 | void (*inspect_subtree)(SEXP, int, int, int)) { | |
110 | const auto& array = GetArray(alt); | |
111 | Rprintf("arrow::Array<%s, %d nulls> len=%d, Array=<%p>\n", | |
112 | array->type()->ToString().c_str(), array->null_count(), array->length(), | |
113 | array.get()); | |
114 | return TRUE; | |
115 | } | |
116 | ||
117 | // Duplication is done by first materializing the vector and | |
118 | // then make a lazy duplicate of data2 | |
119 | static SEXP Duplicate(SEXP alt, Rboolean /* deep */) { | |
120 | return Rf_lazy_duplicate(Impl::Materialize(alt)); | |
121 | } | |
122 | ||
123 | static SEXP Coerce(SEXP alt, int type) { | |
124 | return Rf_coerceVector(Impl::Materialize(alt), type); | |
125 | } | |
126 | ||
127 | static SEXP Serialized_state(SEXP alt) { return Impl::Materialize(alt); } | |
128 | ||
129 | static SEXP Unserialize(SEXP /* class_ */, SEXP state) { return state; } | |
130 | }; | |
131 | ||
132 | // altrep R vector shadowing an primitive (int or double) Array. | |
133 | // | |
134 | // This tries as much as possible to directly use the data | |
135 | // from the Array and minimize data copies. | |
136 | template <int sexp_type> | |
137 | struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sexp_type>> { | |
138 | using Base = AltrepVectorBase<AltrepVectorPrimitive<sexp_type>>; | |
139 | ||
140 | // singleton altrep class description | |
141 | static R_altrep_class_t class_t; | |
142 | ||
143 | using c_type = typename std::conditional<sexp_type == REALSXP, double, int>::type; | |
144 | ||
145 | // Force materialization. After calling this, the data2 slot of the altrep | |
146 | // object contains a standard R vector with the same data, with | |
147 | // R sentinels where the Array has nulls. | |
148 | // | |
149 | // The Array remains available so that it can be used by Length(), Min(), etc ... | |
150 | static SEXP Materialize(SEXP alt) { | |
151 | if (!Base::IsMaterialized(alt)) { | |
152 | auto size = Base::Length(alt); | |
153 | ||
154 | // create a standard R vector | |
155 | SEXP copy = PROTECT(Rf_allocVector(sexp_type, size)); | |
156 | ||
157 | // copy the data from the array, through Get_region | |
158 | Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy))); | |
159 | ||
160 | // store as data2, this is now considered materialized | |
161 | R_set_altrep_data2(alt, copy); | |
162 | MARK_NOT_MUTABLE(copy); | |
163 | ||
164 | UNPROTECT(1); | |
165 | } | |
166 | return R_altrep_data2(alt); | |
167 | } | |
168 | ||
169 | // R calls this to get a pointer to the start of the vector data | |
170 | // but only if this is possible without allocating (in the R sense). | |
171 | static const void* Dataptr_or_null(SEXP alt) { | |
172 | // data2 has been created, and so the R sentinels are in place where the array has | |
173 | // nulls | |
174 | if (Base::IsMaterialized(alt)) { | |
175 | return DATAPTR_RO(R_altrep_data2(alt)); | |
176 | } | |
177 | ||
178 | // the Array has no nulls, we can directly return the start of its data | |
179 | const auto& array = GetArray(alt); | |
180 | if (array->null_count() == 0) { | |
181 | return reinterpret_cast<const void*>(array->data()->template GetValues<c_type>(1)); | |
182 | } | |
183 | ||
184 | // Otherwise: if the array has nulls and data2 has not been generated: give up | |
185 | return nullptr; | |
186 | } | |
187 | ||
188 | // R calls this to get a pointer to the start of the data, R allocations are allowed. | |
189 | static void* Dataptr(SEXP alt, Rboolean writeable) { | |
190 | // If the object hasn't been materialized, and the array has no | |
191 | // nulls we can directly point to the array data. | |
192 | if (!Base::IsMaterialized(alt)) { | |
193 | const auto& array = GetArray(alt); | |
194 | ||
195 | if (array->null_count() == 0) { | |
196 | return reinterpret_cast<void*>( | |
197 | const_cast<c_type*>(array->data()->template GetValues<c_type>(1))); | |
198 | } | |
199 | } | |
200 | ||
201 | // Otherwise we have to materialize and hand the pointer to data2 | |
202 | // | |
203 | // NOTE: this returns the DATAPTR() of data2 even in the case writeable = TRUE | |
204 | // | |
205 | // which is risky because C(++) clients of this object might | |
206 | // modify data2, and therefore make it diverge from the data of the Array, | |
207 | // but the object was marked as immutable on creation, so doing this is | |
208 | // disregarding the R api. | |
209 | // | |
210 | // Simply stop() when `writeable = TRUE` is too strong, e.g. this fails | |
211 | // identical() which calls DATAPTR() even though DATAPTR_RO() would | |
212 | // be enough | |
213 | return DATAPTR(Materialize(alt)); | |
214 | } | |
215 | ||
216 | // The value at position i | |
217 | static c_type Elt(SEXP alt, R_xlen_t i) { | |
218 | const auto& array = GetArray(alt); | |
219 | return array->IsNull(i) ? cpp11::na<c_type>() | |
220 | : array->data()->template GetValues<c_type>(1)[i]; | |
221 | } | |
222 | ||
223 | // R calls this when it wants data from position `i` to `i + n` copied into `buf` | |
224 | // The returned value is the number of values that were really copied | |
225 | // (this can be lower than n) | |
226 | static R_xlen_t Get_region(SEXP alt, R_xlen_t i, R_xlen_t n, c_type* buf) { | |
227 | // If we have data2, we can just copy the region into buf | |
228 | // using the standard Get_region for this R type | |
229 | if (Base::IsMaterialized(alt)) { | |
230 | return Standard_Get_region<c_type>(R_altrep_data2(alt), i, n, buf); | |
231 | } | |
232 | ||
233 | // The vector was not materialized, aka we don't have data2 | |
234 | // | |
235 | // In that case, we copy the data from the Array, and then | |
236 | // do a second pass to force the R sentinels for where the | |
237 | // array has nulls | |
238 | // | |
239 | // This only materialize the region, into buf. Not the entire vector. | |
240 | auto slice = GetArray(alt)->Slice(i, n); | |
241 | R_xlen_t ncopy = slice->length(); | |
242 | ||
243 | // first copy the data buffer | |
244 | memcpy(buf, slice->data()->template GetValues<c_type>(1), ncopy * sizeof(c_type)); | |
245 | ||
246 | // then set the R NA sentinels if needed | |
247 | if (slice->null_count() > 0) { | |
248 | internal::BitmapReader bitmap_reader(slice->null_bitmap()->data(), slice->offset(), | |
249 | ncopy); | |
250 | ||
251 | for (R_xlen_t j = 0; j < ncopy; j++, bitmap_reader.Next()) { | |
252 | if (bitmap_reader.IsNotSet()) { | |
253 | buf[j] = cpp11::na<c_type>(); | |
254 | } | |
255 | } | |
256 | } | |
257 | ||
258 | return ncopy; | |
259 | } | |
260 | ||
261 | static std::shared_ptr<arrow::compute::ScalarAggregateOptions> NaRmOptions( | |
262 | const std::shared_ptr<Array>& array, bool na_rm) { | |
263 | auto options = std::make_shared<arrow::compute::ScalarAggregateOptions>( | |
264 | arrow::compute::ScalarAggregateOptions::Defaults()); | |
265 | options->min_count = 0; | |
266 | options->skip_nulls = na_rm; | |
267 | return options; | |
268 | } | |
269 | ||
270 | template <bool Min> | |
271 | static SEXP MinMax(SEXP alt, Rboolean narm) { | |
272 | using data_type = typename std::conditional<sexp_type == REALSXP, double, int>::type; | |
273 | using scalar_type = | |
274 | typename std::conditional<sexp_type == INTSXP, Int32Scalar, DoubleScalar>::type; | |
275 | ||
276 | const auto& array = GetArray(alt); | |
277 | bool na_rm = narm == TRUE; | |
278 | auto n = array->length(); | |
279 | auto null_count = array->null_count(); | |
280 | if ((na_rm || n == 0) && null_count == n) { | |
281 | return Rf_ScalarReal(Min ? R_PosInf : R_NegInf); | |
282 | } | |
283 | if (!na_rm && null_count > 0) { | |
284 | return cpp11::as_sexp(cpp11::na<data_type>()); | |
285 | } | |
286 | ||
287 | auto options = NaRmOptions(array, na_rm); | |
288 | ||
289 | const auto& minmax = | |
290 | ValueOrStop(arrow::compute::CallFunction("min_max", {array}, options.get())); | |
291 | const auto& minmax_scalar = | |
292 | internal::checked_cast<const StructScalar&>(*minmax.scalar()); | |
293 | ||
294 | const auto& result_scalar = internal::checked_cast<const scalar_type&>( | |
295 | *ValueOrStop(minmax_scalar.field(Min ? "min" : "max"))); | |
296 | return cpp11::as_sexp(result_scalar.value); | |
297 | } | |
298 | ||
299 | static SEXP Min(SEXP alt, Rboolean narm) { return MinMax<true>(alt, narm); } | |
300 | ||
301 | static SEXP Max(SEXP alt, Rboolean narm) { return MinMax<false>(alt, narm); } | |
302 | ||
303 | static SEXP Sum(SEXP alt, Rboolean narm) { | |
304 | using data_type = typename std::conditional<sexp_type == REALSXP, double, int>::type; | |
305 | ||
306 | const auto& array = GetArray(alt); | |
307 | bool na_rm = narm == TRUE; | |
308 | auto null_count = array->null_count(); | |
309 | ||
310 | if (!na_rm && null_count > 0) { | |
311 | return cpp11::as_sexp(cpp11::na<data_type>()); | |
312 | } | |
313 | auto options = NaRmOptions(array, na_rm); | |
314 | ||
315 | const auto& sum = | |
316 | ValueOrStop(arrow::compute::CallFunction("sum", {array}, options.get())); | |
317 | ||
318 | if (sexp_type == INTSXP) { | |
319 | // When calling the "sum" function on an int32 array, we get an Int64 scalar | |
320 | // in case of overflow, make it a double like R | |
321 | int64_t value = internal::checked_cast<const Int64Scalar&>(*sum.scalar()).value; | |
322 | if (value <= INT32_MIN || value > INT32_MAX) { | |
323 | return Rf_ScalarReal(static_cast<double>(value)); | |
324 | } else { | |
325 | return Rf_ScalarInteger(static_cast<int>(value)); | |
326 | } | |
327 | } else { | |
328 | return Rf_ScalarReal( | |
329 | internal::checked_cast<const DoubleScalar&>(*sum.scalar()).value); | |
330 | } | |
331 | } | |
332 | }; | |
333 | template <int sexp_type> | |
334 | R_altrep_class_t AltrepVectorPrimitive<sexp_type>::class_t; | |
335 | ||
336 | // Implementation for string arrays | |
337 | template <typename Type> | |
338 | struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> { | |
339 | using Base = AltrepVectorBase<AltrepVectorString<Type>>; | |
340 | ||
341 | static R_altrep_class_t class_t; | |
342 | using StringArrayType = typename TypeTraits<Type>::ArrayType; | |
343 | ||
344 | // Helper class to convert to R strings | |
345 | struct RStringViewer { | |
346 | explicit RStringViewer(const std::shared_ptr<Array>& array) | |
347 | : array_(array), | |
348 | string_array_(internal::checked_cast<const StringArrayType*>(array.get())), | |
349 | strip_out_nuls_(GetBoolOption("arrow.skip_nul", false)), | |
350 | nul_was_stripped_(false) {} | |
351 | ||
352 | // convert the i'th string of the Array to an R string (CHARSXP) | |
353 | SEXP Convert(size_t i) { | |
354 | if (array_->IsNull(i)) { | |
355 | return NA_STRING; | |
356 | } | |
357 | ||
358 | view_ = string_array_->GetView(i); | |
359 | bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end(); | |
360 | ||
361 | if (no_nul) { | |
362 | return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8); | |
363 | } else if (strip_out_nuls_) { | |
364 | return ConvertStripNul(); | |
365 | } else { | |
366 | Error(); | |
367 | ||
368 | // not reached | |
369 | return R_NilValue; | |
370 | } | |
371 | } | |
372 | ||
373 | // strip the nuls and then convert to R string | |
374 | SEXP ConvertStripNul() { | |
375 | const char* old_string = view_.data(); | |
376 | ||
377 | size_t stripped_len = 0, nul_count = 0; | |
378 | ||
379 | for (size_t i = 0; i < view_.size(); i++) { | |
380 | if (old_string[i] == '\0') { | |
381 | ++nul_count; | |
382 | ||
383 | if (nul_count == 1) { | |
384 | // first nul spotted: allocate stripped string storage | |
385 | stripped_string_.assign(view_.begin(), view_.end()); | |
386 | stripped_len = i; | |
387 | } | |
388 | ||
389 | // don't copy old_string[i] (which is \0) into stripped_string | |
390 | continue; | |
391 | } | |
392 | ||
393 | if (nul_count > 0) { | |
394 | stripped_string_[stripped_len++] = old_string[i]; | |
395 | } | |
396 | } | |
397 | ||
398 | nul_was_stripped_ = true; | |
399 | return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8); | |
400 | } | |
401 | ||
402 | bool nul_was_stripped() const { return nul_was_stripped_; } | |
403 | ||
404 | // throw R error about embedded nul | |
405 | void Error() { | |
406 | stripped_string_ = "embedded nul in string: '"; | |
407 | for (char c : view_) { | |
408 | if (c) { | |
409 | stripped_string_ += c; | |
410 | } else { | |
411 | stripped_string_ += "\\0"; | |
412 | } | |
413 | } | |
414 | ||
415 | stripped_string_ += | |
416 | "'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul " | |
417 | "= TRUE)"; | |
418 | ||
419 | Rf_error(stripped_string_.c_str()); | |
420 | } | |
421 | ||
422 | const std::shared_ptr<Array>& array_; | |
423 | const StringArrayType* string_array_; | |
424 | std::string stripped_string_; | |
425 | const bool strip_out_nuls_; | |
426 | bool nul_was_stripped_; | |
427 | util::string_view view_; | |
428 | }; | |
429 | ||
430 | // Get a single string, as a CHARSXP SEXP, | |
431 | // either from data2 or directly from the Array | |
432 | static SEXP Elt(SEXP alt, R_xlen_t i) { | |
433 | if (Base::IsMaterialized(alt)) { | |
434 | return STRING_ELT(R_altrep_data2(alt), i); | |
435 | } | |
436 | ||
437 | BEGIN_CPP11 | |
438 | ||
439 | const auto& array = GetArray(alt); | |
440 | RStringViewer r_string_viewer(array); | |
441 | ||
442 | // r_string_viewer.Convert(i) might jump so it's wrapped | |
443 | // in cpp11::unwind_protect() so that string_viewer | |
444 | // can be properly destructed before the unwinding continues | |
445 | SEXP s = NA_STRING; | |
446 | cpp11::unwind_protect([&]() { | |
447 | s = r_string_viewer.Convert(i); | |
448 | if (r_string_viewer.nul_was_stripped()) { | |
449 | cpp11::warning("Stripping '\\0' (nul) from character vector"); | |
450 | } | |
451 | }); | |
452 | return s; | |
453 | ||
454 | END_CPP11 | |
455 | } | |
456 | ||
457 | static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); } | |
458 | ||
459 | static SEXP Materialize(SEXP alt) { | |
460 | if (Base::IsMaterialized(alt)) { | |
461 | return R_altrep_data2(alt); | |
462 | } | |
463 | ||
464 | BEGIN_CPP11 | |
465 | ||
466 | const auto& array = GetArray(alt); | |
467 | R_xlen_t n = array->length(); | |
468 | SEXP data2 = PROTECT(Rf_allocVector(STRSXP, n)); | |
469 | MARK_NOT_MUTABLE(data2); | |
470 | ||
471 | RStringViewer r_string_viewer(array); | |
472 | ||
473 | // r_string_viewer.Convert(i) might jump so we have to | |
474 | // wrap it in unwind_protect() to: | |
475 | // - correctly destruct the C++ objects | |
476 | // - resume the unwinding | |
477 | cpp11::unwind_protect([&]() { | |
478 | for (R_xlen_t i = 0; i < n; i++) { | |
479 | SET_STRING_ELT(data2, i, r_string_viewer.Convert(i)); | |
480 | } | |
481 | ||
482 | if (r_string_viewer.nul_was_stripped()) { | |
483 | cpp11::warning("Stripping '\\0' (nul) from character vector"); | |
484 | } | |
485 | }); | |
486 | ||
487 | // only set to data2 if all the values have been converted | |
488 | R_set_altrep_data2(alt, data2); | |
489 | UNPROTECT(1); // data2 | |
490 | ||
491 | return data2; | |
492 | ||
493 | END_CPP11 | |
494 | } | |
495 | ||
496 | static const void* Dataptr_or_null(SEXP alt) { | |
497 | if (Base::IsMaterialized(alt)) return DATAPTR(R_altrep_data2(alt)); | |
498 | ||
499 | // otherwise give up | |
500 | return nullptr; | |
501 | } | |
502 | ||
503 | static void Set_elt(SEXP alt, R_xlen_t i, SEXP v) { | |
504 | Rf_error("ALTSTRING objects of type <arrow::array_string_vector> are immutable"); | |
505 | } | |
506 | }; | |
507 | ||
508 | template <typename Type> | |
509 | R_altrep_class_t AltrepVectorString<Type>::class_t; | |
510 | ||
511 | // initialize altrep, altvec, altreal, and altinteger methods | |
512 | template <typename AltrepClass> | |
513 | void InitAltrepMethods(R_altrep_class_t class_t, DllInfo* dll) { | |
514 | R_set_altrep_Length_method(class_t, AltrepClass::Length); | |
515 | R_set_altrep_Inspect_method(class_t, AltrepClass::Inspect); | |
516 | R_set_altrep_Duplicate_method(class_t, AltrepClass::Duplicate); | |
517 | R_set_altrep_Serialized_state_method(class_t, AltrepClass::Serialized_state); | |
518 | R_set_altrep_Unserialize_method(class_t, AltrepClass::Unserialize); | |
519 | R_set_altrep_Coerce_method(class_t, AltrepClass::Coerce); | |
520 | } | |
521 | ||
522 | template <typename AltrepClass> | |
523 | void InitAltvecMethods(R_altrep_class_t class_t, DllInfo* dll) { | |
524 | R_set_altvec_Dataptr_method(class_t, AltrepClass::Dataptr); | |
525 | R_set_altvec_Dataptr_or_null_method(class_t, AltrepClass::Dataptr_or_null); | |
526 | } | |
527 | ||
528 | template <typename AltrepClass> | |
529 | void InitAltRealMethods(R_altrep_class_t class_t, DllInfo* dll) { | |
530 | R_set_altreal_No_NA_method(class_t, AltrepClass::No_NA); | |
531 | R_set_altreal_Is_sorted_method(class_t, AltrepClass::Is_sorted); | |
532 | ||
533 | R_set_altreal_Sum_method(class_t, AltrepClass::Sum); | |
534 | R_set_altreal_Min_method(class_t, AltrepClass::Min); | |
535 | R_set_altreal_Max_method(class_t, AltrepClass::Max); | |
536 | ||
537 | R_set_altreal_Elt_method(class_t, AltrepClass::Elt); | |
538 | R_set_altreal_Get_region_method(class_t, AltrepClass::Get_region); | |
539 | } | |
540 | ||
541 | template <typename AltrepClass> | |
542 | void InitAltIntegerMethods(R_altrep_class_t class_t, DllInfo* dll) { | |
543 | R_set_altinteger_No_NA_method(class_t, AltrepClass::No_NA); | |
544 | R_set_altinteger_Is_sorted_method(class_t, AltrepClass::Is_sorted); | |
545 | ||
546 | R_set_altinteger_Sum_method(class_t, AltrepClass::Sum); | |
547 | R_set_altinteger_Min_method(class_t, AltrepClass::Min); | |
548 | R_set_altinteger_Max_method(class_t, AltrepClass::Max); | |
549 | ||
550 | R_set_altinteger_Elt_method(class_t, AltrepClass::Elt); | |
551 | R_set_altinteger_Get_region_method(class_t, AltrepClass::Get_region); | |
552 | } | |
553 | ||
554 | template <typename AltrepClass> | |
555 | void InitAltRealClass(DllInfo* dll, const char* name) { | |
556 | AltrepClass::class_t = R_make_altreal_class(name, "arrow", dll); | |
557 | InitAltrepMethods<AltrepClass>(AltrepClass::class_t, dll); | |
558 | InitAltvecMethods<AltrepClass>(AltrepClass::class_t, dll); | |
559 | InitAltRealMethods<AltrepClass>(AltrepClass::class_t, dll); | |
560 | } | |
561 | ||
562 | template <typename AltrepClass> | |
563 | void InitAltIntegerClass(DllInfo* dll, const char* name) { | |
564 | AltrepClass::class_t = R_make_altinteger_class(name, "arrow", dll); | |
565 | InitAltrepMethods<AltrepClass>(AltrepClass::class_t, dll); | |
566 | InitAltvecMethods<AltrepClass>(AltrepClass::class_t, dll); | |
567 | InitAltIntegerMethods<AltrepClass>(AltrepClass::class_t, dll); | |
568 | } | |
569 | ||
570 | template <typename AltrepClass> | |
571 | void InitAltStringClass(DllInfo* dll, const char* name) { | |
572 | AltrepClass::class_t = R_make_altstring_class(name, "arrow", dll); | |
573 | R_set_altrep_Length_method(AltrepClass::class_t, AltrepClass::Length); | |
574 | R_set_altrep_Inspect_method(AltrepClass::class_t, AltrepClass::Inspect); | |
575 | R_set_altrep_Duplicate_method(AltrepClass::class_t, AltrepClass::Duplicate); | |
576 | R_set_altrep_Serialized_state_method(AltrepClass::class_t, | |
577 | AltrepClass::Serialized_state); | |
578 | R_set_altrep_Unserialize_method(AltrepClass::class_t, AltrepClass::Unserialize); | |
579 | R_set_altrep_Coerce_method(AltrepClass::class_t, AltrepClass::Coerce); | |
580 | ||
581 | R_set_altvec_Dataptr_method(AltrepClass::class_t, AltrepClass::Dataptr); | |
582 | R_set_altvec_Dataptr_or_null_method(AltrepClass::class_t, AltrepClass::Dataptr_or_null); | |
583 | ||
584 | R_set_altstring_Elt_method(AltrepClass::class_t, AltrepClass::Elt); | |
585 | R_set_altstring_Set_elt_method(AltrepClass::class_t, AltrepClass::Set_elt); | |
586 | R_set_altstring_No_NA_method(AltrepClass::class_t, AltrepClass::No_NA); | |
587 | R_set_altstring_Is_sorted_method(AltrepClass::class_t, AltrepClass::Is_sorted); | |
588 | } | |
589 | ||
590 | } // namespace | |
591 | ||
592 | // initialize the altrep classes | |
593 | void Init_Altrep_classes(DllInfo* dll) { | |
594 | InitAltRealClass<AltrepVectorPrimitive<REALSXP>>(dll, "arrow::array_dbl_vector"); | |
595 | InitAltIntegerClass<AltrepVectorPrimitive<INTSXP>>(dll, "arrow::array_int_vector"); | |
596 | ||
597 | InitAltStringClass<AltrepVectorString<StringType>>(dll, "arrow::array_string_vector"); | |
598 | InitAltStringClass<AltrepVectorString<LargeStringType>>( | |
599 | dll, "arrow::array_large_string_vector"); | |
600 | } | |
601 | ||
602 | // return an altrep R vector that shadows the array if possible | |
603 | SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array) { | |
604 | // special case when there is only one array | |
605 | if (chunked_array->num_chunks() == 1) { | |
606 | const auto& array = chunked_array->chunk(0); | |
607 | // using altrep if | |
608 | // - the arrow.use_altrep is set to TRUE or unset (implicit TRUE) | |
609 | // - the array has at least one element | |
610 | if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0) { | |
611 | switch (array->type()->id()) { | |
612 | case arrow::Type::DOUBLE: | |
613 | return altrep::AltrepVectorPrimitive<REALSXP>::Make(array); | |
614 | ||
615 | case arrow::Type::INT32: | |
616 | return altrep::AltrepVectorPrimitive<INTSXP>::Make(array); | |
617 | ||
618 | case arrow::Type::STRING: | |
619 | return altrep::AltrepVectorString<StringType>::Make(array); | |
620 | ||
621 | case arrow::Type::LARGE_STRING: | |
622 | return altrep::AltrepVectorString<LargeStringType>::Make(array); | |
623 | ||
624 | default: | |
625 | break; | |
626 | } | |
627 | } | |
628 | } | |
629 | return R_NilValue; | |
630 | } | |
631 | ||
632 | bool is_arrow_altrep(SEXP x) { | |
633 | if (ALTREP(x)) { | |
634 | SEXP info = ALTREP_CLASS_SERIALIZED_CLASS(ALTREP_CLASS(x)); | |
635 | SEXP pkg = ALTREP_SERIALIZED_CLASS_PKGSYM(info); | |
636 | ||
637 | if (pkg == symbols::arrow) return true; | |
638 | } | |
639 | ||
640 | return false; | |
641 | } | |
642 | ||
643 | std::shared_ptr<Array> vec_to_arrow_altrep_bypass(SEXP x) { | |
644 | if (is_arrow_altrep(x)) { | |
645 | return GetArray(x); | |
646 | } | |
647 | ||
648 | return nullptr; | |
649 | } | |
650 | ||
651 | } // namespace altrep | |
652 | } // namespace r | |
653 | } // namespace arrow | |
654 | ||
655 | #else // HAS_ALTREP | |
656 | ||
657 | namespace arrow { | |
658 | namespace r { | |
659 | namespace altrep { | |
660 | ||
661 | // return an altrep R vector that shadows the array if possible | |
662 | SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array) { | |
663 | return R_NilValue; | |
664 | } | |
665 | ||
666 | bool is_arrow_altrep(SEXP) { return false; } | |
667 | ||
668 | std::shared_ptr<Array> vec_to_arrow_altrep_bypass(SEXP x) { return nullptr; } | |
669 | ||
670 | } // namespace altrep | |
671 | } // namespace r | |
672 | } // namespace arrow | |
673 | ||
674 | #endif | |
675 | ||
676 | // [[arrow::export]] | |
677 | void test_SET_STRING_ELT(SEXP s) { SET_STRING_ELT(s, 0, Rf_mkChar("forbidden")); } | |
678 | ||
679 | // [[arrow::export]] | |
680 | bool test_same_Array(SEXP x, SEXP y) { | |
681 | auto* p_x = reinterpret_cast<std::shared_ptr<arrow::Array>*>(x); | |
682 | auto* p_y = reinterpret_cast<std::shared_ptr<arrow::Array>*>(y); | |
683 | ||
684 | return p_x->get() == p_y->get(); | |
685 | } | |
686 | ||
687 | // [[arrow::export]] | |
688 | bool is_arrow_altrep(SEXP x) { return arrow::r::altrep::is_arrow_altrep(x); } | |
689 | ||
690 | #endif |