]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/r/tests/testthat/test-Array.R
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / r / tests / testthat / test-Array.R
1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
16 # under the License.
17
18 test_that("Integer Array", {
19 ints <- c(1:10, 1:10, 1:5)
20 x <- expect_array_roundtrip(ints, int32())
21 })
22
23 test_that("binary Array", {
24 # if the type is given, we just need a list of raw vectors
25 bin <- list(as.raw(1:10), as.raw(1:10))
26 expect_array_roundtrip(bin, binary(), as = binary())
27 expect_array_roundtrip(bin, large_binary(), as = large_binary())
28 expect_array_roundtrip(bin, fixed_size_binary(10), as = fixed_size_binary(10))
29
30 bin[[1L]] <- as.raw(1:20)
31 expect_error(Array$create(bin, fixed_size_binary(10)))
32
33 # otherwise the arrow type is deduced from the R classes
34 bin <- vctrs::new_vctr(
35 list(as.raw(1:10), as.raw(11:20)),
36 class = "arrow_binary"
37 )
38 expect_array_roundtrip(bin, binary())
39
40 bin <- vctrs::new_vctr(
41 list(as.raw(1:10), as.raw(11:20)),
42 class = "arrow_large_binary"
43 )
44 expect_array_roundtrip(bin, large_binary())
45
46 bin <- vctrs::new_vctr(
47 list(as.raw(1:10), as.raw(11:20)),
48 class = "arrow_fixed_size_binary",
49 byte_width = 10L
50 )
51 expect_array_roundtrip(bin, fixed_size_binary(byte_width = 10))
52
53 # degenerate cases
54 skip_on_valgrind() # valgrind errors on these tests ARROW-12638
55 bin <- vctrs::new_vctr(
56 list(1:10),
57 class = "arrow_binary"
58 )
59 expect_error(Array$create(bin))
60
61 bin <- vctrs::new_vctr(
62 list(1:10),
63 ptype = raw(),
64 class = "arrow_large_binary"
65 )
66 expect_error(Array$create(bin))
67
68 bin <- vctrs::new_vctr(
69 list(1:10),
70 class = "arrow_fixed_size_binary",
71 byte_width = 10
72 )
73 expect_error(Array$create(bin))
74
75 bin <- vctrs::new_vctr(
76 list(as.raw(1:5)),
77 class = "arrow_fixed_size_binary",
78 byte_width = 10
79 )
80 expect_error(Array$create(bin))
81
82 bin <- vctrs::new_vctr(
83 list(as.raw(1:5)),
84 class = "arrow_fixed_size_binary"
85 )
86 expect_error(Array$create(bin))
87 })
88
89 test_that("Slice() and RangeEquals()", {
90 ints <- c(1:10, 101:110, 201:205)
91 x <- Array$create(ints)
92
93 y <- x$Slice(10)
94 expect_equal(y$type, int32())
95 expect_equal(length(y), 15L)
96 expect_as_vector(y, c(101:110, 201:205))
97 expect_true(x$RangeEquals(y, 10, 24))
98 expect_false(x$RangeEquals(y, 9, 23))
99 expect_false(x$RangeEquals(y, 11, 24))
100
101 z <- x$Slice(10, 5)
102 expect_as_vector(z, c(101:105))
103 expect_true(x$RangeEquals(z, 10, 15, 0))
104
105 # Input validation
106 expect_error(x$Slice("ten"))
107 expect_error(x$Slice(NA_integer_), "Slice 'offset' cannot be NA")
108 expect_error(x$Slice(NA), "Slice 'offset' cannot be NA")
109 expect_error(x$Slice(10, "ten"))
110 expect_error(x$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
111 expect_error(x$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
112 expect_error(x$Slice(c(10, 10)))
113 expect_error(x$Slice(10, c(10, 10)))
114 expect_error(x$Slice(1000), "Slice 'offset' greater than array length")
115 expect_error(x$Slice(-1), "Slice 'offset' cannot be negative")
116 expect_error(z$Slice(10, 10), "Slice 'offset' greater than array length")
117 expect_error(x$Slice(10, -1), "Slice 'length' cannot be negative")
118 expect_error(x$Slice(-1, 10), "Slice 'offset' cannot be negative")
119
120 expect_warning(x$Slice(10, 15), NA)
121 expect_warning(
122 overslice <- x$Slice(10, 16),
123 "Slice 'length' greater than available length"
124 )
125 expect_equal(length(overslice), 15)
126 expect_warning(z$Slice(2, 10), "Slice 'length' greater than available length")
127
128 expect_error(x$RangeEquals(10, 24, 0), 'other must be a "Array"')
129 expect_error(x$RangeEquals(y, NA, 24), "'start_idx' cannot be NA")
130 expect_error(x$RangeEquals(y, 10, NA), "'end_idx' cannot be NA")
131 expect_error(x$RangeEquals(y, 10, 24, NA), "'other_start_idx' cannot be NA")
132 expect_error(x$RangeEquals(y, "ten", 24))
133
134 skip("TODO: (if anyone uses RangeEquals)")
135 expect_error(x$RangeEquals(y, 10, 2400, 0)) # does not error
136 expect_error(x$RangeEquals(y, 1000, 24, 0)) # does not error
137 expect_error(x$RangeEquals(y, 10, 24, 1000)) # does not error
138 })
139
140 test_that("Double Array", {
141 dbls <- c(1, 2, 3, 4, 5, 6)
142 x_dbl <- expect_array_roundtrip(dbls, float64())
143 })
144
145 test_that("Array print method includes type", {
146 x <- Array$create(c(1:10, 1:10, 1:5))
147 expect_output(print(x), "Array\n<int32>\n[\n", fixed = TRUE)
148 })
149
150 test_that("Array supports NA", {
151 x_int <- Array$create(as.integer(c(1:10, NA)))
152 x_dbl <- Array$create(as.numeric(c(1:10, NA)))
153 expect_true(x_int$IsValid(0))
154 expect_true(x_dbl$IsValid(0L))
155 expect_true(x_int$IsNull(10L))
156 expect_true(x_dbl$IsNull(10))
157
158 expect_as_vector(is.na(x_int), c(rep(FALSE, 10), TRUE))
159 expect_as_vector(is.na(x_dbl), c(rep(FALSE, 10), TRUE))
160
161 # Input validation
162 expect_error(x_int$IsValid("ten"))
163 expect_error(x_int$IsNull("ten"))
164 expect_error(x_int$IsValid(c(10, 10)))
165 expect_error(x_int$IsNull(c(10, 10)))
166 expect_error(x_int$IsValid(NA), "'i' cannot be NA")
167 expect_error(x_int$IsNull(NA), "'i' cannot be NA")
168 expect_error(x_int$IsValid(1000), "subscript out of bounds")
169 expect_error(x_int$IsValid(-1), "subscript out of bounds")
170 expect_error(x_int$IsNull(1000), "subscript out of bounds")
171 expect_error(x_int$IsNull(-1), "subscript out of bounds")
172 })
173
174 test_that("Array support null type (ARROW-7064)", {
175 expect_array_roundtrip(vctrs::unspecified(10), null())
176 })
177
178 test_that("Array supports logical vectors (ARROW-3341)", {
179 # with NA
180 x <- sample(c(TRUE, FALSE, NA), 1000, replace = TRUE)
181 expect_array_roundtrip(x, bool())
182
183 # without NA
184 x <- sample(c(TRUE, FALSE), 1000, replace = TRUE)
185 expect_array_roundtrip(x, bool())
186 })
187
188 test_that("Array supports character vectors (ARROW-3339)", {
189 # without NA
190 expect_array_roundtrip(c("itsy", "bitsy", "spider"), utf8())
191 expect_array_roundtrip(c("itsy", "bitsy", "spider"), large_utf8(), as = large_utf8())
192
193 # with NA
194 expect_array_roundtrip(c("itsy", NA, "spider"), utf8())
195 expect_array_roundtrip(c("itsy", NA, "spider"), large_utf8(), as = large_utf8())
196 })
197
198 test_that("Character vectors > 2GB become large_utf8", {
199 skip_on_cran()
200 skip_if_not_running_large_memory_tests()
201 big <- make_big_string()
202 expect_array_roundtrip(big, large_utf8())
203 })
204
205 test_that("empty arrays are supported", {
206 expect_array_roundtrip(character(), utf8())
207 expect_array_roundtrip(character(), large_utf8(), as = large_utf8())
208 expect_array_roundtrip(integer(), int32())
209 expect_array_roundtrip(numeric(), float64())
210 expect_array_roundtrip(factor(character()), dictionary(int8(), utf8()))
211 expect_array_roundtrip(logical(), bool())
212 })
213
214 test_that("array with all nulls are supported", {
215 nas <- c(NA, NA)
216 expect_array_roundtrip(as.character(nas), utf8())
217 expect_array_roundtrip(as.integer(nas), int32())
218 expect_array_roundtrip(as.numeric(nas), float64())
219 expect_array_roundtrip(as.factor(nas), dictionary(int8(), utf8()))
220 expect_array_roundtrip(as.logical(nas), bool())
221 })
222
223 test_that("Array supports unordered factors (ARROW-3355)", {
224 # without NA
225 f <- factor(c("itsy", "bitsy", "spider", "spider"))
226 expect_array_roundtrip(f, dictionary(int8(), utf8()))
227
228 # with NA
229 f <- factor(c("itsy", "bitsy", NA, "spider", "spider"))
230 expect_array_roundtrip(f, dictionary(int8(), utf8()))
231 })
232
233 test_that("Array supports ordered factors (ARROW-3355)", {
234 # without NA
235 f <- ordered(c("itsy", "bitsy", "spider", "spider"))
236 arr_fac <- expect_array_roundtrip(f, dictionary(int8(), utf8(), ordered = TRUE))
237 expect_true(arr_fac$ordered)
238
239 # with NA
240 f <- ordered(c("itsy", "bitsy", NA, "spider", "spider"))
241 expect_array_roundtrip(f, dictionary(int8(), utf8(), ordered = TRUE))
242 })
243
244 test_that("array supports Date (ARROW-3340)", {
245 d <- Sys.Date() + 1:10
246 expect_array_roundtrip(d, date32())
247
248 d[5] <- NA
249 expect_array_roundtrip(d, date32())
250 })
251
252 test_that("array supports POSIXct (ARROW-3340)", {
253 times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10
254 expect_array_roundtrip(times, timestamp("us", "UTC"))
255
256 times[5] <- NA
257 expect_array_roundtrip(times, timestamp("us", "UTC"))
258
259 times2 <- lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10
260 expect_array_roundtrip(times2, timestamp("us", "US/Eastern"))
261 })
262
263 test_that("array supports POSIXct without timezone", {
264 # Make sure timezone is not set
265 withr::with_envvar(c(TZ = ""), {
266 times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
267 expect_array_roundtrip(times, timestamp("us", ""))
268
269 # Also test the INTSXP code path
270 skip("Ingest_POSIXct only implemented for REALSXP")
271 times_int <- as.integer(times)
272 attributes(times_int) <- attributes(times)
273 expect_array_roundtrip(times_int, timestamp("us", ""))
274 })
275 })
276
277 test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", {
278 # Write a feather file as that's what the initial bug report used
279 df <- tibble::tibble(
280 no_tz = lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10,
281 yes_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas") + 1:10
282 )
283 if (!identical(Sys.timezone(), "Pacific/Marquesas")) {
284 # Confirming that the columns are in fact different
285 expect_false(any(df$no_tz == df$yes_tz))
286 }
287 feather_file <- tempfile()
288 on.exit(unlink(feather_file))
289 write_feather(df, feather_file)
290 expect_identical(read_feather(feather_file), df)
291 })
292
293 test_that("array supports integer64", {
294 x <- bit64::as.integer64(1:10) + MAX_INT
295 expect_array_roundtrip(x, int64())
296
297 x[4] <- NA
298 expect_array_roundtrip(x, int64())
299
300 # all NA int64 (ARROW-3795)
301 all_na <- Array$create(bit64::as.integer64(NA))
302 expect_type_equal(all_na, int64())
303 expect_true(as.vector(is.na(all_na)))
304 })
305
306 test_that("array supports difftime", {
307 time <- hms::hms(56, 34, 12)
308 expect_array_roundtrip(c(time, time), time32("s"))
309 expect_array_roundtrip(vctrs::vec_c(NA, time), time32("s"))
310 })
311
312 test_that("support for NaN (ARROW-3615)", {
313 x <- c(1, NA, NaN, -1)
314 y <- Array$create(x)
315 expect_true(y$IsValid(2))
316 expect_equal(y$null_count, 1L)
317 })
318
319 test_that("is.nan() evalutes to FALSE on NA (for consistency with base R)", {
320 x <- c(1.0, NA, NaN, -1.0)
321 compare_expression(is.nan(.input), x)
322 })
323
324 test_that("is.nan() evalutes to FALSE on non-floats (for consistency with base R)", {
325 x <- c(1L, 2L, 3L)
326 y <- c("foo", "bar")
327 compare_expression(is.nan(.input), x)
328 compare_expression(is.nan(.input), y)
329 })
330
331 test_that("is.na() evalutes to TRUE on NaN (for consistency with base R)", {
332 x <- c(1, NA, NaN, -1)
333 compare_expression(is.na(.input), x)
334 })
335
336 test_that("integer types casts (ARROW-3741)", {
337 # Defining some type groups for use here and in the following tests
338 int_types <- c(int8(), int16(), int32(), int64())
339 uint_types <- c(uint8(), uint16(), uint32(), uint64())
340 float_types <- c(float32(), float64()) # float16() not really supported in C++ yet
341
342 a <- Array$create(c(1:10, NA))
343 for (type in c(int_types, uint_types)) {
344 casted <- a$cast(type)
345 expect_equal(casted$type, type)
346 expect_identical(as.vector(is.na(casted)), c(rep(FALSE, 10), TRUE))
347 }
348 })
349
350 test_that("integer types cast safety (ARROW-3741, ARROW-5541)", {
351 a <- Array$create(-(1:10))
352 for (type in uint_types) {
353 expect_error(a$cast(type), regexp = "Integer value -1 not in range")
354 expect_error(a$cast(type, safe = FALSE), NA)
355 }
356 })
357
358 test_that("float types casts (ARROW-3741)", {
359 x <- c(1, 2, 3, NA)
360 a <- Array$create(x)
361 for (type in float_types) {
362 casted <- a$cast(type)
363 expect_equal(casted$type, type)
364 expect_identical(as.vector(is.na(casted)), c(rep(FALSE, 3), TRUE))
365 expect_identical(as.vector(casted), x)
366 }
367 })
368
369 test_that("cast to half float works", {
370 skip("Need halffloat support: https://issues.apache.org/jira/browse/ARROW-3802")
371 a <- Array$create(1:4)
372 a_f16 <- a$cast(float16())
373 expect_type_equal(a_16$type, float16())
374 })
375
376 test_that("cast input validation", {
377 a <- Array$create(1:4)
378 expect_error(a$cast("not a type"), "type must be a DataType, not character")
379 })
380
381 test_that("Array$create() supports the type= argument. conversion from INTSXP and int64 to all int types", {
382 num_int32 <- 12L
383 num_int64 <- bit64::as.integer64(10)
384
385 types <- c(
386 int_types,
387 uint_types,
388 float_types,
389 double() # not actually a type, a base R function but should be alias for float64
390 )
391 for (type in types) {
392 expect_type_equal(Array$create(num_int32, type = type)$type, as_type(type))
393 expect_type_equal(Array$create(num_int64, type = type)$type, as_type(type))
394 }
395
396 # Input validation
397 expect_error(
398 Array$create(5, type = "not a type"),
399 "type must be a DataType, not character"
400 )
401 })
402
403 test_that("Array$create() aborts on overflow", {
404 expect_error(Array$create(128L, type = int8()))
405 expect_error(Array$create(-129L, type = int8()))
406
407 expect_error(Array$create(256L, type = uint8()))
408 expect_error(Array$create(-1L, type = uint8()))
409
410 expect_error(Array$create(32768L, type = int16()))
411 expect_error(Array$create(-32769L, type = int16()))
412
413 expect_error(Array$create(65536L, type = uint16()))
414 expect_error(Array$create(-1L, type = uint16()))
415
416 expect_error(Array$create(65536L, type = uint16()))
417 expect_error(Array$create(-1L, type = uint16()))
418
419 expect_error(Array$create(bit64::as.integer64(2^31), type = int32()))
420 expect_error(Array$create(bit64::as.integer64(2^32), type = uint32()))
421 })
422
423 test_that("Array$create() does not convert doubles to integer", {
424 for (type in c(int_types, uint_types)) {
425 a <- Array$create(10, type = type)
426 expect_type_equal(a$type, type)
427 expect_true(as.vector(a) == 10L)
428 }
429 })
430
431 test_that("Array$create() converts raw vectors to uint8 arrays (ARROW-3794)", {
432 expect_type_equal(Array$create(as.raw(1:10))$type, uint8())
433 })
434
435 test_that("Array<int8>$as_vector() converts to integer (ARROW-3794)", {
436 i8 <- (-128):127
437 a <- Array$create(i8)$cast(int8())
438 expect_type_equal(a, int8())
439 expect_as_vector(a, i8)
440
441 u8 <- 0:255
442 a <- Array$create(u8)$cast(uint8())
443 expect_type_equal(a, uint8())
444 expect_as_vector(a, u8)
445 })
446
447 test_that("Arrays of {,u}int{32,64} convert to integer if they can fit", {
448 u32 <- Array$create(1L)$cast(uint32())
449 expect_identical(as.vector(u32), 1L)
450
451 u64 <- Array$create(1L)$cast(uint64())
452 expect_identical(as.vector(u64), 1L)
453
454 i64 <- Array$create(bit64::as.integer64(1:10))
455 expect_identical(as.vector(i64), 1:10)
456 })
457
458 test_that("Arrays of uint{32,64} convert to numeric if they can't fit integer", {
459 u32 <- Array$create(bit64::as.integer64(1) + MAX_INT)$cast(uint32())
460 expect_identical(as.vector(u32), 1 + MAX_INT)
461
462 u64 <- Array$create(bit64::as.integer64(1) + MAX_INT)$cast(uint64())
463 expect_identical(as.vector(u64), 1 + MAX_INT)
464 })
465
466 test_that("Array$create() recognise arrow::Array (ARROW-3815)", {
467 a <- Array$create(1:10)
468 expect_equal(a, Array$create(a))
469 })
470
471 test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", {
472 df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
473 a <- Array$create(df)
474 expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8()))
475 expect_as_vector(a, df)
476
477 df <- structure(
478 list(col = structure(list(structure(list(list(structure(1))), class = "inner")), class = "outer")),
479 class = "data.frame", row.names = c(NA, -1L)
480 )
481 a <- Array$create(df)
482 expect_type_equal(a$type, struct(col = list_of(list_of(list_of(float64())))))
483 expect_as_vector(a, df, ignore_attr = TRUE)
484 })
485
486 test_that("StructArray methods", {
487 df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
488 a <- Array$create(df)
489 expect_equal(a$x, Array$create(df$x))
490 expect_equal(a[["x"]], Array$create(df$x))
491 expect_equal(a[[1]], Array$create(df$x))
492 expect_identical(names(a), c("x", "y", "z"))
493 expect_identical(dim(a), c(10L, 3L))
494 })
495
496 test_that("Array$create() can handle data frame with custom struct type (not inferred)", {
497 df <- tibble::tibble(x = 1:10, y = 1:10)
498 type <- struct(x = float64(), y = int16())
499 a <- Array$create(df, type = type)
500 expect_type_equal(a$type, type)
501
502 type <- struct(x = float64(), y = int16(), z = int32())
503 expect_error(
504 Array$create(df, type = type),
505 regexp = "Number of fields in struct.* incompatible with number of columns in the data frame"
506 )
507
508 type <- struct(y = int16(), x = float64())
509 expect_error(
510 Array$create(df, type = type),
511 regexp = "Field name in position.*does not match the name of the column of the data frame"
512 )
513
514 type <- struct(x = float64(), y = utf8())
515 expect_error(Array$create(df, type = type), regexp = "Invalid")
516 })
517
518 test_that("Array$create() supports tibble with no columns (ARROW-8354)", {
519 df <- tibble::tibble()
520 expect_equal(Array$create(df)$as_vector(), df)
521 })
522
523 test_that("Array$create() handles vector -> list arrays (ARROW-7662)", {
524 # Should be able to create an empty list with a type hint.
525 expect_r6_class(Array$create(list(), list_of(bool())), "ListArray")
526
527 # logical
528 expect_array_roundtrip(list(NA), list_of(bool()))
529 expect_array_roundtrip(list(logical(0)), list_of(bool()))
530 expect_array_roundtrip(list(c(TRUE), c(FALSE), c(FALSE, TRUE)), list_of(bool()))
531 expect_array_roundtrip(list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)), list_of(bool()))
532
533 # integer
534 expect_array_roundtrip(list(NA_integer_), list_of(int32()))
535 expect_array_roundtrip(list(integer(0)), list_of(int32()))
536 expect_array_roundtrip(list(1:2, 3:4, 12:18), list_of(int32()))
537 expect_array_roundtrip(list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)), list_of(int32()))
538
539 # numeric
540 expect_array_roundtrip(list(NA_real_), list_of(float64()))
541 expect_array_roundtrip(list(numeric(0)), list_of(float64()))
542 expect_array_roundtrip(list(1, c(2, 3), 4), list_of(float64()))
543 expect_array_roundtrip(list(1, numeric(0), c(2, 3, NA_real_), 4), list_of(float64()))
544
545 # character
546 expect_array_roundtrip(list(NA_character_), list_of(utf8()))
547 expect_array_roundtrip(list(character(0)), list_of(utf8()))
548 expect_array_roundtrip(list("itsy", c("bitsy", "spider"), c("is")), list_of(utf8()))
549 expect_array_roundtrip(list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")), list_of(utf8()))
550
551 # factor
552 expect_array_roundtrip(list(factor(c("b", "a"), levels = c("a", "b"))), list_of(dictionary(int8(), utf8())))
553 expect_array_roundtrip(list(factor(NA, levels = c("a", "b"))), list_of(dictionary(int8(), utf8())))
554
555 # struct
556 expect_array_roundtrip(
557 list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))),
558 list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()))
559 )
560 expect_array_roundtrip(
561 list(tibble::tibble(a = list(integer()))),
562 list_of(struct(a = list_of(int32())))
563 )
564 # degenerated data frame
565 df <- structure(list(x = 1:2, y = 1), class = "data.frame", row.names = 1:2)
566 expect_error(Array$create(list(df)))
567 })
568
569 test_that("Array$create() handles vector -> large list arrays", {
570 # Should be able to create an empty list with a type hint.
571 expect_r6_class(Array$create(list(), type = large_list_of(bool())), "LargeListArray")
572
573 # logical
574 expect_array_roundtrip(list(NA), large_list_of(bool()), as = large_list_of(bool()))
575 expect_array_roundtrip(list(logical(0)), large_list_of(bool()), as = large_list_of(bool()))
576 expect_array_roundtrip(list(c(TRUE), c(FALSE), c(FALSE, TRUE)), large_list_of(bool()), as = large_list_of(bool()))
577 expect_array_roundtrip(
578 list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)),
579 large_list_of(bool()),
580 as = large_list_of(bool())
581 )
582
583 # integer
584 expect_array_roundtrip(list(NA_integer_), large_list_of(int32()), as = large_list_of(int32()))
585 expect_array_roundtrip(list(integer(0)), large_list_of(int32()), as = large_list_of(int32()))
586 expect_array_roundtrip(list(1:2, 3:4, 12:18), large_list_of(int32()), as = large_list_of(int32()))
587 expect_array_roundtrip(
588 list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)),
589 large_list_of(int32()),
590 as = large_list_of(int32())
591 )
592
593 # numeric
594 expect_array_roundtrip(list(NA_real_), large_list_of(float64()), as = large_list_of(float64()))
595 expect_array_roundtrip(list(numeric(0)), large_list_of(float64()), as = large_list_of(float64()))
596 expect_array_roundtrip(list(1, c(2, 3), 4), large_list_of(float64()), as = large_list_of(float64()))
597 expect_array_roundtrip(
598 list(1, numeric(0), c(2, 3, NA_real_), 4),
599 large_list_of(float64()),
600 as = large_list_of(float64())
601 )
602
603 # character
604 expect_array_roundtrip(list(NA_character_), large_list_of(utf8()), as = large_list_of(utf8()))
605 expect_array_roundtrip(list(character(0)), large_list_of(utf8()), as = large_list_of(utf8()))
606 expect_array_roundtrip(
607 list("itsy", c("bitsy", "spider"), c("is")),
608 large_list_of(utf8()),
609 as = large_list_of(utf8())
610 )
611 expect_array_roundtrip(
612 list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")),
613 large_list_of(utf8()),
614 as = large_list_of(utf8())
615 )
616
617 # factor
618 expect_array_roundtrip(
619 list(factor(c("b", "a"), levels = c("a", "b"))),
620 large_list_of(dictionary(int8(), utf8())),
621 as = large_list_of(dictionary(int8(), utf8()))
622 )
623 expect_array_roundtrip(
624 list(factor(NA, levels = c("a", "b"))),
625 large_list_of(dictionary(int8(), utf8())),
626 as = large_list_of(dictionary(int8(), utf8()))
627 )
628
629 # struct
630 expect_array_roundtrip(
631 list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))),
632 large_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool())),
633 as = large_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()))
634 )
635 expect_array_roundtrip(
636 list(tibble::tibble(a = list(integer()))),
637 large_list_of(struct(a = list_of(int32()))),
638 as = large_list_of(struct(a = list_of(int32())))
639 )
640 })
641
642 test_that("Array$create() handles vector -> fixed size list arrays", {
643 # Should be able to create an empty list with a type hint.
644 expect_r6_class(Array$create(list(), type = fixed_size_list_of(bool(), 20)), "FixedSizeListArray")
645
646 # logical
647 expect_array_roundtrip(list(NA), fixed_size_list_of(bool(), 1L), as = fixed_size_list_of(bool(), 1L))
648 expect_array_roundtrip(
649 list(c(TRUE, FALSE), c(FALSE, TRUE)),
650 fixed_size_list_of(bool(), 2L),
651 as = fixed_size_list_of(bool(), 2L)
652 )
653 expect_array_roundtrip(
654 list(c(TRUE), c(FALSE), NA),
655 fixed_size_list_of(bool(), 1L),
656 as = fixed_size_list_of(bool(), 1L)
657 )
658
659 # integer
660 expect_array_roundtrip(list(NA_integer_), fixed_size_list_of(int32(), 1L), as = fixed_size_list_of(int32(), 1L))
661 expect_array_roundtrip(list(1:2, 3:4, 11:12), fixed_size_list_of(int32(), 2L), as = fixed_size_list_of(int32(), 2L))
662 expect_array_roundtrip(
663 list(c(1:2), c(NA_integer_, 3L)),
664 fixed_size_list_of(int32(), 2L),
665 as = fixed_size_list_of(int32(), 2L)
666 )
667
668 # numeric
669 expect_array_roundtrip(list(NA_real_), fixed_size_list_of(float64(), 1L), as = fixed_size_list_of(float64(), 1L))
670 expect_array_roundtrip(
671 list(c(1, 2), c(2, 3)),
672 fixed_size_list_of(float64(), 2L),
673 as = fixed_size_list_of(float64(), 2L)
674 )
675 expect_array_roundtrip(
676 list(c(1, 2), c(NA_real_, 4)),
677 fixed_size_list_of(float64(), 2L),
678 as = fixed_size_list_of(float64(), 2L)
679 )
680
681 # character
682 expect_array_roundtrip(list(NA_character_), fixed_size_list_of(utf8(), 1L), as = fixed_size_list_of(utf8(), 1L))
683 expect_array_roundtrip(
684 list(c("itsy", "bitsy"), c("spider", "is"), c(NA_character_, NA_character_), c("", "")),
685 fixed_size_list_of(utf8(), 2L),
686 as = fixed_size_list_of(utf8(), 2L)
687 )
688
689 # factor
690 expect_array_roundtrip(
691 list(factor(c("b", "a"), levels = c("a", "b"))),
692 fixed_size_list_of(dictionary(int8(), utf8()), 2L),
693 as = fixed_size_list_of(dictionary(int8(), utf8()), 2L)
694 )
695
696 # struct
697 expect_array_roundtrip(
698 list(tibble::tibble(a = 1L, b = 1L, c = "", d = TRUE)),
699 fixed_size_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()), 1L),
700 as = fixed_size_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()), 1L)
701 )
702 expect_array_roundtrip(
703 list(tibble::tibble(a = list(1L))),
704 fixed_size_list_of(struct(a = list_of(int32())), 1L),
705 as = fixed_size_list_of(struct(a = list_of(int32())), 1L)
706 )
707 expect_array_roundtrip(
708 list(tibble::tibble(a = list(1L))),
709 list_of(struct(a = fixed_size_list_of(int32(), 1L))),
710 as = list_of(struct(a = fixed_size_list_of(int32(), 1L)))
711 )
712 })
713
714 test_that("Handling string data with embedded nuls", {
715 raws <- structure(list(
716 as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
717 as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
718 as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
719 as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
720 as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
721 as.raw(c(0x74, 0x76))
722 ),
723 class = c("arrow_binary", "vctrs_vctr", "list")
724 )
725 expect_error(
726 rawToChar(raws[[3]]),
727 "embedded nul in string: 'ma\\0n'", # See?
728 fixed = TRUE
729 )
730 array_with_nul <- Array$create(raws)$cast(utf8())
731
732 # The behavior of the warnings/errors is slightly different with and without
733 # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately
734 # on `as.vector()` where as with it, the error only happens on materialization)
735 skip_if_r_version("3.5.0")
736
737 # no error on conversion, because altrep laziness
738 v <- expect_error(as.vector(array_with_nul), NA)
739
740 # attempting materialization -> error
741
742 expect_error(v[],
743 paste0(
744 "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
745 "to R, set options(arrow.skip_nul = TRUE)"
746 ),
747 fixed = TRUE
748 )
749
750 # also error on materializing v[3]
751 expect_error(v[3],
752 paste0(
753 "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
754 "to R, set options(arrow.skip_nul = TRUE)"
755 ),
756 fixed = TRUE
757 )
758
759 withr::with_options(list(arrow.skip_nul = TRUE), {
760 # no warning yet because altrep laziness
761 v <- as.vector(array_with_nul)
762
763 expect_warning(
764 expect_identical(
765 v[],
766 c("person", "woman", "man", "fan", "camera", "tv")
767 ),
768 "Stripping '\\0' (nul) from character vector",
769 fixed = TRUE
770 )
771
772 v <- as.vector(array_with_nul)
773 expect_warning(
774 expect_identical(v[3], "man"),
775 "Stripping '\\0' (nul) from character vector",
776 fixed = TRUE
777 )
778
779 v <- as.vector(array_with_nul)
780 expect_warning(
781 expect_identical(v[4], "fan"),
782 "Stripping '\\0' (nul) from character vector",
783 fixed = TRUE
784 )
785 })
786 })
787
788 test_that("Array$create() should have helpful error", {
789 expect_error(Array$create(list(numeric(0)), list_of(bool())), "Expecting a logical vector")
790
791 lgl <- logical(0)
792 int <- integer(0)
793 num <- numeric(0)
794 char <- character(0)
795 expect_error(Array$create(list()), "Requires at least one element to infer")
796 expect_error(Array$create(list(lgl, lgl, int)), "Expecting a logical vector")
797 expect_error(Array$create(list(char, num, char)), "Expecting a character vector")
798 })
799
800 test_that("Array$View() (ARROW-6542)", {
801 a <- Array$create(1:3)
802 b <- a$View(float32())
803 expect_equal(b$type, float32())
804 expect_equal(length(b), 3L)
805
806 # Input validation
807 expect_error(a$View("not a type"), "type must be a DataType, not character")
808 })
809
810 test_that("Array$Validate()", {
811 a <- Array$create(1:10)
812 expect_error(a$Validate(), NA)
813 })
814
815 test_that("is.Array", {
816 a <- Array$create(1, type = int32())
817 expect_true(is.Array(a))
818 expect_true(is.Array(a, "int32"))
819 expect_true(is.Array(a, c("int32", "int16")))
820 expect_false(is.Array(a, "utf8"))
821 expect_true(is.Array(a$View(float32())), "float32")
822 expect_false(is.Array(1))
823 expect_true(is.Array(ChunkedArray$create(1, 2)))
824 })
825
826 test_that("Array$Take()", {
827 a <- Array$create(10:20)
828 expect_as_vector(a$Take(c(4, 2)), c(14, 12))
829 })
830
831 test_that("[ method on Array", {
832 vec <- 11:20
833 a <- Array$create(vec)
834 expect_as_vector(a[5:9], vec[5:9])
835 expect_as_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)])
836 expect_as_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)])
837 expect_as_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20))
838 expect_as_vector(a[-4], vec[-4])
839 expect_as_vector(a[-1], vec[-1])
840 })
841
842 test_that("[ accepts Arrays and otherwise handles bad input", {
843 vec <- 11:20
844 a <- Array$create(vec)
845 ind <- c(9, 3, 5)
846 expect_error(
847 a[Array$create(ind)],
848 "Cannot extract rows with an Array of type double"
849 )
850 expect_as_vector(a[Array$create(ind - 1, type = int8())], vec[ind])
851 expect_as_vector(a[Array$create(ind - 1, type = uint8())], vec[ind])
852 expect_as_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind])
853
854 filt <- seq_along(vec) %in% ind
855 expect_as_vector(a[Array$create(filt)], vec[filt])
856
857 expect_error(
858 a["string"],
859 "Cannot extract rows with an object of class character"
860 )
861 })
862
863 test_that("%in% works on dictionary arrays", {
864 a1 <- Array$create(as.factor(c("A", "B", "C")))
865 a2 <- DictionaryArray$create(c(0L, 1L, 2L), c(4.5, 3.2, 1.1))
866 c1 <- Array$create(c(FALSE, TRUE, FALSE))
867 c2 <- Array$create(c(FALSE, FALSE, FALSE))
868 b1 <- Array$create("B")
869 b2 <- Array$create(5.4)
870
871 expect_equal(is_in(a1, b1), c1)
872 expect_equal(is_in(a2, b2), c2)
873 expect_error(is_in(a1, b2))
874 })
875
876 test_that("[ accepts Expressions", {
877 vec <- 11:20
878 a <- Array$create(vec)
879 b <- Array$create(1:10)
880 expect_as_vector(a[b > 4], vec[5:10])
881 })
882
883 test_that("Array head/tail", {
884 vec <- 11:20
885 a <- Array$create(vec)
886 expect_as_vector(head(a), head(vec))
887 expect_as_vector(head(a, 4), head(vec, 4))
888 expect_as_vector(head(a, 40), head(vec, 40))
889 expect_as_vector(head(a, -4), head(vec, -4))
890 expect_as_vector(head(a, -40), head(vec, -40))
891 expect_as_vector(tail(a), tail(vec))
892 expect_as_vector(tail(a, 4), tail(vec, 4))
893 expect_as_vector(tail(a, 40), tail(vec, 40))
894 expect_as_vector(tail(a, -40), tail(vec, -40))
895 })
896
897 test_that("Dictionary array: create from arrays, not factor", {
898 a <- DictionaryArray$create(c(2L, 1L, 1L, 2L, 0L), c(4.5, 3.2, 1.1))
899 expect_equal(a$type, dictionary(int32(), float64()))
900 })
901
902 test_that("Dictionary array: translate to R when dict isn't string", {
903 a <- DictionaryArray$create(c(2L, 1L, 1L, 2L, 0L), c(4.5, 3.2, 1.1))
904 expect_warning(
905 expect_identical(
906 as.vector(a),
907 factor(c(3, 2, 2, 3, 1), labels = c("4.5", "3.2", "1.1"))
908 )
909 )
910 })
911
912 test_that("Array$Equals", {
913 vec <- 11:20
914 a <- Array$create(vec)
915 b <- Array$create(vec)
916 d <- Array$create(3:4)
917 expect_equal(a, b)
918 expect_true(a$Equals(b))
919 expect_false(a$Equals(vec))
920 expect_false(a$Equals(d))
921 })
922
923 test_that("Array$ApproxEquals", {
924 vec <- c(1.0000000000001, 2.400000000000001)
925 a <- Array$create(vec)
926 b <- Array$create(round(vec, 1))
927 expect_false(a$Equals(b))
928 expect_true(a$ApproxEquals(b))
929 expect_false(a$ApproxEquals(vec))
930 })
931
932 test_that("auto int64 conversion to int can be disabled (ARROW-10093)", {
933 withr::with_options(list(arrow.int64_downcast = FALSE), {
934 a <- Array$create(1:10, int64())
935 expect_true(inherits(a$as_vector(), "integer64"))
936
937 batch <- RecordBatch$create(x = a)
938 expect_true(inherits(as.data.frame(batch)$x, "integer64"))
939
940 tab <- Table$create(x = a)
941 expect_true(inherits(as.data.frame(batch)$x, "integer64"))
942 })
943 })
944
945
946 test_that("Array to C-interface", {
947 # create a struct array since that's one of the more complicated array types
948 df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
949 arr <- Array$create(df)
950
951 # export the array via the C-interface
952 schema_ptr <- allocate_arrow_schema()
953 array_ptr <- allocate_arrow_array()
954 arr$export_to_c(array_ptr, schema_ptr)
955
956 # then import it and check that the roundtripped value is the same
957 circle <- Array$import_from_c(array_ptr, schema_ptr)
958 expect_equal(arr, circle)
959
960 # must clean up the pointers or we leak
961 delete_arrow_schema(schema_ptr)
962 delete_arrow_array(array_ptr)
963 })