]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, | |
12 | # software distributed under the License is distributed on an | |
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | # KIND, either express or implied. See the License for the | |
15 | # specific language governing permissions and limitations | |
16 | # under the License. | |
17 | ||
18 | skip_if_not_available("dataset") | |
19 | ||
20 | library(dplyr, warn.conflicts = FALSE) | |
21 | suppressPackageStartupMessages(library(bit64)) | |
22 | ||
23 | ||
24 | tbl <- example_data | |
25 | ||
test_that("explicit type conversions with cast()", {
  # Inputs: one native R integer and one bit64 64-bit integer.
  num_int32 <- 12L
  num_int64 <- bit64::as.integer64(10)

  int_types <- c(int8(), int16(), int32(), int64())
  uint_types <- c(uint8(), uint16(), uint32(), uint64())
  float_types <- c(float32(), float64())

  types <- c(
    int_types,
    uint_types,
    float_types,
    # double() is not actually a type but a base R function; it should be
    # treated as an alias for float64. It returns a zero-length numeric vector,
    # which c() would silently drop, so it is wrapped in list() to make sure
    # the alias is really exercised by the loop below.
    list(double()),
    list(string())
  )

  # Every input must come back with exactly the requested type.
  for (type in types) {
    for (value in list(num_int32, num_int64)) {
      expect_type_equal(
        object = {
          t1 <- Table$create(x = value) %>%
            transmute(x = cast(x, type)) %>%
            compute()
          t1$schema[[1]]$type
        },
        as_type(type)
      )
    }
  }

  # Arrow errors when truncating floats...
  # (the pipeline itself raises, so no type expectation is wrapped around it)
  expect_error(
    Table$create(pi = pi) %>%
      transmute(three = cast(pi, int32())) %>%
      compute(),
    "truncated"
  )

  # ... unless safe = FALSE (or allow_float_truncate = TRUE)
  expect_type_equal(
    object = {
      t1 <- Table$create(pi = pi) %>%
        transmute(three = cast(pi, int32(), safe = FALSE)) %>%
        compute()
      t1$schema[[1]]$type
    },
    int32()
  )
})
88 | ||
test_that("explicit type conversions with as.*()", {
  library(bit64)

  # Integer and double columns of the shared example data, converted to
  # character, double, integer and numeric.
  compare_dplyr_binding(
    .input %>%
      transmute(
        int2chr = as.character(int),
        int2dbl = as.double(int),
        int2int = as.integer(int),
        int2num = as.numeric(int),
        dbl2chr = as.character(dbl),
        dbl2dbl = as.double(dbl),
        dbl2int = as.integer(dbl),
        dbl2num = as.numeric(dbl)
      ) %>%
      collect(),
    tbl
  )

  # Strings that hold whole numbers, converted to each type.
  digit_strings <- tibble(chr = c("1", "2", "3"))
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr2chr = as.character(chr),
        chr2dbl = as.double(chr),
        chr2int = as.integer(chr),
        chr2num = as.numeric(chr)
      ) %>%
      collect(),
    digit_strings
  )

  # Conversions to integer64 from string, double and integer64 inputs.
  big_values <- tibble(
    chr = "10000000000",
    dbl = 10000000000,
    i64 = as.integer64(1e10)
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr2i64 = as.integer64(chr),
        dbl2i64 = as.integer64(dbl),
        i642i64 = as.integer64(i64)
      ) %>%
      collect(),
    big_values
  )

  # Conversions to logical from string, double and integer inputs.
  truthy_values <- tibble(
    chr = c("TRUE", "FALSE", "true", "false"),
    dbl = c(1, 0, -99, 0),
    int = c(1L, 0L, -99L, 0L)
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr2lgl = as.logical(chr),
        dbl2lgl = as.logical(dbl),
        int2lgl = as.logical(int)
      ) %>%
      collect(),
    truthy_values
  )

  # Full conversion matrix over double, integer and logical columns that each
  # contain a missing value.
  with_missing <- tibble(
    dbl = c(1, 0, NA_real_),
    int = c(1L, 0L, NA_integer_),
    lgl = c(TRUE, FALSE, NA)
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        dbl2chr = as.character(dbl),
        dbl2dbl = as.double(dbl),
        dbl2int = as.integer(dbl),
        dbl2lgl = as.logical(dbl),
        int2chr = as.character(int),
        int2dbl = as.double(int),
        int2int = as.integer(int),
        int2lgl = as.logical(int),
        lgl2chr = as.character(lgl), # Arrow returns "true", "false" here ...
        lgl2dbl = as.double(lgl),
        lgl2int = as.integer(lgl),
        lgl2lgl = as.logical(lgl)
      ) %>%
      collect() %>%
      # need to use toupper() *after* collect() or else skip if utf8proc not
      # available
      mutate(lgl2chr = toupper(lgl2chr)), # ... but we need "TRUE", "FALSE"
    with_missing
  )
})
167 | ||
test_that("is.finite(), is.infinite(), is.nan()", {
  # A very small and a very large magnitude double, zero, and every special
  # double value.
  special_values <- c(
    -4.94065645841246544e-324, 1.79769313486231570e+308, 0,
    NA_real_, NaN, Inf, -Inf
  )
  df <- tibble(x = special_values)

  compare_dplyr_binding(
    .input %>%
      transmute(
        is_fin = is.finite(x),
        is_inf = is.infinite(x)
      ) %>%
      collect(),
    df
  )

  # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
  compare_dplyr_binding(
    .input %>%
      transmute(is_nan = is.nan(x)) %>%
      collect(),
    df
  )
})
192 | ||
test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
  # Ordinary doubles mixed with one NA and one NaN.
  values <- c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7)
  df <- tibble(x = values)

  compare_dplyr_binding(
    .input %>%
      transmute(is_na = is.na(x)) %>%
      collect(),
    df
  )
})
204 | ||
test_that("type checks with is() giving Arrow types", {
  # Each section builds a one-row Table, checks every column against every
  # candidate type, and flattens the result row to an unnamed logical vector.
  # Only a column checked against its own type should yield TRUE.

  # with class2=DataType
  expect_equal(
    Table$create(
      i32 = Array$create(1, int32()),
      dec = Array$create(pi)$cast(decimal(3, 2)),
      f64 = Array$create(1.1, float64()),
      str = Array$create("a", arrow::string())
    ) %>% transmute(
      i32_is_i32 = is(i32, int32()),
      i32_is_dec = is(i32, decimal(3, 2)),
      i32_is_f64 = is(i32, float64()),
      i32_is_str = is(i32, arrow::string()),
      dec_is_i32 = is(dec, int32()),
      dec_is_dec = is(dec, decimal(3, 2)),
      dec_is_f64 = is(dec, float64()),
      dec_is_str = is(dec, arrow::string()),
      f64_is_i32 = is(f64, int32()),
      f64_is_dec = is(f64, decimal(3, 2)),
      f64_is_f64 = is(f64, float64()),
      f64_is_str = is(f64, arrow::string()),
      str_is_i32 = is(str, int32()),
      str_is_dec = is(str, decimal(3, 2)),
      str_is_f64 = is(str, float64()),
      str_is_str = is(str, arrow::string())
    ) %>%
      collect() %>%
      t() %>%
      as.vector(),
    c(
      TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
      FALSE, FALSE, FALSE, FALSE, TRUE
    )
  )
  # with class2=string
  expect_equal(
    Table$create(
      i32 = Array$create(1, int32()),
      f64 = Array$create(1.1, float64()),
      str = Array$create("a", arrow::string())
    ) %>% transmute(
      i32_is_i32 = is(i32, "int32"),
      i32_is_f64 = is(i32, "double"),
      i32_is_str = is(i32, "string"),
      f64_is_i32 = is(f64, "int32"),
      f64_is_f64 = is(f64, "double"),
      f64_is_str = is(f64, "string"),
      str_is_i32 = is(str, "int32"),
      str_is_f64 = is(str, "double"),
      str_is_str = is(str, "string")
    ) %>%
      collect() %>%
      t() %>%
      as.vector(),
    c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE)
  )
  # with class2=string alias
  expect_equal(
    Table$create(
      f16 = Array$create(NA_real_, halffloat()),
      f32 = Array$create(1.1, float()),
      f64 = Array$create(2.2, float64()),
      lgl = Array$create(TRUE, bool()),
      str = Array$create("a", arrow::string())
    ) %>% transmute(
      f16_is_f16 = is(f16, "float16"),
      f16_is_f32 = is(f16, "float32"),
      f16_is_f64 = is(f16, "float64"),
      f16_is_lgl = is(f16, "boolean"),
      f16_is_str = is(f16, "utf8"),
      f32_is_f16 = is(f32, "float16"),
      f32_is_f32 = is(f32, "float32"),
      f32_is_f64 = is(f32, "float64"),
      f32_is_lgl = is(f32, "boolean"),
      f32_is_str = is(f32, "utf8"),
      f64_is_f16 = is(f64, "float16"),
      f64_is_f32 = is(f64, "float32"),
      f64_is_f64 = is(f64, "float64"),
      f64_is_lgl = is(f64, "boolean"),
      f64_is_str = is(f64, "utf8"),
      lgl_is_f16 = is(lgl, "float16"),
      lgl_is_f32 = is(lgl, "float32"),
      lgl_is_f64 = is(lgl, "float64"),
      lgl_is_lgl = is(lgl, "boolean"),
      lgl_is_str = is(lgl, "utf8"),
      str_is_f16 = is(str, "float16"),
      str_is_f32 = is(str, "float32"),
      str_is_f64 = is(str, "float64"),
      str_is_lgl = is(str, "boolean"),
      str_is_str = is(str, "utf8")
    ) %>%
      collect() %>%
      t() %>%
      as.vector(),
    c(
      TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
      FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
      FALSE, FALSE, TRUE
    )
  )
})
306 | ||
test_that("type checks with is() giving R types", {
  library(bit64)

  # Character, double, factor, integer and logical columns of the shared
  # example data, each checked against every R class name.
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr_is_chr = is(chr, "character"),
        chr_is_fct = is(chr, "factor"),
        chr_is_int = is(chr, "integer"),
        chr_is_i64 = is(chr, "integer64"),
        chr_is_lst = is(chr, "list"),
        chr_is_lgl = is(chr, "logical"),
        chr_is_num = is(chr, "numeric"),
        dbl_is_chr = is(dbl, "character"),
        dbl_is_fct = is(dbl, "factor"),
        dbl_is_int = is(dbl, "integer"),
        dbl_is_i64 = is(dbl, "integer64"),
        dbl_is_lst = is(dbl, "list"),
        dbl_is_lgl = is(dbl, "logical"),
        dbl_is_num = is(dbl, "numeric"),
        fct_is_chr = is(fct, "character"),
        fct_is_fct = is(fct, "factor"),
        fct_is_int = is(fct, "integer"),
        fct_is_i64 = is(fct, "integer64"),
        fct_is_lst = is(fct, "list"),
        fct_is_lgl = is(fct, "logical"),
        fct_is_num = is(fct, "numeric"),
        int_is_chr = is(int, "character"),
        int_is_fct = is(int, "factor"),
        int_is_int = is(int, "integer"),
        int_is_i64 = is(int, "integer64"),
        int_is_lst = is(int, "list"),
        int_is_lgl = is(int, "logical"),
        int_is_num = is(int, "numeric"),
        lgl_is_chr = is(lgl, "character"),
        lgl_is_fct = is(lgl, "factor"),
        lgl_is_int = is(lgl, "integer"),
        lgl_is_i64 = is(lgl, "integer64"),
        lgl_is_lst = is(lgl, "list"),
        lgl_is_lgl = is(lgl, "logical"),
        lgl_is_num = is(lgl, "numeric")
      ) %>%
      collect(),
    tbl
  )

  # integer64 and list columns are not taken from the shared example data, so
  # a dedicated fixture supplies them.
  i64_and_list <- tibble(
    i64 = as.integer64(1:3),
    lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        i64_is_chr = is(i64, "character"),
        i64_is_fct = is(i64, "factor"),
        # we want Arrow to return TRUE, but bit64 returns FALSE
        # i64_is_int = is(i64, "integer"),
        i64_is_i64 = is(i64, "integer64"),
        i64_is_lst = is(i64, "list"),
        i64_is_lgl = is(i64, "logical"),
        # we want Arrow to return TRUE, but bit64 returns FALSE
        # i64_is_num = is(i64, "numeric"),
        lst_is_chr = is(lst, "character"),
        lst_is_fct = is(lst, "factor"),
        lst_is_int = is(lst, "integer"),
        lst_is_i64 = is(lst, "integer64"),
        lst_is_lst = is(lst, "list"),
        lst_is_lgl = is(lst, "logical"),
        lst_is_num = is(lst, "numeric")
      ) %>%
      collect(),
    i64_and_list
  )
})
378 | ||
test_that("type checks with is.*()", {
  library(bit64)

  # Character, double, factor, integer and logical columns of the shared
  # example data, each run through every is.*() predicate.
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr_is_chr = is.character(chr),
        chr_is_dbl = is.double(chr),
        chr_is_fct = is.factor(chr),
        chr_is_int = is.integer(chr),
        chr_is_i64 = is.integer64(chr),
        chr_is_lst = is.list(chr),
        chr_is_lgl = is.logical(chr),
        chr_is_num = is.numeric(chr),
        dbl_is_chr = is.character(dbl),
        dbl_is_dbl = is.double(dbl),
        dbl_is_fct = is.factor(dbl),
        dbl_is_int = is.integer(dbl),
        dbl_is_i64 = is.integer64(dbl),
        dbl_is_lst = is.list(dbl),
        dbl_is_lgl = is.logical(dbl),
        dbl_is_num = is.numeric(dbl),
        fct_is_chr = is.character(fct),
        fct_is_dbl = is.double(fct),
        fct_is_fct = is.factor(fct),
        fct_is_int = is.integer(fct),
        fct_is_i64 = is.integer64(fct),
        fct_is_lst = is.list(fct),
        fct_is_lgl = is.logical(fct),
        fct_is_num = is.numeric(fct),
        int_is_chr = is.character(int),
        int_is_dbl = is.double(int),
        int_is_fct = is.factor(int),
        int_is_int = is.integer(int),
        int_is_i64 = is.integer64(int),
        int_is_lst = is.list(int),
        int_is_lgl = is.logical(int),
        int_is_num = is.numeric(int),
        lgl_is_chr = is.character(lgl),
        lgl_is_dbl = is.double(lgl),
        lgl_is_fct = is.factor(lgl),
        lgl_is_int = is.integer(lgl),
        lgl_is_i64 = is.integer64(lgl),
        lgl_is_lst = is.list(lgl),
        lgl_is_lgl = is.logical(lgl),
        lgl_is_num = is.numeric(lgl)
      ) %>%
      collect(),
    tbl
  )

  # integer64 and list columns are not taken from the shared example data, so
  # a dedicated fixture supplies them.
  i64_and_list <- tibble(
    i64 = as.integer64(1:3),
    lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        i64_is_chr = is.character(i64),
        # TODO: investigate why this is not matching when testthat runs it
        # i64_is_dbl = is.double(i64),
        i64_is_fct = is.factor(i64),
        # we want Arrow to return TRUE, but bit64 returns FALSE
        # i64_is_int = is.integer(i64),
        i64_is_i64 = is.integer64(i64),
        i64_is_lst = is.list(i64),
        i64_is_lgl = is.logical(i64),
        i64_is_num = is.numeric(i64),
        lst_is_chr = is.character(lst),
        lst_is_dbl = is.double(lst),
        lst_is_fct = is.factor(lst),
        lst_is_int = is.integer(lst),
        lst_is_i64 = is.integer64(lst),
        lst_is_lst = is.list(lst),
        lst_is_lgl = is.logical(lst),
        lst_is_num = is.numeric(lst)
      ) %>%
      collect(),
    i64_and_list
  )
})
457 | ||
test_that("type checks with is_*()", {
  # The is_*() predicates used below come from rlang.
  library(rlang, warn.conflicts = FALSE)

  # Character, double, integer and logical columns of the shared example data,
  # each run through every predicate.
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr_is_chr = is_character(chr),
        chr_is_dbl = is_double(chr),
        chr_is_int = is_integer(chr),
        chr_is_lst = is_list(chr),
        chr_is_lgl = is_logical(chr),
        dbl_is_chr = is_character(dbl),
        dbl_is_dbl = is_double(dbl),
        dbl_is_int = is_integer(dbl),
        dbl_is_lst = is_list(dbl),
        dbl_is_lgl = is_logical(dbl),
        int_is_chr = is_character(int),
        int_is_dbl = is_double(int),
        int_is_int = is_integer(int),
        int_is_lst = is_list(int),
        int_is_lgl = is_logical(int),
        lgl_is_chr = is_character(lgl),
        lgl_is_dbl = is_double(lgl),
        lgl_is_int = is_integer(lgl),
        lgl_is_lst = is_list(lgl),
        lgl_is_lgl = is_logical(lgl)
      ) %>%
      collect(),
    tbl
  )
})
488 | ||
test_that("type checks on expressions", {
  # Predicates applied to the results of conversions, arithmetic and
  # comparisons rather than to bare columns.
  compare_dplyr_binding(
    .input %>%
      transmute(
        a = is.character(as.character(int)),
        b = is.integer(as.character(int)),
        c = is.integer(int + int),
        d = is.double(int + dbl),
        e = is.logical(dbl > pi)
      ) %>%
      collect(),
    tbl
  )

  # the code in the expectation below depends on RE2
  skip_if_not_available("re2")

  compare_dplyr_binding(
    .input %>%
      transmute(a = is.logical(grepl("[def]", chr))) %>%
      collect(),
    tbl
  )
})
515 | ||
test_that("type checks on R scalar literals", {
  # Every predicate receives a literal R value instead of a column; the shared
  # example data only provides the rows the expression is evaluated against.
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr_is_chr = is.character("foo"),
        int_is_chr = is.character(42L),
        int_is_int = is.integer(42L),
        chr_is_int = is.integer("foo"),
        dbl_is_num = is.numeric(3.14159),
        int_is_num = is.numeric(42L),
        chr_is_num = is.numeric("foo"),
        dbl_is_dbl = is.double(3.14159),
        chr_is_dbl = is.double("foo"),
        lgl_is_lgl = is.logical(TRUE),
        chr_is_lgl = is.logical("foo"),
        fct_is_fct = is.factor(
          factor("foo", levels = c("foo", "bar", "baz"))
        ),
        chr_is_fct = is.factor("foo"),
        lst_is_lst = is.list(list(c(a = "foo", b = "bar"))),
        chr_is_lst = is.list("foo")
      ) %>%
      collect(),
    tbl
  )
})
540 | ||
test_that("as.factor()/dictionary_encode()", {
  # Everything below is currently disabled by this unconditional skip.
  skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression")

  # One string column and one numeric column, each containing a missing value.
  df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
  df2 <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))

  compare_dplyr_binding(
    .input %>%
      transmute(x = as.factor(x)) %>%
      collect(),
    df1
  )

  # The numeric input is expected to raise the coercion warning matched below.
  expect_warning(
    compare_dplyr_binding(
      .input %>%
        transmute(x = as.factor(x)) %>%
        collect(),
      df2
    ),
    "Coercing dictionary values to R character factor levels"
  )

  # dictionary values with default null encoding behavior ("mask") omits
  # nulls from the dictionary values
  expect_equal(
    object = {
      masked <- df1 %>%
        record_batch() %>%
        transmute(x = dictionary_encode(x)) %>%
        compute()
      masked_dict <- masked$x$dictionary()
      as.vector(masked_dict$Take(masked_dict$SortIndices()))
    },
    sort(unique(df1$x), na.last = NA)
  )

  # dictionary values with "encode" null encoding behavior includes nulls in
  # the dictionary values
  expect_equal(
    object = {
      encoded <- df1 %>%
        record_batch() %>%
        transmute(x = dictionary_encode(x, null_encoding_behavior = "encode")) %>%
        compute()
      encoded_dict <- encoded$x$dictionary()
      as.vector(encoded_dict$Take(encoded_dict$SortIndices()))
    },
    sort(unique(df1$x), na.last = TRUE)
  )
})
591 | ||
test_that("bad explicit type conversions with as.*()", {
  # Each case below is wrapped in expect_error(), so it passes only while the
  # comparison between Arrow and R keeps failing.

  # Arrow returns lowercase "true", "false" (instead of "TRUE", "FALSE" like R)
  logical_values <- tibble(lgl = c(TRUE, FALSE, NA))
  expect_error(
    compare_dplyr_binding(
      .input %>%
        transmute(lgl2chr = as.character(lgl)) %>%
        collect(),
      logical_values
    )
  )

  # Arrow fails to parse these strings as numbers (instead of returning NAs with
  # a warning like R does)
  not_numbers <- tibble(chr = c("l.O", "S.S", ""))
  expect_error(
    expect_warning(
      compare_dplyr_binding(
        .input %>%
          transmute(chr2num = as.numeric(chr)) %>%
          collect(),
        not_numbers
      )
    )
  )

  # Arrow fails to parse these strings as Booleans (instead of returning NAs
  # like R does)
  not_booleans <- tibble(chr = c("TRU", "FAX", ""))
  expect_error(
    compare_dplyr_binding(
      .input %>%
        transmute(chr2lgl = as.logical(chr)) %>%
        collect(),
      not_booleans
    )
  )
})