1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
18 skip_if_not_available("dataset")
20 library(dplyr, warn.conflicts = FALSE)
24 # Add some better string data
25 tbl$verses <- verses[[1]]
26 # c(" a ", " b ", " c ", ...) increasing padding
27 # nchar = 3 5 7 9 11 13 15 17 19 21
28 tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
29 tbl$some_negative <- tbl$int * (-1)^(1:nrow(tbl)) # nolint
31 test_that("filter() on is.na()", {
32 compare_dplyr_binding(
34 filter(is.na(lgl)) %>%
35 select(chr, int, lgl) %>%
41 test_that("filter() with NAs in selection", {
42 compare_dplyr_binding(
45 select(chr, int, lgl) %>%
51 test_that("Filter returning an empty Table should not segfault (ARROW-8354)", {
52 compare_dplyr_binding(
55 select(chr, int, lgl) %>%
61 test_that("filtering with expression", {
63 compare_dplyr_binding(
65 filter(chr == char_sym) %>%
66 select(string = chr, int) %>%
72 test_that("filtering with arithmetic", {
73 compare_dplyr_binding(
75 filter(dbl + 1 > 3) %>%
76 select(string = chr, int, dbl) %>%
81 compare_dplyr_binding(
83 filter(dbl / 2 > 3) %>%
84 select(string = chr, int, dbl) %>%
89 compare_dplyr_binding(
91 filter(dbl / 2L > 3) %>%
92 select(string = chr, int, dbl) %>%
97 compare_dplyr_binding(
99 filter(int / 2 > 3) %>%
100 select(string = chr, int, dbl) %>%
105 compare_dplyr_binding(
107 filter(int / 2L > 3) %>%
108 select(string = chr, int, dbl) %>%
113 compare_dplyr_binding(
115 filter(dbl %/% 2 > 3) %>%
116 select(string = chr, int, dbl) %>%
121 compare_dplyr_binding(
123 filter(dbl^2 > 3) %>%
124 select(string = chr, int, dbl) %>%
130 test_that("filtering with expression + autocasting", {
131 compare_dplyr_binding(
133 filter(dbl + 1 > 3L) %>% # test autocasting with comparison to 3L
134 select(string = chr, int, dbl) %>%
139 compare_dplyr_binding(
141 filter(int + 1 > 3) %>%
142 select(string = chr, int, dbl) %>%
147 compare_dplyr_binding(
149 filter(int^2 > 3) %>%
150 select(string = chr, int, dbl) %>%
156 test_that("More complex select/filter", {
157 compare_dplyr_binding(
159 filter(dbl > 2, chr == "d" | chr == "f") %>%
160 select(chr, int, lgl) %>%
168 test_that("filter() with %in%", {
169 compare_dplyr_binding(
171 filter(dbl > 2, chr %in% c("d", "f")) %>%
177 test_that("Negative scalar values", {
178 compare_dplyr_binding(
180 filter(some_negative > -2) %>%
184 compare_dplyr_binding(
186 filter(some_negative %in% -1) %>%
190 compare_dplyr_binding(
192 filter(int == -some_negative) %>%
198 test_that("filter() with between()", {
199 compare_dplyr_binding(
201 filter(between(dbl, 1, 2)) %>%
206 compare_dplyr_binding(
208 filter(between(dbl, 0.5, 2)) %>%
216 filter(between(dbl, int, dbl2)) %>%
219 filter(dbl >= int, dbl <= dbl2)
225 filter(between(dbl, 1, "2")) %>%
232 filter(between(dbl, 1, NA)) %>%
239 filter(between(chr, 1, 2)) %>%
244 test_that("filter() with string ops", {
245 skip_if_not_available("utf8proc")
246 compare_dplyr_binding(
248 filter(dbl > 2, str_length(verses) > 25) %>%
253 compare_dplyr_binding(
255 filter(dbl > 2, str_length(str_trim(padded_strings, "left")) > 5) %>%
261 test_that("filter environment scope", {
262 # "object 'b_var' not found"
263 compare_dplyr_error(.input %>% filter(chr == b_var), tbl)
266 compare_dplyr_binding(
268 filter(chr == b_var) %>%
273 # 'could not find function "isEqualTo"' because we haven't defined it yet
274 compare_dplyr_error(.input %>% filter(isEqualTo(int, 4)), tbl)
276 # This works but only because there are S3 methods for those operations
277 isEqualTo <- function(x, y) x == y & !is.na(x)
278 compare_dplyr_binding(
280 select(-fct) %>% # factor levels aren't identical
281 filter(isEqualTo(int, 4)) %>%
285 # Try something that needs to call another nse_func
286 compare_dplyr_binding(
289 filter(nchar(padded_strings) < 10) %>%
293 isShortString <- function(x) nchar(x) < 10
295 compare_dplyr_binding(
298 filter(isShortString(padded_strings)) %>%
304 test_that("Filtering on a column that doesn't exist errors correctly", {
305 with_language("fr", {
306 # expect_warning(., NA) because the usual behavior when it hits a filter
307 # that it can't evaluate is to raise a warning, collect() to R, and retry
308 # the filter. But we want this to error the first time because it's
309 # a user error, not solvable by retrying in R
312 tbl %>% record_batch() %>% filter(not_a_col == 42) %>% collect(),
313 "objet 'not_a_col' introuvable"
318 with_language("en", {
321 tbl %>% record_batch() %>% filter(not_a_col == 42) %>% collect(),
322 "object 'not_a_col' not found"
329 test_that("Filtering with unsupported functions", {
330 compare_dplyr_binding(
332 filter(int > 2, pnorm(dbl) > .99) %>%
335 warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R"
337 compare_dplyr_binding(
340 nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
342 pnorm(dbl) > .99 # bad, opaque
346 warning = '\\* In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1, allowNA = TRUE not supported by Arrow
347 \\* Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow
352 test_that("Calling Arrow compute functions 'directly'", {
356 filter(arrow_add(dbl, 1) > 3L) %>%
357 select(string = chr, int, dbl) %>%
360 filter(dbl + 1 > 3L) %>%
361 select(string = chr, int, dbl)
364 compare_dplyr_binding(
367 filter(arrow_greater(arrow_add(dbl, 1), 3L)) %>%
368 select(string = chr, int, dbl) %>%
371 filter(dbl + 1 > 3L) %>%
372 select(string = chr, int, dbl)
376 test_that("filter() with .data pronoun", {
377 compare_dplyr_binding(
379 filter(.data$dbl > 4) %>%
380 select(.data$chr, .data$int, .data$lgl) %>%
385 compare_dplyr_binding(
387 filter(is.na(.data$lgl)) %>%
388 select(.data$chr, .data$int, .data$lgl) %>%
393 # and the .env pronoun too!
395 compare_dplyr_binding(
397 filter(.data$dbl > .env$chr) %>%
398 select(.data$chr, .data$int, .data$lgl) %>%
403 skip("test now faulty - code no longer gives error & outputs a empty tibble")
404 # but there is an error if we don't override the masking with `.env`
407 filter(.data$dbl > chr) %>%
408 select(.data$chr, .data$int, .data$lgl) %>%