[ceph.git] / ceph / src / arrow / r / tests / testthat / test-dplyr-funcs-type.R

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Gate for the "dataset" feature. skip_if_not_available() is a test helper
# defined outside this file; presumably it skips what follows when the Arrow
# build lacks the named capability -- confirm against the helper.
skip_if_not_available("dataset")

# dplyr supplies the verbs used in every pipeline below; warn.conflicts = FALSE
# silences the masking warnings emitted on attach.
library(dplyr, warn.conflicts = FALSE)
# bit64 supplies as.integer64()/is.integer64(); its startup messages are muted.
suppressPackageStartupMessages(library(bit64))


# Shared fixture. example_data is defined outside this file; the tests below
# read its int, dbl, chr, fct and lgl columns.
tbl <- example_data

test_that("explicit type conversions with cast()", {
  # An R integer and a bit64 integer64 scalar; each one is cast to every target
  # type below and the type of the resulting column is checked.
  num_int32 <- 12L
  num_int64 <- bit64::as.integer64(10)

  int_types <- c(int8(), int16(), int32(), int64())
  uint_types <- c(uint8(), uint16(), uint32(), uint64())
  float_types <- c(float32(), float64())

  # base R's double() returns a zero-length numeric vector, and c() contributes
  # no elements for a zero-length argument, so a bare double() would silently
  # vanish from `types`. Wrapping it in list() keeps it as an element so the
  # alias is actually exercised by the loop below.
  types <- c(
    int_types,
    uint_types,
    float_types,
    list(double()), # not actually a type, a base R function but should be alias for float64
    string()
  )

  for (type in types) {
    # The expected type goes through as_type() so that an alias such as
    # double() is compared against whatever type it resolves to.
    expect_type_equal(
      object = {
        t1 <- Table$create(x = num_int32) %>%
          transmute(x = cast(x, type)) %>%
          compute()
        t1$schema[[1]]$type
      },
      as_type(type)
    )
    expect_type_equal(
      object = {
        t1 <- Table$create(x = num_int64) %>%
          transmute(x = cast(x, type)) %>%
          compute()
        t1$schema[[1]]$type
      },
      as_type(type)
    )
  }

  # Arrow errors when truncating floats...
  expect_error(
    expect_type_equal(
      object = {
        t1 <- Table$create(pi = pi) %>%
          transmute(three = cast(pi, int32())) %>%
          compute()
        t1$schema[[1]]$type
      },
      int32()
    ),
    "truncated"
  )

  # ... unless safe = FALSE (or allow_float_truncate = TRUE)
  expect_type_equal(
    object = {
      t1 <- Table$create(pi = pi) %>%
        transmute(three = cast(pi, int32(), safe = FALSE)) %>%
        compute()
      t1$schema[[1]]$type
    },
    int32()
  )
})

test_that("explicit type conversions with as.*()", {
  # bit64 is also attached at the top of this file; as.integer64() comes
  # from it.
  library(bit64)

  # Integer and double columns of the shared fixture, converted to each of
  # character, double, integer and numeric.
  compare_dplyr_binding(
    .input %>%
      transmute(
        int2chr = as.character(int),
        int2dbl = as.double(int),
        int2int = as.integer(int),
        int2num = as.numeric(int),
        dbl2chr = as.character(dbl),
        dbl2dbl = as.double(dbl),
        dbl2int = as.integer(dbl),
        dbl2num = as.numeric(dbl)
      ) %>%
      collect(),
    tbl
  )

  # Strings holding plain digits, converted to each numeric flavour.
  digit_strings <- tibble(chr = c("1", "2", "3"))
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr2chr = as.character(chr),
        chr2dbl = as.double(chr),
        chr2int = as.integer(chr),
        chr2num = as.numeric(chr)
      ) %>%
      collect(),
    digit_strings
  )

  # The same value (1e10) as a string, a double and an integer64, each
  # converted with as.integer64().
  ten_billion <- tibble(
    chr = "10000000000",
    dbl = 10000000000,
    i64 = as.integer64(1e10)
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr2i64 = as.integer64(chr),
        dbl2i64 = as.integer64(dbl),
        i642i64 = as.integer64(i64)
      ) %>%
      collect(),
    ten_billion
  )

  # Conversions to logical from strings in both letter cases and from
  # zero / non-zero numbers.
  truthy_inputs <- tibble(
    chr = c("TRUE", "FALSE", "true", "false"),
    dbl = c(1, 0, -99, 0),
    int = c(1L, 0L, -99L, 0L)
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr2lgl = as.logical(chr),
        dbl2lgl = as.logical(dbl),
        int2lgl = as.logical(int)
      ) %>%
      collect(),
    truthy_inputs
  )

  # Every pairing of double/integer/logical sources, each including a
  # missing value.
  with_missing <- tibble(
    dbl = c(1, 0, NA_real_),
    int = c(1L, 0L, NA_integer_),
    lgl = c(TRUE, FALSE, NA)
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        dbl2chr = as.character(dbl),
        dbl2dbl = as.double(dbl),
        dbl2int = as.integer(dbl),
        dbl2lgl = as.logical(dbl),
        int2chr = as.character(int),
        int2dbl = as.double(int),
        int2int = as.integer(int),
        int2lgl = as.logical(int),
        lgl2chr = as.character(lgl), # Arrow returns "true", "false" here ...
        lgl2dbl = as.double(lgl),
        lgl2int = as.integer(lgl),
        lgl2lgl = as.logical(lgl)
      ) %>%
      collect() %>%
      # need to use toupper() *after* collect() or else skip if utf8proc not available
      mutate(lgl2chr = toupper(lgl2chr)), # ... but we need "TRUE", "FALSE"
    with_missing
  )
})

test_that("is.finite(), is.infinite(), is.nan()", {
  # A tiny negative double, a huge positive double, zero, and every
  # missing / non-finite double value.
  edge_values <- tibble(
    x = c(
      -4.94065645841246544e-324, 1.79769313486231570e+308, 0,
      NA_real_, NaN, Inf, -Inf
    )
  )

  compare_dplyr_binding(
    .input %>%
      transmute(
        is_fin = is.finite(x),
        is_inf = is.infinite(x)
      ) %>%
      collect(),
    edge_values
  )

  # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
  compare_dplyr_binding(
    .input %>%
      transmute(is_nan = is.nan(x)) %>%
      collect(),
    edge_values
  )
})

test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
  # Ordinary doubles mixed with one NA and one NaN.
  doubles_with_gaps <- tibble(
    x = c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7)
  )

  compare_dplyr_binding(
    .input %>%
      transmute(is_na = is.na(x)) %>%
      collect(),
    doubles_with_gaps
  )
})

test_that("type checks with is() giving Arrow types", {
  # Each expectation builds a one-row Table, checks every column against every
  # candidate type, and flattens the single result row with t() and
  # as.vector(). The column names are discarded by that flattening; they only
  # label which column/type pair each position represents. The expected vector
  # is TRUE exactly where a column is checked against its own type.

  # with class2=DataType
  expect_equal(
    Table$create(
      i32 = Array$create(1, int32()),
      dec = Array$create(pi)$cast(decimal(3, 2)),
      f64 = Array$create(1.1, float64()),
      str = Array$create("a", arrow::string())
    ) %>% transmute(
      i32_is_i32 = is(i32, int32()),
      i32_is_dec = is(i32, decimal(3, 2)),
      i32_is_f64 = is(i32, float64()),
      i32_is_str = is(i32, arrow::string()),
      dec_is_i32 = is(dec, int32()),
      dec_is_dec = is(dec, decimal(3, 2)),
      dec_is_f64 = is(dec, float64()),
      dec_is_str = is(dec, arrow::string()),
      f64_is_i32 = is(f64, int32()),
      f64_is_dec = is(f64, decimal(3, 2)),
      f64_is_f64 = is(f64, float64()),
      f64_is_str = is(f64, arrow::string()),
      str_is_i32 = is(str, int32()),
      str_is_dec = is(str, decimal(3, 2)),
      str_is_f64 = is(str, float64()),
      str_is_str = is(str, arrow::string())
    ) %>%
      collect() %>%
      t() %>%
      as.vector(),
    c(
      TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
      FALSE, FALSE, FALSE, FALSE, TRUE
    )
  )
  # with class2=string
  expect_equal(
    Table$create(
      i32 = Array$create(1, int32()),
      f64 = Array$create(1.1, float64()),
      str = Array$create("a", arrow::string())
    ) %>% transmute(
      i32_is_i32 = is(i32, "int32"),
      i32_is_f64 = is(i32, "double"),
      i32_is_str = is(i32, "string"),
      f64_is_i32 = is(f64, "int32"),
      f64_is_f64 = is(f64, "double"),
      f64_is_str = is(f64, "string"),
      str_is_i32 = is(str, "int32"),
      str_is_f64 = is(str, "double"),
      str_is_str = is(str, "string")
    ) %>%
      collect() %>%
      t() %>%
      as.vector(),
    c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE)
  )
  # with class2=string alias
  expect_equal(
    Table$create(
      f16 = Array$create(NA_real_, halffloat()),
      f32 = Array$create(1.1, float()),
      f64 = Array$create(2.2, float64()),
      lgl = Array$create(TRUE, bool()),
      str = Array$create("a", arrow::string())
    ) %>% transmute(
      f16_is_f16 = is(f16, "float16"),
      f16_is_f32 = is(f16, "float32"),
      f16_is_f64 = is(f16, "float64"),
      f16_is_lgl = is(f16, "boolean"),
      f16_is_str = is(f16, "utf8"),
      f32_is_f16 = is(f32, "float16"),
      f32_is_f32 = is(f32, "float32"),
      f32_is_f64 = is(f32, "float64"),
      f32_is_lgl = is(f32, "boolean"),
      f32_is_str = is(f32, "utf8"),
      f64_is_f16 = is(f64, "float16"),
      f64_is_f32 = is(f64, "float32"),
      f64_is_f64 = is(f64, "float64"),
      f64_is_lgl = is(f64, "boolean"),
      f64_is_str = is(f64, "utf8"),
      lgl_is_f16 = is(lgl, "float16"),
      lgl_is_f32 = is(lgl, "float32"),
      lgl_is_f64 = is(lgl, "float64"),
      lgl_is_lgl = is(lgl, "boolean"),
      lgl_is_str = is(lgl, "utf8"),
      str_is_f16 = is(str, "float16"),
      str_is_f32 = is(str, "float32"),
      str_is_f64 = is(str, "float64"),
      str_is_lgl = is(str, "boolean"),
      str_is_str = is(str, "utf8")
    ) %>%
      collect() %>%
      t() %>%
      as.vector(),
    c(
      TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
      FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
      FALSE, FALSE, TRUE
    )
  )
})

test_that("type checks with is() giving R types", {
  # bit64 is also attached at the top of this file; the second fixture below
  # uses as.integer64() from it.
  library(bit64)

  # Every column of the shared fixture against every R class name.
  compare_dplyr_binding(
    .input %>%
      transmute(
        # character column
        chr_is_chr = is(chr, "character"),
        chr_is_fct = is(chr, "factor"),
        chr_is_int = is(chr, "integer"),
        chr_is_i64 = is(chr, "integer64"),
        chr_is_lst = is(chr, "list"),
        chr_is_lgl = is(chr, "logical"),
        chr_is_num = is(chr, "numeric"),
        # double column
        dbl_is_chr = is(dbl, "character"),
        dbl_is_fct = is(dbl, "factor"),
        dbl_is_int = is(dbl, "integer"),
        dbl_is_i64 = is(dbl, "integer64"),
        dbl_is_lst = is(dbl, "list"),
        dbl_is_lgl = is(dbl, "logical"),
        dbl_is_num = is(dbl, "numeric"),
        # factor column
        fct_is_chr = is(fct, "character"),
        fct_is_fct = is(fct, "factor"),
        fct_is_int = is(fct, "integer"),
        fct_is_i64 = is(fct, "integer64"),
        fct_is_lst = is(fct, "list"),
        fct_is_lgl = is(fct, "logical"),
        fct_is_num = is(fct, "numeric"),
        # integer column
        int_is_chr = is(int, "character"),
        int_is_fct = is(int, "factor"),
        int_is_int = is(int, "integer"),
        int_is_i64 = is(int, "integer64"),
        int_is_lst = is(int, "list"),
        int_is_lgl = is(int, "logical"),
        int_is_num = is(int, "numeric"),
        # logical column
        lgl_is_chr = is(lgl, "character"),
        lgl_is_fct = is(lgl, "factor"),
        lgl_is_int = is(lgl, "integer"),
        lgl_is_i64 = is(lgl, "integer64"),
        lgl_is_lst = is(lgl, "list"),
        lgl_is_lgl = is(lgl, "logical"),
        lgl_is_num = is(lgl, "numeric")
      ) %>%
      collect(),
    tbl
  )

  # integer64 and list columns are not taken from the shared fixture, so they
  # get a dedicated input.
  i64_and_list <- tibble(
    i64 = as.integer64(1:3),
    lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        # integer64 column
        i64_is_chr = is(i64, "character"),
        i64_is_fct = is(i64, "factor"),
        # we want Arrow to return TRUE, but bit64 returns FALSE
        # i64_is_int = is(i64, "integer"),
        i64_is_i64 = is(i64, "integer64"),
        i64_is_lst = is(i64, "list"),
        i64_is_lgl = is(i64, "logical"),
        # we want Arrow to return TRUE, but bit64 returns FALSE
        # i64_is_num = is(i64, "numeric"),
        # list column
        lst_is_chr = is(lst, "character"),
        lst_is_fct = is(lst, "factor"),
        lst_is_int = is(lst, "integer"),
        lst_is_i64 = is(lst, "integer64"),
        lst_is_lst = is(lst, "list"),
        lst_is_lgl = is(lst, "logical"),
        lst_is_num = is(lst, "numeric")
      ) %>%
      collect(),
    i64_and_list
  )
})

test_that("type checks with is.*()", {
  # bit64 is also attached at the top of this file; is.integer64() and
  # as.integer64() come from it.
  library(bit64)

  # Every column of the shared fixture against every is.*() predicate.
  compare_dplyr_binding(
    .input %>%
      transmute(
        # character column
        chr_is_chr = is.character(chr),
        chr_is_dbl = is.double(chr),
        chr_is_fct = is.factor(chr),
        chr_is_int = is.integer(chr),
        chr_is_i64 = is.integer64(chr),
        chr_is_lst = is.list(chr),
        chr_is_lgl = is.logical(chr),
        chr_is_num = is.numeric(chr),
        # double column
        dbl_is_chr = is.character(dbl),
        dbl_is_dbl = is.double(dbl),
        dbl_is_fct = is.factor(dbl),
        dbl_is_int = is.integer(dbl),
        dbl_is_i64 = is.integer64(dbl),
        dbl_is_lst = is.list(dbl),
        dbl_is_lgl = is.logical(dbl),
        dbl_is_num = is.numeric(dbl),
        # factor column
        fct_is_chr = is.character(fct),
        fct_is_dbl = is.double(fct),
        fct_is_fct = is.factor(fct),
        fct_is_int = is.integer(fct),
        fct_is_i64 = is.integer64(fct),
        fct_is_lst = is.list(fct),
        fct_is_lgl = is.logical(fct),
        fct_is_num = is.numeric(fct),
        # integer column
        int_is_chr = is.character(int),
        int_is_dbl = is.double(int),
        int_is_fct = is.factor(int),
        int_is_int = is.integer(int),
        int_is_i64 = is.integer64(int),
        int_is_lst = is.list(int),
        int_is_lgl = is.logical(int),
        int_is_num = is.numeric(int),
        # logical column
        lgl_is_chr = is.character(lgl),
        lgl_is_dbl = is.double(lgl),
        lgl_is_fct = is.factor(lgl),
        lgl_is_int = is.integer(lgl),
        lgl_is_i64 = is.integer64(lgl),
        lgl_is_lst = is.list(lgl),
        lgl_is_lgl = is.logical(lgl),
        lgl_is_num = is.numeric(lgl)
      ) %>%
      collect(),
    tbl
  )

  # integer64 and list columns are not taken from the shared fixture, so they
  # get a dedicated input.
  i64_and_list <- tibble(
    i64 = as.integer64(1:3),
    lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
  )
  compare_dplyr_binding(
    .input %>%
      transmute(
        # integer64 column
        i64_is_chr = is.character(i64),
        # TODO: investigate why this is not matching when testthat runs it
        # i64_is_dbl = is.double(i64),
        i64_is_fct = is.factor(i64),
        # we want Arrow to return TRUE, but bit64 returns FALSE
        # i64_is_int = is.integer(i64),
        i64_is_i64 = is.integer64(i64),
        i64_is_lst = is.list(i64),
        i64_is_lgl = is.logical(i64),
        i64_is_num = is.numeric(i64),
        # list column
        lst_is_chr = is.character(lst),
        lst_is_dbl = is.double(lst),
        lst_is_fct = is.factor(lst),
        lst_is_int = is.integer(lst),
        lst_is_i64 = is.integer64(lst),
        lst_is_lst = is.list(lst),
        lst_is_lgl = is.logical(lst),
        lst_is_num = is.numeric(lst)
      ) %>%
      collect(),
    i64_and_list
  )
})

test_that("type checks with is_*()", {
  # Attaches rlang, which supplies the is_character()/is_double()/... predicates
  # used below; conflict warnings on attach are silenced.
  library(rlang, warn.conflicts = FALSE)
  # Character, double, integer and logical columns of the shared fixture against
  # five rlang predicates each. compare_dplyr_binding() is defined outside this
  # file; presumably it evaluates the pipeline on both R and Arrow inputs and
  # expects matching results -- confirm against the helper.
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr_is_chr = is_character(chr),
        chr_is_dbl = is_double(chr),
        chr_is_int = is_integer(chr),
        chr_is_lst = is_list(chr),
        chr_is_lgl = is_logical(chr),
        dbl_is_chr = is_character(dbl),
        dbl_is_dbl = is_double(dbl),
        dbl_is_int = is_integer(dbl),
        dbl_is_lst = is_list(dbl),
        dbl_is_lgl = is_logical(dbl),
        int_is_chr = is_character(int),
        int_is_dbl = is_double(int),
        int_is_int = is_integer(int),
        int_is_lst = is_list(int),
        int_is_lgl = is_logical(int),
        lgl_is_chr = is_character(lgl),
        lgl_is_dbl = is_double(lgl),
        lgl_is_int = is_integer(lgl),
        lgl_is_lst = is_list(lgl),
        lgl_is_lgl = is_logical(lgl)
      ) %>%
      collect(),
    tbl
  )
})

test_that("type checks on expressions", {
  # Here the predicates receive computed expressions (a conversion, arithmetic,
  # a comparison) rather than bare columns.
  compare_dplyr_binding(
    .input %>%
      transmute(
        a = is.character(as.character(int)),
        b = is.integer(as.character(int)),
        c = is.integer(int + int),
        d = is.double(int + dbl),
        e = is.logical(dbl > pi)
      ) %>%
      collect(),
    tbl
  )

  # the code in the expectation below depends on RE2
  skip_if_not_available("re2")

  # Type check applied to the result of a regular-expression match.
  compare_dplyr_binding(
    .input %>%
      transmute(
        a = is.logical(grepl("[def]", chr))
      ) %>%
      collect(),
    tbl
  )
})

test_that("type checks on R scalar literals", {
  # Every predicate argument here is a literal R value, not a column; `tbl` is
  # still passed as the input of the pipeline.
  compare_dplyr_binding(
    .input %>%
      transmute(
        chr_is_chr = is.character("foo"),
        int_is_chr = is.character(42L),
        int_is_int = is.integer(42L),
        chr_is_int = is.integer("foo"),
        dbl_is_num = is.numeric(3.14159),
        int_is_num = is.numeric(42L),
        chr_is_num = is.numeric("foo"),
        dbl_is_dbl = is.double(3.14159),
        chr_is_dbl = is.double("foo"),
        lgl_is_lgl = is.logical(TRUE),
        chr_is_lgl = is.logical("foo"),
        # non-atomic literals: a factor and a list
        fct_is_fct = is.factor(factor("foo", levels = c("foo", "bar", "baz"))),
        chr_is_fct = is.factor("foo"),
        lst_is_lst = is.list(list(c(a = "foo", b = "bar"))),
        chr_is_lst = is.list("foo")
      ) %>%
      collect(),
    tbl
  )
})

test_that("as.factor()/dictionary_encode()", {
  skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression")
  chr_df <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
  num_df <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))

  # Dictionary values of column `x`, reordered by their own sort indices and
  # converted to an R vector.
  sorted_dictionary <- function(encoded) {
    dict <- encoded$x$dictionary()
    as.vector(dict$Take(dict$SortIndices()))
  }

  # Character input.
  compare_dplyr_binding(
    .input %>%
      transmute(x = as.factor(x)) %>%
      collect(),
    chr_df
  )

  # Numeric input: a coercion warning is expected.
  expect_warning(
    compare_dplyr_binding(
      .input %>%
        transmute(x = as.factor(x)) %>%
        collect(),
      num_df
    ),
    "Coercing dictionary values to R character factor levels"
  )

  # dictionary values with default null encoding behavior ("mask") omits
  # nulls from the dictionary values
  expect_equal(
    object = chr_df %>%
      record_batch() %>%
      transmute(x = dictionary_encode(x)) %>%
      compute() %>%
      sorted_dictionary(),
    sort(unique(chr_df$x), na.last = NA)
  )

  # dictionary values with "encode" null encoding behavior includes nulls in
  # the dictionary values
  expect_equal(
    object = chr_df %>%
      record_batch() %>%
      transmute(x = dictionary_encode(x, null_encoding_behavior = "encode")) %>%
      compute() %>%
      sorted_dictionary(),
    sort(unique(chr_df$x), na.last = TRUE)
  )
})

test_that("bad explicit type conversions with as.*()", {
  # Each case below documents a known divergence between Arrow and R: the
  # comparison is expected to raise an error.

  # Arrow returns lowercase "true", "false" (instead of "TRUE", "FALSE" like R)
  logicals <- tibble(lgl = c(TRUE, FALSE, NA))
  expect_error(
    compare_dplyr_binding(
      .input %>%
        transmute(lgl2chr = as.character(lgl)) %>%
        collect(),
      logicals
    )
  )

  # Arrow fails to parse these strings as numbers (instead of returning NAs with
  # a warning like R does)
  not_numbers <- tibble(chr = c("l.O", "S.S", ""))
  expect_error(
    expect_warning(
      compare_dplyr_binding(
        .input %>%
          transmute(chr2num = as.numeric(chr)) %>%
          collect(),
        not_numbers
      )
    )
  )

  # Arrow fails to parse these strings as Booleans (instead of returning NAs
  # like R does)
  not_booleans <- tibble(chr = c("TRU", "FAX", ""))
  expect_error(
    compare_dplyr_binding(
      .input %>%
        transmute(chr2lgl = as.logical(chr)) %>%
        collect(),
      not_booleans
    )
  )
})
Commit	Line	Data
1d09f67e TL	1	# Licensed to the Apache Software Foundation (ASF) under one
	2	# or more contributor license agreements. See the NOTICE file
	3	# distributed with this work for additional information
	4	# regarding copyright ownership. The ASF licenses this file
	5	# to you under the Apache License, Version 2.0 (the
	6	# "License"); you may not use this file except in compliance
	7	# with the License. You may obtain a copy of the License at
	8	#
	9	# http://www.apache.org/licenses/LICENSE-2.0
	10	#
	11	# Unless required by applicable law or agreed to in writing,
	12	# software distributed under the License is distributed on an
	13	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	14	# KIND, either express or implied. See the License for the
	15	# specific language governing permissions and limitations
	16	# under the License.
	17
	18	skip_if_not_available("dataset")
	19
	20	library(dplyr, warn.conflicts = FALSE)
	21	suppressPackageStartupMessages(library(bit64))
	22
	23
	24	tbl <- example_data
	25
	26	test_that("explicit type conversions with cast()", {
	27	num_int32 <- 12L
	28	num_int64 <- bit64::as.integer64(10)
	29
	30	int_types <- c(int8(), int16(), int32(), int64())
	31	uint_types <- c(uint8(), uint16(), uint32(), uint64())
	32	float_types <- c(float32(), float64())
	33
	34	types <- c(
	35	int_types,
	36	uint_types,
	37	float_types,
	38	double(), # not actually a type, a base R function but should be alias for float64
	39	string()
	40	)
	41
	42	for (type in types) {
	43	expect_type_equal(
	44	object = {
	45	t1 <- Table$create(x = num_int32) %>%
	46	transmute(x = cast(x, type)) %>%
	47	compute()
	48	t1$schema[[1]]$type
	49	},
	50	as_type(type)
	51	)
	52	expect_type_equal(
	53	object = {
	54	t1 <- Table$create(x = num_int64) %>%
	55	transmute(x = cast(x, type)) %>%
	56	compute()
	57	t1$schema[[1]]$type
	58	},
	59	as_type(type)
	60	)
	61	}
	62
	63	# Arrow errors when truncating floats...
	64	expect_error(
65	expect_type_equal(
66	object = {
67	t1 <- Table$create(pi = pi) %>%
68	transmute(three = cast(pi, int32())) %>%
69	compute()
70	t1$schema[[1]]$type
71	},
72	int32()
73	),
74	"truncated"
75	)
76
77	# ... unless safe = FALSE (or allow_float_truncate = TRUE)
78	expect_type_equal(
79	object = {
80	t1 <- Table$create(pi = pi) %>%
81	transmute(three = cast(pi, int32(), safe = FALSE)) %>%
82	compute()
83	t1$schema[[1]]$type
84	},
85	int32()
86	)
87	})
88
89	test_that("explicit type conversions with as.*()", {
90	library(bit64)
91	compare_dplyr_binding(
92	.input %>%
93	transmute(
94	int2chr = as.character(int),
95	int2dbl = as.double(int),
96	int2int = as.integer(int),
97	int2num = as.numeric(int),
98	dbl2chr = as.character(dbl),
99	dbl2dbl = as.double(dbl),
100	dbl2int = as.integer(dbl),
101	dbl2num = as.numeric(dbl),
102	) %>%
103	collect(),
104	tbl
105	)
106	compare_dplyr_binding(
107	.input %>%
108	transmute(
109	chr2chr = as.character(chr),
110	chr2dbl = as.double(chr),
111	chr2int = as.integer(chr),
112	chr2num = as.numeric(chr)
113	) %>%
114	collect(),
115	tibble(chr = c("1", "2", "3"))
116	)
117	compare_dplyr_binding(
118	.input %>%
119	transmute(
120	chr2i64 = as.integer64(chr),
121	dbl2i64 = as.integer64(dbl),
122	i642i64 = as.integer64(i64),
123	) %>%
124	collect(),
125	tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10))
126	)
127	compare_dplyr_binding(
128	.input %>%
129	transmute(
130	chr2lgl = as.logical(chr),
131	dbl2lgl = as.logical(dbl),
132	int2lgl = as.logical(int)
133	) %>%
134	collect(),
135	tibble(
136	chr = c("TRUE", "FALSE", "true", "false"),
137	dbl = c(1, 0, -99, 0),
138	int = c(1L, 0L, -99L, 0L)
139	)
140	)
141	compare_dplyr_binding(
142	.input %>%
143	transmute(
144	dbl2chr = as.character(dbl),
145	dbl2dbl = as.double(dbl),
146	dbl2int = as.integer(dbl),
147	dbl2lgl = as.logical(dbl),
148	int2chr = as.character(int),
149	int2dbl = as.double(int),
150	int2int = as.integer(int),
151	int2lgl = as.logical(int),
152	lgl2chr = as.character(lgl), # Arrow returns "true", "false" here ...
153	lgl2dbl = as.double(lgl),
154	lgl2int = as.integer(lgl),
155	lgl2lgl = as.logical(lgl)
156	) %>%
157	collect() %>%
158	# need to use toupper() after collect() or else skip if utf8proc not available
159	mutate(lgl2chr = toupper(lgl2chr)), # ... but we need "TRUE", "FALSE"
160	tibble(
161	dbl = c(1, 0, NA_real_),
162	int = c(1L, 0L, NA_integer_),
163	lgl = c(TRUE, FALSE, NA)
164	)
165	)
166	})
167
168	test_that("is.finite(), is.infinite(), is.nan()", {
169	df <- tibble(x = c(
170	-4.94065645841246544e-324, 1.79769313486231570e+308, 0,
171	NA_real_, NaN, Inf, -Inf
172	))
173	compare_dplyr_binding(
174	.input %>%
175	transmute(
176	is_fin = is.finite(x),
177	is_inf = is.infinite(x)
178	) %>%
179	collect(),
180	df
181	)
182	# is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
183	compare_dplyr_binding(
184	.input %>%
185	transmute(
186	is_nan = is.nan(x)
187	) %>%
188	collect(),
189	df
190	)
191	})
192
193	test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
194	df <- tibble(x = c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7))
195	compare_dplyr_binding(
196	.input %>%
197	transmute(
198	is_na = is.na(x)
199	) %>%
200	collect(),
201	df
202	)
203	})
204
205	test_that("type checks with is() giving Arrow types", {
206	# with class2=DataType
207	expect_equal(
208	Table$create(
209	i32 = Array$create(1, int32()),
210	dec = Array$create(pi)$cast(decimal(3, 2)),
211	f64 = Array$create(1.1, float64()),
212	str = Array$create("a", arrow::string())
213	) %>% transmute(
214	i32_is_i32 = is(i32, int32()),
215	i32_is_dec = is(i32, decimal(3, 2)),
216	i32_is_i64 = is(i32, float64()),
217	i32_is_str = is(i32, arrow::string()),
218	dec_is_i32 = is(dec, int32()),
219	dec_is_dec = is(dec, decimal(3, 2)),
220	dec_is_i64 = is(dec, float64()),
221	dec_is_str = is(dec, arrow::string()),
222	f64_is_i32 = is(f64, int32()),
223	f64_is_dec = is(f64, decimal(3, 2)),
224	f64_is_i64 = is(f64, float64()),
225	f64_is_str = is(f64, arrow::string()),
226	str_is_i32 = is(str, int32()),
227	str_is_dec = is(str, decimal(3, 2)),
228	str_is_i64 = is(str, float64()),
229	str_is_str = is(str, arrow::string())
230	) %>%
231	collect() %>%
232	t() %>%
233	as.vector(),
234	c(
235	TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
236	FALSE, FALSE, FALSE, FALSE, TRUE
237	)
238	)
239	# with class2=string
240	expect_equal(
241	Table$create(
242	i32 = Array$create(1, int32()),
243	f64 = Array$create(1.1, float64()),
244	str = Array$create("a", arrow::string())
245	) %>% transmute(
246	i32_is_i32 = is(i32, "int32"),
247	i32_is_i64 = is(i32, "double"),
248	i32_is_str = is(i32, "string"),
249	f64_is_i32 = is(f64, "int32"),
250	f64_is_i64 = is(f64, "double"),
251	f64_is_str = is(f64, "string"),
252	str_is_i32 = is(str, "int32"),
253	str_is_i64 = is(str, "double"),
254	str_is_str = is(str, "string")
255	) %>%
256	collect() %>%
257	t() %>%
258	as.vector(),
259	c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE)
260	)
261	# with class2=string alias
262	expect_equal(
263	Table$create(
264	f16 = Array$create(NA_real_, halffloat()),
265	f32 = Array$create(1.1, float()),
266	f64 = Array$create(2.2, float64()),
267	lgl = Array$create(TRUE, bool()),
268	str = Array$create("a", arrow::string())
269	) %>% transmute(
270	f16_is_f16 = is(f16, "float16"),
271	f16_is_f32 = is(f16, "float32"),
272	f16_is_f64 = is(f16, "float64"),
273	f16_is_lgl = is(f16, "boolean"),
274	f16_is_str = is(f16, "utf8"),
275	f32_is_f16 = is(f32, "float16"),
276	f32_is_f32 = is(f32, "float32"),
277	f32_is_f64 = is(f32, "float64"),
278	f32_is_lgl = is(f32, "boolean"),
279	f32_is_str = is(f32, "utf8"),
280	f64_is_f16 = is(f64, "float16"),
281	f64_is_f32 = is(f64, "float32"),
282	f64_is_f64 = is(f64, "float64"),
283	f64_is_lgl = is(f64, "boolean"),
284	f64_is_str = is(f64, "utf8"),
285	lgl_is_f16 = is(lgl, "float16"),
286	lgl_is_f32 = is(lgl, "float32"),
287	lgl_is_f64 = is(lgl, "float64"),
288	lgl_is_lgl = is(lgl, "boolean"),
289	lgl_is_str = is(lgl, "utf8"),
290	str_is_f16 = is(str, "float16"),
291	str_is_f32 = is(str, "float32"),
292	str_is_f64 = is(str, "float64"),
293	str_is_lgl = is(str, "boolean"),
294	str_is_str = is(str, "utf8")
295	) %>%
296	collect() %>%
297	t() %>%
298	as.vector(),
299	c(
300	TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
301	FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
302	FALSE, FALSE, TRUE
303	)
304	)
305	})
306
307	test_that("type checks with is() giving R types", {
308	library(bit64)
309	compare_dplyr_binding(
310	.input %>%
311	transmute(
312	chr_is_chr = is(chr, "character"),
313	chr_is_fct = is(chr, "factor"),
314	chr_is_int = is(chr, "integer"),
315	chr_is_i64 = is(chr, "integer64"),
316	chr_is_lst = is(chr, "list"),
317	chr_is_lgl = is(chr, "logical"),
318	chr_is_num = is(chr, "numeric"),
319	dbl_is_chr = is(dbl, "character"),
320	dbl_is_fct = is(dbl, "factor"),
321	dbl_is_int = is(dbl, "integer"),
322	dbl_is_i64 = is(dbl, "integer64"),
323	dbl_is_lst = is(dbl, "list"),
324	dbl_is_lgl = is(dbl, "logical"),
325	dbl_is_num = is(dbl, "numeric"),
326	fct_is_chr = is(fct, "character"),
327	fct_is_fct = is(fct, "factor"),
328	fct_is_int = is(fct, "integer"),
329	fct_is_i64 = is(fct, "integer64"),
330	fct_is_lst = is(fct, "list"),
331	fct_is_lgl = is(fct, "logical"),
332	fct_is_num = is(fct, "numeric"),
333	int_is_chr = is(int, "character"),
334	int_is_fct = is(int, "factor"),
335	int_is_int = is(int, "integer"),
336	int_is_i64 = is(int, "integer64"),
337	int_is_lst = is(int, "list"),
338	int_is_lgl = is(int, "logical"),
339	int_is_num = is(int, "numeric"),
340	lgl_is_chr = is(lgl, "character"),
341	lgl_is_fct = is(lgl, "factor"),
342	lgl_is_int = is(lgl, "integer"),
343	lgl_is_i64 = is(lgl, "integer64"),
344	lgl_is_lst = is(lgl, "list"),
345	lgl_is_lgl = is(lgl, "logical"),
346	lgl_is_num = is(lgl, "numeric")
347	) %>%
348	collect(),
349	tbl
350	)
351	compare_dplyr_binding(
352	.input %>%
353	transmute(
354	i64_is_chr = is(i64, "character"),
355	i64_is_fct = is(i64, "factor"),
356	# we want Arrow to return TRUE, but bit64 returns FALSE
357	# i64_is_int = is(i64, "integer"),
358	i64_is_i64 = is(i64, "integer64"),
359	i64_is_lst = is(i64, "list"),
360	i64_is_lgl = is(i64, "logical"),
361	# we want Arrow to return TRUE, but bit64 returns FALSE
362	# i64_is_num = is(i64, "numeric"),
363	lst_is_chr = is(lst, "character"),
364	lst_is_fct = is(lst, "factor"),
365	lst_is_int = is(lst, "integer"),
366	lst_is_i64 = is(lst, "integer64"),
367	lst_is_lst = is(lst, "list"),
368	lst_is_lgl = is(lst, "logical"),
369	lst_is_num = is(lst, "numeric")
370	) %>%
371	collect(),
372	tibble(
373	i64 = as.integer64(1:3),
374	lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
375	)
376	)
377	})
378
379	test_that("type checks with is.*()", {
380	library(bit64)
381	compare_dplyr_binding(
382	.input %>%
383	transmute(
384	chr_is_chr = is.character(chr),
385	chr_is_dbl = is.double(chr),
386	chr_is_fct = is.factor(chr),
387	chr_is_int = is.integer(chr),
388	chr_is_i64 = is.integer64(chr),
389	chr_is_lst = is.list(chr),
390	chr_is_lgl = is.logical(chr),
391	chr_is_num = is.numeric(chr),
392	dbl_is_chr = is.character(dbl),
393	dbl_is_dbl = is.double(dbl),
394	dbl_is_fct = is.factor(dbl),
395	dbl_is_int = is.integer(dbl),
396	dbl_is_i64 = is.integer64(dbl),
397	dbl_is_lst = is.list(dbl),
398	dbl_is_lgl = is.logical(dbl),
399	dbl_is_num = is.numeric(dbl),
400	fct_is_chr = is.character(fct),
401	fct_is_dbl = is.double(fct),
402	fct_is_fct = is.factor(fct),
403	fct_is_int = is.integer(fct),
404	fct_is_i64 = is.integer64(fct),
405	fct_is_lst = is.list(fct),
406	fct_is_lgl = is.logical(fct),
407	fct_is_num = is.numeric(fct),
408	int_is_chr = is.character(int),
409	int_is_dbl = is.double(int),
410	int_is_fct = is.factor(int),
411	int_is_int = is.integer(int),
412	int_is_i64 = is.integer64(int),
413	int_is_lst = is.list(int),
414	int_is_lgl = is.logical(int),
415	int_is_num = is.numeric(int),
416	lgl_is_chr = is.character(lgl),
417	lgl_is_dbl = is.double(lgl),
418	lgl_is_fct = is.factor(lgl),
419	lgl_is_int = is.integer(lgl),
420	lgl_is_i64 = is.integer64(lgl),
421	lgl_is_lst = is.list(lgl),
422	lgl_is_lgl = is.logical(lgl),
423	lgl_is_num = is.numeric(lgl)
424	) %>%
425	collect(),
426	tbl
427	)
428	compare_dplyr_binding(
429	.input %>%
430	transmute(
431	i64_is_chr = is.character(i64),
432	# TODO: investigate why this is not matching when testthat runs it
433	# i64_is_dbl = is.double(i64),
434	i64_is_fct = is.factor(i64),
435	# we want Arrow to return TRUE, but bit64 returns FALSE
436	# i64_is_int = is.integer(i64),
437	i64_is_i64 = is.integer64(i64),
438	i64_is_lst = is.list(i64),
439	i64_is_lgl = is.logical(i64),
440	i64_is_num = is.numeric(i64),
441	lst_is_chr = is.character(lst),
442	lst_is_dbl = is.double(lst),
443	lst_is_fct = is.factor(lst),
444	lst_is_int = is.integer(lst),
445	lst_is_i64 = is.integer64(lst),
446	lst_is_lst = is.list(lst),
447	lst_is_lgl = is.logical(lst),
448	lst_is_num = is.numeric(lst)
449	) %>%
450	collect(),
451	tibble(
452	i64 = as.integer64(1:3),
453	lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
454	)
455	)
456	})
457
# Exercises the rlang type predicates (is_character(), is_double(),
# is_integer(), is_list(), is_logical()) on the chr, dbl, int and lgl columns
# of `tbl`. The pipeline is handed to compare_dplyr_binding(), a helper
# defined outside this file (presumably it checks the Arrow result against
# the plain dplyr result -- confirm against its definition).
test_that("type checks with is_*()", {
  # The is_*() predicates used below are provided by rlang.
  library(rlang, warn.conflicts = FALSE)

  compare_dplyr_binding(
    .input %>%
      transmute(
        # is_character() on each column
        chr_is_chr = is_character(chr),
        dbl_is_chr = is_character(dbl),
        int_is_chr = is_character(int),
        lgl_is_chr = is_character(lgl),
        # is_double() on each column
        chr_is_dbl = is_double(chr),
        dbl_is_dbl = is_double(dbl),
        int_is_dbl = is_double(int),
        lgl_is_dbl = is_double(lgl),
        # is_integer() on each column
        chr_is_int = is_integer(chr),
        dbl_is_int = is_integer(dbl),
        int_is_int = is_integer(int),
        lgl_is_int = is_integer(lgl),
        # is_list() on each column
        chr_is_lst = is_list(chr),
        dbl_is_lst = is_list(dbl),
        int_is_lst = is_list(int),
        lgl_is_lst = is_list(lgl),
        # is_logical() on each column
        chr_is_lgl = is_logical(chr),
        dbl_is_lgl = is_logical(dbl),
        int_is_lgl = is_logical(int),
        lgl_is_lgl = is_logical(lgl)
      ) %>%
      collect(),
    tbl
  )
})
488
# Applies the base is.*() type predicates to computed expressions (casts,
# arithmetic, comparisons, grepl()) rather than to bare columns of `tbl`.
test_that("type checks on expressions", {
  compare_dplyr_binding(
    .input %>%
      transmute(
        # an integer column cast to character
        chr_cast_is_chr = is.character(as.character(int)),
        chr_cast_is_int = is.integer(as.character(int)),
        # arithmetic: int + int, and int + dbl
        int_sum_is_int = is.integer(int + int),
        mixed_sum_is_dbl = is.double(int + dbl),
        # a comparison expression
        comparison_is_lgl = is.logical(dbl > pi)
      ) %>%
      collect(),
    tbl
  )

  # The remaining expectation uses a regular expression and therefore
  # depends on RE2; skip the rest of the test when it is not available.
  skip_if_not_available("re2")

  compare_dplyr_binding(
    .input %>%
      transmute(
        grepl_is_lgl = is.logical(grepl("[def]", chr))
      ) %>%
      collect(),
    tbl
  )
})
515
# Applies the base is.*() type predicates to R literals written directly in
# the transmute() call (strings, numbers, a logical, a factor, a list)
# instead of to columns of `tbl`.
test_that("type checks on R scalar literals", {
  compare_dplyr_binding(
    .input %>%
      transmute(
        # is.character()
        chr_is_chr = is.character("foo"),
        int_is_chr = is.character(42L),
        # is.integer()
        int_is_int = is.integer(42L),
        chr_is_int = is.integer("foo"),
        # is.numeric()
        dbl_is_num = is.numeric(3.14159),
        int_is_num = is.numeric(42L),
        chr_is_num = is.numeric("foo"),
        # is.double()
        dbl_is_dbl = is.double(3.14159),
        chr_is_dbl = is.double("foo"),
        # is.logical()
        lgl_is_lgl = is.logical(TRUE),
        chr_is_lgl = is.logical("foo"),
        # is.factor()
        fct_is_fct = is.factor(
          factor("foo", levels = c("foo", "bar", "baz"))
        ),
        chr_is_fct = is.factor("foo"),
        # is.list()
        lst_is_lst = is.list(list(c(a = "foo", b = "bar"))),
        chr_is_lst = is.list("foo")
      ) %>%
      collect(),
    tbl
  )
})
540
# Covers as.factor() and dictionary_encode() on a character column and a
# numeric column, both containing NA. The skip() on the first line means
# nothing below it currently runs (see ARROW-12632 in the skip message).
test_that("as.factor()/dictionary_encode()", {
  skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression")

  chr_df <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
  num_df <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))

  # as.factor() on character input
  compare_dplyr_binding(
    .input %>%
      transmute(x = as.factor(x)) %>%
      collect(),
    chr_df
  )

  # as.factor() on numeric input is expected to raise the coercion warning
  expect_warning(
    compare_dplyr_binding(
      .input %>%
        transmute(x = as.factor(x)) %>%
        collect(),
      num_df
    ),
    "Coercing dictionary values to R character factor levels"
  )

  # With the default null encoding behavior ("mask") nulls are left out of
  # the dictionary values, so the expected vector drops NA (na.last = NA).
  masked_batch <- chr_df %>%
    record_batch() %>%
    transmute(x = dictionary_encode(x)) %>%
    compute()
  masked_dict <- masked_batch$x$dictionary()
  expect_equal(
    as.vector(masked_dict$Take(masked_dict$SortIndices())),
    sort(unique(chr_df[["x"]]), na.last = NA)
  )

  # With the "encode" null encoding behavior nulls are part of the dictionary
  # values, so the expected vector keeps NA at the end (na.last = TRUE).
  encoded_batch <- chr_df %>%
    record_batch() %>%
    transmute(x = dictionary_encode(x, null_encoding_behavior = "encode")) %>%
    compute()
  encoded_dict <- encoded_batch$x$dictionary()
  expect_equal(
    as.vector(encoded_dict$Take(encoded_dict$SortIndices())),
    sort(unique(chr_df[["x"]]), na.last = TRUE)
  )
})
591
# Records conversions where Arrow and R are known to disagree: every
# comparison below is wrapped in expect_error(), i.e. the dplyr-vs-Arrow
# comparison is expected to fail.
test_that("bad explicit type conversions with as.*()", {
  # Logical -> character: Arrow yields lowercase "true"/"false", whereas R
  # yields "TRUE"/"FALSE".
  lgl_input <- tibble(lgl = c(TRUE, FALSE, NA))
  expect_error(
    compare_dplyr_binding(
      .input %>%
        transmute(lgl2chr = as.character(lgl)) %>%
        collect(),
      lgl_input
    )
  )

  # Character -> numeric: Arrow fails to parse these strings as numbers,
  # whereas R returns NAs with a warning (hence the inner expect_warning()).
  unparseable_numbers <- tibble(chr = c("l.O", "S.S", ""))
  expect_error(
    expect_warning(
      compare_dplyr_binding(
        .input %>%
          transmute(chr2num = as.numeric(chr)) %>%
          collect(),
        unparseable_numbers
      )
    )
  )

  # Character -> logical: Arrow fails to parse these strings as Booleans,
  # whereas R returns NAs.
  unparseable_booleans <- tibble(chr = c("TRU", "FAX", ""))
  expect_error(
    compare_dplyr_binding(
      .input %>%
        transmute(chr2lgl = as.logical(chr)) %>%
        collect(),
      unparseable_booleans
    )
  )
})