ceph/src/arrow/r/R/dplyr-group-by.R

   1 # Licensed to the Apache Software Foundation (ASF) under one
   2 # or more contributor license agreements.  See the NOTICE file
   3 # distributed with this work for additional information
   4 # regarding copyright ownership.  The ASF licenses this file
   5 # to you under the Apache License, Version 2.0 (the
   6 # "License"); you may not use this file except in compliance
   7 # with the License.  You may obtain a copy of the License at
   8 #
   9 #   http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing,
  12 # software distributed under the License is distributed on an
  13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14 # KIND, either express or implied.  See the License for the
  15 # specific language governing permissions and limitations
  16 # under the License.
  17
  18
  19 # The following S3 methods are registered on load if dplyr is present
  20
  21 group_by.arrow_dplyr_query <- function(.data,
  22                                        ...,
  23                                        .add = FALSE,
  24                                        add = .add,
  25                                        .drop = dplyr::group_by_drop_default(.data)) {
  26   .data <- as_adq(.data)
  27   new_groups <- enquos(...)
  28   # ... can contain expressions (i.e. can add (or rename?) columns) and so we
  29   # need to identify those and add them on to the query with mutate. Specifically,
  30   # we want to mark as new:
  31   #   * expressions (named or otherwise)
  32   #   * variables that have new names
  33   # All others (i.e. simple references to variables) should not be (re)-added
  34
  35   # Identify any groups with names which aren't in names of .data
  36   new_group_ind <- map_lgl(new_groups, ~ !(quo_name(.x) %in% names(.data)))
  37   # Identify any groups which don't have names
  38   named_group_ind <- map_lgl(names(new_groups), nzchar)
  39   # Retain any new groups identified above
  40   new_groups <- new_groups[new_group_ind | named_group_ind]
  41   if (length(new_groups)) {
  42     # now either use the name that was given in ... or if that is "" then use the expr
  43     names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x), .y))
  44
  45     # Add them to the data
  46     .data <- dplyr::mutate(.data, !!!new_groups)
  47   }
  48   if (".add" %in% names(formals(dplyr::group_by))) {
  49     # For compatibility with dplyr >= 1.0
  50     gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
  51   } else {
  52     gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
  53   }
  54   .data$group_by_vars <- gv
  55   .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data))
  56   .data
  57 }
  58 group_by.Dataset <- group_by.ArrowTabular <- group_by.arrow_dplyr_query
  59
  60 groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
  61 groups.Dataset <- groups.ArrowTabular <- function(x) NULL
  62
  63 group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
  64 group_vars.Dataset <- function(x) NULL
  65 group_vars.RecordBatchReader <- function(x) NULL
  66 group_vars.ArrowTabular <- function(x) {
  67   x$r_metadata$attributes$.group_vars
  68 }
  69
  70 # the logical literal in the two functions below controls the default value of
  71 # the .drop argument to group_by()
  72 group_by_drop_default.arrow_dplyr_query <-
  73   function(.tbl) .tbl$drop_empty_groups %||% TRUE
  74 group_by_drop_default.Dataset <- group_by_drop_default.ArrowTabular <-
  75   function(.tbl) TRUE
  76
  77 ungroup.arrow_dplyr_query <- function(x, ...) {
  78   x$group_by_vars <- character()
  79   x$drop_empty_groups <- NULL
  80   x
  81 }
  82 ungroup.Dataset <- force
  83 ungroup.ArrowTabular <- function(x) {
  84   x$r_metadata$attributes$.group_vars <- NULL
  85   x
  86 }