From 6d437e11d4acfef8fa194e6977e5b6cf196af965 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Tue, 27 Aug 2024 16:04:49 -0400 Subject: [PATCH] Move `.by` to the data frame method, remove `.by` handling, misc tweaks --- NEWS.md | 6 ++-- R/fill.R | 57 ++++++++++++++++++++--------------- man/fill.Rd | 24 +++++++++------ tests/testthat/_snaps/fill.md | 25 +++++++++++---- tests/testthat/test-fill.R | 44 ++++++++++++++++++++------- 5 files changed, 102 insertions(+), 54 deletions(-) diff --git a/NEWS.md b/NEWS.md index edd30991..41d9f6f6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # tidyr (development version) +* `fill()` gains a `.by` argument as an alternative to `dplyr::group_by()` for + applying the fill per group, similar to `nest(.by =)` and + `dplyr::mutate(.by =)` (@olivroy, #1439). + * `expand_grid()` gains a new `.vary` argument, allowing users to control whether the first column varies fastest or slowest (#1543, @JamesHWade). @@ -12,8 +16,6 @@ * tidyr now requires dplyr >=1.1.0 (#1568, @catalamarti). -* `fill()` gains `.by` similar to how `mutate()` works (@olivroy, #1439). - # tidyr 1.3.1 * `pivot_wider` now uses `.by` and `|>` syntax for the dplyr helper message to diff --git a/R/fill.R b/R/fill.R index 704f4024..0ae7403a 100644 --- a/R/fill.R +++ b/R/fill.R @@ -9,11 +9,11 @@ #' @section Grouped data frames: #' With grouped data frames created by [dplyr::group_by()], `fill()` will be #' applied _within_ each group, meaning that it won't fill across group -#' boundaries. +#' boundaries. This can also be accomplished using the `.by` argument to +#' `fill()`, which creates a temporary grouping for just this operation. #' #' @param data A data frame. #' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to fill. -#' @inheritParams dplyr::mutate #' @param .direction Direction in which to fill missing values. Currently #' either "down" (the default), "up", "downup" (i.e. first down and then up) #' or "updown" (first up and then down). @@ -83,27 +83,39 @@ #' 3, "Danielle", "Observer", NA #' ) #' -#' # The values are inconsistently missing by position within the group -#' # Use .direction = "downup" to fill missing values in both directions +#' # The values are inconsistently missing by position within the `group`. +#' # Use `.direction = "downup"` to fill missing values in both directions +#' # and `.by = group` to apply the fill per group. +#' squirrels %>% +#' fill(n_squirrels, .direction = "downup", .by = group) +#' +#' # If you want, you can also supply a data frame grouped with `group_by()`, +#' # but don't forget to `ungroup()`! #' squirrels %>% #' dplyr::group_by(group) %>% #' fill(n_squirrels, .direction = "downup") %>% #' dplyr::ungroup() -#' -#' # Using `.direction = "updown"` accomplishes the same goal in this example -#' -#' # Using .by simplifies this example. -#' squirrels %>% -#' fill(n_squirrels, .direction = "downup", .by = group) -fill <- function(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL) { - check_dots_unnamed() +fill <- function(data, + ..., + .direction = c("down", "up", "downup", "updown")) { UseMethod("fill") } +#' @inheritParams dplyr::mutate +#' @rdname fill #' @export -fill.data.frame <- function(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL) { - vars <- names(tidyselect::eval_select(expr(c(...)), data = data, allow_rename = FALSE)) - by <- names(tidyselect::eval_select(enquo(.by), data = data, allow_rename = FALSE)) +fill.data.frame <- function(data, + ..., + .direction = c("down", "up", "downup", "updown"), + .by = NULL) { + # Must be here rather than in the generic due to the placement of `.by` + check_dots_unnamed() + + vars <- names(tidyselect::eval_select( + expr = expr(c(...)), + data = data, + allow_rename = FALSE + )) .direction <- arg_match0( arg = .direction, @@ -114,14 +126,9 @@ fill.data.frame <- function(data, ..., .direction = c("down", "up", "downup", "u vec_fill_missing(col, direction = .direction) } - if (dplyr::is_grouped_df(data)) { - if (length(by) > 0) { - cli::cli_abort(c( - "Can't supply {.arg .by} when {.arg data} is a grouped data frame." - )) - } - dplyr::mutate(data, dplyr::across(any_of(vars), .fns = fn)) - } else { - dplyr::mutate(data, dplyr::across(any_of(vars), .fns = fn), .by = all_of(by)) - } + dplyr::mutate( + data, + dplyr::across(any_of(vars), .fns = fn), + .by = {{ .by }} + ) } diff --git a/man/fill.Rd b/man/fill.Rd index c3b02183..01d486fe 100644 --- a/man/fill.Rd +++ b/man/fill.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/fill.R \name{fill} \alias{fill} +\alias{fill.data.frame} \title{Fill in missing values with previous or next value} \usage{ -fill(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL) +fill(data, ..., .direction = c("down", "up", "downup", "updown")) + +\method{fill}{data.frame}(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL) } \arguments{ \item{data}{A data frame.} @@ -33,7 +36,8 @@ Missing values are replaced in atomic vectors; \code{NULL}s are replaced in list With grouped data frames created by \code{\link[dplyr:group_by]{dplyr::group_by()}}, \code{fill()} will be applied \emph{within} each group, meaning that it won't fill across group -boundaries. +boundaries. This can also be accomplished using the \code{.by} argument to +\code{fill()}, which creates a temporary grouping for just this operation. } \examples{ @@ -101,16 +105,16 @@ squirrels <- tibble::tribble( 3, "Danielle", "Observer", NA ) -# The values are inconsistently missing by position within the group -# Use .direction = "downup" to fill missing values in both directions +# The values are inconsistently missing by position within the `group`. +# Use `.direction = "downup"` to fill missing values in both directions +# and `.by = group` to apply the fill per group. +squirrels \%>\% + fill(n_squirrels, .direction = "downup", .by = group) + +# If you want, you can also supply a data frame grouped with `group_by()`, +# but don't forget to `ungroup()`! squirrels \%>\% dplyr::group_by(group) \%>\% fill(n_squirrels, .direction = "downup") \%>\% dplyr::ungroup() - -# Using `.direction = "updown"` accomplishes the same goal in this example - -# Using .by simplifies this example. -squirrels \%>\% - fill(n_squirrels, .direction = "downup", .by = group) } diff --git a/tests/testthat/_snaps/fill.md b/tests/testthat/_snaps/fill.md index 35e90036..f5f92aad 100644 --- a/tests/testthat/_snaps/fill.md +++ b/tests/testthat/_snaps/fill.md @@ -1,3 +1,13 @@ +# errors on named `...` inputs + + Code + fill(df, fooy = x) + Condition + Error in `fill()`: + ! Arguments in `...` must be passed by position, not name. + x Problematic argument: + * fooy = x + # validates its inputs Code @@ -6,17 +16,20 @@ Error in `fill()`: ! `.direction` must be one of "down", "up", "downup", or "updown", not "foo". -# fill works with .by +# `.by` can't select columns that don't exist Code - df %>% fill(y, .by = z) + fill(df, y, .by = z) Condition - Error in `fill()`: + Error in `dplyr::mutate()`: ! Can't select columns that don't exist. x Column `z` doesn't exist. + +# `.by` can't be used on a grouped data frame + Code - gr_df %>% fill(y, .by = x) + fill(df, y, .by = x) Condition - Error in `fill()`: - ! Can't supply `.by` when `data` is a grouped data frame. + Error in `dplyr::mutate()`: + ! Can't supply `.by` when `.data` is a grouped data frame. diff --git a/tests/testthat/test-fill.R b/tests/testthat/test-fill.R index cdd1f82e..9a3f686c 100644 --- a/tests/testthat/test-fill.R +++ b/tests/testthat/test-fill.R @@ -106,25 +106,35 @@ test_that("fill preserves attributes", { expect_equal(attributes(out_u$x), attributes(df$x)) }) -test_that("fill respects grouping and `.by`", { +test_that("fill respects existing grouping and `.by`", { df <- tibble(x = c(1, 1, 2), y = c(1, NA, NA)) + out <- df %>% dplyr::group_by(x) %>% fill(y) - expect_equal(out$y, c(1, 1, NA)) + expect_identical(out$y, c(1, 1, NA)) + expect_identical(dplyr::group_vars(out), "x") + out <- df %>% fill(y, .by = x) - expect_equal(out$y, c(1, 1, NA)) - - + expect_identical(out$y, c(1, 1, NA)) + expect_identical(dplyr::group_vars(out), character()) }) test_that("works when there is a column named `.direction` in the data (#1319)", { df <- tibble(x = c(1, NA, 2), .direction = 1:3) - expect_no_error(out <- fill(df, x)) + out <- fill(df, x) expect_identical(out$x, c(1, 1, 2)) }) +test_that("errors on named `...` inputs", { + df <- tibble(x = c(1, NA, 2)) + + expect_snapshot(error = TRUE, { + fill(df, fooy = x) + }) +}) + test_that("validates its inputs", { df <- tibble(x = c(1, NA, 2)) expect_snapshot(error = TRUE, { @@ -132,11 +142,23 @@ test_that("validates its inputs", { }) }) -test_that("fill works with .by", { - df <- tibble(x = c(1, 1, 2), y = c(1, NA, NA)) - gr_df <- dplyr::group_by(df, x) +test_that("`.by` can't select columns that don't exist", { + df <- tibble(x = 1, y = 2) + + # This shows `mutate()` in the error, but we accept that to not have to handle + # `.by` in any way. + expect_snapshot(error = TRUE, { + fill(df, y, .by = z) + }) +}) + +test_that("`.by` can't be used on a grouped data frame", { + df <- tibble(x = 1, y = 2) + df <- dplyr::group_by(df, x) + + # This shows `mutate()` in the error, but we accept that to not have to handle + # `.by` in any way. expect_snapshot(error = TRUE, { - df %>% fill(y, .by = z) - gr_df %>% fill(y, .by = x) + fill(df, y, .by = x) }) })