From 6d437e11d4acfef8fa194e6977e5b6cf196af965 Mon Sep 17 00:00:00 2001
From: Davis Vaughan <davis@posit.co>
Date: Tue, 27 Aug 2024 16:04:49 -0400
Subject: [PATCH] Move `.by` to the data frame method, remove `.by` handling,
 misc tweaks

---
 NEWS.md                       |  6 ++--
 R/fill.R                      | 57 ++++++++++++++++++++---------------
 man/fill.Rd                   | 24 +++++++++------
 tests/testthat/_snaps/fill.md | 25 +++++++++++----
 tests/testthat/test-fill.R    | 44 ++++++++++++++++++++-------
 5 files changed, 102 insertions(+), 54 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index edd30991..41d9f6f6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # tidyr (development version)
 
+* `fill()` gains a `.by` argument as an alternative to `dplyr::group_by()` for
+  applying the fill per group, similar to `nest(.by =)` and
+  `dplyr::mutate(.by =)` (@olivroy, #1439).
+
 * `expand_grid()` gains a new `.vary` argument, allowing users to control
   whether the first column varies fastest or slowest (#1543, @JamesHWade).
 
@@ -12,8 +16,6 @@
 
 * tidyr now requires dplyr >=1.1.0 (#1568, @catalamarti).
 
-* `fill()` gains `.by` similar to how `mutate()` works (@olivroy, #1439).
-
 # tidyr 1.3.1
 
 * `pivot_wider` now uses `.by` and `|>` syntax for the dplyr helper message to
diff --git a/R/fill.R b/R/fill.R
index 704f4024..0ae7403a 100644
--- a/R/fill.R
+++ b/R/fill.R
@@ -9,11 +9,11 @@
 #' @section Grouped data frames:
 #' With grouped data frames created by [dplyr::group_by()], `fill()` will be
 #' applied _within_ each group, meaning that it won't fill across group
-#' boundaries.
+#' boundaries. This can also be accomplished using the `.by` argument to
+#' `fill()`, which creates a temporary grouping for just this operation.
 #'
 #' @param data A data frame.
 #' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to fill.
-#' @inheritParams dplyr::mutate
 #' @param .direction Direction in which to fill missing values. Currently
 #'   either "down" (the default), "up", "downup" (i.e. first down and then up)
 #'   or "updown" (first up and then down).
@@ -83,27 +83,39 @@
 #'   3, "Danielle",    "Observer",   NA
 #' )
 #'
-#' # The values are inconsistently missing by position within the group
-#' # Use .direction = "downup" to fill missing values in both directions
+#' # The values are inconsistently missing by position within the `group`.
+#' # Use `.direction = "downup"` to fill missing values in both directions
+#' # and `.by = group` to apply the fill per group.
+#' squirrels %>%
+#'   fill(n_squirrels, .direction = "downup", .by = group)
+#'
+#' # If you want, you can also supply a data frame grouped with `group_by()`,
+#' # but don't forget to `ungroup()`!
 #' squirrels %>%
 #'   dplyr::group_by(group) %>%
 #'   fill(n_squirrels, .direction = "downup") %>%
 #'   dplyr::ungroup()
-#'
-#' # Using `.direction = "updown"` accomplishes the same goal in this example
-#'
-#' # Using .by simplifies this example.
-#' squirrels %>%
-#'   fill(n_squirrels, .direction = "downup", .by = group)
-fill <- function(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL) {
-  check_dots_unnamed()
+fill <- function(data,
+                 ...,
+                 .direction = c("down", "up", "downup", "updown")) {
   UseMethod("fill")
 }
 
+#' @inheritParams dplyr::mutate
+#' @rdname fill
 #' @export
-fill.data.frame <- function(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL) {
-  vars <- names(tidyselect::eval_select(expr(c(...)), data = data, allow_rename = FALSE))
-  by   <- names(tidyselect::eval_select(enquo(.by), data = data, allow_rename = FALSE))
+fill.data.frame <- function(data,
+                            ...,
+                            .direction = c("down", "up", "downup", "updown"),
+                            .by = NULL) {
+  # Must be here rather than in the generic due to the placement of `.by`
+  check_dots_unnamed()
+
+  vars <- names(tidyselect::eval_select(
+    expr = expr(c(...)),
+    data = data,
+    allow_rename = FALSE
+  ))
 
   .direction <- arg_match0(
     arg = .direction,
@@ -114,14 +126,9 @@ fill.data.frame <- function(data, ..., .direction = c("down", "up", "downup", "u
     vec_fill_missing(col, direction = .direction)
   }
 
-  if (dplyr::is_grouped_df(data)) {
-    if (length(by) > 0) {
-      cli::cli_abort(c(
-        "Can't supply {.arg .by} when {.arg data} is a grouped data frame."
-      ))
-    }
-    dplyr::mutate(data, dplyr::across(any_of(vars), .fns = fn))
-  } else {
-    dplyr::mutate(data, dplyr::across(any_of(vars), .fns = fn), .by = all_of(by))
-  }
+  dplyr::mutate(
+    data,
+    dplyr::across(any_of(vars), .fns = fn),
+    .by = {{ .by }}
+  )
 }
diff --git a/man/fill.Rd b/man/fill.Rd
index c3b02183..01d486fe 100644
--- a/man/fill.Rd
+++ b/man/fill.Rd
@@ -2,9 +2,12 @@
 % Please edit documentation in R/fill.R
 \name{fill}
 \alias{fill}
+\alias{fill.data.frame}
 \title{Fill in missing values with previous or next value}
 \usage{
-fill(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL)
+fill(data, ..., .direction = c("down", "up", "downup", "updown"))
+
+\method{fill}{data.frame}(data, ..., .direction = c("down", "up", "downup", "updown"), .by = NULL)
 }
 \arguments{
 \item{data}{A data frame.}
@@ -33,7 +36,8 @@ Missing values are replaced in atomic vectors; \code{NULL}s are replaced in list
 
 With grouped data frames created by \code{\link[dplyr:group_by]{dplyr::group_by()}}, \code{fill()} will be
 applied \emph{within} each group, meaning that it won't fill across group
-boundaries.
+boundaries. This can also be accomplished using the \code{.by} argument to
+\code{fill()}, which creates a temporary grouping for just this operation.
 }
 
 \examples{
@@ -101,16 +105,16 @@ squirrels <- tibble::tribble(
   3, "Danielle",    "Observer",   NA
 )
 
-# The values are inconsistently missing by position within the group
-# Use .direction = "downup" to fill missing values in both directions
+# The values are inconsistently missing by position within the `group`.
+# Use `.direction = "downup"` to fill missing values in both directions
+# and `.by = group` to apply the fill per group.
+squirrels \%>\%
+  fill(n_squirrels, .direction = "downup", .by = group)
+
+# If you want, you can also supply a data frame grouped with `group_by()`,
+# but don't forget to `ungroup()`!
 squirrels \%>\%
   dplyr::group_by(group) \%>\%
   fill(n_squirrels, .direction = "downup") \%>\%
   dplyr::ungroup()
-
-# Using `.direction = "updown"` accomplishes the same goal in this example
-
-# Using .by simplifies this example.
-squirrels \%>\%
-  fill(n_squirrels, .direction = "downup", .by = group)
 }
diff --git a/tests/testthat/_snaps/fill.md b/tests/testthat/_snaps/fill.md
index 35e90036..f5f92aad 100644
--- a/tests/testthat/_snaps/fill.md
+++ b/tests/testthat/_snaps/fill.md
@@ -1,3 +1,13 @@
+# errors on named `...` inputs
+
+    Code
+      fill(df, fooy = x)
+    Condition
+      Error in `fill()`:
+      ! Arguments in `...` must be passed by position, not name.
+      x Problematic argument:
+      * fooy = x
+
 # validates its inputs
 
     Code
@@ -6,17 +16,20 @@
       Error in `fill()`:
       ! `.direction` must be one of "down", "up", "downup", or "updown", not "foo".
 
-# fill works with .by
+# `.by` can't select columns that don't exist
 
     Code
-      df %>% fill(y, .by = z)
+      fill(df, y, .by = z)
     Condition
-      Error in `fill()`:
+      Error in `dplyr::mutate()`:
       ! Can't select columns that don't exist.
       x Column `z` doesn't exist.
+
+# `.by` can't be used on a grouped data frame
+
     Code
-      gr_df %>% fill(y, .by = x)
+      fill(df, y, .by = x)
     Condition
-      Error in `fill()`:
-      ! Can't supply `.by` when `data` is a grouped data frame.
+      Error in `dplyr::mutate()`:
+      ! Can't supply `.by` when `.data` is a grouped data frame.
 
diff --git a/tests/testthat/test-fill.R b/tests/testthat/test-fill.R
index cdd1f82e..9a3f686c 100644
--- a/tests/testthat/test-fill.R
+++ b/tests/testthat/test-fill.R
@@ -106,25 +106,35 @@ test_that("fill preserves attributes", {
   expect_equal(attributes(out_u$x), attributes(df$x))
 })
 
-test_that("fill respects grouping and `.by`", {
+test_that("fill respects existing grouping and `.by`", {
   df <- tibble(x = c(1, 1, 2), y = c(1, NA, NA))
+
   out <- df %>%
     dplyr::group_by(x) %>%
     fill(y)
-  expect_equal(out$y, c(1, 1, NA))
+  expect_identical(out$y, c(1, 1, NA))
+  expect_identical(dplyr::group_vars(out), "x")
+
   out <- df %>%
     fill(y, .by = x)
-  expect_equal(out$y, c(1, 1, NA))
-
-
+  expect_identical(out$y, c(1, 1, NA))
+  expect_identical(dplyr::group_vars(out), character())
 })
 
 test_that("works when there is a column named `.direction` in the data (#1319)", {
   df <- tibble(x = c(1, NA, 2), .direction = 1:3)
-  expect_no_error(out <- fill(df, x))
+  out <- fill(df, x)
   expect_identical(out$x, c(1, 1, 2))
 })
 
+test_that("errors on named `...` inputs", {
+  df <- tibble(x = c(1, NA, 2))
+
+  expect_snapshot(error = TRUE, {
+    fill(df, fooy = x)
+  })
+})
+
 test_that("validates its inputs", {
   df <- tibble(x = c(1, NA, 2))
   expect_snapshot(error = TRUE, {
@@ -132,11 +142,23 @@ test_that("validates its inputs", {
   })
 })
 
-test_that("fill works with .by", {
-  df <- tibble(x = c(1, 1, 2), y = c(1, NA, NA))
-  gr_df <- dplyr::group_by(df, x)
+test_that("`.by` can't select columns that don't exist", {
+  df <- tibble(x = 1, y = 2)
+
+  # This shows `mutate()` in the error, but we accept that to not have to handle
+  # `.by` in any way.
+  expect_snapshot(error = TRUE, {
+    fill(df, y, .by = z)
+  })
+})
+
+test_that("`.by` can't be used on a grouped data frame", {
+  df <- tibble(x = 1, y = 2)
+  df <- dplyr::group_by(df, x)
+
+  # This shows `mutate()` in the error, but we accept that to not have to handle
+  # `.by` in any way.
   expect_snapshot(error = TRUE, {
-    df %>% fill(y, .by = z)
-    gr_df %>% fill(y, .by = x)
+    fill(df, y, .by = x)
   })
 })