easystats · ANAMASGARD · Nov 14, 2025 · Nov 16, 2025 · Nov 16, 2025 · gemini-code-assist
diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,20 @@
 
 * Improved documentation for printing-methods.
 
+* `check_model()` now automatically hides data points in diagnostic plots for
+  models with only categorical predictors. This improves the visualization of
+  variance and linearity patterns across groups by reducing visual clutter from
+  overlapping points. Users can still override this behavior by explicitly
+  setting `show_dots = TRUE` (#873, @DominiqueMakowski).
+
+## Known Issues
+
+* The `theme` argument in `check_model()` currently has no effect due to a bug
+  in the **see** package's plot method. The `check_model()` function correctly
+  stores the theme attribute, but the **see** package's `plot.check_model()`
+  function does not properly handle it. This issue is tracked in #851 and
+  requires a fix in the **see** package repository.
+
 # performance 0.15.2
 
 ## Bug fixes

diff --git a/R/check_model.R b/R/check_model.R
@@ -46,7 +46,10 @@
 #' to `FALSE` for models with many observations, if generating the plot is too
 #' time-consuming. By default, `show_dots = NULL`. In this case `check_model()`
 #' tries to guess whether performance will be poor due to a very large model
-#' and thus automatically shows or hides dots.
+#' and thus automatically shows or hides dots. For models with only categorical
+#' predictors, dots are automatically hidden to improve the visualization of
+#' variance patterns across groups (but you can override this by explicitly
+#' setting `show_dots = TRUE`).
 #' @param verbose If `FALSE` (default), suppress most warning messages.
 #' @param ... Arguments passed down to the individual check functions, especially
 #' to `check_predictions()` and `binned_residuals()`.
@@ -62,7 +65,7 @@
 #'
 #' @details For Bayesian models from packages **rstanarm** or **brms**,
 #' models will be "converted" to their frequentist counterpart, using
 #' [`bayestestR::bayesian_as_frequentist`](https://easystats.github.io/bayestestR/reference/convert_bayesian_as_frequentist.html).
 #' A more advanced model-check for Bayesian models will be implemented at a
 #' later stage.
 #'
@@ -90,7 +93,7 @@
 #' plots are helpful to check model assumptions, they do not necessarily indicate
 #' so-called "lack of fit", e.g. missed non-linear relationships or interactions.
 #' Thus, it is always recommended to also look at
 #' [effect plots, including partial residuals](https://strengejacke.github.io/ggeffects/articles/introduction_partial_residuals.html).
 #'
 #' @section Homogeneity of Variance:
 #' This plot checks the assumption of equal variance (homoscedasticity). The
@@ -274,7 +277,7 @@
    insight::format_error(
      paste("`check_model()` returned following error:", cleaned_string),
      paste0(
        "\nIf the error message does not help identifying your problem, another reason why `check_model()` failed might be that models of class `",
        class(x)[1],
        "` are not yet supported."
      ) # nolint
@@ -300,10 +303,15 @@
     type <- "discrete_interval"
   }
 
-  # set default for show_dots, based on "model size"
+  # set default for show_dots, based on "model size" and predictor types
   if (is.null(show_dots)) {
     n <- .safe(insight::n_obs(x))
     show_dots <- is.null(n) || n <= 1e5
+    # Auto-disable dots for models with only categorical predictors
+    # to improve visualization of variance patterns
+    if (show_dots && .has_only_categorical_predictors(x)) {
+      show_dots <- FALSE
+    }
   }
 
   attr(assumptions_data, "panel") <- panel
@@ -742,3 +750,91 @@
   class(dat) <- c("check_model", "see_check_model", "data.frame")
   dat
 }
+
+# Helper function to detect if model has only categorical predictors ----
+
+.has_only_categorical_predictors <- function(model) {
+  tryCatch(
+    {
+      # Get the model matrix (excludes intercept)
+      mm <- insight::get_modelmatrix(model, verbose = FALSE)
+      if (is.null(mm) || ncol(mm) <= 1) {
+        # Only intercept or no predictors
+        return(FALSE)
+      }
+
+      # Remove intercept column if present
+      mm_cols <- colnames(mm)
+      mm_cols <- mm_cols[mm_cols != "(Intercept)"]
+
+      if (length(mm_cols) == 0) {
+        return(FALSE)
+      }
+
+      # Get predictors from the model
+      predictors <- insight::find_predictors(model, flatten = TRUE)
+      if (is.null(predictors) || length(predictors) == 0) {
+        return(FALSE)
+      }
+
+      # Get model data
+      model_data <- insight::get_data(model, verbose = FALSE)
+      if (is.null(model_data)) {
+        return(FALSE)
+      }
+
+      # Check each predictor
+      # A predictor is considered categorical if:
+      # 1. It's a factor/character in the data, OR
+      # 2. It appears in the formula wrapped in factor(), as.factor(), etc., OR
+      # 3. It generates dummy columns in the model matrix
+      predictor_is_categorical <- vapply(
+        predictors,
+        function(pred) {
+          # Check if it's a factor/character in the data
+          if (pred %in% names(model_data)) {
+            var <- model_data[[pred]]
+            if (is.factor(var) || is.character(var)) {
+              return(TRUE)
+            }
+          }
+
+          # Check for explicit factor conversion in formula like factor() or as.factor()
+          # This correctly handles binary factors converted in the formula.
+          factor_patterns <- paste(
+            c(
+              paste0("^factor\\(", pred, "\\)"),
+              paste0("^as\\.factor\\(", pred, "\\)")
+            ),
+            collapse = "|"
+          )
+          if (any(grepl(factor_patterns, mm_cols))) {
+            return(TRUE)
+          }
+
+          # Check if this predictor generates dummy columns
+          # (indicates categorical treatment for non-binary factors)
+          # This pattern correctly identifies dummy variables like 'cyl6' from 'cyl'
+          # without incorrectly matching partial names like 'cyl_long'.
+          pred_pattern <- paste0("^", pred, "(?![a-zA-Z_])")
+          pred_cols <- grep(pred_pattern, mm_cols, value = TRUE, perl = TRUE)
+
+          # Exclude exact matches (continuous predictors appear as-is in model matrix)
+          # Only count dummy variable columns (e.g., 'cyl6', 'cyl8' but not 'cyl')
+          pred_cols <- pred_cols[pred_cols != pred]
+
+          # If one or more dummy columns for this predictor, it's categorical
+          length(pred_cols) >= 1
+        },
+        FUN.VALUE = logical(1)
+      )
+
+      # Return TRUE only if all predictors are categorical
+      length(predictor_is_categorical) > 0 && all(predictor_is_categorical)
+    },
+    error = function(e) {
+      # If there's any error, default to FALSE
+      FALSE
+    }
+  )
+}
diff --git a/man/check_model.Rd b/man/check_model.Rd
diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R
@@ -206,3 +206,65 @@ test_that("`check_model()` with transformed response when named as function", {
   out <- check_predictions(model)
   expect_s3_class(out, "performance_pp_check")
 })
+
+
+test_that("`check_model()` with show_dots parameter", {
+  data(mtcars)
+  m <- lm(mpg ~ factor(cyl) + wt, data = mtcars)
+
+  # Test with show_dots = FALSE
+  result <- check_model(m, show_dots = FALSE, verbose = FALSE)
+
+  expect_s3_class(result, "check_model")
+  expect_false(attr(result, "show_dots"))
+})
+
+
+test_that("`check_model()` auto-detects categorical predictors", {
+  skip_if_not_installed("curl")
+
+  # Load test data
+  star <- tryCatch(
+    {
+      read.csv("https://drmankin.github.io/disc_stats/star.csv")
+    },
+    error = function(e) {
+      NULL
+    }
+  )
+
+  skip_if(is.null(star), message = "Could not download test data")
+
+  star$star2 <- factor(star$star2)
+
+  m <- lm(math2 ~ star2, data = star, na.action = na.exclude)
+  result <- check_model(m, verbose = FALSE)
+
+  # Should auto-disable dots for categorical-only models
+  expect_s3_class(result, "check_model")
+  expect_false(attr(result, "show_dots"))
+})
+
+
+test_that("`check_model()` keeps dots for mixed continuous/categorical models", {
+  data(mtcars)
+  m <- lm(mpg ~ factor(cyl) + wt + hp, data = mtcars)
+
+  result <- check_model(m, verbose = FALSE)
+
+  expect_s3_class(result, "check_model")
+  # Should keep dots by default for mixed models
+  expect_true(attr(result, "show_dots"))
+})
+
+
+test_that("`check_model()` auto-disables dots for binary factor in formula", {
+  data(mtcars)
+  m <- lm(mpg ~ as.factor(am), data = mtcars)
+
+  result <- check_model(m, verbose = FALSE)
+
+  # Should auto-disable dots for categorical-only models (even binary)
+  expect_s3_class(result, "check_model")
+  expect_false(attr(result, "show_dots"))
+})