Preparation for adding Grubbs test in outlier function

2f27a9a7 · Boris Koch · 7669ab39 · 2f27a9a7
Commit 2f27a9a7 authored 1 month ago by Boris Koch
--- a/R/stats.R
+++ b/R/stats.R
@@ -23,6 +23,71 @@

 stats_outlier <- function(dt, check_col = "ppm", msg = FALSE, ...){

+
+  #' #' @title Grubbs' Test for Outliers
+  #' #' @description Tests whether the most extreme value in a numeric sample is a statistical outlier.
+  #' #'
+  #' #' @param x A numeric vector with at least 3 values.
+  #' #' @param alpha Significance level for the test (default: 0.05).
+  #' #'
+  #' #' @return A list containing the following elements:
+  #' #' \item{G_statistic}{The calculated Grubbs' test statistic.}
+  #' #' \item{G_critical}{The critical value for the test.}
+  #' #' \item{p_value}{The computed p-value of the test.}
+  #' #' \item{outlier}{A logical value (`TRUE` if the most extreme value is a significant outlier, otherwise `FALSE`).}
+  #' #' \item{extreme_value}{The tested extreme value.}
+  #' #'
+  #' #' @details
+  #' #' Grubbs' test checks whether the most extreme value in a dataset significantly deviates from the rest.
+  #' #' It assumes that the data follows a normal distribution.
+  #' #'
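+  #' #' The critical value is derived from the upper `alpha / (2 * n)` quantile of the
+  #' #' t-distribution with `n - 2` degrees of freedom (two-sided test).
+  #' #' The p-value is computed from the cumulative distribution function of that same t-distribution.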
+  #' #'
+  #' #' @examples
+  #' #' set.seed(42)
+  #' #' x <- c(10, 12, 15, 17, 19, 50)
+  #' #' grubbs_test(x)
+  #' #'
+  #' #' @references
+  #' #' Grubbs, F. E. (1950). "Sample criteria for testing outlying observations". *The Annals of Mathematical Statistics*, 21(1), 27-58.
+  #' #'
+  #' #' @export
+  #' grubbs_test <- function(x, alpha = 0.05) {
+  #'   if (!is.numeric(x) || length(x) < 3)
+  #'     stop("Grubbs' test requires a numeric vector with at least 3 values.")
+  #'
+  #'   n <- length(x)
+  #'   mean_x <- mean(x)
+  #'   sd_x <- sd(x)
+  #'
+  #'   # Identify the most extreme value
+  #'   abs_deviation <- abs(x - mean_x)
+  #'   max_index <- which.max(abs_deviation)
+  #'   x_extreme <- x[max_index]
+  #'
+  #'   # Compute Grubbs' test statistic
+  #'   G <- abs(x_extreme - mean_x) / sd_x
+  #'
+  #'   # Compute critical value
+  #'   t_crit <- qt(1 - alpha / (2 * n), df = n - 2)
+  #'   G_crit <- ((n - 1) * t_crit) / sqrt(n * (n - 2 + t_crit^2))
+  #'
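+  #'   # Compute p-value
+  #'   # NOTE(review): the transform below does not appear to be the inverse of the
+  #'   # G_crit formula above, which gives
+  #'   #   t = G * sqrt(n * (n - 2)) / sqrt((n - 1)^2 - n * G^2),
+  #'   # and it omits the factor n implied by alpha / (2 * n); as written,
+  #'   # `p_value < alpha` may disagree with `outlier` -- confirm before enabling.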
+  #'   p_value <- 2 * (1 - pt(G * sqrt(n - 2) / sqrt(n - 1 - G^2), df = n - 2))
+  #'
+  #'   # Return results
+  #'   list(
+  #'     G_statistic = G,
+  #'     G_critical = G_crit,
+  #'     p_value = p_value,
+  #'     outlier = G > G_crit,
+  #'     extreme_value = x_extreme
+  #'   )
+  #' }
+  #'
+
+
  out_score <- out_box <- out_quantile <- out_hampel <- NULL
  Outlier <- Value <- out_rosner <- NULL