Skip to content
Snippets Groups Projects
Commit 2f27a9a7 authored by Boris Koch
Browse files

Preparation for adding Grubbs test in outlier function

parent 7669ab39
No related branches found
No related tags found
No related merge requests found
......@@ -23,6 +23,71 @@
stats_outlier <- function(dt, check_col = "ppm", msg = FALSE, ...){
#' #' @title Grubbs' Test for Outliers
#' #' @description Tests whether the most extreme value in a numeric sample is a statistical outlier.
#' #'
#' #' @param x A numeric vector with at least 3 values.
#' #' @param alpha Significance level for the test (default: 0.05).
#' #'
#' #' @return A list containing the following elements:
#' #' \item{G_statistic}{The calculated Grubbs' test statistic.}
#' #' \item{G_critical}{The critical value for the test.}
#' #' \item{p_value}{The computed p-value of the test.}
#' #' \item{outlier}{A logical value (`TRUE` if the most extreme value is a significant outlier, otherwise `FALSE`).}
#' #' \item{extreme_value}{The tested extreme value.}
#' #'
#' #' @details
#' #' Grubbs' test checks whether the most extreme value in a dataset significantly deviates from the rest.
#' #' It assumes that the data follows a normal distribution.
#' #'
#' #' The critical value is derived from the t-distribution quantile at `1 - alpha / (2 * n)` with `n-2` degrees of freedom.
#' #' The p-value is computed from the cumulative t-distribution (`pt`) with `n-2` degrees of freedom.
#' #'
#' #' @examples
#' #' set.seed(42)
#' #' x <- c(10, 12, 15, 17, 19, 50)
#' #' grubbs_test(x)
#' #'
#' #' @references
#' #' Grubbs, F. E. (1950). "Sample criteria for testing outlying observations". *The Annals of Mathematical Statistics*, 21(1), 27-58.
#' #'
#' #' @export
#' grubbs_test <- function(x, alpha = 0.05) {
#' if (!is.numeric(x) || length(x) < 3)
#' stop("Grubbs' test requires a numeric vector with at least 3 values.")
#'
#' n <- length(x)
#' mean_x <- mean(x)
#' sd_x <- sd(x)
#'
#' # Identify the most extreme value
#' abs_deviation <- abs(x - mean_x)
#' max_index <- which.max(abs_deviation)
#' x_extreme <- x[max_index]
#'
#' # Compute Grubbs' test statistic
#' G <- abs(x_extreme - mean_x) / sd_x
#'
#' # Compute critical value
#' t_crit <- qt(1 - alpha / (2 * n), df = n - 2)
#' G_crit <- ((n - 1) * t_crit) / sqrt(n * (n - 2 + t_crit^2))
#'
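#' # Compute p-value
#' # NOTE(review): this transform of G is not the inverse of the G_crit formula above
#' # (that would be G * sqrt(n * (n - 2)) / sqrt((n - 1)^2 - n * G^2)) and it omits the
#' # factor n used in the critical value, so `p_value < alpha` can disagree with
#' # `outlier` -- verify before enabling this code.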
#' p_value <- 2 * (1 - pt(G * sqrt(n - 2) / sqrt(n - 1 - G^2), df = n - 2))
#'
#' # Return results
#' list(
#' G_statistic = G,
#' G_critical = G_crit,
#' p_value = p_value,
#' outlier = G > G_crit,
#' extreme_value = x_extreme
#' )
#' }
#'
out_score <- out_box <- out_quantile <- out_hampel <- NULL
Outlier <- Value <- out_rosner <- NULL
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment