Introduction
Assessing the normality of your data is an essential part of many statistical analyses. This all-in-one interactive tool combines multiple statistical tests and graphical techniques to help you evaluate normality effectively. Use it to:
- Load built-in or custom datasets
- Apply transformations (Log, Square Root, Box-Cox)
- Visualize data with histogram, Q-Q plot, and boxplot
- Run Shapiro-Wilk, Kolmogorov-Smirnov, and Anderson-Darling tests
- Interpret skewness and kurtosis
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1300
library(shiny)
library(bslib)
library(ggplot2)
library(moments)
library(nortest)
library(bsicons)
library(vroom)
library(shinyjs)
# User interface: a sidebar holding the data-input, histogram and
# transformation controls, and a main area with the plots and result tables.
ui <- page_sidebar(
  title = "Normality Assessment Tool",
  sidebar = sidebar(
    width = 350,
    # shinyjs has to be initialised in the UI; the server relies on
    # shinyjs::reset() to clear the file-upload widget.
    useShinyjs(),
    card(
      card_header("Data Input"),
      textAreaInput(
        "data_input", "Enter your data (one value per row):",
        rows = 8,
        placeholder = "Paste your numeric data here...\nExample:\n23.4\n19.2\n21.5\n22.1\n18.9"
      ),
      div(
        actionLink("use_example", "Use example data", style = "color:#0275d8;"),
        tags$span(
          bs_icon("file-earmark-text"),
          style = "margin-left: 5px; color: #0275d8;"
        )
      ),
      fileInput(
        "file_upload", "Upload CSV or TXT file:",
        accept = c(
          "text/csv", "text/comma-separated-values", "text/plain",
          ".csv", ".txt"
        )
      ),
      checkboxInput("header", "File has header", TRUE),
      # Variable picker and "Clear File" are only shown while the server
      # reports (through output$file_uploaded) that a file is in use.
      conditionalPanel(
        condition = "output.file_uploaded",
        div(
          selectInput("selected_var", "Select variable:", choices = NULL),
          actionButton("clear_file", "Clear File", class = "btn-danger btn-sm")
        )
      ),
      actionButton("run_test", "Run Test", class = "btn btn-primary")
    ),
    hr(),
    sliderInput(
      "bins", "Number of Histogram Bins:",
      min = 5, max = 100, value = 30
    ),
    checkboxInput("density", "Show Density Curve", value = TRUE),
    checkboxInput("normal", "Show Normal Curve", value = TRUE),
    checkboxInput("rug", "Show Rug Plot", value = FALSE),
    hr(),
    selectInput(
      "transform", "Transformation:",
      choices = c("None", "Log", "Square Root", "Box-Cox")
    ),
    # The lambda slider is only relevant for the Box-Cox transformation.
    conditionalPanel(
      condition = "input.transform == 'Box-Cox'",
      sliderInput(
        "lambda", "Lambda Parameter:",
        min = -2, max = 2, value = 0, step = 0.1
      )
    )
  ),
  layout_column_wrap(
    width = 1,
    card(
      card_header("Histogram"),
      plotOutput("histogram")
    ),
    layout_column_wrap(
      width = 1 / 2,
      card(
        card_header("Q-Q Plot"),
        plotOutput("qqplot")
      ),
      card(
        card_header("Normality Tests"),
        tableOutput("normality_tests")
      )
    ),
    card(
      card_header("Summary Statistics"),
      layout_column_wrap(
        width = 1 / 2,
        tableOutput("summary_stats"),
        plotOutput("boxplot")
      )
    )
  )
)
# Server logic: tracks which data source is active (pasted text or uploaded
# file), parses and optionally transforms the data, and renders the plots,
# summary statistics and normality tests once "Run Test" has been clicked.
server <- function(input, output, session) {
  # Active data source, "text" or "file". Starts as "text" so comparisons
  # never see NULL. This flag, not input$file_upload, decides where the data
  # comes from: after shinyjs::reset() the server may still hold the last
  # uploaded file in input$file_upload.
  input_source <- reactiveVal("text")

  # Clear the browser's file widget and empty the variable selector.
  clear_file_selection <- function() {
    reset("file_upload")
    # character(0) removes the choices; choices = NULL would be ignored.
    updateSelectInput(session, "selected_var", choices = character(0))
  }

  # ---- Source switching ----------------------------------------------------

  observeEvent(input$use_example, {
    example <- c(
      "21.5", "22.1", "23.3", "20.9", "21.0",
      "23.8", "22.5", "21.6", "22.0", "23.1"
    )
    updateTextAreaInput(
      session, "data_input",
      value = paste(example, collapse = "\n")
    )
    input_source("text")
    clear_file_selection()
  })

  observeEvent(input$clear_file, {
    clear_file_selection()
    input_source("text")
  })

  # A new upload takes over from any pasted text.
  observeEvent(input$file_upload, {
    updateTextAreaInput(session, "data_input", value = "")
    input_source("file")
  })

  # Typing or pasting values takes over from an uploaded file.
  observeEvent(input$data_input, {
    if (nzchar(input$data_input) && !identical(input_source(), "text")) {
      clear_file_selection()
      input_source("text")
    }
  })

  # ---- Data loading --------------------------------------------------------

  # Uploaded file as a data frame, or NULL (with a notification) when the
  # file cannot be read.
  file_data <- reactive({
    req(input$file_upload)
    tryCatch(
      vroom(
        input$file_upload$datapath,
        delim = NULL, col_names = input$header, show_col_types = FALSE
      ),
      error = function(e) {
        showNotification(
          paste("File read error:", conditionMessage(e)),
          type = "error"
        )
        NULL
      }
    )
  })

  # Offer only the numeric columns of the uploaded file for analysis.
  observe({
    df <- file_data()
    if (!is.null(df)) {
      is_num <- vapply(df, is.numeric, logical(1))
      updateSelectInput(session, "selected_var", choices = names(df)[is_num])
    }
  })

  # Drives the conditionalPanel around the variable selector in the UI.
  output$file_uploaded <- reactive({
    identical(input_source(), "file")
  })
  outputOptions(output, "file_uploaded", suspendWhenHidden = FALSE)

  # Numeric vector to analyse, taken from the active source.
  data_values <- reactive({
    if (identical(input_source(), "file")) {
      df <- file_data()
      req(df, input$selected_var)
      # The selector can briefly hold a name from a previous file.
      req(input$selected_var %in% names(df))
      return(na.omit(df[[input$selected_var]]))
    }
    req(input$data_input)
    entries <- trimws(strsplit(input$data_input, "\n", fixed = TRUE)[[1]])
    entries <- entries[nzchar(entries)]
    values <- suppressWarnings(as.numeric(entries))
    validate(need(
      all(is.finite(values)),
      "Every line must contain exactly one finite numeric value."
    ))
    values
  })

  # Data after the selected transformation. Transformations that are not
  # defined for the data produce a visible validation message.
  get_transformed_data <- reactive({
    x <- data_values()
    req(x)
    switch(input$transform,
      "Log" = {
        validate(need(
          all(x > 0),
          "The log transformation requires strictly positive values."
        ))
        log(x)
      },
      "Square Root" = {
        validate(need(
          all(x >= 0),
          "The square-root transformation requires non-negative values."
        ))
        sqrt(x)
      },
      "Box-Cox" = {
        validate(need(
          all(x > 0),
          "The Box-Cox transformation requires strictly positive values."
        ))
        lambda <- input$lambda
        # Box-Cox reduces to the log transformation at lambda = 0.
        if (abs(lambda) < 1e-8) log(x) else (x^lambda - 1) / lambda
      },
      x
    )
  })

  # Label used in plot titles and axes: the chosen column for file data,
  # a generic label for pasted values (which have no variable name).
  var_label <- reactive({
    if (identical(input_source(), "file") && isTruthy(input$selected_var)) {
      input$selected_var
    } else {
      "Values"
    }
  })

  # ---- Plots ---------------------------------------------------------------

  output$histogram <- renderPlot({
    req(input$run_test > 0)
    x <- get_transformed_data()
    req(x)
    title_transform <- if (input$transform != "None") {
      paste0(" (", input$transform, " transformed)")
    } else {
      ""
    }
    p <- ggplot(data.frame(x = x), aes(x = x)) +
      geom_histogram(
        aes(y = after_stat(density)),
        bins = input$bins,
        fill = "steelblue", color = "white", alpha = 0.7
      ) +
      labs(
        title = paste0("Distribution of ", var_label(), title_transform),
        x = var_label(), y = "Density"
      ) +
      theme_minimal() +
      theme(plot.title = element_text(hjust = 0.5, size = 16))
    if (input$density) {
      p <- p + geom_density(color = "darkred", linewidth = 1)
    }
    # A normal curve needs a positive, defined standard deviation.
    if (input$normal && length(x) > 1 && sd(x) > 0) {
      p <- p + stat_function(
        fun = dnorm, args = list(mean = mean(x), sd = sd(x)),
        color = "darkgreen", linewidth = 1
      )
    }
    if (input$rug) {
      p <- p + geom_rug(alpha = 0.5)
    }
    p
  })

  output$qqplot <- renderPlot({
    req(input$run_test > 0)
    x <- get_transformed_data()
    req(x)
    ggplot(data.frame(x = x), aes(sample = x)) +
      stat_qq() +
      stat_qq_line(color = "red") +
      labs(title = paste0("Q-Q Plot for ", var_label())) +
      theme_minimal() +
      theme(plot.title = element_text(hjust = 0.5, size = 16))
  })

  output$boxplot <- renderPlot({
    req(input$run_test > 0)
    x <- get_transformed_data()
    req(x)
    ggplot(data.frame(x = x), aes(y = x)) +
      geom_boxplot(fill = "steelblue", alpha = 0.7) +
      labs(title = paste0("Boxplot of ", var_label()), y = var_label()) +
      theme_minimal() +
      theme(
        plot.title = element_text(hjust = 0.5, size = 16),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
      )
  })

  # ---- Tables --------------------------------------------------------------

  # digits = 4 is needed because renderTable otherwise prints numeric
  # columns with fewer decimals than the values are rounded to.
  output$summary_stats <- renderTable(
    {
      req(input$run_test > 0)
      x <- get_transformed_data()
      req(x)
      data.frame(
        Statistic = c(
          "Mean", "Median", "Standard Deviation", "Minimum", "Maximum",
          "Skewness", "Kurtosis"
        ),
        Value = round(
          c(
            mean(x), median(x), sd(x), min(x), max(x),
            skewness(x), kurtosis(x)
          ),
          4
        )
      )
    },
    digits = 4
  )

  output$normality_tests <- renderTable(
    {
      req(input$run_test > 0)
      x <- get_transformed_data()
      req(x)
      n <- length(x)
      if (n < 3 || sd(x) == 0) {
        return(data.frame(
          Test = "Not enough variability or too few values",
          Statistic = NA, P.value = NA
        ))
      }

      # P-values are shown as text so that "<0.001" and explanatory notes
      # can share a column with ordinary values.
      format_p <- function(p) {
        if (p < 0.001) "<0.001" else sprintf("%.4f", p)
      }
      result_row <- function(test, statistic, p_text) {
        data.frame(
          Test = test,
          Statistic = round(unname(statistic), 4),
          P.value = p_text
        )
      }

      # shapiro.test() only accepts between 3 and 5000 observations.
      sw_row <- if (n <= 5000) {
        sw <- shapiro.test(x)
        result_row("Shapiro-Wilk", sw$statistic, format_p(sw$p.value))
      } else {
        result_row("Shapiro-Wilk", NA_real_, "Sample too large")
      }

      # NOTE(review): mean and sd are estimated from the same sample, which
      # the one-sample KS test does not account for, so this p-value is
      # conservative; nortest::lillie.test() is the corrected variant.
      ks <- ks.test(x, "pnorm", mean = mean(x), sd = sd(x))
      ks_row <- result_row(
        "Kolmogorov-Smirnov", ks$statistic, format_p(ks$p.value)
      )

      # nortest::ad.test() stops with an error for fewer than 8 observations.
      ad_row <- if (n >= 8) {
        ad <- nortest::ad.test(x)
        result_row("Anderson-Darling", ad$statistic, format_p(ad$p.value))
      } else {
        result_row("Anderson-Darling", NA_real_, "Sample too small (n < 8)")
      }

      rbind(sw_row, ks_row, ad_row)
    },
    digits = 4
  )
}
# Assemble the UI and server into the app object that gets launched.
shinyApp(ui = ui, server = server)
Recommended Usage
This tool is ideal when:
- You’re preparing data for statistical modeling
- You want to evaluate both numerical and visual evidence of normality
- You’re unsure which test is best for your dataset
Reuse
Citation
@online{kassambara2025,
author = {Kassambara, Alboukadel},
title = {D-Stat: {Comprehensive} {Normality} {Test} in {R}},
date = {2025-04-06},
url = {https://www.datanovia.com/apps/statfusion/analysis/inferential/goodness-fit/normality/index.html},
langid = {en}
}