D-Stat: Comprehensive Normality Test in R

All-in-One Tool for Assessing Distribution Normality

This interactive app provides a complete framework for assessing normality in R. It combines multiple normality tests (Shapiro-Wilk, Kolmogorov-Smirnov, Anderson-Darling) with visualization tools such as histograms, Q-Q plots, and boxplots. You can transform data (log, sqrt, Box-Cox) and analyze variables from your own dataset or built-in datasets.

Tools
Author
Affiliation
Published

April 6, 2025

Modified

April 16, 2025

Keywords

normality test R, shapiro test, qq plot, ks test, boxcox transformation, normality shiny app, all-in-one normality

Introduction

Assessing the normality of your data is an essential part of many statistical analyses. This all-in-one interactive tool combines multiple statistical tests and graphical techniques to help you evaluate normality effectively. Use it to:

  • Load built-in or custom datasets
  • Apply transformations (Log, Square Root, Box-Cox)
  • Visualize data with histogram, Q-Q plot, and boxplot
  • Run Shapiro-Wilk, Kolmogorov-Smirnov, and Anderson-Darling tests
  • Interpret skewness and kurtosis


#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1300

library(shiny)
library(bslib)
library(ggplot2)
library(moments)
library(nortest)
library(bsicons)
library(vroom)
library(shinyjs)

# User interface: a sidebar with data input, histogram options and
# transformation controls, plus a main area with the histogram, Q-Q plot,
# normality-test table, summary statistics and boxplot.
ui <- page_sidebar(
  title = "Normality Assessment Tool",
  sidebar = sidebar(
    width = 350,

    # Registers the shinyjs client-side handlers. The server calls
    # shinyjs::reset("file_upload"); without this call those requests are
    # never handled in the browser and the file input is not cleared.
    useShinyjs(),

    card(
      card_header("Data Input"),
      textAreaInput("data_input", "Enter your data (one value per row):", rows = 8,
                    placeholder = "Paste your numeric data here...\nExample:\n23.4\n19.2\n21.5\n22.1\n18.9"),
      div(
        actionLink("use_example", "Use example data", style = "color:#0275d8;"),
        tags$span(bs_icon("file-earmark-text"), style = "margin-left: 5px; color: #0275d8;")
      ),
      fileInput("file_upload", "Upload CSV or TXT file:",
                accept = c("text/csv", "text/comma-separated-values", "text/plain", ".csv", ".txt")),
      checkboxInput("header", "File has header", TRUE),
      # Variable picker and "Clear File" button are shown only while the
      # server-side `file_uploaded` output is TRUE.
      conditionalPanel(
        condition = "output.file_uploaded",
        div(
          selectInput("selected_var", "Select variable:", choices = NULL),
          actionButton("clear_file", "Clear File", class = "btn-danger btn-sm")
        )
      ),
      actionButton("run_test", "Run Test", class = "btn btn-primary")
    ),

    hr(),
    # Histogram appearance controls.
    sliderInput("bins", "Number of Histogram Bins:", min = 5, max = 100, value = 30),
    checkboxInput("density", "Show Density Curve", value = TRUE),
    checkboxInput("normal", "Show Normal Curve", value = TRUE),
    checkboxInput("rug", "Show Rug Plot", value = FALSE),
    hr(),
    selectInput("transform", "Transformation:",
                choices = c("None", "Log", "Square Root", "Box-Cox")),
    # The lambda slider is only relevant for the Box-Cox transformation.
    conditionalPanel(
      condition = "input.transform == 'Box-Cox'",
      sliderInput("lambda", "Lambda Parameter:", min = -2, max = 2, value = 0, step = 0.1)
    )
  ),

  layout_column_wrap(
    width = 1,
    card(
      card_header("Histogram"),
      plotOutput("histogram")
    ),

    layout_column_wrap(
      width = 1/2,
      card(
        card_header("Q-Q Plot"),
        plotOutput("qqplot")
      ),
      card(
        card_header("Normality Tests"),
        tableOutput("normality_tests")
      )
    ),

    card(
      card_header("Summary Statistics"),
      layout_column_wrap(
        width = 1/2,
        tableOutput("summary_stats"),
        plotOutput("boxplot")
      )
    )
  )
)

# Server logic for the normality assessment app.
#
# Reads numeric data either from the text area (`data_input`) or from a
# numeric column (`selected_var`) of an uploaded file (`file_upload`),
# optionally transforms it, and renders the histogram, Q-Q plot, boxplot,
# summary statistics and normality-test table.
#
# @param input,output,session Standard Shiny server arguments.
server <- function(input, output, session) {
  # Which input is currently active: "text", "file", or NULL before the user
  # has provided anything. This is the single source of truth for the data
  # origin: input$file_upload is not treated as authoritative because it can
  # keep its previous value on the server after the widget has been reset.
  input_source <- reactiveVal(NULL)

  observeEvent(input$use_example, {
    example <- c("21.5", "22.1", "23.3", "20.9", "21.0", "23.8", "22.5", "21.6", "22.0", "23.1")
    updateTextAreaInput(session, "data_input", value = paste(example, collapse = "\n"))
    input_source("text")
    # character(0) actually empties the choices; NULL arguments are ignored
    # by the update* functions.
    updateSelectInput(session, "selected_var", choices = character(0))
    reset("file_upload")
  })

  observeEvent(input$clear_file, {
    reset("file_upload")
    updateSelectInput(session, "selected_var", choices = character(0))
    input_source("text")
  })

  # observeEvent ignores NULL by default, so this only fires on a real upload.
  observeEvent(input$file_upload, {
    updateTextAreaInput(session, "data_input", value = "")
    input_source("file")
  })

  observeEvent(input$data_input, {
    # identical() is NULL-safe: `input_source() != "text"` yields logical(0)
    # while input_source() is still NULL, which makes `&&` fail.
    if (input$data_input != "" && !identical(input_source(), "text")) {
      updateSelectInput(session, "selected_var", choices = character(0))
      reset("file_upload")
      input_source("text")
    }
  })

  # Uploaded file as a data frame, or NULL (with a notification) if it cannot
  # be read. The delimiter is guessed by vroom (delim = NULL).
  file_data <- reactive({
    req(input$file_upload)
    tryCatch(
      vroom(input$file_upload$datapath, delim = NULL,
            col_names = input$header, show_col_types = FALSE),
      error = function(e) {
        showNotification(paste("File read error:", conditionMessage(e)), type = "error")
        NULL
      }
    )
  })

  # Offer only the numeric columns of the uploaded file for analysis.
  observe({
    req(identical(input_source(), "file"))
    df <- file_data()
    if (!is.null(df)) {
      # vapply always returns a logical vector, even for zero columns.
      num_vars <- names(df)[vapply(df, is.numeric, logical(1))]
      if (length(num_vars) == 0) {
        showNotification("The uploaded file has no numeric columns.", type = "warning")
      }
      updateSelectInput(session, "selected_var", choices = num_vars)
    }
  })

  # Drives the conditionalPanel holding the variable picker / clear button.
  output$file_uploaded <- reactive({
    identical(input_source(), "file")
  })
  outputOptions(output, "file_uploaded", suspendWhenHidden = FALSE)

  # Raw numeric values from the active source, with missing values removed.
  data_values <- reactive({
    if (identical(input_source(), "file")) {
      df <- file_data()
      req(df, input$selected_var)
      # The select input may briefly hold a stale name from a previous file.
      req(input$selected_var %in% names(df))
      values <- df[[input$selected_var]]
      return(values[!is.na(values)])
    }
    req(input$data_input)
    # trimws() drops the "\r" left by Windows line endings and stray spaces,
    # so such lines no longer invalidate the whole input.
    input_lines <- trimws(strsplit(input$data_input, "\n", fixed = TRUE)[[1]])
    input_lines <- input_lines[nzchar(input_lines)]
    numeric_values <- suppressWarnings(as.numeric(input_lines))
    validate(need(
      all(is.finite(numeric_values)),
      "Every row must contain a single finite number."
    ))
    numeric_values
  })

  # Data after the selected transformation. Invalid combinations surface a
  # message in the outputs instead of leaving them silently blank.
  get_transformed_data <- reactive({
    x <- data_values()
    req(x)

    switch(input$transform,
      "Log" = {
        validate(need(all(x > 0),
                      "Log transformation requires strictly positive values."))
        log(x)
      },
      "Square Root" = {
        validate(need(all(x >= 0),
                      "Square root transformation requires non-negative values."))
        sqrt(x)
      },
      "Box-Cox" = {
        validate(need(all(x > 0),
                      "Box-Cox transformation requires strictly positive values."))
        lambda <- input$lambda
        # lambda == 0 is the log limit of the Box-Cox family.
        if (abs(lambda) < 1e-8) log(x) else ((x ^ lambda) - 1) / lambda
      },
      x
    )
  })

  # Label used in plot titles/axes: the chosen column for file data, a
  # generic name for pasted data (where selected_var is empty).
  var_label <- reactive({
    if (identical(input_source(), "file") && isTruthy(input$selected_var)) {
      input$selected_var
    } else {
      "Values"
    }
  })

  output$histogram <- renderPlot({
    req(input$run_test > 0, get_transformed_data())
    x <- get_transformed_data()
    title_transform <- if (input$transform != "None") {
      paste0(" (", input$transform, " transformed)")
    } else {
      ""
    }
    # Density and normal overlays need at least two distinct values.
    has_spread <- length(x) > 1 && isTRUE(sd(x) > 0)

    p <- ggplot(data.frame(x = x), aes(x = x)) +
      geom_histogram(aes(y = after_stat(density)), bins = input$bins,
                     fill = "steelblue", color = "white", alpha = 0.7) +
      labs(title = paste0("Distribution of ", var_label(), title_transform),
           x = var_label(), y = "Density") +
      theme_minimal() +
      theme(plot.title = element_text(hjust = 0.5, size = 16))

    if (input$density && has_spread) {
      p <- p + geom_density(color = "darkred", linewidth = 1)
    }
    if (input$normal && has_spread) {
      p <- p + stat_function(fun = dnorm, args = list(mean = mean(x), sd = sd(x)),
                             color = "darkgreen", linewidth = 1)
    }
    if (input$rug) p <- p + geom_rug(alpha = 0.5)
    p
  })

  output$qqplot <- renderPlot({
    req(input$run_test > 0, get_transformed_data())
    x <- get_transformed_data()
    ggplot(data.frame(x = x), aes(sample = x)) +
      stat_qq() +
      stat_qq_line(color = "red") +
      labs(title = paste0("Q-Q Plot for ", var_label())) +
      theme_minimal() +
      theme(plot.title = element_text(hjust = 0.5, size = 16))
  })

  output$boxplot <- renderPlot({
    req(input$run_test > 0, get_transformed_data())
    x <- get_transformed_data()
    ggplot(data.frame(x = x), aes(y = x)) +
      geom_boxplot(fill = "steelblue", alpha = 0.7) +
      labs(title = paste0("Boxplot of ", var_label()), y = var_label()) +
      theme_minimal() +
      theme(plot.title = element_text(hjust = 0.5, size = 16),
            axis.text.x = element_blank(),
            axis.ticks.x = element_blank())
  })

  # digits = 4: renderTable otherwise prints numeric columns with 2 decimals,
  # discarding the 4-decimal rounding applied below.
  output$summary_stats <- renderTable({
    x <- get_transformed_data()
    req(x)
    data.frame(
      Statistic = c("Mean", "Median", "Standard Deviation", "Minimum", "Maximum", "Skewness", "Kurtosis"),
      Value = round(c(mean(x), median(x), sd(x), min(x), max(x), skewness(x), kurtosis(x)), 4)
    )
  }, digits = 4)

  output$normality_tests <- renderTable({
    x <- get_transformed_data()
    req(x)
    n <- length(x)

    if (n < 3 || sd(x) == 0) {
      return(data.frame(Test = "Not enough variability or too few values", Statistic = NA, P.value = NA))
    }

    # P-values are always rendered as text so the column has one type.
    format_p <- function(p) {
      if (is.na(p)) {
        NA_character_
      } else if (p < 0.001) {
        "<0.001"
      } else {
        sprintf("%.4f", p)
      }
    }

    # One table row for a test that could not be run.
    note_row <- function(name, note) {
      data.frame(Test = name, Statistic = NA_real_, P.value = note)
    }

    # Runs one test; a failure becomes a note in that row instead of
    # blanking the whole table.
    test_row <- function(name, test_fun) {
      tryCatch({
        res <- test_fun()
        data.frame(
          Test = name,
          Statistic = round(unname(res$statistic), 4),
          P.value = format_p(res$p.value)
        )
      }, error = function(e) note_row(name, conditionMessage(e)))
    }

    rows <- list(
      # shapiro.test accepts between 3 and 5000 values.
      if (n <= 5000) {
        test_row("Shapiro-Wilk", function() shapiro.test(x))
      } else {
        note_row("Shapiro-Wilk", "Sample too large")
      },
      # NOTE(review): mean and sd are estimated from the same data, so this
      # p-value is only approximate; nortest::lillie.test applies the
      # Lilliefors correction if an exact-level test is wanted.
      test_row("Kolmogorov-Smirnov", function() {
        ks.test(x, "pnorm", mean = mean(x), sd = sd(x))
      }),
      # nortest::ad.test stops for fewer than 8 observations.
      if (n >= 8) {
        test_row("Anderson-Darling", function() nortest::ad.test(x))
      } else {
        note_row("Anderson-Darling", "Requires at least 8 values")
      }
    )

    do.call(rbind, rows)
  }, digits = 4)
}

# Assemble the UI definition and server function into a runnable Shiny app.
shinyApp(ui = ui, server = server)


Back to top

Reuse

Citation

BibTeX citation:
@online{kassambara2025,
  author = {Kassambara, Alboukadel},
  title = {D-Stat: {Comprehensive} {Normality} {Test} in {R}},
  date = {2025-04-06},
  url = {https://www.datanovia.com/apps/statfusion/analysis/inferential/goodness-fit/normality/index.html},
  langid = {en}
}
For attribution, please cite this work as:
Kassambara, Alboukadel. 2025. “D-Stat: Comprehensive Normality Test in R.” April 6, 2025. https://www.datanovia.com/apps/statfusion/analysis/inferential/goodness-fit/normality/index.html.