wilcoxonFlowchart = {
// Canvas setup with width increased by 20%
const width = 1200; //
const height = 400;
const padding = 24; // Slightly increased padding
// Node definitions with adjusted positions - scaled horizontally by ~20%
const nodes = [
{id: "A", label: "Group A Data", x: padding + 60, y: 140},
{id: "B", label: "Group B Data", x: padding + 60, y: 240},
{id: "C", label: "Combined Data", x: padding + 240, y: 190},
{id: "D", label: "Rank all values", x: padding + 400, y: 190},
{id: "E", label: "Calculate rank sums", x: padding + 560, y: 190},
{id: "F", label: "Calculate test\nstatistic W", x: padding + 720, y: 190},
{id: "G", label: "p-value < 0.05?", x: padding + 860, y: 190, isDecision: true},
{id: "H", label: "Reject null\nhypothesis", x: padding + 780, y: 300},
{id: "I", label: "Retain null\nhypothesis", x: padding + 920, y: 300}
];
// Edge definitions remain the same
const edges = [
{source: "A", target: "C", label: ""},
{source: "B", target: "C", label: ""},
{source: "C", target: "D", label: ""},
{source: "D", target: "E", label: ""},
{source: "E", target: "F", label: ""},
{source: "F", target: "G", label: ""},
{source: "G", target: "H", label: "Yes"},
{source: "G", target: "I", label: "No"}
];
// Create SVG with explicit preserveAspectRatio to ensure it fits
const svg = d3.create("svg")
.attr("width", width)
.attr("height", height)
.attr("viewBox", [0, 0, width, height])
.attr("preserveAspectRatio", "xMidYMid meet")
.attr("style", "max-width: 100%; height: auto; font: 14px sans-serif;");
// Add title
svg.append("text")
.attr("x", width / 2)
.attr("y", 30)
.attr("text-anchor", "middle")
.attr("font-size", "20px")
.attr("font-weight", "bold")
.text("Wilcoxon Rank Sum-Test Procedure");
// Define arrow marker
svg.append("defs").append("marker")
.attr("id", "arrowhead")
.attr("viewBox", "0 0 10 10")
.attr("refX", 8)
.attr("refY", 5)
.attr("markerWidth", 8)
.attr("markerHeight", 8)
.attr("orient", "auto")
.append("path")
.attr("d", "M 0 0 L 10 5 L 0 10 z")
.attr("fill", "#666");
// Draw edges
const edgeLines = svg.selectAll("path.edge")
.data(edges)
.join("path")
.attr("class", "edge")
.attr("d", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
// Calculate connector points
let sourceX, sourceY, targetX, targetY;
// Decision diamond special case
if (source.isDecision) {
if (d.label === "Yes") {
// Going down and left
sourceX = source.x - 15;
sourceY = source.y + 25;
targetX = target.x;
targetY = target.y - 25;
return `M${sourceX},${sourceY} L${sourceX},${targetY - 30} L${targetX},${targetY - 30} L${targetX},${targetY}`;
} else if (d.label === "No") {
// Going down and right
sourceX = source.x + 15;
sourceY = source.y + 25;
targetX = target.x;
targetY = target.y - 25;
return `M${sourceX},${sourceY} L${sourceX},${targetY - 30} L${targetX},${targetY - 30} L${targetX},${targetY}`;
}
} else {
// Normal case - horizontal flow (LR)
sourceX = source.x + 60;
sourceY = source.y;
targetX = target.x - 60;
targetY = target.y;
// Special handling for Group A/B to Combined Data
if (source.id === "A" && target.id === "C") {
sourceX = source.x + 60;
sourceY = source.y;
targetX = target.x - 60;
targetY = target.y - 20;
return `M${sourceX},${sourceY} L${(sourceX+targetX)/2},${sourceY} L${(sourceX+targetX)/2},${targetY} L${targetX},${targetY}`;
}
else if (source.id === "B" && target.id === "C") {
sourceX = source.x + 60;
sourceY = source.y;
targetX = target.x - 60;
targetY = target.y + 20;
return `M${sourceX},${sourceY} L${(sourceX+targetX)/2},${sourceY} L${(sourceX+targetX)/2},${targetY} L${targetX},${targetY}`;
}
}
return `M${sourceX},${sourceY} L${targetX},${targetY}`;
})
.attr("stroke", "#666")
.attr("stroke-width", 2)
.attr("fill", "none")
.attr("marker-end", "url(#arrowhead)");
// Draw Yes/No labels with backgrounds
svg.selectAll(".labelBg")
.data(edges.filter(d => d.label === "Yes" || d.label === "No"))
.join("g")
.attr("class", "labelBg")
.each(function(d) {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
const g = d3.select(this);
let x, y;
if (d.label === "Yes") {
x = source.x - 40;
y = target.y - 50;
} else { // No
x = source.x + 40;
y = target.y - 50;
}
// Add white background rectangle
g.append("rect")
.attr("x", x - 14)
.attr("y", y - 12)
.attr("width", 28)
.attr("height", 24)
.attr("rx", 3)
.attr("fill", "white")
.attr("stroke", "#ddd")
.attr("stroke-width", 1);
// Add label text
g.append("text")
.attr("x", x)
.attr("y", y)
.attr("text-anchor", "middle")
.attr("dominant-baseline", "middle")
.attr("font-size", "12px") // Increased from 11px
.attr("font-weight", "bold")
.attr("fill", "#333")
.text(d.label);
});
// Draw nodes with larger boxes
const node = svg.selectAll(".node")
.data(nodes)
.join("g")
.attr("class", "node")
.attr("transform", d => `translate(${d.x},${d.y})`);
// Add node shapes (rectangles or diamonds)
node.each(function(d) {
const elem = d3.select(this);
if (d.isDecision) {
// Diamond for decision node
elem.append("polygon")
.attr("points", "0,-30 40,0 0,30 -40,0")
.attr("fill", "#f8d56f")
.attr("stroke", "#d4a82e")
.attr("stroke-width", 2);
} else {
// Rectangle for regular node (increased width)
const boxWidth = 120; // Increased from 100
elem.append("rect")
.attr("x", -boxWidth/2)
.attr("y", -22) // Slightly taller
.attr("width", boxWidth)
.attr("height", 44) // Slightly taller
.attr("rx", 5)
.attr("ry", 5)
.attr("fill", d => {
if (d.id === "H" || d.id === "I") return "#f0f0f0";
return "#b3deff";
})
.attr("stroke", d => {
if (d.id === "H" || d.id === "I") return "#999";
return "#4a98e0";
})
.attr("stroke-width", 1.5);
}
});
// Add node labels (increased font size)
node.append("text")
.attr("text-anchor", "middle")
.attr("dominant-baseline", "middle")
.attr("font-size", "13px") // Increased from 11px
.attr("font-weight", d => (d.id === "G" ? "bold" : "normal"))
.attr("fill", "#333")
.each(function(d) {
const lines = d.label.split('\n');
const elem = d3.select(this);
if (lines.length === 1) {
elem.text(d.label);
} else {
lines.forEach((line, i) => {
const lineHeight = 14; // Increased from 12
const yOffset = (i - (lines.length - 1) / 2) * lineHeight;
elem.append("tspan")
.attr("x", 0)
.attr("y", yOffset)
.text(line);
});
}
});
// Add interactivity
node.on("mouseover", function(event, d) {
d3.select(this).select("rect, polygon")
.transition()
.duration(200)
.attr("fill", d => d.isDecision ? "#ffc107" : "#7fc9ff");
})
.on("mouseout", function(event, d) {
d3.select(this).select("rect, polygon")
.transition()
.duration(200)
.attr("fill", d => {
if (d.isDecision) return "#f8d56f";
if (d.id === "H" || d.id === "I") return "#f0f0f0";
return "#b3deff";
});
});
return svg.node();
}
Key Takeaways: Wilcoxon Rank-Sum Test
Tip
- Purpose: Non-parametric alternative to the independent t-test for comparing two unrelated groups
- When to use: When data doesn’t meet normality assumptions or contains outliers
- Data requirements: Independent samples, at least 5 observations per group recommended
- Null hypothesis: No difference between the distributions of the two groups
- Interpretation: If p < 0.05, there is a significant difference between the groups
- Advantages: More robust with non-normal data; works with ordinal data; resistant to outliers
What is the Wilcoxon Rank-Sum Test?
The Wilcoxon rank-sum test (also known as the Mann-Whitney U test) is a powerful non-parametric statistical method for comparing two independent groups. Unlike the t-test, it doesn’t assume that your data follows a normal distribution, making it an excellent choice for real-world data analysis.
Tip
When to use the Wilcoxon test instead of a t-test:
- When your data doesn’t follow a normal distribution
- When dealing with small sample sizes
- When analyzing ordinal data or ranked measurements
- When your data contains outliers that would skew parametric tests
This online calculator allows you to quickly perform a Wilcoxon rank-sum test on your data, visualize the results, and interpret the findings with confidence.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1400
library(shiny)
library(bslib)
library(ggplot2)
library(bsicons)
library(vroom)
library(shinyjs)
# UI definition: a bslib sidebar page. The sidebar holds the data-input
# controls (manual paste or file upload), the test options, the "Run Test"
# button and a short interpretation guide; the main area holds a "Test Results"
# card and a "Visual Assessment" card, each organised as tabs.
ui <- page_sidebar(
title = "Wilcoxon Rank-Sum Test (Mann-Whitney U Test)",
useShinyjs(), # Enable shinyjs for resetting inputs
sidebar = sidebar(
width = 400,
card(
card_header("Data Input"),
# Two alternative input methods; the first panel (manual input) starts open
accordion(
accordion_panel(
"Manual Input",
# Side-by-side text areas: group labels on the left, numeric responses on the right
layout_column_wrap(
width = 1/2,
style = css(grid_template_columns = "1fr 1fr"),
textAreaInput("group_input", "Grouping variable [categorical, One value per row]", rows = 8,
placeholder = "Paste values here (only two levels)..."),
textAreaInput("response_input", "Response variable [numeric, One value per row]", rows = 8,
placeholder = "Paste values here...")
),
# Link that triggers input$use_example on the server
div(
actionLink("use_example", "Use example data", style = "color:#0275d8;"),
tags$span(bs_icon("file-earmark-text"), style = "margin-left: 5px; color: #0275d8;")
)
),
accordion_panel(
"File Upload",
fileInput("file_upload", "Upload CSV or TXT file:",
accept = c("text/csv", "text/plain", ".csv", ".txt")),
checkboxInput("header", "File has header", TRUE),
# Column selectors and the clear button are shown only while the
# server-side output `file_uploaded` is truthy
conditionalPanel(
condition = "output.file_uploaded",
div(
layout_column_wrap(
width = 1/2,
style = css(grid_template_columns = "1fr 1fr"),
# Choices start empty and are filled by the server
selectInput("group_var", "Grouping variable:", choices = NULL),
selectInput("response_var", "Response variable:", choices = NULL)
),
actionButton("clear_file", "Clear File", class = "btn-danger btn-sm")
)
)
),
id = "input_method",
open = 1
),
# Advanced Options accordion with simplified options
accordion(
accordion_panel(
"Advanced Options",
# Values are passed as the `alternative` argument of wilcox.test()
radioButtons("alternative", tags$strong("Alternative hypothesis:"),
choices = c("Two-sided" = "two.sided",
"Difference < 0" = "less",
"Difference > 0" = "greater"),
selected = "two.sided"),
# Values are translated by the server into the `exact` / `correct` arguments
radioButtons("test_type", tags$strong("Type of Test:"),
choices = c("Default" = "default",
"Exact" = "exact",
"Normal approximation" = "normal",
"Normal approximation with continuity correction" = "correction"),
selected = "default")
),
open = FALSE
),
# Triggers the eventReactive computations on the server
actionButton("run_test", "Run Test", class = "btn btn-primary")
),
hr(),
# Static interpretation guide
card(
card_header("Interpretation"),
card_body(
div(class = "alert alert-info",
tags$ul(
tags$li("The Wilcoxon Rank-Sum Test (Mann-Whitney U Test) is a non-parametric alternative to the independent t-test."),
tags$li(tags$b("Null hypothesis:"), " The distributions of both groups are identical."),
tags$li(tags$b("Alternative:"), " The distributions differ in location (median)."),
tags$li("If p-value < 0.05, there is a significant difference between the groups.")
)
)
)
)
),
# Main area: one column containing the results card and the plots card
layout_column_wrap(
width = 1,
card(
card_header("Test Results"),
card_body(
navset_tab(
# Validation message (if any) followed by the printed test output
nav_panel("Results",
uiOutput("error_message"),
verbatimTextOutput("test_results")),
# Q-Q plots and Shapiro-Wilk output for both groups
nav_panel("Normality Check",
plotOutput("qq_plot"),
verbatimTextOutput("shapiro_test"),
div(class = "alert alert-info mt-3",
"If p < 0.05 in the Shapiro-Wilk test, your data significantly deviates from normality, supporting the use of the Wilcoxon test.")),
# Static explanation and references
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The Wilcoxon Rank-Sum Test compares two independent groups:"),
tags$ul(
tags$li("It does not assume normality, making it more robust than the t-test."),
tags$li("The test ranks all observations from both groups together and compares the sum of ranks."),
tags$li("A small p-value indicates the groups likely come from different distributions.")
),
p("Statistical References:"),
tags$ul(
tags$li("Wilcoxon, F. (1945). Individual comparisons by ranking methods. Biometrics Bulletin, 1(6), 80-83."),
tags$li("Mann, H.B. and Whitney, D.R. (1947). On a test of whether one of two random variables is stochastically larger than the other. The Annals of Mathematical Statistics, 50-60.")
)
))
)
)
),
card(
card_header("Visual Assessment"),
card_body(
# Each plot type has a nested tab set: the plot itself and a short explanation
navset_tab(
nav_panel("Boxplot",
navset_tab(
nav_panel("Plot", plotOutput("boxplot")),
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The boxplot shows the distribution of each group:"),
tags$ul(
tags$li("The box represents the interquartile range (IQR) with the median shown as a line."),
tags$li("Whiskers extend to the smallest and largest values within 1.5 times the IQR."),
tags$li("Points outside the whiskers are potential outliers.")
)
))
)
),
nav_panel("Density Plot",
navset_tab(
nav_panel("Plot", plotOutput("densityplot")),
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The density plot shows the distribution of each group:"),
tags$ul(
tags$li("The shape shows the probability distribution of values in each group."),
tags$li("The vertical dashed lines show the median of each group."),
tags$li("Overlapping areas indicate where the groups have similar values.")
)
))
)
),
nav_panel("Rank Visualization",
navset_tab(
nav_panel("Plot", plotOutput("rankplot")),
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The rank visualization shows how the Wilcoxon test works:"),
tags$ul(
tags$li("All values from both groups are ranked from lowest to highest."),
tags$li("The ranks are then assigned back to their original groups."),
tags$li("If one group consistently has higher ranks, the test will detect this difference."),
tags$li("This plot helps visualize the rank-based nature of the test.")
)
))
)
)
)
)
)
)
)
server <- function(input, output, session) {
# Example data inserted by the "Use example data" link: newline-separated
# strings in the same one-value-per-row format the text areas expect
# (10 rows labelled GRP1 followed by 10 rows labelled GRP2).
example_group <- "GRP1\nGRP1\nGRP1\nGRP1\nGRP1\nGRP1\nGRP1\nGRP1\nGRP1\nGRP1\nGRP2\nGRP2\nGRP2\nGRP2\nGRP2\nGRP2\nGRP2\nGRP2\nGRP2\nGRP2"
example_response <- "8.44\n7.16\n16.94\n9.59\n13.25\n12.94\n11\n5.61\n10.6\n12.81\n33.28\n25.61\n27.22\n30.8\n21.95\n29.15\n27.93\n28.13\n29.77\n33.72"
# Tracks which input source is active: "manual" (text areas) or "file" (upload).
# Read by analysis_data() to decide where the data comes from.
input_method <- reactiveVal("manual")
# Reset the file-upload controls: empty both column selectors and reset the
# file input widget in the browser.
clear_file_inputs <- function() {
  # updateSelectInput() treats `choices = NULL` as "leave the choices
  # unchanged", so an empty character vector is needed to actually clear them.
  updateSelectInput(session, "group_var", choices = character(0))
  updateSelectInput(session, "response_var", choices = character(0))
  # shinyjs::reset() restores the widget in the UI.
  reset("file_upload")
}
# Blank out both manual-entry text areas (group labels and responses).
clear_text_inputs <- function() {
  for (area_id in c("group_input", "response_input")) {
    updateTextAreaInput(session, area_id, value = "")
  }
}
# --- Observers that keep the two input methods mutually exclusive ---

# "Use example data": switch to manual mode, drop any uploaded file and
# fill both text areas with the bundled example.
observeEvent(input$use_example, {
  input_method("manual")
  clear_file_inputs()
  updateTextAreaInput(session, "group_input", value = example_group)
  updateTextAreaInput(session, "response_input", value = example_response)
})

# New upload: switch to file mode, blank the text areas and show a
# notification (removed again by file_data() once the file has been read).
observeEvent(input$file_upload, {
  if (is.null(input$file_upload)) {
    return(invisible(NULL))
  }
  input_method("file")
  clear_text_inputs()
  showNotification("Processing file...", type = "message", id = "fileLoading")
})

# "Clear File": fall back to manual mode and reset the upload controls.
observeEvent(input$clear_file, {
  input_method("manual")
  clear_file_inputs()
})

# Typing or pasting into either text area (non-empty content only) switches
# back to manual mode and resets the upload controls. The initial value is
# ignored so the observers do not fire at start-up.
lapply(c("group_input", "response_input"), function(area_id) {
  observeEvent(input[[area_id]], {
    typed <- input[[area_id]]
    has_content <- !is.null(typed) && nchar(typed) > 0
    if (has_content) {
      input_method("manual")
      clear_file_inputs()
    }
  }, ignoreInit = TRUE)
})
# Reads the uploaded file with vroom (delimiter guessed, header controlled by
# the "File has header" checkbox). Yields the parsed table, or NULL after
# showing an error notification when reading fails. In both cases the
# "Processing file..." notification is dismissed.
file_data <- reactive({
  req(input$file_upload)
  tryCatch(
    {
      parsed <- vroom::vroom(
        input$file_upload$datapath,
        delim = NULL,
        col_names = input$header,
        show_col_types = FALSE
      )
      removeNotification("fileLoading")
      parsed
    },
    error = function(err) {
      removeNotification("fileLoading")
      showNotification(paste("File read error:", err$message), type = "error")
      NULL
    }
  )
})
# Populate the column selectors whenever a new file has been read.
# - Response choices: numeric columns.
# - Group choices: non-numeric columns, plus numeric columns that contain
#   exactly two distinct non-missing values (e.g. groups coded 0/1).
# The previous loop meant to add two-level columns could never run, because it
# only looked at non-numeric columns, which were already all in the list.
observe({
  df <- file_data()
  if (!is.null(df)) {
    is_num <- vapply(df, is.numeric, logical(1))
    num_vars <- names(df)[is_num]
    cat_vars <- names(df)[!is_num]

    # Numeric columns usable as a two-level grouping variable
    is_two_level <- vapply(
      num_vars,
      function(col) length(unique(na.omit(df[[col]]))) == 2,
      logical(1)
    )
    cat_vars <- c(cat_vars, num_vars[is_two_level])

    updateSelectInput(session, "group_var", choices = cat_vars)
    updateSelectInput(session, "response_var", choices = num_vars)
  }
})
# Flag consumed by the UI's conditionalPanel to show the column selectors.
# input$file_upload keeps its last value on the server even after the widget
# is reset in the browser, so the flag also requires that file mode is active;
# otherwise the panel would stay visible after "Clear File".
output$file_uploaded <- reactive({
  !is.null(input$file_upload) && input_method() == "file"
})
# Must be computed even though no visible output element displays it.
outputOptions(output, "file_uploaded", suspendWhenHidden = FALSE)
# Turn a newline-separated block of text into a numeric vector.
# Returns NULL for NULL or empty text. Empty lines are skipped; lines that are
# not valid numbers become NA (conversion warnings are suppressed).
parse_numeric_input <- function(text) {
  if (is.null(text) || text == "") {
    return(NULL)
  }
  pieces <- strsplit(text, "\\r?\\n")[[1]]
  non_empty <- pieces[pieces != ""]
  suppressWarnings(as.numeric(non_empty))
}
# Turn a newline-separated block of text into a character vector of group
# labels. Returns NULL for NULL or empty text. Empty lines are skipped.
# Leading/trailing whitespace is stripped from each label so that pasted
# values such as "GRP1 " and "GRP1" are not counted as different levels.
# Row filtering is unchanged (only truly empty lines are dropped), which keeps
# the rows aligned with the response text area.
parse_group_input <- function(text) {
  if (is.null(text) || text == "") return(NULL)
  input_lines <- strsplit(text, "\\r?\\n")[[1]]
  input_lines <- input_lines[input_lines != ""]
  trimws(input_lines)
}
# Build the analysis data frame (columns `group`, `response`) from the two
# text areas. Returns NULL when either input is missing or no valid numeric
# response remains.
manual_data <- reactive({
  grp <- parse_group_input(input$group_input)
  resp <- parse_numeric_input(input$response_input)
  if (is.null(grp) || is.null(resp)) return(NULL)

  # If the inputs differ in length, only the first min(length) rows are paired.
  n_rows <- min(length(grp), length(resp))
  # Guard the empty case explicitly: `1:0` would index elements 1 and 0
  # instead of selecting nothing.
  if (n_rows == 0) return(NULL)
  rows <- seq_len(n_rows)
  grp <- grp[rows]
  resp <- resp[rows]

  # Drop rows whose response could not be parsed as a number
  valid_idx <- !is.na(resp)
  if (!any(valid_idx)) return(NULL)

  data.frame(
    group = grp[valid_idx],
    response = resp[valid_idx]
  )
})
# Data used by every analysis step: a data frame with columns `group` and
# `response`, taken from the uploaded file when file mode is active and both
# columns are validly selected, otherwise from the manual text areas.
analysis_data <- reactive({
  if (input_method() == "file") {
    df <- file_data()
    selected <- c(input$group_var, input$response_var)
    # Both selectors must hold a non-empty name that exists in the current
    # file; a selection can be empty or left over from a previous upload.
    selection_ok <- !is.null(df) &&
      length(selected) == 2 &&
      all(nzchar(selected)) &&
      all(selected %in% names(df))
    if (selection_ok) {
      result <- data.frame(
        group = df[[input$group_var]],
        response = df[[input$response_var]]
      )
      # Rows with a missing group or response are excluded
      return(na.omit(result))
    }
  }
  manual_data()
})
# Checks the current analysis data and yields NULL when it is usable, or a
# message string describing the first problem found. Note that the last check
# yields a "Warning:" message, which is still a non-NULL result.
validate_data <- reactive({
  dat <- analysis_data()

  # 1. There must be some data
  if (is.null(dat) || nrow(dat) == 0) {
    return("Error: Please provide valid input data.")
  }

  # 2. No missing responses
  if (any(is.na(dat$response))) {
    return("Error: Response values must be numeric.")
  }

  # 3. Exactly two group levels
  n_levels <- length(unique(dat$group))
  if (n_levels != 2) {
    return(paste("Error: Grouping variable must have exactly 2 levels. Found", n_levels, "levels."))
  }

  # 4. At least 3 observations in each group
  per_group_n <- table(dat$group)
  if (any(per_group_n < 3)) {
    return("Error: Each group should have at least 3 observations for the Wilcoxon test.")
  }

  # 5. A group whose observations are all the same value
  by_group <- split(dat$response, dat$group)
  is_constant <- sapply(by_group, function(obs) length(unique(obs)) == 1)
  if (any(is_constant)) {
    return("Warning: One of your groups has identical values for all observations. This may affect the test results.")
  }

  NULL
})
# Shows the validation message as a red alert, but only once "Run Test" has
# been clicked at least once; otherwise renders nothing.
output$error_message <- renderUI({
  msg <- validate_data()
  should_show <- !is.null(msg) && input$run_test > 0
  if (!should_show) {
    return(NULL)
  }
  div(class = "alert alert-danger", msg)
})
# Splits the responses by group. Yields NULL unless the data has exactly two
# group levels; otherwise a list with `group1`, `group2` (numeric vectors) and
# `labels` (the two levels, in order of first appearance).
group_values <- reactive({
  dat <- analysis_data()
  if (is.null(dat)) {
    return(NULL)
  }
  lvls <- unique(dat$group)
  if (length(lvls) != 2) {
    return(NULL)
  }
  in_first <- dat$group == lvls[1]
  in_second <- dat$group == lvls[2]
  list(
    group1 = dat$response[in_first],
    group2 = dat$response[in_second],
    labels = lvls
  )
})
# Run the Wilcoxon rank-sum test when "Run Test" is clicked.
# Yields NULL when validation fails or the data cannot be split into two
# groups; otherwise the `htest` object from wilcox.test() extended with
#   $group_labels - the two group labels (group1, group2)
#   $conf_int     - 95% CI for the location shift group1 - group2
#                   (two-sided alternative only; omitted if it cannot be computed)
test_result <- eventReactive(input$run_test, {
  showNotification("Calculating results...", type = "message", id = "calculating")
  # Dismiss the notification on every exit path, including errors
  on.exit(removeNotification("calculating"), add = TRUE)

  if (!is.null(validate_data())) return(NULL)

  values <- group_values()
  if (is.null(values)) return(NULL)

  # Map the "Type of Test" choice onto wilcox.test() arguments.
  # `exact = NULL` lets R choose (exact for small samples without ties).
  # `correct` must always be a logical: wilcox.test() evaluates `if (correct)`
  # whenever it falls back to the normal approximation (ties or large
  # samples), so passing NULL makes the test fail in exactly those cases.
  exact <- switch(
    input$test_type,
    exact = TRUE,
    normal = ,
    correction = FALSE,
    NULL
  )
  correct <- !identical(input$test_type, "normal")

  result <- wilcox.test(
    values$group1,
    values$group2,
    paired = FALSE,
    alternative = input$alternative,
    exact = exact,
    correct = correct
  )
  result$group_labels <- values$labels

  # Confidence interval for the location shift, using the same method
  # settings as the test itself. A failure here must not discard the p-value.
  if (input$alternative == "two.sided") {
    result$conf_int <- tryCatch(
      wilcox.test(
        values$group1,
        values$group2,
        paired = FALSE,
        alternative = "two.sided",
        exact = exact,
        correct = correct,
        conf.int = TRUE,
        conf.level = 0.95
      )$conf.int,
      error = function(e) NULL
    )
  }

  result
})
# Run a Shapiro-Wilk normality test on each group when "Run Test" is clicked.
# Yields a list with `group1`, `group2` (htest objects) and `labels`, or NULL
# when the groups are unavailable or the test cannot be computed.
shapiro_result <- eventReactive(input$run_test, {
  values <- group_values()
  if (is.null(values)) return(NULL)

  # shapiro.test() stops with an error for fewer than 3 or more than 5000
  # observations and for constant data; report that instead of failing.
  tryCatch(
    list(
      group1 = shapiro.test(values$group1),
      group2 = shapiro.test(values$group2),
      labels = values$labels
    ),
    error = function(e) {
      showNotification(
        paste("Normality check skipped:", conditionMessage(e)),
        type = "warning"
      )
      NULL
    }
  )
})
# Prints the Shapiro-Wilk statistic, p-value and a one-line verdict for each
# group, followed by an overall remark on whether a t-test might also suit.
output$shapiro_test <- renderPrint({
  req(input$run_test > 0, !is.null(shapiro_result()))
  res <- shapiro_result()
  if (is.null(res)) return(NULL)

  cat("Shapiro-Wilk Normality Test Results:\n\n")

  fits <- list(res$group1, res$group2)
  for (k in seq_along(fits)) {
    fit <- fits[[k]]
    cat(res$labels[k], "group:\n")
    cat("W =", round(fit$statistic, 4), ", p-value =", round(fit$p.value, 6), "\n")
    verdict <- if (fit$p.value < 0.05) {
      "The data significantly deviates from normality.\n\n"
    } else {
      "The data appears to be normally distributed.\n\n"
    }
    cat(verdict)
  }

  any_non_normal <- res$group1$p.value < 0.05 || res$group2$p.value < 0.05
  if (any_non_normal) {
    cat("Since at least one group deviates from normality, the Wilcoxon test is appropriate.\n")
  } else {
    cat("Both groups appear normally distributed; a t-test might also be suitable.\n")
  }
})
# Side-by-side normal Q-Q plots, one per group, each with a red reference line.
output$qq_plot <- renderPlot({
  req(input$run_test > 0, !is.null(group_values()))
  values <- group_values()
  if (is.null(values)) return(NULL)

  # One row, two panels
  par(mfrow = c(1, 2))
  samples <- list(values$group1, values$group2)
  for (k in 1:2) {
    qqnorm(samples[[k]], main = paste("Q-Q Plot for", values$labels[k]))
    qqline(samples[[k]], col = "red")
  }
})
# Print the test summary: method, W statistic, p-value, optional confidence
# interval, per-group medians and a plain-language interpretation.
# All differences are reported as group1 - group2, the direction used by
# wilcox.test() for both the alternative hypothesis and the confidence
# interval, so the sign of every reported figure agrees.
output$test_results <- renderPrint({
  result <- test_result()
  if (is.null(result)) return(NULL)
  values <- group_values()
  if (is.null(values)) return(NULL)

  # Wording for the interpretation sentence
  alt_text <- switch(
    input$alternative,
    "two.sided" = "different from",
    "less" = "less than",
    "greater" = "greater than"
  )
  test_type_text <- switch(
    input$test_type,
    "default" = "Default method",
    "exact" = "Exact test",
    "normal" = "Normal approximation",
    "correction" = "Normal approximation with continuity correction"
  )
  group_labels <- result$group_labels
  direction <- paste(group_labels[1], "-", group_labels[2])

  cat("Wilcoxon Rank-Sum Test Results:\n")
  cat("-------------------------------\n")
  cat("Method:", test_type_text, "\n\n")
  cat("W statistic:", round(result$statistic, 4), "\n")
  cat("p-value:", format.pval(result$p.value, digits = 4), "\n")

  # Confidence interval is only attached for the two-sided alternative
  if (!is.null(result$conf_int)) {
    cat("\n95% Confidence Interval for the location shift (", direction, ", Hodges-Lehmann):\n", sep = "")
    cat(round(result$conf_int[1], 4), "to", round(result$conf_int[2], 4), "\n")
  }

  med1 <- median(values$group1)
  med2 <- median(values$group2)
  cat("\nData Summary:\n")
  cat(group_labels[1], "- Median:", med1, ", n =", length(values$group1), "\n")
  cat(group_labels[2], "- Median:", med2, ", n =", length(values$group2), "\n")
  cat("Difference in medians (", direction, "): ", med1 - med2, "\n\n", sep = "")

  if (is.na(result$p.value)) {
    # Can occur when the data are so degenerate that no p-value is defined
    cat("Interpretation: The p-value could not be computed for these data.\n")
  } else if (result$p.value < 0.05) {
    cat("Interpretation: There is a significant difference between the groups (p < 0.05).\n")
    cat("The location (median) of", group_labels[1], "is significantly", alt_text, "the location of", group_labels[2], ".\n")
  } else {
    cat("Interpretation: No significant difference detected between the groups (p ≥ 0.05).\n")
    cat("We cannot conclude that the location of", group_labels[1], "is", alt_text, "the location of", group_labels[2], ".\n")
  }
})
# Boxplot of both groups with jittered raw points; the Wilcoxon p-value is
# shown in the subtitle.
output$boxplot <- renderPlot({
  req(input$run_test > 0, !is.null(test_result()))
  values <- group_values()
  if (is.null(values)) return(NULL)
  result <- test_result()

  # Fix the factor levels to (group1, group2). By default factor() sorts the
  # levels alphabetically, which can swap the groups' positions and colours
  # relative to the rest of the output.
  df <- data.frame(
    Value = c(values$group1, values$group2),
    Group = factor(
      rep(values$labels, c(length(values$group1), length(values$group2))),
      levels = values$labels
    )
  )

  ggplot(df, aes(x = Group, y = Value, fill = Group)) +
    geom_boxplot(alpha = 0.7) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    scale_fill_manual(values = c("#5dade2", "#ff7f0e")) +
    theme_minimal(base_size = 14) +
    labs(title = "Comparison of Group Values",
         subtitle = paste("Wilcoxon test: p =", format.pval(result$p.value, digits = 3)),
         y = "Value") +
    theme(legend.position = "none",
          plot.subtitle = element_text(face = "italic"))
})
# Overlaid density curves for both groups with a dashed line and text label at
# each group's median. When a confidence interval for the location shift is
# available it is reported in the caption.
output$densityplot <- renderPlot({
  req(input$run_test > 0, !is.null(test_result()))
  values <- group_values()
  if (is.null(values)) return(NULL)
  result <- test_result()

  med1 <- median(values$group1)
  med2 <- median(values$group2)
  direction <- paste(values$labels[1], "-", values$labels[2])

  # Pad the x-range by 10% of the data range on each side
  all_values <- c(values$group1, values$group2)
  min_val <- min(all_values)
  max_val <- max(all_values)
  range_val <- max_val - min_val
  x_min <- min_val - range_val * 0.1
  x_max <- max_val + range_val * 0.1

  # Levels fixed to (group1, group2) so the first manual colour belongs to
  # group1, matching the hard-coded colours of the median lines and labels.
  # The default alphabetical level order could pair a group with the other
  # group's colour.
  df <- data.frame(
    Value = all_values,
    Group = factor(
      rep(values$labels, c(length(values$group1), length(values$group2))),
      levels = values$labels
    )
  )

  p <- ggplot(df, aes(x = Value, fill = Group, color = Group)) +
    geom_density(alpha = 0.5) +
    geom_vline(xintercept = c(med1, med2),
               color = c("#5dade2", "#ff7f0e"),
               linetype = "dashed",
               linewidth = 1) +
    scale_fill_manual(values = c("#5dade2", "#ff7f0e")) +
    scale_color_manual(values = c("#2874a6", "#d35400")) +
    annotate("text", x = med1, y = 0,
             label = paste("Median =", round(med1, 2)),
             hjust = -0.1, vjust = -1,
             color = "#2874a6", fontface = "bold") +
    annotate("text", x = med2, y = 0,
             label = paste("Median =", round(med2, 2)),
             hjust = -0.1, vjust = -2.5,
             color = "#d35400", fontface = "bold") +
    coord_cartesian(xlim = c(x_min, x_max)) +
    labs(title = "Density Distribution by Group",
         subtitle = paste0("Median difference (", direction, "): ", round(med1 - med2, 2)),
         x = "Value",
         y = "Density") +
    theme_minimal(base_size = 14)

  # The interval describes the *difference* between groups, which is not on
  # the same scale as the raw values on the x-axis, so it is reported as text
  # rather than drawn as a region of the axis.
  if (!is.null(result$conf_int)) {
    p <- p + labs(
      caption = paste0(
        "95% CI for the location shift (", direction, "): ",
        round(result$conf_int[1], 2), " to ", round(result$conf_int[2], 2)
      )
    )
  }
  p
})
# Rank visualization: every observation is placed on a horizontal axis at its
# pooled rank and coloured by group; arrows mark each group's mean rank.
output$rankplot <- renderPlot({
  req(input$run_test > 0, !is.null(test_result()))
  values <- group_values()
  if (is.null(values)) return(NULL)

  # Rank all observations together (ties receive the average rank)
  all_values <- c(values$group1, values$group2)
  n1 <- length(values$group1)
  n2 <- length(values$group2)

  # Levels fixed to (group1, group2) so the first manual colour belongs to
  # group1, matching the hard-coded colours of the mean-rank annotations.
  df <- data.frame(
    Value = all_values,
    Group = factor(rep(values$labels, c(n1, n2)), levels = values$labels),
    Rank = rank(all_values)
  )
  df <- df[order(df$Rank), ]

  grp1_mean_rank <- mean(df$Rank[df$Group == values$labels[1]])
  grp2_mean_rank <- mean(df$Rank[df$Group == values$labels[2]])

  ggplot(df, aes(x = Rank, y = 0.5, color = Group)) +
    geom_point(size = 4, alpha = 0.7) +
    geom_segment(aes(x = Rank, xend = Rank, y = 0.3, yend = 0.7), linewidth = 1) +
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "gray50", alpha = 0.5) +
    scale_color_manual(values = c("#5dade2", "#ff7f0e")) +
    # Group 1 mean rank: arrow pointing up from below the axis
    annotate("segment", x = grp1_mean_rank, xend = grp1_mean_rank,
             y = 0.1, yend = 0.3, color = "#5dade2", linewidth = 1.2,
             arrow = arrow(length = unit(0.3, "cm"))) +
    annotate("text", x = grp1_mean_rank, y = 0,
             label = paste("Mean rank =", round(grp1_mean_rank, 1)),
             color = "#2874a6") +
    # Group 2 mean rank: arrow drawn above the axis
    annotate("segment", x = grp2_mean_rank, xend = grp2_mean_rank,
             y = 0.7, yend = 0.9, color = "#ff7f0e", linewidth = 1.2,
             arrow = arrow(length = unit(0.3, "cm"))) +
    annotate("text", x = grp2_mean_rank, y = 1,
             label = paste("Mean rank =", round(grp2_mean_rank, 1)),
             color = "#d35400") +
    theme_minimal(base_size = 14) +
    labs(title = "Rank Distribution of Values",
         subtitle = "The Wilcoxon test compares the ranks between groups",
         x = "Rank",
         y = "") +
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank(),
          legend.position = "top")
})
}
# Run the application: combine the UI definition and the server function
# into a Shiny app object.
shinyApp(ui = ui, server = server)
Parametric vs. Non-parametric Tests: When to Use Each
Before diving into the calculator, it’s important to understand when to use non-parametric tests like the Wilcoxon test versus parametric alternatives like the t-test.
Feature | Parametric Tests (e.g., t-test) | Non-parametric Tests (e.g., Wilcoxon) |
---|---|---|
Distribution assumptions | Requires normal distribution | Does not require normality (similar distribution shapes are assumed when interpreting the result as a shift in medians) |
Sensitivity to outliers | Highly sensitive | Resistant to outliers |
Sample size requirements | Generally needs larger samples | Works well with small samples |
Type of data | Interval or ratio scale | Works with ordinal or ranked data |
Statistical power | Higher power when assumptions are met | Lower power but more robust |
What it compares | Means | Medians or distributions |
How the Wilcoxon Rank-Sum Test Works
The Wilcoxon rank-sum test works by:
- Combining all values from both groups
- Ranking all values from lowest to highest
- Calculating the sum of ranks for each group
- Determining if the rank sums differ more than would be expected by chance
Mathematical Procedure
- Combine samples: Combine observations from both groups while keeping track of which observation came from which group
- Rank observations: Arrange all observations in ascending order and assign ranks (1 for smallest, 2 for second smallest, etc.)
- Handle ties: If multiple observations have the same value, assign them the average of the ranks they would have received
- Calculate rank sums: For each group, sum the ranks of all observations in that group
- Calculate test statistic:
- W = sum of ranks for group 1
- Or U = W - [n₁(n₁+1)]/2 (Mann-Whitney U formulation)
- Calculate p-value:
- For small samples: Use exact distribution
- For large samples: Use normal approximation with
- Mean = n₁(n₁+n₂+1)/2
- Standard Deviation = √(n₁n₂(n₁+n₂+1)/12)
Assumptions of the Wilcoxon Rank-Sum Test
The Wilcoxon test has fewer assumptions than parametric tests:
- Independence: Observations within and between groups are independent
- Similar distributions: The distributions should have similar shapes (though not necessarily normal)
- Ordinal data: The data can be ranked (at minimum ordinal scale)
Statistical Power Considerations
Important
Statistical Power Note: The Wilcoxon rank-sum test has approximately 95% of the power of the t-test when the t-test’s assumptions are perfectly met. However, when those assumptions are violated, the Wilcoxon test often has much greater power.
For optimal statistical power:
- With small samples (n < 20 per group), the Wilcoxon test is often more powerful than the t-test
- With larger samples where normality is questionable, the Wilcoxon test remains a strong choice
- Consider checking your data’s distribution before choosing a test (but don’t use the same data to both select and perform the test)
Example 1: Treatment vs. Control Group
A pharmaceutical company wants to compare a new pain relief medication against a placebo. Pain reduction is measured on a 0-10 scale for 8 patients receiving the medication and 8 receiving a placebo.
Data:
Patient | Treatment Group | Control Group |
---|---|---|
1 | 7 | 3 |
2 | 5 | 2 |
3 | 8 | 1 |
4 | 6 | 4 |
5 | 9 | 2 |
6 | 7 | 3 |
7 | 8 | 2 |
8 | 6 | 1 |
Analysis Steps:
Combine and rank all observations:
Value | Group | Rank |
---|---|---|
1 | Control | 1.5 |
1 | Control | 1.5 |
2 | Control | 4 |
2 | Control | 4 |
2 | Control | 4 |
3 | Control | 6.5 |
3 | Control | 6.5 |
4 | Control | 8 |
5 | Treatment | 9 |
6 | Treatment | 10.5 |
6 | Treatment | 10.5 |
7 | Treatment | 12.5 |
7 | Treatment | 12.5 |
8 | Treatment | 14.5 |
8 | Treatment | 14.5 |
9 | Treatment | 16 |
Calculate rank sums:
- Treatment group: 9 + 10.5 + 10.5 + 12.5 + 12.5 + 14.5 + 14.5 + 16 = 100
- Control group: 1.5 + 1.5 + 4 + 4 + 4 + 6.5 + 6.5 + 8 = 36
Calculate test statistic:
- W = 100 (sum of ranks for treatment group)
- Expected sum under H₀ = 8(8+8+1)/2 = 68
- The observed sum (100) is much higher than expected (68)
Calculate p-value:
- p = 0.0008 (two-sided)
Results:
- W = 100, p = 0.0008
- Median treatment: 7, Median control: 2
- Interpretation: There is a statistically significant difference in pain reduction between the treatment and control groups (p < 0.05).
How to Report: “Pain reduction was significantly higher in the treatment group (Mdn = 7) compared to the control group (Mdn = 2), W = 100, p = 0.0008, indicating the new medication was effective in reducing pain.”
Example 2: Comparing Two Teaching Methods
An educator wants to compare student performance under two different teaching methods. Students were randomly assigned to either Method A or Method B.
Data:
Student | Method A | Method B |
---|---|---|
1 | 82 | 71 |
2 | 78 | 64 |
3 | 85 | 72 |
4 | 90 | 67 |
5 | 76 | 70 |
6 | 84 | 68 |
7 | 87 | 73 |
8 | 79 | 69 |
9 | 83 | 74 |
10 | 88 | 65 |
Results:
- W = 155, p = 0.0001
- Median Method A: 83.5, Median Method B: 69.5
- Interpretation: There is a statistically significant difference in student performance between the two teaching methods (p < 0.05).
How to Report: “Students taught using Method A performed significantly better (Mdn = 83.5) than those taught using Method B (Mdn = 69.5), W = 155, p = 0.0001, suggesting that Method A is more effective for student learning.”
How to Report Wilcoxon Rank-Sum Test Results
When reporting the results of a Wilcoxon rank-sum test in academic papers or research reports, include the following elements:
"[Group 1] showed significantly [higher/lower/different] [variable] (Mdn = [value1]) compared to
[Group 2] (Mdn = [value2]), W = [test statistic], p = [p-value]."
For example:
"The treatment group showed significantly higher pain reduction (Mdn = 7) than the control group (Mdn = 2), W = 100, p = 0.0008."
Additional information to consider including:
- Sample sizes for each group
- Effect size (r = Z/√N where Z is the standardized test statistic and N is the total sample size)
- Confidence intervals for the difference in medians (if available)
- Whether the test was one-tailed or two-tailed
APA Style Reporting
For APA style papers (7th edition), report the Wilcoxon rank-sum test results as follows:
We conducted a Wilcoxon rank-sum test to examine whether there was a significant difference in [variable] between [group 1] and [group 2]. Results indicated that [variable] was
significantly [higher/lower] in [group with higher median] (Mdn = [value]) compared to
[group with lower median] (Mdn = [value]), W = [test statistic], z = [z-value if available],
p = [exact p-value], r = [effect size].
Reporting in Tables
When reporting multiple Wilcoxon rank-sum test results in a table, include these columns:
- Variables being compared
- Medians for both groups
- W statistic
- Z statistic (if using normal approximation)
- p-value
- Effect size
- Sample sizes
Test Your Understanding
- When is the Wilcoxon rank-sum test most appropriate?
- When comparing paired measurements from the same subjects
- When comparing two independent groups with non-normal data
- When comparing more than two groups
- When testing for equality of variances
- What is the main advantage of the Wilcoxon rank-sum test over the independent t-test?
- It always provides higher statistical power
- It can be used with categorical data
- It doesn’t require the assumption of normality
- It can handle missing values better
- What does the Wilcoxon rank-sum test primarily compare?
- The means of the two groups
- The medians or distributions of the two groups
- The variances of the two groups
- The ranges of the two groups
- A researcher finds W = 42, p = 0.03 when comparing two groups. What can they conclude?
- There is no significant difference between the groups
- There is a significant difference between the groups
- The test is invalid
- More data is needed
- What happens when there are ties in the data during ranking?
- The tied observations are excluded from the analysis
- The tied observations receive the same rank
- The tied observations receive the average of the ranks they would have received
- The test cannot be performed with tied data
Answers: 1-B, 2-C, 3-B, 4-B, 5-C
Common Questions About the Wilcoxon Test
What’s the difference between Wilcoxon rank-sum and Mann-Whitney U tests?
They are mathematically equivalent tests with different formulations. The Wilcoxon rank-sum test uses the sum of ranks as the test statistic, while the Mann-Whitney U test counts the number of pairwise comparisons where one group exceeds the other. The relationship between the statistics is: U = W - [n₁(n₁+1)]/2, where n₁ is the sample size of the first group and W is the Wilcoxon rank sum.
Can I use the Wilcoxon test with very small samples?
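Yes! The Wilcoxon test works well for small samples (even n=3 per group), unlike parametric tests which typically require larger samples for reliable results. For very small samples, the test uses exact p-value calculations rather than normal approximations. Keep in mind that very small samples limit the attainable significance: with n=3 per group there are only 20 possible rank arrangements, so the smallest possible two-sided exact p-value is 2/20 = 0.10 and the result can never be significant at the 0.05 level.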
What should I report in my results?
For a complete report, include: the test statistic (W), p-value, sample sizes for each group, and the medians of each group. For example: “The treatment group (Median = 15.5, n = 10) showed significantly higher scores than the control group (Median = 8.2, n = 10), W = 89, p = 0.003.”
How do I handle ties in the rankings?
The Wilcoxon test handles ties by assigning the average rank to tied values. For example, if the 4th and 5th smallest values are identical, both receive a rank of 4.5. Modern statistical software (including this calculator) automatically applies the appropriate correction for ties.
What is the null hypothesis for the Wilcoxon rank-sum test?
The null hypothesis is that the distributions of both populations are identical. Another way to express it: the probability of an observation from group A exceeding an observation from group B equals the probability of an observation from group B exceeding an observation from group A.
When should I use the Wilcoxon signed-rank test instead of the Wilcoxon rank-sum test?
Use the Wilcoxon signed-rank test for paired data (before/after measurements or matched subjects), and use the Wilcoxon rank-sum test for independent samples (comparing two separate groups). The signed-rank test analyzes within-subject differences, while the rank-sum test compares between groups.
Examples of When to Use the Wilcoxon Rank-Sum Test
- Clinical trials: Comparing treatment effects when data doesn’t follow normal distribution
- Customer satisfaction: Comparing ratings between two different product versions
- Environmental studies: Comparing pollution levels between two locations
- Educational research: Comparing test scores between two teaching methods
- Financial analysis: Comparing returns from two different investment strategies
- Quality control: Comparing product durability between two manufacturing processes
- Ecological research: Comparing species diversity between two habitats
- Public health: Comparing health outcomes between two demographic groups
References
- Wilcoxon, F. (1945). Individual comparisons by ranking methods. Biometrics Bulletin, 1(6), 80-83.
- Mann, H.B. and Whitney, D.R. (1947). On a test of whether one of two random variables is stochastically larger than the other. The Annals of Mathematical Statistics, 18(1), 50-60.
- Rosner, B. (2015). Fundamentals of Biostatistics (8th ed.). Cengage Learning.
- Lehmann, E. L. (2006). Nonparametrics: Statistical methods based on ranks. Springer.
- Fay, M.P. and Proschan, M.A. (2010). Wilcoxon-Mann-Whitney or t-test? On assumptions for hypothesis tests and multiple interpretations of decision rules. Statistics Surveys, 4, 1-39.
- Divine, G., Norton, H.J., Hunt, R., and Dienemann, J. (2013). Statistical grand rounds: A review of analysis and sample size calculation considerations for Wilcoxon tests. Anesthesia & Analgesia, 117(3), 699-710.
Reuse
Citation
BibTeX citation:
@online{kassambara2025,
author = {Kassambara, Alboukadel},
title = {Wilcoxon {Rank-Sum} {Test} {(Mann-Whitney} {U} {Test)}
{Calculator}},
date = {2025-04-06},
url = {https://www.datanovia.com/apps/statfusion/analysis/inferential/non-parametric/two-sample/wilcoxon-rank-sum-test-independent.html},
langid = {en}
}
For attribution, please cite this work as:
Kassambara, Alboukadel. 2025. “Wilcoxon Rank-Sum Test
(Mann-Whitney U Test) Calculator.” April 6, 2025. https://www.datanovia.com/apps/statfusion/analysis/inferential/non-parametric/two-sample/wilcoxon-rank-sum-test-independent.html.