flowchartDiagram = {
// Canvas setup - increased width and added padding
const width = 1100;
const height = 550;
const padding = 60; // Add padding on all sides
// Node definitions with coordinates - adjusted with padding and better spacing
const nodes = [
{id: "A", label: "Measurement 1", x: padding + 50, y: 100},
{id: "B", label: "Measurement 2", x: padding + 50, y: 300},
{id: "C", label: "Calculate Difference\nfor Each Pair", x: padding + 250, y: 200},
{id: "D", label: "Ignore Zero\nDifferences", x: padding + 450, y: 200},
{id: "E", label: "Rank Absolute\nDifferences", x: padding + 650, y: 200},
{id: "F", label: "Assign Original\nSigns to Ranks", x: padding + 850, y: 200},
{id: "G", label: "Calculate Sum of\nPositive & Negative\nRanks", x: padding + 850, y: 300},
{id: "H", label: "Determine Test\nStatistic W or V", x: padding + 650, y: 300},
{id: "I", label: "Calculate p-value", x: padding + 450, y: 300},
{id: "J", label: "p-value < 0.05?", x: padding + 250, y: 300, isDecision: true},
{id: "K", label: "Reject null\nhypothesis", x: padding + 130, y: 400},
{id: "L", label: "Retain null\nhypothesis", x: padding + 370, y: 400}
];
// Edge definitions remain the same
const edges = [
{source: "A", target: "C", label: ""},
{source: "B", target: "C", label: ""},
{source: "C", target: "D", label: ""},
{source: "D", target: "E", label: ""},
{source: "E", target: "F", label: ""},
{source: "F", target: "G", label: ""},
{source: "G", target: "H", label: ""},
{source: "H", target: "I", label: ""},
{source: "I", target: "J", label: ""},
{source: "J", target: "K", label: "Yes"},
{source: "J", target: "L", label: "No"}
];
// Create SVG with explicit viewBox to ensure all content is visible
const svg = d3.create("svg")
.attr("width", width)
.attr("height", height)
.attr("viewBox", [0, 0, width, height])
.attr("style", "max-width: 100%; height: auto; font: 16px sans-serif;");
// Add title
svg.append("text")
.attr("x", width / 2)
.attr("y", 30)
.attr("text-anchor", "middle")
.attr("font-size", "20px")
.attr("font-weight", "bold")
.text("Wilcoxon Signed-Rank Test Procedure");
// Define arrow marker
svg.append("defs").append("marker")
.attr("id", "arrowhead")
.attr("viewBox", "0 0 10 10")
.attr("refX", 8)
.attr("refY", 5)
.attr("markerWidth", 8)
.attr("markerHeight", 8)
.attr("orient", "auto")
.append("path")
.attr("d", "M 0 0 L 10 5 L 0 10 z")
.attr("fill", "#666");
// Draw edges with improved path calculation
const edgeLines = svg.selectAll("path.edge")
.data(edges)
.join("path")
.attr("class", "edge")
.attr("d", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
// Calculate connector points
let sourceX, sourceY, targetX, targetY;
// Decision diamond special case
if (source.isDecision) {
if (d.label === "Yes") {
// Going down and left to K
sourceX = source.x - 30;
sourceY = source.y + 20;
targetX = target.x;
targetY = target.y - 25;
// Return path with offset to avoid overlapping
return `M${sourceX},${sourceY} L${sourceX},${(sourceY + targetY)/2} L${targetX},${(sourceY + targetY)/2} L${targetX},${targetY}`;
} else if (d.label === "No") {
// Going down and right to L
sourceX = source.x + 30;
sourceY = source.y + 20;
targetX = target.x;
targetY = target.y - 25;
// Return path with offset to avoid overlapping
return `M${sourceX},${sourceY} L${sourceX},${(sourceY + targetY)/2} L${targetX},${(sourceY + targetY)/2} L${targetX},${targetY}`;
} else {
// Going across
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
}
} else if (target.isDecision) {
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 40;
targetY = target.y;
} else {
// Normal case
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
// If target is below source, adjust
if (target.y > source.y) {
sourceX = source.x;
sourceY = source.y + 25;
targetX = target.x;
targetY = target.y - 25;
}
// If target is above source
else if (target.y < source.y) {
sourceX = source.x;
sourceY = source.y - 25;
targetX = target.x;
targetY = target.y + 25;
}
}
return `M${sourceX},${sourceY} L${targetX},${targetY}`;
})
.attr("stroke", "#666")
.attr("stroke-width", 2)
.attr("fill", "none")
.attr("marker-end", "url(#arrowhead)");
// Add edge labels with better positioning
svg.selectAll(".edgelabel")
.data(edges.filter(d => d.label !== ""))
.join("text")
.attr("class", "edgelabel")
.attr("text-anchor", "middle")
.attr("dominant-baseline", "middle")
.attr("x", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
if (d.label === "Yes") {
return source.x - 50;
} else if (d.label === "No") {
return source.x + 50;
} else if (target.y > source.y) {
// Vertical edge
return source.x;
} else {
// Horizontal edge
return (source.x + target.x) / 2;
}
})
.attr("y", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
if (d.label === "Yes" || d.label === "No") {
return (source.y + target.y) / 2 - 10;
} else if (target.y > source.y) {
// Vertical edge
return (source.y + target.y) / 2;
} else {
// Horizontal edge
return source.y - 10;
}
})
.attr("font-size", "14px")
.attr("fill", "#333")
.text(d => d.label);
// Draw nodes with fixed box sizes
const node = svg.selectAll(".node")
.data(nodes)
.join("g")
.attr("class", "node")
.attr("transform", d => `translate(${d.x},${d.y})`);
// Add node shapes (rectangles or diamonds) with consistent sizing
node.each(function(d) {
const elem = d3.select(this);
if (d.isDecision) {
// Diamond for decision node
elem.append("polygon")
.attr("points", "0,-30 60,0 0,30 -60,0")
.attr("fill", "#f8d56f")
.attr("stroke", "#d4a82e")
.attr("stroke-width", 2);
} else {
// Rectangle for regular node with fixed width
const boxWidth = 140;
elem.append("rect")
.attr("x", -boxWidth/2)
.attr("y", -25)
.attr("width", boxWidth)
.attr("height", 50)
.attr("rx", 5)
.attr("ry", 5)
.attr("fill", d => {
if (d.id === "K" || d.id === "L") return "#f0f0f0";
return "#b3deff";
})
.attr("stroke", d => {
if (d.id === "K" || d.id === "L") return "#999";
return "#4a98e0";
})
.attr("stroke-width", 2);
}
});
// Add node labels with better text wrapping
node.append("text")
.attr("text-anchor", "middle")
.attr("dominant-baseline", "middle")
.attr("font-size", "14px")
.attr("font-weight", d => (d.id === "J" ? "bold" : "normal"))
.attr("fill", "#333")
.each(function(d) {
const lines = d.label.split('\n');
const elem = d3.select(this);
if (lines.length === 1) {
elem.text(d.label);
} else {
lines.forEach((line, i) => {
const lineHeight = 16;
const yOffset = (i - (lines.length - 1) / 2) * lineHeight;
elem.append("tspan")
.attr("x", 0)
.attr("y", yOffset)
.text(line);
});
}
});
// Add interactivity
node.on("mouseover", function(event, d) {
d3.select(this).select("rect, polygon")
.transition()
.duration(200)
.attr("fill", d => d.isDecision ? "#ffc107" : "#7fc9ff");
})
.on("mouseout", function(event, d) {
d3.select(this).select("rect, polygon")
.transition()
.duration(200)
.attr("fill", d => {
if (d.isDecision) return "#f8d56f";
if (d.id === "K" || d.id === "L") return "#f0f0f0";
return "#b3deff";
});
});
return svg.node();
}
Key Takeaways: Wilcoxon Signed-Rank Test
Tip
- Purpose: Non-parametric alternative to the paired t-test for comparing two related samples
- When to use: For paired/before-after data when normality assumptions are violated
- Data requirements: At least 5 pairs recommended; works with ordinal data
- Null hypothesis: No difference between the paired measurements
- Interpretation: If p < 0.05, there is a significant difference between the paired groups
- Advantages: More robust with non-normal data and outliers; analyzes medians rather than means
What is the Wilcoxon Signed-Rank Test?
The Wilcoxon signed-rank test is a non-parametric statistical method used to compare two related samples, matched samples, or repeated measurements on a single sample. It’s the non-parametric alternative to the paired t-test when the data cannot be assumed to be normally distributed.
Tip
When to use the Wilcoxon signed-rank test:
- When analyzing paired or before-after data
- When your data doesn’t follow a normal distribution
- When dealing with ordinal data or ranked measurements
- When your sample size is small and normality cannot be verified
- When your data contains outliers that would skew a paired t-test
This online calculator allows you to quickly perform a Wilcoxon signed-rank test, visualize your data, and interpret the results with confidence.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1300
library(shiny)
library(bslib)
library(ggplot2)
library(bsicons)
library(vroom)
library(shinyjs)
# UI definition: a bslib page_sidebar. The sidebar holds the data-input card
# (manual entry or file upload, advanced options, Run button) and a static
# interpretation card; the main area holds the results card and the plots card.
ui <- page_sidebar(
title = "Wilcoxon Signed-Rank Test Calculator",
useShinyjs(), # Enable shinyjs; the server calls reset("file_upload")
sidebar = sidebar(
width = 400,
card(
card_header("Data Input"),
# Two alternative ways of supplying the paired data
accordion(
accordion_panel(
"Manual Input",
# Side-by-side text areas, one value per row in each group
layout_column_wrap(
width = 1/2,
style = css(grid_template_columns = "1fr 1fr"),
textAreaInput("group1_input", "Group 1 [One value per row]", rows = 8,
placeholder = "Paste values here..."),
textAreaInput("group2_input", "Group 2 [One value per row]", rows = 8,
placeholder = "Paste values here...")
),
# Link that triggers input$use_example on the server
div(
actionLink("use_example", "Use example data", style = "color:#0275d8;"),
tags$span(bs_icon("file-earmark-text"), style = "margin-left: 5px; color: #0275d8;")
)
),
accordion_panel(
"File Upload",
fileInput("file_upload", "Upload CSV or TXT file:",
accept = c("text/csv", "text/plain", ".csv", ".txt")),
checkboxInput("header", "File has header", TRUE),
# Column selectors and the Clear button are shown only while the
# server-side output "file_uploaded" is truthy
conditionalPanel(
condition = "output.file_uploaded",
div(
layout_column_wrap(
width = 1/2,
style = css(grid_template_columns = "1fr 1fr"),
# Choices start empty here and are filled by the server
selectInput("group1_var", "Group 1 variable:", choices = NULL),
selectInput("group2_var", "Group 2 variable:", choices = NULL)
),
actionButton("clear_file", "Clear File", class = "btn-danger btn-sm")
)
)
),
id = "input_method",
# NOTE(review): bslib documents `open` as TRUE/FALSE or a character vector
# of panel values; confirm that numeric 1 opens the first panel as intended.
open = 1
),
# Advanced options (collapsed by default): alternative hypothesis and the
# p-value computation method, read by the server as input$alternative and
# input$test_type
accordion(
accordion_panel(
"Advanced Options",
radioButtons("alternative", tags$strong("Alternative hypothesis:"),
choices = c("Two-sided" = "two.sided",
"Difference < 0" = "less",
"Difference > 0" = "greater"),
selected = "two.sided"),
radioButtons("test_type", tags$strong("Type of Test:"),
choices = c("Default" = "default",
"Exact" = "exact",
"Normal approximation" = "normal",
"Normal approximation with continuity correction" = "correction"),
selected = "default")
),
open = FALSE
),
# The test is computed only when this button is clicked
actionButton("run_test", "Run Test", class = "btn btn-primary")
),
hr(),
# Static guidance on how to read the result
card(
card_header("Interpretation"),
card_body(
div(class = "alert alert-info",
tags$ul(
tags$li("The Wilcoxon Signed-Rank Test is a non-parametric alternative to the paired t-test."),
tags$li(tags$b("Null hypothesis:"), " The median difference between pairs is zero."),
tags$li("If p-value < 0.05, there is a significant difference between the paired groups.")
)
)
)
)
),
# Main area: results card on top, plots card below (single column)
layout_column_wrap(
width = 1,
card(
card_header("Test Results"),
card_body(
navset_tab(
# error_message and test_results are rendered by the server
nav_panel("Results", uiOutput("error_message"), verbatimTextOutput("test_results")),
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The Wilcoxon Signed-Rank Test compares paired data by analyzing the differences between pairs."),
tags$ul(
tags$li("It does not assume normality, making it more robust than the paired t-test."),
tags$li("The test examines both the sign and magnitude of the differences."),
tags$li("A small p-value indicates significant differences between the paired groups.")
)
))
)
)
),
card(
card_header("Visual Assessment"),
card_body(
# Outer tabs choose the plot; each has inner Plot / Explanation tabs
navset_tab(
nav_panel("Boxplot",
navset_tab(
nav_panel("Plot", plotOutput("boxplot")),
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The boxplot shows the distribution of each group:"),
tags$ul(
tags$li("The box represents the interquartile range (IQR) with the median shown as a line."),
tags$li("Whiskers extend to the smallest and largest values within 1.5 times the IQR."),
tags$li("Points outside the whiskers are potential outliers.")
)
))
)
),
nav_panel("Difference Plot",
navset_tab(
nav_panel("Plot", plotOutput("diffplot")),
nav_panel("Explanation", div(style = "font-size: 0.9rem;",
p("The difference plot shows the distribution of paired differences:"),
tags$ul(
tags$li("The histogram shows the frequency of difference values."),
tags$li("The density curve provides a smoothed representation of the distribution."),
tags$li("The vertical dashed line shows the median difference.")
)
))
)
)
)
)
)
)
)
# Server logic for the Wilcoxon signed-rank calculator.
#
# Collects paired data from either the text areas or an uploaded file, runs
# wilcox.test(paired = TRUE) when "Run Test" is clicked, and renders the text
# summary plus two plots. Everything shown after a run comes from a snapshot
# taken at click time, so the printed method, alternative and data always
# match the test that was actually computed.
server <- function(input, output, session) {
  # Example data for the two groups (newline-separated, like pasted input)
  example_data1 <- "8.44\n7.16\n16.94\n9.59\n13.25\n12.94\n11\n5.61\n10.6\n12.81"
  example_data2 <- "32.66\n31.66\n33.28\n39.81\n30.29\n25.95\n30.74\n38.4\n26.67\n41.49"

  # Which source currently feeds the test: "manual" or "file"
  input_method <- reactiveVal("manual")

  # Empty the column selectors and reset the file chooser.
  clear_file_inputs <- function() {
    # update*Input() ignores NULL arguments; character(0) is what actually
    # clears the list of choices.
    updateSelectInput(session, "group1_var", choices = character(0))
    updateSelectInput(session, "group2_var", choices = character(0))
    reset("file_upload")
  }

  # Empty both manual text areas.
  clear_text_inputs <- function() {
    updateTextAreaInput(session, "group1_input", value = "")
    updateTextAreaInput(session, "group2_input", value = "")
  }

  # "Use example data": switch to manual mode and fill the text areas
  observeEvent(input$use_example, {
    input_method("manual")
    clear_file_inputs()
    updateTextAreaInput(session, "group1_input", value = example_data1)
    updateTextAreaInput(session, "group2_input", value = example_data2)
  })

  # A new upload switches to file mode and discards any typed values
  observeEvent(input$file_upload, {
    if (!is.null(input$file_upload)) {
      input_method("file")
      clear_text_inputs()
    }
  })

  # "Clear File": back to manual mode
  observeEvent(input$clear_file, {
    input_method("manual")
    clear_file_inputs()
  })

  # Typing into either text area takes precedence over a previously uploaded file
  observeEvent(input$group1_input, {
    if (!is.null(input$group1_input) && nchar(input$group1_input) > 0) {
      input_method("manual")
      clear_file_inputs()
    }
  }, ignoreInit = TRUE)

  observeEvent(input$group2_input, {
    if (!is.null(input$group2_input) && nchar(input$group2_input) > 0) {
      input_method("manual")
      clear_file_inputs()
    }
  }, ignoreInit = TRUE)

  # Uploaded file as a data frame, or NULL (with a notification) if unreadable
  file_data <- reactive({
    req(input$file_upload)
    tryCatch({
      vroom::vroom(input$file_upload$datapath, delim = NULL, col_names = input$header, show_col_types = FALSE)
    }, error = function(e) {
      showNotification(paste("File read error:", e$message), type = "error")
      NULL
    })
  })

  # Offer the numeric columns of the uploaded file in both selectors
  observe({
    df <- file_data()
    if (!is.null(df)) {
      num_vars <- names(df)[vapply(df, is.numeric, logical(1))]
      updateSelectInput(session, "group1_var", choices = num_vars)
      # Pre-select the second numeric column for group 2; otherwise both
      # selectors start on the same column and every difference is zero.
      updateSelectInput(
        session, "group2_var",
        choices = num_vars,
        selected = if (length(num_vars) >= 2) num_vars[2] else NULL
      )
    }
  })

  # Drives the conditionalPanel in the UI. shinyjs::reset() clears the browser
  # widget but input$file_upload keeps its last value on the server, so the
  # tracked input method is needed for "Clear File" to actually hide the panel.
  output$file_uploaded <- reactive({
    input_method() == "file" && !is.null(input$file_upload)
  })
  outputOptions(output, "file_uploaded", suspendWhenHidden = FALSE)

  # Parse newline-separated text into a numeric vector.
  # Empty lines are dropped; lines that are not numbers become NA.
  # Returns NULL for empty input or when no line is numeric.
  parse_text_input <- function(text) {
    if (is.null(text) || text == "") return(NULL)
    input_lines <- strsplit(text, "\\r?\\n")[[1]]
    input_lines <- input_lines[input_lines != ""]
    numeric_values <- suppressWarnings(as.numeric(input_lines))
    if (all(is.na(numeric_values))) return(NULL)
    return(numeric_values)
  }

  # Data frame of complete pairs (columns group1, group2), or NULL when the
  # manual input cannot be parsed
  paired_data <- reactive({
    if (input_method() == "file" && !is.null(file_data()) &&
        !is.null(input$group1_var) && !is.null(input$group2_var)) {
      df <- file_data()
      paired_df <- data.frame(
        group1 = df[[input$group1_var]],
        group2 = df[[input$group2_var]]
      )
      # Drop rows with NA in either column
      return(na.omit(paired_df))
    } else {
      g1 <- parse_text_input(input$group1_input)
      g2 <- parse_text_input(input$group2_input)
      if (is.null(g1) || is.null(g2)) return(NULL)
      # Pair by position; surplus values in the longer group are ignored
      min_length <- min(length(g1), length(g2))
      paired_df <- data.frame(
        group1 = g1[1:min_length],
        group2 = g2[1:min_length]
      )
      return(na.omit(paired_df))
    }
  })

  # Convenience accessors for the two columns of the paired data
  group1_values <- reactive({
    pd <- paired_data()
    if (is.null(pd)) return(NULL)
    return(pd$group1)
  })

  group2_values <- reactive({
    pd <- paired_data()
    if (is.null(pd)) return(NULL)
    return(pd$group2)
  })

  # Returns an error message string, or NULL when the data can be tested
  validate_data <- reactive({
    pd <- paired_data()
    if (is.null(pd)) {
      return("Error: Please check your input. Make sure all values are numeric.")
    }
    if (nrow(pd) < 5) {
      return("Error: At least 5 pairs are recommended for the Wilcoxon signed-rank test.")
    }
    return(NULL)
  })

  # Show the validation message, but only once the user has tried to run the test
  output$error_message <- renderUI({
    error <- validate_data()
    if (!is.null(error) && input$run_test > 0) {
      div(class = "alert alert-danger", error)
    }
  })

  # Run the test on click. Returns NULL on invalid data or a failed test,
  # otherwise a list holding the htest object together with the exact data and
  # options it was computed from.
  test_result <- eventReactive(input$run_test, {
    if (!is.null(validate_data())) return(NULL)

    # exact = NULL lets wilcox.test() decide between exact and approximate.
    exact <- switch(input$test_type,
      exact = TRUE,
      normal = FALSE,
      correction = FALSE,
      NULL
    )
    # `correct` must always be a logical: wilcox.test() evaluates
    # `if (correct)` whenever it falls back to the normal approximation
    # (ties, zero differences, large n), and NULL makes that fail.
    # TRUE is the function's own default.
    correct <- !identical(input$test_type, "normal")

    g1 <- group1_values()
    g2 <- group2_values()

    fit <- tryCatch(
      wilcox.test(
        g1,
        g2,
        paired = TRUE,
        alternative = input$alternative,
        exact = exact,
        correct = correct
      ),
      error = function(e) {
        # e.g. every difference is zero, leaving nothing to rank
        showNotification(paste("Test error:", conditionMessage(e)), type = "error")
        NULL
      }
    )
    if (is.null(fit)) return(NULL)

    list(
      test = fit,
      group1 = g1,
      group2 = g2,
      alternative = input$alternative,
      test_type = input$test_type
    )
  })

  # Text summary of the last run
  output$test_results <- renderPrint({
    res <- test_result()
    # req() leaves the output blank; returning a visible NULL would print "NULL"
    req(res)
    result <- res$test

    alt_text <- switch(
      res$alternative,
      "two.sided" = "different from",
      "less" = "less than",
      "greater" = "greater than"
    )
    test_type_text <- switch(
      res$test_type,
      "default" = "Default method",
      "exact" = "Exact test",
      "normal" = "Normal approximation",
      "correction" = "Normal approximation with continuity correction"
    )

    cat("Wilcoxon Signed-Rank Test Results:\n")
    cat("Method:", test_type_text, "\n\n")
    cat("V statistic:", round(result$statistic, 4), "\n")
    cat("p-value:", round(result$p.value, 6), "\n\n")
    cat("Data Summary:\n")
    cat("Group 1 - Median:", median(res$group1), "\n")
    cat("Group 2 - Median:", median(res$group2), "\n")
    cat("Median of differences:", median(res$group1 - res$group2), "\n\n")
    if (result$p.value < 0.05) {
      cat("Interpretation: There is a significant difference between the groups (p < 0.05).\n")
      cat("The median of Group 1 is significantly", alt_text, "the median of Group 2.\n")
    } else {
      cat("Interpretation: No significant difference detected between the groups (p ≥ 0.05).\n")
      cat("We cannot conclude that the median of Group 1 is", alt_text, "the median of Group 2.\n")
    }
  })

  # Side-by-side boxplots of the two groups with the p-value as subtitle
  output$boxplot <- renderPlot({
    res <- test_result()
    req(res)
    result <- res$test
    g1 <- res$group1
    g2 <- res$group2
    df <- data.frame(
      Value = c(g1, g2),
      Group = factor(rep(c("Group 1", "Group 2"), c(length(g1), length(g2))))
    )
    ggplot(df, aes(x = Group, y = Value, fill = Group)) +
      geom_boxplot(alpha = 0.7) +
      geom_jitter(width = 0.2, alpha = 0.5) +
      scale_fill_manual(values = c("Group 1" = "#5dade2", "Group 2" = "#ff7f0e")) +
      theme_minimal(base_size = 14) +
      labs(title = "Comparison of Group Values",
           subtitle = paste("Wilcoxon test: p =", format.pval(result$p.value, digits = 3)),
           y = "Value") +
      theme(legend.position = "none",
            plot.subtitle = element_text(face = "italic"))
  })

  # Histogram and density of the paired differences (Group 1 - Group 2);
  # dashed line = median difference, dotted line = zero
  output$diffplot <- renderPlot({
    res <- test_result()
    req(res)
    diffs <- res$group1 - res$group2
    median_diff <- median(diffs)
    ggplot(data.frame(Difference = diffs), aes(x = Difference)) +
      geom_histogram(aes(y = after_stat(density)), bins = min(20, max(5, length(diffs)/2)),
                     fill = "#5dade2", color = "#1f618d", alpha = 0.7) +
      geom_density(color = "#e74c3c", linewidth = 1) +
      geom_vline(xintercept = median_diff, color = "black",
                 linewidth = 1, linetype = "dashed") +
      geom_vline(xintercept = 0, color = "#7d3c98",
                 linewidth = 1, linetype = "dotted") +
      annotate("text", x = median_diff, y = 0.01,
               label = paste("Median =", round(median_diff, 2)),
               hjust = -0.2, vjust = -0.5,
               fontface = "bold", color = "black") +
      labs(title = "Distribution of Paired Differences (Group 1 - Group 2)",
           x = "Difference Value",
           y = "Density") +
      theme_minimal(base_size = 14)
  })
}

# Launch the app
shinyApp(ui = ui, server = server)
Wilcoxon Signed-Rank Test vs. Paired t-Test
When should you choose the Wilcoxon signed-rank test over the paired t-test?
Feature | Wilcoxon Signed-Rank Test | Paired t-Test |
---|---|---|
Distribution assumptions | No normality assumption | Assumes normally distributed differences |
Measurement level | Works with ordinal data | Requires interval/ratio data |
Sensitivity to outliers | Resistant to outliers | Sensitive to outliers |
What it compares | Median differences | Mean differences |
Power | Slightly less powerful when assumptions are met | More powerful when normality holds |
Small sample sizes | Appropriate for small samples | Less reliable with small non-normal samples |
The Wilcoxon signed-rank test is generally recommended when:
- The distribution of differences is clearly non-normal
- The sample size is too small to rely on the Central Limit Theorem
- The data contains outliers that would unduly influence a t-test
- You’re working with ordinal data rather than continuous measurements
How the Wilcoxon Signed-Rank Test Works
The Wilcoxon signed-rank test works by analyzing the ranks of the absolute differences between paired observations:
Mathematical Procedure
- Calculate the differences between each pair of observations
- Remove pairs with zero difference
- Rank the absolute differences from smallest to largest
- Assign each rank the sign of the original difference
- Calculate the sum of positive ranks (W+) and negative ranks (W-)
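- Use the smaller of W+ and W- as the test statistic W (R's `wilcox.test()`, which the calculator above uses, instead reports V = W+, the sum of the positive ranks; both conventions give the same p-value)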
- Compare W to critical values or calculate p-value
Statistical Power and Sample Size Considerations
Important
Statistical Power Tip: While the Wilcoxon signed-rank test is robust to non-normality, it has approximately 95% of the power of the paired t-test when normality assumptions are met. This means you may need a slightly larger sample size to detect the same effect compared to a t-test.
For optimal statistical power:
- Aim for at least 15-20 pairs when possible
- Consider using G*Power or similar software to calculate the required sample size for your specific scenario
- Report effect sizes alongside p-values for more comprehensive interpretation
Example 1: Patient Pain Scores Before and After Treatment
A researcher wants to evaluate if a new pain management therapy reduces pain levels in patients with chronic back pain. Pain is measured on a 0-10 scale for 10 patients before and after a 4-week treatment.
Data:
Patient | Before Treatment | After Treatment |
---|---|---|
1 | 8 | 6 |
2 | 7 | 5 |
3 | 9 | 7 |
4 | 6 | 4 |
5 | 8 | 7 |
6 | 10 | 6 |
7 | 7 | 6 |
8 | 9 | 8 |
9 | 8 | 5 |
10 | 7 | 6 |
Analysis Steps:
- Calculate differences (Before - After): 2, 2, 2, 2, 1, 4, 1, 1, 3, 1
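- Rank absolute differences (tied values share the average rank): patients 5, 7, 8 and 10 (|d| = 1) each get rank 2.5; patients 1-4 (|d| = 2) each get rank 6.5; patient 9 (|d| = 3) gets rank 9; patient 6 (|d| = 4) gets rank 10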
- Assign signs: All positive
- Sum of positive ranks: W+ = 55, Sum of negative ranks: W- = 0
- Test statistic W = min(W+, W-) = 0
- For n = 10 pairs, W = 0 is significant at p < 0.05 (p = 0.005)
Results:
- V = 55, p = 0.005
- Median before: 8, Median after: 6
- Interpretation: There is a statistically significant reduction in pain scores after treatment (p < 0.05).
How to Report: “Patient pain scores were significantly lower after the 4-week treatment period (Mdn = 6) compared to before treatment (Mdn = 8), V = 55, p = 0.005, indicating the pain management therapy was effective in reducing pain levels.”
Example 2: Student Performance Before and After Study Method
A teacher wants to determine if a new study method improves test scores. 12 students take a pre-test, learn the method, then take a post-test.
Data:
Student | Pre-test Score | Post-test Score |
---|---|---|
1 | 65 | 72 |
2 | 78 | 79 |
3 | 58 | 69 |
4 | 87 | 90 |
5 | 72 | 84 |
6 | 63 | 75 |
7 | 55 | 59 |
8 | 89 | 85 |
9 | 76 | 82 |
10 | 68 | 75 |
11 | 71 | 74 |
12 | 84 | 88 |
Note the interesting aspect: Most scores improved, but Student #8 showed a decrease.
Results:
- V = 65.5, p = 0.018
- Median pre-test: 71.5, Median post-test: 77
- Interpretation: There is a statistically significant improvement in test scores after using the new study method (p < 0.05).
How to Report: “Students performed significantly better on the post-test (Mdn = 77) than on the pre-test (Mdn = 71.5) after learning the new study method, V = 65.5, p = 0.018. These results suggest the new study method effectively improves student performance.”
How to Report Wilcoxon Signed-Rank Test Results
When reporting the results of a Wilcoxon signed-rank test in academic papers or research reports, include the following elements:
"[Description of groups] were significantly [higher/lower/different] in [Group 1] (Mdn = [value1]) compared to [Group 2] (Mdn = [value2]), V = [test statistic], p = [p-value]."
For example:
"Pain levels were significantly lower after treatment (Mdn = 3) than before treatment (Mdn = 7), V = 15, n = 20, p = .003."
Additional information to consider including:
- Effect size (r = Z/√N) where Z is the standardized test statistic
- Confidence intervals for the median difference (if available)
- Whether the test was one-tailed or two-tailed
- Sample size (n pairs)
APA Style Reporting
For APA style papers (7th edition), report the Wilcoxon signed-rank test results as follows:
We conducted a Wilcoxon signed-rank test to examine whether there was a significant difference in [variable] between [condition 1] and [condition 2]. Results indicated that [variable] was significantly [higher/lower] in [condition with higher median] (Mdn = [value]) compared to [condition with lower median] (Mdn = [value]), T = [smaller sum of ranks], z = [z-value if available], p = [exact p-value], r = [effect size].
Reporting in Tables
When reporting multiple Wilcoxon signed-rank test results in a table, include these columns:
- Variables being compared
- Medians for both conditions
- V or T statistic (depends on software)
- Z statistic (if using normal approximation)
- p-value
- Effect size
- Sample size
Test Your Understanding
1. When would you choose the Wilcoxon signed-rank test over a paired t-test?
   - A. When you have independent samples
   - B. When your paired data violates normality assumptions
   - C. When you have a very large sample size
   - D. When you’re comparing more than two groups
2. What does the Wilcoxon signed-rank test primarily compare?
   - A. Means of the two groups
   - B. Variances of the two groups
   - C. Medians/distributions of the two groups
   - D. Ranges of the two groups
3. A researcher finds V = 28, p = 0.04 when comparing before-after measurements. What can they conclude?
   - A. There is no significant difference between measurements
   - B. There is a significant difference between measurements
   - C. The test was invalid
   - D. More data is needed
4. What happens to pairs with zero difference in the Wilcoxon signed-rank test?
   - A. They are assigned a rank of zero
   - B. They are excluded from the analysis
   - C. They are averaged with other values
   - D. They are always favorable to the alternative hypothesis
5. The Wilcoxon signed-rank test is best described as:
   - A. A parametric test for independent samples
   - B. A non-parametric test for paired samples
   - C. A parametric test for paired samples
   - D. A non-parametric test for more than two groups
Answers: 1-B, 2-C, 3-B, 4-B, 5-B
Common Questions About the Wilcoxon Signed-Rank Test
What’s the difference between the Wilcoxon signed-rank test and the Wilcoxon rank-sum test?
The Wilcoxon signed-rank test is for paired data (before/after, matched pairs), while the Wilcoxon rank-sum test (also called Mann-Whitney U test) is for independent samples. Use the signed-rank test when your measurements are related.
How large does my sample size need to be?
The Wilcoxon signed-rank test can be used with small samples, but most statisticians recommend at least 5 pairs for reliable results. With more than 20 pairs, the test uses normal approximation, which is very reliable.
How do I report Wilcoxon signed-rank test results in research papers?
Include: the test statistic (V), sample size (n pairs), p-value, and medians of both groups. For example: “Patient pain scores were significantly lower after treatment (Mdn = 3) than before treatment (Mdn = 7), V = 15, n = 20, p = .003.”
How does the test handle zero differences between pairs?
Pairs with zero difference (no change between measurements) are excluded from the analysis. The effective sample size is reduced accordingly. This is why the number of pairs actually used by the test (n) may be smaller than the total number of pairs you entered.
What if my data has ties (same rank values)?
The Wilcoxon test handles ties by averaging the ranks. For example, if the 4th and 5th smallest values are the same, they both receive a rank of 4.5. This adjustment ensures the test remains valid even with tied values.
Can I use the Wilcoxon signed-rank test with more than two groups?
No, the Wilcoxon signed-rank test is designed for comparing only two related groups. For more than two related groups, consider the Friedman test, which is the non-parametric equivalent of repeated measures ANOVA.
Examples of When to Use the Wilcoxon Signed-Rank Test
- Medical research: Comparing pain levels before and after treatment when pain is measured on an ordinal scale
- Psychology: Analyzing mood scores before and after therapy where normality cannot be assumed
- Consumer testing: Comparing preference ratings for two products by the same participants
- Education: Measuring student performance before and after an intervention with small class sizes
- Economics: Analyzing non-normally distributed financial metrics over two time periods
- Sports science: Comparing athletic performance metrics before and after training when data contains outliers
References
- Wilcoxon, F. (1945). Individual comparisons by ranking methods. Biometrics Bulletin, 1(6), 80-83.
- Siegel, S., & Castellan, N. J. (1988). Nonparametric statistics for the behavioral sciences (2nd ed.). McGraw-Hill.
- Woolson, R. F. (2007). Wilcoxon signed-rank test. In Encyclopedia of biostatistics.
- Conover, W. J. (1999). Practical nonparametric statistics (3rd ed.). John Wiley & Sons.
- Rosner, B., Glynn, R. J., & Lee, M. L. T. (2006). The Wilcoxon signed rank test for paired comparisons of clustered data. Biometrics, 62(1), 185-192.
- Hollander, M., Wolfe, D. A., & Chicken, E. (2013). Nonparametric statistical methods (3rd ed.). John Wiley & Sons.
Reuse
Citation
BibTeX citation:
@online{kassambara2025,
author = {Kassambara, Alboukadel},
title = {Wilcoxon {Signed-Rank} {Test} {Calculator} \textbar{}
{Non-Parametric} {Paired} {Analysis}},
date = {2025-04-07},
url = {https://www.datanovia.com/apps/statfusion/analysis/inferential/non-parametric/two-sample/wilcoxon-signed-rank-test-paired.html},
langid = {en}
}
For attribution, please cite this work as:
Kassambara, Alboukadel. 2025. “Wilcoxon Signed-Rank Test
Calculator | Non-Parametric Paired Analysis.” April 7, 2025. https://www.datanovia.com/apps/statfusion/analysis/inferential/non-parametric/two-sample/wilcoxon-signed-rank-test-paired.html.