// Visual Style
// - Used the same blue rectangles (#b3deff) for process nodes
// - Yellow diamond (#f8d56f) for the decision node
// - Light gray rectangles (#f0f0f0) for outcome nodes
// - Added light green rectangles (#e2f0d9) for data input nodes
//| echo: false
tTestFlowchart = {
// Canvas setup - wider to accommodate horizontal layout
const width = 1300;
const height = 500;
const padding = 60;
// Create SVG with explicit viewBox
const svg = d3.create("svg")
.attr("width", width)
.attr("height", height)
.attr("viewBox", [0, 0, width, height])
.attr("style", "max-width: 100%; height: auto; font: 16px sans-serif;");
// Add title at the bottom like in original
svg.append("text")
.attr("x", width / 2)
.attr("y", height - 20)
.attr("text-anchor", "middle")
.attr("font-size", "20px")
.attr("font-weight", "bold")
.text("T-Test Procedure");
// Define nodes with more horizontal layout similar to mermaid version
const nodes = [
// Top branch - means
{id: "A", label: "Group A Data", x: padding + 100, y: 120},
{id: "B", label: "Group B Data", x: padding + 100, y: 300},
{id: "C", label: "Calculate Mean A", x: padding + 280, y: 120},
{id: "D", label: "Calculate Mean B", x: padding + 280, y: 200},
{id: "E", label: "Find Mean\nDifference", x: padding + 460, y: 160},
// Bottom branch - variances
{id: "F", label: "Calculate\nVariance A", x: padding + 280, y: 260},
{id: "G", label: "Calculate\nVariance B", x: padding + 280, y: 340},
{id: "H", label: "Estimate\nStandard Error", x: padding + 460, y: 300},
// Final common path
{id: "I", label: "Calculate\nt-statistic", x: padding + 640, y: 230},
{id: "J", label: "Determine\np-value", x: padding + 820, y: 230},
{id: "K", label: "p-value < 0.05?", x: padding + 1000, y: 230, isDecision: true},
{id: "L", label: "Reject null\nhypothesis", x: padding + 910, y: 350},
{id: "M", label: "Retain null\nhypothesis", x: padding + 1090, y: 350}
];
// Define edges - IMPORTANT: Draw order matters, so we'll draw paths in specific order
const edges = [
// Mean calculation path
{source: "A", target: "C", label: "", order: 1},
{source: "C", target: "E", label: "", order: 1},
// Variance calculation path
{source: "A", target: "F", label: "", order: 1},
{source: "F", target: "H", label: "", order: 1},
{source: "G", target: "H", label: "", order: 1},
// Final common path
{source: "E", target: "I", label: "", order: 1},
{source: "H", target: "I", label: "", order: 1},
{source: "I", target: "J", label: "", order: 1},
{source: "J", target: "K", label: "", order: 1},
{source: "K", target: "L", label: "Yes", order: 1},
{source: "K", target: "M", label: "No", order: 1},
// Draw these connections last to ensure they appear on top
{source: "B", target: "D", label: "", order: 2},
{source: "D", target: "E", label: "", order: 2},
{source: "B", target: "G", label: "", order: 2}
];
// Define arrow marker
svg.append("defs").append("marker")
.attr("id", "arrowhead")
.attr("viewBox", "0 0 10 10")
.attr("refX", 8)
.attr("refY", 5)
.attr("markerWidth", 8)
.attr("markerHeight", 8)
.attr("orient", "auto")
.append("path")
.attr("d", "M 0 0 L 10 5 L 0 10 z")
.attr("fill", "#666");
// Sort edges by order to control draw sequence
edges.sort((a, b) => a.order - b.order);
// Draw edges with improved path calculation for horizontal layout
const edgeLines = svg.selectAll("path.edge")
.data(edges)
.join("path")
.attr("class", d => `edge order-${d.order}`)
.attr("d", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
// Calculate connector points
let sourceX, sourceY, targetX, targetY;
let path = "";
// Special case for decision diamond
if (source.isDecision) {
if (d.label === "Yes") {
// Going down-left to L
sourceX = source.x - 25;
sourceY = source.y + 20;
targetX = target.x;
targetY = target.y - 25;
path = `M${sourceX},${sourceY} L${sourceX},${(sourceY + targetY)/2} L${targetX},${(sourceY + targetY)/2} L${targetX},${targetY}`;
} else if (d.label === "No") {
// Going down-right to M
sourceX = source.x + 25;
sourceY = source.y + 20;
targetX = target.x;
targetY = target.y - 25;
path = `M${sourceX},${sourceY} L${sourceX},${(sourceY + targetY)/2} L${targetX},${(sourceY + targetY)/2} L${targetX},${targetY}`;
}
}
// Special case for B to D connection that needs to route around F
else if (source.id === "B" && target.id === "D") {
// Avoid overlapping with Calculate Variance A
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
// Create path that goes around node F
path = `M${sourceX},${sourceY} L${sourceX + 30},${sourceY} L${sourceX + 30},${sourceY - 40} L${targetX - 30},${sourceY - 40} L${targetX - 30},${targetY} L${targetX},${targetY}`;
}
// Special cases for diagonal flows
else if ((source.id === "C" && target.id === "E") ||
(source.id === "D" && target.id === "E")) {
// Mean calculations to mean difference
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
// Create curved paths
const midX = (sourceX + targetX) / 2;
path = `M${sourceX},${sourceY} C${midX},${sourceY} ${midX},${targetY} ${targetX},${targetY}`;
}
else if ((source.id === "F" && target.id === "H") ||
(source.id === "G" && target.id === "H")) {
// Variance calculations to standard error
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
// Create curved paths
const midX = (sourceX + targetX) / 2;
path = `M${sourceX},${sourceY} C${midX},${sourceY} ${midX},${targetY} ${targetX},${targetY}`;
}
else if ((source.id === "E" && target.id === "I") ||
(source.id === "H" && target.id === "I")) {
// Mean difference and standard error to t-statistic
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
// Create curved paths
const midX = (sourceX + targetX) / 2;
path = `M${sourceX},${sourceY} C${midX},${sourceY} ${midX},${targetY} ${targetX},${targetY}`;
}
else if (source.id === "A" && target.id === "F") {
// Group A Data to Variance A (diagonal down)
sourceX = source.x + 30;
sourceY = source.y + 25;
targetX = target.x - 70;
targetY = target.y;
// Create angled path
path = `M${sourceX},${sourceY} L${(sourceX + targetX)/2},${(sourceY + targetY)/2} L${targetX},${targetY}`;
}
else if (source.id === "B" && target.id === "G") {
// B to G connection
sourceX = source.x + 30;
sourceY = source.y + 25;
targetX = target.x - 70;
targetY = target.y;
// Create angled path
path = `M${sourceX},${sourceY} L${(sourceX + targetX)/2},${(sourceY + targetY)/2} L${targetX},${targetY}`;
}
else if (target.y > source.y + 30) {
// General case: Vertical flow down
sourceX = source.x;
sourceY = source.y + 25;
targetX = target.x;
targetY = target.y - 25;
path = `M${sourceX},${sourceY} L${sourceX},${(sourceY + targetY)/2} L${targetX},${(sourceY + targetY)/2} L${targetX},${targetY}`;
}
else if (target.y < source.y - 30) {
// General case: Vertical flow up
sourceX = source.x;
sourceY = source.y - 25;
targetX = target.x;
targetY = target.y + 25;
path = `M${sourceX},${sourceY} L${sourceX},${(sourceY + targetY)/2} L${targetX},${(sourceY + targetY)/2} L${targetX},${targetY}`;
}
else {
// Horizontal flow (default)
sourceX = source.x + 70;
sourceY = source.y;
targetX = target.x - 70;
targetY = target.y;
path = `M${sourceX},${sourceY} L${targetX},${targetY}`;
}
return path;
})
.attr("stroke", "#666")
.attr("stroke-width", 2)
.attr("fill", "none")
.attr("marker-end", "url(#arrowhead)");
// Add edge labels with better positioning
svg.selectAll(".edgelabel")
.data(edges.filter(d => d.label !== ""))
.join("text")
.attr("class", "edgelabel")
.attr("text-anchor", "middle")
.attr("dominant-baseline", "middle")
.attr("x", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
if (d.label === "Yes") {
return (source.x + target.x) / 2 - 30;
} else if (d.label === "No") {
return (source.x + target.x) / 2 + 30;
} else {
return (source.x + target.x) / 2;
}
})
.attr("y", d => {
const source = nodes.find(n => n.id === d.source);
const target = nodes.find(n => n.id === d.target);
if (d.label === "Yes" || d.label === "No") {
return (source.y + target.y) / 2 - 10;
} else {
return source.y - 10;
}
})
.attr("font-size", "14px")
.attr("font-weight", "bold")
.attr("fill", d => d.label === "Yes" ? "#5a9bd5" : (d.label === "No" ? "#ff9052" : "#333"))
.text(d => d.label);
// Draw nodes with fixed box sizes - after drawing paths to ensure nodes appear on top
const node = svg.selectAll(".node")
.data(nodes)
.join("g")
.attr("class", "node")
.attr("transform", d => `translate(${d.x},${d.y})`);
// Add node shapes (rectangles or diamonds) with consistent sizing
node.each(function(d) {
const elem = d3.select(this);
if (d.isDecision) {
// Diamond for decision node
elem.append("polygon")
.attr("points", "0,-30 60,0 0,30 -60,0")
.attr("fill", "#f8d56f")
.attr("stroke", "#d4a82e")
.attr("stroke-width", 2);
} else {
// Rectangle for regular node with fixed width
const boxWidth = 140;
elem.append("rect")
.attr("x", -boxWidth/2)
.attr("y", -25)
.attr("width", boxWidth)
.attr("height", 50)
.attr("rx", 5)
.attr("ry", 5)
.attr("fill", d => {
if (d.id === "L" || d.id === "M") return "#f0f0f0";
if (d.id === "A" || d.id === "B") return "#e2f0d9"; // Light green for data inputs
return "#b3deff";
})
.attr("stroke", d => {
if (d.id === "L" || d.id === "M") return "#999";
if (d.id === "A" || d.id === "B") return "#70ad47"; // Green border for data inputs
return "#4a98e0";
})
.attr("stroke-width", 2);
}
});
// Add node labels with better text wrapping
node.append("text")
.attr("text-anchor", "middle")
.attr("dominant-baseline", "middle")
.attr("font-size", "14px")
.attr("font-weight", d => (d.id === "K" ? "bold" : "normal"))
.attr("fill", "#333")
.each(function(d) {
const lines = d.label.split('\n');
const elem = d3.select(this);
if (lines.length === 1) {
elem.text(d.label);
} else {
lines.forEach((line, i) => {
const lineHeight = 16;
const yOffset = (i - (lines.length - 1) / 2) * lineHeight;
elem.append("tspan")
.attr("x", 0)
.attr("y", yOffset)
.text(line);
});
}
});
// Add interactivity
node.on("mouseover", function(event, d) {
d3.select(this).select("rect, polygon")
.transition()
.duration(200)
.attr("fill", d => {
if (d.isDecision) return "#ffc107";
if (d.id === "A" || d.id === "B") return "#b8e986"; // Brighter green on hover
if (d.id === "L" || d.id === "M") return "#e6e6e6";
return "#7fc9ff";
});
})
.on("mouseout", function(event, d) {
d3.select(this).select("rect, polygon")
.transition()
.duration(200)
.attr("fill", d => {
if (d.isDecision) return "#f8d56f";
if (d.id === "L" || d.id === "M") return "#f0f0f0";
if (d.id === "A" || d.id === "B") return "#e2f0d9";
return "#b3deff";
});
});
return svg.node();
}Key Takeaways: Independent Samples t-Test
Tip
- Purpose: Compare means between two unrelated/independent groups
- When to use: For continuous data when comparing two separate groups
- Assumptions: Independence, normality of distributions, homogeneity of variances (for Student’s t-test)
- Variations: Student’s t-test (equal variances) and Welch’s t-test (unequal variances)
- Null hypothesis: The two population means are equal (\(H_0: \mu_1 = \mu_2\))
- Interpretation: If p < 0.05, there is a significant difference between the group means
- Recommended default: Welch’s t-test (more robust when variances differ)
What is the Independent Samples t-Test?
The independent samples t-test (also called two-sample t-test) is a statistical method used to compare the means of two unrelated groups to determine if there is a significant difference between them. It is one of the most commonly used statistical tests in research, particularly in fields like psychology, medicine, and education.
Tip
When to use the independent samples t-test:
- When comparing means between two separate/unrelated groups
- When your data is measured on a continuous scale
- When your samples are drawn from normally distributed populations
- When you need to determine if observed differences are statistically significant
This online calculator allows you to quickly perform an independent samples t-test, check its assumptions, and visualize your data with clear explanations of the results.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1400
# Enhanced Independent t-Test Module
# This implementation demonstrates best practices for StatFusion modules
# Required libraries
library(shiny)
library(bslib)
library(ggplot2)
library(dplyr)
library(bsicons)
library(shinyjs)
# Module UI function
independentTTestUI <- function(id) {
ns <- NS(id)
page_sidebar(
title = "Independent Samples t-Test Calculator",
sidebar = sidebar(
width = 425,
# Data input card
card(
card_header("Data Input"),
# Fix: Replace nav_panel_tabset with navset_tab_card or navset_tab
navset_tab(
nav_panel(
"Manual Input",
layout_column_wrap(
width = 1/2,
textAreaInput(ns("group_input"), "Grouping variable", rows = 8,
placeholder = "Paste values here (only two levels)..."),
textAreaInput(ns("response_input"), "Response variable", rows = 8,
placeholder = "Paste values here...")
),
actionLink(ns("use_example"), "Use example data"),
hr(),
helpText("Enter one value per line. Group labels should match exactly.")
),
nav_panel(
"File Upload",
fileInput(ns("file_upload"), "Upload CSV or TXT file:",
accept = c("text/csv", "text/plain", ".csv", ".txt")),
checkboxInput(ns("header"), "File has header", TRUE),
conditionalPanel(
condition = paste0("output['", ns("file_uploaded"), "']"),
selectInput(ns("group_var"), "Grouping variable:", choices = NULL),
selectInput(ns("response_var"), "Response variable:", choices = NULL)
)
),
nav_panel(
"Sample Data",
selectInput(ns("sample_dataset"), "Choose a sample dataset:",
choices = c(
"Drug Trial" = "drug_trial",
"Teaching Methods" = "teaching_methods",
"Weight Loss" = "weight_loss"
)),
verbatimTextOutput(ns("sample_data_preview")),
actionButton(ns("use_sample"), "Use this dataset", class = "btn-info")
)
)
),
# Advanced options card
card(
card_header("Analysis Options"),
accordion(
accordion_panel(
"Basic Options",
radioButtons(ns("alternative"), "Alternative hypothesis:",
choices = c("Two-sided" = "two.sided",
"Group 1 < Group 2" = "less",
"Group 1 > Group 2" = "greater"),
selected = "two.sided"),
numericInput(ns("conf_level"), "Confidence level:",
value = 0.95, min = 0.5, max = 0.99, step = 0.01),
open = FALSE
),
accordion_panel(
"Advanced Options",
radioButtons(ns("var_equal"), "Variance assumption:",
choices = c(
"Assume equal variances (Student's t)" = "TRUE",
"Allow unequal variances (Welch's t)" = "FALSE"
),
selected = "FALSE"),
checkboxInput(ns("auto_method"), "Automatically select method based on Levene's test", TRUE),
open = FALSE
),
accordion_panel(
"Visualization Options",
checkboxGroupInput(ns("plot_options"), "Show plots:",
choices = c(
"Mean plot with CI" = "mean_plot",
"Boxplot" = "boxplot",
"Density distributions" = "density",
"Normal Q-Q plots" = "qq"
),
selected = c("mean_plot", "boxplot"))
),
open = FALSE
),
actionButton(ns("run_test"), "Run Analysis", class = "btn btn-primary w-100")
),
# Interpretation guide - helps users understand results
card(
card_header("Quick Guide"),
card_body(
div(
h5("Interpreting p-values:"),
tags$ul(
tags$li("p < 0.05: Statistically significant difference between groups"),
tags$li("p ≥ 0.05: Not enough evidence of a difference")
),
h5("Effect size (Cohen's d):"),
tags$ul(
tags$li("~0.2: Small effect"),
tags$li("~0.5: Medium effect"),
tags$li("~0.8: Large effect")
),
actionLink(ns("learn_more"), "Learn more about t-tests")
)
)
)
),
# Main content area with tabbed layout
navset_card_tab(
height = 800,
nav_panel(
"Results",
conditionalPanel(
condition = paste0("input['", ns("run_test"), "'] == 0"),
div(
class = "text-center p-5",
icon("chart-bar", lib = "font-awesome", class = " fa-4x text-muted mb-3"),
h3("Analysis Results Will Appear Here", class = "text-muted"),
p("Configure your analysis options and click 'Run Analysis'", class = "text-muted")
)
),
conditionalPanel(
condition = paste0("input['", ns("run_test"), "'] > 0"),
navset_pill(
nav_panel(
"Summary",
uiOutput(ns("error_message")),
div(
class = "row",
div(
class = "col-md-7",
card(
card_header("Statistical Results"),
card_body(verbatimTextOutput(ns("test_results")))
)
),
div(
class = "col-md-5",
card(
card_header("Key Statistics"),
card_body(uiOutput(ns("key_stats")))
)
)
),
card(
class = "mt-3",
card_header("Visualization"),
card_body(plotOutput(ns("main_plot"), height = "400px"))
)
),
nav_panel(
"Detailed Results",
fluidRow(
column(
width = 12,
h4("Group Statistics"),
tableOutput(ns("group_stats")),
hr(),
h4("Test Results"),
verbatimTextOutput(ns("detailed_results"))
)
)
),
nav_panel(
"Assumptions",
card(
card_header("Normality Check"),
card_body(
plotOutput(ns("qq_plot"), height = "250px"),
verbatimTextOutput(ns("shapiro_test")),
div(
class = "alert alert-info mt-3",
"Normality is assessed using Shapiro-Wilk test and Q-Q plots. If p < 0.05, the data significantly deviates from normality."
)
)
),
card(
class = "mt-3",
card_header("Homogeneity of Variance"),
card_body(
verbatimTextOutput(ns("levene_test")),
div(
class = "alert alert-info mt-3",
"Levene's test checks if the variances in both groups are similar. If p < 0.05, variances are significantly different - use Welch's t-test."
)
)
)
),
nav_panel(
"Visualizations",
fluidRow(
column(
width = 6,
card(
card_header("Means with Confidence Intervals"),
card_body(plotOutput(ns("means_plot"), height = "250px"))
)
),
column(
width = 6,
card(
card_header("Boxplot Comparison"),
card_body(plotOutput(ns("boxplot"), height = "250px"))
)
)
),
fluidRow(
class = "mt-3",
column(
width = 12,
card(
card_header("Density Distribution"),
card_body(plotOutput(ns("density_plot"), height = "250px"))
)
)
)
),
nav_panel(
"Report",
card(
card_header("Results for Reporting"),
card_body(
h4("APA Style Report"),
verbatimTextOutput(ns("apa_report")),
hr(),
h4("Statistical Table"),
tableOutput(ns("results_table")),
hr(),
h4("Citation Suggestion"),
verbatimTextOutput(ns("citation_text")),
hr(),
downloadButton(ns("download_report"), "Download Complete Report")
)
)
)
)
)
),
nav_panel(
"Help & Learning",
navset_pill(
nav_panel(
"About This Test",
div(
h3("The Independent Samples t-Test"),
p("The independent samples t-test is used to compare means between two unrelated groups."),
h4("When to Use This Test"),
tags$ul(
tags$li("When comparing two separate groups"),
tags$li("When your data is measured on a continuous scale"),
tags$li("When you want to determine if observed differences are statistically significant")
),
h4("Key Assumptions"),
tags$ol(
tags$li(strong("Independence:"), " Observations in each group are independent from each other"),
tags$li(strong("Normality:"), " Data in each group should be approximately normally distributed"),
tags$li(strong("Homogeneity of variance:"), " Both groups should have similar variances (for Student's t-test)")
),
h4("Mathematical Formula"),
withMathJax(
"$$t = \\frac{\\bar{X}_1 - \\bar{X}_2}{\\sqrt{\\frac{s_1^2}{n_1} + \\frac{s_2^2}{n_2}}}$$"
),
p("Where:"),
tags$ul(
tags$li("\\(\\bar{X}_1\\) and \\(\\bar{X}_2\\) are the means of the two groups"),
tags$li("\\(s_1^2\\) and \\(s_2^2\\) are the variances of the two groups"),
tags$li("\\(n_1\\) and \\(n_2\\) are the sample sizes of the two groups")
)
)
),
nav_panel(
"Step-by-Step Guide",
div(
h3("How to Perform an Independent Samples t-Test"),
div(
class = "card mb-3",
div(class = "card-header bg-primary text-white", "Step 1: Enter Your Data"),
div(
class = "card-body",
p("Enter your data using one of the three methods:"),
tags$ol(
tags$li("Manual input: Paste your values in the text areas"),
tags$li("File upload: Upload a CSV or TXT file"),
tags$li("Sample data: Use one of our prepared examples")
)
)
),
div(
class = "card mb-3",
div(class = "card-header bg-primary text-white", "Step 2: Configure Options"),
div(
class = "card-body",
p("Set your analysis preferences:"),
tags$ul(
tags$li("Choose your alternative hypothesis (two-sided or one-sided)"),
tags$li("Set confidence level (typically 0.95)"),
tags$li("Decide on variance assumptions or let the test auto-detect")
)
)
),
div(
class = "card mb-3",
div(class = "card-header bg-primary text-white", "Step 3: Run and Interpret"),
div(
class = "card-body",
p("Review your results:"),
tags$ol(
tags$li("Check assumption tests first (normality and homogeneity of variance)"),
tags$li("Examine the p-value to determine statistical significance"),
tags$li("Look at the effect size to understand the magnitude of the difference"),
tags$li("Use the visualizations to understand the data distribution")
)
)
)
)
),
nav_panel(
"Examples",
div(
h3("Example 1: Comparing Drug Treatment Efficacy"),
p("A researcher wants to test if a new medication affects cognitive performance. They randomly assign 20 participants to either a treatment group or a control group."),
div(
class = "card mb-3",
div(class = "card-header", "Data Summary"),
div(
class = "card-body",
tableOutput(ns("example1_table"))
)
),
p("In this example, the researcher would:"),
tags$ol(
tags$li("Enter the data for both groups"),
tags$li("Run a t-test with default parameters"),
tags$li("Check the assumptions (normality and homogeneity of variance)"),
tags$li("Interpret the results (they would find p < 0.001, indicating a significant effect)")
),
actionButton(ns("load_example1"), "Load This Example", class = "btn-sm btn-info")
)
),
nav_panel(
"FAQ",
div(
h3("Frequently Asked Questions"),
div(
class = "accordion",
div(
class = "accordion-item",
h2(class = "accordion-header", tags$button(
"When should I use an independent samples t-test versus a paired t-test?",
class = "accordion-button",
`data-bs-toggle` = "collapse",
`data-bs-target` = "#faq1"
)),
div(
id = "faq1",
class = "accordion-collapse collapse show",
div(
class = "accordion-body",
p("Use an independent samples t-test when comparing two separate, unrelated groups (e.g., treatment vs. control). Use a paired t-test when comparing two related measurements (e.g., before vs. after treatment on the same subjects).")
)
)
),
div(
class = "accordion-item",
h2(class = "accordion-header", tags$button(
"What if my data isn't normally distributed?",
class = "accordion-button collapsed",
`data-bs-toggle` = "collapse",
`data-bs-target` = "#faq2"
)),
div(
id = "faq2",
class = "accordion-collapse collapse",
div(
class = "accordion-body",
p("If your sample size is large (n > 30 per group), the t-test is generally robust to violations of normality due to the Central Limit Theorem. For smaller samples with non-normal data, consider using a non-parametric alternative like the Mann-Whitney U test.")
)
)
),
div(
class = "accordion-item",
h2(class = "accordion-header", tags$button(
"Why is Welch's t-test often recommended over Student's t-test?",
class = "accordion-button collapsed",
`data-bs-toggle` = "collapse",
`data-bs-target` = "#faq3"
)),
div(
id = "faq3",
class = "accordion-collapse collapse",
div(
class = "accordion-body",
p("Welch's t-test doesn't assume equal variances between groups, making it more robust when this assumption is violated. Research has shown that Welch's t-test maintains good control of Type I error rates while providing adequate statistical power, even when variances are equal. Therefore, many statisticians recommend it as the default choice for independent samples comparisons.")
)
)
)
)
)
)
)
)
)
)
}
# Module server function
# Module server function for Independent t-Test
independentTTestServer <- function(id) {
moduleServer(
id,
function(input, output, session) {
# Create reactive values for storing data and state
values <- reactiveValues(
data = NULL,
test_result = NULL,
levene_result = NULL,
shapiro_result = NULL,
input_method = "manual",
data_loaded = FALSE
)
# Example datasets
sample_datasets <- list(
drug_trial = data.frame(
group = rep(c("Control", "Treatment"), each = 10),
response = c(5.2, 6.1, 5.8, 5.5, 5.9, 6.2, 5.7, 6.0, 5.6, 5.8,
7.1, 7.5, 6.9, 7.2, 7.0, 7.3, 6.8, 7.4, 7.1, 6.9)
),
teaching_methods = data.frame(
group = rep(c("Method A", "Method B"), each = 12),
response = c(78, 82, 76, 84, 80, 79, 83, 81, 77, 84, 78, 82,
68, 72, 65, 74, 69, 71, 67, 70, 68, 72, 71, 69)
),
weight_loss = data.frame(
group = rep(c("Diet Only", "Diet + Exercise"), each = 15),
response = c(3.2, 2.8, 3.5, 2.9, 3.1, 2.7, 3.0, 3.3, 2.9, 3.2, 2.8, 3.1, 2.9, 3.2, 3.0,
5.1, 4.8, 5.3, 4.9, 5.2, 4.7, 5.0, 5.4, 4.8, 5.1, 4.9, 5.2, 5.0, 4.8, 5.3)
)
)
# Show sample data preview
output$sample_data_preview <- renderPrint({
req(input$sample_dataset)
if (input$sample_dataset %in% names(sample_datasets)) {
head(sample_datasets[[input$sample_dataset]], 10)
}
})
# Example table for the drug trial example
output$example1_table <- renderTable({
data.frame(
Group = c("Treatment", "Control"),
Mean = c(7.13, 5.88),
SD = c(0.23, 0.24),
N = c(10, 10)
)
})
# Load example data for example 1
observeEvent(input$load_example1, {
values$input_method <- "sample"
values$data <- sample_datasets$drug_trial
values$data_loaded <- TRUE
# Update the UI to reflect the loaded data
updateSelectInput(session, "sample_dataset", selected = "drug_trial")
})
# Use sample dataset when requested
observeEvent(input$use_sample, {
req(input$sample_dataset)
values$input_method <- "sample"
values$data <- sample_datasets[[input$sample_dataset]]
values$data_loaded <- TRUE
})
# Load example data when "Use example data" link is clicked
observeEvent(input$use_example, {
values$input_method <- "manual"
updateTextAreaInput(session, "group_input",
value = paste(sample_datasets$drug_trial$group, collapse = "\n"))
updateTextAreaInput(session, "response_input",
value = paste(sample_datasets$drug_trial$response, collapse = "\n"))
values$data_loaded <- FALSE
})
# Function to parse text input for grouping variable
parse_group_input <- function(text) {
if (is.null(text) || text == "") return(NULL)
input_lines <- strsplit(text, "\\r?\\n")[[1]]
input_lines <- input_lines[input_lines != ""]
return(input_lines)
}
# Function to parse text input for response variable
parse_response_input <- function(text) {
if (is.null(text) || text == "") return(NULL)
input_lines <- strsplit(text, "\\r?\\n")[[1]]
input_lines <- input_lines[input_lines != ""]
numeric_values <- suppressWarnings(as.numeric(input_lines))
return(numeric_values)
}
# Process manual input data
manual_data <- reactive({
req(input$group_input, input$response_input)
groups <- parse_group_input(input$group_input)
responses <- parse_response_input(input$response_input)
if (is.null(groups) || is.null(responses)) return(NULL)
# If lengths are different, truncate to the shorter length
min_length <- min(length(groups), length(responses))
groups <- groups[1:min_length]
responses <- responses[1:min_length]
# Remove any NA values in the numeric response
valid_idx <- !is.na(responses)
if (sum(valid_idx) == 0) return(NULL)
data.frame(
group = groups[valid_idx],
response = responses[valid_idx]
)
})
# Process file upload
observeEvent(input$file_upload, {
req(input$file_upload)
values$input_method <- "file"
# Show loading message
showNotification("Processing file...", type = "message", id = "fileLoading")
# Try to read the file
tryCatch({
if (grepl("\\.csv$", input$file_upload$name, ignore.case = TRUE)) {
data <- read.csv(input$file_upload$datapath, header = input$header)
} else {
data <- read.table(input$file_upload$datapath, header = input$header)
}
# Check if data was read successfully
if (ncol(data) < 2) {
showNotification("File must contain at least 2 columns.", type = "error")
return(NULL)
}
# Update select inputs for column selection
updateSelectInput(session, "group_var",
choices = names(data),
selected = names(data)[1])
updateSelectInput(session, "response_var",
choices = names(data),
selected = names(data)[2])
removeNotification("fileLoading")
values$file_data <- data
}, error = function(e) {
removeNotification("fileLoading")
showNotification(paste("Error reading file:", e$message), type = "error")
})
})
# Output to indicate if file is uploaded
output$file_uploaded <- reactive({
!is.null(input$file_upload)
})
outputOptions(output, "file_uploaded", suspendWhenHidden = FALSE)
# Get the appropriate dataset based on the input method
dataset <- reactive({
if (values$data_loaded) {
return(values$data)
} else if (values$input_method == "file") {
req(input$group_var, input$response_var, values$file_data)
data <- values$file_data
result <- data.frame(
group = data[[input$group_var]],
response = as.numeric(data[[input$response_var]])
)
return(na.omit(result))
} else {
return(manual_data())
}
})
# Validate data for analysis
validate_data <- reactive({
data <- dataset()
if (is.null(data) || nrow(data) == 0) {
return("Please provide valid input data.")
}
# Check if response values are numeric
if (any(is.na(data$response))) {
return("Response values must be numeric.")
}
# Check that group variable has exactly two levels
unique_groups <- unique(data$group)
if (length(unique_groups) != 2) {
return(paste("Grouping variable must have exactly 2 levels. Found",
length(unique_groups), "levels."))
}
# Check minimum sample size per group
group_counts <- table(data$group)
if (any(group_counts < 3)) {
return("Each group should have at least 3 observations for reliable results.")
}
# Check if all values in a group are identical
group_values <- split(data$response, data$group)
if (any(sapply(group_values, function(x) length(unique(x)) == 1))) {
return("Warning: One of your groups has identical values for all observations. This may affect the test results.")
}
return(NULL)
})
# Display error message if data validation fails
output$error_message <- renderUI({
error <- validate_data()
if (!is.null(error)) {
div(class = "alert alert-danger", error)
}
})
# Extract values for each group
group_values <- reactive({
data <- dataset()
if (is.null(data)) return(NULL)
unique_groups <- unique(data$group)
if (length(unique_groups) != 2) return(NULL)
list(
group1 = data$response[data$group == unique_groups[1]],
group2 = data$response[data$group == unique_groups[2]],
labels = unique_groups
)
})
# Levene's test function for homogeneity of variance
f_levene_test <- function(y, group, center = median) {
if (!is.numeric(y))
stop(deparse(substitute(y)), " is not a numeric variable")
# Convert group to factor if needed
if (!is.factor(group)) {
group <- as.factor(group)
}
valid <- complete.cases(y, group)
meds <- tapply(y[valid], group[valid], center)
resp <- abs(y - meds[group])
table <- anova(lm(resp ~ group))[, c(1, 4, 5)]
rownames(table) <- c("group", " ")
attr(table, "heading") <- paste("Levene's Test for Homogeneity of Variance (center = ",
deparse(substitute(center)), ")", sep="")
return(table)
}
# Run all tests when the Run Analysis button is clicked
observeEvent(input$run_test, {
showNotification("Analyzing data...", type = "message", id = "analyzing")
error <- validate_data()
if (!is.null(error)) {
removeNotification("analyzing")
return()
}
data <- dataset()
group_vals <- group_values() # <-- fix here!
if (is.null(data) || is.null(group_vals)) {
removeNotification("analyzing")
return()
}
# Run Shapiro-Wilk test for normality
values$shapiro_result <- list(
group1 = shapiro.test(group_vals$group1),
group2 = shapiro.test(group_vals$group2),
labels = group_vals$labels
)
# Levene's test for homogeneity of variance
tryCatch({
data$group <- factor(data$group)
values$levene_result <- f_levene_test(data$response, data$group, center = median)
if (input$auto_method) {
var_equal <- values$levene_result$`Pr(>F)`[1] >= 0.05
} else {
var_equal <- as.logical(input$var_equal)
}
# Run t-test
values$test_result <- t.test(
response ~ group,
data = data,
alternative = input$alternative,
var.equal = var_equal,
conf.level = input$conf_level
)
mean1 <- mean(group_vals$group1)
mean2 <- mean(group_vals$group2)
n1 <- length(group_vals$group1)
n2 <- length(group_vals$group2)
var1 <- var(group_vals$group1)
var2 <- var(group_vals$group2)
pooled_sd <- if (var_equal) {
sqrt(((n1-1)*var1 + (n2-1)*var2) / (n1+n2-2))
} else {
sqrt((var1 + var2)/2)
}
d <- abs(mean1 - mean2) / pooled_sd
values$test_result$cohens_d <- d
values$test_result$descriptives <- list(
group1_mean = mean1,
group1_sd = sqrt(var1),
group1_n = n1,
group2_mean = mean2,
group2_sd = sqrt(var2),
group2_n = n2,
var_equal = var_equal
)
}, error = function(e) {
showNotification(paste("Error in analysis:", e$message), type = "error")
})
removeNotification("analyzing")
})
# Output for the t-test results
output$test_results <- renderPrint({
req(values$test_result)
# Format the main test results
type <- if(values$test_result$descriptives$var_equal) "Student's t-test" else "Welch's t-test"
cat("INDEPENDENT SAMPLES T-TEST\n")
cat("==========================\n\n")
# Descriptive statistics
stats <- values$test_result$descriptives
group_labels <- values$shapiro_result$labels
cat("Group Statistics:\n")
cat("-----------------\n")
cat(sprintf("Group: %s\n", group_labels[1]))
cat(sprintf(" n = %d, Mean = %.4f, SD = %.4f\n\n", stats$group1_n, stats$group1_mean, stats$group1_sd))
cat(sprintf("Group: %s\n", group_labels[2]))
cat(sprintf(" n = %d, Mean = %.4f, SD = %.4f\n\n", stats$group2_n, stats$group2_mean, stats$group2_sd))
# Test statistics
cat("Test Results:\n")
cat("-------------\n")
cat(sprintf("Test: %s (Two-Sample)\n", type))
cat(sprintf("t = %.4f, df = %.2f, p-value = %.6f\n\n",
values$test_result$statistic,
values$test_result$parameter,
values$test_result$p.value))
# Effect size
cat("Effect Size:\n")
cat("-----------\n")
cat(sprintf("Cohen's d = %.4f\n", values$test_result$cohens_d))
effect_size <- if(values$test_result$cohens_d < 0.2) {
"very small"
} else if(values$test_result$cohens_d < 0.5) {
"small"
} else if(values$test_result$cohens_d < 0.8) {
"medium"
} else {
"large"
}
cat(sprintf("Interpretation: %s effect\n\n", effect_size))
# Mean difference and confidence interval
mean_diff <- abs(stats$group1_mean - stats$group2_mean)
cat("Mean Difference:\n")
cat("----------------\n")
cat(sprintf("Absolute Difference = %.4f\n", mean_diff))
cat(sprintf("%.1f%% Confidence Interval: [%.4f, %.4f]\n\n",
input$conf_level * 100,
values$test_result$conf.int[1],
values$test_result$conf.int[2]))
# Conclusion
cat("Conclusion:\n")
cat("-----------\n")
if(values$test_result$p.value < 0.05) {
cat(sprintf("At the 5%% significance level, we reject the null hypothesis.\n"))
cat(sprintf("There is a statistically significant difference between the group means.\n"))
} else {
cat(sprintf("At the 5%% significance level, we fail to reject the null hypothesis.\n"))
cat(sprintf("There is not enough evidence to suggest a significant difference between the group means.\n"))
}
})
# Output for detailed results
output$detailed_results <- renderPrint({
req(values$test_result)
print(values$test_result)
})
# Output for group statistics table
output$group_stats <- renderTable({
req(values$test_result)
stats <- values$test_result$descriptives
group_labels <- values$shapiro_result$labels
data.frame(
Group = group_labels,
N = c(stats$group1_n, stats$group2_n),
Mean = c(stats$group1_mean, stats$group2_mean),
SD = c(stats$group1_sd, stats$group2_sd),
`Std. Error` = c(stats$group1_sd / sqrt(stats$group1_n),
stats$group2_sd / sqrt(stats$group2_n))
)
})
# Output for key statistics
output$key_stats <- renderUI({
req(values$test_result)
result <- values$test_result
stats <- result$descriptives
effect_size <- result$cohens_d
# Determine significance and effect size interpretation
significant <- result$p.value < 0.05
effect_interpretation <- if(effect_size < 0.2) {
"Very small effect"
} else if(effect_size < 0.5) {
"Small effect"
} else if(effect_size < 0.8) {
"Medium effect"
} else {
"Large effect"
}
# Create tags for the UI
tagList(
div(
class = if(significant) "alert alert-success" else "alert alert-secondary",
h4(if(significant) "Significant Difference" else "No Significant Difference"),
p(if(significant)
"The means are statistically different."
else
"There is not enough evidence of a difference between means.")
),
div(
class = "mt-3",
h5("Key Metrics:"),
div(class = "row",
div(class = "col-6",
div(class = "card text-center mb-2",
div(class = "card-body py-2",
h6("p-value"),
h4(format.pval(result$p.value, digits = 3))
)
)
),
div(class = "col-6",
div(class = "card text-center mb-2",
div(class = "card-body py-2",
h6("Cohen's d"),
h4(round(effect_size, 2))
)
)
)
),
div(class = "row",
div(class = "col-6",
div(class = "card text-center",
div(class = "card-body py-2",
h6("Mean Diff."),
h4(round(abs(stats$group1_mean - stats$group2_mean), 2))
)
)
),
div(class = "col-6",
div(class = "card text-center",
div(class = "card-body py-2",
h6("Effect Size"),
h4(effect_interpretation)
)
)
)
)
)
)
})
# Output for Shapiro-Wilk test
output$shapiro_test <- renderPrint({
req(values$shapiro_result)
res <- values$shapiro_result
cat("Shapiro-Wilk Normality Test Results:\n\n")
cat(res$labels[1], "group:\n")
cat("W =", round(res$group1$statistic, 4), ", p-value =",
round(res$group1$p.value, 6), "\n")
if(res$group1$p.value < 0.05) {
cat("The data significantly deviates from normality.\n\n")
} else {
cat("The data appears to be normally distributed.\n\n")
}
cat(res$labels[2], "group:\n")
cat("W =", round(res$group2$statistic, 4), ", p-value =",
round(res$group2$p.value, 6), "\n")
if(res$group2$p.value < 0.05) {
cat("The data significantly deviates from normality.\n\n")
} else {
cat("The data appears to be normally distributed.\n\n")
}
if(res$group1$p.value < 0.05 || res$group2$p.value < 0.05) {
cat("Since at least one group deviates from normality, you might consider a non-parametric alternative like the Mann-Whitney U test.\n")
} else {
cat("Both groups appear normally distributed, which supports the use of the t-test.\n")
}
})
# Output for Levene's test
output$levene_test <- renderPrint({
req(values$levene_result)
res <- values$levene_result
cat("Levene's Test for Homogeneity of Variance:\n\n")
cat("F =", round(res$`F value`[1], 4),
", df =", paste(res$Df, collapse = ", "),
", p-value =", round(res$`Pr(>F)`[1], 6), "\n\n")
if(res$`Pr(>F)`[1] < 0.05) {
cat("The variances between groups are significantly different (heterogeneous).\n")
cat("Use Welch's t-test (unequal variances) instead of Student's t-test.\n")
} else {
cat("The variances between groups are not significantly different (homogeneous).\n")
cat("Student's t-test (equal variances) may be appropriate, but Welch's t-test is generally robust regardless.\n")
}
})
# Normal Q-Q plots
output$qq_plot <- renderPlot({
req(values$shapiro_result)
# Create Q-Q plots for both groups
par(mfrow = c(1, 2))
# First group
qqnorm(group_values()$group1,
main = paste("Q-Q Plot for", values$shapiro_result$labels[1]),
col = "blue", pch = 16)
qqline(group_values()$group1, col = "red", lwd = 2)
# Second group
qqnorm(group_values()$group2,
main = paste("Q-Q Plot for", values$shapiro_result$labels[2]),
col = "blue", pch = 16)
qqline(group_values()$group2, col = "red", lwd = 2)
par(mfrow = c(1, 1))
})
# Main plot (shown in summary tab)
output$main_plot <- renderPlot({
req(values$test_result)
# Get the data
data <- dataset()
# Create a boxplot with points
ggplot(data, aes(x = group, y = response, fill = group)) +
geom_boxplot(outlier.shape = NA, alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.6, size = 3) +
scale_fill_manual(values = c("#5dade2", "#ff7f0e")) +
labs(y = "Value",
subtitle = paste("t-test: p =", format.pval(values$test_result$p.value, digits = 3),
", d =", round(values$test_result$cohens_d, 2)),
title = "Comparison of Group Values") +
theme_minimal(base_size = 14) +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.text = element_text(size = 12),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"))
})
# Means plot
output$means_plot <- renderPlot({
req(values$test_result)
# Extract data for the plot
stats <- values$test_result$descriptives
group_labels <- values$shapiro_result$labels
# Create a data frame for plotting
plot_data <- data.frame(
Group = factor(c(group_labels[1], group_labels[2]), levels = group_labels),
Mean = c(stats$group1_mean, stats$group2_mean),
SE = c(stats$group1_sd / sqrt(stats$group1_n),
stats$group2_sd / sqrt(stats$group2_n))
)
# Calculate confidence interval based on the t-distribution
ci_factor <- qt(1 - (1 - input$conf_level) / 2, c(stats$group1_n - 1, stats$group2_n - 1))
# Add CI lower and upper bounds
plot_data$CI_lower <- plot_data$Mean - ci_factor * plot_data$SE
plot_data$CI_upper <- plot_data$Mean + ci_factor * plot_data$SE
# Create the plot
ggplot(plot_data, aes(x = Group, y = Mean, color = Group)) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2, size = 1) +
labs(y = "Mean with Confidence Interval",
title = "Group Means with Confidence Intervals",
subtitle = paste0(input$conf_level * 100, "% Confidence Level")) +
theme_minimal(base_size = 14) +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
axis.title.x = element_blank()) +
scale_color_manual(values = c("#5dade2", "#ff7f0e"))
})
# Boxplot
output$boxplot <- renderPlot({
req(values$test_result)
# Get the data
data <- dataset()
# Create a boxplot
ggplot(data, aes(x = group, y = response, fill = group)) +
geom_boxplot(outlier.shape = 16, alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_fill_manual(values = c("#5dade2", "#ff7f0e")) +
labs(y = "Value",
subtitle = paste("T-test: p =", format.pval(values$test_result$p.value, digits = 3)),
title = "Comparison of Group Values") +
theme_minimal(base_size = 14) +
theme(legend.position = "none",
plot.subtitle = element_text(face = "italic"),
axis.title.x = element_blank())
})
# Density plot
output$density_plot <- renderPlot({
req(values$test_result)
# Get the data
data <- dataset()
res <- values$test_result
# Calculate means
mean1 <- res$descriptives$group1_mean
mean2 <- res$descriptives$group2_mean
# Create the density plot
p <- ggplot(data, aes(x = response, fill = group, color = group)) +
geom_density(alpha = 0.5) +
geom_vline(xintercept = c(mean1, mean2),
color = c("#5dade2", "#ff7f0e"),
linetype = "dashed",
linewidth = 1) +
scale_fill_manual(values = c("#5dade2", "#ff7f0e")) +
scale_color_manual(values = c("#2874a6", "#d35400")) +
labs(title = "Density Distribution by Group",
subtitle = paste("Mean difference:", round(abs(mean2 - mean1), 2),
"| Cohen's d =", round(res$cohens_d, 2)),
x = "Value",
y = "Density") +
theme_minimal(base_size = 14) +
theme(legend.position = "top",
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
# If a confidence interval is available, add shaded area
if(!is.null(res$conf.int)) {
# Get max density value for scaling
max_density <- max(ggplot_build(p)$data[[1]]$density)
# Add confidence interval shading
p <- p + annotate("rect",
xmin = res$conf.int[1],
xmax = res$conf.int[2],
ymin = 0,
ymax = max_density * 0.15,
alpha = 0.2,
fill = "darkred") +
annotate("text",
x = mean(res$conf.int),
y = max_density * 0.17,
label = paste0(input$conf_level * 100, "% CI"),
color = "darkred",
size = 3)
}
return(p)
})
# APA style report
output$apa_report <- renderPrint({
req(values$test_result)
res <- values$test_result
stats <- res$descriptives
group_labels <- values$shapiro_result$labels
test_type <- if(stats$var_equal) "Student's" else "Welch's"
# Format the result in APA style
cat("APA Style Result:\n\n")
# Basic t-test result
result_text <- sprintf("Participants in the %s group (M = %.2f, SD = %.2f) compared to the %s group (M = %.2f, SD = %.2f) showed %s, %s's t(%.1f) = %.2f, p = %s, d = %.2f, %d%% CI [%.2f, %.2f].",
group_labels[1],
stats$group1_mean,
stats$group1_sd,
group_labels[2],
stats$group2_mean,
stats$group2_sd,
ifelse(res$p.value < 0.05,
"a statistically significant difference",
"no statistically significant difference"),
test_type,
res$parameter,
res$statistic,
ifelse(res$p.value < 0.001, "<.001", sprintf("%.3f", res$p.value)),
res$cohens_d,
input$conf_level * 100,
res$conf.int[1],
res$conf.int[2])
cat(strwrap(result_text, width = 80), sep = "\n")
})
# Statistical table for reporting
output$results_table <- renderTable({
req(values$test_result)
res <- values$test_result
stats <- res$descriptives
group_labels <- values$shapiro_result$labels
# Create a table for reporting
data.frame(
Comparison = paste(group_labels[1], "vs", group_labels[2]),
`Mean Diff` = round(abs(stats$group1_mean - stats$group2_mean), 2),
t = round(res$statistic, 2),
df = round(res$parameter, 1),
p = ifelse(res$p.value < 0.001, "<.001", sprintf("%.3f", res$p.value)),
`Cohen's d` = round(res$cohens_d, 2),
`CI Lower` = round(res$conf.int[1], 2),
`CI Upper` = round(res$conf.int[2], 2)
)
})
# Citation suggestion
output$citation_text <- renderPrint({
req(values$test_result)
res <- values$test_result
test_type <- if(res$descriptives$var_equal) "Student's t-test" else "Welch's t-test"
cat("For Student's t-test:\n")
cat("Student. (1908). The probable error of a mean. Biometrika, 6(1), 1-25.\n\n")
cat("For Welch's t-test:\n")
cat("Welch, B. L. (1947). The generalization of \"Student's\" problem when several different population variances are involved. Biometrika, 34(1/2), 28-35.\n\n")
cat("For effect size (Cohen's d):\n")
cat("Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd ed.). Lawrence Erlbaum Associates.\n")
})
# Generate report for download
report_content <- reactive({
req(values$test_result)
res <- values$test_result
stats <- res$descriptives
group_labels <- values$shapiro_result$labels
# Format the t-test results for a report
report <- c(
"# Independent Samples t-Test Report",
"",
"## Analysis Summary",
"",
paste0("Date: ", format(Sys.Date(), "%B %d, %Y")),
"",
"### Data Overview",
"",
paste0("* Group 1 (", group_labels[1], "): n = ", stats$group1_n, ", Mean = ", round(stats$group1_mean, 2),
", SD = ", round(stats$group1_sd, 2)),
paste0("* Group 2 (", group_labels[2], "): n = ", stats$group2_n, ", Mean = ", round(stats$group2_mean, 2),
", SD = ", round(stats$group2_sd, 2)),
"",
"### Test Results",
"",
paste0("* Test type: ", ifelse(stats$var_equal, "Student's t-test (equal variances)",
"Welch's t-test (unequal variances)")),
paste0("* t-value: ", round(res$statistic, 3)),
paste0("* Degrees of freedom: ", round(res$parameter, 1)),
paste0("* p-value: ", ifelse(res$p.value < 0.001, "<.001", sprintf("%.3f", res$p.value))),
paste0("* Cohen's d: ", round(res$cohens_d, 2), " (",
ifelse(res$cohens_d < 0.2, "very small",
ifelse(res$cohens_d < 0.5, "small",
ifelse(res$cohens_d < 0.8, "medium", "large"))), " effect)"),
paste0("* ", input$conf_level * 100, "% Confidence Interval: [",
round(res$conf.int[1], 2), ", ", round(res$conf.int[2], 2), "]"),
"",
"### Assumption Tests",
"",
"#### Normality (Shapiro-Wilk Test)",
paste0("* ", group_labels[1], ": W = ", round(values$shapiro_result$group1$statistic, 3),
", p = ", round(values$shapiro_result$group1$p.value, 3),
ifelse(values$shapiro_result$group1$p.value < 0.05, " (violates normality)", " (normally distributed)")),
paste0("* ", group_labels[2], ": W = ", round(values$shapiro_result$group2$statistic, 3),
", p = ", round(values$shapiro_result$group2$p.value, 3),
ifelse(values$shapiro_result$group2$p.value < 0.05, " (violates normality)", " (normally distributed)")),
"",
"#### Homogeneity of Variance (Levene's Test)",
paste0("* F = ", round(values$levene_result$`F value`[1], 3),
", p = ", round(values$levene_result$`Pr(>F)`[1], 3),
ifelse(values$levene_result$`Pr(>F)`[1] < 0.05, " (unequal variances)", " (equal variances)")),
"",
"### Conclusion",
"",
ifelse(res$p.value < 0.05,
paste0("There is a statistically significant difference between the means of ",
group_labels[1], " and ", group_labels[2],
" (p < 0.05). The difference is ", round(abs(stats$group1_mean - stats$group2_mean), 2),
" with a ", ifelse(res$cohens_d < 0.2, "very small",
ifelse(res$cohens_d < 0.5, "small",
ifelse(res$cohens_d < 0.8, "medium", "large"))),
" effect size (d = ", round(res$cohens_d, 2), ")."),
paste0("There is not enough evidence to suggest a significant difference between the means of ",
group_labels[1], " and ", group_labels[2], " (p > 0.05).")),
"",
"### APA Style Reporting",
"",
paste0("Participants in the ", group_labels[1], " group (M = ", round(stats$group1_mean, 2),
", SD = ", round(stats$group1_sd, 2), ") compared to the ", group_labels[2], " group (M = ",
round(stats$group2_mean, 2), ", SD = ", round(stats$group2_sd, 2), ") showed ",
ifelse(res$p.value < 0.05, "a statistically significant difference", "no statistically significant difference"),
", ", ifelse(stats$var_equal, "Student's", "Welch's"), " t(", round(res$parameter, 1), ") = ",
round(res$statistic, 2), ", p = ", ifelse(res$p.value < 0.001, "<.001", sprintf("%.3f", res$p.value)),
", d = ", round(res$cohens_d, 2), ", ", input$conf_level * 100, "% CI [", round(res$conf.int[1], 2),
", ", round(res$conf.int[2], 2), "].")
)
# Join all lines with newline characters
paste(report, collapse = "\n")
})
# Download handler for the report
output$download_report <- downloadHandler(
filename = function() {
paste0("t-test-report-", format(Sys.time(), "%Y%m%d-%H%M%S"), ".md")
},
content = function(file) {
writeLines(report_content(), file)
}
)
# Learn more action
observeEvent(input$learn_more, {
showModal(modalDialog(
title = "About the Independent Samples t-Test",
div(
h4("Purpose"),
p("The independent samples t-test is used to compare the means of two unrelated groups to determine if there is a statistically significant difference between them."),
h4("Key Assumptions"),
tags$ol(
tags$li(strong("Independence:"), " Observations in each group are independent from each other"),
tags$li(strong("Normality:"), " Data in each group should be approximately normally distributed"),
tags$li(strong("Homogeneity of variance:"), " Both groups should have similar variances (for Student's t-test)")
),
h4("Interpretation"),
p("If the p-value is less than your significance level (typically 0.05), you can reject the null hypothesis that the means are equal. The effect size (Cohen's d) helps you understand the magnitude of the difference:"),
tags$ul(
tags$li("d ≈ 0.2: Small effect"),
tags$li("d ≈ 0.5: Medium effect"),
tags$li("d ≈ 0.8: Large effect")
),
p("For more in-depth information, check the 'Help & Learning' tab.")
),
footer = modalButton("Close"),
size = "l"
))
})
}
)
}
# Example of how to use the module in a Shiny app
ui <- fluidPage(
theme = bs_theme(version = 5, bootswatch = "lumen"),
independentTTestUI("ttest")
)
server <- function(input, output, session) {
independentTTestServer("ttest")
}
# This is how you would run the app when developing
shinyApp(ui, server)
Types of t-Tests: Student’s vs. Welch’s
There are two main variations of the independent samples t-test:
| Feature | Student’s t-Test | Welch’s t-Test |
|---|---|---|
| Assumption of equal variances | Required | Not required |
| When to use | When variances are similar between groups | When variances may differ between groups |
| Degrees of freedom | \(n_1 + n_2 - 2\) | Calculated using a complex formula |
| Robustness | Less robust to violations of assumptions | More robust to violations of assumptions |
| Recommended as default | No | Yes |
The Welch’s t-test is generally recommended as the default choice because:
- It does not assume equal variances between groups
- It performs well even when sample sizes are unequal
- It maintains good statistical power and control of Type I error rates
How the Independent Samples t-Test Works
The t-test compares the observed difference between group means relative to the variability within the groups:
Mathematical Procedure
Student’s t-Test (Equal Variances)
Calculate the means for each group: \(\bar{X}_1\) and \(\bar{X}_2\)
Calculate the standard deviations for each group: \(s_1\) and \(s_2\)
Calculate the pooled standard deviation:
\[s_p = \sqrt{\frac{(n_1-1)s_1^2 + (n_2-1)s_2^2}{n_1+n_2-2}}\]
Calculate the standard error of the difference between means:
\[SE = s_p \sqrt{\frac{1}{n_1} + \frac{1}{n_2}}\]
Calculate the t-statistic:
\[t = \frac{\bar{X}_1 - \bar{X}_2}{SE}\]
Determine degrees of freedom:
\[df = n_1 + n_2 - 2\]
Calculate p-value by comparing the t-statistic to the t-distribution with the calculated degrees of freedom
Welch’s t-Test (Unequal Variances)
Calculate the means for each group: \(\bar{X}_1\) and \(\bar{X}_2\)
Calculate the standard deviations for each group: \(s_1\) and \(s_2\)
Calculate the standard error of the difference between means:
\[SE = \sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}\]
Calculate the t-statistic:
\[t = \frac{\bar{X}_1 - \bar{X}_2}{SE}\]
Determine approximate degrees of freedom (Welch-Satterthwaite equation):
\[df = \frac{\left(\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}\right)^2}{\frac{(s_1^2/n_1)^2}{n_1-1} + \frac{(s_2^2/n_2)^2}{n_2-1}}\]
Calculate p-value by comparing the t-statistic to the t-distribution with the calculated degrees of freedom
Effect Size (Cohen’s d)
The effect size quantifies the magnitude of the difference between groups, independent of sample size:
\[d = \frac{|\bar{X}_1 - \bar{X}_2|}{s_{pooled}}\]
Where \(s_{pooled}\) is the pooled standard deviation.
Assumptions of the Independent Samples t-Test
- Independence: Observations in each group are independent (by research design)
- Normality: Both samples come from normally distributed populations
- With large samples (n > 30 per group), the t-test is robust to normality violations due to the Central Limit Theorem
- Homogeneity of variance (for Student’s t-test only): Both groups have similar variances
- Test using Levene’s test; if violated, use Welch’s t-test instead
Statistical Power Considerations
Important
Statistical Power Note: The power of a t-test is influenced by: - Sample size - Effect size (magnitude of the difference) - Significance level (α) - Variability within groups
To achieve 80% power (standard convention) for detecting: - Small effect (d = 0.2): Need approximately 394 participants per group - Medium effect (d = 0.5): Need approximately 64 participants per group - Large effect (d = 0.8): Need approximately 26 participants per group
These calculations assume α = 0.05 for a two-tailed test.
Example 1: Comparing Treatment vs. Control Group
A researcher wants to test if a new medication affects cognitive performance. They randomly assign 20 participants to either a treatment group or a control group.
Data:
| Treatment Group | Control Group |
|---|---|
| 86, 92, 78, 84, 88, 90, 95, 81, 89, 83 | 74, 77, 70, 82, 75, 68, 73, 79, 71, 69 |
Analysis Steps:
- Check normality assumption:
- Shapiro-Wilk test: Treatment (p = 0.81), Control (p = 0.66)
- Both p-values > 0.05, so we can assume normality for both groups
- Check homogeneity of variance:
- Levene’s test: p = 0.27
- p > 0.05, so we can assume equal variances
- Choose appropriate test:
- Since equal variances can be assumed, Student’s t-test is appropriate
- For completeness, we’ll report both Student’s and Welch’s results
- Perform t-test:
- Treatment mean = 86.6, SD = 5.3
- Control mean = 73.8, SD = 4.6
- Mean difference = 12.8
- Student’s t(18) = 5.87, p < 0.001
- Welch’s t(17.6) = 5.87, p < 0.001
- Cohen’s d = 2.63 (very large effect)
- 95% CI for difference: [8.3, 17.3]
Results:
- t = 5.87, p < 0.001, d = 2.63
- Mean treatment: 86.6, Mean control: 73.8
- Interpretation: There is a statistically significant difference in cognitive performance between the treatment and control groups (p < 0.05), with the treatment group scoring higher. The effect size is very large (d > 0.8).
How to Report: “Participants who received the medication (M = 86.6, SD = 5.3) scored significantly higher on cognitive performance tests compared to those in the control group (M = 73.8, SD = 4.6), t(18) = 5.87, p < 0.001, d = 2.63, 95% CI [8.3, 17.3]. This represents a very large effect.”
Example 2: Comparing Two Teaching Methods
An educator wants to compare two teaching methods. They implement Method A in one class of 25 students and Method B in another class of 25 students, then administer the same test.
Data (summary statistics):
- Method A: n = 25, Mean = 78.3, SD = 8.7 - Method B: n = 25, Mean = 72.1, SD = 12.3
Results:
- Levene’s test: p = 0.04 (unequal variances) - Welch’s t(42.8) = 2.14, p = 0.038, d = 0.59 - Interpretation: There is a statistically significant difference in test scores between the two teaching methods (p < 0.05), with Method A producing higher scores on average. The effect size is medium (d ≈ 0.6).
How to Report: “Students taught using Method A (M = 78.3, SD = 8.7) performed significantly better than those taught using Method B (M = 72.1, SD = 12.3), Welch’s t(42.8) = 2.14, p = 0.038, d = 0.59, 95% CI [0.4, 12.0]. This represents a medium-sized effect. Welch’s t-test was used due to unequal variances between the groups (Levene’s test p = 0.04).”
How to Report Independent Samples t-Test Results
When reporting the results of an independent samples t-test in academic papers or research reports, include the following elements:
"[Group 1] (M = [mean1], SD = [sd1]) [showed/did not show] significantly [higher/lower/different]
[variable] compared to [Group 2] (M = [mean2], SD = [sd2]), [Student's/Welch's] t([df]) = [t-value],
p = [p-value], d = [effect size], 95% CI [lower bound, upper bound]."For example:
"The treatment group (M = 86.6, SD = 5.3) showed significantly higher cognitive performance
compared to the control group (M = 73.8, SD = 4.6), t(18) = 5.87, p < 0.001, d = 2.63,
95% CI [8.3, 17.3]."Additional information to consider including: - Which version of the t-test was used (Student’s or Welch’s) - Results of assumption tests (normality, homogeneity of variance) - Whether the test was one-tailed or two-tailed - Sample sizes for each group
APA Style Reporting
For APA style papers (7th edition), report the independent samples t-test results as follows:
We conducted an independent samples t-test to examine whether [variable] differed between [Group 1]
and [Group 2]. Results indicated that [Group 1] (M = [mean1], SD = [sd1]) [showed/did not show]
significantly [higher/lower] [variable] than [Group 2] (M = [mean2], SD = [sd2]),
[Student's/Welch's] t([df]) = [t-value], p = [p-value], d = [effect size], 95% CI [lower, upper].Reporting in Tables
When reporting multiple t-test results in a table, include these columns: - Variables being compared - Means and standard deviations for both groups - t-value - Degrees of freedom - p-value - Effect size (Cohen’s d) - 95% confidence interval
Test Your Understanding
- When should you use Welch’s t-test instead of Student’s t-test?
- When sample sizes are very large
- When both groups have equal variances
- When groups have unequal variances
- When data is not normally distributed
- What does Cohen’s d measure in a t-test?
- The probability of making a Type I error
- The effect size (magnitude of the difference)
- The variance within groups
- The degrees of freedom
- A researcher finds t(28) = 2.15, p = 0.04 when comparing two groups. What can they conclude?
- There is no significant difference between the groups
- There is a significant difference between the groups
- The test is invalid
- More data is needed
- What is the appropriate sample size per group to detect a medium effect size (d = 0.5) with 80% power?
- Approximately 10
- Approximately 25
- Approximately 64
- Approximately 400
- What happens to the degrees of freedom in Welch’s t-test compared to Student’s t-test?
- They are always higher
- They are always lower
- They depend on the sample variances and sizes
- They remain the same
Answers: 1-C, 2-B, 3-B, 4-C, 5-C
Common Questions About the t-Test
When should I use an independent samples t-test versus a paired t-test?
Use an independent samples t-test when comparing two separate, unrelated groups (e.g., treatment vs. control). Use a paired t-test when comparing two related measurements (e.g., before vs. after treatment on the same subjects).
What if my data isn’t normally distributed?
If your sample size is large (n > 30 per group), the t-test is generally robust to violations of normality due to the Central Limit Theorem. For smaller samples with non-normal data, consider using a non-parametric alternative like the Mann-Whitney U test.
How do I report t-test results in research papers?
For a complete report, include: t-value, degrees of freedom, p-value, mean difference, 95% confidence interval, and effect size (Cohen’s d). For example: “The treatment group (M = 7.13, SD = 0.23) scored significantly higher than the control group (M = 5.78, SD = 0.31), t(18) = 10.82, p < .001, d = 4.84, 95% CI [1.08, 1.62].”
What sample size do I need for adequate statistical power?
The required sample size depends on the expected effect size and desired power level. As a rough guideline, to detect a medium effect (d = 0.5) with 80% power at α = 0.05, you need approximately 64 participants per group. For a large effect (d = 0.8), you need about 26 participants per group.
Why is Welch’s t-test often recommended over Student’s t-test?
Welch’s t-test doesn’t assume equal variances between groups, making it more robust when this assumption is violated. Research has shown that Welch’s t-test maintains good control of Type I error rates while providing adequate statistical power, even when variances are equal. Therefore, many statisticians recommend it as the default choice for independent samples comparisons.
Can I use a t-test if my groups have different sample sizes?
Yes, the t-test can handle unequal sample sizes. However, when sample sizes differ and variances are unequal (heteroscedasticity), Welch’s t-test is strongly recommended over Student’s t-test to maintain proper Type I error control.
Examples of When to Use the Independent Samples t-Test
- Medical research: Comparing treatment outcomes between control and experimental groups
- Educational research: Comparing test scores between two different teaching methods
- Psychology: Comparing psychological measures between different demographic groups
- Market research: Comparing consumer satisfaction scores between two product versions
- Environmental science: Comparing pollution levels between two different locations
- Business: Comparing employee performance between two different management styles
- Sports science: Comparing physiological measures between athletes and non-athletes
- Sociology: Comparing social attitudes between two different cultures or communities
- Agriculture: Comparing crop yields between two different farming methods
- Manufacturing: Comparing product quality metrics between two production processes
Step-by-Step Guide to the Independent Samples t-Test
1. Check Assumptions
Before interpreting t-test results, you should verify these assumptions:
- Independence: Observations in each group are independent (by research design)
- Normality: Both samples come from normally distributed populations
- Check using Shapiro-Wilk test and Q-Q plots in the “Assumptions” tab
- With large samples (n > 30 per group), the t-test is robust to normality violations
- Homogeneity of variance: Both groups have similar variances
- Check using Levene’s test in the “Assumptions” tab
- If violated, use Welch’s t-test instead of Student’s t-test
2. Choose the Appropriate Test
- If variances are equal (Levene’s test p ≥ 0.05), you can use Student’s t-test
- If variances are unequal (Levene’s test p < 0.05), use Welch’s t-test
- When in doubt, Welch’s t-test is generally recommended as the safer option
3. Interpret the Results
- Check the p-value:
- If p < 0.05, there is a statistically significant difference between group means
- If p ≥ 0.05, there is not enough evidence to conclude the means differ
- Examine the effect size (Cohen’s d):
- d ≈ 0.2: Small effect
- d ≈ 0.5: Medium effect
- d ≈ 0.8: Large effect
- Look at the confidence interval:
- If it doesn’t include zero, the difference is statistically significant
- The width indicates precision of the estimated difference
References
- Student. (1908). The probable error of a mean. Biometrika, 6(1), 1-25.
- Welch, B. L. (1947). The generalization of “Student’s” problem when several different population variances are involved. Biometrika, 34(1/2), 28-35.
- Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd ed.). Lawrence Erlbaum Associates.
- Ruxton, G. D. (2006). The unequal variance t-test is an underused alternative to Student’s t-test and the Mann-Whitney U test. Behavioral Ecology, 17(4), 688-690.
- Delacre, M., Lakens, D., & Leys, C. (2017). Why psychologists should by default use Welch’s t-test instead of Student’s t-test. International Review of Social Psychology, 30(1), 92-101.
- Fagerland, M. W. (2012). t-tests, non-parametric tests, and large studies—a paradox of statistical practice? BMC Medical Research Methodology, 12(1), 78.
Reuse
Citation
BibTeX citation:
@online{kassambara2025,
author = {Kassambara, Alboukadel},
title = {Independent {Samples} {t-Test} {Calculator} \textbar{}
{Compare} {Two} {Group} {Means}},
date = {2025-04-10},
url = {https://www.datanovia.com/learn/tools/statistical-tests/independent-samples-t-test.html},
langid = {en}
}
For attribution, please cite this work as:
Kassambara, Alboukadel. 2025. “Independent Samples t-Test
Calculator | Compare Two Group Means.” April 10, 2025. https://www.datanovia.com/learn/tools/statistical-tests/independent-samples-t-test.html.