Análisis estadístico de experimentos

DCA, DBA y diseños factoriales

Author

Edimer David Jaramillo

Published

June 5, 2025

Bibliotecas

Code

library(tidyverse)
library(effectsize)
library(readxl)
library(janitor)
library(broom)
library(emmeans)
library(car)

Datos

Code

df_frijol <- read_csv("../datos/dca_frijol.csv") |>
  mutate(Tratamiento = factor(
    Tratamiento,
    levels = c("Control", "Orgánica", "Química", "Mixto")
  ))
df_frijol

Code

df_leche <- read_csv("../datos/dbca_leche.csv")
df_leche

Code

df_ratones <- read_excel("../datos/experimento_ratones.xlsx")
df_ratones

Tamaño del efecto

Artículo guía

Anova de una vía (DCA)

Hipótesis

\[ \begin{aligned} H_0: \mu_1 = \mu_2 = \mu_3 = \mu_4 \\ H_1: \mu_i \neq \mu_j \end{aligned} \]

Exploración

Code

df_frijol |> 
  ggplot(aes(x = Tratamiento, y = Peso_Vainas_g)) +
  geom_boxplot()

Modelo

Code

modelo1 <- aov(Peso_Vainas_g ~ Tratamiento, data = df_frijol)
modelo1 |> tidy()

Medias estimadas

Code

modelo1 |> 
  emmeans(specs = "Tratamiento") |> 
  as.data.frame()

Diferencia de medias

Code

modelo1 |> TukeyHSD()

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = Peso_Vainas_g ~ Tratamiento, data = df_frijol)

$Tratamiento
                    diff       lwr      upr     p adj
Orgánica-Control 21.0832 15.648373 26.51803 0.0000000
Química-Control  30.3488 24.913973 35.78363 0.0000000
Mixto-Control    37.5264 32.091573 42.96123 0.0000000
Química-Orgánica  9.2656  3.830773 14.70043 0.0001307
Mixto-Orgánica   16.4432 11.008373 21.87803 0.0000000
Mixto-Química     7.1776  1.742773 12.61243 0.0045188

Tamaño del efecto

Code

modelo1 |> 
  eta_squared()

Residuales

Code

par(mfrow = c(2, 2))
plot(modelo1)

Prueba de normalidad:

Code

modelo1 |> 
  residuals() |> 
  shapiro.test()


    Shapiro-Wilk normality test

data:  residuals(modelo1)
W = 0.99243, p-value = 0.8514

Prueba de homocedasticidad:

Code

bartlett.test(data = df_frijol, Peso_Vainas_g ~ Tratamiento)


    Bartlett test of homogeneity of variances

data:  Peso_Vainas_g by Tratamiento
Bartlett's K-squared = 0.65189, df = 3, p-value = 0.8845

Diseño en bloques (DBA)

Hipótesis principal:

\[ \begin{aligned} H_0: \mu_1 = \mu_2 = \mu_3 \\ H_1: \mu_i \neq \mu_j \end{aligned} \]

Hipótesis de verificación:

\[ \begin{aligned} H_0: \beta_1 = \beta_2 = \beta_3 \\ H_A: \beta_i \neq \beta_j \end{aligned} \]

Exploración

Code

df_leche |> 
  ggplot(aes(x = Dieta, y = Produccion_Leche_L)) +
  facet_wrap(~Finca) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Modelo

Code

modelo2 <- aov(Produccion_Leche_L ~ Dieta + Finca, data = df_leche)
modelo2 |> tidy()

Medias estimadas

Code

modelo2 |> 
  emmeans(specs = "Dieta") |> 
  as.data.frame()

Diferencia de medias

Code

modelo2 |> TukeyHSD(which = "Dieta")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = Produccion_Leche_L ~ Dieta + Finca, data = df_leche)

$Dieta
                                            diff        lwr        upr p adj
Ensilaje+Concentrado-Concentrado+Pasto   6.02075   4.450156   7.591344     0
Pasto-Concentrado+Pasto                 -6.64025  -8.210844  -5.069656     0
Pasto-Ensilaje+Concentrado             -12.66100 -14.231594 -11.090406     0

Tamaño del efecto

Code

modelo2 |> 
  eta_squared()

Residuales

Code

par(mfrow = c(2, 2))
plot(modelo2)

Prueba de normalidad:

Code

modelo2 |> 
  residuals() |> 
  shapiro.test()


    Shapiro-Wilk normality test

data:  residuals(modelo2)
W = 0.99012, p-value = 0.5453

Prueba de homocedasticidad tratamiento:

Code

leveneTest(data = df_leche, Produccion_Leche_L ~ Dieta)

Prueba de homocedasticidad bloque:

Code

leveneTest(data = df_leche, Produccion_Leche_L ~ Finca)

Anova de dos vías aditivo

Hipótesis factor 1:

\[ \begin{aligned} H_0: \mu_1 = \mu_2 = \mu_3 = \mu_4 \\ H_1: \mu_i \neq \mu_j \end{aligned} \]

Hipótesis factor 2:

\[ \begin{aligned} H_0: \mu_1 = \mu_2 \\ H_1: \mu_1 \neq \mu_2 \end{aligned} \]

Exploración

Code

df_ratones |> 
  ggplot(aes(x = Strain, y = GST, color = Treatment, fill = Treatment)) +
  geom_boxplot(alpha = 0.5)

Modelo

Code

modelo3 <- aov(GST ~ Treatment + Strain, data = df_ratones)
modelo3 |> tidy()

Medias estimadas

Tratamiento:

Code

modelo3 |> 
  emmeans(specs = "Treatment") |> 
  as.data.frame()

Raza:

Code

modelo3 |> 
  emmeans(specs = "Strain") |> 
  as.data.frame()

Diferencia de medias

Tratamiento:

Code

modelo3 |> 
  TukeyHSD(which = "Treatment")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = GST ~ Treatment + Strain, data = df_ratones)

$Treatment
                 diff      lwr      upr     p adj
Treated-Control 238.5 92.14634 384.8537 0.0042678

Raza:

Code

modelo3 |> 
  TukeyHSD(which = "Strain")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = GST ~ Treatment + Strain, data = df_ratones)

$Strain
                  diff       lwr      upr     p adj
A/J-129/Ola      84.25 -198.7606 367.2606 0.8071951
BALB/C-129/Ola  -30.50 -313.5106 252.5106 0.9875652
NIH-129/Ola      28.75 -254.2606 311.7606 0.9895278
BALB/C-A/J     -114.75 -397.7606 168.2606 0.6275906
NIH-A/J         -55.50 -338.5106 227.5106 0.9329709
NIH-BALB/C       59.25 -223.7606 342.2606 0.9201965

Tamaño del efecto

Code

modelo3 |> 
  eta_squared()

Residuales

Code

par(mfrow = c(2, 2))
plot(modelo3)

Prueba de normalidad:

Code

modelo3 |> 
  residuals() |> 
  shapiro.test()


    Shapiro-Wilk normality test

data:  residuals(modelo3)
W = 0.98395, p-value = 0.9872

Prueba de homocedasticidad tratamiento:

Code

bartlett.test(data = df_ratones, GST ~ Treatment)


    Bartlett test of homogeneity of variances

data:  GST by Treatment
Bartlett's K-squared = 0.00092761, df = 1, p-value = 0.9757

Prueba de homocedasticidad raza:

Code

bartlett.test(data = df_ratones, GST ~ Strain)


    Bartlett test of homogeneity of variances

data:  GST by Strain
Bartlett's K-squared = 1.4085, df = 3, p-value = 0.7035

Anova de dos vías multiplicativo

Hipótesis factor 1:

\[\begin{aligned} H_0: \mu_1 = \mu_2 = \mu_3 = \mu_4 \\ H_1: \mu_i \neq \mu_j \end{aligned}\]

Hipótesis factor 2:

\[\begin{aligned} H_0: \mu_1 = \mu_2 \\ H_1: \mu_1 \neq \mu_1 \end{aligned}\]

Hipótesis interacción:

\[\begin{aligned} H_0: (\alpha\tau)_{11} = \cdots = (\alpha\beta)_{42} \\ H_1: (\alpha\tau)_{ij} \neq (\alpha\tau)_{ij} \end{aligned}\]

Exploración

Code

df_ratones |> 
  group_by(Strain, Treatment) |> 
  summarise(promedio = mean(GST)) |> 
  ggplot(aes(x = Strain, y = promedio, color = Treatment)) +
  geom_point() +
  geom_line(aes(group = Treatment))

Modelo

Code

modelo4 <- aov(GST ~ Treatment * Strain, data = df_ratones)
modelo4 |> tidy()

Medias estimadas

Tratamiento:

Code

modelo4 |> 
  emmeans(specs = "Treatment") |> 
  as.data.frame()

Raza:

Code

modelo4 |> 
  emmeans(specs = "Strain") |> 
  as.data.frame()

Interacción raza-tratamiento:

Code

modelo4 |> 
  emmeans(specs = pairwise ~ Treatment | Strain) |> 
  as.data.frame()

Diferencia de medias

Tratamiento:

Code

modelo4 |> 
  TukeyHSD(which = "Treatment")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = GST ~ Treatment * Strain, data = df_ratones)

$Treatment
                 diff      lwr      upr   p adj
Treated-Control 238.5 83.29536 393.7046 0.00758

Raza:

Code

modelo4 |> 
  TukeyHSD(which = "Strain")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = GST ~ Treatment * Strain, data = df_ratones)

$Strain
                  diff       lwr      upr     p adj
A/J-129/Ola      84.25 -220.5596 389.0596 0.8127166
BALB/C-129/Ola  -30.50 -335.3096 274.3096 0.9877705
NIH-129/Ola      28.75 -276.0596 333.5596 0.9896978
BALB/C-A/J     -114.75 -419.5596 190.0596 0.6405120
NIH-A/J         -55.50 -360.3096 249.3096 0.9344206
NIH-BALB/C       59.25 -245.5596 364.0596 0.9219941

Tamaño del efecto

Code

modelo4 |> 
  eta_squared()

Residuales

Code

par(mfrow = c(2, 2))
plot(modelo4)

Prueba de normalidad:

Code

modelo4 |> 
  residuals() |> 
  shapiro.test()


    Shapiro-Wilk normality test

data:  residuals(modelo4)
W = 0.91331, p-value = 0.1316

Prueba de homocedasticidad tratamiento:

Code

bartlett.test(data = df_ratones, GST ~ Treatment)


    Bartlett test of homogeneity of variances

data:  GST by Treatment
Bartlett's K-squared = 0.00092761, df = 1, p-value = 0.9757

Prueba de homocedasticidad raza:

Code

bartlett.test(data = df_ratones, GST ~ Strain)


    Bartlett test of homogeneity of variances

data:  GST by Strain
Bartlett's K-squared = 1.4085, df = 3, p-value = 0.7035

Prueba de homocedasticidad interacción:

Code

leveneTest(data = df_ratones, GST ~ Treatment * Strain)

Anova multifactorial aditivo

Hipótesis factor 1:

\[\begin{aligned} H_0: \mu_1 = \mu_2 = \mu_3 = \mu_4 \\ H_1: \mu_i \neq \mu_j \end{aligned}\]

Hipótesis factor 2:

\[\begin{aligned} H_0: \mu_1 = \mu_2 \\ H_1: \mu_1 \neq \mu_1 \end{aligned}\]

Hipótesis de verificación:

\[ \begin{aligned} H_0: \beta_1 = \beta_2 \\ H_A: \beta_1 \neq \beta_2 \end{aligned} \]

Exploración

Code

df_ratones |> 
  ggplot(aes(x = Strain, y = GST, color = Treatment, fill = Treatment)) +
  facet_wrap(~Block) +
  geom_point() +
  geom_line(aes(group = Treatment))

Modelo

Code

modelo5 <- aov(GST ~ Treatment + Strain + Block, data = df_ratones)
modelo5 |> tidy()

Medias estimadas

Tratamiento:

Code

modelo5 |> 
  emmeans(specs = "Treatment") |> 
  as.data.frame()

Raza:

Code

modelo5 |> 
  emmeans(specs = "Strain") |> 
  as.data.frame()

Diferencia de medias

Tratamiento:

Code

modelo5 |> 
  TukeyHSD(which = "Treatment")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = GST ~ Treatment + Strain + Block, data = df_ratones)

$Treatment
                 diff      lwr      upr     p adj
Treated-Control 238.5 145.0966 331.9034 0.0002012

Raza:

Code

modelo5 |> 
  TukeyHSD(which = "Strain")

  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = GST ~ Treatment + Strain + Block, data = df_ratones)

$Strain
                  diff        lwr       upr     p adj
A/J-129/Ola      84.25  -97.12006 265.62006 0.5151475
BALB/C-129/Ola  -30.50 -211.87006 150.87006 0.9537274
NIH-129/Ola      28.75 -152.62006 210.12006 0.9607093
BALB/C-A/J     -114.75 -296.12006  66.62006 0.2737937
NIH-A/J         -55.50 -236.87006 125.87006 0.7867403
NIH-BALB/C       59.25 -122.12006 240.62006 0.7532299

Tamaño del efecto

Code

modelo5 |> 
  eta_squared()

Residuales

Code

par(mfrow = c(2, 2))
plot(modelo5)

Prueba de normalidad:

Code

modelo5 |> 
  residuals() |> 
  shapiro.test()


    Shapiro-Wilk normality test

data:  residuals(modelo5)
W = 0.96573, p-value = 0.7656

Prueba de homocedasticidad tratamiento:

Code

bartlett.test(data = df_ratones, GST ~ Treatment)


    Bartlett test of homogeneity of variances

data:  GST by Treatment
Bartlett's K-squared = 0.00092761, df = 1, p-value = 0.9757

Prueba de homocedasticidad raza:

Code

bartlett.test(data = df_ratones, GST ~ Strain)


    Bartlett test of homogeneity of variances

data:  GST by Strain
Bartlett's K-squared = 1.4085, df = 3, p-value = 0.7035

Prueba de homocedasticidad bloque:

Code

bartlett.test(data = df_ratones, GST ~ Block)


    Bartlett test of homogeneity of variances

data:  GST by Block
Bartlett's K-squared = 0.14923, df = 1, p-value = 0.6993

Anova multifactorial multiplicativo

Hipótesis factor 1:

\[\begin{aligned}H_0: \mu_1 = \mu_2 = \mu_3 = \mu_4 \\ H_1: \mu_i \neq \mu_j\end{aligned}\]

Hipótesis factor 2:

\[\begin{aligned}H_0: \mu_1 = \mu_2 \\ H_1: \mu_1 \neq \mu_1\end{aligned}\]

Hipótesis interacción:

\[\begin{aligned}H_0: (\alpha\tau)_{11} = \cdots = (\alpha\beta)_{42} \\ H_1: (\alpha\tau)_{ij} \neq (\alpha\tau)_{ij} \end{aligned}\]

Hipótesis de verificación:

\[ \begin{aligned} H_0: \beta_1 = \beta_2 \\ H_A: \beta_1 \neq \beta_2 \end{aligned} \]

Exploración

Code

df_ratones |> 
  ggplot(aes(x = Strain, y = GST, color = Treatment, fill = Treatment)) +
  facet_wrap(~Block) +
  geom_point() +
  geom_line(aes(group = Treatment))

Modelo

Code

modelo6 <- aov(GST ~ Treatment * Strain + Block, data = df_ratones)
modelo6 |> tidy()

Medias estimadas

Tratamiento:

Code

modelo6 |> 
  emmeans(specs = "Treatment") |> 
  as.data.frame()

Raza:

Code

modelo6 |> 
  emmeans(specs = "Strain") |> 
  as.data.frame()

Interacción raza-tratamiento:

Code

modelo6 |> 
  emmeans(specs = pairwise ~ Treatment | Strain) |> 
  as.data.frame()

Diferencia de medias

Code

modelo6 |>
  emmeans(specs = pairwise ~ Treatment | Strain)

$emmeans
Strain = 129/Ola:
 Treatment emmean   SE df lower.CL upper.CL
 Control      526 38.5  7      436      617
 Treated      742 38.5  7      652      833

Strain = A/J:
 Treatment emmean   SE df lower.CL upper.CL
 Control      508 38.5  7      418      599
 Treated      929 38.5  7      838     1020

Strain = BALB/C:
 Treatment emmean   SE df lower.CL upper.CL
 Control      504 38.5  7      414      595
 Treated      704 38.5  7      613      794

Strain = NIH:
 Treatment emmean   SE df lower.CL upper.CL
 Control      604 38.5  7      513      695
 Treated      722 38.5  7      632      813

Results are averaged over the levels of: Block 
Confidence level used: 0.95 

$contrasts
Strain = 129/Ola:
 contrast          estimate   SE df t.ratio p.value
 Control - Treated     -216 54.4  7  -3.972  0.0054

Strain = A/J:
 contrast          estimate   SE df t.ratio p.value
 Control - Treated     -420 54.4  7  -7.733  0.0001

Strain = BALB/C:
 contrast          estimate   SE df t.ratio p.value
 Control - Treated     -199 54.4  7  -3.659  0.0081

Strain = NIH:
 contrast          estimate   SE df t.ratio p.value
 Control - Treated     -118 54.4  7  -2.179  0.0657

Results are averaged over the levels of: Block

Tamaño del efecto

Code

modelo6 |> 
  eta_squared()

Residuales

Code

par(mfrow = c(2, 2))
plot(modelo6)

Prueba de normalidad:

Code

modelo6 |> 
  residuals() |> 
  shapiro.test()


    Shapiro-Wilk normality test

data:  residuals(modelo6)
W = 0.92236, p-value = 0.1841

Prueba de homocedasticidad tratamiento:

Code

bartlett.test(data = df_ratones, GST ~ Treatment)


    Bartlett test of homogeneity of variances

data:  GST by Treatment
Bartlett's K-squared = 0.00092761, df = 1, p-value = 0.9757

Prueba de homocedasticidad raza:

Code

bartlett.test(data = df_ratones, GST ~ Strain)


    Bartlett test of homogeneity of variances

data:  GST by Strain
Bartlett's K-squared = 1.4085, df = 3, p-value = 0.7035

Prueba de homocedasticidad bloque:

Code

bartlett.test(data = df_ratones, GST ~ Block)


    Bartlett test of homogeneity of variances

data:  GST by Block
Bartlett's K-squared = 0.14923, df = 1, p-value = 0.6993

Prueba de homocedasticidad interacción:

Code

leveneTest(data = df_ratones, GST ~ Treatment * Strain)

Comparación de modelos - AIC

Code

AIC(modelo3, modelo4, modelo5, modelo6)

Comparaciones múltiples

Tabla

Code

TukeyHSD(modelo6, which = "Treatment:Strain") |> 
  tidy()

Gráfico 1

Code

TukeyHSD(modelo6, which = "Treatment:Strain") |> 
  tidy() |> 
  ggplot(aes(x = fct_reorder(contrast, estimate), y = estimate,
             ymin = conf.low, ymax = conf.high)) +
  geom_point() +
  geom_errorbar() +
  geom_hline(yintercept = 0, lty = 2, color = "red") +
  coord_flip()