Likan Zhan

Linear model and t test

Likan Zhan · 2018-10-12

1. A brief description

This post summarizes recent advances in using R to analyze data from the behavioral sciences and related areas.

2. Recent Applications

myDat
##              Y  X Subj
## 1   0.38677486 x1   S1
## 2   0.58257989 x1   S2
## 3  -2.33588446 x1   S3
## 4  -0.08739159 x1   S4
## 5   0.63968083 x1   S5
## 6  -0.74189412 x1   S6
## 7  -1.06947948 x1   S7
## 8  -2.81933807 x1   S8
## 9  -0.04495638 x1   S9
## 10 -0.12124718 x1  S10
## 11  1.05965646 x2   S1
## 12  3.32622874 x2   S2
## 13  1.21247004 x2   S3
## 14  1.77690386 x2   S4
## 15  3.53044980 x2   S5
## 16  0.10361017 x2   S6
## 17  1.94642269 x2   S7
## 18  1.86340251 x2   S8
## 19  0.05714427 x2   S9
## 20  0.28531278 x2  S10
# Independent-samples comparison of Y between the two levels of X.
# `paired` is deliberately not passed: the formula method of t.test() stops
# with an error when it receives a `paired` argument (R >= 4.4.0), and the
# default is already the unpaired Welch test, so the result is unchanged.
t1 <- t.test(Y ~ X, data = myDat)
print(t1)
## 
##  Welch Two Sample t-test
## 
## data:  Y by X
## t = -3.8267, df = 17.985, p-value = 0.001237
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.217799 -0.936752
## sample estimates:
## mean in group x1 mean in group x2 
##       -0.5611156        1.5161601
# Fit the same two-group comparison as an ordinary linear model: Y regressed
# on the condition label X. The Xx2 coefficient in the summary equals the
# difference between the two group means reported by the t test.
fm1 <- lm(Y ~ X, data = myDat)
summary(fm1)
## 
## Call:
## lm(formula = Y ~ X, data = myDat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2582 -0.6890  0.3040  0.6241  2.0143 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.5611     0.3838  -1.462  0.16102   
## Xx2           2.0773     0.5428   3.827  0.00124 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.214 on 18 degrees of freedom
## Multiple R-squared:  0.4486, Adjusted R-squared:  0.418 
## F-statistic: 14.64 on 1 and 18 DF,  p-value: 0.001236
# Paired t test on the within-subject differences (x1 - x2).
# The formula method of t.test() rejects `paired` (an error in R >= 4.4.0),
# so the two conditions are passed as separate vectors to the default method.
# Rows are sorted by subject first so that element i of each vector belongs
# to the same participant, rather than relying on the row order of myDat.
# The statistics are unchanged; only the `data:` label in the printout
# reflects the new call.
by_subj <- myDat[order(myDat$Subj), ]
y_x1 <- by_subj$Y[by_subj$X == "x1"]
y_x2 <- by_subj$Y[by_subj$X == "x2"]
t2 <- t.test(y_x1, y_x2, paired = TRUE)
print(t2)
## 
##  Paired t-test
## 
## data:  Y by X
## t = -4.2834, df = 9, p-value = 0.00204
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.1743355 -0.9802159
## sample estimates:
## mean of the differences 
##               -2.077276
# Attach lme4 without echoing the messages it emits while loading.
suppressMessages(library(lme4))
# Mixed model: fixed effect of condition X plus a random intercept for each
# subject.
fm2 <- lmer(Y ~ X + (1 | Subj), data = myDat)
# Display the random-effects design matrix: one indicator column per subject,
# with a single 1 in each row marking the subject that row belongs to.
model.matrix(fm2, "random")
## 20 x 10 sparse Matrix of class "dgCMatrix"
##    [[ suppressing 10 column names 'S1', 'S10', 'S2' ... ]]
##                       
## 1  1 . . . . . . . . .
## 2  . . 1 . . . . . . .
## 3  . . . 1 . . . . . .
## 4  . . . . 1 . . . . .
## 5  . . . . . 1 . . . .
## 6  . . . . . . 1 . . .
## 7  . . . . . . . 1 . .
## 8  . . . . . . . . 1 .
## 9  . . . . . . . . . 1
## 10 . 1 . . . . . . . .
## 11 1 . . . . . . . . .
## 12 . . 1 . . . . . . .
## 13 . . . 1 . . . . . .
## 14 . . . . 1 . . . . .
## 15 . . . . . 1 . . . .
## 16 . . . . . . 1 . . .
## 17 . . . . . . . 1 . .
## 18 . . . . . . . . 1 .
## 19 . . . . . . . . . 1
## 20 . 1 . . . . . . . .
# Summarize the mixed model; the t value for Xx2 matches the paired t test
# in magnitude.
summary(fm2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Y ~ X + (1 | Subj)
##    Data: myDat
## 
## REML criterion at convergence: 62.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7865 -0.6259  0.2249  0.6111  1.3595 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subj     (Intercept) 0.2974   0.5454  
##  Residual             1.1759   1.0844  
## Number of obs: 20, groups:  Subj, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.5611     0.3838  -1.462
## Xx2           2.0773     0.4850   4.283
## 
## Correlation of Fixed Effects:
##     (Intr)
## Xx2 -0.632
# Fixed-effects analogue of the mixed model: Subj enters an ordinary linear
# model as a categorical predictor (one coefficient per subject, with S1 as
# the reference level). The Xx2 row reproduces the paired t test
# (|t| = 4.283 on 9 residual degrees of freedom, p = 0.00204).
fm3 <- lm(Y ~ X + Subj, data = myDat)
summary(fm3)
## 
## Call:
## lm(formula = Y ~ X + Subj, data = myDat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3027 -0.6375  0.0000  0.6375  1.3027 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.3154     0.8042  -0.392  0.70403   
## Xx2           2.0773     0.4850   4.283  0.00204 **
## SubjS10      -0.6412     1.0844  -0.591  0.56889   
## SubjS2        1.2312     1.0844   1.135  0.28556   
## SubjS3       -1.2849     1.0844  -1.185  0.26640   
## SubjS4        0.1215     1.0844   0.112  0.91322   
## SubjS5        1.3618     1.0844   1.256  0.24079   
## SubjS6       -1.0424     1.0844  -0.961  0.36155   
## SubjS7       -0.2847     1.0844  -0.263  0.79879   
## SubjS8       -1.2012     1.0844  -1.108  0.29672   
## SubjS9       -0.7171     1.0844  -0.661  0.52498   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.084 on 9 degrees of freedom
## Multiple R-squared:  0.7799, Adjusted R-squared:  0.5354 
## F-statistic:  3.19 on 10 and 9 DF,  p-value: 0.04769
  1. To be added

Appendix

# Simulate the data set used throughout the post: 10 subjects, each measured
# once under condition x1 and once under condition x2.
set.seed(5)
# Component shared by both measurements of the same subject.
x  <- rnorm(n = 10)
# Condition x1: shared component plus independent noise.
x1 <- rnorm(n = 10) + x
# Condition x2: shared component shifted by 1, plus independent noise.
x2 <- rnorm(n = 10) + (x + 1)
# Long format: one row per subject-by-condition observation.
myDat <- data.frame(
  Y    = c(x1, x2),
  X    = rep(c("x1", "x2"), each = 10),
  Subj = rep(sprintf("S%d", 1:10), times = 2)
)

3. References