# 1. 一维列联表和配合度检验

# Hardy-Weinberg genotype frequencies for an assumed allele frequency.
p <- 0.25      # frequency of allele A
pA <- p
pa <- 1 - pA   # frequency of allele a
pAA <- pA^2
pAa <- 2 * pA * pa
paa <- pa^2
c(pAA, pAa, paa)
## [1] 0.0625 0.3750 0.5625

##             Genotype
## Type            AA   Aa     aa
##   Observed   10.00 50.0  40.00
##   Expected    6.25 37.5  56.25
##   Difference  3.75 12.5 -16.25

# Per-genotype standardized residuals |O - E| / sqrt(E) and their two-sided
# normal tail probabilities.
z <- abs(as.numeric((Observed - Expected) / sqrt(Expected)))
# Kept under its own name so the allele frequency `p` is not overwritten by a
# vector of p-values; the printed row label stays "p".
p_cell <- 2 * pnorm(z, lower.tail = FALSE)
rbind(z, p = p_cell)
##        [,1]       [,2]       [,3]
## z 1.5000000 2.04124145 2.16666667
## p 0.1336144 0.04122683 0.03026028

$\chi^2 = \sum\frac{(O - E)^2}{E}$

# Pearson chi-squared statistic: the squared standardized residuals summed
# over all genotype classes.
Chi_sqr <- sum(z^2)
# Degrees of freedom: number of classes minus one.
df <- length(Observed) - 1
# Upper-tail probability of the statistic under the chi-squared distribution.
p_value <- pchisq(q = Chi_sqr, df = df, lower.tail = FALSE)
cbind(Chi_sqr, p_value)
##       Chi_sqr    p_value
## [1,] 11.11111 0.00386592

# Same goodness-of-fit test through chisq.test(), supplying the genotype
# proportions implied by allele frequency p as the null probabilities.
p <- 0.25
prop <- c(p^2, 2 * p * (1 - p), (1 - p)^2)
suppressWarnings(chisq.test(Observed, p = prop))
##
##  Chi-squared test for given probabilities
##
## data:  Observed
## X-squared = 11.111, df = 2, p-value = 0.003866

# Allele frequency estimated from the observed counts: each of the 10 AA
# individuals carries two A alleles and each of the 50 Aa individuals one,
# out of 2 * 100 alleles in total.
p <- (2 * 10 + 50) / (2 * 100)
p
## [1] 0.35

##             Genotype
## Type            AA   Aa    aa
##   Observed   10.00 50.0 40.00
##   Expected   12.25 45.5 42.25
##   Difference -2.25  4.5 -2.25

# Goodness-of-fit test against expected counts built from the estimated
# allele frequency.
z <- abs(as.numeric((Observed - Expected) / sqrt(Expected)))
# Pearson chi-squared statistic: sum of squared standardized residuals.
# (The per-cell p-values were never used here and overwrote the estimated
# allele frequency `p`, so they are no longer computed.)
Chi_sqr <- sum(z^2)
# Number of classes minus one, minus one more because the allele frequency
# was estimated from the same data.
df <- length(Observed) - 1 - 1
p_value <- pchisq(Chi_sqr, df, lower.tail = FALSE)
cbind(Chi_sqr, p_value)
##        Chi_sqr   p_value
## [1,] 0.9781427 0.3226578

# 2. 二维列联表和独立性检验

##             Genotype
## Hypertension AA Aa aa Total
##        No     3 45 38    86
##        Yes    7  5  2    14
##        Total 10 50 40   100

## 2.1. 费希尔精确检验

# Fisher's exact test on the hypertension-by-genotype contingency table.
fisher.test(x = table3)
##
##  Fisher's Exact Test for Count Data
##
## data:  table3
## p-value = 2.607e-05
## alternative hypothesis: two.sided

（P.S. 博主目前只会用 R 语言中的 fisher.test() 计算 P 值；如果有谁知道如何手动计算大于 2×2 的列联表的费希尔精确检验，敬请不吝赐教，不胜感激。）

## 2.2. 卡方检验

##             Genotype
## Hypertension  AA Aa   aa
##          No  8.6 43 34.4
##          Yes 1.4  7  5.6

# Pearson chi-squared statistic for the two-way table: squared deviations
# from the expected counts, scaled by the expected counts, summed over cells.
chi_sqr <- sum((table3 - table3exp)^2 / table3exp)
# Degrees of freedom for independence: (rows - 1) * (columns - 1).
df <- prod(dim(table3) - 1)
p_value <- pchisq(q = chi_sqr, df = df, lower.tail = FALSE)
cbind(chi_sqr, p_value)
##       chi_sqr      p_value
## [1,] 29.40199 4.125136e-07

# Built-in Pearson chi-squared test of independence on the same table.
# Warnings are silenced; presumably this hides chisq.test()'s small-expected-
# count warning (one expected count is 1.4) -- confirm.
suppressWarnings(chisq.test(x = table3))
##
##  Pearson's Chi-squared test
##
## data:  table3
## X-squared = 29.402, df = 2, p-value = 4.125e-07

##             Genotype
## Hypertension AA Aa aa Total
##        No     3 27  8    38
##        Yes    7 23 32    62
##        Total 10 50 40   100

##             Genotype
## Hypertension  AA Aa   aa
##          No  3.8 19 15.2
##          Yes 6.2 31 24.8

# Fisher's exact test on the second hypertension-by-genotype table.
fisher.test(x = table4)
##
##  Fisher's Exact Test for Count Data
##
## data:  table4
## p-value = 0.003103
## alternative hypothesis: two.sided
# Pearson chi-squared test of independence on the second table.
# Warnings are silenced; presumably this hides chisq.test()'s small-expected-
# count warning (one expected count is 3.8) -- confirm.
suppressWarnings(chisq.test(x = table4))
##
##  Pearson's Chi-squared test
##
## data:  table4
## X-squared = 11.205, df = 2, p-value = 0.003688

# 4. 参考文献

Altman, N., & Krzywinski, M. (2017). Points of Significance: Tabular data. Nature Methods, 14(4), 329-330. doi: 10.1038/nmeth.4239