# 正确理解和使用P值

### 侃侃迩行 · 2017-03-01

Informally, a p-value is the probability under a specified statistical model that a statistical summary of the data (e.g., the sample mean difference between two compared groups) would be equal to or more extreme than its observed value.

P值检验的是研究者采集到的样本与某一个统计假设的相容程度，而不是这个统计假设的正确与否。

# 1. P值、效力和效应

• 零假设（$$H_0$$）：该疾病不影响该生理指标；

• 备择假设（$$H_1$$）: 该疾病影响该生理指标；

• 不能支持零假设：样本数据显示该疾病影响该生理指标，$$A$$
• 支持零假设：样本数据显示该疾病不影响该生理指标，$$1-A$$

# Parameters shared by the four illustration panels.
n <- 10          # passed as `df` to qt() and dt() below
sd <- 1          # scale applied to the x grid and the rejection bounds
p_value <- 0.05  # two-sided significance level
Cohen_D <- 4     # shift of the H1 curve, expressed in units of `sd`

# Two-sided critical value and the rejection bounds around the H0 mean.
Mean0 <- 0
t_value <- qt(p_value / 2, df = n, lower.tail = FALSE)
lw <- Mean0 - t_value * sd
up <- Mean0 + t_value * sd

# H0 curve: t density located at Mean0 and scaled by sd. The grid is
# standardized before calling dt() so the curve stays consistent with
# lw/up if Mean0 or sd are changed; with Mean0 = 0 and sd = 1 this is
# exactly dt(X0, df = n).
X0 <- Mean0 + seq(-4, 4, by = 0.01) * sd
Y0 <- dt((X0 - Mean0) / sd, df = n) / sd

# H1 curve: the H0 curve shifted by the effect size. The shift is
# Mean1 - Mean0 (not Mean1), so a non-zero Mean0 is not added twice.
Mean1 <- Mean0 + Cohen_D * sd
X1 <- X0 + (Mean1 - Mean0)
Y1 <- Y0

# Common x-axis limits used by every panel.
Xmin <- min(X0, X1)
Xmax <- max(X0, X1)
# Panel 1: H0 density with both rejection tails (alpha / 2 each) shaded.
plot(X0, Y0, type = "n", xlab = "", ylab = "",
     main = NULL, axes = FALSE, xlim = c(Xmin, Xmax))
lines(X0, Y0)
axis(1, at = seq(Xmin, Xmax, sd), pos = 0)
# NOTE(review): the original mtext() call was cut off after `side = 3,`
# and never closed, so the script did not parse. `at = Mean0` places the
# label above the centre of the H0 curve -- confirm the intended position.
mtext(expression(H[0]), side = 3, at = Mean0)
# Upper critical value on the x scale (up = Mean0 + t_value * sd).
abline(v = up, lty = 3)

# Upper rejection tail: close the polygon down to the baseline at both ends.
local({
  in_tail <- X0 >= up
  xs <- X0[in_tail]
  polygon(c(min(xs), xs, max(xs)),
          c(0, Y0[in_tail], 0),
          col = rgb(1, 0, 0, 0.4))
})
# Lower rejection tail.
local({
  in_tail <- X0 <= lw
  xs <- X0[in_tail]
  polygon(c(min(xs), xs, max(xs)),
          c(0, Y0[in_tail], 0),
          col = rgb(1, 0, 0, 0.4))
})
text(x = up + 0.4, y = -0.01,
     expression(alpha / 2), pos = 3, col = "blue")

# Panel 2: H0 density with the region between the two critical values
# (probability 1 - alpha) shaded.
plot(X0, Y0, type = "n",
     xlab = "", ylab = "",
     main = NULL, axes = FALSE,
     xlim = c(Xmin, Xmax))
lines(X0, Y0)
axis(1, at = seq(Xmin, Xmax, sd), pos = 0)
# NOTE(review): the original mtext() call was cut off after `side = 3,`
# and never closed, so the script did not parse. `at = Mean0` places the
# label above the centre of the H0 curve -- confirm the intended position.
mtext(expression(H[0]), side = 3, at = Mean0)
# Upper critical value on the x scale (up = Mean0 + t_value * sd).
abline(v = up, lty = 3)

# Shade everything between lw and up, closing the polygon on the baseline.
local({
  inside <- X0 >= lw & X0 <= up
  xs <- X0[inside]
  polygon(c(min(xs), xs, max(xs)),
          c(0, Y0[inside], 0),
          col = rgb(0, 0, 1, 0.4))
})
text(x = Mean0, y = 0.1,
     expression(1 - alpha), pos = 3)

# Panel 3: H0 and H1 densities; the part of the H1 curve that lies below
# the upper critical value (labelled beta) is shaded.
plot(X1, Y1, type = "n", xlab = "", ylab = "",
     main = NULL, axes = FALSE, xlim = c(Xmin, Xmax))
lines(X0, Y0)
lines(X1, Y1)
# NOTE(review): both mtext() calls were cut off after `side = 3,` and never
# closed, so the script did not parse. `at = Mean0` / `at = Mean1` place
# each label above the centre of its curve -- confirm the intended position.
mtext(expression(H[0]), side = 3, at = Mean0)
# Upper critical value on the x scale (up = Mean0 + t_value * sd).
abline(v = up, lty = 3)
axis(1, at = seq(Xmin, Xmax, sd), pos = 0)
mtext(expression(H[1]), side = 3, at = Mean1)

# Area under the H1 curve to the left of the critical value.
local({
  below <- X1 <= up
  xs <- X1[below]
  polygon(c(min(xs), xs, max(xs)),
          c(0, Y1[below], 0),
          col = rgb(0.5, 0, 0.5, 0.4))
})
text(x = up - 0.35, y = 0.00,
     expression(beta), pos = 3, col = "blue")

# Panel 4: H0 and H1 densities; the part of the H1 curve at or above the
# upper critical value (labelled 1 - beta) is shaded.
plot(X1, Y1, type = "n", xlab = "", ylab = "",
     main = NULL, axes = FALSE, xlim = c(Xmin, Xmax))
lines(X0, Y0)
lines(X1, Y1)
# NOTE(review): both mtext() calls were cut off after `side = 3,` and never
# closed, so the script did not parse. `at = Mean0` / `at = Mean1` place
# each label above the centre of its curve -- confirm the intended position.
mtext(expression(H[0]), side = 3, at = Mean0)
# Upper critical value on the x scale (up = Mean0 + t_value * sd).
abline(v = up, lty = 3)
axis(1, at = seq(Xmin, Xmax, sd), pos = 0)
mtext(expression(H[1]), side = 3, at = Mean1)

# Area under the H1 curve to the right of the critical value.
local({
  above <- X1 >= up
  xs <- X1[above]
  polygon(c(min(xs), xs, max(xs)),
          c(0, Y1[above], 0),
          col = rgb(0.5, 0.5, 0.5, 0.4))
})
text(x = Mean1, y = 0.1,
     expression(1 - beta), pos = 3, col = "blue")

$$H_0$$ $$H_1$$

# Grid of significance levels at which the power is evaluated.
pvalue <- seq(0, 0.1, by = 0.001)

# Power of a two-sided, two-sample t test with n = 10 per group, a true
# difference of 1.32 and sd = 1, at a single significance level.
#
# pvalue: significance level, passed to power.t.test() as `sig.level`
#         (one number per call).
# Returns the `power` element of the power.t.test() result.
ptt <- function(pvalue) {
  # The result is stored under a name that shadows neither stats::pt()
  # nor this function itself.
  fit <- power.t.test(
    n = 10,
    delta = 1.32,
    sd = 1,
    sig.level = pvalue,
    type = "two.sample",
    alternative = "two.sided"
  )
  # The original read `pt\$power`; the backslash is not valid R.
  fit$power
}

# vapply() guarantees a numeric vector with one power value per level.
power <- vapply(pvalue, FUN = ptt, FUN.VALUE = numeric(1))

# Power curve over the grid of significance levels.
plot(x = pvalue, y = power,
     type = "l", lwd = 2,
     ylim = c(0, 1),
     xlab = expression(alpha),
     ylab = "Power",
     font.lab = 2)

# Dotted guide lines for alpha = 0.05 and alpha = 0.005: a vertical drop to
# the x axis and a horizontal run to the y axis. Both start at -1, i.e.
# outside the plotting region.
invisible(lapply(c(0.05, 0.005), function(level) {
  reached <- ptt(level)
  lines(x = c(level, level), y = c(-1, reached), lty = 3)
  lines(x = c(-1, level), y = c(reached, reached), lty = 3)
}))

# Worked example: two-sided p-value from a t statistic of the form
# HD / (Sp * sqrt(2 / n)) on 2 * (n - 1) degrees of freedom.
# Presumably HD is the observed difference in means and Sp the pooled
# standard deviation of two groups of size n -- confirm against the text.
n <- 10
HD <- 1.2
Sp <- 1.1

df <- 2 * (n - 1)
t <- HD / (Sp * sqrt(2 / n))

# Upper-tail probability doubled for the two-sided test.
pvalue0 <- 2 * pt(q = t, df = df, lower.tail = FALSE)
pvalue0
## [1] 0.02529368

# 2. 贝叶斯因子

$\overline{B}\leq -\frac{1}{e\times P \times \ln(P)}$，其中 $P$ 是我们计算出来的显著性水平（该上界适用于 $P < 1/e$）。

# Bound on the Bayes factor as a function of the p-value:
# -1 / (e * P * log(P)), evaluated elementwise.
#
# P: numeric vector of p-values.
# Returns a numeric vector of the same length as P.
B <- function(P) {
  denominator <- exp(1) * P * log(P)
  -1 / denominator
}
# Bayes factor bound over the grid of p-values.
plot(x = pvalue, y = B(pvalue),
     type = "l", lwd = 2,
     ylim = c(0, 55),
     xlab = "P",
     ylab = expression(bar(B)),
     font.lab = 2)

# Guide lines for P = 0.05 and P = 0.005. Vertical drops are dotted
# (lty = 3) and horizontal runs dashed (lty = 2); only the horizontal run
# for P = 0.005 is drawn in blue. All guides start at -100, outside the
# plotting region.
local({
  bound_05 <- B(0.05)
  bound_005 <- B(0.005)
  lines(x = c(0.05, 0.05), y = c(-100, bound_05), lty = 3)
  lines(x = c(-100, 0.05), y = c(bound_05, bound_05), lty = 2)
  lines(x = c(0.005, 0.005), y = c(-100, bound_005), lty = 3)
  lines(x = c(-100, 0.005), y = c(bound_005, bound_005),
        lty = 2, col = "blue")
})

# Power plotted against the Bayes factor bound, both evaluated on the same
# grid of significance levels. No ylab is given, so the y-axis label
# falls back to the default derived from the `power` argument.
plot(x = B(pvalue), y = power,
     type = "l", lwd = 2,
     ylim = c(0, 1),
     xlab = expression(bar(B)),
     font.lab = 2)

# 3. 错误发现率

# Expected false discovery rate over a grid of pi0 values.
# In the formula pi0 weights the false-positive rate alpha and 1 - pi0
# weights the power; presumably pi0 is the proportion of true null
# hypotheses -- confirm against the text.
pi0 <- seq(0, 1, by = 0.01)
alpha <- 0.05
# NOTE: from here on `power` is the single power value at `alpha`,
# replacing any earlier object of the same name.
power <- ptt(alpha)
eFDR <- (alpha * pi0) / (alpha * pi0 + power * (1 - pi0))
# A stray backtick after the closing parenthesis made the original
# statement unparseable; it is removed here.
plot(pi0, eFDR, type = "l",
     xlab = expression(pi[0]))

# 5. 延伸阅读

Altman, N., & Krzywinski, M. (2017). Points of Significance: Interpreting P values. Nat Meth, 14(3), 213-214. doi:10.1038/nmeth.4210

Nuzzo, R. (2014). Statistical errors. Nature, 506, 150-152. doi:10.1038/506150a

Wasserstein, R. L., & Lazar, N. A. (2016). The ASA's Statement on p-Values: Context, Process, and Purpose. The American Statistician, 70(2), 129-133. doi:10.1080/00031305.2016.1154108