-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathweighted_statistics.R
101 lines (89 loc) · 2.85 KB
/
weighted_statistics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Cleanup the backend in RStudio:
cat("\014") # Clears the console (imitates CTR + L)
rm(list = ls()) # Clears the Global Environment/variables/data
invisible(gc()) # Garbage collector/Clear unused RAM
# Start coding now:
## Test of weighted statistics
quant <- c(.1, .25, .5, .75, .90)
age_site1 <- c(10, 24, 87, 45, 60, 41, 18, 56)
summary_site1 <- psych::describe(age_site1, quant = quant)
age_site2 <- c(74, 16, 87)
summary_site2 <- psych::describe(age_site2, quant = quant)
age_site3 <- c(12, 23, 34, 45, 75, 26)
summary_site3 <- psych::describe(age_site3, quant = quant)
age_control <- c(age_site1, age_site2, age_site3)
summary_control <- psych::describe(age_control, quant = quant)
## Combine the parameters over all sites:
statistic_params <-
c(
"n",
"mean",
"sd",
"median",
"min",
"max",
"skew",
"kurtosis",
"se",
"Q0.1",
"Q0.25",
"Q0.5",
"Q0.75",
"Q0.9"
)
all <- list()
for (param in statistic_params) {
all[[param]] <-
c(summary_site1[[param]], summary_site2[[param]], summary_site3[[param]])
}
## Start anlysis:
weighted <-
list(
"mean" = Hmisc::wtd.mean(x = all[["mean"]], weights = all[["n"]]),
"min" = min(x = all[["min"]], na.rm = T),
# "Q0.1" = Hmisc::wtd.mean(x = all[["Q0.1"]], weights = all[["n"]]),
# "Q0.1" = reldist::wtd.quantile(x = all[["Q0.1"]], q = 0.1, weight = all[["n"]]),
"Q0.1" = MetricsWeighted::weighted_quantile(x = all[["Q0.1"]], w = all[["n"]], probs = c(0.1)),
"Q0.25" = Hmisc::wtd.quantile(x = all[["Q0.25"]], weights = all[["n"]], probs = c(0.25)),
# "median" = Hmisc::wtd.mean(x = all[["median"]], weights = all[["n"]]),
# "median" = reldist::wtd.quantile(x = all[["median"]], weights = all[["n"]]),
"median" = MetricsWeighted::weighted_median(x = all[["median"]], weights = all[["n"]]),
"sd" = Hmisc::wtd.var(x = all[["sd"]], weights = all[["n"]]),
"Q0.75" = Hmisc::wtd.quantile(x = all[["Q0.75"]], weights = all[["n"]], probs = c(0.75)),
"Q0.9" = Hmisc::wtd.quantile(x = all[["Q0.9"]], weights = all[["n"]], probs = c(0.9)),
"max" = max(all[["max"]], na.rm = T),
"skew" = 0,
"kurtosis" = 0,
"se" = 0
)
## Check if the weighted stats equals the control:
for (param in names(weighted)) {
digits <- 6
weighted_tmp <- round(weighted[[param]], digits = digits)
control_tmp <- round(summary_control[[param]], digits = digits)
if (weighted_tmp == control_tmp) {
print(paste0(
"[",
param,
"] \U2714 successfully checked (",
param,
" = ",
control_tmp,
")."
))
} else{
## The weighted result seems not to be equal to the control, so check by hand:
print(
paste0(
"\U2718 [",
param,
"] ",
"(weighted) -> ",
weighted[[param]],
" = ",
summary_control[[param]],
" (<- control)?"
)
)
}
}