new grand_mean

JohannesOberpriller · Jun 11, 2021 · 5bea3af · 5bea3af
1 parent cbdbeaa
commit 5bea3af
Show file tree

Hide file tree

Showing 91 changed files with 605 additions and 442 deletions.
diff --git a/D_simulated.R b/D_simulated.R
@@ -1,173 +1,166 @@
-grand_mean = function(fit, mountain, beta, weighted = TRUE, z_statistic = FALSE, mode = "old") {
-
-  ind = (mountain+1):(2*mountain)
-  covariance = vcov(fit)[ind,ind]
-  effect = summary(fit)$coefficients[ind, 1]
-
-  if(!weighted) weights = rep(1/mountain, mountain)
-  else weights = (1/apply(covariance, 1,sum))/ (sum(1/apply(covariance,1,sum)))
-  if(mode == "old") weights = (apply(covariance, 1,sum))/ (sum(covariance))
-  effect_sizes = effect
-  eff = weighted.mean(effect_sizes, weights)
-  var1 = 0
-  for(i in 1:mountain){
-    for(j in 1:mountain){
-      var1 = var1 + covariance[i,j]*weights[i]*weights[j]  
-    }
-  }
-  var2 = sum((weights/(mountain-1))*(effect_sizes-eff)^2)  
-  se = sqrt(var1 + var2)
-
-  if(z_statistic) {
-    p_value = 2*pnorm(abs(eff/se), lower.tail = FALSE)
-    confs = cbind( eff -1.96*se, eff + 1.96*se)
-  } else {
-    p_value = 2*pt(abs(eff/se),df=mountain-1, lower.tail = FALSE)
-    bound = qt(0.975, df = mountain-1)
-    confs = cbind( eff - bound*se, eff + bound*se) 
-  }
-  # c("estimate_effect", "p_value_effect","se_effect", "Slope_in_conf")
-  return(c(eff, p_value, se, as.integer(beta > confs[1,1] & beta < confs[1,2])))
-}
 
 
+simulate_lm_re = function(groups = c(20, 20, 20), group_center = TRUE, sd_re = 0.5) {
+  g = as.factor(unlist(sapply(1:length(groups), function(i) rep(i, groups[i]))))
+  x = runif(sum(groups), -1, 1)
+  X <- matrix(c(rep(1, sum(groups)), x), nrow = sum(groups))
+  beta = 0.0  
+  beta0 = 0.0 
+  randintercep <- rnorm(length(groups), mean = beta0, sd = sd_re)
+  randslope <- rnorm(length(groups), mean = beta, sd = sd_re)
+  mu <- sapply(1:sum(groups), FUN = function(j) X[j,] %*% c(randintercep[g[j]],randslope[g[j]]))
+  sigma <- 0.3
+  y <- rnorm(sum(groups), mu, sd = sigma)
+  if(group_center) {
+  data = data.frame(iv = X[,2], id = g)
+  d <- data %>% 
+    mutate(iv.gmc = iv-mean(iv)) %>%
+    group_by(id) %>% 
+    mutate(iv.cm = mean(iv),
+           iv.cwc = iv-iv.cm) %>%
+    ungroup %>%
+    mutate(iv.cmc = iv.cm-mean(iv.cm))
+  d$y = y  
+  fit_lm = lm(y ~ 0+id+iv.cwc:id, data = d)
+  } else { fit_lm = lm(y ~ 0+x:g + g) }
+  ind = (length(groups)+1):(2*length(groups))
+  sim = list(means = coef(fit_lm)[ind], V = vcov(fit_lm)[ind, ind])
+  return(sim)
+}
+
 
-mountains = 100
-balanced = runif(120, 0, 0.9)
-
-parameter = data.frame( mountain = mountains,  balanced = balanced)
-colnames(parameter) = c("mountains", "balanced")
-parameter$sd = NA
-parameter = data.frame(parameter)
-parameter$min = NA
-parameter$max = NA
-parameter$TypeIold = NA
-parameter$TypeInew = NA
-parameter$TypeMeta = NA
-
-cl = snow::makeCluster(30L)
-snow::clusterEvalQ(cl, {library(lme4); library(lmerTest)})
-snow::clusterExport(cl, list("grand_mean", "parameter"), envir = environment())
-
-results_list = vector("list", 5)
-res_sd = vector("list", 3)
-for(sd in 1:3) {
-  sd_re = c(0.1, 0.5, 2.0)
-  snow::clusterExport(cl, list( "sd_re"), envir = environment())
-  for(m in 1:5) {
-    parameter$mountains = c(3, 5, 10, 50, 200)[m]
-    snow::clusterExport(cl, list("grand_mean", "parameter"), envir = environment())
-
-    tmp = 
-      snow::parLapply(cl, 1:nrow(parameter), function(i) {
-
-        n_each <- 25
-        n_groups = number_groups = parameter[i,]$mountains
-        n = (number_groups)*n_each
-        balanced = parameter[i,]$balanced
-        range = (1 - balanced)/2
-        continue = TRUE
-        while(continue) {
-          prob = runif(n_groups, range, 1-range)
-          g <- sample.int(n_groups, n_each*n_groups, replace = TRUE, prob = prob)
-          if(min(table(g)) > 2) continue = FALSE
-        } # Grouping variable (mountain range)
-        group = as.factor(g)
-        min_max = c(min(table(g)), max(table(g)))
-        parameter[i,]$min = min_max[1]
-        parameter[i,]$max = min_max[2]
-
-        nonSing2 = 1
-        abort2 = 1
-        res = matrix(NA, 2000, 3)
-        while(nonSing2 < 2001){
-          abort2 = abort2 + 1
-          if( (abort2 == 20000)) break
-
-          x <- runif(n, -1, 1) # Temperature
-          X <- matrix(c(rep(1, n), x), nrow = n) # Intercept, Temperature
-
-          sd_randeff = sd_re # sd for random effects
-
-          beta = 0.0   # Temperature effect meaning when the temperature increases 1 degree the respective variable also does 
-          beta0 = 10.0   # Intercept: the mean height of a reporductive plant  
-
-          randintercep <- rnorm(n_groups, mean = beta0, sd = sd_randeff)  # random intercept
-          randslope <- rnorm(n_groups, mean = beta, sd = sd_randeff)      # random slope
-
-          mu <- sapply(1:n, FUN = function(j) X[j,] %*% c(randintercep[g[j]],randslope[g[j]]))
-
-          sigma <- 0.5
-          y <- rnorm(n, mu, sd = sigma)
-
-          # lme4 - REML
-          if(nonSing2 < 2001) {
-            try({
-              fit_lmm <- lmer(y ~ x  + (1|group) + (0+x | group), REML = TRUE) 
-            }, silent = TRUE)
-
-
-            # linear model w mountain range as grouping variable
-            if(is.null(fit_lmm@optinfo$conv$lme4$messages)) {
-              try({
-                fit_lm = lm(y ~ 0+x*group-x)
-                res[nonSing2, 1] = grand_mean(fit_lm, n_groups, beta, mode = "old")[2]
-                res[nonSing2, 2] = grand_mean(fit_lm, n_groups, beta, mode = "new")[2]
-                ind = (n_groups+1):(2*n_groups)
-                covariance = vcov(fit_lm)[ind,ind]
-                meta = metafor::rma.mv( summary(fit_lm)$coefficients[ind, 1], V = covariance, method = "FE", test = "t" )
-                res[nonSing2, 3] = meta$pval
-              }, silent = TRUE)
-              nonSing2 = nonSing2 + 1
-            }
-          }
-
-        }
-        parameter$sd = sd_re
-        TypeOne = c(mean(res[,1] < 0.05, na.rm=TRUE),  mean(res[,2] < 0.05, na.rm=TRUE), mean(res[,3] < 0.05, na.rm=TRUE))
-        parameter$TypeIold[i] = TypeOne[1]
-        parameter$TypeInew[i] = TypeOne[2]
-        parameter$TypeMeta[i] = TypeOne[3]
-        return(parameter[i,])
+mountains = c(1, 2, 5, 10, 50, 100)
+sd_re = c(0.1, 0.5, 1.0)
+unbalanced = c(TRUE, FALSE)
+t = c(TRUE, FALSE)
+center = c(TRUE, FALSE)
+parameter = expand.grid(mountains, sd_re, unbalanced, t, center)
+colnames(parameter) = c("mountains", "sd_re","unbalanced","t","center")
+
+
+cl = snow::makeCluster(36L)
+snow::clusterEvalQ(cl, {library(lme4); library(lmerTest); library(tidyverse)})
+snow::clusterExport(cl, list("grand_mean", "parameter", "simulate_lm_re"), envir = environment())
+
+results = 
+  snow::parLapply(cl, 1:nrow(parameter), function(i) { 
+    pars = parameter[i,]
+    last = 30
+    if(pars$unbalanced) last = 1000
+    results = 
+      sapply(1:10000, function(k) {
+        sim = simulate_lm_re(c(rep(30, pars$mountains), last), group_center = pars$center, sd_re = pars$sd_re)
+        return(grand_mean(sim$means, sim$V, 0, t = pars$t))
       })
-    tmp = do.call(rbind, tmp)
-    tmp$balanced2 = scales::rescale(tmp$max/tmp$min - 1)
-    results_list[[m]] = tmp
+    res = t(results)
+
+    return(list(pars = pars, result = res))
+
+    })
+saveRDS(results, "boot.RDS")
+
+results[[40]]$pars
+
+
+# t-test, centered, sd = 0.1
+vars = as.integer(rownames(parameter[parameter$sd_re=="0.1" & parameter$t & parameter$center & parameter$unbalanced,]))
+
+# 1:6 unbalanced
+# 7:12 balanced
+par(mfrow = c(6, 6), mar = c(1, 2, 1, 1), oma = c(5, 2, 2, 1))
+mounts = c(1, 2, 5, 10, 50, 100) +1
+ylab = c("0.1", "0.5", "1.0")
+counter = 1
+for(sd in c(0.1, 0.5, 1.0)) {
+  vars = as.integer(rownames(parameter[parameter$sd_re==sd & parameter$t & parameter$center & parameter$unbalanced,]))
+  yy = paste0("SD: ", ylab[counter])
+  counter = counter + 1
+  for(i in 1:6) {
+    if(i > 1) yy = ""
+    if(sd=="0.1") hist((results[[vars[i]]][[2]][,2]), main = paste0(mounts[i], " mountains"), xlab = "", col = "blue", ylab = yy, xpd = NA)
+    else hist((results[[vars[i]]][[2]][,2]), main = "", xlab = "", col = "blue", ylab = yy, xpd = NA)
   }
-  res_sd[[sd]] = results_list
 }
+counter = 1
+
+for(sd in c(0.1, 0.5, 1.0)) {
+  vars = as.integer(rownames(parameter[parameter$sd_re==sd & parameter$t & parameter$center & !parameter$unbalanced,]))
+  yy = paste0("SD: ", ylab[counter])
+  counter = counter + 1
+  for(i in 1:6) {
+    if(i > 1) yy = ""
+    hist((results[[vars[i]]][[2]][,2]), main = "", xlab = "", col = "red", ylab = yy, xpd = NA)
+  }
+}
+legend(-0.5, y = -200, xpd = NA, legend = c("unbalanced", "balanced"), horiz = TRUE, pch = 15, col = c("blue", "red"))
+text(-3.5, y = -200, xpd = NA, labels = "t-test + centered groups", pos = 1, font = 2)
 
-results = do.call(rbind, res)
-results50 = results
-par(mfrow = c(1, 3))
-plot(results$TypeIold~results$balanced, ylim = c(0.0, 0.1))
-plot(results$TypeInew~results$balanced, ylim = c(0.0, 0.1))
-plot(results$TypeMeta~results$balanced, ylim = c(0.0, 0.1))
-
-
-results100 = do.call(rbind, res100)
-par(mfrow = c(2, 3))
-results100$balanced2 = scales::rescale(results100$max/results100$min - 1)
 
 
-plot(TypeIold~balanced2,data=results100, ylim = c(0.0, 0.15))
-plot(TypeInew~balanced2,data=results100, ylim = c(0.0, 0.15))
-plot(TypeMeta~balanced2,data=results100, ylim = c(0.0, 0.15))
 
 
-plot(TypeIold~balanced2,data=results100[results100$min > 20,], ylim = c(0.0, 0.15))
-plot(TypeInew~balanced2,data=results100[results100$min > 20,], ylim = c(0.0, 0.15))
-plot(TypeMeta~balanced2,data=results100[results100$min > 20,], ylim = c(0.0, 0.15))
 
 
+# z-stat
+par(mfrow = c(6, 6), mar = c(1, 2, 1, 1), oma = c(5, 2, 2, 1))
+mounts = c(1, 2, 5, 10, 50, 100) +1
+ylab = c("0.1", "0.5", "1.0")
+counter = 1
+for(sd in c(0.1, 0.5, 1.0)) {
+  vars = as.integer(rownames(parameter[parameter$sd_re==sd & !parameter$t & parameter$center & parameter$unbalanced,]))
+  yy = paste0("SD: ", ylab[counter])
+  counter = counter + 1
+  for(i in 1:6) {
+    if(i > 1) yy = ""
+    if(sd=="0.1") hist((results[[vars[i]]][[2]][,2]), main = paste0(mounts[i], " mountains"), xlab = "", col = "blue", ylab = yy, xpd = NA)
+    else hist((results[[vars[i]]][[2]][,2]), main = "", xlab = "", col = "blue", ylab = yy, xpd = NA)
+  }
+}
+counter = 1
+
+for(sd in c(0.1, 0.5, 1.0)) {
+  vars = as.integer(rownames(parameter[parameter$sd_re==sd & !parameter$t & parameter$center & !parameter$unbalanced,]))
+  yy = paste0("SD: ", ylab[counter])
+  counter = counter + 1
+  for(i in 1:6) {
+    if(i > 1) yy = ""
+    hist((results[[vars[i]]][[2]][,2]), main = "", xlab = "", col = "red", ylab = yy, xpd = NA)
+  }
+}
+legend(-0.5, y = -200, xpd = NA, legend = c("unbalanced", "balanced"), horiz = TRUE, pch = 15, col = c("blue", "red"))
+text(-3.5, y = -200, xpd = NA, labels = "z-stat + centered groups", pos = 1, font = 2)
+
+
+
+# t+ non-centered
+par(mfrow = c(6, 6), mar = c(1, 2, 1, 1), oma = c(5, 2, 2, 1))
+mounts = c(1, 2, 5, 10, 50, 100) +1
+ylab = c("0.1", "0.5", "1.0")
+counter = 1
+for(sd in c(0.1, 0.5, 1.0)) {
+  vars = as.integer(rownames(parameter[parameter$sd_re==sd & parameter$t & !parameter$center & parameter$unbalanced,]))
+  yy = paste0("SD: ", ylab[counter])
+  counter = counter + 1
+  for(i in 1:6) {
+    if(i > 1) yy = ""
+    if(sd=="0.1") hist((results[[vars[i]]][[2]][,2]), main = paste0(mounts[i], " mountains"), xlab = "", col = "blue", ylab = yy, xpd = NA)
+    else hist((results[[vars[i]]][[2]][,2]), main = "", xlab = "", col = "blue", ylab = yy, xpd = NA)
+  }
+}
+counter = 1
+
+for(sd in c(0.1, 0.5, 1.0)) {
+  vars = as.integer(rownames(parameter[parameter$sd_re==sd & parameter$t & !parameter$center & !parameter$unbalanced,]))
+  yy = paste0("SD: ", ylab[counter])
+  counter = counter + 1
+  for(i in 1:6) {
+    if(i > 1) yy = ""
+    hist((results[[vars[i]]][[2]][,2]), main = "", xlab = "", col = "red", ylab = yy, xpd = NA)
+  }
+}
+legend(-0.5, y = -200, xpd = NA, legend = c("unbalanced", "balanced"), horiz = TRUE, pch = 15, col = c("blue", "red"))
+text(-3.5, y = -200, xpd = NA, labels = "t-test + non-centered groups", pos = 1, font = 2)
 
-par(mfrow = c(1, 3))
 
-plot(TypeIold~balanced2,data=res_sd[[1]][[5]], ylim = c(0.0, 0.35))
-plot(TypeInew~balanced2,data=res_sd[[1]][[5]], ylim = c(0.0, 0.35))
-plot(TypeMeta~balanced2,data=res_sd[[1]][[5]], ylim = c(0.0, 0.35))
 
 
-plot(TypeIold~balanced2,data=results50[results50$min > 10,], ylim = c(0.0, 0.15))
-plot(TypeInew~balanced2,data=results50[results50$min > 10,], ylim = c(0.0, 0.15))
-plot(TypeMeta~balanced2,data=results50[results50$min > 10,], ylim = c(0.0, 0.15))
diff --git a/Figures.R b/Figures.R
@@ -1511,9 +1511,6 @@ dev.off()
 
 
 
-
-
-
 ########## __Figure S5 Variance estimates lmm REML imbalanced vs REML balanced ########## 
 cols = viridis::viridis(5)
 
@@ -1817,9 +1814,9 @@ sub$balanced = ((sub$balanced))
 sub$balanced2 = scales::rescale(sub$balanced2)
 form = function(resp) paste0(resp, "~ sd + moutain + nobs + balanced2 + sd:moutain + sd:nobs + moutain:nobs")
 form = function(resp) paste0(resp, "~ sd + moutain + nobs + balanced2 + sd:moutain + sd:nobs + moutain:nobs + balanced2:sd + balanced2:moutain + balanced2:nobs")
-powerLM = (lm( form("PowerLM") , data = sub))
+powerLM = (lm( form("PowerLM_MLE") , data = sub))
 powerLMM = (lm(form("PowerLMM") , data = sub))
-TypeLM = (lm(form("TypeOneLM - 0.05"), data = sub))
+TypeLM = (lm(form("TypeOneLM_MLE - 0.05"), data = sub))
 TypeLMM = (lm(form("TypeOneLMM - 0.05"), data = sub))
 
 
@@ -1849,12 +1846,12 @@ sub$moutain = scale((sub$moutain))
 sub$balanced2 = scales::rescale((sub$balanced2)) 
 #form = function(resp) paste0(resp, "~ sd + moutain + nobs + balanced2+ sd:moutain + sd:nobs + moutain:nobs")
 #form = function(resp) paste0(resp, "~ .^2")
-powerLM = (lm( form("PowerLM") , data = sub))
+powerLM = (lm( form("PowerLM_MLE") , data = sub))
 powerLMM = (lm(form("PowerLMM") , data = sub ))
-TypeLM = (lm(form("TypeOneLM - 0.05"), data = sub ))
+TypeLM = (lm(form("TypeOneLM_MLE - 0.05"), data = sub ))
 TypeLMM = (lm(form("TypeOneLMM - 0.05"), data = sub))
 
-plot(NULL, NULL, ylim = c(0, 1), xlim = c(-0.1, 0.1), yaxt="n", las = 1)
+plot(NULL, NULL, ylim = c(0, 1), xlim = c(-0.015, 0.015), yaxt="n", las = 1)
 coefsType = cbind(coef(TypeLMM), coef(TypeLM))
 seType = cbind(coef(summary(TypeLMM))[,2], coef(summary(TypeLM))[,2] )
 plotEffects(coefsType, seType, cols = cols,XX = -0.016, labels =labels_y)
@@ -1887,3 +1884,18 @@ sapply(results_lmm, function(l )mean(l$results_w_lme4_reml$Singularity, na.rm=TR
 sapply(results_lmm, function(l )mean(l$results_w_lme4_ml$Singularity, na.rm=TRUE)),
 sapply(results_glmm, function(l )mean(l$results_w_lme4_ml$Singularity, na.rm=TRUE))
 )
+
+
+
+par(mfrow = c(1,3))
+plot(TypeOneLM~balanced2, data = test, ylim = c(0.0, 1.0))
+plot(TypeOneLM_GM~balanced2, data = test, ylim = c(0.0, 1.0))
+plot(TypeOneLM_Uni~balanced2, data = test, ylim = c(0.0, 1.0))
+
+plot(CoverLM~balanced2, data = test, ylim = c(0.0, 1.0))
+plot(CoverLM_GM~balanced2, data = test, ylim = c(0.0, 1.0))
+plot(CoverLM_Uni~balanced2, data = test, ylim = c(0.0, 1.0))
+
+plot(PowerLM~balanced2, data = test, ylim = c(0.0, 1.0))
+plot(PowerLM_GM~balanced2, data = test, ylim = c(0.0, 1.0))
+plot(PowerLM_Uni~balanced2, data = test, ylim = c(0.0, 1.0))
diff --git a/Figures/Fig_5.pdf b/Figures/Fig_5.pdf