This is an R Markdown document explaining all the functions used by the language and learning lab. The functions are stored in the tools folder.
This function takes as its input a dataframe which is the output of the Bf_set function. This will have a set of betas, standard errors and sd-theory values. The function adds an additional column to this table which shows the ranges of sd-theory values over which the BF meets each criterion: substantial evidence for the null (BF <= 1/3); ambiguous (1/3 < BF < 3); substantial evidence for H1 (BF >= 3).
addBf_ranges <- function(Bf_df, sdtheoryrange) {
  BFranges = vector()
  for (b in 1:dim(Bf_df)[1]) {
    range = Bf_range(sd = as.numeric(as.character(Bf_df$SE[b])),
                     obtained = as.numeric(as.character(Bf_df$'Mean difference'[b])),
                     meanoftheory = 0, sdtheoryrange = sdtheoryrange)
    from_table = vector()
    to_table = vector()
    cat = vector()
    category_table = vector()
    for (i in 1:dim(range)[1]) { # go through each value in the range and categorize it
      # categorize the current BF
      if (range[i, 2] <= (1/3)) {
        cat[i] = "H0"               ## below or equal to 1/3
      } else if (range[i, 2] < 3) { ## NOT below or equal to 1/3, IS below 3
        cat[i] = "ambig"
      } else {                      ## NOT below or equal to 1/3, NOT below 3
        cat[i] = "substH1"
      }
      # adjust the table
      j = length(category_table)
      if (i == 1) { # first one
        category_table[j + 1] = cat[i]
        from_table[j + 1] = range[i, 1]
      } else if (cat[i] != cat[i - 1]) { # NOT the first one, IS one where we need to start a new range
        to_table[j] = range[i - 1, 1]
        category_table[j + 1] = cat[i]
        from_table[j + 1] = range[i, 1]
      }
      if (i == dim(range)[1]) { # if it's the last one, finish off the table
        # close the most recent range (recompute the index in case a new range was just opened)
        to_table[length(category_table)] = range[i, 1]
      }
    }
    # go through the little table and turn it into a string of ranges
    string = ""
    for (i in 1:length(category_table)) {
      string = paste(string, category_table[i], ":", round(from_table[i], 3), "-", round(to_table[i], 3))
    }
    BFranges[b] = string
  }
  out = cbind(Bf_df, BFranges)
  return(out)
}
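For illustration, a minimal sketch of how addBf_ranges might be called; the numbers here are made up, and the table comes from Bf_set (defined later in this document):
# Build a one-row BF table with Bf_set, then annotate it with the
# categorised BF ranges for H1 SDs between 0 and 2.
bf_table <- Bf_set(names_list = c("condA_vs_condB"), meandiff_list = c(0.4),
                   sd_list = c(0.15), h1_list = c(0.5), tail_list = c(1))
bf_table_ranges <- addBf_ranges(bf_table, sdtheoryrange = seq(0, 2, by = 0.05))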
Author: Elizabeth Wonnacott
Similar to addBf_ranges2 except that it works out the ranges by working down/up from the given sd_theory value in increments given by stepsizes, up to a maximum given by maxs. Stepsizes/maxs can either be a single value used across all the coefficients, or a vector of values (one per row; its length must match the number of rows).
addBf_ranges3 = function(Bf_df, stepsizes, maxs, method = "old") {
  if (method == "old") {
    RRminV = vector()
    RRmaxV = vector()
    for (b in 1:dim(Bf_df)[1]) {
      if (length(stepsizes) == 1) {stepsize = stepsizes} else {stepsize = stepsizes[b]}
      if (length(maxs) == 1) {max = maxs} else {max = maxs[b]}
      Ndp = num.decimals(stepsize)
      BF = as.numeric(as.character(Bf_df$Bf[b]))
      BType = BfClassify(BF)
      sdtheory = as.numeric(as.character(Bf_df$sdtheory[b]))
      sd = as.numeric(as.character(Bf_df$std.Error[b]))
      obtained = as.numeric(as.character(Bf_df$estimate[b]))
      tail = as.numeric(as.character(Bf_df$BFtail[b]))
      # work upwards from sdtheory to get RRmax
      RRmax = ""
      lastsdtheory = sdtheory
      newsdtheory = 0 # start below max so that the loop runs at least once
      while (newsdtheory <= max) {
        newsdtheory = lastsdtheory + stepsize
        newBF = Bf(sd = sd, obtained = obtained, uniform = NULL, meanoftheory = 0,
                   sdtheory = newsdtheory, tail = tail)$BayesFactor
        if (BfClassify(newBF) != BType) {
          RRmax = format(round(lastsdtheory, Ndp))
          break
        }
        lastsdtheory = newsdtheory
      }
      if (RRmax == "" & BType == "h0") {RRmax = "inf"}
      if (RRmax == "" & BType == "h1") {RRmax = paste(">", max, sep = "")}
      if (RRmax == "" & BType == "ambig") {RRmax = paste(">", max, sep = "")}
      # work downwards from sdtheory to get RRmin
      RRmin = ""
      lastsdtheory = sdtheory
      newsdtheory = lastsdtheory - stepsize
      while (newsdtheory > 0) {
        newBF = Bf(sd = sd, obtained = obtained, uniform = NULL, meanoftheory = 0,
                   sdtheory = newsdtheory, tail = tail)$BayesFactor
        if (BfClassify(newBF) != BType) {
          RRmin = format(round(lastsdtheory, Ndp))
          break
        }
        lastsdtheory = newsdtheory
        newsdtheory = lastsdtheory - stepsize
      }
      if (RRmin == "" & BType == "ambig") {RRmin = 0}
      if (RRmin == "" & BType == "h1") {RRmin = paste(" 0< & <", format(round(lastsdtheory, Ndp), nsmall = Ndp), sep = "")}
      if (RRmin == "" & BType == "h0") {RRmin = paste(" 0< & <", format(round(lastsdtheory, Ndp), nsmall = Ndp), sep = "")}
      RRminV[b] = RRmin
      RRmaxV[b] = RRmax
    }
    out = cbind(Bf_df, RRminV, RRmaxV)
    # the two columns just added are always the last two, whatever the input width
    colnames(out)[(ncol(out) - 1):ncol(out)] = c("RRmin", "RRmax")
    return(out)
  }
  if (method == "new") {
    RRminV = vector()
    RRmaxV = vector()
    for (b in 1:dim(Bf_df)[1]) {
      if (length(stepsizes) == 1) {stepsize = stepsizes} else {stepsize = stepsizes[b]}
      if (length(maxs) == 1) {max = maxs} else {max = maxs[b]}
      Ndp = num.decimals(stepsize)
      BF = as.numeric(as.character(Bf_df$Bf[b]))
      BType = BfClassify(BF)
      sdtheory = as.numeric(as.character(Bf_df$sdtheory[b]))
      sd = as.numeric(as.character(Bf_df$std.Error[b]))
      obtained = as.numeric(as.character(Bf_df$estimate[b]))
      tail = as.numeric(as.character(Bf_df$BFtail[b]))
      # work upwards from sdtheory to get RRmax
      RRmax = ""
      lastsdtheory = sdtheory
      newsdtheory = 0 # start below max so that the loop runs at least once
      while (newsdtheory <= max) {
        newsdtheory = lastsdtheory + stepsize
        if (colnames(Bf_df)[4] == "t") { # t statistic: use a t likelihood with the model's df
          dfdata = as.numeric(as.character(Bf_df$df[b]))
          newBF = Bf(sd = sd, obtained = obtained, dfdata = dfdata, likelihood = c("t"),
                     modeloftheory = c("normal"), modeoftheory = 0,
                     scaleoftheory = newsdtheory, tail = tail, method = "new")
        }
        if (colnames(Bf_df)[4] == "z") { # z statistic: use a normal likelihood
          newBF = Bf(sd = sd, obtained = obtained, likelihood = c("normal"),
                     modeloftheory = c("normal"), modeoftheory = 0,
                     scaleoftheory = newsdtheory, tail = tail, method = "new")
        }
        if (BfClassify(newBF) != BType) {
          RRmax = format(round(lastsdtheory, Ndp))
          break
        }
        lastsdtheory = newsdtheory
      }
      if (RRmax == "" & BType == "h0") {RRmax = "inf"}
      if (RRmax == "" & BType == "h1") {RRmax = paste(">", max, sep = "")}
      if (RRmax == "" & BType == "ambig") {RRmax = paste(">", max, sep = "")}
      # work downwards from sdtheory to get RRmin
      RRmin = ""
      lastsdtheory = sdtheory
      newsdtheory = lastsdtheory - stepsize
      while (newsdtheory > 0) {
        if (colnames(Bf_df)[4] == "t") {
          dfdata = as.numeric(as.character(Bf_df$df[b]))
          newBF = Bf(sd = sd, obtained = obtained, dfdata = dfdata, likelihood = c("t"),
                     modeloftheory = c("normal"), modeoftheory = 0,
                     scaleoftheory = newsdtheory, tail = tail, method = "new")
        }
        if (colnames(Bf_df)[4] == "z") {
          newBF = Bf(sd = sd, obtained = obtained, likelihood = c("normal"),
                     modeloftheory = c("normal"), modeoftheory = 0,
                     scaleoftheory = newsdtheory, tail = tail, method = "new")
        }
        if (is.nan(newBF)) {
          RRmin = format(round(lastsdtheory, Ndp))
          break
        }
        if (BfClassify(newBF) != BType) {
          RRmin = format(round(lastsdtheory, Ndp))
          break
        }
        lastsdtheory = newsdtheory
        newsdtheory = lastsdtheory - stepsize
      }
      if (RRmin == "" & BType == "ambig") {RRmin = 0}
      if (RRmin == "" & BType == "h1") {RRmin = paste(" 0< & <", format(round(lastsdtheory, Ndp), nsmall = Ndp), sep = "")}
      if (RRmin == "" & BType == "h0") {RRmin = paste(" 0< & <", format(round(lastsdtheory, Ndp), nsmall = Ndp), sep = "")}
      RRminV[b] = RRmin
      RRmaxV[b] = RRmax
    }
    out = cbind(Bf_df, RRminV, RRmaxV)
    colnames(out)[(ncol(out) - 1):ncol(out)] = c("RRmin", "RRmax")
    return(out)
  }
}
This function takes a table output by the addBf_ranges3 function (or the Bf_model function) and, for any rows where the tails are set to 1, adjusts the p values to be one-tailed. This means that the tails for the BFs and the p values are consistent.
add_p_tails_match_Bf <- function(Bfs.table) {
  Bfs.table$p_tails_match_Bf = Bfs.table$p
  # z statistics: halve p when z is in the predicted (positive) direction
  Bfs.table$p_tails_match_Bf[Bfs.table$BFtail == 1 & Bfs.table$z > 0] =
    Bfs.table$p[Bfs.table$BFtail == 1 & Bfs.table$z > 0] / 2
  Bfs.table$p_tails_match_Bf[Bfs.table$BFtail == 1 & Bfs.table$z < 0] =
    1 - Bfs.table$p[Bfs.table$BFtail == 1 & Bfs.table$z < 0] / 2
  # t statistics: same adjustment
  Bfs.table$p_tails_match_Bf[Bfs.table$BFtail == 1 & Bfs.table$t > 0] =
    Bfs.table$p[Bfs.table$BFtail == 1 & Bfs.table$t > 0] / 2
  Bfs.table$p_tails_match_Bf[Bfs.table$BFtail == 1 & Bfs.table$t < 0] =
    1 - Bfs.table$p[Bfs.table$BFtail == 1 & Bfs.table$t < 0] / 2
  Bfs.table$p_tails_match_Bf = round(Bfs.table$p_tails_match_Bf, 3)
  Bfs.table$p = Bfs.table$p_tails_match_Bf
  Bfs.table$p_tails_match_Bf = NULL
  return(Bfs.table)
}
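In other words, the adjustment halves the two-tailed p value when the statistic is in the predicted (positive) direction, and takes 1 - p/2 otherwise. A minimal illustration with made-up numbers:
p_two <- 0.06
p_two / 2      # one-tailed p if z (or t) is positive: 0.03
1 - p_two / 2  # one-tailed p if z (or t) is negative: 0.97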
Function to adjust the intercept of a model so that it is tested against chance rather than against zero: the chance value is subtracted from the intercept estimate, and the z and p values are recomputed from the adjusted estimate.
adjust_intercept_model <- function(model, chance, intercept_list = c("(Intercept)")) {
  summary = summary(model)$coefficients
  for (i in 1:length(intercept_list)) {
    summary[intercept_list[i], "Estimate"] = summary[intercept_list[i], "Estimate"] - chance
    summary[intercept_list[i], "z value"] = summary[intercept_list[i], "Estimate"] / summary[intercept_list[i], "Std. Error"]
    summary[intercept_list[i], "Pr(>|z|)"] = 2 * pnorm(-abs(summary[intercept_list[i], "z value"]))
  }
  return(summary)
}
Author of the new version: Bence Palfi.
Author of the old version: Baguley and Kaye (2010).
This function computes the Bayes factor and is equivalent to the Dienes (2008) calculator.
Integration of the new and old versions was made available by Catriona Silvey on 16.04.2021.
The Bf function has recently been updated. The new version has options for a likelihood that is either normal- or t-distributed, and a model of H1 that is either uniform, or normal, t- or Cauchy-distributed, with the normal/t/Cauchy models being 1- or 2-tailed. In addition, the 1-tailed models are compatible with any mode (unlike the Dienes, 2008, calculator, which assumed that 1-tailed models had a mode of zero).
The Bf function incorporates both old and new methods (Baguley & Kaye, 2010, and Bence Palfi's more recent version). By default it uses the Baguley & Kaye version (for backward compatibility); to use the Palfi version, specify method = "new".
Bf <- function(sd, obtained, dfdata = 1, likelihood = c("normal", "t"),
               modeloftheory = c("normal", "t", "cauchy", "uniform"), uniform,
               lower = 0, upper = 1, meanoftheory = 0, sdtheory = 1, modeoftheory = 0,
               scaleoftheory = 1, dftheory = 1, tail = 1, method = "old"){
  if (method == "old") {
    area <- 0
    if (identical(uniform, 1)) {
      # uniform model of H1: integrate the likelihood over [lower, upper]
      theta <- lower
      range <- upper - lower
      incr <- range / 2000
      for (A in -1000:1000) {
        theta <- theta + incr
        dist_theta <- 1 / range
        height <- dist_theta * dnorm(obtained, theta, sd)
        area <- area + height * incr
      }
    } else {
      # normal model of H1 (zeroed below 0 and doubled above if 1-tailed)
      theta <- meanoftheory - 5 * sdtheory
      incr <- sdtheory / 200
      for (A in -1000:1000) {
        theta <- theta + incr
        dist_theta <- dnorm(theta, meanoftheory, sdtheory)
        if (identical(tail, 1)) {
          if (theta <= 0) {
            dist_theta <- 0
          } else {
            dist_theta <- dist_theta * 2
          }
        }
        height <- dist_theta * dnorm(obtained, theta, sd)
        area <- area + height * incr
      }
    }
    LikelihoodTheory <- area
    Likelihoodnull <- dnorm(obtained, 0, sd)
    BayesFactor <- LikelihoodTheory / Likelihoodnull
    ret <- list("LikelihoodTheory" = LikelihoodTheory,
                "Likelihoodnull" = Likelihoodnull,
                "BayesFactor" = BayesFactor)
    ret
  } else if (method == "new") {
    if (likelihood == "normal") {
      dfdata = 10^10 # a t with very large df approximates the normal
    }
    if (modeloftheory == "normal") {
      dftheory = 10^10
    } else if (modeloftheory == "cauchy") {
      dftheory = 1
    }
    area <- 0
    normarea <- 0
    if (modeloftheory == "uniform") {
      theta <- lower
      range <- upper - lower
      incr <- range / 2000
      for (A in -1000:1000) {
        theta <- theta + incr
        dist_theta <- 1 / range
        height <- dist_theta * dt((obtained - theta) / sd, df = dfdata)
        area <- area + height * incr
      }
      LikelihoodTheory <- area
    } else {
      theta <- modeoftheory - 10 * scaleoftheory
      incr <- scaleoftheory / 200
      for (A in -2000:2000) {
        theta <- theta + incr
        dist_theta <- dt((theta - modeoftheory) / scaleoftheory, df = dftheory)
        if (identical(tail, 1)) {
          if (theta <= modeoftheory) {
            dist_theta <- 0
          } else {
            dist_theta <- dist_theta * 2
          }
        }
        height <- dist_theta * dt((obtained - theta) / sd, df = dfdata)
        area <- area + height * incr
        normarea <- normarea + dist_theta * incr
      }
      LikelihoodTheory <- area / normarea
    }
    Likelihoodnull <- dt(obtained / sd, df = dfdata)
    BayesFactor <- LikelihoodTheory / Likelihoodnull
    BayesFactor
  }
}
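A minimal sketch of calling Bf under each method, with made-up numbers (mean difference 0.3, SE 0.2, half-normal H1 with SD 0.5):
# old (Baguley & Kaye) method: returns a list; extract the Bayes factor
Bf(sd = 0.2, obtained = 0.3, uniform = 0, meanoftheory = 0,
   sdtheory = 0.5, tail = 1)$BayesFactor
# new (Palfi) method: returns the Bayes factor directly
Bf(sd = 0.2, obtained = 0.3, likelihood = "normal", modeloftheory = "normal",
   modeoftheory = 0, scaleoftheory = 0.5, tail = 1, method = "new")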
Author: Elizabeth Wonnacott
Classifies a Bayes factor as substantial evidence for H0 (BF <= 1/3), substantial evidence for H1 (BF >= 3), or ambiguous otherwise. It is used by the addBf_ranges3 function above.
BfClassify <- function(B) {
  if (B <= (1/3)) {
    "h0"
  } else if (B >= 3) {
    "h1"
  } else {
    "ambig"
  }
}
Author: Elizabeth Wonnacott
This function calls the Bf function described above. coeff_summary should be the coefficient summary from an lmer/glmer model (obtained via summary(model)$coefficients). If it is an lmer model, the function expects there to be columns "df" and "p" (i.e. the summary function has been called using the lmerTest package, which applies the Kenward-Roger approximation for denominator degrees of freedom and computes p values). For each coefficient, the function uses the estimate as the obtained mean and the Std. Error as the measure of standard error.
The function allows for positive and negative h1 values. If negative, since the Bf function requires a positive value for sdtheory, both sdtheory and obtained are multiplied by -1.
It doesn't round unless there is an argument "digits" which says how many decimal places to round to; in this case the round_df function below is called.
Bf_model = function(coeff_summary, coeff_list, h1_list,
                    h1_motivation, tail_list, digits = "", method = "old") {
  if (method == "old") {
    Bfs = vector('double')
    estimates = vector('double')
    sterrors = vector('double')
    sdtheorys = vector('double')
    pvalues = vector('double')
    tzvalues = vector('double')
    for (i in 1:length(coeff_list)) {
      sd_error = coeff_summary[coeff_list[i], "Std. Error"]
      obtained = coeff_summary[coeff_list[i], "Estimate"]
      stdtheory = h1_list[i]
      # Bf requires a positive sdtheory: if h1 is negative, flip the sign of
      # both sdtheory and the obtained estimate
      if (h1_list[i] < 0) {
        stdtheory = h1_list[i] * -1
        obtained = (coeff_summary[coeff_list[i], "Estimate"] * -1)
      }
      Bfs[i] = Bf(sd_error, obtained, uniform = 0, meanoftheory = 0,
                  sdtheory = stdtheory, tail = tail_list[i])$BayesFactor
      estimates[i] = obtained
      sdtheorys[i] = stdtheory
      sterrors[i] = sd_error
      if (colnames(coeff_summary)[3] == "z value") { # glmer model
        tzlabel = "z"
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 3]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 4]
      } else if (colnames(coeff_summary)[3] == "df") { # lmer model (via lmerTest)
        tzlabel = "t"
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 4]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 5]
      }
    }
    df = data.frame(cbind(coeff_list, estimates, sterrors, tzvalues,
                          pvalues, sdtheorys, tail_list, Bfs, h1_motivation))
    colnames(df) = c("coefficient", "estimate", "std.Error", tzlabel, "p",
                     "sdtheory", "BFtail", "Bf", "h1 motivation")
    df$estimate = as.numeric(as.character(df$estimate))
    df$std.Error = as.numeric(as.character(df$std.Error))
    df$sdtheory = as.numeric(as.character(df$sdtheory))
    df$Bf = as.numeric(as.character(df$Bf))
    df$p = as.numeric(as.character(df$p))
    if (tzlabel == "z") {df$z = as.numeric(as.character(df$z))}
    if (tzlabel == "t") {df$t = as.numeric(as.character(df$t))}
    if (digits != "") {df = round_df(df, digits)}
    return(df)
  }
  if (method == "new") {
    Bfs = vector('double')
    estimates = vector('double')
    sterrors = vector('double')
    sdtheorys = vector('double')
    pvalues = vector('double')
    tzvalues = vector('double')
    dfvalues = vector('double')
    if (colnames(coeff_summary)[3] == "df") {tzlabel = "t"}
    if (colnames(coeff_summary)[3] == "z value") {tzlabel = "z"}
    for (i in 1:length(coeff_list)) {
      sd = coeff_summary[coeff_list[i], "Std. Error"]
      obtained = coeff_summary[coeff_list[i], "Estimate"]
      stdtheory = h1_list[i]
      if (h1_list[i] < 0) {
        stdtheory = h1_list[i] * -1
        obtained = (coeff_summary[coeff_list[i], "Estimate"] * -1)
      }
      if (tzlabel == "t") {
        dfdata = coeff_summary[coeff_list[i], "df"]
        Bfs[i] = Bf(sd, obtained, dfdata, likelihood = c("t"),
                    modeloftheory = c("normal"), modeoftheory = 0,
                    scaleoftheory = stdtheory, tail = tail_list[i],
                    method = "new")
      }
      if (tzlabel == "z") {
        Bfs[i] = Bf(sd, obtained, likelihood = c("normal"),
                    modeloftheory = c("normal"), modeoftheory = 0,
                    scaleoftheory = stdtheory, tail = tail_list[i],
                    method = "new")
      }
      estimates[i] = obtained
      sdtheorys[i] = stdtheory
      sterrors[i] = sd
      if (tzlabel == "z") {
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 3]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 4]
      } else if (tzlabel == "t") {
        dfvalues[i] = dfdata
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 4]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 5]
      }
    }
    if (tzlabel == "z") {
      df = data.frame(cbind(coeff_list, estimates, sterrors, tzvalues, pvalues,
                            sdtheorys, tail_list, Bfs, h1_motivation))
      colnames(df) = c("coefficient", "estimate", "std.Error", tzlabel, "p",
                       "sdtheory", "BFtail", "Bf", "h1 motivation")
    }
    if (tzlabel == "t") {
      df = data.frame(cbind(coeff_list, estimates, sterrors, tzvalues, dfvalues,
                            pvalues, sdtheorys, tail_list, Bfs, h1_motivation))
      colnames(df) = c("coefficient", "estimate", "std.Error", tzlabel, "df", "p",
                       "sdtheory", "BFtail", "Bf", "h1 motivation")
    }
    df$estimate = as.numeric(as.character(df$estimate))
    df$std.Error = as.numeric(as.character(df$std.Error))
    df$sdtheory = as.numeric(as.character(df$sdtheory))
    df$Bf = as.numeric(as.character(df$Bf))
    df$p = as.numeric(as.character(df$p))
    if (tzlabel == "z") {df$z = as.numeric(as.character(df$z))}
    if (tzlabel == "t") {df$t = as.numeric(as.character(df$t))}
    if (digits != "") {df = round_df(df, digits)}
    return(df)
  }
}
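A hedged usage sketch: here m stands for a hypothetical fitted glmer model, and the coefficient names, H1 SDs, and motivations are made up.
coeffs <- summary(m)$coefficients
bf_table <- Bf_model(coeff_summary = coeffs,
                     coeff_list = c("(Intercept)", "c_cond1"),
                     h1_list = c(0.5, 0.7),
                     h1_motivation = c("chance-adjusted intercept", "effect size from pilot"),
                     tail_list = c(1, 1), digits = 3)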
Author: Elizabeth Wonnacott.
This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, a value for the predicted mean, which is set to be sdtheory with meanoftheory = 0, and the current number of participants N). However, rather than returning the BF for the current sample, it works out what the BF would be for a range of different subject numbers (assuming that the SE scales with sqrt(N)).
Bf_powercalc <- function(sd, obtained, uniform, lower = 0, upper = 1, meanoftheory = 0,
                         sdtheory = 1, tail = 1, N, min, max) {
  x = c(0)
  y = c(0)
  # note: work out what the difference between N and df is (for the contrast
  # between two groups this is 2; for contrasts where there are 4 groups this
  # will be 3, etc.)
  for (newN in min:max) {
    # scale the SE by sqrt(N/newN) and recompute the BF; arguments are named
    # because the merged Bf function has extra parameters, so positional
    # matching would misassign them
    B = as.numeric(Bf(sd = sd * sqrt(N / newN), obtained = obtained,
                      uniform = uniform, lower = lower, upper = upper,
                      meanoftheory = meanoftheory, sdtheory = sdtheory,
                      tail = tail)[3])
    x = append(x, newN)
    y = append(y, B)
  }
  output = cbind(x, y)
  output = output[-1, ] # drop the initialising row
  return(output)
}
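For example (made-up values): with an observed effect of 0.3 (SE = 0.2) from N = 30 participants and a half-normal H1 with SD 0.5, we can ask how the BF would change for samples between 20 and 100.
power_table <- Bf_powercalc(sd = 0.2, obtained = 0.3, uniform = 0,
                            meanoftheory = 0, sdtheory = 0.5, tail = 1,
                            N = 30, min = 20, max = 100)
head(power_table) # column x is the hypothetical N, column y the corresponding BF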
Author: Elizabeth Wonnacott. Updated by Catriona Silvey on 16.04.2021.
This works with the Bf function above. It requires the obtained mean and SE for the current sample and works out what the BF would be for a range of predicted means (given by sdtheoryrange, with meanoftheory = 0).
The Bf_range function can be used with both old and new versions of the Bf function. By default it uses the old version (for backward compatibility); to use the new version, specify method = "new". The "new" option updates Bf_range to use the new Bf function: it works with normal, t and Cauchy H1s, and with normal or t likelihoods.
Bf_range <- function(sd, obtained, dfdata = 1, likelihood = c("normal", "t"),
                     modeloftheory = c("normal", "t", "cauchy"), meanoftheory = 0,
                     modeoftheory = 0, sdtheoryrange, dftheory = 1, tail = 1,
                     method = "old") {
  if (method == "old") {
    x = c(0)
    y = c(0)
    for (sdi in sdtheoryrange) {
      # uses the old Bf method
      B = as.numeric(Bf(sd, obtained, meanoftheory = 0, uniform = 0,
                        sdtheory = sdi, tail = tail)[3])
      # the following line corrects for the fact that the calculator does not
      # correctly compute the BF when sdtheory == 0; it ensures that if
      # sdtheory == 0, BF = 1
      if (sdi == 0) {B = 1}
      x = append(x, sdi)
      y = append(y, B)
    }
    output = cbind(x, y)
  }
  else if (method == "new") {
    x = c(0)
    y = c(0)
    for (sdi in sdtheoryrange) {
      # uses the new Bf method
      B = as.numeric(Bf(sd = sd, obtained = obtained, dfdata = dfdata,
                        likelihood = likelihood, modeloftheory = modeloftheory,
                        modeoftheory = modeoftheory, scaleoftheory = sdi,
                        dftheory = dftheory, tail = tail, method = "new"))
      # the following line corrects for the fact that the calculator does not
      # correctly compute the BF when sdtheory == 0; it ensures that if
      # sdtheory == 0, BF = 1
      if (sdi == 0) {B = 1}
      x = append(x, sdi)
      y = append(y, B)
    }
    output = cbind(x, y)
  }
  output = output[-1, ] # drop the initialising row
  colnames(output) = c("sdtheory", "BF")
  return(output)
}
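For example (with made-up numbers), the BFs for half-normal H1s with SDs from 0 to 1 in steps of 0.1:
Bf_range(sd = 0.2, obtained = 0.3, sdtheoryrange = seq(0, 1, by = 0.1), tail = 1)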
Bf_set is a wrapper around Bf which allows us to calculate a set of BFs and put them in a table. It can only be used with the non-uniform method, and each h1 must be positive (as for the original calculator).
Bf_set <- function(names_list, meandiff_list, sd_list, h1_list, tail_list) {
  Bfs = vector('double')
  for (i in 1:length(meandiff_list)) {
    Bfs[i] = Bf(sd_list[i], meandiff_list[i], uniform = 0, meanoftheory = 0,
                sdtheory = h1_list[i], tail = tail_list[i])$BayesFactor
  }
  df = data.frame(names_list, cbind(round(meandiff_list, 3), round(sd_list, 3),
                                    h1_list, round(Bfs, 3)))
  colnames(df) = c("Contrast", "Mean difference", "SE", "H1 estimate", "BF")
  return(df)
}
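A usage sketch with three made-up contrasts, each with its own H1 estimate and tail setting:
bf_table <- Bf_set(names_list = c("A_vs_B", "A_vs_C", "B_vs_C"),
                   meandiff_list = c(0.40, 0.15, -0.25),
                   sd_list = c(0.15, 0.12, 0.14),
                   h1_list = c(0.5, 0.5, 0.5),
                   tail_list = c(1, 1, 2))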
lower_ci and upper_ci are functions made available publicly in an RStudio Community post. They compute the lower and upper bounds of a t-based confidence interval from a mean, standard error, and group size, as in the example below.
library(tidyverse)
test <- data.frame(
  n = c(298, 298, 298, 298, 298, 298, 298, 298, 298, 298, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
  run = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
          1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  mAP = c(0.8112, 0.8006, 0.8076, 0.7999, 0.8067, 0.8046, 0.8004, 0.799,
          0.8052, 0.8002, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.8333, 0.8333,
          0.8333, 1, 0.8333, 1, 1, 0.8333, 1, 1)
)
lower_ci <- function(mean, se, n, conf_level = 0.95){
  mean - qt(1 - ((1 - conf_level) / 2), n - 1) * se
}
upper_ci <- function(mean, se, n, conf_level = 0.95){
  mean + qt(1 - ((1 - conf_level) / 2), n - 1) * se
}
foobar <- test %>%
  group_by(n) %>%
  summarise(smean = mean(mAP, na.rm = TRUE),
            ssd = sd(mAP, na.rm = TRUE)) %>%
  mutate(se = ssd / sqrt(n),
         lower_ci = lower_ci(smean, se, n),
         upper_ci = upper_ci(smean, se, n))
foobar
This takes a dataframe (df) and an integer (n) and returns a df which has n rows randomly deleted
deleteRandomRows = function(df, n){
  indices <- sample(1:nrow(df), n)
  df <- df[-indices,]
  return(df)
}
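For example, dropping 5 random rows from the built-in mtcars data frame:
set.seed(1) # for reproducibility
smaller <- deleteRandomRows(mtcars, 5)
nrow(mtcars) - nrow(smaller) # 5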
Author: Eva Viviani
This function is a wrapper around Ferenc Igali’s script.
It downloads data from Gorilla automatically, provided that you have set your Gorilla login credentials in R.
TO SET YOUR GORILLA LOGIN CREDENTIALS: install the "keyring" package and run a line of the following form (inferred from the key_get call in the function below; replace "username" with the email you use to log into Gorilla):
keyring::key_set("Gorilla", "username")
The keyring package will ask for and save your password within R. This information is passed into the function below to log into Gorilla and access your data. Only once you have set your login credentials can you run the download_from_Gorilla function below.
Arguments:
output: a character string with the name of the folder where the downloaded files are saved
url: a character string, or a vector of character strings, with the links (starting with "https") that I want to download
myGorillaemail: the email that I use to log into Gorilla
output <- c("C:/Users/...")
previewDownloadFile <- read.csv("data.csv") #this is a preview file of the eyetracker zone that contains in the "Response" column my links. One for each trial.
url <- previewDownloadFile[grepl("https", previewDownloadFile$Response),]$Response
download_from_Gorilla(output, url, "eva.viviani@education.ox.ac.uk")
Function below:
download_from_Gorilla <- function(output, url, myGorillaemail){
  if (!require(downloader)) {
    stop("downloader not installed")
  } else if (!require(keyring)){
    stop("keyring not installed")
  } else if (!require(httr)){
    stop("httr not installed")
  } else {
    print("------ start download -------")
  }
  #---------------------- get credentials and login ----------------------------#
  login <- list(
    email = myGorillaemail,
    password = keyring::key_get("Gorilla", myGorillaemail))
  login_res <- POST("https://gorilla.sc/api/login", body = login, encode = "form")
  #---------------------- cycle over the list of urls --------------------------#
  for (i in 1:length(url)){
    file_end <- ""
    experiment_download_url <- GET(url[i])
    content_type_you_need_to_save <- experiment_download_url$headers$`content-type`
    #------------------------- check content type -------------------------------#
    if (content_type_you_need_to_save == "text/csv") {
      file_end <- ".csv"
    } else if (content_type_you_need_to_save == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") {
      file_end <- ".xlsx"
    } else if (content_type_you_need_to_save == "audio/wav") {
      file_end <- ".weba"
    } else if (content_type_you_need_to_save == "video/webm;codecs=vp8") {
      file_end <- ".webm"
    }
    file_download_url <- experiment_download_url$url
    file_name_saver <- paste0(substring(as.character(url[i]), 54, nchar(as.character(url[i]))))
    # download the file
    downloader::download(file_download_url, paste0(output, file_name_saver), mode = "wb", quiet = T)
    print(paste0("file ", i))
  }
  print("------ download done -------")
}
This is a function which filters a column of data, removing values which are some number of standard deviations above/below the mean for that participant, possibly within some condition/subcondition.
im: the input matrix (a data frame)
svn: a list of the names of factors to be grouped by (subject name plus one or more conditions)
fn: the name of the column containing the data to be filtered
lim: how many standard deviations above/below the mean to filter
The function returns a data frame identical to the input matrix but with additional columns giving the group means and the "filtered" data.
filter2 = function(im, svn, fn, lim) {
  ## work out the mean and sd for each subject within each grouping factor
  x = list()
  y = ""
  for (n in svn) x = append(im[n], x)
  for (n in svn) y = paste(y, n, sep = "_")
  means = aggregate(im[fn], by = x, mean, na.rm = T)
  nocols = dim(means)[2]
  colnames(means)[nocols] = "means"
  sds = aggregate(im[fn], by = x, sd, na.rm = T)
  nocols = dim(sds)[2]
  colnames(sds)[nocols] = "sds"
  gs = merge(means, sds)
  ## if there is just one value it doesn't have a standard deviation,
  ## and we don't want to simply disregard all of these
  gs$sds[is.na(gs$sds)] = 0
  gs$max = gs$means + lim * gs$sds
  gs$min = gs$means - lim * gs$sds
  im2 = merge(im, gs, sort = F)
  im2[paste(fn, "filt", sep = "")] = im2[fn]
  cn = dim(im2)[2] ## get the column number (the last one added)
  im2[, cn][im2[, fn] > im2$max] = ""
  im2[, cn][im2[, fn] < im2$min] = ""
  im2[, cn] = as.numeric(im2[, cn])
  names(im2)[names(im2) == "means"] = paste("mean", y, sep = "_")
  names(im2)[names(im2) == "sds"] = paste("sd", y, sep = "_")
  names(im2)[names(im2) == "max"] = paste("max", y, sep = "_")
  names(im2)[names(im2) == "min"] = paste("min", y, sep = "_")
  return(im2)
}
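A hedged usage sketch (the data frame and column names here are hypothetical): filter reaction times more than 2.5 SDs from each subject-by-condition mean.
filtered <- filter2(im = mydata, svn = list("subject", "condition"),
                    fn = "rt", lim = 2.5)
# the filtered values appear in a new column named "rtfilt"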
Author: Catriona Silvey
This function generates some multilevel binary data.
Arguments:
n_subj = number of subjects
n_obs = total number of trials per subject
alpha = veridical grand mean log odds performance
beta1 = veridical effect of cond1
beta2 = veridical effect of cond2
beta3 = veridical cond1 * cond2 interaction effect
subj_corrs = list of correlations between random effects
subj_tau = list of SDs of random effects
generate_bin <- function(n_subj, n_obs, alpha, beta1, beta2, beta3, subj_corrs, subj_tau) {
  # make data frame where number of rows = number of subjects * number of trials per subject
  data <- data.frame(matrix(0, ncol = 0, nrow = n_subj * n_obs))
  # make subject vector and add to this data frame
  data$subject <- as.factor(rep(seq(1:n_subj), each = n_obs))
  # make condition 1 vector - within subjects
  # half trials one value, half trials the other
  data$cond1 <- as.factor(rep(c(0, 1), each = n_obs / 2))
  # make centred version
  data$c_cond1 <- as.numeric(data$cond1) - mean(as.numeric(data$cond1))
  # make condition 2 vector - also within subjects
  # 1/4 trials one value, 1/4 trials the other, then repeat
  # this ensures cond1 and cond2 are not identical
  data$cond2 <- as.factor(rep(c(0, 1), each = n_obs / 4))
  # make centred version
  data$c_cond2 <- as.numeric(data$cond2) - mean(as.numeric(data$cond2))
  # for subject effects
  # first, we put the correlations between the random effects in a matrix
  # * if changing to simulate fewer random effects & hence fewer correlations,
  #   this will need to be adjusted
  corr_matrix <- matrix(c(1, subj_corrs[1], subj_corrs[2], subj_corrs[3],
                          subj_corrs[1], 1, subj_corrs[4], subj_corrs[5],
                          subj_corrs[2], subj_corrs[4], 1, subj_corrs[6],
                          subj_corrs[3], subj_corrs[5], subj_corrs[6], 1), nrow = 4)
  # next, construct the variance-covariance matrix for subject effects:
  # we multiply the subject effect sds (in matrix form) by the correlation matrix
  # and then again by the subject effect sds, so we end up with the sds squared
  # (on the diagonal) = variance, and covariances between each pair of subject
  # effects on the off-diagonal
  # * if changing to simulate fewer random effects, this should still work fine,
  #   assuming corr_matrix has been adjusted appropriately
  subj_v_cov <- diag(subj_tau) %*% corr_matrix %*% diag(subj_tau)
  # create the correlated subject effects, using mvrnorm to sample from a
  # multivariate normal distribution; means of subject intercepts and slopes are 0
  u <- mvrnorm(n = n_subj, c(0, 0, 0, 0), subj_v_cov)
  # check the correlation and SDs - these should be fairly accurate in large samples
  # print(cor(u)); print(sd(u[,1])); print(sd(u[,2])); print(sd(u[,3])); print(sd(u[,4]))
  # finally, generate data on the basis of these parameters
  data <- data %>%
    mutate(
      # first calculate the linear predictor eta for each row in the data frame
      # = overall intercept + subject intercept +
      eta = alpha + u[data$subject, 1] +
        # cond1 value * (cond1 fixed effect + cond1 random slope) +
        data$c_cond1 * (beta1 + u[data$subject, 2]) +
        # cond2 value * (cond2 fixed effect + cond2 random slope) +
        data$c_cond2 * (beta2 + u[data$subject, 3]) +
        # cond1 * cond2 value * (interaction fixed effect + interaction random slope)
        (data$c_cond1 * data$c_cond2) * (beta3 + u[data$subject, 4]),
      # then transform by inverse logit to a probability (for this combo of subject/condition)
      mu = inv.logit(eta),
      # finally, generate a 0 or 1 for this row based on the probability for this subject/condition
      y = rbinom(nrow(data), 1, mu))
  return(data)
}
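generate_bin relies on mvrnorm (from MASS), mutate and %>% (from dplyr), and an inv.logit function (e.g. from the boot package), so those packages need to be loaded. A usage sketch with illustrative parameter values:
library(MASS)  # mvrnorm
library(dplyr) # mutate / %>%
library(boot)  # inv.logit
set.seed(123)
sim <- generate_bin(n_subj = 40, n_obs = 80,
                    alpha = 0.5, beta1 = 0.4, beta2 = 0.2, beta3 = 0,
                    subj_corrs = rep(0.2, 6), # all 6 random-effect correlations
                    subj_tau = rep(0.5, 4))   # SDs of the 4 random effects
head(sim)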
Author: Tutorials point.
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
This function allows us to inspect particular coefficients from the output of an LME model by putting them in table.
x: the output returned when running lmer or glmer (i.e. an object of type lmerMod or glmerMod)
list: a list of the names of the coefficients to be extracted (e.g. c(“variable1”, “variable1:variable2”))
get_coeffs <- function(x,list){(as.data.frame(summary(x)$coefficients)[list,])}
This outputs a set of specific coefficients for an lmer model.
x must be of the form summary(lmermodel)$coefficients as applied to the output of an lmer model; list should be a list of the coefficients to be reported.
get_coeffs2 <- function(x,list){(x[list,])}
It takes a log-odds value and calculates the proportion
inverse_log_odd<-function(lo) {exp(lo)/(1 + exp(lo))}
Author: Elizabeth Wonnacott.
This function provides a wrapper around myCenter, allowing you to center a specific list of variables from a dataframe. The input is a dataframe (x) and a list of the names of the variables to center (listfname). The output is a copy of the dataframe with a (numeric) column added for each of the centered variables, each labelled with its previous name with ".ct" appended. For example, if x is a dataframe with columns "a" and "b", lizCenter(x, list("a", "b")) will return a dataframe with two additional columns, a.ct and b.ct, which are numeric, centered codings of the corresponding variables.
lizCenter= function(x, listfname)
{
for (i in 1:length(listfname))
{
fname = as.character(listfname[i])
x[paste(fname,".ct", sep="")] = myCenter(x[fname])
}
return(x)
}
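For example:
d <- data.frame(cond = factor(c("a", "a", "b", "b")), score = c(1, 2, 3, 4))
d <- lizCenter(d, list("cond", "score"))
names(d) # "cond" "score" "cond.ct" "score.ct"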
This function provides a wrapper around lizCenter which removes rows with missing data in the DV before centering. Note that the returned df will have fewer rows as well as the centered variables (so be careful with this if working with multiple DVs).
dv - a string which is the column name of the column in the dataframe with the dv
x: data frame
listfname: a list of the variables to be centered (e.g. list(variable1,variable2))
The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labeled with the column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).
lizCenter2= function(x, listfname, dv)
{
x= droplevels(subset(x, is.na(x[as.character(dv)])==FALSE))
x = lizCenter(x, listfname)
return(x)
}
Author: Elizabeth Wonnacott.
This function implements a simple coding scheme. It creates two centered dummy variables which stand in place of a three-level factor (condition). This allows us to inspect each contrast separately against a reference level, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor. Note that the intercept corresponds to the mean of cell means, sometimes referred to as the grand mean.
The function takes a data frame (d), a factor from that database (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (baselevel).
lizContrasts = function(d, condition, baselevel) {
  condition = factor(condition)
  condition = relevel(condition, baselevel)
  # center the treatment contrasts by subtracting each column's mean
  a = (contrasts(condition) - apply(contrasts(condition), 2, mean))
  d$dummy1[condition == rownames(a)[1]] <- a[1]
  d$dummy1[condition == rownames(a)[2]] <- a[2]
  d$dummy1[condition == rownames(a)[3]] <- a[3]
  d$dummy2[condition == rownames(a)[1]] <- a[4]
  d$dummy2[condition == rownames(a)[2]] <- a[5]
  d$dummy2[condition == rownames(a)[3]] <- a[6]
  name1 = paste(baselevel, rownames(a)[2], sep = "_VERSUS_")
  name2 = paste(baselevel, rownames(a)[3], sep = "_VERSUS_")
  d[name1] = d$dummy1
  d[name2] = d$dummy2
  d$dummy1 <- NULL
  d$dummy2 <- NULL
  return(d)
}
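A usage sketch with a hypothetical three-level condition factor, using "control" as the baseline; this adds columns control_VERSUS_trained1 and control_VERSUS_trained2 to the data frame.
d <- data.frame(condition = factor(rep(c("control", "trained1", "trained2"), each = 4)))
d <- lizContrasts(d, d$condition, "control")
head(d)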
Author: Elizabeth Wonnacott.
This function is a version of the lizContrasts function above, to be used to create three centered dummy variables which stand in place of a four-level factor (condition). As before, it implements a simple coding scheme: each contrast can be inspected separately against a reference level, as well as its interactions with other factors, and other fixed effects in the model can be evaluated as the average effects across all levels of the factor. The intercept again corresponds to the mean of cell means (the grand mean).
lizContrasts4 = function(d, condition, baselevel) {
  condition = factor(condition)
  condition = relevel(condition, baselevel)
  a = (contrasts(condition) - apply(contrasts(condition), 2, mean))
  d$dummy1[condition == rownames(a)[1]] <- a[1]
  d$dummy1[condition == rownames(a)[2]] <- a[2]
  d$dummy1[condition == rownames(a)[3]] <- a[3]
  d$dummy1[condition == rownames(a)[4]] <- a[4]
  d$dummy2[condition == rownames(a)[1]] <- a[5]
  d$dummy2[condition == rownames(a)[2]] <- a[6]
  d$dummy2[condition == rownames(a)[3]] <- a[7]
  d$dummy2[condition == rownames(a)[4]] <- a[8]
  d$dummy3[condition == rownames(a)[1]] <- a[9]
  d$dummy3[condition == rownames(a)[2]] <- a[10]
  d$dummy3[condition == rownames(a)[3]] <- a[11]
  d$dummy3[condition == rownames(a)[4]] <- a[12]
  name1 = paste(baselevel, rownames(a)[2], sep = "_VERSUS_")
  name2 = paste(baselevel, rownames(a)[3], sep = "_VERSUS_")
  name3 = paste(baselevel, rownames(a)[4], sep = "_VERSUS_")
  d[name1] = d$dummy1
  d[name2] = d$dummy2
  d[name3] = d$dummy3
  d$dummy1 <- NULL
  d$dummy2 <- NULL
  d$dummy3 <- NULL
  return(d)
}
Author: Eva Viviani.
This function automatically loads all the functions stored in the /tools folder. It requires three arguments:
urlFolder: the url of the GitHub repository at the master tree level
urlRaw: the raw address of the tools folder
listFunctions: a vector containing the functions I want to download. If I want all of them, simply leave it empty, i.e., listFunctions <- NULL; otherwise specify the functions with the ".R" extension, e.g., listFunctions <- c("Bf.R","lizContrasts.R","getmode.R")
urlFolder <- 'https://api.github.com/repos/n400peanuts/languagelearninglab/git/trees/master?recursive=1'
urlRaw <- 'https://raw.githubusercontent.com/n400peanuts/languagelearninglab/master/tools/'
listFunctions <- NULL #this needs to be a list, e.g., listFunctions <- c("Bf.R","lizContrasts.R","getmode.R")
loadFunctionsGithub <- function(urlFolder, urlRaw, listFunctions){
  if (!require(httr)) {
    stop("httr not installed")
  } else {
    print('----Downloading. Please wait----')
  }
  req <- httr::GET(urlFolder)
  stop_for_status(req)
  filelist <- unlist(lapply(content(req)$tree, "[", "path"), use.names = F)
  urlFunctions <- grep("docs/tools/", filelist, value = TRUE, fixed = TRUE)
  functions <- gsub("docs/tools/", "", urlFunctions)
  if (length(listFunctions) == 0){ # load all of the functions
    for (i in 1:length(functions)){
      temp <- httr::GET(paste0(urlRaw, functions[i]), ssl.verifypeer = FALSE)
      p3 <- content(temp)
      eval(parse(text = p3), envir = .GlobalEnv)
    }
  } else { # load only the requested functions
    functionsIlike <- functions[functions %in% listFunctions]
    for (i in 1:length(functionsIlike)){
      temp <- httr::GET(paste0(urlRaw, functionsIlike[i]), ssl.verifypeer = FALSE)
      p3 <- content(temp)
      eval(parse(text = p3), envir = .GlobalEnv)
    }
  }
  print('----Download completed----')
}
This function takes a percentage p and returns the logodds value.
logodds <- function(p){log(p/(100-p))}
Author: Florian Jaeger.
This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame.
From Florian’s blog:
If the input is a numeric variable, the output is the centered variable.
If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.
If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).
myCenter <- function(x) {
  if (is.numeric(x)) { return(x - mean(x, na.rm = T)) }
  if (is.factor(x)) {
    x = as.numeric(x)
    return(x - mean(x, na.rm = T))
  }
  if (is.data.frame(x) || is.matrix(x)) {
    m = matrix(nrow = nrow(x), ncol = ncol(x))
    colnames(m) = paste("c", colnames(x), sep = "")
    for (i in 1:ncol(x)) {
      m[, i] = myCenter(x[, i])
    }
    return(as.data.frame(m))
  }
}
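For example:
myCenter(c(1, 2, 3))          # -1 0 1
myCenter(factor(c("a", "b"))) # -0.5 0.5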
Author: Cookbook for R.
This function is used by the summarySEwithin function below. From that website:
Norms the data within specified groups in a data frame; it normalizes each subject (identified by idvar) so that they have the same mean, within each group specified by betweenvars.
data: a data frame
idvar: the name of a column that identifies each subject (or matched subjects)
measurevar: the name of a column that contains the variable to be summarized
betweenvars: a vector containing names of columns that are between-subjects variables
na.rm: a boolean that indicates whether to ignore NA’s
normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
na.rm=FALSE, .drop=TRUE) {
#library(plyr)
# Measure var on left, idvar + between vars on right of formula.
data.subjMean <- ddply(data, c(idvar, betweenvars), .drop=.drop,
.fun = function(xx, col, na.rm) {
c(subjMean = mean(xx[,col], na.rm=na.rm))
},
measurevar,
na.rm
)
# Put the subject means with original data
data <- merge(data, data.subjMean)
# Get the normalized data in a new column
measureNormedVar <- paste(measurevar, "_norm", sep="")
data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
mean(data[,measurevar], na.rm=na.rm)
# Remove this subject mean column
data$subjMean <- NULL
return(data)
}
Author: Elizabeth Wonnacott
num.decimals <- function(x) {
stopifnot(class(x)=="numeric")
x <- sub("0+$","",x)
x <- sub("^.+[.]","",x)
nchar(x)
}
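For example:
num.decimals(0.25) # 2
num.decimals(0.1)  # 1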
This function does the reverse of the logodds function above: it takes a log-odds value and returns the percentage.
percentage <- function(logodds) {(exp(logodds)/(1+exp(logodds)) ) * 100}
Author: Elizabeth Wonnacott
Takes a data frame "df" and rounds any numeric column to the number of decimal places given in digits.
round_df <- function(df, digits) {
for(i in 1: ncol(df)){
if (is.numeric(df[,i])) {
df[,i]= round(df[,i],digits) }
}
return(df)
}
Author: Elizabeth Wonnacott
This function provides a wrapper around myCenter allowing you to center a specific list of variables from a data frame.
The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with the column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).
selectCenter= function(x, listfname)
{
for (i in 1:length(listfname))
{
fname = as.character(listfname[i])
x[paste(fname,".ct", sep="")] = myCenter(x[fname])
}
return(x)
}
This function takes summary output from addBf_ranges3 and the name of a coefficient from that summary, and formats them so that they can be reported as a string in the main text. The default is to round all stats to 2dp, apart from p values, which are rounded to 3dp (as per APA format). Beta, SE, z, p, the Bayes factor (with (0, H1) as a subscript, where H1 is the predicted SD of H1) and the robustness regions are all reported. Note that, as this stands, if the ranges are strings (e.g. ">4.5") they won't be rounded.
stat_string <- function(Bfrange,
coefficientName,
dpbeta = 2,
dpse = 2,
dpz = 2,
dpp = 3,
dpseH1 = 2,
dpB = 3,
dpRRmin = 2,
dpRRmax = 2)
{
beta = subset(Bfrange, coefficient == coefficientName)$estimate
se = subset(Bfrange, coefficient == coefficientName)$std.Error
z = subset(Bfrange, coefficient == coefficientName)$z
p = subset(Bfrange, coefficient == coefficientName)$p
seH1 = subset(Bfrange, coefficient == coefficientName)$sdtheory
B = subset(Bfrange, coefficient == coefficientName)$Bf
RRmin = subset(Bfrange, coefficient == coefficientName)$RRmin
RRmax = subset(Bfrange, coefficient == coefficientName)$RRmax
pstring = if (p < .001) {
", _p_ <.001"
} else {
paste(", _p_ =", round(p, dpp))
}
if (B > 1000) {
B = signif(B, digits = dpB)
}
else {
B = round(B, dpB)
}
outstring = paste(
"_β_ = ",
round(beta, dpbeta),
", _SE_ = ",
round(se, dpse),
", _z_ = " ,
round(z, dpz),
pstring ,
", _B_",
"~(0,",
round(seH1, dpseH1),
")~",
" = ",
signif(B, digits = dpB),
", _RR_ = [",
RRmin,
",",
RRmax,
"]",
sep = ""
)
return(outstring)
}
This function is the same as stat_string except that z and p aren’t reported
stat_string2 <- function(Bfrange,
coefficientName,
dpbeta = 2,
dpse = 2,
dpz = 2,
dpp = 3,
dpseH1 = 2,
dpB = 3,
dpRRmin = 2,
dpRRmax = 2)
{
beta = subset(Bfrange, coefficient == coefficientName)$estimate
se = subset(Bfrange, coefficient == coefficientName)$std.Error
B = subset(Bfrange, coefficient == coefficientName)$Bf
RRmin = subset(Bfrange, coefficient == coefficientName)$RRmin
RRmax = subset(Bfrange, coefficient == coefficientName)$RRmax
seH1 = subset(Bfrange, coefficient == coefficientName)$sdtheory
if (B > 1000) {
B = signif(B, digits = dpB)
}
else {
B = round(B, dpB)
}
outstring = paste(
"_β_ = ",
round(beta, dpbeta),
", _B_",
"~(0,",
round(seH1, dpseH1),
")~",
" = ",
signif(B, digits = dpB),
", _RR_ = [",
RRmin,
",",
RRmax,
"]",
sep = ""
)
return(outstring)
}
Author: Cookbook for R.
It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
data: a data frame
measurevar: the name of a column that contains the variable to be summarized
groupvars: a vector containing the names of the columns that contain grouping variables
na.rm: a boolean that indicates whether to ignore NA’s
conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
conf.interval = .95, .drop = TRUE) {
require(plyr)
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, and sd
datac <- ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean (xx[[col]], na.rm=na.rm),
sd = sd (xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" column
datac <- plyr::rename(datac, c("mean" = measurevar))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
datac$ci <- datac$se * ciMult
return(datac)
}
Author: Cookbook for R. dataset argument implemented by: Eva Viviani and improved by Elizabeth Wonnacott
From that website:
Summarizes data, handling within-subjects variables by removing inter-subject variability. It will still work if there are no within-subjects variables. Gives count, un-normed mean, normed mean (with same between-group mean), standard deviation, standard error of the mean, and confidence interval. If there are within-subject variables, calculate adjusted values using method from Morey (2008).
data: a data frame
measurevar: the name of a column that contains the variable to be summarized
betweenvars: a vector containing names of columns that are between-subjects variables
withinvars: a vector containing names of columns that are within-subjects variables
idvar: the name of a column that identifies each subject (or matched subjects)
na.rm: a boolean that indicates whether to ignore NA’s
conf.interval: the percent range of the confidence interval (default is 95%)
dataset: if NULL (default), will produce a new dataset. If this is set to “old” it adds the confidence intervals and groupmean into the original dataframe, rather than into a new one
summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE, dataset = NULL) {
# Ensure that the betweenvars and withinvars are factors
factorvars <- vapply(data[, c(betweenvars, withinvars), drop=FALSE],
FUN=is.factor, FUN.VALUE=logical(1))
if (!all(factorvars)) {
nonfactorvars <- names(factorvars)[!factorvars]
message("Automatically converting the following non-factors to factors: ",
paste(nonfactorvars, collapse = ", "))
data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
}
# Get the means from the un-normed data
datac <- summarySE(data, measurevar, groupvars=c(betweenvars, withinvars),
na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)
# Drop all the unused columns (these will be calculated with normed data)
datac$sd <- NULL
datac$se <- NULL
datac$ci <- NULL
# Norm each subject's data
ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)
# This is the name of the new column
measurevar_n <- paste(measurevar, "_norm", sep="")
# Collapse the normed data - now we can treat between and within vars the same
ndatac <- summarySE(ndata, measurevar_n, groupvars=c(betweenvars, withinvars),
na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)
# Apply correction from Morey (2008) to the standard error and confidence interval
# Get the product of the number of conditions of within-S variables
nWithinGroups <- prod(vapply(ndatac[,withinvars, drop=FALSE], FUN=nlevels,
FUN.VALUE=numeric(1)))
correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )
# Apply the correction factor
ndatac$sd <- ndatac$sd * correctionFactor
ndatac$se <- ndatac$se * correctionFactor
ndatac$ci <- ndatac$ci * correctionFactor
# Combine the un-normed means with the normed results
merge(datac, ndatac) -> data2
if (!is.null(dataset)){ # if we want the info in the same dataset:
colnames(data2)[colnames(data2)==measurevar] <- "groupmean"
out = merge(data,data2)
return(out)
} else { # we want a new dataset
return(data2)
}
}
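A hedged usage sketch (the data frame and column names are hypothetical): mean accuracy by a within-subject condition, with Morey-corrected standard errors and confidence intervals.
sumstats <- summarySEwithin(d, measurevar = "acc", withinvars = "condition",
                            idvar = "subject", na.rm = TRUE)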
This function is a wrapper around summarySEwithin (the helper above, from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper). It takes a dataframe, uses summarySEwithin to get the group means and variances for conditions, and merges these into the original dataframe.
summarySEwithin2 <- function(data = NULL,
measurevar,
betweenvars = NULL,
withinvars = NULL,
idvar = NULL,
na.rm = FALSE,
conf.interval = .95,
.drop = TRUE) {
data2 = summarySEwithin(
data = data,
measurevar = measurevar,
betweenvars = betweenvars,
withinvars = withinvars,
idvar = idvar,
na.rm = na.rm,
conf.interval = conf.interval,
.drop = .drop
)
colnames(data2)[colnames(data2) == measurevar] <- "groupmean"
out = merge(data, data2)
return(out)
}