This is an R Markdown document explaining all the functions used by the language and learning lab. The functions are stored into the tools folder.

addBf_ranges

This function takes as its input a dataframe which is the output of the BF_set function. This will have a set of betas, standard errors and sd-theory. The function adds and additional column to this table which shows the range of values over which BF’s meet the criteria of (i) strong evdience for null (BF< 1/10); substantial evidence for null (BF< 1/3); ambiguous (3 > BF >1/3 ); substantial evidence H1 (BF >= 10).

addBf_ranges <-function(Bf_df, sdtheoryrange)
{
  
  BFranges = vector()  
  for (b in 1:dim(Bf_df)[1]){
    range = Bf_range(sd=as.numeric(as.character(Bf_df$SE[b])), obtained=as.numeric(as.character(Bf_df$'Mean difference'[b])), meanoftheory=0, sdtheoryrange=sdtheoryrange)
    
    from_table = vector()
    to_table = vector()
    cat = vector()
    category_table = vector()
    
    for(i in 1:dim(range)[1]) {       # go through each value in the range and categorize it
      
      #categorize current BF 
      if (range[i,2] <= (1/3)) { 
        cat[i] = "H0"      ## below or equal to 1/3
      } else if (range[i,2] < 3) { ## NOT below or equal to 1/3, IS below 3 
        cat[i] = "ambig"
      } else {                ## NOT below or equal to 1/3, NOT below 3
        cat[i] = "substH1"
      }
      
      # adjust the table
      j = length(category_table) 
      
      if (i==1){                      # first one
        category_table[j+1] = cat[i]   
        from_table[j+1] = range[i,1]
        
      } else if (cat[i] != cat[i-1]) { # NOT the first one, IS one where need to start new range 
        to_table[j] = range[i-1,1]
        category_table[j+1] = cat[i]
        from_table[j+1] = range[i,1]
      } 
      
      if (i==dim(range)[1]){        # if its the last one, finish off the table
        to_table[j] = range[i,1]
      }
    }
    
    # go through the little table and turn it int a string of ranges  
    string = ""
    for(i in 1: length(category_table)){
      string = paste(string, category_table[i],":", round(from_table[i],3),"-", round(to_table[i],3))
    }
    
    BFranges[b] = string
  }
  out = cbind(Bf_df, BFranges)
  return(out)
}

addBf_ranges3

Author: Elizabeth Wonnacott

Similar to addBf_ranges2 except that it works out ranges by working down/up from the given sd_theory value in increments given by stepsizes up to maximum given by maxs. Stepsizes/maxs can either be a single value used across all the coefficients or can be a vector of values (one per row - size must match )

addBf_ranges3 = function(Bf_df, stepsizes, maxs, method = "old") {
  
  if(method=="old"){  
    RRminV = vector()
    RRmaxV = vector()
    for (b in 1:dim(Bf_df)[1]){
      
      if (length(stepsizes)==1){stepsize = stepsizes} else {stepsize = stepsizes[b]}
      if (length(maxs)==1){max = maxs} else {max = maxs[b]}
      
      
      Ndp = num.decimals(stepsize)
      BF = as.numeric(as.character(Bf_df$Bf[b]))  
      BType= BfClassify(BF) 
      sdtheory = as.numeric(as.character(Bf_df$sdtheory[b]))  
      sd = as.numeric(as.character(Bf_df$std.Error[b]))
      obtained = as.numeric(as.character(Bf_df$estimate[b]))
      tail = as.numeric(as.character(Bf_df$BFtail[b]))
      
      # get the max RRmax 
      RRmax = ""
      lastsdtheory = sdtheory
      newsdtheory = ""
      while(newsdtheory<= max){
        newsdtheory = lastsdtheory + stepsize
        newBF = Bf(sd=sd, obtained=obtained, uniform = NULL, meanoftheory=0,sdtheory=newsdtheory, tail=tail)$BayesFactor
        
        if (BfClassify(newBF)!= BType) {
          RRmax = format(round(lastsdtheory, Ndp))
          break }
        lastsdtheory = newsdtheory
      }
      
      if(RRmax == "" & BType == "h0") {RRmax = "inf"}  
      if(RRmax == "" & BType == "h1") {RRmax = paste(">", max, sep="")}  
      if(RRmax == "" & BType == "ambig") {RRmax = paste(">", max, sep="")}  
      
      # get the min RRmin 
      RRmin = ""
      lastsdtheory = sdtheory
      newsdtheory = lastsdtheory - stepsize
      
      while(newsdtheory > 0){
        newBF = Bf(sd=sd, obtained=obtained, uniform = NULL, meanoftheory=0,sdtheory=newsdtheory, tail=tail)$BayesFactor
        
        if (BfClassify(newBF)!= BType) {
          RRmin = format(round(lastsdtheory, Ndp))
          break }
        lastsdtheory = newsdtheory
        newsdtheory = lastsdtheory-stepsize
      }
      
      if(RRmin== "" & BType == "ambig") {RRmin = 0}  
      if(RRmin== "" & BType == "h1") {RRmin = paste(" 0< & <", format(round(lastsdtheory,  Ndp), nsmall=Ndp), sep="")} 
      if(RRmin== "" & BType == "h0") {RRmin = paste(" 0< & <", format(round(lastsdtheory,  Ndp), nsmall=Ndp), sep="")}  
      
      RRminV[b] = RRmin
      RRmaxV[b] = RRmax
      
    }
    out = cbind(Bf_df, RRminV, RRmaxV)
    
    colnames(out)[10:11]= c("RRmin","RRmax")
    
    return(out)
  }
  if(method=="new"){  
    RRminV = vector()
    RRmaxV = vector()
    for (b in 1:dim(Bf_df)[1]){
      
      
      if (length(stepsizes)==1){stepsize = stepsizes} else {stepsize = stepsizes[b]}
      if (length(maxs)==1){max = maxs} else {max = maxs[b]}
      
      
      Ndp = num.decimals(stepsize)
      BF = as.numeric(as.character(Bf_df$Bf[b]))  
      BType= BfClassify(BF) 
      sdtheory = as.numeric(as.character(Bf_df$sdtheory[b]))  
      sd = as.numeric(as.character(Bf_df$std.Error[b]))
      obtained = as.numeric(as.character(Bf_df$estimate[b]))
      tail = as.numeric(as.character(Bf_df$BFtail[b]))
      
      # get the max RRmax 
      RRmax = ""
      lastsdtheory = sdtheory
      newsdtheory = ""
      while(newsdtheory<= max){
        newsdtheory = lastsdtheory + stepsize
        if(colnames(Bf_df)[4]=="t"){
          dfdata=as.numeric(as.character(Bf_df$df[b]))
          newBF = Bf(sd=sd, obtained=obtained, dfdata=dfdata, likelihood = c("t"), modeloftheory= c("normal"),modeoftheory=0,scaleoftheory=newsdtheory, tail=tail, method="new")
        }
        if(colnames(Bf_df)[4]=="z"){
          dfdata=as.numeric(as.character(Bf_df$df[b]))
          newBF = Bf(sd=sd, obtained=obtained,  likelihood = c("normal"), modeloftheory= c("normal"),modeoftheory=0,scaleoftheory=newsdtheory, tail=tail, method="new")
        }
        
        if (BfClassify(newBF)!= BType) {
          RRmax = format(round(lastsdtheory, Ndp))
          break }
        lastsdtheory = newsdtheory
      }
      
      if(RRmax == "" & BType == "h0") {RRmax = "inf"}  
      if(RRmax == "" & BType == "h1") {RRmax = paste(">", max, sep="")}  
      if(RRmax == "" & BType == "ambig") {RRmax = paste(">", max, sep="")}  
      
      # get the min RRmin 
      RRmin = ""
      lastsdtheory = sdtheory
      newsdtheory = lastsdtheory - stepsize
      
      while(newsdtheory > 0){
        if(colnames(Bf_df)[4]=="t"){
          dfdata=as.numeric(as.character(Bf_df$df[b]))
          newBF = Bf(sd=sd, obtained=obtained, dfdata=dfdata, likelihood = c("t"), modeloftheory= c("normal"),modeoftheory=0,scaleoftheory=newsdtheory, tail=tail, method="new")
        }
        
        
        if(colnames(Bf_df)[4]=="z"){
          dfdata=as.numeric(as.character(Bf_df$df[b]))
          newBF = Bf(sd=sd, obtained=obtained,  likelihood = c("normal"), modeloftheory= c("normal"),modeoftheory=0,scaleoftheory=newsdtheory, tail=tail, method="new")
        }
        
        if (is.nan(newBF)) {
          RRmin = format(round(lastsdtheory, Ndp))
          break }
        if (BfClassify(newBF)!= BType) {
          RRmin = format(round(lastsdtheory, Ndp))
          break }
        lastsdtheory = newsdtheory
        newsdtheory = lastsdtheory-stepsize
      }
      
      if(RRmin== "" & BType == "ambig") {RRmin = 0}  
      if(RRmin== "" & BType == "h1") {RRmin = paste(" 0< & <", format(round(lastsdtheory,  Ndp), nsmall=Ndp), sep="")} 
      if(RRmin== "" & BType == "h0") {RRmin = paste(" 0< & <", format(round(lastsdtheory,  Ndp), nsmall=Ndp), sep="")}  
      
      RRminV[b] = RRmin
      RRmaxV[b] = RRmax
      
    }
    out = cbind(Bf_df, RRminV, RRmaxV)
    
    colnames(out)[10:11]= c("RRmin", "RRmax")
    
    return(out)
  }
}  

adjust_intercept_model

Function to adjust the intercept of the model

adjust_intercept_model<- function(model, chance, intercept_list = c("(Intercept)"))
{
  summary = summary(model)$coefficients
  
  for (i in 1:length(intercept_list)){
    summary[intercept_list[i], "Estimate"] = summary[intercept_list[i], "Estimate"]- chance
    summary[intercept_list[i], "z value"] =  summary[intercept_list[i], "Estimate"]/summary[intercept_list[i], "Std. Error"]
    summary[intercept_list[i], "Pr(>|z|)"] =  p =2*pnorm(-abs(summary[intercept_list[i], "z value"])) }
  
  
  summary[intercept_list[i], "Estimate"]
  
  
  return(summary)
  
}

BayesFactor updated

Author of the new version: Bence Palfi.

Author of the old version: Baguely and Kayne (2010).

This function computes the Bayes Factor and it’s equivalent to the Dienes (2008) calculator.

Integration of new and old versions is made available by Catriona Silvey on 16.04.2021

The BF function has been updated recently. The novel function has options for a likelihood that is either normal- or t-distributed, and a model of H1 that is either uniform, or normal or t- (or Cauchy-) distributed, with the normal/t/cauchy models being 1- or 2-tailed. In addition, the 1-tailed models are compatible with any mode (unlike the Dienes, 2008, calculator that assumed that 1-tailed models had a mode of zero).

The Bf function incorporates both old and new methods (Baguley & Kaye, 2010, and Bence Palfi’s more recent version). By default, uses B&K version (for backward compatibility) if using Palfi version, specify method = “new”.

Bf<- function(sd, obtained, dfdata = 1, likelihood = c("normal", "t"), 
              modeloftheory = c("normal", "t", "cauchy", "uniform"), uniform, 
              lower=0, upper=1, meanoftheory=0, sdtheory=1, modeoftheory =0, scaleoftheory = 1, 
              dftheory = 1, tail=1, method = "old"){
  if (method == "old") {
    area <- 0
    if(identical(uniform, 1)){
      theta <- lower
      range <- upper - lower
      incr <- range / 2000
      for (A in -1000:1000){
        theta <- theta + incr
        dist_theta <- 1 / range
        height <- dist_theta * dnorm(obtained, theta, sd)
        area <- area + height * incr
      }
    }else
    {theta <- meanoftheory - 5 * sdtheory
    incr <- sdtheory / 200
    for (A in -1000:1000){
      theta <- theta + incr
      dist_theta <- dnorm(theta, meanoftheory, sdtheory)
      if(identical(tail, 1)){
        if (theta <= 0){
          dist_theta <- 0
        } else {
          dist_theta <- dist_theta * 2
        }
      }
      height <- dist_theta * dnorm(obtained, theta, sd)
      area <- area + height * incr
    }
    }
    LikelihoodTheory <- area
    Likelihoodnull <- dnorm(obtained, 0, sd)
    BayesFactor <- LikelihoodTheory / Likelihoodnull
    ret <- list("LikelihoodTheory" = LikelihoodTheory,"Likelihoodnull" = Likelihoodnull, "BayesFactor" = BayesFactor)
    ret
  } else if (method == "new") {
    if(likelihood=="normal"){
      dfdata=10^10
    }
    if(modeloftheory=="normal"){
      dftheory = 10^10
    } else if(modeloftheory=="cauchy"){
      dftheory = 1
    }
    area <- 0
    normarea <- 0
    if(modeloftheory=="uniform"){
      theta <- lower
      range <- upper - lower
      incr <- range / 2000
      for (A in -1000:1000){
        theta <- theta + incr
        dist_theta <- 1 / range
        height <- dist_theta * dt((obtained-theta)/sd, df=dfdata)
        area <- area + height * incr
      }
      LikelihoodTheory <- area
    }else{
      theta <- modeoftheory - 10 * scaleoftheory
      incr <- scaleoftheory/200
      for (A in -2000:2000){
        theta <- theta + incr
        dist_theta <- dt((theta-modeoftheory)/scaleoftheory, df=dftheory)
        if(identical(tail, 1)){
          if (theta <= modeoftheory){
            dist_theta <- 0
          } else {
            dist_theta <- dist_theta * 2
          }
        }
        height <- dist_theta * dt((obtained-theta)/sd, df = dfdata)
        area <- area + height * incr
        normarea <- normarea + dist_theta*incr
      }
      LikelihoodTheory <- area/normarea
    }
    Likelihoodnull <- dt(obtained/sd, df = dfdata)
    BayesFactor <- LikelihoodTheory/Likelihoodnull
    BayesFactor
  }
}

BfClassify

Author: Elizabeth Wonnacott

Classifies a Bayes factors as substantial for h1/h0 or ambigious (is used by addBf_ranges3) is used by Bf_addranges function below

BfClassify<-function(B)
{
  if(B<=(1/3)){ "h0"}
  else if (B>=3) {"h1"}  
  else {"ambig"}
}

Bf_model

Author: Elizabeth Wonnacott

This function calls the Bf function described above. Summary should be the coefficients summary from an lmer/gmer model (obtained via #summary(model)$coefficients). If its a lmer model, it expects there to be a columns “df” and “p” (i.e.summary function has been called using lmerTest package which applieds Kenward-Roger approximation for denominator degrees of freedom and computes p values). For each coefficient, the model uses the estimate as the obtained mean and the Std.Error as the measure of standard error.

The function allows for positive and negtive h1 values. If negative, since the BF function requires a positive value for sdtheory, both sdtheory and #obtained are multiplied by -1.

It doesn’t round unless there is an argument “digits” which says how many dp to round to- in this case the round_df function below is called.

Bf_model = function(coeff_summary, coeff_list, h1_list, 
                    h1_motivation, tail_list, digits = "", method = "old") {
  if(method == "old") {
    Bfs = vector('double')
    estimates = vector('double')
    sterrors = vector('double')
    sdtheorys = vector('double')
    pvalues = vector('double')
    tzvalues = vector('double')
    
    for (i in 1:length(coeff_list)) {
      
      sd_error = coeff_summary[coeff_list[i], "Std. Error"]
      obtained = coeff_summary[coeff_list[i], "Estimate"]
      stdtheory = h1_list[i]
      
      
      if(h1_list[i] < 0) {
        stdtheory = h1_list[i] * -1
        obtained = (coeff_summary[coeff_list[i], "Estimate"] * -1)
      }
      
      Bfs[i] = Bf(sd_error, obtained, uniform = 0, meanoftheory = 0, 
                  sdtheory = stdtheory, tail = tail_list[i])$BayesFactor
      estimates[i] = obtained
      sdtheorys[i] = stdtheory
      sterrors[i] = sd_error
      
      if(colnames(coeff_summary)[3] == "z value") {
        tzlabel = "z"
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 3]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 4]  
      } else if(colnames(coeff_summary)[3] == "df") {
        tzlabel = "t"
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 4]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 5]  
      } 
    }
    
    df = data.frame(cbind(coeff_list, estimates, sterrors, tzvalues, 
                          pvalues, sdtheorys,  tail_list, Bfs,h1_motivation ))
    colnames(df) = c("coefficient", "estimate", "std.Error", tzlabel, "p", 
                     "sdtheory", "BFtail", "Bf", "h1 motivation")
    df$estimate = as.numeric(as.character(df$estimate))
    df$std.Error = as.numeric(as.character(df$std.Error))
    df$sdtheory = as.numeric(as.character(df$sdtheory))
    
    df$Bf = as.numeric(as.character(df$Bf))
    df$p = as.numeric(as.character(df$p))
    if (tzlabel == "z") {df$z = as.numeric(as.character(df$z))}
    if (tzlabel == "t") {df$t = as.numeric(as.character(df$t))}
    if (digits !=  "") {df = round_df(df, digits)}
    return(df)
  } 
  
  if(method == "new") {
    Bfs = vector('double')
    estimates = vector('double')
    sterrors = vector('double')
    sdtheorys = vector('double')
    pvalues = vector('double')
    tzvalues = vector('double')
    dfvalues = vector('double')
    
    if(colnames(coeff_summary)[3] == "df"){tzlabel = "t"}
    if(colnames(coeff_summary)[3] == "z value"){tzlabel = "z"}
    
    for (i in 1:length(coeff_list)) {
      
      sd = coeff_summary[coeff_list[i], "Std. Error"]
      obtained = coeff_summary[coeff_list[i], "Estimate"]
      stdtheory = h1_list[i]
      
      if(h1_list[i] < 0) {
        stdtheory = h1_list[i] * -1
        obtained = (coeff_summary[coeff_list[i], "Estimate"] * -1)}
      
      
      if(tzlabel == "t") {
        dfdata = coeff_summary[coeff_list[i], "df"]  
        Bfs[i] = Bf(sd, obtained, dfdata, likelihood = c("t"), 
                    modeloftheory = c("normal"), modeoftheory = 0, 
                    scaleoftheory = stdtheory, tail = tail_list[i], 
                    method = "new")
      }
      if(tzlabel == "z") {
        Bfs[i] = Bf(sd, obtained, likelihood = c("normal"), 
                    modeloftheory = c("normal"), modeoftheory = 0, 
                    scaleoftheory = stdtheory, tail = tail_list[i], 
                    method = "new")
      }
      
      estimates[i] = obtained
      sdtheorys[i] = stdtheory
      sterrors[i] = sd
      
      if(tzlabel == "z") {
        
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 3]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 4]  
      } else if(tzlabel == "t") {
        dfvalues[i] = dfdata
        tzvalues[i] = abs(coeff_summary[coeff_list[i], 4]) * sign(obtained)
        pvalues[i] = coeff_summary[coeff_list[i], 5]  
      } 
    }
    
    if(tzlabel == "z") {
      df = data.frame(cbind(coeff_list, estimates, sterrors, tzvalues, pvalues, sdtheorys,  tail_list, Bfs, h1_motivation))
      colnames(df) = c("coefficient", "estimate", "std.Error", tzlabel, "p", "sdtheory", "BFtail", "Bf", "h1 motivation")
      
    }
    if(tzlabel == "t") {df = data.frame(cbind(coeff_list, estimates, sterrors, tzvalues, dfvalues, pvalues, sdtheorys,  tail_list, Bfs, h1_motivation))
    colnames(df) = c("coefficient", "estimate", "std.Error", tzlabel, "df", "p", "sdtheory", "BFtail", "Bf", "h1 motivation")
    
    }
    
    df$estimate = as.numeric(as.character(df$estimate))
    df$std.Error = as.numeric(as.character(df$std.Error))
    df$sdtheory = as.numeric(as.character(df$sdtheory))
    
    df$Bf = as.numeric(as.character(df$Bf))
    df$p = as.numeric(as.character(df$p))
    if (tzlabel == "z") {df$z = as.numeric(as.character(df$z))}
    if (tzlabel == "t") {df$t = as.numeric(as.character(df$t))}
    if (digits !=  "") {df = round_df(df, digits)}
    return(df)
  } 
}

Bf_powercalc

Author: Elizabeth Wonnacott.

This works with the Bf function above. It requires the same values as that function (i.e. the obtained mean and SE for the current sample, a value for the predicted mean, which is set to be sdtheory (with meanoftheory=0), and the current number of participants N). However rather than return BF for current sample, it works out what the BF would be for a range of different subject numbers (assuming that the SE scales with sqrt(N)).

Bf_powercalc<-function(sd, obtained, uniform, lower=0, upper=1, meanoftheory=0, sdtheory=1, tail=1, N, min, max)
{
  
  x = c(0)
  y = c(0)
  # note: working out what the difference between N and df is (for the contrast between two groups, this is 2; for constraints where there is 4 groups this will be 3, etc.)  
  for(newN in min : max)
  {
    B = as.numeric(Bf(sd = sd*sqrt(N/newN), obtained, uniform, lower, upper, meanoftheory, sdtheory, tail)[3])
    x= append(x,newN)  
    y= append(y,B)
    output = cbind(x,y)
    
  } 
  output = output[-1,] 
  return(output) 
}

Bf_range

Author: Elizabeth Wonnacott Updated by: Catriona Silvey on 16.04.2021

This works with the Bf function above. It requires the obtained mean and SE for the current sample and works out what the BF would be for a range of predicted means (which are set to be sdtheoryrange (with meanoftheory=0)).

Bf_range function can be used with both old and new versions of Bf function. By default, uses old version (for backward compatibility) if using new version, specify method = “new”. The “new” function is an update of the Bf_range function in order to use the new Bf updated version. It works with normal, t and Cauchy H1s, and normal or t likelihoods.

Bf_range <- function(sd, obtained, dfdata = 1, likelihood = c("normal", "t"), 
                     modeloftheory= c("normal","t","cauchy") , meanoftheory = 0,
                     modeoftheory = 0, sdtheoryrange, dftheory = 1, tail = 1, method = "old")
{
  if (method == "old") {
    
    x = c(0)
    y = c(0)
    
    for(sdi in sdtheoryrange)
    {
      #sdi = sdtheoryrange[1]
      # uses old Bf method
      B = as.numeric(Bf(sd, obtained, meanoftheory=0, uniform = 0, sdtheory=sdi, tail=tail)[3])
      
      #following line corrects for the fact that the calcuator does not correctly compute BF when sdtheory==0; this code ensures that if sdtheory ==0, BF=1
      
      if (sdi ==0 ) {B=1}
      
      x= append(x,sdi)  
      y= append(y,B)
      output = cbind(x,y)
      
    }
    
  }
  else if (method == "new") {
    
    x = c(0)
    y = c(0)
    
    for(sdi in sdtheoryrange)
    {
      #sdi = sdtheoryrange[1]
      # uses new Bf method
      B = as.numeric(Bf(sd = sd, obtained = obtained, dfdata = dfdata, likelihood = likelihood, 
                        modeloftheory = modeloftheory, modeoftheory=modeoftheory, scaleoftheory=sdi, 
                        dftheory = dftheory, tail = tail, method="new"))
      
      #following line corrects for the fact that the calcuator does not correctly compute BF when sdtheory==0; this code ensures that if sdtheory ==0, BF=1
      
      if (sdi ==0 ) {B=1}
      
      x= append(x,sdi)  
      y= append(y,B)
      output = cbind(x,y)
      
    }
    
  }
  output = output[-1,] 
  colnames(output) = c("sdtheory", "BF")
  return(output) 
}

Bf_set

Bf_set is a wrapper around Bf which allows us to caculate a set of BFs and put them in a table. It can only be used with non-uniform method and each h1 must be positive (as for original calculator).

Bf_set <-function(names_list, meandiff_list, sd_list,  h1_list, tail_list)
{
  Bfs = vector('double')
  for (i in 1:length(meandiff_list)){
    Bfs[i] = Bf(sd_list[i], meandiff_list[i], uniform = 0, meanoftheory = 0, sdtheory=h1_list[i] , tail = tail_list[i])$BayesFactor
    
  }
  
  df = data.frame(names_list, cbind(round(meandiff_list,3), round(sd_list,3),  h1_list, round(Bfs,3)))
  colnames(df) = c("Contrast", "Mean difference", "SE", "H1 estimate", "BF" )
  return(df)
  
  kable(df)
}

confidence interval calculators (95% CI)

lower_ci.R and upper_ci.R are functions made available publicly in this Rcommunity post

Example of how to use it::

library(tidyverse)
test <- data.frame(
  n = c(298, 298, 298, 298, 298, 298, 298, 298, 298, 298, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
  run = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
          1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  mAP = c(0.8112, 0.8006, 0.8076, 0.7999, 0.8067, 0.8046, 0.8004, 0.799,
          0.8052, 0.8002, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.8333, 0.8333,
          0.8333, 1, 0.8333, 1, 1, 0.8333, 1, 1)
)

lower_ci <- function(mean, se, n, conf_level = 0.95){
  lower_ci <- mean - qt(1 - ((1 - conf_level) / 2), n - 1) * se
}

upper_ci <- function(mean, se, n, conf_level = 0.95){
  upper_ci <- mean + qt(1 - ((1 - conf_level) / 2), n - 1) * se
}

foobar <- test %>%
  group_by(n) %>%
  summarise(smean = mean(mAP, na.rm = TRUE),
            ssd = sd(mAP, na.rm = TRUE)) %>%
  mutate(se = ssd / sqrt(n),
         lower_ci = lower_ci(smean, se, n),
         upper_ci = upper_ci(smean, se, n))

foobar         

Functions below:

lower_ci <- function(mean, se, n, conf_level = 0.95){
  lower_ci <- mean - qt(1 - ((1 - conf_level) / 2), n - 1) * se
}

upper_ci <- function(mean, se, n, conf_level = 0.95){
  upper_ci <- mean + qt(1 - ((1 - conf_level) / 2), n - 1) * se
}

deleteRandomRows

This takes a dataframe (df) and an integer (n) and returns a df which has n rows randomly deleted

deleteRandomRows = function(df,n){
  indices <- sample(1:nrow(df), n)
  df <- df[-indices,]
  return(df)
}

download_from_Gorilla

Author: Eva Viviani

This function is a wrapper around Ferenc Igali’s script.

It downloads the data from Gorilla automatically, provided that you set your Gorilla login credentials into R.

TO SET YOUR GORILLA LOGIN CREDENTIALS: Install “keyring” package and run in R the following lines:

Change the “username” with the email you use for login into Gorilla. Keyring package will ask and save your password within R. These information are passed into the function below to log into Gorilla and access your data. Only if you have set your login credentials, you can run the “download_from_Gorilla” function below.

Arguments:

  • output: a character string with the name of the folder where the downloaded file is saved

  • ulr: a character string, or a vector of characters, with the links (starting with “https”) that I want to download

  • myGorillaemail: the email that I use to log into Gorilla

Example of how to use the function:

output <- c("C:/Users/...")
previewDownloadFile <- read.csv("data.csv") #this is a preview file of the eyetracker zone that contains in the "Response" column my links. One for each trial.
url <- previewDownloadFile[grepl("https", previewDownloadFile$Response),]$Response
download_from_Gorilla(output, url, "eva.viviani@education.ox.ac.uk")

Function below:

download_from_Gorilla <-function(output, url, myGorillaemail){
  if (!require(downloader)) {
    stop("downloader not installed")
  } else if (!require(keyring)){
    stop("keyring not installed")
  } else if (!require(httr)){
    stop("httr not installed") 
  } else {
    print("------ start download -------")
  };
  
  #---------------------- get credentials and login ----------------------------#
  login <- list(
    email = myGorillaemail, 
    password = keyring::key_get("Gorilla", myGorillaemail))
  login_res <- POST("https://gorilla.sc/api/login", body = login, encode = "form")
  
  #---------------------- cycle over the list of urls --------------------------#
  for (i in 1:length(url)){
    file_end <- ""
    experiment_download_url <- GET(url[i])
    content_type_you_need_to_save <- experiment_download_url$headers$`content-type`
    
    #------------------------- check content type -------------------------------# 
    if (content_type_you_need_to_save == "text/csv") {
      file_end <- ".csv"
    } else if (content_type_you_need_to_save == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") {
      file_end <- ".xlsx"
    } else if (content_type_you_need_to_save == "audio/wav") {
      file_end <- ".weba"
    } else if (content_type_you_need_to_save == "video/webm;codecs=vp8") {
      file_end <- ".webm"
    }
    
    file_download_url <- experiment_download_url$url
    file_name_saver <- paste0(substring(as.character(url[i]),54,nchar(as.character(url[i]))))
    
    #download the file
    downloader::download(file_download_url, paste0(output, file_name_saver), mode = "wb", quiet = T)
    print(paste0("file ",i))
  }
  print("------ download done -------")
}

filter2

This is a function which filters a column of data removing values which are some number of standard deviations above/below the mean for that participant, possibly in some condition/subcondition.

  • im: the input matrix (a data frame)

  • svn: a list of the names of factors to be group by (subject name + one or more conditions)

  • fn: the name of the column containing the data to be filtered

  • lim: how many standard deviations above/below the mean to filter

The function returns an input matrix identical to the input matrix but with an additional columns giving the group means and the “filtered” data.

filter2 = function(im, svn, fn, lim)
{
  ## work out means lisfor each subject for each word
  
  x = list()
  y = ""
  
  for(n in svn) x=append(im[n],x)
  for(n in svn) y=paste(y,n,sep="_")
  
  means = aggregate(im[fn], by = x, mean, na.rm=T)
  head(means)
  nocols = dim(means)[2]
  colnames(means)[nocols] = "means"
  
  sds = aggregate(im[fn], by = x, sd, na.rm=T)
  head(sds)
  nocols = dim(sds)[2]
  colnames(sds)[nocols] = "sds"
  
  gs = merge(means,sds)
  
  ## because if there is just one value it doesn't have a stand deviation and don't want to just disregard all of these
  gs$sds[is.na(gs$sds)] = 0 
  
  gs$max = gs$means + lim*gs$sds
  gs$min = gs$means- lim*gs$sds
  
  im2 = merge(im,gs, sort=F)
  
  
  im2[paste(fn,"filt",sep="")] = im2[fn]
  cn= dim(im2)[2] ## get colnumber (last one added)
  
  im2[,cn][im2[,fn]> im2$max] = ""
  
  im2[,cn][im2[,fn]< im2$min] = ""
  
  im2[,cn]= as.numeric(im2[,cn])
  
  
  names(im2)[names(im2)=="means"] = paste("mean", y, sep="_") 
  names(im2)[names(im2)=="sds"] = paste("sd", y, sep="_") 
  names(im2)[names(im2)=="max"] = paste("max", y, sep="_") 
  names(im2)[names(im2)=="min"] = paste("min", y, sep="_") 
  
  return(im2)
}

generate_bin

Author: Catriona Silvey

This function generates some multilevel binary data.

Arguments:

  • n_subj = number of subjects

  • n_obs = total number of trials per subject

  • alpha = veridical grand mean log odds performance

  • beta1 = veridical effect of cond1

  • beta2 = veridical effect of cond2

  • beta3 = veridical cond1 * cond2 interaction effect

  • subj_corrs = list of correlations between random effects

  • subj_tau = list of SDs of random effects

generate_bin <- function(n_subj, n_obs, alpha, beta1, beta2, beta3, subj_corrs, subj_tau) {
  # make data frame where number of rows = number of subjects * number of trials per subject
  data <- data.frame(matrix(0, ncol=0, nrow = n_subj * n_obs))
  # make subject vector and add to this data frame
  data$subject <- as.factor(rep(seq(1:n_subj), each = n_obs))
  # make condition 1 vector - within subjects
  # half trials one value, half trials the other
  data$cond1 <- as.factor(rep(c(0,1), each = n_obs/2))
  # make centred version
  data$c_cond1 <- as.numeric(data$cond1) - mean(as.numeric(data$cond1))
  # make condition 2 vector - also within subjects
  # 1/4 trials one value, 1/4 trials the other, then repeat
  # this ensures cond1 and cond2 are not identical
  data$cond2 <- as.factor(rep(c(0,1), each = n_obs/4))
  # make centred version
  data$c_cond2 <- as.numeric(data$cond2) - mean(as.numeric(data$cond2))
  # for subject effects
  # first, we put the correlations between the random effects in a matrix
  # * if changing to simulate fewer random effects & hence fewer correlations, 
  # this will need to be adjusted
  corr_matrix <- matrix(c(1, subj_corrs[1], subj_corrs[2], subj_corrs[3],
                          subj_corrs[1], 1, subj_corrs[4], subj_corrs[5],
                          subj_corrs[2], subj_corrs[4], 1, subj_corrs[6],
                          subj_corrs[3], subj_corrs[5], subj_corrs[6], 1), nrow = 4)
  # next, construct variance covariance matrix for subject effects
  # We multiply the subject effect sds (in matrix form) by the correlation matrix
  # and then again by the subject effect sds
  # so we end up with the sds squared (on the diagonal) = variance, 
  # and covariances between each pair of subject effects on the off-diagonal
  # * if changing to simulate fewer random effects, this should still work fine,
  # assuming corr_matrix has been adjusted appropriately
  subj_v_cov <- diag(subj_tau) %*% corr_matrix %*% diag(subj_tau)
  # Create the correlated subject effects, using mvrnorm to sample from multivariate normal distribution
  # means of subject intercepts and slopes are 0
  u <- mvrnorm(n = n_subj, c(0,0,0,0), subj_v_cov)
  # check the correlation - this should be fairly accurate in large samples
  # print(cor(u))
  # check the SDs - again, should be fairly accurate in large samples
  # print(sd(u[,1]))
  # print(sd(u[,2]))
  # print(sd(u[,3]))
  # print(sd(u[,4]))
  # finally, generate data on the basis of these parameters
  data <- data %>%
    mutate(
      # We first calculate the linear predictor eta for each row in the data frame
      # = overall intercept + subject intercept +
      eta = alpha + u[data$subject,1] +
        # cond1 value * (cond1 fixed effect + cond1 random slope) +
        data$c_cond1 * (beta1 + u[data$subject,2]) + 
        # cond2 value * (cond2 fixed effect + cond2 random slope) +
        data$c_cond2 * (beta2 + u[data$subject,3]) +
        # cond1 * cond2 value * (interaction fixed effect + interaction random slope) +
        (data$c_cond1 * data$c_cond2) * (beta3 + u[data$subject,4]),
      # then transform by inverse logit to a probability (for this combo of subject/condition)
      mu = inv.logit(eta),
      # finally, generate a 0 or 1 for this row based on the probability for this subject/condition
      y = rbinom(nrow(data),1,mu))
  return(data)
}

getmode

Author: Tutorials point.

getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

get_coeffs

This function allows us to inspect particular coefficients from the output of an LME model by putting them in table.

  • x: the output returned when running lmer or glmer (i.e. an object of type lmerMod or glmerMod)

  • list: a list of the names of the coefficients to be extracted (e.g. c(“variable1”, “variable1:variable2”))

get_coeffs <- function(x,list){(as.data.frame(summary(x)$coefficients)[list,])}

get_coeffs2

outputs a set of specific coefficients for an lmer model

x must match the form summary(lmermodel)$coefficients as applied to the output of an lmermodel. List should be a list of the coefficients which should be reported.

get_coeffs2 <- function(x,list){(x[list,])}

inverse_log_odd

It takes a log-odds value and calculates the proportion

inverse_log_odd<-function(lo) {exp(lo)/(1 + exp(lo))} 

lizCenter

Author: Elizabeth Wonnacott.

This function provides a wrapper around myCenter allowing to center a specific list of variables from a dataframe. The input is a dataframe (x) and a list of the names of the variables which you wish to center (listfname). The output is a copy of the dataframe with a column (numeric) added for each of the centered variables with each one labelled with it’s previous name with “.ct” appended. For example, if x is a dataframe with columns “a” and “b” lizCenter(x, list(“a”, “b”)) will return a dataframe with two additional columns, a.ct and b.ct, which are numeric, centered codings of the corresponding variables.

lizCenter= function(x, listfname) 
{
  for (i in 1:length(listfname)) 
  {
    fname = as.character(listfname[i])
    x[paste(fname,".ct", sep="")] = myCenter(x[fname])
  }
  
  return(x)
}

lizCenter2

This function provides a wrapper around lizCenter which removes rows with missing data in the the DV before centering- note the df will have less rows as well as the centered variables (so will need to be careful with this is working with multiple dvs).

  • dv - a string which is the column name of the column in the dataframe with the dv

  • x: data frame

  • listfname: a list of the variables to be centered (e.g. list(variable1,variable2))

The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labeled with the column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).

lizCenter2= function(x, listfname, dv) 
{
  x= droplevels(subset(x, is.na(x[as.character(dv)])==FALSE))
  x = lizCenter(x, listfname)
  
  
  return(x)
}

lizContrasts

Author: Elizabeth Wonnacott.

This function implements a simple coding scheme. It creates two centered dummy variables which stand in place of a three way factor (condition). This allows us to inspect each contrast separately against a reference level, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor. Note that the intercept corresponds to the mean of cell means, sometimes referred as grand mean.

The function takes a data frame (d), a factor from that database (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (baselevel).

lizContrasts= function(d, condition, baselevel) 
{
  
  condition = factor(condition)
  condition = relevel(condition, baselevel)
  
  a= (contrasts(condition)-apply(contrasts(condition),2,mean))
  d$dummy1[condition== rownames(a)[1]] <- a[1] 
  d$dummy1[condition== rownames(a)[2]] <- a[2] 
  d$dummy1[condition== rownames(a)[3]] <- a[3] 
  
  d$dummy2[condition== rownames(a)[1]] <- a[4] 
  d$dummy2[condition== rownames(a)[2]] <- a[5] 
  d$dummy2[condition== rownames(a)[3]] <- a[6] 
  
  name1 = paste(baselevel, rownames(a)[2],sep="_VERSUS_")
  name2 = paste(baselevel, rownames(a)[3],sep="_VERSUS_")
  
  d[name1] = d$dummy1 
  d[name2] = d$dummy2 
  
  d$dummy1 <-NULL 
  d$dummy2 <-NULL 
  
  return(d)
}

lizContrasts4

Author: Elizabeth Wonnacott.

This function is a version of the previous lizContrasts function but to be used to create three centered dummy variables which stand in place of a four-way factor (condition). Notes that this function implements a simple coding scheme. It creates three centered dummy variables which stand in place of a three way factor (condition). This allows us to inspect each contrast separately against a reference level, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor. Note that the intercept corresponds to the mean of cell means, sometimes referred as grand mean.

lizContrasts4= function(d, condition, baselevel) 
{
  #condition = pictest$condition
  condition = factor(condition)
  condition = relevel(condition, baselevel)
  a= (contrasts(condition)-apply(contrasts(condition),2,mean))
  
  d$dummy1[condition== rownames(a)[1]] <- a[1] 
  d$dummy1[condition== rownames(a)[2]] <- a[2] 
  d$dummy1[condition== rownames(a)[3]] <- a[3] 
  d$dummy1[condition== rownames(a)[4]] <- a[4] 
  d$dummy2[condition== rownames(a)[1]] <- a[5] 
  d$dummy2[condition== rownames(a)[2]] <- a[6] 
  d$dummy2[condition== rownames(a)[3]] <- a[7] 
  d$dummy2[condition== rownames(a)[4]] <- a[8] 
  d$dummy3[condition== rownames(a)[1]] <- a[9] 
  d$dummy3[condition== rownames(a)[2]] <- a[10] 
  d$dummy3[condition== rownames(a)[3]] <- a[11] 
  d$dummy3[condition== rownames(a)[4]] <- a[12] 
  
  name1 = paste(baselevel, rownames(a)[2],sep="_VERSUS_")
  name2 = paste(baselevel, rownames(a)[3],sep="_VERSUS_")
  name3 = paste(baselevel, rownames(a)[4],sep="_VERSUS_")
  
  d[name1] = d$dummy1 
  d[name2] = d$dummy2 
  d[name3] = d$dummy3 
  
  d$dummy1 <-NULL 
  d$dummy2 <-NULL 
  d$dummy3 <-NULL 
  
  return(d)
}

loadFunctionsGithub

Author: Eva Viviani.

This function loads automatically all the functions stored in the /tools folder. It requires three arguments:

  • urlFolder that is the url of the github at the master tree level

  • urlRaw, that is the raw address of the folder

  • listFunctions that is a vector containing the functions I want to download. If I want all of them, simply leave it empty, i.e., listFunctions <- NULL, otherwise specify the functions with the “.R” extention like this: e.g., listFunctions <- c(“Bf.R”,“lizContrasts.R”,“getmode.R”)

urlFolder <- 'https://api.github.com/repos/n400peanuts/languagelearninglab/git/trees/master?recursive=1'
urlRaw <- 'https://raw.githubusercontent.com/n400peanuts/languagelearninglab/master/tools/'
listFunctions <- NULL #this needs to be a list, e.g., listFunctions <- c("Bf.R","lizContrasts.R","getmode.R")
loadFunctionsGithub <-function(urlFolder, urlRaw, listFunctions){
  if (!require(httr)) {
    stop("httr not installed")
  } 
  else {
    print('----Downloading. Please wait----')
  };
  httr::GET(urlFolder)-> req
  stop_for_status(req)
  filelist <- unlist(lapply(content(req)$tree, "[", "path"), use.names = F)
  urlFunctions <- grep("docs/tools/", filelist, value = TRUE, fixed = TRUE)
  gsub("docs/tools/", "", urlFunctions) -> functions
  if (length(listFunctions) == 0){ #load all
    for (i in 1:length(functions)){
      httr::GET(paste0(urlRaw, functions[i]), ssl.verifypeer = FALSE)-> temp
      content(temp)->p3
      eval(parse(text = p3), envir = .GlobalEnv)
    } 
  } else {
    functions[functions %in% listFunctions]-> functionsIlike
    for (i in 1:length(functionsIlike)){
      httr::GET(paste0(urlRaw, functionsIlike[i]), ssl.verifypeer = FALSE)-> temp
      content(temp)->p3
      eval(parse(text = p3), envir = .GlobalEnv)
    }
  };
  print('----Download completed----')
}

logodds

This function takes a percentage p and returns the logodds value.

logodds <- function(p){log(p/(100-p))} 

myCenter

Author: Florian Jaegers.

This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame.

From Florian’s blog:

  • If the input is a numeric variable, the output is the centered variable.

  • If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor’s levels are converted into numerical values in their inherent order (if not specified otherwise, R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.

  • If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by “c” (e.g. “Variable1” will be “cVariable1”).

myCenter <- function(x) {
  if (is.numeric(x)) { return(x - mean(x, na.rm=T)) }
  if (is.factor(x)) {
    x= as.numeric(x)
    return(x - mean(x, na.rm=T))
  }
  if (is.data.frame(x) || is.matrix(x)) {
    m= matrix(nrow=nrow(x), ncol=ncol(x))
    colnames(m)= paste("c", colnames(x), sep="")
    
    for (i in 1:ncol(x)) {
      
      m[,i]= myCenter(x[,i])
    }
    return(as.data.frame(m))
  }
}

normDataWithin

Author: Cookbook for R.

This function is used by the SummarySEWithin function above. From that website:

Norms the data within specified groups in a data frame; it normalizes each subject (identified by idvar) so that they have the same mean, within each group specified by betweenvars.

  • data: a data frame

  • idvar: the name of a column that identifies each subject (or matched subjects)

  • measurevar: the name of a column that contains the variable to be summarized

  • betweenvars: a vector containing names of columns that are between-subjects variables

  • na.rm: a boolean that indicates whether to ignore NA’s

normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {
  #library(plyr)
  
  # Measure var on left, idvar + between vars on right of formula.
  data.subjMean <- ddply(data, c(idvar, betweenvars), .drop=.drop,
                         .fun = function(xx, col, na.rm) {
                           c(subjMean = mean(xx[,col], na.rm=na.rm))
                         },
                         measurevar,
                         na.rm
  )
  
  # Put the subject means with original data
  data <- merge(data, data.subjMean)
  
  # Get the normalized data in a new column
  measureNormedVar <- paste(measurevar, "_norm", sep="")
  data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
    mean(data[,measurevar], na.rm=na.rm)
  
  # Remove this subject mean column
  data$subjMean <- NULL
  
  return(data)
}

num.decimals

Author: Elizabeth Wonnacott

num.decimals <- function(x) {
  stopifnot(class(x)=="numeric")
  x <- sub("0+$","",x)
  x <- sub("^.+[.]","",x)
  nchar(x)
}

percentage

This function does the reverse of the function above.

percentage <- function(logodds) {(exp(logodds)/(1+exp(logodds)) ) * 100}

round_df

Author: Elizabeth Wonnacott

Take a data frame “df” and round any numeric column by to the number of dp given in digits.

round_df <- function(df, digits) {
  for(i in 1: ncol(df)){
    
    if (is.numeric(df[,i])) { 
      df[,i]= round(df[,i],digits) }
  }
  
  return(df)
}

selectCenter

Author: Elizabeth Wonnacott

This function provides a wrapper around myCenter allowing you to center a specific list of variables from a data frame.

  • x: data frame
  • listfname: a list of the variables to be centered (e.g. list(variable1,variable2))

The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with the column’s previous name, but with “.ct” appended (e.g., “variable1” will become “variable1.ct”).

selectCenter= function(x, listfname) 
{
  for (i in 1:length(listfname)) 
  {
    fname = as.character(listfname[i])
    x[paste(fname,".ct", sep="")] = myCenter(x[fname])
  }
  
  return(x)
}

summarySE

Author: Cookbook for R.

It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).

  • data: a data frame

  • measurevar: the name of a column that contains the variable to be summarized

  • groupvars: a vector containing the names of the columns that contain grouping variables

  • na.rm: a boolean that indicates whether to ignore NA’s

  • conf.interval: the percent range of the confidence interval (default is 95%)

summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE,
                     conf.interval = .95, .drop = TRUE) {
  require(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- plyr::rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}

summarySEwithin

Author: Cookbook for R. dataset argument implemented by: Eva Viviani and improved by Elizabeth Wonnacott

From that website:

Summarizes data, handling within-subjects variables by removing inter-subject variability. It will still work if there are no within-subjects variables. Gives count, un-normed mean, normed mean (with same between-group mean), standard deviation, standard error of the mean, and confidence interval. If there are within-subject variables, calculate adjusted values using method from Morey (2008).

  • data: a data frame

  • measurevar: the name of a column that contains the variable to be summarized

  • betweenvars: a vector containing names of columns that are between-subjects variables

  • withinvars: a vector containing names of columns that are within-subjects variables

  • idvar: the name of a column that identifies each subject (or matched subjects)

  • na.rm: a boolean that indicates whether to ignore NA’s

  • conf.interval: the percent range of the confidence interval (default is 95%)

  • dataset: if NULL (default), will produce a new dataset. If this is set to “old” it adds the confidence intervals and groupmean into the original dataframe, rather than into a new one

summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE, dataset = NULL) {
  
  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, c(betweenvars, withinvars), drop=FALSE],
                       FUN=is.factor, FUN.VALUE=logical(1))
  
  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }
  
  # Get the means from the un-normed data
  datac <- summarySE(data, measurevar, groupvars=c(betweenvars, withinvars),
                     na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)
  
  # Drop all the unused columns (these will be calculated with normed data)
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL
  
  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)
  
  # This is the name of the new column
  measurevar_n <- paste(measurevar, "_norm", sep="")
  
  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars=c(betweenvars, withinvars),
                      na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)
  
  # Apply correction from Morey (2008) to the standard error and confidence interval
  #  Get the product of the number of conditions of within-S variables
  nWithinGroups    <- prod(vapply(ndatac[,withinvars, drop=FALSE], FUN=nlevels,
                                  FUN.VALUE=numeric(1)))
  correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )
  
  # Apply the correction factor
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor
  
  # Combine the un-normed means with the normed results
  merge(datac, ndatac) -> data2
  
  if (!is.null(dataset)){ # if we want the info in the same dataset:
    colnames(data2)[colnames(data2)==measurevar] <- "groupmean"
    out = merge(data,data2)
    return(out)
    
  } else { # we want a new dataset
    return(data2)
  }
  
}