Skip to content
Snippets Groups Projects
Select Git revision
  • 0ecd92648eaa50f5850b11655edcd43a69d50016
  • master default protected
2 results

supervised_pipeline_aux_functions.R

Blame
  • supervised_pipeline_aux_functions.R 19.92 KiB
    ##################################################
    ## Project: Functions 
    ## Date: 23.12.2021
    ## Author: Joeri Bordes
    ###################################################
    
    
    # Bar graph production
      
    barplot_standard <- function(data,xdata, ydata, y_axe_limits, y_axe_breaks, palette)
    {
      ggplot(data, aes(x=xdata, y=ydata, fill = xdata)) +
        stat_summary(fun = "mean", geom = "bar", width = barwidth) +
        stat_summary(fun.data = mean_cl_normal, geom = "errorbar", fun.args = list(mult = sem_amount), width = errorbar_width, size = line_sizes, alpha = errorbar_alpha) +
        #  geom_point(size = 1) +
        theme_light() +
        scale_fill_manual(values=palette) +
        theme(legend.position="right") +
        theme_classic() +
        labs(y = ylabel_name, title = yplot) +
        theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
        scale_y_continuous(expand = c(0, 0), breaks = y_axe_breaks) +
        coord_cartesian(ylim=y_axe_limits) +
        theme(axis.line = element_line(colour = 'black', size = line_sizes),
            axis.ticks.y = element_line(size = line_sizes)) +
        theme(legend.position = "none") +
        theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
              axis.title.y = element_text(size = y_title), 
              axis.title.x=element_blank(),
              axis.text.x=element_text(size = x_y_labelsize, color = "black"),
              axis.ticks.x = element_line(size = line_sizes)) +
        scale_x_discrete(labels=c("Nonstressed" = "NS", "Stressed" = "CSDS")) +
        theme(
          panel.background = element_rect(fill = "transparent"), # bg of the panel
          plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
          panel.grid.major = element_blank(), # get rid of major grid
          panel.grid.minor = element_blank(), # get rid of minor grid
          legend.background = element_rect(fill = "transparent"), # get rid of legend bg
          legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
        ) 
    }
    
    
    # Timeline graph
    
    timeline_graph_standard <- function(data, ydata, y_axe_limits, y_axe_breaks, palette)
    {
      ggplot(data, aes(x=BW_day, y=ydata, fill = condition)) +
        stat_summary(fun.min = function(x) mean(x) - se(x), fun.max = function(x) mean(x) + se(x), geom = "ribbon", aes(group = condition), alpha = geomribbon_alpha) +
        scale_fill_manual(values=palette) +
        geom_line(stat = "summary", fun = "mean", size=line_plotsize, aes(color = condition, group = condition)) +
        geom_point(stat = "summary", fun = "mean", size = point_thick) +
        theme_light() +
        scale_color_manual(values=palette) +
        theme_classic() +
        theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
              axis.title.y = element_text(size = y_title), 
              axis.title.x = element_text(size = y_title),
              axis.text.x=element_text(size = x_y_labelsize, color = "black"),
              axis.ticks.x = element_line(size = line_sizes)) +
        labs(y = ylabel_name, x = xlabel_name, title = yplot) +
        theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
        scale_y_continuous(expand = c(0, 0), breaks = y_axe_breaks) +
        coord_cartesian(ylim=y_axe_limits) +
        theme(axis.line = element_line(colour = 'black', size = line_sizes),
              axis.ticks.y = element_line(size = line_sizes)) +
        theme(legend.position = "top", legend.title = element_blank(), legend.text = element_text(size = x_y_labelsize)) +
        scale_x_discrete(labels=xlabels) +
        theme(
          panel.background = element_rect(fill = "transparent"), # bg of the panel
          plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
          panel.grid.major = element_blank(), # get rid of major grid
          panel.grid.minor = element_blank(), # get rid of minor grid
          legend.background = element_rect(fill = "transparent"), # get rid of legend bg
          #legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
        )
      }
    
    # PCA graph
    
    PCAplot_standard <- function(data, xdata, ydata, label, labelnames, y_axe_limits, y_axe_breaks, x_axe_limits, x_axe_breaks, palette)
    {
      ggplot(data, aes(x=xdata, y=ydata, label=label, colour = label)) +
        stat_ellipse(linetype = 1, level = 0.95, size = 0.8)  +
        geom_point(size = 3) +
        xlab(paste("PC1 - ", labelnames[1], "%", sep="")) +
        ylab(paste("PC2 - ", labelnames[2], "%", sep="")) +
        theme(legend.title=element_blank()) +
        scale_color_manual(values=palette) +
        theme(legend.position="right") +
        theme_classic() +
        theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
              axis.title.y = element_text(size = y_title), 
              axis.title.x=element_text(size = y_title),
              axis.text.x=element_text(size = x_y_labelsize, color = "black"),
              axis.ticks.x = element_line(size = line_sizes)) +
        labs(title = yplot) +
        theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
        scale_y_continuous(expand = c(0, 0), n.breaks = y_axe_breaks) +
        coord_cartesian(xlim=x_axe_limits, ylim=y_axe_limits) + 
        scale_x_continuous(expand = c(0, 0), n.breaks = x_axe_breaks) +
        theme(axis.line = element_line(colour = 'black', size = line_sizes),
              axis.ticks.y = element_line(size = line_sizes)) +
        theme(
          panel.background = element_rect(fill = "transparent"), # bg of the panel
          plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
          panel.grid.major = element_blank(), # get rid of major grid
          panel.grid.minor = element_blank(), # get rid of minor grid
          legend.background = element_rect(fill = "transparent"), # get rid of legend bg
          #   legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
        )
      
    }
    
    
    # Violin plot for pCA analysis
    
    violinplot_standard <- function(data, xdata, ydata, label, y_axe_limits, y_axe_breaks, palette, ylab)
    {
      ggplot(data, aes(x=xdata, y=ydata, label=label, colour=label)) +
        geom_violin(size=.75)+
        geom_boxplot(width=0.1)+
        geom_jitter(position=position_jitter(0.1)) +
        # xlab("Day") +
        ylab(ylab) +
        scale_color_manual(values=palette) +
        theme(legend.title=element_blank()) +
        theme_classic() +
        theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
              axis.title.y = element_text(size = y_title), 
              axis.title.x=element_text(size = y_title),
              axis.text.x=element_text(size = x_y_labelsize, color = "black"),
              axis.ticks.x = element_line(size = line_sizes)) +
        labs(title = yplot) +
        theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
        coord_cartesian(ylim=y_axe_limits) +
        scale_y_continuous(expand = c(0, 0), n.breaks = y_axe_breaks) +
        theme(axis.line = element_line(colour = 'black', size = line_sizes),
              axis.ticks.y = element_line(size = line_sizes)) +
        theme(legend.position = "none") +
        theme(axis.title.x=element_blank()) +
        theme(
          panel.background = element_rect(fill = "transparent"), # bg of the panel
          plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
          panel.grid.major = element_blank(), # get rid of major grid
          panel.grid.minor = element_blank(), # get rid of minor grid
          legend.background = element_rect(fill = "transparent"), # get rid of legend bg
          #   legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
        )
      
      
    }
    
    
    # Lollipop plot for correlation analysis
    lollipop_corr <- function(data, x_axe_breaks){
      ggplot(data, aes(x = abs(cor),
                       y = row,
                       fill = Sign)) +
        geom_bar(stat = 'identity', width = 0.07) +
        geom_point(size = 4, pch = 19, alpha = 1, aes(col = Sign)) +
        geom_point(data = subset(corr_zscore2.5_SI, Sig), size = 3, pch = 1, color = 'black', stroke = 2) +
        scale_fill_manual(values = palette_posneg, name = '', guide = F) +
        scale_color_manual(values = palette_posneg, name = '') +
        labs(y = NULL, x = 'Absolute Correlation') + 
        scale_x_continuous(expand = c(0, 0, 0, 0.1), n.breaks = x_axe_breaks) +
        scale_y_discrete(expand = c(0, 0.5, 0, 2)) +
        theme(legend.position = "none") +
        theme(legend.title=element_blank()) +
        theme_classic() +
        theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
              axis.title.y = element_text(size = y_title), 
              axis.title.x=element_text(size = y_title),
              axis.text.x=element_text(size = x_y_labelsize, color = "black"),
              axis.ticks.x = element_line(size = line_sizes)) +
        # labs(title = yplot) +
        theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
        theme(axis.line = element_line(colour = 'black', size = line_sizes),
              axis.ticks.y = element_line(size = line_sizes)) +
        theme(
          panel.background = element_rect(fill = "transparent"), # bg of the panel
          plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
          panel.grid.major = element_blank(), # get rid of major grid
          panel.grid.minor = element_blank(), # get rid of minor grid
          legend.background = element_rect(fill = "transparent"), # get rid of legend bg
          #   legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
        )
    }
    
    
    
    
    
    
    
    # Lollipop plot for top contributing behaviors in the PCA
    lollipop_topcontbeh_standard <- function(data, x_axe_breaks)
    {
      ggplot(data, aes(x = abs(score),
                                         y = name,
                                         fill = Sign)) +
      geom_bar(stat = 'identity', width = 0.05) +
      geom_point(size = 2.5, pch = 19, alpha = 1, aes(col = Sign)) +
      # scale_fill_manual(values = c(hm.palette(9)[8], hm.palette(9)[1]), name = '', guide = F) +
      #  scale_color_manual(values = c(hm.palette(9)[8], hm.palette(9)[1]), name = '') +
      labs(y = NULL, x = 'PC1 score') + 
      scale_x_continuous(expand = c(0, 0, 0, 0.1), n.breaks = x_axe_breaks) +
      scale_y_discrete(expand = c(0, 0.5, 0, 2)) +
      theme(legend.position = "none") +
      theme(legend.title=element_blank()) +
      theme_classic() +
      theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
            axis.title.y = element_text(size = y_title), 
            axis.title.x=element_text(size = y_title),
            axis.text.x=element_text(size = x_y_labelsize, color = "black"),
            axis.ticks.x = element_line(size = line_sizes)) +
      # labs(title = yplot) +
      theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
      theme(axis.line = element_line(colour = 'black', size = line_sizes),
            axis.ticks.y = element_line(size = line_sizes)) +
      theme(
        panel.background = element_rect(fill = "transparent"), # bg of the panel
        plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
        panel.grid.major = element_blank(), # get rid of major grid
        panel.grid.minor = element_blank(), # get rid of minor grid
        legend.background = element_rect(fill = "transparent"), # get rid of legend bg
        #   legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
      )
    }
    
    
    
    # Scatterplot for correlation data
    scatter_standard <- function(data, xdata, ydata, y_axe_limits, y_axe_breaks, xlab, ylab) {
      ggscatter(data, x = xdata, y = ydata, 
                add = "reg.line", conf.int = TRUE, 
                cor.coef = TRUE, cor.method = "pearson",
                cor.coef.size = 6,
                cor.coef.coord = c(1, 7.2),
                xlab = xlab, ylab = ylab,
                color = "black") +
        #color = "#A9CAFF") +
        theme_classic() +
        theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"), 
              axis.title.y = element_text(size = y_title), 
              axis.title.x=element_text(size = y_title),
              axis.text.x=element_text(size = x_y_labelsize, color = "black"),
              axis.ticks.x = element_line(size = line_sizes)) +
        labs(title = yplot) +
        theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
        scale_y_continuous(expand = c(0, 0), breaks = y_axe_breaks) +
        coord_cartesian(ylim=y_axe_limits) + 
        theme(axis.line = element_line(colour = 'black', size = line_sizes),
              axis.ticks.y = element_line(size = line_sizes)) +
        theme(
          panel.background = element_rect(fill = "transparent"), # bg of the panel
          plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
          panel.grid.major = element_blank(), # get rid of major grid
          panel.grid.minor = element_blank(), # get rid of minor grid
          legend.background = element_rect(fill = "transparent"), # get rid of legend bg
          #   legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
        )
    }
    
    
    # Take dataframe as input and spits out all important independent t-test information
    summary_ttest <- function(dataframe) {
      # Outlier check
      stat.outlier <- dataframe %>%
        group_by(variables) %>%
        identify_outliers(data)
      # Check normality of the data: Shapiro
      stat.shapiro <- dataframe %>%
        group_by(variables) %>%
        shapiro_test(data) %>%
        add_significance()
      # Check equality of the variance: Levene's test
      stat.levene <- dataframe %>%
        group_by(variables) %>%
        levene_test(data ~ condition) %>%
        add_significance()
      # Parametric test: Independent samples T-test
      para_stat.ttest <- dataframe %>%
        group_by(variables) %>%
        t_test(data ~ condition, var.equal = TRUE) %>%
        #  adjust_pvalue(method = "BH") %>%
        add_significance()
      # Parametric test: Welch's T-test
      para_stat.welchtest <- dataframe %>%
        group_by(variables) %>%
        t_test(data ~ condition, var.equal = FALSE) %>%
        #  adjust_pvalue(method = "BH") %>%
        add_significance()
      # Non-parametric test: Wilcox test
      nonp_stat.test <- dataframe %>%
        group_by(variables) %>%
        wilcox_test(data ~ condition) %>%
        #  adjust_pvalue(method = "BH") %>%
        add_significance()
      All_stats_tot_dur <- merge(stat.shapiro, stat.levene,  by = "variables")
      All_stats_tot_dur <- merge(All_stats_tot_dur, para_stat.ttest,  by = "variables")
      All_stats_tot_dur <- merge(All_stats_tot_dur, para_stat.welchtest,  by = "variables")
      All_stats_tot_dur <- merge(All_stats_tot_dur, nonp_stat.test,  by = "variables")
      
      
      return(All_stats_tot_dur)  
    }
    
    
    
    
    
    
    
    # Take dataframe as input and spits out all important ANOVA information
    stats_anova <- function(dataframe,grouping1,grouping2, measurement, ID) { 
      
      colnames(dataframe)[which(names(dataframe) == grouping1)] <- "grouping1"
      colnames(dataframe)[which(names(dataframe) == grouping2)] <- "grouping2"
      colnames(dataframe)[which(names(dataframe) == measurement)] <- "measurement"
      # summary
      summary <- dataframe %>%
        group_by(grouping1, grouping2) %>%
        get_summary_stats(measurement, type = "mean_sd")
      # Outlier check
      outlier <- dataframe %>%
        group_by(grouping1, grouping2) %>%
        identify_outliers(measurement)
      # Check normality of the data: Shapiro
      shapiro <- dataframe %>%
        group_by(grouping1, grouping2) %>%
        shapiro_test(measurement) %>%
        add_significance()
      # General test effect: two-way repeated measures ANOVA
      anova <- anova_test((measurement) ~ grouping1*grouping2, data = dataframe) %>%
        add_significance()
      # Post-hoc-analysis parametric
      # Effect of the condition (NS vs Stressed) at each time point
      t_test_bonferroni <- dataframe %>%
        group_by(grouping2) %>%
        anova_test(measurement ~ grouping1) %>%
        get_anova_table() %>%
        adjust_pvalue(method = "bonferroni") %>%
        add_significance()
      # Kruskal-Wallis test for independent samples non-parametric
      kruskal_nonp <- dataframe %>%
        group_by(grouping2) %>%
        kruskal_test(measurement ~ grouping1) %>%
        adjust_pvalue(method = "bonferroni") %>%
        add_significance()
      total_list <- list(summary = summary, outlier = outlier, shapiro = shapiro, anova = anova, t_test_bonferroni = t_test_bonferroni, kruskal_nonp = kruskal_nonp )
      
      
      return(total_list)  
    }
    
    
    
    
    # Take dataframe as input and spits out all important ANOVA information
    stats_anova_alternative <- function(dataframe,grouping1,grouping2, measurement, ID) { 
      
      colnames(dataframe)[which(names(dataframe) == grouping1)] <- "grouping1"
      colnames(dataframe)[which(names(dataframe) == grouping2)] <- "grouping2"
      colnames(dataframe)[which(names(dataframe) == measurement)] <- "measurement"
      # summary
      summary <- dataframe %>%
        group_by(grouping1, grouping2) %>%
        get_summary_stats(measurement, type = "mean_sd")
      # Outlier check
      outlier <- dataframe %>%
        group_by(grouping1, grouping2) %>%
        identify_outliers(measurement)
      # Check normality of the data: Shapiro
      shapiro <- dataframe %>%
        group_by(grouping1, grouping2) %>%
        shapiro_test(measurement) %>%
        add_significance()
      # General test effect: two-way repeated measures ANOVA
      anova <- aov((measurement) ~ grouping1*grouping2, data = dataframe)
      anova <- anova_summary(anova)
      # Post-hoc-analysis parametric
      # Effect of the condition (NS vs Stressed) at each time point
      t_test_bonferroni <- dataframe %>%
        group_by(grouping2) %>%
        anova_test(measurement ~ grouping1) %>%
        get_anova_table() %>%
        adjust_pvalue(method = "bonferroni") %>%
        add_significance()
      # Kruskal-Wallis test for independent samples non-parametric
      kruskal_nonp <- dataframe %>%
        group_by(grouping2) %>%
        kruskal_test(measurement ~ grouping1) %>%
        adjust_pvalue(method = "bonferroni") %>%
        add_significance()
      total_list <- list(summary = summary, outlier = outlier, shapiro = shapiro, anova = anova, t_test_bonferroni = t_test_bonferroni, kruskal_nonp = kruskal_nonp )
      
      
      return(total_list)  
    }
    
    
    
    
    
    # Old function for anova do not use!
    stats_anova_old <- function(dataframe,grouping1,grouping2, measurement, ID) { 
      # summary
      summary <- dataframe %>%
        group_by(!! sym(grouping1), !! sym(grouping2)) %>%
        get_summary_stats(!! sym(measurement), type = "mean_sd")
      # # Outlier check
      outlier <- dataframe %>%
        group_by(!! sym(grouping1), !! sym(grouping2)) %>%
        identify_outliers(!! sym(measurement))
      # Check normality of the data: Shapiro
      shapiro <- dataframe %>%
        group_by(!! sym(grouping1), !! sym(grouping2)) %>%
        shapiro_test(!! sym(measurement)) %>%
        add_significance()
      # General test effect: two-way repeated measures ANOVA
      anova <- anova_test(
        data = dataframe, dv = measurement, wid = ID,
        between = c(grouping1, grouping2)) %>% add_significance()
      anova <- get_anova_table(anova)
      # Post-hoc-analysis: effect of the condition (NS vs Stressed) at each time point
      t_test_bonferroni <- dataframe %>%
        group_by(!! sym(grouping2)) %>%
        anova_test(dv = measurement, between = c(grouping1)) %>%
        get_anova_table() %>%
        adjust_pvalue(method = "bonferroni") %>%
        add_significance()
      # General test effect: Kruskal-Wallis test for independent samples non-parametric
      res.kruskal1 <- dataframe %>%
        group_by(!! sym(grouping2)) %>%
        kruskal_test(!! sym(dataframe ~ grouping1)) %>%
        adjust_pvalue(method = "bonferroni") %>%
        add_significance()
      total_list <- list(summary = summary, outlier = outlier, shapiro = shapiro, anova = anova, t_test_bonferroni = t_test_bonferroni)
      
      
      return(total_list)  
    }
    
    
    
    # ++++++++++++++++++++++++++++
    # flattenCorrMatrix
    # ++++++++++++++++++++++++++++
    # cormat : matrix of the correlation coefficients
    # pmat : matrix of the correlation p-values
    flattenCorrMatrix <- function(cormat, pmat) {
      ut <- upper.tri(cormat)
      data.frame(
        row = rownames(cormat)[row(cormat)[ut]],
        column = rownames(cormat)[col(cormat)[ut]],
        cor  =(cormat)[ut],
        p = pmat[ut]
      )
    }