Select Git revision
supervised_pipeline_aux_functions.R

Lucas Miranda authored
supervised_pipeline_aux_functions.R 19.92 KiB
##################################################
## Project: Functions
## Date: 23.12.2021
## Author: Joeri Bordes
###################################################
# Bar graph production
barplot_standard <- function(data,xdata, ydata, y_axe_limits, y_axe_breaks, palette)
{
ggplot(data, aes(x=xdata, y=ydata, fill = xdata)) +
stat_summary(fun = "mean", geom = "bar", width = barwidth) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", fun.args = list(mult = sem_amount), width = errorbar_width, size = line_sizes, alpha = errorbar_alpha) +
# geom_point(size = 1) +
theme_light() +
scale_fill_manual(values=palette) +
theme(legend.position="right") +
theme_classic() +
labs(y = ylabel_name, title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
scale_y_continuous(expand = c(0, 0), breaks = y_axe_breaks) +
coord_cartesian(ylim=y_axe_limits) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(legend.position = "none") +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x=element_blank(),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
scale_x_discrete(labels=c("Nonstressed" = "NS", "Stressed" = "CSDS")) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# Timeline graph
timeline_graph_standard <- function(data, ydata, y_axe_limits, y_axe_breaks, palette)
{
ggplot(data, aes(x=BW_day, y=ydata, fill = condition)) +
stat_summary(fun.min = function(x) mean(x) - se(x), fun.max = function(x) mean(x) + se(x), geom = "ribbon", aes(group = condition), alpha = geomribbon_alpha) +
scale_fill_manual(values=palette) +
geom_line(stat = "summary", fun = "mean", size=line_plotsize, aes(color = condition, group = condition)) +
geom_point(stat = "summary", fun = "mean", size = point_thick) +
theme_light() +
scale_color_manual(values=palette) +
theme_classic() +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x = element_text(size = y_title),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
labs(y = ylabel_name, x = xlabel_name, title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
scale_y_continuous(expand = c(0, 0), breaks = y_axe_breaks) +
coord_cartesian(ylim=y_axe_limits) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(legend.position = "top", legend.title = element_blank(), legend.text = element_text(size = x_y_labelsize)) +
scale_x_discrete(labels=xlabels) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
#legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# PCA graph
PCAplot_standard <- function(data, xdata, ydata, label, labelnames, y_axe_limits, y_axe_breaks, x_axe_limits, x_axe_breaks, palette)
{
ggplot(data, aes(x=xdata, y=ydata, label=label, colour = label)) +
stat_ellipse(linetype = 1, level = 0.95, size = 0.8) +
geom_point(size = 3) +
xlab(paste("PC1 - ", labelnames[1], "%", sep="")) +
ylab(paste("PC2 - ", labelnames[2], "%", sep="")) +
theme(legend.title=element_blank()) +
scale_color_manual(values=palette) +
theme(legend.position="right") +
theme_classic() +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x=element_text(size = y_title),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
labs(title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
scale_y_continuous(expand = c(0, 0), n.breaks = y_axe_breaks) +
coord_cartesian(xlim=x_axe_limits, ylim=y_axe_limits) +
scale_x_continuous(expand = c(0, 0), n.breaks = x_axe_breaks) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
# legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# Violin plot for pCA analysis
violinplot_standard <- function(data, xdata, ydata, label, y_axe_limits, y_axe_breaks, palette, ylab)
{
ggplot(data, aes(x=xdata, y=ydata, label=label, colour=label)) +
geom_violin(size=.75)+
geom_boxplot(width=0.1)+
geom_jitter(position=position_jitter(0.1)) +
# xlab("Day") +
ylab(ylab) +
scale_color_manual(values=palette) +
theme(legend.title=element_blank()) +
theme_classic() +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x=element_text(size = y_title),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
labs(title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
coord_cartesian(ylim=y_axe_limits) +
scale_y_continuous(expand = c(0, 0), n.breaks = y_axe_breaks) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(legend.position = "none") +
theme(axis.title.x=element_blank()) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
# legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# Lollipop plot for correlation analysis
lollipop_corr <- function(data, x_axe_breaks){
ggplot(data, aes(x = abs(cor),
y = row,
fill = Sign)) +
geom_bar(stat = 'identity', width = 0.07) +
geom_point(size = 4, pch = 19, alpha = 1, aes(col = Sign)) +
geom_point(data = subset(corr_zscore2.5_SI, Sig), size = 3, pch = 1, color = 'black', stroke = 2) +
scale_fill_manual(values = palette_posneg, name = '', guide = F) +
scale_color_manual(values = palette_posneg, name = '') +
labs(y = NULL, x = 'Absolute Correlation') +
scale_x_continuous(expand = c(0, 0, 0, 0.1), n.breaks = x_axe_breaks) +
scale_y_discrete(expand = c(0, 0.5, 0, 2)) +
theme(legend.position = "none") +
theme(legend.title=element_blank()) +
theme_classic() +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x=element_text(size = y_title),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
# labs(title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
# legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# Lollipop plot for top contributing behaviors in the PCA
lollipop_topcontbeh_standard <- function(data, x_axe_breaks)
{
ggplot(data, aes(x = abs(score),
y = name,
fill = Sign)) +
geom_bar(stat = 'identity', width = 0.05) +
geom_point(size = 2.5, pch = 19, alpha = 1, aes(col = Sign)) +
# scale_fill_manual(values = c(hm.palette(9)[8], hm.palette(9)[1]), name = '', guide = F) +
# scale_color_manual(values = c(hm.palette(9)[8], hm.palette(9)[1]), name = '') +
labs(y = NULL, x = 'PC1 score') +
scale_x_continuous(expand = c(0, 0, 0, 0.1), n.breaks = x_axe_breaks) +
scale_y_discrete(expand = c(0, 0.5, 0, 2)) +
theme(legend.position = "none") +
theme(legend.title=element_blank()) +
theme_classic() +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x=element_text(size = y_title),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
# labs(title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
# legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# Scatterplot for correlation data
scatter_standard <- function(data, xdata, ydata, y_axe_limits, y_axe_breaks, xlab, ylab) {
ggscatter(data, x = xdata, y = ydata,
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
cor.coef.size = 6,
cor.coef.coord = c(1, 7.2),
xlab = xlab, ylab = ylab,
color = "black") +
#color = "#A9CAFF") +
theme_classic() +
theme(axis.text.y = element_text(size = x_y_labelsize, color = "black"),
axis.title.y = element_text(size = y_title),
axis.title.x=element_text(size = y_title),
axis.text.x=element_text(size = x_y_labelsize, color = "black"),
axis.ticks.x = element_line(size = line_sizes)) +
labs(title = yplot) +
theme(plot.title = element_text(hjust = title_location, size = title_size, face = "bold")) +
scale_y_continuous(expand = c(0, 0), breaks = y_axe_breaks) +
coord_cartesian(ylim=y_axe_limits) +
theme(axis.line = element_line(colour = 'black', size = line_sizes),
axis.ticks.y = element_line(size = line_sizes)) +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
panel.grid.major = element_blank(), # get rid of major grid
panel.grid.minor = element_blank(), # get rid of minor grid
legend.background = element_rect(fill = "transparent"), # get rid of legend bg
# legend.box.background = element_rect(fill = "transparent") # get rid of legend panel bg
)
}
# Take dataframe as input and spits out all important independent t-test information
summary_ttest <- function(dataframe) {
# Outlier check
stat.outlier <- dataframe %>%
group_by(variables) %>%
identify_outliers(data)
# Check normality of the data: Shapiro
stat.shapiro <- dataframe %>%
group_by(variables) %>%
shapiro_test(data) %>%
add_significance()
# Check equality of the variance: Levene's test
stat.levene <- dataframe %>%
group_by(variables) %>%
levene_test(data ~ condition) %>%
add_significance()
# Parametric test: Independent samples T-test
para_stat.ttest <- dataframe %>%
group_by(variables) %>%
t_test(data ~ condition, var.equal = TRUE) %>%
# adjust_pvalue(method = "BH") %>%
add_significance()
# Parametric test: Welch's T-test
para_stat.welchtest <- dataframe %>%
group_by(variables) %>%
t_test(data ~ condition, var.equal = FALSE) %>%
# adjust_pvalue(method = "BH") %>%
add_significance()
# Non-parametric test: Wilcox test
nonp_stat.test <- dataframe %>%
group_by(variables) %>%
wilcox_test(data ~ condition) %>%
# adjust_pvalue(method = "BH") %>%
add_significance()
All_stats_tot_dur <- merge(stat.shapiro, stat.levene, by = "variables")
All_stats_tot_dur <- merge(All_stats_tot_dur, para_stat.ttest, by = "variables")
All_stats_tot_dur <- merge(All_stats_tot_dur, para_stat.welchtest, by = "variables")
All_stats_tot_dur <- merge(All_stats_tot_dur, nonp_stat.test, by = "variables")
return(All_stats_tot_dur)
}
# Take dataframe as input and spits out all important ANOVA information
stats_anova <- function(dataframe,grouping1,grouping2, measurement, ID) {
colnames(dataframe)[which(names(dataframe) == grouping1)] <- "grouping1"
colnames(dataframe)[which(names(dataframe) == grouping2)] <- "grouping2"
colnames(dataframe)[which(names(dataframe) == measurement)] <- "measurement"
# summary
summary <- dataframe %>%
group_by(grouping1, grouping2) %>%
get_summary_stats(measurement, type = "mean_sd")
# Outlier check
outlier <- dataframe %>%
group_by(grouping1, grouping2) %>%
identify_outliers(measurement)
# Check normality of the data: Shapiro
shapiro <- dataframe %>%
group_by(grouping1, grouping2) %>%
shapiro_test(measurement) %>%
add_significance()
# General test effect: two-way repeated measures ANOVA
anova <- anova_test((measurement) ~ grouping1*grouping2, data = dataframe) %>%
add_significance()
# Post-hoc-analysis parametric
# Effect of the condition (NS vs Stressed) at each time point
t_test_bonferroni <- dataframe %>%
group_by(grouping2) %>%
anova_test(measurement ~ grouping1) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# Kruskal-Wallis test for independent samples non-parametric
kruskal_nonp <- dataframe %>%
group_by(grouping2) %>%
kruskal_test(measurement ~ grouping1) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
total_list <- list(summary = summary, outlier = outlier, shapiro = shapiro, anova = anova, t_test_bonferroni = t_test_bonferroni, kruskal_nonp = kruskal_nonp )
return(total_list)
}
# Take dataframe as input and spits out all important ANOVA information
stats_anova_alternative <- function(dataframe,grouping1,grouping2, measurement, ID) {
colnames(dataframe)[which(names(dataframe) == grouping1)] <- "grouping1"
colnames(dataframe)[which(names(dataframe) == grouping2)] <- "grouping2"
colnames(dataframe)[which(names(dataframe) == measurement)] <- "measurement"
# summary
summary <- dataframe %>%
group_by(grouping1, grouping2) %>%
get_summary_stats(measurement, type = "mean_sd")
# Outlier check
outlier <- dataframe %>%
group_by(grouping1, grouping2) %>%
identify_outliers(measurement)
# Check normality of the data: Shapiro
shapiro <- dataframe %>%
group_by(grouping1, grouping2) %>%
shapiro_test(measurement) %>%
add_significance()
# General test effect: two-way repeated measures ANOVA
anova <- aov((measurement) ~ grouping1*grouping2, data = dataframe)
anova <- anova_summary(anova)
# Post-hoc-analysis parametric
# Effect of the condition (NS vs Stressed) at each time point
t_test_bonferroni <- dataframe %>%
group_by(grouping2) %>%
anova_test(measurement ~ grouping1) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# Kruskal-Wallis test for independent samples non-parametric
kruskal_nonp <- dataframe %>%
group_by(grouping2) %>%
kruskal_test(measurement ~ grouping1) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
total_list <- list(summary = summary, outlier = outlier, shapiro = shapiro, anova = anova, t_test_bonferroni = t_test_bonferroni, kruskal_nonp = kruskal_nonp )
return(total_list)
}
# Old function for anova do not use!
stats_anova_old <- function(dataframe,grouping1,grouping2, measurement, ID) {
# summary
summary <- dataframe %>%
group_by(!! sym(grouping1), !! sym(grouping2)) %>%
get_summary_stats(!! sym(measurement), type = "mean_sd")
# # Outlier check
outlier <- dataframe %>%
group_by(!! sym(grouping1), !! sym(grouping2)) %>%
identify_outliers(!! sym(measurement))
# Check normality of the data: Shapiro
shapiro <- dataframe %>%
group_by(!! sym(grouping1), !! sym(grouping2)) %>%
shapiro_test(!! sym(measurement)) %>%
add_significance()
# General test effect: two-way repeated measures ANOVA
anova <- anova_test(
data = dataframe, dv = measurement, wid = ID,
between = c(grouping1, grouping2)) %>% add_significance()
anova <- get_anova_table(anova)
# Post-hoc-analysis: effect of the condition (NS vs Stressed) at each time point
t_test_bonferroni <- dataframe %>%
group_by(!! sym(grouping2)) %>%
anova_test(dv = measurement, between = c(grouping1)) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# General test effect: Kruskal-Wallis test for independent samples non-parametric
res.kruskal1 <- dataframe %>%
group_by(!! sym(grouping2)) %>%
kruskal_test(!! sym(dataframe ~ grouping1)) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
total_list <- list(summary = summary, outlier = outlier, shapiro = shapiro, anova = anova, t_test_bonferroni = t_test_bonferroni)
return(total_list)
}
# ++++++++++++++++++++++++++++
# flattenCorrMatrix
# ++++++++++++++++++++++++++++
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}