library(tidyverse)
[30m── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[30m[32m✔[30m [34mggplot2[30m 3.0.0 [32m✔[30m [34mpurrr [30m 0.2.5
[32m✔[30m [34mtibble [30m 1.4.2 [32m✔[30m [34mdplyr [30m 0.7.6
[32m✔[30m [34mtidyr [30m 0.8.1 [32m✔[30m [34mstringr[30m 1.3.1
[32m✔[30m [34mreadr [30m 1.1.1 [32m✔[30m [34mforcats[30m 0.3.0[39m
[30m── [1mConflicts[22m ──────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(here)
here() starts at /Users/scottericr/Documents/Tufts/Research Projects/BACE Tea/Data for drought by herbivory in tea
library(ggridges)
Attaching package: ‘ggridges’
The following object is masked from ‘package:ggplot2’:
scale_discrete_manual
library(gglabeller)
library(ropls)
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.5-2
library(cowplot)
Attaching package: ‘cowplot’
The following object is masked from ‘package:ggplot2’:
ggsave
library(chemhelper) #install with devtools::install_github("Aariq/chemhelper")
library(broom)
library(ggrepel)
library(HDoutliers)
Loading required package: FNN
Loading required package: FactoMineR
Loading required package: mclust
__ ___________ __ _____________
/ |/ / ____/ / / / / / ___/_ __/
/ /|_/ / / / / / / / /\__ \ / /
/ / / / /___/ /___/ /_/ /___/ // /
/_/ /_/\____/_____/\____//____//_/ version 5.4.1
Type 'citation("mclust")' for citing this R package in publications.
Attaching package: ‘mclust’
The following object is masked from ‘package:purrr’:
map
#create a "not in" function
#useful for filtering out outliers
`%!in%` <- Negate(`%in%`)
#detect if a column has at least one non-zero value. Probably other ways to do this.
isnt_empty <- function(x) !is.double(x) || is.double(x) && sum(x) != 0
#function to select columns that are NOT double, i.e. integer, factor, and character
not_double <- function(x) !is.double(x)
lc <- read_rds(here("cleaned_data", "lcms.rds"))
lc <- lc %>% rename(conc = `mean_leaf_conc(µg/mg)`)
lc
Non-detects are represented in this dataset as NA
. I’ll set them to 0 for multivariate analysis.
lc2 <- lc %>% mutate_if(is.double, funs(ifelse(is.na(.), 0 , .)))
to_remove <- lc2 %>%
filter(`Under LOQ` == TRUE) %>%
arrange(Compound) %>%
group_by(Compound) %>%
summarise(count = n()) %>%
filter(count == 53) %>%
.$Compound
lc3 <- lc2 %>%
filter(!Compound %in% to_remove)
horiz_scatter <- function(){
list(
geom_point(alpha = 0.5),
coord_flip(),
labs(x = "Compound Name"),
scale_color_brewer(type = "qual", palette = 6)
)
}
p <- ggplot(lc3, aes(x = Compound, y = conc, label = unique.id)) + horiz_scatter() + ylab("Concentration (µg/mg)") + theme_bw()
Interactive, not run by default.
Created using chunk above interactively and copying code from p.lab$code
:
gglabeller_data <- p$data
gglabeller_data[c(1:95, 97:100, 102:121, 123, 125:143, 147:154, 156:333, 335:371),'unique.id'] <- ''
p + geom_text_repel(data = gglabeller_data,mapping = aes(x = Compound, y = conc, label = unique.id), min.segment.length = unit(0.5, 'lines'),box.padding = unit(0.25, 'lines'),point.padding = unit(1e-06, 'lines'))
c9 and d3 appear to be possible outliers.
lc_wide <- lc3 %>%
select(unique.id, Spray, Treatment, Compound, conc) %>%
spread(key = Compound, value = conc) %>%
ungroup()
lc_wide
lc_pca <- opls(select_if(lc_wide, is.double), plotL = FALSE, scaleC = "standard")
PCA
53 samples x 7 variables
standard scaling of predictors
plot(lc_pca, parAsColFcVn = lc_wide$Treatment, parLabVc = lc_wide$unique.id)
outliers <- HDoutliers(select_if(lc_wide, is.double), transform = FALSE)
lc_wide[outliers, ]
d3 and c9 come out as outliers. This confirms univariate plots. Remove them
outliers <- c("d3", "c9")
lc_wide2 <- lc_wide %>% filter(unique.id %!in% outliers)
lc_pca2 <- opls(lc_wide2 %>% select_if(is.double), plotL = FALSE, scaleC = "standard")
PCA
51 samples x 7 variables
standard scaling of predictors
plot(lc_pca2, parAsColFcVn = lc_wide2$Treatment, parLabVc = lc_wide2$unique.id)
No clear separation
I’d love to streamline this exact plot. Maybe I can write a function for it sometime.
#extract scores and make into data frame
lc_pca2_scores <- lc_pca2 %>% getScoreMN() %>%
as_data_frame()
#rejoin with sample info
lc_pca2_scores <- bind_cols(select_if(lc_wide2, not_double), lc_pca2_scores)
lc_pca2_scores <- lc_pca2_scores %>%
mutate(treatcombo = paste(Treatment, Spray))
# set up colors and shapes
sprayshapes <- c("MeJA" = 17, "Control" = 20)
trtcols <- c("100%" = "blue", "75%" = "darkgreen", "50%" = "darkorange3")
#make base plot
lc_pca2_plot <- ggplot(lc_pca2_scores, aes(x = p1, y = p2, shape = Spray, color = Treatment)) +
geom_point(size = 2) +
stat_ellipse(aes(group = Treatment:Spray, linetype = Spray), type = "t") +
scale_shape_manual(values = sprayshapes) +
scale_color_manual(values = trtcols) +
guides(color = guide_legend(override.aes = list(shape = 15, size = 4, linetype = 0))) +
xlab(paste0("PC1 (", signif(lc_pca2@modelDF$R2X[1]*100, digits = 2), "%)")) +
ylab(paste0("PC2 (", signif(lc_pca2@modelDF$R2X[2]*100, digits = 2), "%)")) +
#xlim(-9, 8) + ylim(-5, 18) +
theme_bw() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 9))
Make the legend manually and plot inside the main plot.
base <- lc_pca2_plot +
theme(legend.position = "none") + #suppress legend
ylim(-5, 7) #expand axes to make room for custom legend
# Create some dummy data for legend
legend.df <- data.frame(x = c(10, 10), y = c(85, 80), label = c("MeJA", "Control"))
legend.df2 <- data.frame(x = c(10, 10, 10), y = c(65, 60, 55), label = c("100%", "75%", "50%"))
legend <- ggplot(legend.df) +
xlim(0,100) +
ylim(0,100) +
theme_nothing() + #comment this line out to get a better idea of what is happening
# Simulated herbivory legend
geom_text(aes(x = 10, y = 90, label = "Simulated Herbivory"), #title
hjust = "left",
size = 3.5,
data = data.frame(), inherit.aes = FALSE) +
geom_text(aes(x = x, y = y, label = label), #labels
hjust = "inward", nudge_x = 11,
size = 3.0,
data = legend.df, inherit.aes = FALSE) +
geom_point(aes(x = x, y = y, shape = label), #shapes
position = position_nudge(x = 8),
data = legend.df, inherit.aes = FALSE) +
geom_segment(aes(x = x, y = y, xend = x + 5, yend = y, linetype = label), #ellipse lines
position = position_nudge(x = 1),
data = legend.df, inherit.aes = FALSE) +
scale_shape_manual(values = sprayshapes) +
scale_linetype_manual(values = c(2, 1)) +
# Drought treatment legend
geom_text(aes(x = 10, y = 70, label = "% Ambient Rainfall"), #title
hjust = "left",
size = 3.5,
data = data.frame(),
inherit.aes = FALSE) +
geom_text(aes(x = x, y = y, label = label), #labels
hjust = "inward",
nudge_x = 5,
size = 3,
data = legend.df2, inherit.aes = FALSE) +
geom_point(aes(x = x, y = y, color = label), #color swatches
position = position_nudge(x = 2.3),
shape = 15,
size = 3.5,
data = legend.df2, inherit.aes = FALSE) +
scale_color_manual(values = trtcols)
legend
#overlay legend on base plot
lc_pca_plot <- base + draw_plot(legend, -1.4, 2.4, width = 1, height = 1, scale = 11)
lc_pca_plot
save_plot(here("figs", "LCMS PCA.png"), lc_pca_plot, base_aspect_ratio = 1.3, base_height = 5)
lc_wide2_scale <- lc_wide2 %>% mutate_if(is.double, scale)
#multivariate analog of Levene's Test for homogeneity of variances--an assumption of PERMANOVA
PERMDIST <- betadisper(vegdist(select_if(lc_wide2_scale, is.double), method = "eu"), group = lc_wide2_scale$Treatment, type = "centroid")
plot(PERMDIST)
PERMDIST
Homogeneity of multivariate dispersions
Call: betadisper(d = vegdist(select_if(lc_wide2_scale, is.double), method = "eu"), group =
lc_wide2_scale$Treatment, type = "centroid")
No. of Positive Eigenvalues: 7
No. of Negative Eigenvalues: 0
Average distance to centroid:
100% 75% 50%
2.534 2.221 2.538
Eigenvalues for PCoA axes:
PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 <NA>
111.9964 91.0814 59.3914 39.3388 23.6779 20.6940 3.8200 NA
anova(PERMDIST)
Analysis of Variance Table
Response: Distances
Df Sum Sq Mean Sq F value Pr(>F)
Groups 2 1.1277 0.56383 0.9509 0.3936
Residuals 48 28.4612 0.59294
#if p < 0.05 then doesn't pass test.
Passes
adonis2(select_if(lc_wide2_scale, is.double) ~ Treatment*Spray, method = "euclidean", by = "terms", data = lc_wide2_scale)
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = select_if(lc_wide2_scale, is.double) ~ Treatment * Spray, data = lc_wide2_scale, method = "euclidean", by = "terms")
Df SumOfSqs R2 F Pr(>F)
Treatment 2 19.04 0.05440 1.3374 0.196
Spray 1 2.24 0.00641 0.3151 0.914
Treatment:Spray 2 8.37 0.02392 0.5880 0.807
Residual 45 320.34 0.91527
Total 50 350.00 1.00000
Treatment has biggest effect, but not significant unless no data censoring. Indicating its low concentration compounds that are effected.