library(tidyverse)
library(here)
library(ropls)
library(cowplot)
library(vegan)
library(broom)
library(readxl)
library(chemhelper) #install with devtools::install_github("Aariq/chemhelper")
library(latex2exp)
options(scipen = 999)
#create a "not in" function
#useful for filtering out outliers
`%!in%` <- Negate(`%in%`)
#detect if a column has at least one non-zero value. Probably other ways to do this.
not_empty <- function(x) !is.double(x) || is.double(x) && sum(x) != 0
#function to select columns that are NOT double, i.e. integer, factor, and character
not_double <- function(x) !is.double(x)
# After fixing some mistakes in the Ion Analytics methods file, the FINAL output.
gc_zeroes_long.1 <- read_rds(here("cleaned_data", "GCMS_tidy_zeroes.rds"))
gc_log.1 <- read_rds(here("cleaned_data", "GCMS_wide_loged.rds"))
PCA shows the best separation by herbivory treatment in the 100%, ambient rainfall treatment. There is no obvious separation other than that. This indicates that plants responded to MeJA spray, but only in the 100% rainfall treatment.
gc_pca <- opls(select_if(gc_log.1, is.double), plotL = FALSE, scaleC = "standard")
PCA
45 samples x 128 variables
standard scaling of predictors
plot(gc_pca, parLabVc = gc_log.1$sample, parAsColFcVn = paste0(gc_log.1$Treatment, gc_log.1$Spray))
Character 'parAsColFcVn' set to a factor
gc_pca_scores <- gc_pca %>% getScoreMN() %>% as.data.frame()
gc_pca_scores <- bind_cols(select_if(gc_log.1, not_double), gc_pca_scores)
gc_pca_scores <- gc_pca_scores %>% mutate(treatcombo = paste(Treatment, Spray))
build the base plot
# calculate data for convex hulls
hulldata<- hullfn(gc_pca_scores, "p1", "p2", "treatcombo") #in chemhelper
# set shapes and colors
trtcols <- c("100%" = "blue", "75%" = "darkgreen", "50%" = "darkorange3")
sprayshapes <- c("MeJA" = 17, "Control" = 20)
gc_pca.plot <- ggplot(gc_pca_scores, aes(x = p1, y = p2,shape = Spray, color = Treatment)) +
geom_point(size = 2) +
stat_ellipse(aes(group = Treatment:Spray, linetype = Spray), type = "t") + # Hotelling T^2 distribution
# Uncomment to see convex hulls. Pattern is less obvious because the outlines all overlap.
#geom_polygon(data = hulldata, aes(linetype = Spray, fill = Treatment), alpha = 0.3) +
scale_linetype_manual(values = c(1, 2), name = NULL) +
scale_shape_manual(values = sprayshapes, name = "Simulated Herbivory") +
scale_color_manual(values = trtcols, name = "% Ambient Rainfall") +
guides(color = guide_legend(override.aes = list(shape = 15, size = 4, linetype = 0))) +
xlab(paste0("PC1 (", signif(gc_pca@modelDF$R2X[1]*100, digits = 2), "%)")) +
ylab(paste0("PC2 (", signif(gc_pca@modelDF$R2X[2]*100, digits = 2), "%)")) +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 9)) +
theme_bw()
# gc_pca.plot
Add custom legend
base <- gc_pca.plot + theme(legend.position = "none") #suppress legend
# Create some dummy data for legend
legend.df <- data.frame(x = c(10, 10), y = c(85, 80), label = c("MeJA", "Control"))
legend.df2 <- data.frame(x = c(10, 10, 10), y = c(65, 60, 55), label = c("100%", "75%", "50%"))
legend <- ggplot(legend.df) +
xlim(0,100) +
ylim(0,100) +
theme_nothing() + #comment this line out to get a better idea of what is happening
# Simulated herbivory legend
geom_text(aes(x = 10, y = 90, label = "Simulated Herbivory"), #title
hjust = "left",
size = 3.5,
data = data.frame(), inherit.aes = FALSE) +
geom_text(aes(x = x, y = y, label = label), #labels
hjust = "inward", nudge_x = 11,
size = 3.0,
data = legend.df, inherit.aes = FALSE) +
geom_point(aes(x = x, y = y, shape = label), #shapes
position = position_nudge(x = 8),
data = legend.df, inherit.aes = FALSE) +
geom_segment(aes(x = x, y = y, xend = x + 5, yend = y, linetype = label), #ellipse lines
position = position_nudge(x = 1),
data = legend.df, inherit.aes = FALSE) +
scale_shape_manual(values = sprayshapes) +
scale_linetype_manual(values = c(2, 1)) +
# Drought treatment legend
geom_text(aes(x = 10, y = 70, label = "% Ambient Rainfall"), #title
hjust = "left",
size = 3.5,
data = data.frame(),
inherit.aes = FALSE) +
geom_text(aes(x = x, y = y, label = label), #labels
hjust = "inward",
nudge_x = 5,
size = 3,
data = legend.df2, inherit.aes = FALSE) +
geom_point(aes(x = x, y = y, color = label), #color swatches
position = position_nudge(x = 2.3),
shape = 15,
size = 3.5,
data = legend.df2, inherit.aes = FALSE) +
scale_color_manual(values = trtcols)
legend
gc_pca.plot2 <- base + draw_plot(legend, -8, 5.8, width = 1, height = 1, scale = 28)
gc_pca.plot2
save_plot(here("figs", "GCMS PCA.png"), gc_pca.plot2, base_aspect_ratio = 1.3, base_height = 5)
PERMANOVA confirms what is seen in the PCA. There is a significant main effect of MeJA treatment on volatile profile and a significant interaction between drought treatment and MeJA treatment.
None of the vegan
functions support scaling with and argument. I need to scale the data ahead of time.
gc_log_scale <- gc_log.1 %>% mutate_if(is.double, ~chem_scale(.,center = TRUE, scale = "auto"))
PERMANOVA assumes homogneity of variances in multivariate space.
#multivariate analog of Levene's Test for homogeneity of variances--an assumption of PERMANOVA
PERMDIST <- betadisper(vegdist(select_if(gc_log_scale, is.double), method = "eu"), group = paste(gc_log_scale$Treatment, gc_log_scale$Spray), type = "centroid")
#plot(PERMDIST)
#PERMDIST
anova(PERMDIST)
Analysis of Variance Table
Response: Distances
Df Sum Sq Mean Sq F value Pr(>F)
Groups 5 15.96 3.1921 0.4863 0.7843
Residuals 39 255.97 6.5635
#meets the assumption of homogeneity of dispersion if p > 0.05
adonis(select_if(gc_log_scale, is.double) ~ Treatment*Spray,
method = "euclidean",
by = "terms",
data = gc_log_scale)
Call:
adonis(formula = select_if(gc_log_scale, is.double) ~ Treatment * Spray, data = gc_log_scale, method = "euclidean", by = "terms")
Permutation: free
Number of permutations: 999
Terms added sequentially (first to last)
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
Treatment 2 241.1 120.54 1.0042 0.04280 0.440
Spray 1 391.9 391.86 3.2648 0.06958 0.001 ***
Treatment:Spray 2 317.9 158.97 1.3245 0.05645 0.061 .
Residuals 39 4681.1 120.03 0.83116
Total 44 5632.0 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Spray and interaction are significant, matching what is seen in the PCA
library(pairwiseAdonis)
Loading required package: cluster
pairwise.adonis(select_if(gc_log_scale, is.double), factors = gc_log_scale$Treatment:gc_log_scale$Spray,
sim.method = "euclidean",
p.adjust.m = "none") %>%
filter(pairs %in% c("50%:MeJA vs 50%:Control", "75%:Control vs 75%:MeJA", "100%:MeJA vs 100%:Control")) %>%
mutate(p.adjusted = p.adjust(p.value, "fdr"))
I can also try to use PLS-DA to confirm that there is an effect of MeJA, no main effect of drought, and an interaction between MeJA and drought treatment.
There is significant separation of volatile profiles by MeJA spray. Q2 and R2 are both high and similar to eachother, and p < 0.05.
gc_plsda_spray <- opls(select_if(gc_log.1, is.double), gc_log.1$Spray,
permI = 1000,
scaleC = "standard",
plotL = FALSE)
PLS-DA
45 samples x 128 variables and 1 response
standard scaling of predictors and response(s)
# plot(gc_plsda_spray)
Plot
gc_plsda_sprayPD <- get_plotdata(gc_plsda_spray)
Joining, by = "sample"
ggplot(gc_plsda_sprayPD$plot_data, aes(x = p1, y = p2, shape = y1, color = gc_log.1$Treatment)) +
stat_ellipse(aes(x = p1, y = p2, linetype = y1), inherit.aes = FALSE) +
geom_point(size = 2.5) +
labs(x = paste0("P1 (", gc_plsda_sprayPD$axis_stats$R2X[1]*100, "%)"),
y = paste0("P2 (", gc_plsda_sprayPD$axis_stats$R2X[2]*100, "%)")) +
ggtitle("PLS-DA on full data set",
subtitle = paste0("R^2 = ", gc_plsda_sprayPD$model_stats$`R2Y(cum)`,
" Q^2 = ", gc_plsda_sprayPD$model_stats$`Q2(cum)`,
" p(Q^2) = ", gc_plsda_sprayPD$model_stats$pQ2)) +
scale_shape_manual("Simulated\nHerbivory", values = sprayshapes) +
scale_linetype("Simulated\nHerbivory") +
scale_color_manual("% Ambient\nRainfall", values = trtcols) +
theme_bw()
get_VIP(gc_plsda_spray) %>% #from chemhelper
rename(Compound = "Variable") %>%
arrange(desc(VIP)) %>%
filter(VIP > 1)
There is no significant separation by drought treatment (no model is built because first component is not significant).
gc_plsda_drought <- try(
opls(select_if(gc_log.1, is.double), gc_log.1$Treatment,
permI = 100,
scaleC = "standard",
plotL = FALSE)
)
Error : No model was built because the first predictive component was already not significant;
Select a number of predictive components of 1 if you want the algorithm to compute a model despite this.
There is no significant separation when using all 6 groups (every drought x MeJA combination). A single-component model is built, but the R2 and Q2 are extremely low, so there is essentially no predictive or explanatory power of the model.
gc_plsda_combo <- try(
opls(select_if(gc_log.1, is.double), paste(gc_log.1$Treatment, gc_log.1$Spray),
permI = 100,
scaleC = "standard",
plotL = FALSE)
)
PLS-DA
45 samples x 128 variables and 1 response
standard scaling of predictors and response(s)
Table 1 will be only the metabolites with VIP > 1, arranged by VIP from gc_plsda_spray
showing mean RPA * 1000 for each treatment combination.
VIP_spray <- get_VIP(gc_plsda_spray) %>% rename(Compound = "Variable")
alkanes <- read_csv(here("data", "GC alkanes.csv"))
Parsed with column specification:
cols(
Compound = col_character(),
`Actual RT` = col_double(),
`Expected RT` = col_double(),
RT = col_double(),
C_num = col_integer()
)
gc_ri <- gc_zeroes_long.1 %>%
mutate(RI = map_dbl(RT, ~calc_RI(., alkanes$RT, alkanes$C_num))) %>% #calculate RI based on alkane file
group_by(Compound) %>%
summarise(mean_RI = mean(RI, na.rm = TRUE)) #calculate mean RI for each compound
Add in ± SE here.
gc_summary <- gc_zeroes_long.1 %>%
mutate(Combo = paste0(Treatment," rain, ", Spray) %>% # set up column headers
fct_relevel("100% rain, Control",
"75% rain, Control",
"50% rain, Control",
"100% rain, MeJA",
"75% rain, MeJA",
"50% rain, MeJA")) %>%
group_by(Combo, Compound) %>%
summarise(mean_RPAx1000 = mean(RPA * 1000), # calculate mean RPA x 1000 (just to make it more readable)
se_RPAx1000 = sd(RPA * 1000)/n()) %>% # calculate SEM
mutate(`RPA ± SE` = ifelse(mean_RPAx1000 == 0, NA, # paste RPA ± SEM
paste0(round(mean_RPAx1000, 3), " ± ", round(se_RPAx1000, 2)))) %>%
select(-mean_RPAx1000, -se_RPAx1000) %>%
spread(key = Combo, value = `RPA ± SE`)
gc_table <- left_join(gc_summary, VIP_spray)
Joining, by = "Compound"
gc_table <- left_join(gc_table, gc_ri)
Joining, by = "Compound"
gc_table
annotations <- read_excel(here("data", "Compound annotation.xlsx"))
gc_table.1 <- left_join(gc_table,
annotations %>%
select(Compound, `Pretty Name`, LaTeX)) #start simple
Joining, by = "Compound"
Unknowns in the method are numbered based on some order they were found. Rename based on RI for publication.
gc_table.2 <- gc_table.1 %>%
mutate(unknown = str_detect(Compound, "^\\d+($| \\(DCSE\\)$)")) # unknowns are either all numbers or "<num> (DCSE)"
unknowns <- gc_table.2 %>% # extract just the unknowns and arrange by RI
filter(unknown) %>%
arrange(mean_RI) %>%
add_column(unk_name = 1:nrow(.)) #add a column to give them new numbers
gc_table.3 <- right_join(unknowns, gc_table.2) %>% #join back with original table
mutate(`Pretty Name` = ifelse(unknown == TRUE, unk_name, `Pretty Name`)) #use the new numbers only for unknowns
Joining, by = c("Compound", "100% rain, Control", "75% rain, Control", "50% rain, Control", "100% rain, MeJA", "75% rain, MeJA", "50% rain, MeJA", "VIP", "mean_RI", "Pretty Name", "LaTeX", "unknown")
gc_table.3
#use pretty names
gc_table.4 <- gc_table.3 %>%
mutate(Compound = ifelse(is.na(`Pretty Name`), Compound, `Pretty Name`)) %>%
select(-`Pretty Name`, -unknown, -unk_name) %>%
rename(RI = mean_RI) %>%
arrange(desc(VIP)) %>%
filter(VIP>1) %>%
select(Compound, RI, everything())
write_excel_csv(gc_table.4 %>% select(-LaTeX), here("figs", "Table 1.csv"), na = "-")
# Create table of RIs
tableS1 <- gc_table.3 %>%
select(-unknown, -unk_name, -VIP) %>%
rename(RI = mean_RI) %>%
arrange(RI)
# Read in CAS numbers and join
CASnums <- annotations %>% select(Compound, CAS)
tableS1 <- left_join(tableS1, CASnums)
# Read in reference standard data and join
referencestds <- read_csv(here("data", "Reference Standards.csv")) %>%
select(`Compound Name`, CAS, Vendor, `RI DB5`) %>%
filter(!is.na(CAS))
tableS1 <- left_join(tableS1, referencestds)
# Read in literature RIs and join
master <- read_csv(here("data", "method_master RIs.csv"))
litRIs <- master %>%
select(Compound, CAS, RI_lit)
tableS1 <- left_join(tableS1, litRIs)
# Remove CAS numbers from unknowns (they were put by Ion Analytics)
tableS1 %>% filter(str_detect(`Pretty Name`, "^\\d+$"))
tableS1 <- tableS1 %>%
mutate(CAS = ifelse(str_detect(`Pretty Name`, "^\\d+$"), NA, CAS))
# Rename and select columns and write to .csv
tableS1 <- tableS1 %>%
select(Compound = `Pretty Name`, CAS, experimental_RI = RI, reference_RI = `RI DB5`, Vendor, literature_RI = RI_lit )
write_excel_csv(tableS1, here("figs", "Table S1.csv"), na = "-")
tableS1 %>% filter(is.na(CAS)) %>% summarize(unknowns = n())
tableS1 %>% filter(!is.na(CAS)) %>% summarize(identified = n())
tableS1 %>% filter(!is.na(CAS), is.na(reference_RI)) %>% summarize(putative = n())
tableS1 %>% filter(!is.na(CAS), !is.na(reference_RI)) %>% summarize(reference = n())
gc_log.1 %>%
gather(-sample, -Treatment, -Spray, -Plot, -Plant, key = Compound, value = RPA) %>%
filter(Compound == "Methyl salicylate") %>%
ggplot(aes(x = Treatment, color = Spray, y = RPA)) +
geom_jitter(width = 0.2) +
labs(x = "Precipitation treatment", y = "log(RPA)")
gc_zeroes_long.1 %>%
filter(Compound == "Methyl salicylate") %>%
group_by(Treatment, Spray) %>%
summarize(detected = sum(RPA!=0))
gc_log.1 %>% group_by(Spray) %>%
summarize(count = n())