library("vegan") library("ggplot2") library("plyr") library("reshape2") library("randomcoloR") library("limma") library("ggsignif") library("BiodiversityR") source("load_samples_2.R") #BURST version of OTU Table source("stats.R") ####qPCR basic analyses#### m.qpcr <- m.all[rownames(qpcr.norm),] qpcr_milk <- qpcr.norm qpcr_milk$Site <- NA qpcr_milk$Site <- m.qpcr$Site #useful for calculaing number of positive samples via qPCR m.qpcr <- m.all[rownames(qpcr_milk_nopaci),] qpcr_milk_nopaci$Site <- NA qpcr_milk_nopaci$Site <- m.qpcr$Site qpcr_milk_nopaci_summary <- ddply(qpcr_milk_nopaci, .(Site), summarize, Cq = mean(MeanCq), SD = sd(MeanCq)) TukeyHSD(aov(MeanCq ~ Site, data=qpcr_milk_nopaci)) #Statistics of different qPCR comparisons by sites ####Sequence counts analyses#### seq_counts_nicu_milk_nopaci <- as.data.frame(allsamples[rownames(nicu_milk_nopaci),]) seq_counts_nicu_milk_nopaci$SeqCount <- rowSums(seq_counts_nicu_milk_nopaci) seq_counts_nicu_milk_nopaci$Site <- NA seq_counts_nicu_milk_nopaci$Site <- m.nicu_milk_nopaci$Site seq_counts_nicu_milk_nopaci$SampleID <- NA seq_counts_nicu_milk_nopaci$SampleID <- rownames(seq_counts_nicu_milk_nopaci) seq_counts_nicu_milk_nopaci$Patient <- NA seq_counts_nicu_milk_nopaci$Patient <- m.nicu_milk_nopaci$Patient seq_counts_nicu_milk_nopaci <- seq_counts_nicu_milk_nopaci[,seq(ncol(seq_counts_nicu_milk_nopaci)-3, ncol(seq_counts_nicu_milk_nopaci), 1)] seq_counts_site <- ddply(seq_counts_nicu_milk_nopaci, .(Site), summarize, SeqCount = sum(SeqCount)) seq_counts_mean_site <- ddply(seq_counts_nicu_milk_nopaci, .(Site), summarize, MeanSeqCount = mean(SeqCount), SD = sd(SeqCount)) #FIGURE 1# ggplot(seq_counts_nicu_milk_nopaci, aes(x = Site, y = SeqCount)) + geom_boxplot() + ylab("# Sequence Counts") + theme(panel.background = element_blank(), panel.border = element_blank(), axis.line = element_line(colour = "black")) + scale_x_discrete(limits=c("IncubatorHandle","Keyboard","Mouse","Stethoscope","Milk")) #Figure 1# ggsave(filename = "Figure 1.tiff", units = "in", width = 5.5, height = 4.5, dpi = 300) #END FIGURE 1# TukeyHSD(aov(SeqCount ~ Site, data=seq_counts_nicu_milk_nopaci)) #Statistics of site comparisons #Sequence counts for all samples, including negative controls, etc. seq_counts_all <- as.data.frame(allsamples) seq_counts_all$SeqCount <- rowSums(seq_counts_all) seq_counts_all$Site <- NA seq_counts_all$Site <- m.all[rownames(seq_counts_all), c("Site")] seq_counts_all$SampleID <- NA seq_counts_all$SampleID <- rownames(seq_counts_all) seq_counts_all$Patient <- NA seq_counts_all$Patient <-m.all[rownames(seq_counts_all), c("Patient")] seq_counts_all <- seq_counts_all[,seq(ncol(seq_counts_all)-3, ncol(seq_counts_all), 1)] seq_counts_site <- ddply(seq_counts_all, .(Site), summarize, SeqCount = sum(SeqCount)) seq_counts_mean_site <- ddply(seq_counts_all, .(Site), summarize, MeanSeqCount = mean(SeqCount), SD = sd(SeqCount)) ####Removing rare/singleton taxa#### cutoff <- 0.1 nicu_milk_nopaci <- nicu_milk_nopaci[,colSums(nicu_milk_nopaci) > 0] nicu_milk_nopaci <- nicu_milk_nopaci[,colSums(nicu_milk_nopaci) > cutoff] ####Abundances#### #Top abundant and prevalent taxa for all samples# prev <- list(1:ncol(nicu_milk_nopaci)) for(i in 1:ncol(nicu_milk_nopaci)){ prev[i] <- sum(nicu_milk_nopaci[,i] > 0) } names(prev) <- paste0(colnames(nicu_milk_nopaci)) sort(unlist(prev)) #Most prevalent taxa sort(colMeans(nicu_milk_nopaci)) #Most abundant taxa ####Comparing all environmental samples against milk#### m.nicu_vs_milk <- m.nicu_milk_nopaci m.nicu_vs_milk$Milk <- NA milk <- m.nicu_vs_milk$Site == c("Milk") m.nicu_vs_milk[milk,c("Milk")] <- c("Y") m.nicu_vs_milk[!milk,c("Milk")] <- c("N") nicu_milk_only <- nicu_milk_nopaci[rownames(m.nicu_vs_milk[m.nicu_vs_milk$Milk == "Y",]),] nicu_milk_only <- nicu_milk_only[,colSums(nicu_milk_only) > 0] nicu_no_milk <- nicu_milk_nopaci[rownames(m.nicu_vs_milk[m.nicu_vs_milk$Milk == "N",]),] nicu_no_milk <- nicu_no_milk[,colSums(nicu_no_milk) > 0] #Top abundant and prevalent taxa for milk or non milk only# prevmilk <- list(1:ncol(nicu_milk_only)) for(i in 1:ncol(nicu_milk_only)){ prevmilk[i] <- sum(nicu_milk_only[,i] > 0) } names(prevmilk) <- paste0(colnames(nicu_milk_only)) sort(unlist(prevmilk)) #Most prevalent taxa sort(colMeans(nicu_milk_only)) #Most abundant taxa prevnomilk <- list(1:ncol(nicu_no_milk)) for(i in 1:ncol(nicu_no_milk)){ prevnomilk[i] <- sum(nicu_no_milk[,i] > 0) } names(prevnomilk) <- paste0(colnames(nicu_no_milk)) sort(unlist(prevnomilk)) #Most prevalent taxa sort(colMeans(nicu_no_milk)) #Most abundant taxa #qPCR m.nicu_vs_milk.q <- m.nicu_vs_milk[rownames(qpcr_milk_nopaci),] t.test(qpcr_milk_nopaci$MeanCq ~ Milk, data=m.nicu_vs_milk.q) #Comparing mean Cq t.test(qpcr_milk_nopaci$SD ~ Milk, data=m.nicu_vs_milk.q) #Comparing SD aggregate(qpcr_milk_nopaci$MeanCq ~ Site, data=m.nicu_vs_milk.q, FUN=mean) #Shows means of each site independently #Sequence analysis t.test(seq_counts_nicu_milk_nopaci$SeqCount ~ Milk, data=m.nicu_vs_milk, alternative = "less")