content
My count data is countdatacsv; df is the count data with categories. mymetadata is my metadata. I am trying to put the df and mymetedata into omu_summary, a function that gives t-test statistics and even folding analysis. I am following this vignette: https://cran.r-project.org/web/packages/omu/vignettes/Omu_vignette.html
# their example (below) works fine
# omu package included c57_nos2KO_mouse_metadata and c57_nos2KO_mouse_countDF
# assigning hierarchy characterizes the metabolites:
DF <- assign_hierarchy(count_data = c57_nos2KO_mouse_countDF, keep_unknowns = TRUE, identifier = "KEGG")
theirsummary <- omu_summary(count_data = DF, metadata = c57_nos2KO_mouse_metadata, numerator = "Strep", denominator = "Mock", response_variable = "Metabolite", Factor = "Treatment", log_transform = TRUE, p_adjust = "BH", test_type = "welch")
manually getting stats
cleancountdata <- countdata %>% gather(key = "plot", value = "count", M_C_4_AB:O_H_32_AB) %>% mutate(type = gsub("_[0-9][0-9]*_AB","",plot)) %>% mutate(Min.Org = gsub("_[A-Z]","",type)) %>% unite(metabplot, c("Metabolite","Min.Org")) %>% mutate(metabplot = gsub("_2B", "", metabplot)) %>% mutate(type = gsub("_2B","", type))
cleancountdata[cleancountdata == 0] <- 1
cleancountdata$count <- log(cleancountdata$count)
metab.stat <- cleancountdata %>%
group_by(metabplot) %>%
t_test(count ~ type) %>%
adjust_pvalue(method = "BH") %>%
add_significance()
# summary; mean and st.dev:
MCmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "M_C") %>%
summarise(meanC = mean(count), st.devC = sd(count)) %>% select(-type)
MHmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "M_H") %>%
summarise(meanH = mean(count), st.devH = sd(count)) %>% select(-type)
OCmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "O_C") %>%
summarise(meanC = mean(count), st.devC = sd(count)) %>% select(-type)
OHmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "O_H") %>%
summarise(meanH = mean(count), st.devH = sd(count)) %>% select(-type)
Minerals <- merge(MCmean, MHmean) %>% mutate(Horizon = "Mineral")
Organics <- merge(OCmean, OHmean) %>% mutate(Horizon = "Organic")
means <- rbind(Minerals, Organics)
rm(MCmean, MHmean, OCmean,OHmean, Minerals, Organics)
# combine summary and stats:
metab.stat.sum <- merge(metab.stat, means, by = "metabplot", all=TRUE)
metab.stat.sum$foldchange <- metab.stat.sum$meanC/metab.stat.sum$meanH
metab.stat.sum$log2foldchange <- log2(metab.stat.sum$foldchange)
metab.stat.sum$Metabolite <- gsub("_[A-Z]","",metab.stat.sum$metabplot)
# KEGG and heirarchy matching:
KEGG_ID <- read_csv("KEGG.ID.csv")
df <- merge(metab.stat.sum, KEGG_ID, by = "Metabolite", all = TRUE)
df <- as.data.frame(assign_hierarchy(df, keep_unknowns = TRUE, identifier = "KEGG"))
sig <- df[which(df$p <= 0.05),]
head(sig)
## Metabolite metabplot .y. group1 group2 n1 n2
## 2 1,3-dihydroxyacetone 1,3-dihydroxyacetone_O count O_C O_H 6 5
## 4 1,4-D-xylobiose 1,4-D-xylobiose_O count O_C O_H 6 5
## 9 3-amino-2-piperidone* 3-amino-2-piperidone*_M count M_C M_H 6 10
## 10 3-amino-2-piperidone* 3-amino-2-piperidone*_O count O_C O_H 6 5
## 12 3-hydroxybutyric acid 3-hydroxybutyric acid_O count O_C O_H 6 5
## 20 arbutin arbutin_O count O_C O_H 6 5
## statistic df p p.adj p.adj.signif meanC st.devC meanH
## 2 3.382217 6.737232 0.012 0.2194400 ns 12.75862 0.3366425 12.252514
## 4 2.445262 6.920443 0.045 0.3274138 ns 12.43513 0.5207342 11.458238
## 9 2.842661 9.752112 0.018 0.2532000 ns 13.28931 0.4594043 12.639614
## 10 2.746490 8.992850 0.023 0.2978824 ns 15.53493 0.6496346 14.545110
## 12 2.960833 8.178347 0.018 0.2532000 ns 13.44891 0.3515654 12.945662
## 20 2.971323 4.667778 0.034 0.3222545 ns 13.80208 2.0688713 4.748944
## st.devH Horizon foldchange log2foldchange KEGG Class
## 2 0.1323567 Organic 1.041307 0.05839494 C00184 Carbohydrates
## 4 0.7563356 Organic 1.085257 0.11803613 C01630 Carbohydrates
## 9 0.4130559 Mineral 1.051402 0.07231438 <NA> <NA>
## 10 0.5456451 Organic 1.068051 0.09498117 <NA> <NA>
## 12 0.2035877 Organic 1.038874 0.05502055 C01089 Organic acids
## 20 6.5459311 Organic 2.906347 1.53920720 C06186 <NA>
## Subclass_1 Subclass_2 Subclass_3 Subclass_4
## 2 Monosaccharides Ketoses none none
## 4 Oligosaccharides Disaccharides none none
## 9 <NA> <NA> <NA> <NA>
## 10 <NA> <NA> <NA> <NA>
## 12 Carboxylic acids Hydroxycarboxylic acids none none
## 20 <NA> <NA> <NA> <NA>
# just significant metabolites
# write.csv(df, "Metabolites.withstat.KEGG.hierarchy.foldchange.csv")
df$meancountdifference <- df$meanH - df$meanC
makeStars <- function(x){
stars <- c("*", "ns")
vec <- c(0, 0.05 )
i <- findInterval(x, vec)
stars[i]
}
sig.legend <- function(x){
label <- c("H>C", "H<C")
vec <- c(-10, 0 )
i <- findInterval(x, vec)
label[i]
}
df$p.category <- makeStars(df$p)
df[which(df$p.category == "*"),28] <- sig.legend(df[which(df$p.category == "*"),26])
df[is.na(df$V28),28] <- "H=C"
names(df)[names(df) == "V28"] <- "legend"
I found 62 deferentially significant metabolites in both layers with 31 being in the Organic layer while 31 being in the Mineral layer.
fold change
ggplot(metab.stat.sum) + geom_point(aes(x=log2foldchange, y=-log(p.adj), color = p.adj.signif)) + theme(legend.position = "none",
plot.title = element_text(size = rel(1.5), hjust = 0.5),
axis.title = element_text(size = rel(1.25)))
Significant Change in metabolite abundance
figure <- df[which(df$Metabolite %in% sig$Metabolite),]
figure <- figure[order(figure$Class),]
figure$Metabolite <- factor(figure$Metabolite, levels = unique(figure$Metabolite))
datatable(figure)
ggplot(figure, aes(x=Metabolite, y=meancountdifference, color = legend)) + geom_point(stat="identity") + ylab("average count difference (C-H)") + xlab("Metabolite") + coord_flip() + ggtitle("Significant Metabolites") + facet_grid(~Horizon) +
scale_color_manual(values=c('dodgerblue3','gray','magenta2'))