content

My count data is countdatacsv; df is the count data with categories. mymetadata is my metadata. I am trying to put the df and mymetedata into omu_summary, a function that gives t-test statistics and even folding analysis. I am following this vignette: https://cran.r-project.org/web/packages/omu/vignettes/Omu_vignette.html

# their example (below) works fine
# omu package included c57_nos2KO_mouse_metadata and c57_nos2KO_mouse_countDF
# assigning hierarchy characterizes the metabolites:
DF <- assign_hierarchy(count_data = c57_nos2KO_mouse_countDF, keep_unknowns = TRUE, identifier = "KEGG")
theirsummary <- omu_summary(count_data = DF, metadata = c57_nos2KO_mouse_metadata, numerator = "Strep", denominator = "Mock", response_variable = "Metabolite", Factor = "Treatment", log_transform = TRUE, p_adjust = "BH", test_type = "welch")

manually getting stats

cleancountdata <- countdata %>% gather(key = "plot", value = "count", M_C_4_AB:O_H_32_AB) %>% mutate(type = gsub("_[0-9][0-9]*_AB","",plot)) %>% mutate(Min.Org = gsub("_[A-Z]","",type)) %>% unite(metabplot, c("Metabolite","Min.Org")) %>% mutate(metabplot = gsub("_2B", "", metabplot)) %>% mutate(type = gsub("_2B","", type))

cleancountdata[cleancountdata == 0] <- 1
cleancountdata$count <- log(cleancountdata$count)
metab.stat <- cleancountdata %>%
  group_by(metabplot) %>%
  t_test(count ~ type) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
# summary; mean and st.dev:
MCmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "M_C") %>%
  summarise(meanC = mean(count), st.devC = sd(count)) %>% select(-type)
MHmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "M_H") %>%
  summarise(meanH = mean(count), st.devH = sd(count)) %>% select(-type)
OCmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "O_C") %>%
  summarise(meanC = mean(count), st.devC = sd(count)) %>% select(-type)
OHmean <- cleancountdata %>% group_by(metabplot, type) %>% filter(type == "O_H") %>%
  summarise(meanH = mean(count), st.devH = sd(count)) %>% select(-type)
Minerals <- merge(MCmean, MHmean) %>% mutate(Horizon = "Mineral")
Organics <- merge(OCmean, OHmean) %>% mutate(Horizon = "Organic")
means <- rbind(Minerals, Organics) 
rm(MCmean, MHmean, OCmean,OHmean, Minerals, Organics)
# combine summary and stats:
metab.stat.sum <- merge(metab.stat, means, by = "metabplot", all=TRUE)
metab.stat.sum$foldchange <- metab.stat.sum$meanC/metab.stat.sum$meanH
metab.stat.sum$log2foldchange <- log2(metab.stat.sum$foldchange)
metab.stat.sum$Metabolite <- gsub("_[A-Z]","",metab.stat.sum$metabplot)
# KEGG and heirarchy matching:
KEGG_ID <- read_csv("KEGG.ID.csv")
df <- merge(metab.stat.sum, KEGG_ID, by = "Metabolite", all = TRUE)
df <- as.data.frame(assign_hierarchy(df, keep_unknowns = TRUE, identifier = "KEGG"))
sig <- df[which(df$p <= 0.05),]
head(sig)
##               Metabolite               metabplot   .y. group1 group2 n1 n2
## 2   1,3-dihydroxyacetone  1,3-dihydroxyacetone_O count    O_C    O_H  6  5
## 4        1,4-D-xylobiose       1,4-D-xylobiose_O count    O_C    O_H  6  5
## 9  3-amino-2-piperidone* 3-amino-2-piperidone*_M count    M_C    M_H  6 10
## 10 3-amino-2-piperidone* 3-amino-2-piperidone*_O count    O_C    O_H  6  5
## 12 3-hydroxybutyric acid 3-hydroxybutyric acid_O count    O_C    O_H  6  5
## 20               arbutin               arbutin_O count    O_C    O_H  6  5
##    statistic       df     p     p.adj p.adj.signif    meanC   st.devC     meanH
## 2   3.382217 6.737232 0.012 0.2194400           ns 12.75862 0.3366425 12.252514
## 4   2.445262 6.920443 0.045 0.3274138           ns 12.43513 0.5207342 11.458238
## 9   2.842661 9.752112 0.018 0.2532000           ns 13.28931 0.4594043 12.639614
## 10  2.746490 8.992850 0.023 0.2978824           ns 15.53493 0.6496346 14.545110
## 12  2.960833 8.178347 0.018 0.2532000           ns 13.44891 0.3515654 12.945662
## 20  2.971323 4.667778 0.034 0.3222545           ns 13.80208 2.0688713  4.748944
##      st.devH Horizon foldchange log2foldchange   KEGG         Class
## 2  0.1323567 Organic   1.041307     0.05839494 C00184 Carbohydrates
## 4  0.7563356 Organic   1.085257     0.11803613 C01630 Carbohydrates
## 9  0.4130559 Mineral   1.051402     0.07231438   <NA>          <NA>
## 10 0.5456451 Organic   1.068051     0.09498117   <NA>          <NA>
## 12 0.2035877 Organic   1.038874     0.05502055 C01089 Organic acids
## 20 6.5459311 Organic   2.906347     1.53920720 C06186          <NA>
##          Subclass_1              Subclass_2 Subclass_3 Subclass_4
## 2   Monosaccharides                 Ketoses       none       none
## 4  Oligosaccharides           Disaccharides       none       none
## 9              <NA>                    <NA>       <NA>       <NA>
## 10             <NA>                    <NA>       <NA>       <NA>
## 12 Carboxylic acids Hydroxycarboxylic acids       none       none
## 20             <NA>                    <NA>       <NA>       <NA>
# just significant metabolites
# write.csv(df, "Metabolites.withstat.KEGG.hierarchy.foldchange.csv")
df$meancountdifference <- df$meanH - df$meanC
makeStars <- function(x){
  stars <- c("*", "ns")
  vec <- c(0, 0.05 )
  i <- findInterval(x, vec)
  stars[i]
}
sig.legend <- function(x){
  label <- c("H>C", "H<C")
  vec <- c(-10, 0 )
  i <- findInterval(x, vec)
  label[i]
}
df$p.category <- makeStars(df$p)
df[which(df$p.category == "*"),28] <- sig.legend(df[which(df$p.category == "*"),26])
df[is.na(df$V28),28] <- "H=C"
names(df)[names(df) == "V28"] <- "legend"

I found 62 deferentially significant metabolites in both layers with 31 being in the Organic layer while 31 being in the Mineral layer.

fold change

ggplot(metab.stat.sum) + geom_point(aes(x=log2foldchange, y=-log(p.adj), color = p.adj.signif)) + theme(legend.position = "none",
              plot.title = element_text(size = rel(1.5), hjust = 0.5),
              axis.title = element_text(size = rel(1.25)))

Significant Change in metabolite abundance

figure <- df[which(df$Metabolite %in% sig$Metabolite),]
figure <- figure[order(figure$Class),]
figure$Metabolite <- factor(figure$Metabolite, levels = unique(figure$Metabolite))
datatable(figure)
ggplot(figure, aes(x=Metabolite, y=meancountdifference, color = legend)) + geom_point(stat="identity") + ylab("average count difference (C-H)") + xlab("Metabolite") + coord_flip() + ggtitle("Significant Metabolites") + facet_grid(~Horizon) + 
    scale_color_manual(values=c('dodgerblue3','gray','magenta2'))

Edited Figure