# Load the positive- and negative-ion-mode lipid tables and stack them into
# one data frame, tagging each row with the ion mode it came from.
POS <- read.csv("Blanchard_Lipid_POS.csv", header = TRUE)
NEG <- read.csv("Blanchard_Lipid_NEG.csv", header = TRUE)

# Strip the ion-mode suffix from each table's OWN column names. NEG must be
# derived from names(NEG): copying POS's names onto NEG would silently
# mislabel columns whenever the two files differ in column order.
names(POS) <- gsub(pattern = "_POS", replacement = "", x = names(POS))
names(NEG) <- gsub(pattern = "_NEG", replacement = "", x = names(NEG))

POS$charge <- "POS"
NEG$charge <- "NEG"

# rbind() on data frames matches columns by name, so a genuine mismatch
# between the two files now surfaces as an error here instead of being hidden.
data <- rbind(POS, NEG) # lipid data with counts
# Metadata for the significant lipids (file made in comparealigned .Rmd).
# The first column is dropped on read, and "Main Class" is renamed so the
# column name no longer contains a space.
summary_class <- read_csv("significantlipidswithformula.csv")[-1] %>%
  rename(Main_Class = "Main Class")

# Order the metadata by main class, then by the control-minus-heated
# standardized difference within each class.
summary_class <- summary_class[
  with(
    summary_class,
    order(Main_Class, Standardized_Difference_Control_minus_Heated)
  ),
]

# Inner join: keep only lipids present in both the metadata and the counts.
df <- merge(summary_class, data, by = "row.identity", all = FALSE)
# Reshape df to long format (one row per lipid x sample) for plotting:
#   - columns 11-16 of df are dropped; per the original note, columns 17-43
#     of df hold the counts
#   - the "Blanch_Nat_Lip_" prefix on the sample names is redundant and removed
#   - the first "_"-separated token of what remains becomes the treatment
#     (Control vs Heated); the rest stays in `plot`
dflong <- df[-(11:16)] %>%
  gather(
    key = plot,
    value = count,
    Blanch_Nat_Lip_C_12_AB_M_17:Blanch_Nat_Lip_H_4_AB_M_05
  ) %>%
  mutate(plot = gsub("Blanch_Nat_Lip_", "", plot)) %>%
  separate(plot, into = c("treatment", "plot"), sep = "_", extra = "merge") %>%
  unite("row.id", c(row.identity, soil_type), remove = FALSE) %>%
  mutate(
    row.id = gsub("_Mineral", "", row.id),
    row.id = gsub("_Organic", "", row.id)
  ) %>%
  arrange(soil_type)

# Split by soil type; %in% never yields NA, so rows with a missing soil_type
# are excluded from both subsets.
dflongMineral <- dflong[dflong$soil_type %in% "Mineral", ]
dflongOrganic <- dflong[dflong$soil_type %in% "Organic", ]
# Horizontal boxplot of log(count) per lipid, filled by treatment.
#
#   plot_data    long-format data with row.id, count and treatment columns
#   plot_title   title shown above the panel
#   show_legend  passed to geom_boxplot(show.legend = ); NA is ggplot2's default
#
# Boxes are separated by lipid (x) and treatment (fill) through ggplot2's
# default grouping on discrete variables, so no explicit group aesthetic is
# needed; dplyr::group_by() is not an aesthetic and must not be passed to aes().
lipid_boxplot <- function(plot_data, plot_title, show_legend = NA) {
  ggplot(data = plot_data, aes(x = row.id, y = log(count), fill = treatment)) +
    geom_boxplot(show.legend = show_legend) +
    coord_flip() +
    scale_fill_manual(values = c("#082BEA", "#EA0D08")) +
    ggtitle(plot_title) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    )
}

# The Mineral panel hides its legend; the Organic panel keeps it.
lipidfigureMineral <- lipid_boxplot(
  dflongMineral,
  "Significant Lipids Heated v. Control Soil Mineral Plots",
  show_legend = FALSE
)
lipidfigureOrganic <- lipid_boxplot(
  dflongOrganic,
  "Significant Lipids Heated v. Control Soil Organic Plots"
)

# Stack the two panels in a single column and display the combined figure.
lipidfigure <- ggarrange(lipidfigureMineral, lipidfigureOrganic, ncol = 1)
lipidfigure
# One boxplot per lipid main class. do() stores each group's plot in the
# `plots` list-column; inside do(), `.` is the data for the current class.
list.ggplotsbygroup <- dflong %>%
  group_by(Main_Class) %>%
  do(
    plots = ggplot(., aes(x = row.id, y = log(count))) +
      geom_boxplot(aes(fill = treatment)) +
      labs(
        x = "compound",
        y = "abundance (logged)",
        title = unique(.$Main_Class)
      )
  )
# Flip the axes of each per-class plot, keep it under a class-specific name,
# and display it. Wrapping an assignment in parentheses makes R print the
# assigned value. Plots are selected by their position in
# list.ggplotsbygroup$plots.
(DG1 <- list.ggplotsbygroup$plots[[1]] + coord_flip())
(PC2 <- list.ggplotsbygroup$plots[[2]] + coord_flip())
(PE3 <- list.ggplotsbygroup$plots[[3]] + coord_flip())
(DGDG4 <- list.ggplotsbygroup$plots[[4]] + coord_flip())
(HexCer5 <- list.ggplotsbygroup$plots[[5]] + coord_flip())
(DGTSA6 <- list.ggplotsbygroup$plots[[6]] + coord_flip())
(TG7 <- list.ggplotsbygroup$plots[[7]] + coord_flip())
More background information is in the Literature Review.
The literature has shown that soil composition and lipid composition adapt to environmental change, including temperature. One major strategy by which plants adapt to temperature change is to decrease the degree of unsaturation of membrane lipids (i.e., increase saturation) under high temperature and to increase unsaturation under low temperature.
# Chain length and double-bond count for the triacylglycerol (TG) lipids,
# parsed out of row.identity.
# NOTE(review): rows 23:31 and columns 1 and 4 are hard-coded positions in the
# sorted summary_class; presumably they select the TG rows with row.identity
# and Standardized_Difference_Control_minus_Heated. Confirm whenever the
# input file changes.
TGchain <- summary_class[c(23:31), c(1, 4)] %>%
  # Split on ":" (e.g. "TG(48:1)" -> length "TG(48", dbond_number "1)").
  separate(row.identity, c("length", "dbond_number"), ":") %>%
  # Strip the "TG(" prefix, the closing ")" and any "_B" suffix.
  mutate(
    length = str_replace(length, "TG\\(", ""),
    dbond_number = str_replace(dbond_number, "\\)", ""),
    dbond_number = str_replace(dbond_number, "_B", "")
  )

# Control-minus-heated difference against chain length, with the double-bond
# count shown by both point size and colour.
# NOTE(review): length and dbond_number are character columns here, so ggplot2
# treats them as discrete; convert with as.numeric() if numeric scales or
# numeric ordering are wanted.
TGplot <- ggplot(
  TGchain,
  aes(
    x = length,
    y = Standardized_Difference_Control_minus_Heated,
    size = dbond_number,
    color = dbond_number
  )
) +
  geom_point() +
  ggtitle("Triacylglycerols") +
  ylab("Difference [C - H]")
TGplot
TG is the largest of the 7 groups, and there was no distinguishable trend relating saturation or chain length to abundance. If anything, the most saturated (48:1) has a higher count in the Heated plots than in the Control plots, while the least saturated (58:9) has a higher count in the Control plots than in the Heated plots. This is the opposite of the findings in plants.