rm(list=ls())
library(ftmsRanalysis)
fdata <- read.delim('input/FTICR_metadata.txt')
fdata <- fdata[fdata$Horizon %in% "Mineral",]
str(fdata)
'data.frame': 16 obs. of 7 variables:
$ Sample_ID : chr "H2" "H4" "H6" "H8" ...
$ Sample.name: chr "H2M" "H4M" "H11M" "H17M" ...
$ Horizon : chr "Mineral" "Mineral" "Mineral" "Mineral" ...
$ Treatment : chr "Heated" "Heated" "Heated" "Heated" ...
$ T_H : chr "Mineral_Heated" "Mineral_Heated" "Mineral_Heated" "Mineral_Heated" ...
$ Sample.ID : chr "BW.H.2.M" "BW.H.4.M" "BW.H.11.M" "BW.H.17.M" ...
$ wMT : chr "Y" "Y" "Y" "Y" ...
mydata = read.delim('input/FTICR_data.txt')
edata <- subset(mydata,select=c("Mass",fdata$Sample_ID))
str(edata)
'data.frame': 16814 obs. of 17 variables:
$ Mass: num 98.7 98.8 98.8 98.9 99 ...
$ H2 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H4 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H6 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H8 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H10 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H12 : num 0 1312225 0 0 0 ...
$ H14 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H16 : num 0 0 1128302 0 0 ...
$ C2 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C4 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C6 : num 0 0 0 1381438 0 ...
$ C8 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C10 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C12 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C14 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C16 : num 0 0 0 0 0 0 0 0 0 0 ...
emeta <- subset(mydata,select=c("Mass","C","H","O","N","C13",
"S","P","Error_ppm","NeutralMass"))
str(emeta)
'data.frame': 16814 obs. of 10 variables:
$ Mass : num 98.7 98.8 98.8 98.9 99 ...
$ C : int 0 0 0 0 0 0 0 0 0 0 ...
$ H : int 0 0 0 0 0 0 0 0 0 0 ...
$ O : int 0 0 0 0 0 0 0 0 0 0 ...
$ N : int 0 0 0 0 0 0 0 0 0 0 ...
$ C13 : int 0 0 0 0 0 0 0 0 0 0 ...
$ S : int 0 0 0 0 0 0 0 0 0 0 ...
$ P : int 0 0 0 0 0 0 0 0 0 0 ...
$ Error_ppm : num 0 0 0 0 0 0 0 0 0 0 ...
$ NeutralMass: num 99.7 99.8 99.8 99.9 100 ...
peakObj <- as.peakData(e_data = edata,f_data = fdata,e_meta = emeta,
edata_cname = "Mass",fdata_cname = "Sample_ID",
mass_cname = "Mass",c_cname="C", h_cname="H",
o_cname="O", n_cname="N", s_cname="S",
p_cname="P", isotopic_cname = "C13",
isotopic_notation = "1")
peakObj
peakData object
# Peaks: 15443
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm]
“Currently, any peaks that are isotopic are removed from the dataset, as available methods (e.g. Van Krevelen plot) are not applicable to these peaks.”
names(peakObj)
[1] "e_data" "f_data" "e_meta"
tail(peakObj$e_meta)
summary(peakObj)
Samples: 16
Molecules: 15443
Percent Missing: 88.566%
plot(peakObj)
Transforming abundance values to log2
peakObj <- edata_transform(peakObj, data_scale="log2")
plotting
plot(peakObj)
peakObj <- compound_calcs(peakObj)
peakObj
peakData object
# Peaks: 15443
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio]
peakObj <- assign_elemental_composition(peakObj)
table(peakObj$e_meta[,getElCompColName(peakObj)])
CHO CHON CHONP CHONS CHONSP CHOP CHOS CHOSP
3033 1382 212 284 79 148 483 99
peakObj <- assign_class(peakObj, boundary_set = "bs1")
table(peakObj$e_meta[, getBS1ColName(peakObj)])
Amino Sugar Carbohydrate Carbohydrate;Amino Sugar
219 536 35
Cond Hydrocarbon Lignin Lignin;Amino Sugar
465 1407 62
Lignin;Tannin Lipid Lipid;Protein
6 420 17
Other Protein Protein;Amino Sugar
1146 785 21
Protein;Lignin Tannin Tannin;Cond Hydrocarbon
6 416 22
Unsat Hydrocarbon Unsat Hydrocarbon;Cond Hydrocarbon
150 7
*Something is going on boundary_set argument when bs2 or bs3 selected.
Before filtering
summary(peakObj)
Samples: 16
Molecules: 15443
Percent Missing: 88.566%
After filtering
filter_obj <- mass_filter(peakObj)
plot(filter_obj, min_mass=200, max_mass=900)
peakObj <- applyFilt(filter_obj, peakObj, min_mass = 200,
max_mass = 900)
summary(peakObj)
Samples: 16
Molecules: 8574
Percent Missing: 83.674%
peakObj
peakData object
# Peaks: 8574
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
Other filters
# minimum number to be observed across all samples in order to retain the biomolecule
peakObj <- applyFilt(molecule_filter(peakObj), peakObj, min_num=2)
# remove biomolec without molecular formula; remove=NoFormula)
peakObj <- applyFilt(formula_filter(peakObj), peakObj)
summary(peakObj)
Samples: 16
Molecules: 2168
Percent Missing: 47.008%
peakObj <- group_designation(peakObj, main_effects=c("Treatment"))
getGroupDF(peakObj)
group_summary <- summarizeGroups(peakObj, summary_functions =
c("n_present", "prop_present"))
head(group_summary$e_data)
p <- densityPlot(peakObj, samples=FALSE, groups=c("Control","Heated"), variable="NOSC",title="Comparison of NOSC Between Experimental Plots: Mineral Horizon")
p
byGroup <- divideByGroupComparisons(peakObj,
comparisons = "all")[[1]]$value
getGroupComparisonSummaryFunctionNames()
[1] "uniqueness_gtest" "uniqueness_nsamps" "uniqueness_prop"
“Use a g-test to compare two groups and determine which peaks are uniquely expressed in each group based on a p-value threshold.”
mineral_unique <- summarizeGroupComparisons(byGroup,
summary_functions="uniqueness_gtest",
summary_function_params=list(
uniqueness_gtest=list(pres_fn="nsamps",
pres_thresh=1, pvalue_thresh=0.1)))
head(mineral_unique$e_data)
mineral_unique
peakData object
# Peaks: 2168
# Samples: 1
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
Group info: main effects=[Treatment]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
A molecule filter was applied to the data, removing Masses that were present in fewer than 2 samples. A total of 6096 Masses were filtered out of the dataset by this filter.
A formula filter was applied to the data, removing Masses that had NoFormula assigned. A total of 310 Masses were filtered out of the dataset by this filter.
vanKrevelenPlot(mineral_unique, colorCName = "uniqueness_gtest")
Compounds breakdown
table(mineral_unique$e_data$uniqueness_gtest)
Unique to Heated Unique to Control Observed in Both
373 77 1718
“Use number of samples for which a mass/peak is present to compare two groups and determine which peaks are uniquely expressed in each group.”
mineral_unique <- summarizeGroupComparisons(byGroup,
summary_functions="uniqueness_nsamps",
summary_function_params=list(
uniqueness_nsamps=list(pres_thresh=1, absn_thresh=0)))
head(mineral_unique$e_data)
mineral_unique
peakData object
# Peaks: 2168
# Samples: 1
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
Group info: main effects=[Treatment]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
A molecule filter was applied to the data, removing Masses that were present in fewer than 2 samples. A total of 6096 Masses were filtered out of the dataset by this filter.
A formula filter was applied to the data, removing Masses that had NoFormula assigned. A total of 310 Masses were filtered out of the dataset by this filter.
vanKrevelenPlot(mineral_unique, colorCName = "uniqueness_nsamps")
Compounds breakdown
table(mineral_unique$e_data$uniqueness_nsamps)
Unique to Heated Unique to Control Observed in Both
272 25 1871