rm(list=ls())
library(ftmsRanalysis)
fdata <- read.delim('input/FTICR_metadata.txt')
fdata <- fdata[fdata$Horizon %in% "Organic",]
str(fdata)
'data.frame': 16 obs. of 7 variables:
$ Sample_ID : chr "H1" "H3" "H5" "H7" ...
$ Sample.name: chr "H2O" "H4O" "H11O" "H17O" ...
$ Horizon : chr "Organic" "Organic" "Organic" "Organic" ...
$ Treatment : chr "Heated" "Heated" "Heated" "Heated" ...
$ T_H : chr "Organic_Heated" "Organic_Heated" "Organic_Heated" "Organic_Heated" ...
$ Sample.ID : chr "BW.H.2.O" "BW.H.4.O" "BW.H.11.O" "BW.H.17.O" ...
$ wMT : chr "Y" "Y" "Y" "Y" ...
mydata = read.delim('input/FTICR_data.txt')
edata <- subset(mydata,select=c("Mass",fdata$Sample_ID))
str(edata)
'data.frame': 16814 obs. of 17 variables:
$ Mass: num 98.7 98.8 98.8 98.9 99 ...
$ H1 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H3 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H5 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H7 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H9 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H11 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H13 : num 0 0 0 0 0 0 0 0 0 0 ...
$ H15 : num 1099266 0 0 0 0 ...
$ C1 : num 0 0 0 0 0 ...
$ C3 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C5 : num 0 0 0 0 0 0 0 0 0 0 ...
$ C7 : num 0 0 0 0 0 ...
$ C9 : num 0 0 0 0 1046645 ...
$ C11 : num 0 0 0 0 0 0 0 0 0 0 ...
$ CS13: num 0 0 0 0 0 ...
$ C15 : num 0 0 0 0 0 ...
emeta <- subset(mydata,select=c("Mass","C","H","O","N","C13",
"S","P","Error_ppm","NeutralMass"))
str(emeta)
'data.frame': 16814 obs. of 10 variables:
$ Mass : num 98.7 98.8 98.8 98.9 99 ...
$ C : int 0 0 0 0 0 0 0 0 0 0 ...
$ H : int 0 0 0 0 0 0 0 0 0 0 ...
$ O : int 0 0 0 0 0 0 0 0 0 0 ...
$ N : int 0 0 0 0 0 0 0 0 0 0 ...
$ C13 : int 0 0 0 0 0 0 0 0 0 0 ...
$ S : int 0 0 0 0 0 0 0 0 0 0 ...
$ P : int 0 0 0 0 0 0 0 0 0 0 ...
$ Error_ppm : num 0 0 0 0 0 0 0 0 0 0 ...
$ NeutralMass: num 99.7 99.8 99.8 99.9 100 ...
peakObj <- as.peakData(e_data = edata,f_data = fdata,e_meta = emeta,
edata_cname = "Mass",fdata_cname = "Sample_ID",
mass_cname = "Mass",c_cname="C", h_cname="H",
o_cname="O", n_cname="N", s_cname="S",
p_cname="P", isotopic_cname = "C13",
isotopic_notation = "1")
peakObj
peakData object
# Peaks: 15443
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm]
“Currently, any peaks that are isotopic are removed from the dataset, as available methods (e.g. Van Krevelen plot) are not applicable to these peaks.”
names(peakObj)
[1] "e_data" "f_data" "e_meta"
tail(peakObj$e_meta)
summary(peakObj)
Samples: 16
Molecules: 15443
Percent Missing: 80.524%
plot(peakObj)
Transforming abundance values to log2
peakObj <- edata_transform(peakObj, data_scale="log2")
plotting
plot(peakObj)
peakObj <- compound_calcs(peakObj)
peakObj
peakData object
# Peaks: 15443
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio]
peakObj <- assign_elemental_composition(peakObj)
table(peakObj$e_meta[,getElCompColName(peakObj)])
CHO CHON CHONP CHONS CHONSP CHOP CHOS CHOSP
3033 1382 212 284 79 148 483 99
peakObj <- assign_class(peakObj, boundary_set = "bs1")
table(peakObj$e_meta[, getBS1ColName(peakObj)])
Amino Sugar Carbohydrate Carbohydrate;Amino Sugar
219 536 35
Cond Hydrocarbon Lignin Lignin;Amino Sugar
465 1407 62
Lignin;Tannin Lipid Lipid;Protein
6 420 17
Other Protein Protein;Amino Sugar
1146 785 21
Protein;Lignin Tannin Tannin;Cond Hydrocarbon
6 416 22
Unsat Hydrocarbon Unsat Hydrocarbon;Cond Hydrocarbon
150 7
*Something is going on boundary_set argument when bs2 or bs3 selected.
Before filtering
summary(peakObj)
Samples: 16
Molecules: 15443
Percent Missing: 80.524%
After filtering
filter_obj <- mass_filter(peakObj)
plot(filter_obj, min_mass=200, max_mass=900)
peakObj <- applyFilt(filter_obj, peakObj, min_mass = 200,
max_mass = 900)
summary(peakObj)
Samples: 16
Molecules: 8574
Percent Missing: 69.770%
peakObj
peakData object
# Peaks: 8574
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
Other filters
# minimum number to be observed across all samples in order to retain the biomolecule
peakObj <- applyFilt(molecule_filter(peakObj), peakObj, min_num=2)
# remove biomolec without molecular formula; remove=NoFormula)
peakObj <- applyFilt(formula_filter(peakObj), peakObj)
summary(peakObj)
Samples: 16
Molecules: 3423
Percent Missing: 32.859%
peakObj <- group_designation(peakObj, main_effects=c("Treatment"))
getGroupDF(peakObj)
group_summary <- summarizeGroups(peakObj, summary_functions =
c("n_present", "prop_present"))
head(group_summary$e_data)
densityPlot(peakObj, samples=FALSE, groups=c("Control","Heated"), variable="NOSC",title="Comparison of NOSC Between Experimental Plots: Organic Horizon")
byGroup <- divideByGroupComparisons(peakObj,
comparisons = "all")[[1]]$value
getGroupComparisonSummaryFunctionNames()
[1] "uniqueness_gtest" "uniqueness_nsamps" "uniqueness_prop"
“Use a g-test to compare two groups and determine which peaks are uniquely expressed in each group based on a p-value threshold.”
organic_unique <- summarizeGroupComparisons(byGroup,
summary_functions="uniqueness_gtest",
summary_function_params=list(
uniqueness_gtest=list(pres_fn="nsamps",
pres_thresh=1, pvalue_thresh=0.1)))
head(organic_unique$e_data)
organic_unique
peakData object
# Peaks: 3423
# Samples: 1
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
Group info: main effects=[Treatment]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
A molecule filter was applied to the data, removing Masses that were present in fewer than 2 samples. A total of 4720 Masses were filtered out of the dataset by this filter.
A formula filter was applied to the data, removing Masses that had NoFormula assigned. A total of 431 Masses were filtered out of the dataset by this filter.
vanKrevelenPlot(organic_unique, colorCName = "uniqueness_gtest")
Compounds breakdown
table(organic_unique$e_data$uniqueness_nsamps)
< table of extent 0 >
“Use number of samples for which a mass/peak is present to compare two groups and determine which peaks are uniquely expressed in each group.”
organic_unique <- summarizeGroupComparisons(byGroup,
summary_functions="uniqueness_nsamps",
summary_function_params=list(
uniqueness_nsamps=list(pres_thresh=1, absn_thresh=0)))
head(organic_unique$e_data)
organic_unique
peakData object
# Peaks: 3423
# Samples: 1
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
Group info: main effects=[Treatment]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
A molecule filter was applied to the data, removing Masses that were present in fewer than 2 samples. A total of 4720 Masses were filtered out of the dataset by this filter.
A formula filter was applied to the data, removing Masses that had NoFormula assigned. A total of 431 Masses were filtered out of the dataset by this filter.
vanKrevelenPlot(organic_unique, colorCName = "uniqueness_nsamps")
Compounds breakdown
table(organic_unique$e_data$uniqueness_nsamps)
Unique to Heated Unique to Control Observed in Both
36 296 3091