rm(list=ls())
library(ftmsRanalysis)fdata <- read.delim('input/FTICR_metadata.txt')
fdata <- fdata[fdata$Horizon %in% "Organic",]
str(fdata)'data.frame':   16 obs. of  7 variables:
 $ Sample_ID  : chr  "H1" "H3" "H5" "H7" ...
 $ Sample.name: chr  "H2O" "H4O" "H11O" "H17O" ...
 $ Horizon    : chr  "Organic" "Organic" "Organic" "Organic" ...
 $ Treatment  : chr  "Heated" "Heated" "Heated" "Heated" ...
 $ T_H        : chr  "Organic_Heated" "Organic_Heated" "Organic_Heated" "Organic_Heated" ...
 $ Sample.ID  : chr  "BW.H.2.O" "BW.H.4.O" "BW.H.11.O" "BW.H.17.O" ...
 $ wMT        : chr  "Y" "Y" "Y" "Y" ...mydata = read.delim('input/FTICR_data.txt')edata <- subset(mydata,select=c("Mass",fdata$Sample_ID))
str(edata)'data.frame':   16814 obs. of  17 variables:
 $ Mass: num  98.7 98.8 98.8 98.9 99 ...
 $ H1  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H3  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H5  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H7  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H9  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H11 : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H13 : num  0 0 0 0 0 0 0 0 0 0 ...
 $ H15 : num  1099266 0 0 0 0 ...
 $ C1  : num  0 0 0 0 0 ...
 $ C3  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ C5  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ C7  : num  0 0 0 0 0 ...
 $ C9  : num  0 0 0 0 1046645 ...
 $ C11 : num  0 0 0 0 0 0 0 0 0 0 ...
 $ CS13: num  0 0 0 0 0 ...
 $ C15 : num  0 0 0 0 0 ...emeta <- subset(mydata,select=c("Mass","C","H","O","N","C13",
                                "S","P","Error_ppm","NeutralMass"))
str(emeta)'data.frame':   16814 obs. of  10 variables:
 $ Mass       : num  98.7 98.8 98.8 98.9 99 ...
 $ C          : int  0 0 0 0 0 0 0 0 0 0 ...
 $ H          : int  0 0 0 0 0 0 0 0 0 0 ...
 $ O          : int  0 0 0 0 0 0 0 0 0 0 ...
 $ N          : int  0 0 0 0 0 0 0 0 0 0 ...
 $ C13        : int  0 0 0 0 0 0 0 0 0 0 ...
 $ S          : int  0 0 0 0 0 0 0 0 0 0 ...
 $ P          : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Error_ppm  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ NeutralMass: num  99.7 99.8 99.8 99.9 100 ...peakObj <- as.peakData(e_data = edata,f_data = fdata,e_meta = emeta,
                       edata_cname = "Mass",fdata_cname = "Sample_ID",
                       mass_cname = "Mass",c_cname="C", h_cname="H",
                       o_cname="O", n_cname="N", s_cname="S",
                       p_cname="P", isotopic_cname = "C13",
                       isotopic_notation = "1")
peakObjpeakData object
# Peaks: 15443
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm]“Currently, any peaks that are isotopic are removed from the dataset, as available methods (e.g. Van Krevelen plot) are not applicable to these peaks.”
names(peakObj)[1] "e_data" "f_data" "e_meta"tail(peakObj$e_meta)summary(peakObj)Samples: 16
Molecules: 15443
Percent Missing: 80.524%plot(peakObj)Transforming abundance values to log2
peakObj <- edata_transform(peakObj, data_scale="log2")plotting
plot(peakObj)peakObj <- compound_calcs(peakObj)
peakObjpeakData object
# Peaks: 15443
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio]peakObj <- assign_elemental_composition(peakObj)
table(peakObj$e_meta[,getElCompColName(peakObj)])
   CHO   CHON  CHONP  CHONS CHONSP   CHOP   CHOS  CHOSP 
  3033   1382    212    284     79    148    483     99 peakObj <- assign_class(peakObj, boundary_set = "bs1")
table(peakObj$e_meta[, getBS1ColName(peakObj)])
                       Amino Sugar                       Carbohydrate           Carbohydrate;Amino Sugar 
                               219                                536                                 35 
                  Cond Hydrocarbon                             Lignin                 Lignin;Amino Sugar 
                               465                               1407                                 62 
                     Lignin;Tannin                              Lipid                      Lipid;Protein 
                                 6                                420                                 17 
                             Other                            Protein                Protein;Amino Sugar 
                              1146                                785                                 21 
                    Protein;Lignin                             Tannin            Tannin;Cond Hydrocarbon 
                                 6                                416                                 22 
                 Unsat Hydrocarbon Unsat Hydrocarbon;Cond Hydrocarbon 
                               150                                  7 *Something is going on boundary_set argument when bs2 or bs3 selected.
Before filtering
summary(peakObj)Samples: 16
Molecules: 15443
Percent Missing: 80.524%After filtering
filter_obj <- mass_filter(peakObj)
plot(filter_obj, min_mass=200, max_mass=900)peakObj <- applyFilt(filter_obj, peakObj, min_mass = 200,
                  max_mass = 900)
summary(peakObj)Samples: 16
Molecules: 8574
Percent Missing: 69.770%peakObjpeakData object
# Peaks: 8574
# Samples: 16
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.Other filters
# minimum number to be observed across all samples in order to retain the biomolecule
peakObj <- applyFilt(molecule_filter(peakObj), peakObj, min_num=2)
# remove biomolec without molecular formula; remove=NoFormula)
peakObj <- applyFilt(formula_filter(peakObj), peakObj)
summary(peakObj)Samples: 16
Molecules: 3423
Percent Missing: 32.859%peakObj <- group_designation(peakObj, main_effects=c("Treatment"))
getGroupDF(peakObj)group_summary <- summarizeGroups(peakObj, summary_functions =
                                   c("n_present", "prop_present"))
head(group_summary$e_data)densityPlot(peakObj, samples=FALSE, groups=c("Control","Heated"), variable="NOSC",title="Comparison of NOSC Between Experimental Plots: Organic Horizon")byGroup <- divideByGroupComparisons(peakObj,
                                comparisons = "all")[[1]]$valuegetGroupComparisonSummaryFunctionNames()[1] "uniqueness_gtest"  "uniqueness_nsamps" "uniqueness_prop"  “Use a g-test to compare two groups and determine which peaks are uniquely expressed in each group based on a p-value threshold.”
organic_unique <- summarizeGroupComparisons(byGroup,
            summary_functions="uniqueness_gtest",
            summary_function_params=list(
                  uniqueness_gtest=list(pres_fn="nsamps",
                          pres_thresh=1, pvalue_thresh=0.1)))
head(organic_unique$e_data)organic_uniquepeakData object
# Peaks: 3423
# Samples: 1
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
Group info: main effects=[Treatment]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
A molecule filter was applied to the data, removing Masses that were present in fewer than 2 samples. A total of 4720 Masses were filtered out of the dataset by this filter.
A formula filter was applied to the data, removing Masses that had NoFormula assigned. A total of 431 Masses were filtered out of the dataset by this filter.vanKrevelenPlot(organic_unique, colorCName = "uniqueness_gtest")Compounds breakdown
table(organic_unique$e_data$uniqueness_nsamps)< table of extent 0 >“Use number of samples for which a mass/peak is present to compare two groups and determine which peaks are uniquely expressed in each group.”
organic_unique <- summarizeGroupComparisons(byGroup,
            summary_functions="uniqueness_nsamps",
            summary_function_params=list(
                  uniqueness_nsamps=list(pres_thresh=1, absn_thresh=0)))
head(organic_unique$e_data)organic_uniquepeakData object
# Peaks: 3423
# Samples: 1
Meta data columns: [Mass, C, H, O, N, C13, S, P, Error_ppm, NeutralMass, MolForm, AI, AI_Mod, DBE, DBE_O, DBE_AI, GFE, kmass.CH2, kdefect.CH2, NOSC, OtoC_ratio, HtoC_ratio, NtoC_ratio, PtoC_ratio, NtoP_ratio, ElComposition, bs1_class]
Group info: main effects=[Treatment]
A mass filter was applied to the data, removing Masses that had a mass less than 200 or a mass greater than 900. A total of 6869 Masses were filtered out of the dataset by this filter.
A molecule filter was applied to the data, removing Masses that were present in fewer than 2 samples. A total of 4720 Masses were filtered out of the dataset by this filter.
A formula filter was applied to the data, removing Masses that had NoFormula assigned. A total of 431 Masses were filtered out of the dataset by this filter.vanKrevelenPlot(organic_unique, colorCName = "uniqueness_nsamps")Compounds breakdown
table(organic_unique$e_data$uniqueness_nsamps)
 Unique to Heated Unique to Control  Observed in Both 
               36               296              3091