Data preparation
library(massdataset)
library(tidyverse)
library(metid)
# Read the MS1 peak table bundled with the metid package.
ms1_data <- readr::read_csv(
  file.path(
    system.file("ms1_peak", package = "metid"),
    "ms1.peak.table.csv"
  )
)

# Append two constant-valued sample columns (sample1, sample2) so the table
# has something to serve as expression data.
ms1_data <- data.frame(ms1_data, sample1 = 1, sample2 = 2)

# Columns name..rt describe each variable; every remaining column is a sample.
expression_data <- dplyr::select(ms1_data, -c(name:rt))
variable_info <- dplyr::rename(
  dplyr::select(ms1_data, name:rt),
  variable_id = name
)

# One row of sample metadata per expression column.
sample_info <- data.frame(
  sample_id = colnames(expression_data),
  injection.order = c(1, 2),
  class = rep("Subject", 2),
  group = rep("Subject", 2)
)

# Key the expression rows by variable id before building the dataset.
rownames(expression_data) <- variable_info$variable_id

object <- create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)
object
#> --------------------
#> massdataset version: 0.01
#> --------------------
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> Creation ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2021-12-22 14:23:58
Add MS2 data to the mass_dataset object
# Stage the example MS2 (.mgf) file from the metid package in a local folder.
path <- "./example"
# showWarnings = FALSE: re-running the example must not warn just because
# ./example already exists from a previous run.
dir.create(path, showWarnings = FALSE)
ms2_data <- system.file("ms2_data", package = "metid")
# Copy (and overwrite any stale copy of) the demo spectrum file into `path`.
file.copy(
  from = file.path(ms2_data, "QC1_MSMS_NCE25.mgf"),
  to = path,
  overwrite = TRUE,
  recursive = TRUE
)
#> [1] TRUE
# Attach MS2 spectra to the dataset; the two tolerances bound how far an MS2
# spectrum may sit from an MS1 peak in m/z and retention time to be matched.
# NOTE(review): no `path` is passed here, so mutate_ms2() presumably finds the
# copied .mgf file through its own default search location -- confirm.
object <- massdataset::mutate_ms2(
  object = object,
  column = "rp",
  polarity = "positive",
  ms1.ms2.match.mz.tol = 10,
  ms1.ms2.match.rt.tol = 30
)
#> Reading mgf data...
#> 25 out of 100 variable have MS2 spectra.
#> Selecting the most intense MS2 spectrum for each peak...
object
#> --------------------
#> massdataset version: 0.01
#> --------------------
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 25 variables x 24 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> Creation ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2021-12-22 14:23:58
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2021-12-22 14:23:58
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2021-12-22 14:24:02
object@ms2_data
#> $QC1_MSMS_NCE25.mgf
#> --------------------
#> column: rp
#> polarity: positive
#> mz_tol: 10
#> rt_tol (second): 30
#> --------------------
#> 25 variables:
#> pRPLC_603 pRPLC_722 pRPLC_778 pRPLC_1046 pRPLC_1112...
#> 24 MS2 spectra.
#> mz162.112442157672rt37.9743312 mz181.072050304971rt226.14144 mz289.227264404297rt284.711172 mz181.072050673093rt196.800648 mz209.092155077047rt58.3735608...
Annotate single peaks
# Load the reference database object shipped with metid.
data("snyder_database_rplc0.0.3", package = "metid")

# Annotate variable 3 with RT and MS2 matching both switched off; the result
# is only printed, not stored on the object (add_to_annotation_table = FALSE).
annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = FALSE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = FALSE
)
#> No MS2 data in object, so only use mz and/or RT for matching.
#> You set rt.match.tol as NA, so RT will not be used for matching.
#>
|
| | 0%
|
|======================================================================| 100%
#>
#> All done.
#> variable_id ms2_files_id ms2_spectrum_id Compound.name CAS.ID HMDB.ID
#> 1 pRPLC_603 NA NA L(-)-Carnitine <NA> <NA>
#> 2 pRPLC_603 NA NA L-Carnitine 541-15-1 HMDB00062
#> KEGG.ID Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1 <NA> RPLC_54 (M+H)+ 0.05375 0.9999977 NA NA NA NA
#> 2 C00318 RPLC_406 (M+H)+ 1.86625 0.9972176 NA NA NA NA
#> Total.score Database Level
#> 1 0.9999977 MS_0.0.2 3
#> 2 0.9972176 MS_0.0.2 3
# Same peak, now with retention-time matching enabled (MS2 still off); the
# result is only printed, not stored on the object.
annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = FALSE
)
#> No MS2 data in object, so only use mz and/or RT for matching.
#> You set rt.match.tol < 10,000, so if the compounds have RT, RTs will be used for matching
#>
|
| | 0%
|
|======================================================================| 100%
#>
#> All done.
#> variable_id ms2_files_id ms2_spectrum_id Compound.name CAS.ID HMDB.ID
#> 1 pRPLC_603 NA NA L(-)-Carnitine <NA> <NA>
#> 2 pRPLC_603 NA NA L-Carnitine 541-15-1 HMDB00062
#> KEGG.ID Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1 <NA> RPLC_54 (M+H)+ 0.05375 0.9999977 0.746 0.9996909 NA NA
#> 2 C00318 RPLC_406 (M+H)+ 1.86625 0.9972176 2.254 0.9971815 NA NA
#> Total.score Database Level
#> 1 0.9998443 MS_0.0.2 2
#> 2 0.9971995 MS_0.0.2 2
# Same peak with both retention-time and MS2 matching enabled; the result is
# only printed, not stored on the object.
annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = TRUE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = FALSE
)
#> QC1_MSMS_NCE25.mgf file:
#> 25 MS2 spectra.
#> Use all CE values.
#>
#> Identifing metabolites with MS/MS database...
#>
|
| | 0%
|
|======================= | 33%
|
|=============================================== | 67%
|
|======================================================================| 100%
#>
#> All done.
#> variable_id ms2_files_id ms2_spectrum_id Compound.name
#> 1 pRPLC_603 QC1_MSMS_NCE25.mgf mz162.112442157672rt37.9743312 L-Carnitine
#> CAS.ID HMDB.ID KEGG.ID Lab.ID Adduct mz.error mz.match.score RT.error
#> 1 541-15-1 HMDB00062 C00318 RPLC_406 (M+H)+ 1.667894 0.997777 1.974331
#> RT.match.score CE SS Total.score Database Level
#> 1 0.9978368 NCE25 0.6048288 0.8013178 MS_0.0.2 1
Add annotations to the object's annotation table
object@annotation_table
#> data frame with 0 columns and 0 rows
# With add_to_annotation_table = TRUE the call returns an updated dataset
# whose annotation_table holds the result (RT and MS2 matching off here).
object1 <- annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = FALSE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = TRUE
)
#> No MS2 data in object, so only use mz and/or RT for matching.
#> You set rt.match.tol as NA, so RT will not be used for matching.
#>
|
| | 0%
|
|======================================================================| 100%
#>
#> All done.
object1@annotation_table
#> variable_id ms2_files_id ms2_spectrum_id Compound.name CAS.ID HMDB.ID
#> 1 pRPLC_603 NA NA L(-)-Carnitine <NA> <NA>
#> 2 pRPLC_603 NA NA L-Carnitine 541-15-1 HMDB00062
#> KEGG.ID Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1 <NA> RPLC_54 (M+H)+ 0.05375 0.9999977 NA NA NA NA
#> 2 C00318 RPLC_406 (M+H)+ 1.86625 0.9972176 NA NA NA NA
#> Total.score Database Level
#> 1 0.9999977 MS_0.0.2 3
#> 2 0.9972176 MS_0.0.2 3
# Annotate the same peak again on top of object1, this time with RT matching
# enabled, and store the result in the returned object's annotation table.
object2 <- annotate_single_peak_mass_dataset(
  object = object1,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = TRUE
)
#> No MS2 data in object, so only use mz and/or RT for matching.
#> You set rt.match.tol < 10,000, so if the compounds have RT, RTs will be used for matching
#>
|
| | 0%
|
|======================================================================| 100%
#>
#> All done.
object2@annotation_table
#> variable_id ms2_files_id ms2_spectrum_id Compound.name CAS.ID HMDB.ID
#> 1 pRPLC_603 NA NA L(-)-Carnitine <NA> <NA>
#> 2 pRPLC_603 NA NA L-Carnitine 541-15-1 HMDB00062
#> 3 pRPLC_603 NA NA L(-)-Carnitine <NA> <NA>
#> KEGG.ID Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1 <NA> RPLC_54 (M+H)+ 0.05375 0.9999977 0.746 0.9996909 NA NA
#> 2 C00318 RPLC_406 (M+H)+ 1.86625 0.9972176 2.254 0.9971815 NA NA
#> 3 <NA> RPLC_54 (M+H)+ 0.05375 0.9999977 NA NA NA NA
#> Total.score Database Level
#> 1 0.9998443 MS_0.0.2 2
#> 2 0.9971995 MS_0.0.2 2
#> 3 0.9999977 MS_0.0.2 3
# Repeat the RT-based annotation starting from object2, storing the result in
# the returned object's annotation table.
object3 <- annotate_single_peak_mass_dataset(
  object = object2,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = TRUE
)
#> No MS2 data in object, so only use mz and/or RT for matching.
#> You set rt.match.tol < 10,000, so if the compounds have RT, RTs will be used for matching
#>
|
| | 0%
|
|======================================================================| 100%
#>
#> All done.
object3@annotation_table
#> variable_id ms2_files_id ms2_spectrum_id Compound.name CAS.ID HMDB.ID
#> 1 pRPLC_603 NA NA L(-)-Carnitine <NA> <NA>
#> 2 pRPLC_603 NA NA L-Carnitine 541-15-1 HMDB00062
#> KEGG.ID Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1 <NA> RPLC_54 (M+H)+ 0.05375 0.9999977 0.746 0.9996909 NA NA
#> 2 C00318 RPLC_406 (M+H)+ 1.86625 0.9972176 2.254 0.9971815 NA NA
#> Total.score Database Level
#> 1 0.9998443 MS_0.0.2 2
#> 2 0.9971995 MS_0.0.2 2