Identify metabolites based on an MS1 or MS/MS database. [Maturing]

identify_metabolites_mass_dataset(
  object,
  ms1.match.ppm = 25,
  ms2.match.ppm = 30,
  mz.ppm.thr = 400,
  ms2.match.tol = 0.5,
  fraction.weight = 0.3,
  dp.forward.weight = 0.6,
  dp.reverse.weight = 0.1,
  rt.match.tol = 30,
  polarity = c("positive", "negative"),
  ce = "all",
  column = c("rp", "hilic"),
  ms1.match.weight = 0.25,
  rt.match.weight = 0.25,
  ms2.match.weight = 0.5,
  total.score.tol = 0.5,
  candidate.num = 3,
  database,
  threads = 3
)

Arguments

object

A mass_dataset class object.

ms1.match.ppm

Precursor match ppm tolerance.

ms2.match.ppm

Fragment ion match ppm tolerance.

mz.ppm.thr

Accurate mass tolerance for m/z error calculation.

ms2.match.tol

MS2 match (MS2 similarity) tolerance.

fraction.weight

The weight for matched fragments.

dp.forward.weight

Forward dot product weight.

dp.reverse.weight

Reverse dot product weight.

rt.match.tol

RT match tolerance.

polarity

The polarity of the data, "positive" or "negative".

ce

Collision energy. Please confirm the CE values in your database. Default is "all".

column

"hilic" (HILIC column) or "rp" (reverse phase).

ms1.match.weight

The weight of MS1 match for total score calculation.

rt.match.weight

The weight of RT match for total score calculation.

ms2.match.weight

The weight of MS2 match for total score calculation.

total.score.tol

Total score tolerance. The total score calculation follows the approach used by MS-DIAL.

candidate.num

The number of candidates to keep for each peak.

database

MS2 database name or MS database.

threads

Number of threads to use.

Value

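A mass_dataset class object with the annotation results added (the example below reads them from its annotation_table slot and passes the returned object back in as `object`). Note: this entry previously read "A metIdentifyClass object"; confirm against the function's source.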

See also

The example and demo data for this function can be found at https://tidymass.github.io/metid/articles/metid.html

Author

Xiaotao Shen shenxt1990@163.com

Examples

library(massdataset)
#>  magrittr  2.0.1      tinytools 0.9.1
#> massdataset,
#> More information can be found at https://tidymass.github.io/massdataset/
#> Authors: Xiaotao Shen (shenxt@stanford.edu)
#> Maintainer: Xiaotao Shen
#> 
#> Attaching package: ‘massdataset’
#> The following object is masked from ‘package:stats’:
#> 
#>     filter
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#>  ggplot2 3.3.5      purrr   0.3.4
#>  tibble  3.1.3      dplyr   1.0.7
#>  tidyr   1.1.3      stringr 1.4.0
#>  readr   2.0.0      forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x tidyr::extract()   masks magrittr::extract()
#> x dplyr::filter()    masks massdataset::filter(), stats::filter()
#> x dplyr::lag()       masks stats::lag()
#> x purrr::set_names() masks magrittr::set_names()

ms1_data =
  readr::read_csv(file.path(
    system.file("ms1_peak", package = "metid"),
    "ms1.peak.table.csv"
  ))
#> Rows: 100 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): name
#> dbl (2): mz, rt
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.

ms1_data = data.frame(ms1_data, sample1 = 1, sample2 = 2)

expression_data = ms1_data %>%
  dplyr::select(-c(name:rt))

variable_info =
  ms1_data %>%
  dplyr::select(name:rt) %>%
  dplyr::rename(variable_id = name)

sample_info =
  data.frame(
    sample_id = colnames(expression_data),
    injection.order = c(1, 2),
    class = c("Subject", "Subject"),
    group = c("Subject", "Subject")
  )
rownames(expression_data) = variable_info$variable_id

object = create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)

object
#> -------------------- 
#> massdataset version: 0.01 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> Creation ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2021-12-22 00:24:53

data("hmdb_ms1_database0.0.3", package = "metid")

object1 =
  identify_metabolites_mass_dataset(object = object,
                                    database = hmdb_ms1_database0.0.3)
#> No MS2 data, so only use mz and/or RT for matching.
#> You set rt.match.tol < 10,000, so if the compounds have RT,  RTs will be used for matching
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================================| 100%
#> 
#> All done.
#> 

data("snyder_database_rplc0.0.3", package = "metid")

database = snyder_database_rplc0.0.3

object2 =
  identify_metabolites_mass_dataset(object = object1,
                                    database = snyder_database_rplc0.0.3)
#> No MS2 data, so only use mz and/or RT for matching.
#> You set rt.match.tol < 10,000, so if the compounds have RT,  RTs will be used for matching
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================================| 100%
#> 
#> All done.
#> 
head(object2@annotation_table)
#> # A tibble: 6 × 18
#>   variable_id ms2_files_id ms2_spectrum_id Compound.name  CAS.ID HMDB.ID KEGG.ID
#>   <chr>       <lgl>        <lgl>           <chr>          <chr>  <chr>   <chr>  
#> 1 pRPLC_10319 NA           NA              Tyrosyl-Serine NA     HMDB00… NA     
#> 2 pRPLC_10319 NA           NA              Seryltyrosine  21435… HMDB00… NA     
#> 3 pRPLC_10319 NA           NA              Monoglyceride… 36291… HMDB00… NA     
#> 4 pRPLC_1046  NA           NA              Theophylline   NA     HMDB00… NA     
#> 5 pRPLC_1046  NA           NA              Paraxanthine   611-5… HMDB01… C13747 
#> 6 pRPLC_1046  NA           NA              Theobromine    83-67… HMDB02… C07480 
#> # … with 11 more variables: Lab.ID <chr>, Adduct <chr>, mz.error <dbl>,
#> #   RT.error <dbl>, mz.match.score <dbl>, RT.match.score <dbl>,
#> #   Total.score <dbl>, CE <lgl>, SS <lgl>, Database <chr>, Level <dbl>
extract_variable_info(object = object)
#>     variable_id       mz      rt
#> 1     pRPLC_376 472.3032 772.906
#> 2     pRPLC_391 466.3292 746.577
#> 3     pRPLC_603 162.1125  33.746
#> 4     pRPLC_629 181.0720  36.360
#> 5     pRPLC_685 230.0701 158.205
#> 6     pRPLC_722 181.0721 228.305
#> 7     pRPLC_778 289.2275 286.185
#> 8    pRPLC_1046 181.0720 201.182
#> 9    pRPLC_1112 209.0922  57.406
#> 10   pRPLC_1148 282.8750  40.947
#> 11   pRPLC_1307 314.2326 401.848
#> 12   pRPLC_1326 207.1292 406.754
#> 13   pRPLC_1560 264.0558 495.824
#> 14   pRPLC_1799 699.3153 564.204
#> 15   pRPLC_1835 351.2139 572.258
#> 16   pRPLC_1860 249.1849 579.437
#> 17   pRPLC_2064 419.2343 621.835
#> 18   pRPLC_2065 660.4051 622.159
#> 19   pRPLC_2151 363.2198 638.691
#> 20   pRPLC_2171 568.3405 641.920
#> 21   pRPLC_2295 280.2637 669.073
#> 22   pRPLC_2386 659.3539 699.576
#> 23   pRPLC_2600 267.6469 820.049
#> 24   pRPLC_2615 508.3395 823.869
#> 25   pRPLC_3110 414.3002 545.186
#> 26   pRPLC_3138 286.1439 537.015
#> 27   pRPLC_3633 180.9620  40.947
#> 28   pRPLC_3839 141.0297  63.436
#> 29   pRPLC_3927 232.1545  77.507
#> 30   pRPLC_3959 153.0660  84.240
#> 31   pRPLC_3968 244.1545  87.115
#> 32   pRPLC_4027 797.7993  99.964
#> 33   pRPLC_4250 473.1718 171.891
#> 34   pRPLC_4508 260.1857 222.592
#> 35   pRPLC_4633 810.8984 240.055
#> 36   pRPLC_4673 379.2229 243.748
#> 37   pRPLC_5025 195.0878 284.564
#> 38   pRPLC_5044 286.2014 286.511
#> 39   pRPLC_5461 310.2011 337.952
#> 40   pRPLC_5721 517.1417 375.094
#> 41   pRPLC_5726 125.0600 376.112
#> 42   pRPLC_7083 229.0973 516.577
#> 43   pRPLC_7291 175.0968 536.688
#> 44   pRPLC_7637 563.1516 563.240
#> 45   pRPLC_7781 343.2266 572.579
#> 46   pRPLC_7987 105.0336 586.229
#> 47   pRPLC_8054 590.3301 590.498
#> 48   pRPLC_9043 651.4245 646.108
#> 49   pRPLC_9544 247.1694 682.229
#> 50   pRPLC_9960 232.1121 737.340
#> 51  pRPLC_10319 313.0773 790.835
#> 52  pRPLC_10514 343.1944 823.233
#> 53  pRPLC_10535 322.2266 823.869
#> 54  pRPLC_10563 313.0773 825.778
#> 55  pRPLC_10906 285.0097 901.116
#> 56  pRPLC_10992 566.4290 950.459
#> 57  pRPLC_11024 128.5944 963.771
#> 58  pRPLC_11039 392.2556 979.156
#> 59  pRPLC_11516 446.2952 860.152
#> 60  pRPLC_12426 599.3413 765.518
#> 61  pRPLC_12599 365.1958 750.413
#> 62  pRPLC_12912 539.2851 718.742
#> 63  pRPLC_13460 503.1783 680.306
#> 64  pRPLC_13514 858.5523 676.774
#> 65  pRPLC_13668 639.4079 667.146
#> 66  pRPLC_14733 533.9914 622.159
#> 67  pRPLC_15362 323.1850 596.649
#> 68  pRPLC_16044 302.2688 565.806
#> 69  pRPLC_16149 189.1005 561.272
#> 70  pRPLC_16169 395.2062 560.284
#> 71  pRPLC_16854 200.1728 524.606
#> 72  pRPLC_17684 625.2754 470.139
#> 73  pRPLC_17705 381.1375 468.480
#> 74  pRPLC_17826 499.0576 460.728
#> 75  pRPLC_17934 249.1099 452.073
#> 76  pRPLC_18346 316.1176 421.377
#> 77  pRPLC_18562 135.1170 407.078
#> 78  pRPLC_18580 196.1334 406.429
#> 79  pRPLC_18586 241.1410 406.429
#> 80  pRPLC_18600 493.4408 405.782
#> 81  pRPLC_19147 347.2037 363.045
#> 82  pRPLC_19371 164.1072 341.233
#> 83  pRPLC_19437 407.2250 336.002
#> 84  pRPLC_19565 313.0876 325.577
#> 85  pRPLC_19596 310.2011 322.275
#> 86  pRPLC_19688 125.0599 314.347
#> 87  pRPLC_19794 399.1861 304.420
#> 88  pRPLC_19991 419.0097 287.167
#> 89  pRPLC_20220 768.3619 268.005
#> 90  pRPLC_20346 209.0786 256.795
#> 91  pRPLC_20350 352.1653 256.480
#> 92  pRPLC_20625 301.0005 233.347
#> 93  pRPLC_21179 290.9476 152.554
#> 94  pRPLC_21343 426.0706  96.728
#> 95  pRPLC_21471 166.0726  77.507
#> 96  pRPLC_21522 123.0554  71.419
#> 97  pRPLC_21528 237.2214  70.460
#> 98  pRPLC_21720 170.0330  45.387
#> 99  pRPLC_21734 183.0628  44.440
#> 100 pRPLC_22098 182.0813  33.420