#!/usr/bin/env python3
# 2025 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com
r"""
Parse different crosslink search engine and result formats.
Examples
--------
>>> from pyXLMS.parser import read
>>> csms_from_xiSearch = read(
... "data/xi/r1_Xi1.7.6.7.csv", engine="xiSearch/xiFDR", crosslinker="DSS"
... )
>>> from pyXLMS.parser import read
>>> csms_from_MaxQuant = read(
... "data/maxquant/run1/crosslinkMsms.txt", engine="MaxQuant", crosslinker="DSS"
... )
"""
from __future__ import annotations
__all__ = [
"read_xi",
"read_mzid",
"read_plink",
"read_scout",
"read_xlinkx",
"read_custom",
"read_merox",
"read_msannika",
"read_maxquant",
"read_maxlynx",
"detect_xi_filetype",
"parse_peptide",
"parse_modifications_from_xi_sequence",
"parse_scan_nr_from_mzid",
"parse_scan_nr_from_plink",
"parse_spectrum_file_from_plink",
"detect_plink_filetype",
"detect_scout_filetype",
"parse_modifications_from_scout_sequence",
"pyxlms_modification_str_parser",
"parse_modifications_from_maxquant_sequence",
"read",
"read_xinet",
"read_xiview",
"format_sequence",
"get_bool_from_value",
]
# READERS
from ._parser_xldbse_xi import read_xi
from ._parser_xldbse_mzid import read_mzid
from ._parser_xldbse_plink import read_plink
from ._parser_xldbse_scout import read_scout
from ._parser_xldbse_xlinkx import read_xlinkx
from ._parser_xldbse_custom import read_custom
from ._parser_xldbse_merox import read_merox
from ._parser_xldbse_msannika import read_msannika
from ._parser_xldbse_maxquant import read_maxquant
from ._parser_xldbse_xinet_xiview import read_xinet
from ._parser_xldbse_maxquant import read_maxlynx
from ._parser_xldbse_xinet_xiview import read_xiview
# UTILITY
from ._parser_xldbse_xi import detect_xi_filetype
from ._parser_xldbse_xi import parse_peptide
from ._parser_xldbse_xi import parse_modifications_from_xi_sequence
from ._parser_xldbse_mzid import parse_scan_nr_from_mzid
from ._parser_xldbse_plink import parse_scan_nr_from_plink
from ._parser_xldbse_plink import parse_spectrum_file_from_plink
from ._parser_xldbse_plink import detect_plink_filetype
from ._parser_xldbse_scout import detect_scout_filetype
from ._parser_xldbse_scout import parse_modifications_from_scout_sequence
from ._parser_xldbse_custom import pyxlms_modification_str_parser
from ._parser_xldbse_maxquant import parse_modifications_from_maxquant_sequence
# UTIL
from ._util import format_sequence
from ._util import get_bool_from_value
from ..data._parser_result import ParserResult
from typing import BinaryIO
from typing import List
# legacy
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
[docs]
def read(
files: str | List[str] | BinaryIO,
engine: Literal[
"Custom",
"MaxQuant",
"MaxLynx",
"MeroX",
"MS Annika",
"mzIdentML",
"pLink",
"Scout",
"xiSearch/xiFDR",
"xiNET/xiVIEW",
"XlinkX",
],
crosslinker: str,
parse_modifications: bool = True,
ignore_errors: bool = False,
verbose: Literal[0, 1, 2] = 1,
**kwargs,
) -> ParserResult:
r"""Read a crosslink result file.
Reads a crosslink or crosslink-spectrum-match result file from any of the supported crosslink search engines or formats.
Currently supports results files from MaxLynx/MaxQuant, MeroX, MS Annika, pLink 2 and pLink 3, Scout, xiSearch and xiFDR,
xiNET and xiVIEW,
XlinkX, and the mzIdentML format. Additionally supports parsing from custom ``.csv`` files in pyXLMS format, see more
about the custom format in ``parser.read_custom()`` and in here:
`docs <https://github.com/hgb-bin-proteomics/pyXLMS/blob/master/docs/format.md>`_.
Parameters
----------
files : str, list of str, or file stream
The name/path of the result file(s) or a file-like object/stream.
engine : "Custom", "MaxQuant", "MaxLynx", "MeroX", "MS Annika", "mzIdentML", "pLink", "Scout", "xiSearch/xiFDR", "xiNET/xiVIEW", or "XlinkX"
Crosslink search engine or format of the result file.
crosslinker : str
Name of the used cross-linking reagent, for example "DSSO".
parse_modifications : bool, default = True
Whether or not post-translational-modifications should be parsed for crosslink-spectrum-matches.
Requires correct specification of the 'modifications' parameter for every parser. Defaults are selected
for every parser if 'modifications' is not passed via ``**kwargs``.
ignore_errors : bool, default = False
Ignore errors when mapping modifications. Used in ``parser.read_xi()`` and ``parser.read_xlinkx()``.
verbose : 0, 1, or 2, default = 1
- 0: All warnings are ignored.
- 1: Warnings are printed to stdout.
- 2: Warnings are treated as errors.
**kwargs
Any additional parameters will be passed to the specific parsers.
Returns
-------
ParserResult
The ``parser_result`` object containing all parsed information.
Raises
------
ValueError
If the value entered for parameter ``engine`` is not supported.
Examples
--------
>>> from pyXLMS.parser import read
>>> csms_from_xiSearch = read(
... "data/xi/r1_Xi1.7.6.7.csv", engine="xiSearch/xiFDR", crosslinker="DSS"
... )
>>> from pyXLMS.parser import read
>>> csms_from_MaxQuant = read(
... "data/maxquant/run1/crosslinkMsms.txt", engine="MaxQuant", crosslinker="DSS"
... )
"""
supported = [
"Custom",
"MaxQuant",
"MaxLynx",
"MeroX",
"MS Annika",
"mzIdentML",
"pLink",
"Scout",
"xiSearch/xiFDR",
"xiNET/xiVIEW",
"XlinkX",
]
ff = engine.lower().strip()
if ff in ["custom", "pyxlms"]:
return read_custom(files, parse_modifications=parse_modifications, **kwargs)
if ff in ["maxquant", "max quant"]:
return read_maxquant(
files,
crosslinker=crosslinker,
parse_modifications=parse_modifications,
**kwargs,
)
if ff in ["maxlynx", "max lynx"]:
return read_maxlynx(
files,
crosslinker=crosslinker,
parse_modifications=parse_modifications,
**kwargs,
)
if ff in ["merox", "stavrox"]:
return read_merox(
files,
crosslinker=crosslinker,
parse_modifications=parse_modifications,
**kwargs,
)
if ff in ["ms annika", "msannika"]:
return read_msannika(
files, parse_modifications=parse_modifications, verbose=verbose, **kwargs
)
if ff in ["mzidentml", "mzid"]:
return read_mzid(files, verbose=verbose, **kwargs)
if ff in ["plink", "plink2", "plink3", "plink 2", "plink 3"]:
return read_plink(
files, parse_modifications=parse_modifications, verbose=verbose, **kwargs
)
if ff in ["scout"]:
return read_scout(
files,
crosslinker=crosslinker,
parse_modifications=parse_modifications,
verbose=verbose,
**kwargs,
)
if ff in ["xisearch/xifdr", "xisearch", "xifdr", "xi search", "xi fdr", "xi"]:
return read_xi(
files,
parse_modifications=parse_modifications,
ignore_errors=ignore_errors,
verbose=verbose,
**kwargs,
)
if ff in ["xinet/xiview", "xinet", "xiview", "xi net", "xi view"]:
return read_xinet(
files,
verbose=verbose,
**kwargs,
)
if ff in ["xlinkx", "x link x"]:
return read_xlinkx(
files,
parse_modifications=parse_modifications,
ignore_errors=ignore_errors,
verbose=verbose,
**kwargs,
)
err_str = (
f"{engine} is not a supported crosslink search engine or format! Valid options are:\n"
+ ", ".join(supported)
)
raise ValueError(err_str)
return ParserResult(search_engine="error")