# Source code for pyXLMS.transform._to_proforma
#!/usr/bin/env python3
# 2025 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com
from __future__ import annotations
from ..data._csm import CrosslinkSpectrumMatch
from ..data._crosslink import Crosslink
from ..data._util import check_input_multi
from ..data._util import __get_modified_peptide as __gmp
from ._util import assert_csms_or_xls
from typing import Optional
from typing import Dict
from typing import List
from typing import Tuple
# this is just kept here for legacy purposes
def __get_modified_peptide(
    sequence: str,
    modifications: Optional[Dict[int, Tuple[str, float]]],
    crosslink_position: int,
    crosslinker: Optional[str | float],
) -> str:
    r"""Build the Proforma string of one peptide (legacy shim).

    All arguments are forwarded unchanged, and in the same order, to
    ``data._util.__get_modified_peptide`` (imported into this module as
    ``__gmp``); whatever that helper returns is returned here.

    Parameters
    ----------
    sequence : str
        The unmodified peptide sequence.
    modifications : dict of int, tuple of str and float, or None
        The pyXLMS specific modifications object. See ``data.create_csm()`` for reference.
    crosslink_position : int
        Position of the crosslink in the peptide sequence (1-based).
    crosslinker : str, or float, or None
        Optional name or mass of the crosslink reagent. A name should be a valid
        name from XLMOD.

    Returns
    -------
    str
        The Proforma string of the peptidoform.

    Notes
    -----
    - Kept only for backwards compatibility; the encoding itself is done by the delegate.
    - Modifications with unknown mass are skipped.
    - If no modifications are given, only the crosslink modification will be encoded in the Proforma.
    - If no modifications are given and no crosslinker is given, the unmodified peptide Proforma will be returned.
    """
    # Positional pass-through keeps this shim independent of the delegate's parameter names.
    peptidoform = __gmp(sequence, modifications, crosslink_position, crosslinker)
    return peptidoform
# this is just kept here for legacy purposes
def __to_proforma_csm(
    csm: CrosslinkSpectrumMatch, crosslinker: Optional[str | float]
) -> str:
    r"""Build the Proforma string of one crosslink-spectrum-match (legacy shim).

    Simply asks the crosslink-spectrum-match for its own Proforma representation
    via ``csm.to_proforma(crosslinker)`` and hands the result back.

    Parameters
    ----------
    csm : CrosslinkSpectrumMatch
        A pyXLMS crosslink-spectrum-match object. See ``data.create_csm()``.
    crosslinker : str, or float, or None
        Optional name or mass of the crosslink reagent. A name should be a valid
        name from XLMOD.

    Returns
    -------
    str
        The Proforma string of the crosslink-spectrum-match.

    Notes
    -----
    - Kept only for backwards compatibility; prefer ``to_proforma()``.
    - Modifications with unknown mass are skipped.
    - If no modifications are given, only the crosslink modification will be encoded in the Proforma.
    - If no modifications are given and no crosslinker is given, the unmodified peptide Proforma will be returned.
    """
    proforma = csm.to_proforma(crosslinker)
    return proforma
# this is just kept here for legacy purposes
def __to_proforma_xl(xl: Crosslink, crosslinker: Optional[str | float]) -> str:
    r"""Build the Proforma string of one crosslink (legacy shim).

    Simply asks the crosslink for its own Proforma representation via
    ``xl.to_proforma(crosslinker)`` and hands the result back.

    Parameters
    ----------
    xl : Crosslink
        A pyXLMS crosslink object. See ``data.create_crosslink()``.
    crosslinker : str, or float, or None
        Optional name or mass of the crosslink reagent. A name should be a valid
        name from XLMOD.

    Returns
    -------
    str
        The Proforma string of the crosslink.

    Notes
    -----
    - Kept only for backwards compatibility; prefer ``to_proforma()``.
    - Modifications with unknown mass are skipped.
    - If no modifications are given, only the crosslink modification will be encoded in the Proforma.
    - If no modifications are given and no crosslinker is given, the unmodified peptide Proforma will be returned.
    """
    proforma = xl.to_proforma(crosslinker)
    return proforma
# [docs]
def to_proforma(
    data: CrosslinkSpectrumMatch
    | Crosslink
    | List[CrosslinkSpectrumMatch]
    | List[Crosslink],
    crosslinker: Optional[str | float] = None,
) -> str | List[str]:
    r"""Convert crosslinks or crosslink-spectrum-matches to Proforma strings.

    Accepts either one crosslink / crosslink-spectrum-match or a list of them.
    Each object is converted by calling its own ``to_proforma(crosslinker)`` method.

    Parameters
    ----------
    data : CrosslinkSpectrumMatch, Crosslink, list of CrosslinkSpectrumMatch, or list of Crosslink
        A pyXLMS crosslink object, e.g. see ``data.create_crosslink()``. Or a pyXLMS crosslink-spectrum-match
        object, e.g. see ``data.create_csm()``. Alternatively, a list of crosslinks or crosslink-spectrum-matches
        can also be provided.
    crosslinker : str, or float, or None, default = None
        Optional name or mass of the crosslink reagent. If the name is given, it should be a valid
        name from XLMOD. If the crosslink modification is contained in the crosslink-spectrum-match object
        this parameter has no effect.

    Returns
    -------
    str, or list of str
        The Proforma string of the crosslink or crosslink-spectrum-match. If a list was provided
        a list containing all Proforma strings is returned.

    Raises
    ------
    TypeError
        If an unsupported data type is provided.

    Notes
    -----
    - Modifications with unknown mass are skipped.
    - If no modifications are given, only the crosslink modification will be encoded in the Proforma.
    - If no modifications are given and no crosslinker is given, the unmodified peptide Proforma will be returned.

    Examples
    --------
    >>> from pyXLMS.data import create_crosslink_min
    >>> from pyXLMS.transform import to_proforma
    >>> xl = create_crosslink_min("PEPKTIDE", 4, "KPEPTIDE", 1)
    >>> to_proforma(xl)
    'KPEPTIDE//PEPKTIDE'

    >>> from pyXLMS.data import create_crosslink_min
    >>> from pyXLMS.transform import to_proforma
    >>> xl = create_crosslink_min("PEPKTIDE", 4, "KPEPTIDE", 1)
    >>> to_proforma(xl, crosslinker="Xlink:DSSO")
    'K[Xlink:DSSO]PEPTIDE//PEPK[Xlink:DSSO]TIDE'

    >>> from pyXLMS.data import create_csm_min
    >>> from pyXLMS.transform import to_proforma
    >>> csm = create_csm_min("PEPKTIDE", 4, "KPEPTIDE", 1, "RUN_1", 1)
    >>> to_proforma(csm)
    'KPEPTIDE//PEPKTIDE'

    >>> from pyXLMS.data import create_csm_min
    >>> from pyXLMS.transform import to_proforma
    >>> csm = create_csm_min("PEPKTIDE", 4, "KPEPTIDE", 1, "RUN_1", 1)
    >>> to_proforma(csm, crosslinker="Xlink:DSSO")
    'K[Xlink:DSSO]PEPTIDE//PEPK[Xlink:DSSO]TIDE'

    >>> from pyXLMS.data import create_csm_min
    >>> from pyXLMS.transform import to_proforma
    >>> csm = create_csm_min(
    ...     "PEPKTIDE",
    ...     4,
    ...     "KPMEPTIDE",
    ...     1,
    ...     "RUN_1",
    ...     1,
    ...     modifications_b={3: ("Oxidation", 15.994915)},
    ... )
    >>> to_proforma(csm, crosslinker="Xlink:DSSO")
    'K[Xlink:DSSO]PM[+15.994915]EPTIDE//PEPK[Xlink:DSSO]TIDE'

    >>> from pyXLMS.data import create_csm_min
    >>> from pyXLMS.transform import to_proforma
    >>> csm = create_csm_min(
    ...     "PEPKTIDE",
    ...     4,
    ...     "KPMEPTIDE",
    ...     1,
    ...     "RUN_1",
    ...     1,
    ...     modifications_b={3: ("Oxidation", 15.994915)},
    ...     charge=3,
    ... )
    >>> to_proforma(csm, crosslinker="Xlink:DSSO")
    'K[Xlink:DSSO]PM[+15.994915]EPTIDE//PEPK[Xlink:DSSO]TIDE/3'

    >>> from pyXLMS.data import create_csm_min
    >>> from pyXLMS.transform import to_proforma
    >>> csm = create_csm_min(
    ...     "PEPKTIDE",
    ...     4,
    ...     "KPMEPTIDE",
    ...     1,
    ...     "RUN_1",
    ...     1,
    ...     modifications_a={4: ("DSSO", 158.00376)},
    ...     modifications_b={1: ("DSSO", 158.00376), 3: ("Oxidation", 15.994915)},
    ...     charge=3,
    ... )
    >>> to_proforma(csm)
    'K[+158.00376]PM[+15.994915]EPTIDE//PEPK[+158.00376]TIDE/3'

    >>> from pyXLMS.data import create_csm_min
    >>> from pyXLMS.transform import to_proforma
    >>> csm = create_csm_min(
    ...     "PEPKTIDE",
    ...     4,
    ...     "KPMEPTIDE",
    ...     1,
    ...     "RUN_1",
    ...     1,
    ...     modifications_a={4: ("DSSO", 158.00376)},
    ...     modifications_b={1: ("DSSO", 158.00376), 3: ("Oxidation", 15.994915)},
    ...     charge=3,
    ... )
    >>> to_proforma(csm, crosslinker="Xlink:DSSO")
    'K[+158.00376]PM[+15.994915]EPTIDE//PEPK[+158.00376]TIDE/3'
    """
    # Argument type checks; the return values of the checks are not needed.
    check_input_multi(data, "data", [list, CrosslinkSpectrumMatch, Crosslink])
    if crosslinker is not None:
        check_input_multi(crosslinker, "crosslinker", [str, float])

    # A single crosslink / crosslink-spectrum-match converts itself.
    if not isinstance(data, list):
        return data.to_proforma(crosslinker)

    # For lists, convert whatever assert_csms_or_xls() hands back, element by element.
    proforma_strings = []
    for entry in assert_csms_or_xls(data):
        proforma_strings.append(entry.to_proforma(crosslinker))
    return proforma_strings