Source code for pyXLMS.data._parser_result

#!/usr/bin/env python3

# 2026 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com

from __future__ import annotations

import copy
from pydantic import BaseModel
from pydantic import Field
from pydantic import ConfigDict
from pydantic import computed_field

from ._csm import CrosslinkSpectrumMatch
from ._crosslink import Crosslink
from ._util import check_input

from typing import Annotated
from typing import Optional
from typing import List
from typing import Dict
from typing import Tuple
from typing import Any

# legacy
try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal



[docs]
class ParserResult(BaseModel):
    r"""Core data structure for parser results.

    Data structure returned by any (parser) function that reads crosslink-spectrum-matches
    and/or crosslinks.

    Attributes Summary
    ------------------
    Here is a short summary about the parser result attributes, for more details
    on the specific Pydantic validation requirements please refer to the corresponding attributes
    themselves.

    Required
    ^^^^^^^^
    The following attributes are required:

    search_engine : str
        The name of the identifying crosslink search engine.

    Optional
    ^^^^^^^^
    The following attributes are optional:

    crosslink_spectrum_matches : list of CrosslinkSpectrumMatch, or None, default = None
        List of parsed crosslink-spectrum-matches.
    crosslinks : list of Crosslink, or None, default = None
        List of parsed crosslinks.

    Notes
    -----
    All three attributes are declared with ``frozen=True``, i.e. they are not
    meant to be re-assigned on an existing instance. Use ``copy_with_update()``
    to build a new parser result with replaced attribute values instead.

    In addition to the attributes above, the model exposes two computed fields:
    ``data_type`` (always ``"parser_result"``) and ``completeness`` (one of
    ``"full"``, ``"partial"`` or ``"empty"``).

    For backward compatibility the attributes can also be read with dict-like
    syntax, e.g. ``pr["crosslinks"]``. The legacy key
    ``"crosslink-spectrum-matches"`` is accepted as an alias of
    ``crosslink_spectrum_matches``.

    Examples
    --------
    >>> from pyXLMS.data import Crosslink
    >>> from pyXLMS.data import ParserResult
    >>> xl = Crosslink(
    ...     alpha_peptide="PEKP",
    ...     alpha_peptide_crosslink_position=3,
    ...     beta_peptide="TKIDE",
    ...     beta_peptide_crosslink_position=2,
    ... )
    >>> pr = ParserResult(search_engine="My Search Engine", crosslinks=[xl])
    """

    search_engine: Annotated[
        str,
        Field(
            frozen=True,
            description="The name of the identifying crosslink search engine.",
        ),
    ]
    r"""
    The name of the identifying crosslink search engine.
    """
    crosslink_spectrum_matches: Annotated[
        Optional[List[CrosslinkSpectrumMatch]],
        Field(frozen=True, description="List of parsed crosslink-spectrum-matches."),
    ] = None
    r"""
    List of parsed crosslink-spectrum-matches.
    """
    crosslinks: Annotated[
        Optional[List[Crosslink]],
        Field(frozen=True, description="List of parsed crosslinks."),
    ] = None
    r"""
    List of parsed crosslinks.
    """
    # validate_assignment: attribute assignments are validated as well,
    # strict: values must already have the declared type (no coercion),
    # str_strip_whitespace: surrounding whitespace is stripped from str values.
    model_config = ConfigDict(
        validate_assignment=True, strict=True, str_strip_whitespace=True
    )
    r"""
    Pydantic configuration for the underlying validation model.
    """

    @computed_field(description="Data type of the object.")
    @property
    def data_type(self) -> Literal["parser_result"]:
        r"""
        Constant type tag that identifies this object as a parser result.
        """
        type_tag = "parser_result"
        return type_tag

    @computed_field(description="Completeness of the parser result.")
    @property
    def completeness(self) -> Literal["full", "partial", "empty"]:
        r"""
        Completeness of the parser result.

        ``"full"`` if both crosslink-spectrum-matches and crosslinks are set
        (not ``None``), ``"empty"`` if both are ``None``, and ``"partial"`` if
        exactly one of them is set.
        """
        has_csms = self.crosslink_spectrum_matches is not None
        has_xls = self.crosslinks is not None
        if has_csms and has_xls:
            return "full"
        if has_csms or has_xls:
            # exactly one of the two lists is available
            return "partial"
        return "empty"

    def __getitem__(self, key: str) -> Any:
        r"""
        Dict-like read access to the attributes of the parser result.

        The legacy key ``"crosslink-spectrum-matches"`` is mapped to
        ``crosslink_spectrum_matches``. Any other key is looked up as an
        attribute name, and a ``KeyError`` is raised if no such attribute exists.
        """
        if key != "crosslink-spectrum-matches":
            try:
                return getattr(self, key)
            except AttributeError:
                raise KeyError(f"'{key}' is not a valid field!")
        # legacy support: old dict-based results used the hyphenated key
        return self.crosslink_spectrum_matches

    def __contains__(self, key: str) -> bool:
        r"""
        Dict-like membership test, i.e. support for ``key in parser_result``.

        The legacy key ``"crosslink-spectrum-matches"`` is always contained.
        Any other key is contained if the object has an attribute of that name.
        """
        # legacy support: old dict-based results used the hyphenated key
        is_legacy_alias = key == "crosslink-spectrum-matches"
        return True if is_legacy_alias else hasattr(self, key)


[docs]
    def items(self) -> List[Tuple[str, Any]]:
        r"""
        Support for dict-like read access for backward compatibility.

        Returns
        -------
        list of tuple of str, any
            Returns a list of tuples of attribute name, attribute value.

        Notes
        -----
        This internally calls ``self.model_dump(mode="python").items()`` and
        materializes the resulting dictionary view as a list, so that the returned
        object matches the documented and annotated return type.
        See `model_dump <https://pydantic.dev/docs/validation/latest/api/pydantic/base_model/#pydantic.BaseModel.model_dump>`_.
        """
        dumped = self.model_dump(mode="python")
        # dict.items() is a view object, not a list -> convert explicitly
        return list(dumped.items())



[docs]
    def keys(self) -> List[str]:
        r"""
        Support for dict-like read access for backward compatibility.

        Returns
        -------
        list of str
            Returns a list of attribute names.

        Notes
        -----
        This internally calls ``self.model_dump(mode="python").keys()`` and
        materializes the resulting dictionary view as a list, so that the returned
        object matches the documented and annotated return type.
        See `model_dump <https://pydantic.dev/docs/validation/latest/api/pydantic/base_model/#pydantic.BaseModel.model_dump>`_.
        """
        dumped = self.model_dump(mode="python")
        # dict.keys() is a view object, not a list -> convert explicitly
        return list(dumped.keys())



[docs]
    def values(self) -> List[Any]:
        r"""
        Support for dict-like read access for backward compatibility.

        Returns
        -------
        list of any
            Returns a list of attribute values.

        Notes
        -----
        This internally calls ``self.model_dump(mode="python").values()`` and
        materializes the resulting dictionary view as a list, so that the returned
        object matches the documented and annotated return type.
        See `model_dump <https://pydantic.dev/docs/validation/latest/api/pydantic/base_model/#pydantic.BaseModel.model_dump>`_.
        """
        dumped = self.model_dump(mode="python")
        # dict.values() is a view object, not a list -> convert explicitly
        return list(dumped.values())



[docs]
    def copy_with_update(
        self, update: Optional[Dict[str, Any]] = None
    ) -> ParserResult:
        r"""Creates a deep copy of the parser result with optional attribute updates.

        Parameters
        ----------
        update : dict of str, any, or None, default = None
            Dictionary mapping attribute names (str) to their updated values.
            The default (``None``) as well as an empty dict will create a deep
            copy with the original attribute values. The recognized keys are
            ``"search_engine"``, ``"crosslink_spectrum_matches"`` (or its legacy
            alias ``"crosslink-spectrum-matches"``) and ``"crosslinks"``, any other
            key is ignored.

        Returns
        -------
        ParserResult
            New parser result with optionally updated attributes.

        Raises
        ------
        ValueError
            If ``update`` contains both the key ``"crosslink_spectrum_matches"``
            and the legacy key ``"crosslink-spectrum-matches"``.

        Notes
        -----
        Attributes that are not updated are deep copied from this parser result.
        Values given in ``update`` are passed to the new parser result as they are,
        they are not copied.

        Examples
        --------
        >>> from pyXLMS.data import Crosslink
        >>> from pyXLMS.data import ParserResult
        >>> pr = ParserResult(search_engine="My Search Engine")
        >>> xl = Crosslink(
        ...     alpha_peptide="PEKP",
        ...     alpha_peptide_crosslink_position=3,
        ...     beta_peptide="TKIDE",
        ...     beta_peptide_crosslink_position=2,
        ... )
        >>> pr_copy = pr.copy_with_update(update={"crosslinks": [xl]})
        """
        # avoid a mutable default argument, None stands for "no updates"
        if update is None:
            update = {}
        check_input(update, "update", dict)
        if (
            "crosslink_spectrum_matches" in update
            and "crosslink-spectrum-matches" in update
        ):
            raise ValueError(
                "Dict 'update' must only contain key 'crosslink_spectrum_matches' "
                "or key 'crosslink-spectrum-matches' but not both!"
            )
        # only deep copy the (potentially large) list of crosslink-spectrum-matches
        # if it is not replaced by the update anyway
        if "crosslink_spectrum_matches" in update:
            new_csms = update["crosslink_spectrum_matches"]
        elif "crosslink-spectrum-matches" in update:
            # legacy key of the former dict-based parser result
            new_csms = update["crosslink-spectrum-matches"]
        else:
            new_csms = copy.deepcopy(self.crosslink_spectrum_matches)
        if "crosslinks" in update:
            new_xls = update["crosslinks"]
        else:
            new_xls = copy.deepcopy(self.crosslinks)
        return ParserResult(
            search_engine=update["search_engine"]
            if "search_engine" in update
            else self.search_engine,
            crosslink_spectrum_matches=new_csms,
            crosslinks=new_xls,
        )



[docs]
    def csms(self, create_copy: bool = True) -> List[CrosslinkSpectrumMatch] | None:
        r"""Shorthand accessor for the crosslink-spectrum-matches.

        Parameters
        ----------
        create_copy : bool, default = True
            If ``True`` (default) a deep copy of the crosslink-spectrum-matches is
            returned, otherwise ``self.crosslink_spectrum_matches`` itself.

        Returns
        -------
        list of CrosslinkSpectrumMatch, or None
            ``self.crosslink_spectrum_matches`` or a deep copy thereof.

        Notes
        -----
        Keep in mind that the default behaviour deep copies the underlying data!
        """
        matches = self.crosslink_spectrum_matches
        return copy.deepcopy(matches) if create_copy else matches



[docs]
    def xls(self, create_copy: bool = True) -> List[Crosslink] | None:
        r"""Shorthand accessor for the crosslinks.

        Parameters
        ----------
        create_copy : bool, default = True
            If ``True`` (default) a deep copy of the crosslinks is returned,
            otherwise ``self.crosslinks`` itself.

        Returns
        -------
        list of Crosslink, or None
            ``self.crosslinks`` or a deep copy thereof.

        Notes
        -----
        Keep in mind that the default behaviour deep copies the underlying data!
        """
        links = self.crosslinks
        return copy.deepcopy(links) if create_copy else links



[docs]
    def display(
        self,
        show_additional_information: bool = False,
        return_str: bool = False,
    ) -> None | str:
        r"""Pretty prints a short summary of the parser result.

        The summary consists of the data type, the completeness, the identifying
        search engine and the number of crosslink-spectrum-matches and crosslinks.
        It is always printed to standard output.

        Parameters
        ----------
        show_additional_information : bool, default = False
            Also display data in the ``additional_information``.
        return_str : bool, default = False
            If the display string should be returned in addition to being printed.

        Returns
        -------
        None, or str
            The display string of the parser result if ``return_str = True`` otherwise None.

        Examples
        --------
        >>> from pyXLMS import parser
        >>> pr = parser.read(
        ...     "data/ms_annika/XLpeplib_Beveridge_QEx-HFX_DSS_R1.pdResult",
        ...     engine="MS Annika",
        ...     crosslinker="DSS",
        ... )
        >>> pr.display()
        Data Type:                            parser_result
        Completeness:                         full
        Identifying Search Engine:            MS Annika
        Number of Crosslink-Spectrum-Matches: 826
        Number of Crosslinks:                 300
        """
        _ok = check_input(
            show_additional_information, "show_additional_information", bool
        )
        _ok = check_input(return_str, "return_str", bool)
        matches = self.crosslink_spectrum_matches
        links = self.crosslinks
        # one row per summary entry, values are aligned in a common column
        rows = [
            f"Data Type:                            {self.data_type}\n",
            f"Completeness:                         {self.completeness}\n",
            f"Identifying Search Engine:            {self.search_engine}\n",
            f"Number of Crosslink-Spectrum-Matches: {len(matches) if matches is not None else None}\n",
            f"Number of Crosslinks:                 {len(links) if links is not None else None}\n",
        ]
        summary = "".join(rows).strip()
        print(summary)
        return summary if return_str else None





[docs]
def create_parser_result(
    search_engine: str,
    csms: Optional[List[CrosslinkSpectrumMatch]] = None,
    crosslinks: Optional[List[Crosslink]] = None,
) -> ParserResult:
    r"""Creates a parser result data structure.

    Convenience wrapper around the ``ParserResult`` constructor that bundles all
    data elements a crosslink search engine result parser should return.

    Parameters
    ----------
    search_engine : str
        Name of the identifying crosslink search engine.
    csms : list of CrosslinkSpectrumMatch, or None, default = None
        List of crosslink-spectrum-matches, e.g. as created by ``data.create_csm()``.
    crosslinks : list of Crosslink, or None, default = None
        List of crosslinks, e.g. as created by ``data.create_crosslink()``.

    Returns
    -------
    ParserResult
        The parser result data structure. Its values can also be read with
        dict-like syntax as shown in the examples below.

    Examples
    --------
    >>> from pyXLMS.data import create_parser_result
    >>> result = create_parser_result("MS Annika", None, None)
    >>> result["data_type"]
    'parser_result'
    >>> result["completeness"]
    'empty'
    >>> result["search_engine"]
    'MS Annika'
    """
    # map the short parameter names onto the model's attribute names
    attributes = {
        "search_engine": search_engine,
        "crosslink_spectrum_matches": csms,
        "crosslinks": crosslinks,
    }
    return ParserResult(**attributes)