Source code for pyXLMS.data._util

#!/usr/bin/env python3

# 2024 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com

from __future__ import annotations

import pandas as pd

from typing import Optional
from typing import Dict
from typing import List
from typing import Tuple
from typing import Any


def check_input(
    parameter: Any,
    parameter_name: str,
    supported_class: Any,
    supported_subclass: Optional[Any] = None,
) -> bool:
    r"""Validates the type of a parameter and, optionally, of its elements.

    Input guard that raises early when ``parameter`` is not an instance of
    ``supported_class``. If ``supported_subclass`` is given and the parameter
    is a list or a dict, every list element / dict value must additionally be
    an instance of ``supported_subclass``.

    Parameters
    ----------
    parameter : any
        The object to validate.
    parameter_name : str
        Name of the parameter, used in the error message.
    supported_class : any
        Class (or tuple of classes) the parameter has to be an instance of.
    supported_subclass : any, or None, default = None
        Class the list elements or dict values have to be an instance of.
        No element check is done if None.

    Returns
    -------
    bool
        Always True if no exception was raised.

    Raises
    ------
    TypeError
        If the parameter, or one of its list elements / dict values, is not of
        the required class.

    Examples
    --------
    >>> from pyXLMS.data import check_input
    >>> check_input("PEPTIDE", "peptide_a", str)
    True

    >>> from pyXLMS.data import check_input
    >>> check_input([1, 2], "xl_position_proteins_a", list, int)
    True
    """
    if not isinstance(parameter, supported_class):
        raise TypeError(f"{parameter_name} must be {supported_class}!")

    # Element checks only apply when an element class was requested.
    if supported_subclass is None:
        return True

    if isinstance(parameter, list):
        container, elements = "List", iter(parameter)
    elif isinstance(parameter, dict):
        container, elements = "Dict", (parameter[key] for key in parameter)
    else:
        return True

    if not all(isinstance(element, supported_subclass) for element in elements):
        raise TypeError(
            f"{container} values of {parameter_name} must be {supported_subclass}!"
        )
    return True
def check_input_multi(
    parameter: Any,
    parameter_name: str,
    supported_classes: List[Any],
    supported_subclass: Optional[Any] = None,
) -> bool:
    r"""Validates that a parameter is an instance of one of several classes.

    Input guard that raises early when ``parameter`` matches none of the
    classes in ``supported_classes``. If ``supported_subclass`` is given and
    the parameter is a list or a dict, every list element / dict value must
    additionally be an instance of ``supported_subclass``.

    Parameters
    ----------
    parameter : any
        The object to validate.
    parameter_name : str
        Name of the parameter, used in the error message.
    supported_classes : list of any
        Classes of which the parameter has to match at least one.
    supported_subclass : any, or None, default = None
        Class the list elements or dict values have to be an instance of.
        No element check is done if None.

    Returns
    -------
    bool
        Always True if no exception was raised.

    Raises
    ------
    TypeError
        If the parameter matches none of the given classes, or one of its list
        elements / dict values is not of the required class.

    Examples
    --------
    >>> from pyXLMS.data import check_input_multi
    >>> check_input_multi("PEPTIDE", "peptide_a", [str, list])
    True
    """
    # isinstance() needs a tuple, not a list, to test against several classes.
    accepted = tuple(supported_classes)
    if not isinstance(parameter, accepted):
        listing = ",".join(map(str, supported_classes))
        raise TypeError(f"{parameter_name} must be one of {listing}!")

    if supported_subclass is None:
        return True

    if isinstance(parameter, list):
        kind, items = "List", parameter
    elif isinstance(parameter, dict):
        kind, items = "Dict", [parameter[key] for key in parameter]
    else:
        return True

    for item in items:
        if isinstance(item, supported_subclass):
            continue
        raise TypeError(f"{kind} values of {parameter_name} must be {supported_subclass}!")
    return True
def check_indexing(value: int | List[int]) -> bool:
    r"""Verifies that position values use 1-based indexing.

    Parameters
    ----------
    value : int, or list of int
        A single position or a list of positions to verify.

    Returns
    -------
    bool
        Always True if no exception was raised.

    Raises
    ------
    TypeError
        If the value is neither an int nor a list of int (raised by
        ``check_input_multi()``).
    ValueError
        If any of the values is smaller than one.

    Examples
    --------
    >>> from pyXLMS.data import check_indexing
    >>> check_indexing([1, 2, 3])
    True
    """
    check_input_multi(value, "value", [int, list], int)
    # Treat a single position as a one-element list so both cases share one check.
    positions = [value] if isinstance(value, int) else value
    if any(position < 1 for position in positions):
        raise ValueError("0-based value found! All positions must use 1-based indexing!")
    return True
def __get_modified_peptide(
    sequence: str,
    modifications: Optional[Dict[int, Tuple[str, float]]],
    crosslink_position: int,
    crosslinker: Optional[str | float],
) -> str:
    r"""Returns the Proforma string for a single peptide.

    Parameters
    ----------
    sequence : str
        The unmodified peptide sequence.
    modifications : dict of int, tuple of str and float, or None
        The pyXLMS specific modifications object, mapping the position to a tuple
        of modification name and delta mass. Position ``0`` is written as
        N-terminal modification, position ``len(sequence) + 1`` as C-terminal
        modification. See ``data.create_csm()`` for reference.
    crosslink_position : int
        Crosslink position in the peptide sequence (1-based).
    crosslinker : str, or float, or None
        Optional name or mass of the crosslink reagent. If the name is given, it should be
        a valid name from XLMOD.

    Returns
    -------
    str
        The Proforma string of the peptidoform.

    Notes
    -----
    - This function should not be called directly, it is called from ``__to_proforma_csm()`` and ``__to_proforma_xl``.
    - Modifications with unknown mass are skipped.
    - Modifications are encoded by their delta mass with explicit sign, the modification name is not written.
    - If a modification with known mass is already annotated at the crosslink position, the crosslinker is not added again.
    - If no modifications are given, only the crosslink modification will be encoded in the Proforma.
    - If no modifications are given and no crosslinker is given, the unmodified peptide Proforma will be returned.
    - ``modifications=None`` and an empty modifications dict yield the same result, including for terminal positions.
    """

    def signed(mass: float) -> str:
        # Positive delta masses carry an explicit "+" sign.
        return f"+{mass}" if mass > 0.0 else f"{mass}"

    # Position -> text inside the square brackets. A single code path is used
    # whether or not modifications were supplied, so terminal positions are
    # always written with the "-" separator.
    tags: Dict[int, str] = {}
    if modifications is not None:
        for position, modification in modifications.items():
            mass = modification[1]
            if not pd.isna(mass):
                tags[position] = signed(mass)

    if crosslinker is not None and crosslink_position not in tags:
        # Any real number is a mass (bool is excluded on purpose); everything
        # else is treated as a crosslinker name and written verbatim.
        is_mass = isinstance(crosslinker, (int, float)) and not isinstance(
            crosslinker, bool
        )
        tags[crosslink_position] = signed(crosslinker) if is_mass else f"{crosslinker}"

    pep_len = len(sequence)
    # Insert from the highest position downwards so that earlier insertions do
    # not shift the string offsets of the positions still to be processed.
    for position in sorted(tags, reverse=True):
        tag = f"[{tags[position]}]"
        if position == 0:
            sequence = f"{tag}-{sequence}"
        elif position == pep_len + 1:
            sequence = f"{sequence}-{tag}"
        else:
            sequence = sequence[:position] + tag + sequence[position:]
    return sequence