Source code for pyXLMS.parser.util

#!/usr/bin/env python3

# 2025 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com

from __future__ import annotations

import warnings

from ..constants import AMINO_ACIDS

from typing import Any



[docs]
def format_sequence(
    sequence: str, remove_non_aa: bool = True, remove_lower: bool = True
) -> str:
    r"""Formats the given amino acid sequence into a common representation.

    The input is converted to ``str``, stripped of surrounding whitespace and then processed character by character. Upper case
    characters are kept as they are. Characters that are not upper case are either dropped (``remove_lower = True``) or converted to
    upper case (``remove_lower = False``). Every character that would be kept is checked against ``AMINO_ACIDS``: unknown characters
    are either dropped (``remove_non_aa = True``) or kept while emitting a ``RuntimeWarning`` (``remove_non_aa = False``).

    Parameters
    ----------
    sequence : str
        The amino acid sequence that should be formatted. Post-translational-modifications can be included in lower case but will
        be removed if ``remove_lower`` is true.
    remove_non_aa : bool, default = True
        Whether or not to remove characters that do not encode amino acids.
    remove_lower : bool, default = True
        Whether or not to remove lower case characters, this should be true if the amino acid sequence encodes
        post-translational-modifications in lower case. Note that the check is "not upper case", so characters without case
        (digits, brackets, whitespace, ...) are removed as well when this is true.

    Returns
    -------
    str
        The formatted sequence.

    Warns
    -----
    RuntimeWarning
        If ``remove_non_aa`` is false and a kept character is not contained in ``AMINO_ACIDS``.

    Examples
    --------
    >>> from pyXLMS.parser_util import format_sequence
    >>> format_sequence("PEP[K]TIDE")
    'PEPKTIDE'

    >>> from pyXLMS.parser_util import format_sequence
    >>> format_sequence("PEPKdssoTIDE")
    'PEPKTIDE'

    >>> from pyXLMS.parser_util import format_sequence
    >>> format_sequence("peptide", remove_lower = False)
    'PEPTIDE'
    """
    kept = []
    for char in str(sequence).strip():
        if not char.isupper():
            # Lower case letters and case-less characters end up here.
            if remove_lower:
                continue
            upper_char = char.upper()
            if upper_char not in AMINO_ACIDS:
                if remove_non_aa:
                    continue
                # Invalid characters are kept on request, but the caller is told.
                warnings.warn(
                    f"The sequence {sequence} contains non-valid characters.",
                    RuntimeWarning,
                )
            kept.append(upper_char)
            continue
        # From here on the character is already upper case.
        if char not in AMINO_ACIDS:
            if remove_non_aa:
                continue
            warnings.warn(
                f"The sequence {sequence} contains non-valid characters.",
                RuntimeWarning,
            )
        kept.append(char)
    return "".join(kept)




[docs]
def get_bool_from_value(value: Any) -> bool:
    r"""Parse a bool value from the given input.

    Tries to parse a boolean value from the given input object. If the object is of instance ``bool`` it will return the object, if it is of
    instance ``int`` it will return ``True`` if the object is ``1`` or ``False`` if the object is ``0``, any other number will raise a
    ``ValueError``. If the object is of instance ``str`` it will return ``True`` if the lower case version contains the letter ``t`` and
    otherwise ``False``. If the object is none of these types a ``ValueError`` will be raised.

    Note that the string rule is a plain substring check: any string containing ``t`` or ``T`` (for example ``"T"``, ``"true"`` but
    also ``"not"``) is parsed as ``True``, every other string (including the empty string) as ``False``.

    Parameters
    ----------
    value: Any
        The value to parse from.

    Returns
    -------
    bool
        The parsed boolean value.

    Raises
    ------
    ValueError
        If the object could not be parsed to bool.

    Examples
    --------
    >>> from pyXLMS.parser_util import get_bool_from_value
    >>> get_bool_from_value(0)
    False

    >>> from pyXLMS.parser_util import get_bool_from_value
    >>> get_bool_from_value("T")
    True
    """
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return value
    if isinstance(value, int) and value in (0, 1):
        return bool(value)
    if isinstance(value, str):
        return "t" in value.lower()
    # Integers other than 0/1 and all unsupported types end up here.
    raise ValueError(f"Cannot parse bool value from the given input {value}.")