# Source code for pyXLMS.parser.util
#!/usr/bin/env python3
# 2025 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com
from __future__ import annotations
import warnings
from ..constants import AMINO_ACIDS
from typing import Any
[docs]
def format_sequence(
    sequence: str, remove_non_aa: bool = True, remove_lower: bool = True
) -> str:
    r"""Formats the given amino acid sequence into common representation.

    The input is converted to ``str``, stripped of surrounding whitespace and
    then processed character by character. Characters that are not upper case
    are either dropped or converted to upper case, and characters that are not
    contained in ``AMINO_ACIDS`` are either dropped or kept with a warning.

    Parameters
    ----------
    sequence : str
        The amino acid sequence that should be formatted. Post-translational-modifications
        can be included in lower case, they are removed as long as ``remove_lower`` is true.
    remove_non_aa : bool, default = True
        Whether or not to remove characters that do not encode amino acids. If false,
        such characters are kept and a ``RuntimeWarning`` is issued for each of them.
    remove_lower : bool, default = True
        Whether or not to remove characters that are not upper case, this should be true
        if the amino acid sequence encodes post-translational-modifications in lower case.
        If false, these characters are converted to upper case instead.

    Returns
    -------
    str
        The formatted sequence.

    Warns
    -----
    RuntimeWarning
        If ``remove_non_aa`` is false and a character that is not in ``AMINO_ACIDS``
        is kept in the formatted sequence.

    Examples
    --------
    >>> from pyXLMS.parser_util import format_sequence
    >>> format_sequence("PEP[K]TIDE")
    'PEPKTIDE'

    >>> from pyXLMS.parser_util import format_sequence
    >>> format_sequence("PEPKdssoTIDE")
    'PEPKTIDE'

    >>> from pyXLMS.parser_util import format_sequence
    >>> format_sequence("peptide", remove_lower = False)
    'PEPTIDE'
    """
    kept = []
    for char in str(sequence).strip():
        # Anything that is not upper case (lower case letters, digits,
        # brackets, ...) is either discarded or normalised to upper case.
        if not char.isupper():
            if remove_lower:
                continue
            char = char.upper()
        # From here on upper case and normalised characters are validated
        # in exactly the same way.
        if char not in AMINO_ACIDS:
            if remove_non_aa:
                continue
            warnings.warn(
                f"The sequence {sequence} contains non-valid characters.",
                RuntimeWarning,
            )
        kept.append(char)
    return "".join(kept)
[docs]
def get_bool_from_value(value: Any) -> bool:
    r"""Parse a bool value from the given input.

    Tries to parse a boolean value from the given input object. If the object is of instance
    ``bool`` it will return the object, if it is of instance ``int`` it will return ``True``
    if the object is ``1`` or ``False`` if the object is ``0``, any other number will raise a
    ``ValueError``. If the object is of instance ``str`` it will return ``True`` if the lower
    case version contains the letter ``t`` and otherwise ``False``. If the object is none of
    these types a ``ValueError`` will be raised.

    Parameters
    ----------
    value: Any
        The value to parse from.

    Returns
    -------
    bool
        The parsed boolean value.

    Raises
    ------
    ValueError
        If the object could not be parsed to bool.

    Notes
    -----
    The string check is a plain containment test, so every string that contains the
    letter ``t`` in any case (for example ``"not"``) is parsed as ``True``.

    Examples
    --------
    >>> from pyXLMS.parser_util import get_bool_from_value
    >>> get_bool_from_value(0)
    False

    >>> from pyXLMS.parser_util import get_bool_from_value
    >>> get_bool_from_value("T")
    True
    """
    # bool has to be checked before int because bool is a subclass of int.
    if isinstance(value, bool):
        return value
    if isinstance(value, int) and value in (0, 1):
        return bool(value)
    if isinstance(value, str):
        return "t" in value.lower()
    # Everything else (other integers, floats, None, containers, ...) is rejected.
    raise ValueError(f"Cannot parse bool value from the given input {value}.")