#!/usr/bin/env python3
# 2026 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com
from __future__ import annotations
import os
import json
from pydantic import ValidationError
from ..data._csm import CrosslinkSpectrumMatch
from ..data._crosslink import Crosslink
from ..data._parser_result import ParserResult
from ..data._util import check_input
from ..data._util import check_input_multi
from ._util import assert_csms_or_xls
from typing import Optional
from typing import BinaryIO
from typing import List
from typing import Dict
from typing import Any
[docs]
def to_json(
    data: List[CrosslinkSpectrumMatch] | List[Crosslink] | ParserResult,
    output_file: Optional[str | BinaryIO] = None,
    ensure_ascii: bool = False,
    indent: Optional[int | str] = 4,
    **kwargs,
) -> str:
    r"""Serialize pyXLMS objects to JSON.

    Serializes a list of crosslink-spectrum-matches, a list of crosslinks, or a parser_result
    to JSON. The data is serialized exactly once; the resulting string is both written to
    ``output_file`` (if given) and returned.

    Parameters
    ----------
    data : list of CrosslinkSpectrumMatch, list of Crosslink, or ParserResult
        The list of crosslink-spectrum-matches, list of crosslinks, or parser_result to be
        serialized to JSON.
    output_file : str, file stream, or None, default = None
        If given the JSON string will be written to the specified file. A string is treated
        as a file path that is opened for writing with UTF-8 encoding. Any other object is
        treated as an open stream: the JSON string is written to it as text, and if the stream
        rejects ``str`` with a ``TypeError`` (binary streams) the UTF-8 encoded bytes are
        written instead. Defaults to ``None`` which does not write anything to file.
    ensure_ascii : bool, default = False
        If ``True``, the output is guaranteed to have all incoming non-ASCII and non-printable characters
        escaped. If ``False`` (the default), all characters will be outputted as-is, except for the characters
        that must be escaped: quotation mark, reverse solidus, and the control characters ``U+0000`` through ``U+001F``.
    indent : int, str, or None, default = 4
        If a positive integer or string, JSON array elements and object members will be pretty-printed with that indent level.
        A positive integer indents that many spaces per level; a string (such as ``"\t"``) is used to indent each level. If zero,
        negative, or ``""`` (the empty string), only newlines are inserted. If ``None``, no newlines are inserted.
    **kwargs
        Any additional parameters will be passed to ``json.dumps()``.

    Returns
    -------
    str
        The JSON string representation of the input data.

    Notes
    -----
    To serialize individual CrosslinkSpectrumMatch or Crosslink objects please use
    `model_dump_json <https://pydantic.dev/docs/validation/latest/api/pydantic/base_model/#pydantic.BaseModel.model_dump_json>`_.

    Because serialization happens before the output file is opened, a serialization
    error does not truncate or partially overwrite an existing file.

    Examples
    --------
    >>> from pyXLMS.parser import read
    >>> from pyXLMS.transform import to_json, from_json
    >>> pr = read(
    ...     "data/ms_annika/XLpeplib_Beveridge_QEx-HFX_DSS_R1.pdResult",
    ...     engine="MS Annika",
    ...     crosslinker="DSS",
    ... )
    >>> pr.display()
    Data Type: parser_result
    Completeness: full
    Identifying Search Engine: MS Annika
    Number of Crosslink-Spectrum-Matches: 826
    Number of Crosslinks: 300
    >>> json_data_pr = to_json(pr)
    >>> json_data_csms = to_json(pr.csms())
    >>> json_data_xls = to_json(pr.xls(), output_file="xls.json")
    >>> pr = from_json(json_data_pr)
    >>> type(pr)
    <class 'pyXLMS.data._parser_result.ParserResult'>
    >>> csms = from_json(json_data_csms)
    >>> len(csms)
    826
    >>> type(csms[0])
    <class 'pyXLMS.data._csm.CrosslinkSpectrumMatch'>
    >>> xls = from_json("xls.json")
    >>> len(xls)
    300
    >>> type(xls[0])
    <class 'pyXLMS.data._crosslink.Crosslink'>
    """
    # Input checks are delegated to the project helpers; their return values are not needed.
    check_input_multi(data, "data", [list, ParserResult])
    check_input(ensure_ascii, "ensure_ascii", bool)

    json_data: List[Dict[str, Any]] | Dict[str, Any]
    if isinstance(data, list):
        # assert_csms_or_xls() hands back the list that is then dumped item by item.
        json_data = [
            item.model_dump(mode="python") for item in assert_csms_or_xls(data)
        ]
    else:
        json_data = data.model_dump(mode="python")

    # Serialize once, up front: the same string is written to file and returned.
    json_string = json.dumps(
        json_data, ensure_ascii=ensure_ascii, indent=indent, **kwargs
    )

    if output_file is None:
        return json_string

    if isinstance(output_file, str):
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(json_string)
    else:
        try:
            output_file.write(json_string)  # ty: ignore[invalid-argument-type]
        except TypeError:
            # Binary streams only accept bytes-like objects; fall back to UTF-8 bytes.
            output_file.write(json_string.encode("utf-8"))
    return json_string
[docs]
def from_json(
    json_input: str | BinaryIO,
    **kwargs,
) -> List[CrosslinkSpectrumMatch] | List[Crosslink] | ParserResult:
    r"""Deserialize JSON to pyXLMS objects.

    Deserializes JSON to a list of crosslink-spectrum-matches, a list of crosslinks,
    or a parser_result.

    Parameters
    ----------
    json_input : str, or file stream
        The JSON data to be deserialized to pyXLMS objects. Can be a JSON string, a file
        path, or an open file stream. If a string is provided the function checks if a file
        with that name exists and reads from the file if it exists and otherwise treats the
        string as a JSON object. A stream is rewound to its start before reading if it is
        seekable; a stream that reports itself as not seekable is read from its current
        position.
    **kwargs
        Any additional parameters will be passed to ``json.load()`` and ``json.loads()``.

    Returns
    -------
    list of CrosslinkSpectrumMatch, list of Crosslink, or ParserResult
        The parsed pyXLMS object. A top-level JSON array yields a list, anything else is
        validated as a ParserResult.

    Raises
    ------
    ValueError
        If the JSON data could not be parsed into (a) valid pyXLMS object(s).

    Notes
    -----
    To deserialize individual CrosslinkSpectrumMatch or Crosslink objects please use
    `model_validate_json <https://pydantic.dev/docs/validation/latest/api/pydantic/base_model/#pydantic.BaseModel.model_validate_json>`_.

    Examples
    --------
    >>> from pyXLMS.parser import read
    >>> from pyXLMS.transform import to_json, from_json
    >>> pr = read(
    ...     "data/ms_annika/XLpeplib_Beveridge_QEx-HFX_DSS_R1.pdResult",
    ...     engine="MS Annika",
    ...     crosslinker="DSS",
    ... )
    >>> pr.display()
    Data Type: parser_result
    Completeness: full
    Identifying Search Engine: MS Annika
    Number of Crosslink-Spectrum-Matches: 826
    Number of Crosslinks: 300
    >>> json_data_pr = to_json(pr)
    >>> json_data_csms = to_json(pr.csms())
    >>> json_data_xls = to_json(pr.xls(), output_file="xls.json")
    >>> pr = from_json(json_data_pr)
    >>> type(pr)
    <class 'pyXLMS.data._parser_result.ParserResult'>
    >>> csms = from_json(json_data_csms)
    >>> len(csms)
    826
    >>> type(csms[0])
    <class 'pyXLMS.data._csm.CrosslinkSpectrumMatch'>
    >>> xls = from_json("xls.json")
    >>> len(xls)
    300
    >>> type(xls[0])
    <class 'pyXLMS.data._crosslink.Crosslink'>
    """
    json_data: List[Dict[str, Any]] | Dict[str, Any] | None = None
    if isinstance(json_input, str):
        if os.path.isfile(json_input):
            # The string names an existing file: read the JSON document from it.
            with open(json_input, "r", encoding="utf-8") as f:
                json_data = json.load(f, **kwargs)
        else:
            # Otherwise the string itself is taken to be the JSON document.
            json_data = json.loads(json_input, **kwargs)
    else:
        # Rewind so the whole document is read, but do not call seek() on streams
        # that declare themselves non-seekable (e.g. pipes), where it would raise.
        # Objects without a seekable() method are rewound unconditionally.
        seekable = getattr(json_input, "seekable", None)
        if seekable is None or seekable():
            json_input.seek(0)
        json_data = json.load(json_input, **kwargs)

    # A JSON ``null`` document decodes to None and cannot be a pyXLMS object.
    if json_data is None:
        raise ValueError("Could not parse JSON data into a valid pyXLMS object!")

    if isinstance(json_data, list):
        maybe_csms_or_xls = []
        for item in json_data:
            maybe_csm_or_xl: CrosslinkSpectrumMatch | Crosslink | None
            try:
                # Each item is first tried as a crosslink-spectrum-match ...
                maybe_csm_or_xl = CrosslinkSpectrumMatch.model_validate(
                    item, strict=False
                )
            except ValidationError:
                # ... and only then as a crosslink; a failure here propagates to the caller.
                maybe_csm_or_xl = Crosslink.model_validate(item, strict=False)
            # Defensive guard in case validation hands back no object.
            if maybe_csm_or_xl is None:
                raise ValueError(
                    "Could not parse JSON data into a valid pyXLMS object!"
                )
            maybe_csms_or_xls.append(maybe_csm_or_xl)
        return assert_csms_or_xls(maybe_csms_or_xls)

    return ParserResult.model_validate(json_data, strict=False)