#!/usr/bin/env python3
# 2025 (c) Micha Johannes Birklbauer
# https://github.com/michabirklbauer/
# micha.birklbauer@gmail.com
from __future__ import annotations
import pandas as pd
from ..data._crosslink import Crosslink
from ..data._util import check_input
from ..transform._util import check_available_keys
from ._util import __get_filename
from typing import Optional
from typing import List
def __xls_to_xinet(
    xls: List[Crosslink],
    filename: Optional[str],
) -> pd.DataFrame:
    r"""Exports crosslinks to xiNET format.

    Builds one table row per crosslink. For each of the two peptides the
    row holds the ``;``-joined protein names, the ``;``-joined peptide start
    positions in those proteins, the peptide sequence and the crosslink
    position inside the peptide.

    Parameters
    ----------
    xls : list of Crosslink
        A list of crosslinks.
    filename : str, or None
        If not None, the data will be written to a file with the specified filename.

    Returns
    -------
    pd.DataFrame
        A pandas DataFrame in xiNET format with the columns ``Protein1``,
        ``PepPos1``, ``PepSeq1``, ``LinkPos1``, ``Protein2``, ``PepPos2``,
        ``PepSeq2``, ``LinkPos2``, ``Score`` and ``Id``. The ``Score`` column is
        only present if the score of every crosslink is not None.

    Notes
    -----
    This function should not be called directly, it is called from ``to_xinet()``.
    """
    # (crosslink key prefix, xiNET column suffix) for the two linked peptides.
    sides = (("alpha", "1"), ("beta", "2"))
    # Dict insertion order defines the column order of the resulting table.
    table = {
        f"{column}{suffix}": []
        for _, suffix in sides
        for column in ("Protein", "PepPos", "PepSeq", "LinkPos")
    }
    for xl in xls:
        for side, suffix in sides:
            link_pos = xl[f"{side}_peptide_crosslink_position"]
            table[f"Protein{suffix}"].append(";".join(xl[f"{side}_proteins"]))
            # Crosslink position in the protein minus crosslink position in
            # the peptide (plus one) gives the peptide position in the protein.
            table[f"PepPos{suffix}"].append(
                ";".join(
                    str(protein_pos - link_pos + 1)
                    for protein_pos in xl[f"{side}_proteins_crosslink_positions"]
                )
            )
            table[f"PepSeq{suffix}"].append(xl[f"{side}_peptide"])
            table[f"LinkPos{suffix}"].append(link_pos)
    # The score column is all-or-nothing: a single missing score drops it.
    scores = [xl["score"] for xl in xls]
    if all(score is not None for score in scores):
        table["Score"] = scores
    # Ids are 1-based running numbers in input order.
    table["Id"] = list(range(1, len(xls) + 1))
    xinet_df = pd.DataFrame(table)
    if filename is not None:
        # The output path is derived by the project helper from ``filename``
        # and the "csv" extension.
        xinet_df.to_csv(__get_filename(filename, "csv"), index=False)
    return xinet_df
[docs]
def to_xinet(
    crosslinks: List[Crosslink],
    filename: Optional[str],
) -> pd.DataFrame:
    r"""Exports a list of crosslinks to xiNET format.

    Exports a list of crosslinks to xiNET format. The tool xiNET is accessible
    via the link
    `crosslinkviewer.org <https://crosslinkviewer.org/>`_.
    Requires that ``alpha_proteins``, ``beta_proteins``, ``alpha_proteins_crosslink_positions`` and
    ``beta_proteins_crosslink_positions`` fields are set for all crosslinks.

    Parameters
    ----------
    crosslinks : list of Crosslink
        A list of crosslinks.
    filename : str, or None
        If not None, the exported data will be written to a file with the specified filename.

    Returns
    -------
    pd.DataFrame
        A pandas DataFrame containing crosslinks in xiNET format.

    Raises
    ------
    TypeError
        If a wrong data type is provided.
    TypeError
        If 'crosslinks' parameter contains elements of mixed data type.
    ValueError
        If the provided 'crosslinks' parameter contains no elements.
    RuntimeError
        If not all of the required information is present in the input data.

    Notes
    -----
    The optional ``Score`` column in the xiNET table will only be available if all crosslinks have assigned scores.

    Examples
    --------
    >>> from pyXLMS.exporter import to_xinet
    >>> from pyXLMS.parser import read
    >>> from pyXLMS.transform import targets_only
    >>> from pyXLMS.transform import filter_proteins
    >>> pr = read(
    ...     "data/ms_annika/XLpeplib_Beveridge_QEx-HFX_DSS_R1_Crosslinks.xlsx",
    ...     engine="MS Annika",
    ...     crosslinker="DSS",
    ... )
    >>> crosslinks = targets_only(pr)["crosslinks"]
    >>> cas9 = filter_proteins(crosslinks, proteins=["Cas9"])["Both"]
    >>> to_xinet(cas9, filename="crosslinks_xiNET.csv")
         Protein1  PepPos1           PepSeq1  LinkPos1  Protein2  PepPos2         PepSeq2  LinkPos2   Score   Id
    0        Cas9      777            GQKNSR         3      Cas9      777          GQKNSR         3  119.83    1
    1        Cas9      864             SDKNR         3      Cas9      864           SDKNR         3  114.43    2
    2        Cas9      676            DKQSGK         2      Cas9      676          DKQSGK         2  200.98    3
    3        Cas9      676            DKQSGK         2      Cas9       45           HSIKK         4   94.47    4
    4        Cas9       31             VPSKK         4      Cas9       31           VPSKK         4  110.48    5
    ..        ...      ...               ...       ...       ...      ...             ...       ...     ...  ...
    248      Cas9      387     MDGTEELLVKLNR        10      Cas9      387   MDGTEELLVKLNR        10  305.63  249
    249      Cas9      682    TILDFLKSDGFANR         7      Cas9      947       YDENDKLIR         6  110.46  250
    250      Cas9      788    IEEGIKELGSQILK         6      Cas9     1176  SSFEKNPIDFLEAK         5  288.36  251
    251      Cas9      575  KIECFDSVEISGVEDR         1      Cas9      682  TILDFLKSDGFANR         7  376.15  252
    252      Cas9     1176    SSFEKNPIDFLEAK         5      Cas9     1176  SSFEKNPIDFLEAK         5  437.10  253
    [253 rows x 10 columns]

    >>> from pyXLMS.exporter import to_xinet
    >>> from pyXLMS.parser import read
    >>> from pyXLMS.transform import targets_only
    >>> from pyXLMS.transform import filter_proteins
    >>> pr = read(
    ...     "data/ms_annika/XLpeplib_Beveridge_QEx-HFX_DSS_R1_Crosslinks.xlsx",
    ...     engine="MS Annika",
    ...     crosslinker="DSS",
    ... )
    >>> crosslinks = targets_only(pr)["crosslinks"]
    >>> cas9 = filter_proteins(crosslinks, proteins=["Cas9"])["Both"]
    >>> df = to_xinet(cas9, filename=None)
    """
    # Validate argument types first; the filename is optional and only
    # type-checked when one was actually given.
    check_input(crosslinks, "crosslinks", list, Crosslink)
    if filename is not None:
        check_input(filename, "filename", str)
    # An empty export is rejected rather than producing an empty table.
    if not crosslinks:
        raise ValueError("Provided crosslinks contain no elements!")
    # Protein names and protein-level crosslink positions are needed to fill
    # the Protein and PepPos columns of the xiNET table.
    required_fields = [
        "alpha_proteins",
        "alpha_proteins_crosslink_positions",
        "beta_proteins",
        "beta_proteins_crosslink_positions",
    ]
    check_available_keys(required_fields, crosslinks)
    return __xls_to_xinet(crosslinks, filename)