CandidateSearch 1.1.2
Proof-of-concept implementation of a search engine that uses sparse matrix multiplication to identify the best peptide candidates for a given mass spectrum.
Loading...
Searching...
No Matches
Database.cs
Go to the documentation of this file.
1using System.Text;
2
4{
8 public class Peptide
9 {
/// <summary>Amino acid sequence of the peptide.</summary>
13 public string sequence { get; }
/// <summary>Mass of the unmodified peptide.</summary>
17 public double mass { get; }
/// <summary>Dictionary mapping residue positions (0 based) to modification masses.</summary>
21 public Dictionary<int, double> modifications { get; }
/// <summary>True if the peptide is a decoy peptide, false if it is a target peptide.</summary>
25 public bool isDecoy { get; }
/// <summary>List of theoretical ion m/z values; assigned once in the constructor.</summary>
29 public List<double> ions { get; }
30
/// <summary>
/// Creates a new peptide/peptidoform and computes its theoretical ions.
/// </summary>
/// <param name="Sequence">Amino acid sequence of the peptide.</param>
/// <param name="Mass">Mass of the unmodified peptide.</param>
/// <param name="Modifications">Residue positions (0 based) mapped to modification masses. The dictionary is stored by reference, not copied.</param>
/// <param name="IonSettings">Settings forwarded to the ion calculation.</param>
/// <param name="IsDecoy">Whether the peptide is a decoy (true) or a target (false).</param>
public Peptide(string Sequence, double Mass, Dictionary<int, double> Modifications, Settings IonSettings, bool IsDecoy)
{
    (sequence, mass, modifications, isDecoy) = (Sequence, Mass, Modifications, IsDecoy);

    // The ion list is derived directly from the constructor arguments.
    ions = getIons(Sequence, Mass, Modifications, IonSettings);
}
47
/// <summary>
/// Get the encoding vector of the peptide.
/// </summary>
/// <param name="massRange">Exclusive upper limit; only ions strictly below this value are encoded.</param>
/// <param name="massMultiplier">Factor each ion value is multiplied with before rounding to an integer.</param>
/// <returns>The distinct rounded values of all encoded ions, in ascending order.</returns>
public int[] getEnconding(int massRange = 5000, int massMultiplier = 100)
{
    return ions.Where(mz => mz < massRange)
               .Select(mz => (int) Math.Round(mz * massMultiplier))
               .Distinct()
               .OrderBy(index => index)
               .ToArray();
}
68
/// <summary>
/// Constructs a string representation of the peptide.
/// </summary>
/// <returns>
/// The sequence followed by "[position:mass+position:mass...]" (empty brackets when there
/// are no modifications), prefixed with "_" if the peptide is a decoy.
/// </returns>
public override string ToString()
{
    // Joining with "+" gives the same text as appending "+" after every entry
    // and trimming the trailing one.
    var modificationList = string.Join("+", modifications.Select(entry => $"{entry.Key}:{entry.Value}"));
    var decoyPrefix = isDecoy ? "_" : "";

    return decoyPrefix + sequence + "[" + modificationList + "]";
}
90
/// <summary>
/// Adds a modification to the peptide if the peptide isn't already modified at that position.
/// </summary>
/// <param name="position">Residue position (0 based) of the modification.</param>
/// <param name="mass">Mass of the modification.</param>
/// <returns>True if the modification was added, false if the position already had one.</returns>
/// <remarks>
/// Only the modification dictionary is updated; this method does not touch the ion list.
/// </remarks>
public bool addModification(int position, double mass)
{
    // TryAdd leaves an existing entry untouched and reports false in that case.
    return modifications.TryAdd(position, mass);
}
105
// Computes the theoretical ions of a peptide by delegating to
// MSAMANDA_IONCALCULATION.IonCalculator.CalculateIons.
//   Sequence      - amino acid sequence; passed on as ASCII bytes.
//   Mass          - peptide mass handed to the calculator.
//   Modifications - residue position -> modification mass; each entry is converted
//                   into an MSAMANDA_IONCALCULATION.Modification.
//   IonSettings   - supplies the charge and neutral-loss limits used below.
// Returns the distinct ion values without neutral losses, sorted ascending.
114 private List<double> getIons(string Sequence, double Mass, Dictionary<int, double> Modifications, Settings IonSettings)
115 {
116 var ions = new List<double>();
117
118 double[] outIonsNoNL;
119 MSAMANDA_IONCALCULATION.IonWithNL[] outIonsWithNL;
// NOTE(review): the declaration of `mods` (listing line 120) is not visible in this
// listing; its type and size must be confirmed against the real source file.
121 foreach (var mod in Modifications)
122 {
// The entry is stored at position + 1 - presumably slot 0 is reserved for the
// N-terminus; TODO confirm against the MS Amanda ion calculator.
// Title and name are both "position:mass"; mono and avg both use the same mass value.
123 mods[mod.Key + 1] = new MSAMANDA_IONCALCULATION.Modification(title: mod.Key.ToString() + ":" + mod.Value.ToString(),
124 name: mod.Key.ToString() + ":" + mod.Value.ToString(),
125 mono: mod.Value,
126 avg: mod.Value,
127 aa: Sequence[mod.Key],
128 fix: true, // this should technically be true/false depending on modification
129 neutralLosses: new double[0],
130 nTerminal: false,
131 cTerminal: false,
132 id: mod.Key.GetHashCode() + mod.Value.GetHashCode(),
133 protein: false,
134 maxOccurrence: 3);
135 }
// upperBound is hard-coded to 5000, the same value as the default massRange of getEnconding.
136 MSAMANDA_IONCALCULATION.IonCalculator.CalculateIons(out outIonsNoNL,
137 out outIonsWithNL,
138 sequence: Encoding.ASCII.GetBytes(Sequence),
139 mass: Mass,
140 charge: IonSettings.MAX_PRECURSOR_CHARGE,
141 mods: mods,
142 maxNumberNeutralLoss: IonSettings.MAX_NEUTRAL_LOSSES,
143 maxNumberNeutralLossModifications: IonSettings.MAX_NEUTRAL_LOSS_MODS,
144 lowerBound: 0,
145 upperBound: 5000,
146 mono: true,
147 maxAllowedChargeState: IonSettings.MAX_FRAGMENT_CHARGE);
148
// Only the ions without neutral losses are kept; outIonsWithNL is received but not used.
149 ions.AddRange(outIonsNoNL);
150
151 return ions.Distinct().OrderBy(x => x).ToList();
152 }
153 }
154
/// <summary>
/// Static helper for reading peptides from a FASTA file.
/// </summary>
public static class DatabaseReader
{
    /// <summary>
    /// Reads a FASTA file by delegating to MSAMANDA_FASTAPARSER.FASTAParser.DigestFasta.
    /// </summary>
    /// <param name="filename">Name of the FASTA file, passed on unchanged.</param>
    /// <param name="settings">Settings passed on to the parser.</param>
    /// <param name="generateDecoys">Passed on to the parser's generateDecoys parameter.</param>
    /// <returns>The list of peptides returned by the parser.</returns>
    /// <remarks>The parser's optional coreUsage parameter is left at its default.</remarks>
    public static List<Peptide> readFASTA(string filename, Settings settings, bool generateDecoys = false)
        => MSAMANDA_FASTAPARSER.FASTAParser.DigestFasta(filename, settings, generateDecoys);
}
173}
Simplified peptide class that stores peptide/peptidoform information.
Definition Database.cs:9
bool isDecoy
Is the peptide a decoy peptide or target peptide.
Definition Database.cs:25
bool addModification(int position, double mass)
Adds a modification to the peptide if the peptide isn't already modified at that position.
Definition Database.cs:97
string sequence
Amino acid sequence of the peptide.
Definition Database.cs:13
double mass
Mass of the unmodified peptide.
Definition Database.cs:17
Peptide(string Sequence, double Mass, Dictionary< int, double > Modifications, Settings IonSettings, bool IsDecoy)
Constructor for a new peptide/peptidoform.
Definition Database.cs:39
int[] getEnconding(int massRange=5000, int massMultiplier=100)
Get the encoding vector of the peptide.
Definition Database.cs:54
Dictionary< int, double > modifications
Dictionary mapping residue positions (0 based) to modification masses.
Definition Database.cs:21
List< double > ions
List of theoretical ion m/z values.
Definition Database.cs:29
override string ToString()
Constructs a string representation of the peptide.
Definition Database.cs:73
Settings for digestion, ion calculation and VectorSearch.
Definition Settings.cs:9
static List< Peptide > DigestFasta(string fastaFileName, Settings settings, bool generateDecoys=false, double coreUsage=0.75)