14                                                bool generateDecoys = 
false, 
 
   15                                                double coreUsage = 0.75)
 
   17            var trypsin = 
new Enzyme();
 
   18            trypsin.Name = 
"Trypsin";
 
   19            trypsin.CleavageSites = 
"KR";
 
   20            trypsin.CleavageInhibitors = 
"P";
 
   24            var proteins = ReadInFasta(fastaFileName, 
false);
 
   26            var peptides = DigestProteins(proteins, 
 
   34                var decoyProteins = 
new List<DBProtein>();
 
   35                foreach (var protein 
in proteins)
 
   37                    var decoySequence = ReverseSequence(protein.Sequence);
 
   38                    var decoyProtein = 
new DBProtein(protein.DbProtRef.ProtIdentifier, -protein.DbProtRef.MappingId, decoySequence, 
true);
 
   39                    decoyProteins.Add(decoyProtein);
 
   42                var decoyPeptides = DigestProteins(decoyProteins,
 
   48                peptides.AddRange(decoyPeptides);
 
 
   54        private static string ReverseSequence(
string seq)
 
   56            char[] array = seq.ToCharArray();
 
   58            return new String(array);
 
   61        private static List<Peptide> DigestProteins(List<DBProtein> proteins, 
 
   67            var opts = 
new ParallelOptions { MaxDegreeOfParallelism = (int) Math.Ceiling(Environment.ProcessorCount * coreUsage) };
 
   68            var concurrentPeptideList = 
new ConcurrentBag<List<DBPeptide>>();
 
   69            Parallel.ForEach(proteins, opts, (protein) => {
 
   71                concurrentPeptideList.Add(digester.DigestProteinIntoList());
 
   74            var peptideList = concurrentPeptideList.SelectMany(x => x).ToList();
 
   75            var massToPeptides = 
new DigesterDB();
 
   76            HelperMethods.MergeToDBDictionaries(peptideList, ref massToPeptides, opts);
 
   78            var peptides = 
new List<Peptide>();
 
   79            foreach (var item 
in massToPeptides.DbPeptidesDictMassKey)
 
   81                var currentPeptides = item.Value;
 
   82                foreach (var peptide 
in currentPeptides)
 
   84                    var peptidoforms = GetPeptidoforms(peptide, settings, isDecoy);
 
   85                    peptides.AddRange(peptidoforms);
 
   92        private static List<Peptide> GetPeptidoforms(DBPeptide dbPeptide, 
Settings settings, 
bool isDecoy)
 
   94            var peptides = 
new List<Peptide>();
 
   95            var mods = 
new Dictionary<int, double>();
 
   99                for (
int i = 0; i < dbPeptide.Sequence.Length; i++)
 
  108            var peptide = 
new Peptide(dbPeptide.Sequence, dbPeptide.Mass, mods, settings, isDecoy);
 
  109            peptides.Add(peptide);
 
  115                    addPeptidoformsForModification(peptides, modification, settings);
 
  122        private static void addPeptidoformsForModification(List<Peptide> peptides,
 
  123                                                           KeyValuePair<string, double> modification,
 
  126            var peptidoforms = 
new List<Peptide>();
 
  128            foreach (var peptide 
in peptides)
 
  130                var possibleModificationSites = 
new List<int>();
 
  131                for (
int i = 0; i < peptide.sequence.Length; i++)
 
  133                    if (peptide.sequence[i].ToString() == modification.Key)
 
  135                        possibleModificationSites.Add(i);
 
  139                var possibleCombinations = getAllPossibleCombinations(possibleModificationSites);
 
  141                foreach (var combination 
in possibleCombinations)
 
  143                    var peptidoform = 
new Peptide(peptide.sequence,
 
  145                                                  new Dictionary<int, double>(),
 
  149                    foreach (var mod 
in peptide.modifications)
 
  151                        peptidoform.addModification(mod.Key, mod.Value);
 
  155                    foreach (var position 
in combination)
 
  157                        peptidoform.addModification(position, modification.Value);
 
  160                    peptidoforms.Add(peptidoform);
 
  164            peptides.AddRange(peptidoforms);
 
  167        private static List<List<int>> getAllPossibleCombinations(List<int> possibleModificationSites)
 
  169            var possibleCombinations = 
new List<List<int>>();
 
  171            for (
int i = 0; i < (1 << possibleModificationSites.Count); ++i)
 
  173                var combination = 
new List<int>();
 
  174                for (
int j = 0; j < possibleModificationSites.Count; ++j)
 
  176                    if ((i & (1 << j)) != 0)
 
  178                        combination.Add(possibleModificationSites[j]);
 
  181                possibleCombinations.Add(combination);
 
  184            return possibleCombinations;
 
  187        private static List<DBProtein> ReadInFasta(
string fastaFileName, 
bool isDecoy)
 
  189            List<DBProtein> proteins = 
new List<DBProtein>();
 
  190            string regexPatternSequence = 
"[^ARNDCEQGHILKMFPSTUWYVXBZJO]";
 
  193            StreamReader fastaFileReader = 
new StreamReader(fastaFileName);
 
  196                string currentLine = 
"";
 
  197                string sequence = 
"";
 
  198                string identifier = 
"";
 
  200                while ((currentLine = fastaFileReader.ReadLine()) != 
null)
 
  202                    if (currentLine.StartsWith(
">", StringComparison.Ordinal))
 
  204                        if (!
string.IsNullOrWhiteSpace(sequence))
 
  207                            if (Regex.IsMatch(sequence, regexPatternSequence))
 
  209                                var builder = 
new StringBuilder(
"Cannot parse ");
 
  210                                builder.Append(
"fasta file at identifier " + identifier + 
". Sequence error. ");
 
  211                                Console.WriteLine(builder.ToString());
 
  212                                sequence = String.Empty;
 
  213                                identifier = 
string.Empty;
 
  214                                throw new Exception(
"Parsing error.");
 
  216                            proteins.Add(GenerateDbProtein(mappingID, isDecoy, identifier, sequence));
 
  221                        int index = currentLine.IndexOfAny(
new[] { 
' ', 
'|' }, 0);
 
  225                            identifier = currentLine.Substring(1);
 
  229                            identifier = currentLine.Substring(1, index - 1);
 
  230                            int indexNumber = identifier.IndexOfAny(
new[]
 
  231                                {
'0', 
'1', 
'2', 
'3', 
'4', 
'5', 
'6', 
'7', 
'8', 
'9'});
 
  232                            while (indexNumber == -1)
 
  234                                index = currentLine.IndexOfAny(
new[] { 
' ', 
'|' }, index + 1);
 
  237                                    identifier = currentLine.Substring(1);
 
  241                                identifier = currentLine.Substring(1, index - 1);
 
  242                                indexNumber = identifier.IndexOfAny(
new[]
 
  243                                    {
'0', 
'1', 
'2', 
'3', 
'4', 
'5', 
'6', 
'7', 
'8', 
'9'});
 
  249                        sequence += currentLine.ToUpper();
 
  253                if (!
string.IsNullOrWhiteSpace(sequence))
 
  255                    if (Regex.IsMatch(sequence, regexPatternSequence))
 
  257                        var builder = 
new StringBuilder(
"Cannot parse ");
 
  258                        builder.Append(
"fasta file at identifier " + identifier + 
". Sequence error. ");
 
  259                        Console.WriteLine(builder.ToString());
 
  260                        throw new Exception(
"Parsing error.");
 
  264                        proteins.Add(GenerateDbProtein(mappingID, isDecoy, identifier, sequence));
 
  272                Console.WriteLine(
"Fasta file error");
 
  273                Console.WriteLine(e.ToString());
 
  278                fastaFileReader.Close();
 
  284        private static DBProtein GenerateDbProtein(
int mappingID, 
bool isDecoy, 
string identifier, 
string sequence)
 
  290                return new DBProtein(identifier, -mappingID, sequence);
 
  292            return new DBProtein(identifier, mappingID, sequence);
 
 
  538        private readonly 
Enzyme _enzyme;
 
  539        private readonly 
int _missedCleavages;
 
  540        private readonly 
bool _useMonoisotopicMass;
 
  541        private readonly 
int _minPepLength;
 
  542        private readonly 
int _maxPepLength;
 
  545        private List<DBPeptide> _dbPeptides;
 
  550            public int MissedCleavages;
 
  556            _missedCleavages = missedCleavages;
 
  557            _useMonoisotopicMass = useMonoisotopicMass;
 
  558            _minPepLength = minPepLength;
 
  559            _maxPepLength = maxPepLength;
 
  560            _dbProtein = dbProtein;
 
  561            _dbPeptides = 
new List<DBPeptide>();
 
 
  566            _dbPeptides = 
new List<DBPeptide>();
 
  567            DigestSingleProtein(_dbProtein);
 
 
  571        private void DigestSingleProtein(
DBProtein protein)
 
  574            if (_enzyme.CleavageSites == 
"X")
 
  577                DigestSingleProteinWithNoEnzyme(protein);
 
  579            else if (_enzyme.CleavageSites == 
"")
 
  586                var peptides = SplitProtein(_enzyme, protein, _missedCleavages, 0);
 
  587                foreach (var pep 
in peptides)
 
  589                    SaveSinglePeptide(pep);
 
  592                if (protein.
Sequence.StartsWith(
"M", StringComparison.Ordinal) && _enzyme.CleavageSites != 
"X" && _enzyme.CleavageSites != 
"")
 
  595                    var otherPeptides = SplitProtein(_enzyme, prot, _missedCleavages, 1);
 
  597                    foreach (var pep 
in otherPeptides)
 
  599                        var peptideWithSameHash = peptides.FindPeptideWithSameHash(pep);
 
  600                        if (peptideWithSameHash == 
null)
 
  602                            SaveSinglePeptide(pep);
 
  605                        if (peptideWithSameHash.MissedCleavages == pep.MissedCleavages) 
continue;
 
  607                        SaveSinglePeptide(pep);
 
  613        private void DigestSingleProteinWithNoEnzyme(DBProtein protein)
 
  615            SortedSet<int> positionsOfX = 
new SortedSet<int>();
 
  616            SortedSet<int> positionsOfZ = 
new SortedSet<int>();
 
  617            SortedSet<int> positionsOfB = 
new SortedSet<int>();
 
  618            MatchCollection collX = Regex.Matches(protein.Sequence, 
"X");
 
  619            MatchCollection collZ = Regex.Matches(protein.Sequence, 
"Z");
 
  620            MatchCollection collB = Regex.Matches(protein.Sequence, 
"B");
 
  622            for (
int m = 0; m < collX.Count; ++m)
 
  624                positionsOfX.Add(collX[m].Index);
 
  627            for (
int m = 0; m < collZ.Count; ++m)
 
  629                positionsOfZ.Add(collZ[m].Index);
 
  632            for (
int m = 0; m < collB.Count; ++m)
 
  634                positionsOfB.Add(collB[m].Index);
 
  637            for (
int s = 0; s < protein.Sequence.Length; ++s)
 
  639                for (
int l = 1; l < protein.Sequence.Length - s + 1; ++l)
 
  642                    if (seqL < _minPepLength)
 
  644                    if (seqL > _maxPepLength)
 
  648                    SortedSet<int> xInSeq = positionsOfX.GetViewBetween(s, s + l);
 
  649                    if (xInSeq.Count > 1)
 
  651                    SortedSet<int> bInSeq = positionsOfB.GetViewBetween(s, s + l);
 
  652                    if (bInSeq.Count > 3)
 
  654                    SortedSet<int> zInSeq = positionsOfZ.GetViewBetween(s, s + l);
 
  655                    if (zInSeq.Count > 3 || zInSeq.Count + bInSeq.Count > 3)
 
  657                    if (xInSeq.Count > 0 || bInSeq.Count > 0 || zInSeq.Count > 0)
 
  660                        SaveSinglePeptide(
new DBPeptide(protein.Sequence.Substring(s, l), protein.Sequence.Substring(s, l), 0,
 
  661                          IsProteinStart(s, protein.Sequence[0]), protein.DbProtRef));
 
  666                        string sseq = protein.Sequence.Substring(s, l);
 
  667                        double mass = ChemicalUtils.CalculatePeptideMass(sseq, _useMonoisotopicMass);
 
  669                        if (mass.IsBetweenExcludeBounds(200, 6000))
 
  677                            SavePeptide(
new DBPeptide(sseq, sseq, 0, IsProteinStart(s, protein.Sequence[0]), protein.DbProtRef));
 
  684        private void SaveSinglePeptide(DBPeptide pep)
 
  686            if (String.IsNullOrEmpty(pep.Sequence))
 
  688            if (pep.Sequence.Length < _minPepLength || pep.Sequence.Length > _maxPepLength)
 
  691            if (pep.Sequence.Contains(
'B') || pep.Sequence.Contains(
'Z') || pep.Sequence.Contains(
'X'))
 
  693                string temp = pep.Sequence.Replace(
"X", 
"");
 
  694                if (pep.Sequence.Length - temp.Length > 1)
 
  699                temp = pep.Sequence.Replace(
"B", 
"");
 
  700                temp = temp.Replace(
"Z", 
"");
 
  701                if (pep.Sequence.Length - temp.Length > 3)
 
  706                List<char> replacements = 
new List<char>();
 
  707                for (
int i = 0; i < pep.Sequence.Length; ++i)
 
  709                    replacements.Add(
'#');
 
  712                if (pep.Sequence.Contains(
'X'))
 
  714                    GenerateCombinationsForX(pep, replacements);
 
  718                    CalculateMass(pep, replacements);
 
  723                pep.Mass = ChemicalUtils.CalculatePeptideMass(pep.Sequence, _useMonoisotopicMass);
 
  728        private static List<DBPeptide> SplitProtein(Enzyme enzyme, DBProtein prot, 
int missedCleavages, 
int offset)
 
  730            string[] peptides = enzyme.TheRegex.Split(prot.Sequence);
 
  731            List<DBPeptide> peps = 
new List<DBPeptide>(offset == 0 ? peptides.Length * 3 : 3); 
 
  734            Dictionary<string, PeptideInfo> allPeps = 
new Dictionary<string, PeptideInfo>();
 
  735            for (
int i = 0; i < peptides.Length; ++i)
 
  737                if (offset > 0 && i > 0)
 
  739                if (!String.IsNullOrEmpty(peptides[i]))
 
  741                    string myPeptide = peptides[i];
 
  742                    if (!allPeps.ContainsKey(myPeptide))
 
  745                        peps.Add(
new DBPeptide(myPeptide, myPeptide, 0, IsProteinStart(myPeptide, prot.Sequence), prot.DbProtRef));
 
  746                        allPeps.Add(peptides[i], 
new PeptideInfo
 
  754                    string pep = peptides[i];
 
  757                    while (count < missedCleavages)
 
  759                        if (i + j < peptides.Length)
 
  761                            if (!String.IsNullOrEmpty(peptides[i + j]))
 
  763                                pep += peptides[i + j];
 
  764                                if (!allPeps.ContainsKey(pep))
 
  767                                    peps.Add(
new DBPeptide(pep, pep, count + 1, IsProteinStart(pep, prot.Sequence), prot.DbProtRef));
 
  768                                    allPeps.Add(pep, 
new PeptideInfo
 
  771                                        MissedCleavages = count + 1
 
  786                    start += peptides[i].Length;
 
  790            if (enzyme.CleavageSites != 
"X" && (enzyme.Specificity == Enzyme.CLEAVAGE_SPECIFICITY.SEMI ||
 
  791                                                enzyme.Specificity == Enzyme.CLEAVAGE_SPECIFICITY.SEMI_C ||
 
  792                                                enzyme.Specificity == Enzyme.CLEAVAGE_SPECIFICITY.SEMI_N))
 
  794                foreach (
string peptide 
in allPeps.Keys.ToArray())
 
  796                    PeptideInfo info = allPeps[peptide];
 
  797                    for (
int i = 1; i < peptide.Length - 1; ++i)
 
  799                        string newFront = peptide.Substring(i);
 
  800                        string newBack = peptide.Substring(0, peptide.Length - i);
 
  801                        if (!allPeps.ContainsKey(newFront) && (enzyme.Specificity == Enzyme.CLEAVAGE_SPECIFICITY.SEMI ||
 
  802                                                               enzyme.Specificity == Enzyme.CLEAVAGE_SPECIFICITY.SEMI_C))
 
  805                            peps.Add(
new DBPeptide(newFront, newFront, info.MissedCleavages, IsProteinStart(newFront, prot.Sequence), prot.DbProtRef));
 
  807                            allPeps.Add(newFront, 
new PeptideInfo
 
  809                                Start = info.Start + i,
 
  810                                MissedCleavages = info.MissedCleavages
 
  814                        if (allPeps.ContainsKey(newBack) || (enzyme.Specificity != Enzyme.CLEAVAGE_SPECIFICITY.SEMI &&
 
  815                                                             enzyme.Specificity != Enzyme.CLEAVAGE_SPECIFICITY.SEMI_N)) 
continue;
 
  818                        peps.Add(
new DBPeptide(newBack, newBack, info.MissedCleavages, IsProteinStart(newBack, prot.Sequence), prot.DbProtRef));
 
  819                        allPeps.Add(newBack, 
new PeptideInfo
 
  822                            MissedCleavages = info.MissedCleavages
 
  831        private void SavePeptide(DBPeptide pep)
 
  833            if (pep.Sequence.Length < 2)
 
  835            if (!pep.Sequence.Length.IsBetweenIncludeBounds(_minPepLength, _maxPepLength))
 
  838            pep.Mass = ChemicalUtils.CalculatePeptideMass(pep.Sequence, _useMonoisotopicMass);
 
  839            pep.MassInt = (int)(pep.Mass);
 
  840            pep.SeqHash = pep.CreateMD5();
 
  841            _dbPeptides.Add(pep);
 
  844        private DBPeptide CreatePeptide(
string sequence, 
string origSequence, 
int missedCleavages, 
bool proteinStartFlag, DBProtRef protRef)
 
  846            var pep = 
new DBPeptide(sequence, origSequence, missedCleavages, proteinStartFlag, protRef);
 
  847            if (pep.Sequence.Length < 2)
 
  849            if (pep.Sequence.Length < _minPepLength || pep.Sequence.Length > _maxPepLength)
 
  851            pep.Mass = ChemicalUtils.CalculatePeptideMass(pep.Sequence, _useMonoisotopicMass);
 
  852            pep.MassInt = (int)(pep.Mass);
 
  853            pep.SeqHash = pep.CreateMD5();
 
  857        private static bool IsProteinStart(
int startPosition, 
char protStarter)
 
  859            return (startPosition == 0 || (startPosition == 1 && protStarter == 
'M'));
 
  862        private static bool IsProteinStart(
string pep, 
string prot)
 
  864            return IsProteinStart(prot.IndexOf(pep), prot[0]);
 
  867        private void CalculateMass(DBPeptide info,
 
  868          List<char> replacements)
 
  870            if (info.Sequence.Contains(
'B'))
 
  872                int firstIndex = info.Sequence.IndexOf(
'B');
 
  873                string changedPeptide = info.Sequence;
 
  874                char[] temp = 
new char[replacements.Count];
 
  875                replacements.CopyTo(temp);
 
  876                List<char> currentReplacements = 
new List<char>(temp)
 
  880                changedPeptide = changedPeptide.Insert(firstIndex, 
"N");
 
  881                changedPeptide = changedPeptide.Remove(firstIndex + 1, 1);
 
  882                info.Sequence = changedPeptide;
 
  883                CalculateMass(info, currentReplacements);
 
  884                changedPeptide = changedPeptide.Insert(firstIndex, 
"D");
 
  885                changedPeptide = changedPeptide.Remove(firstIndex + 1, 1);
 
  886                currentReplacements[firstIndex] = 
'D';
 
  887                info.Sequence = changedPeptide;
 
  888                CalculateMass(info, currentReplacements);
 
  890            else if (info.Sequence.Contains(
'Z'))
 
  892                int firstIndex = info.Sequence.IndexOf(
'Z');
 
  893                string changedPeptide = info.Sequence;
 
  894                char[] temp = 
new char[replacements.Count];
 
  895                replacements.CopyTo(temp);
 
  896                List<char> currentReplacements = 
new List<char>(temp)
 
  900                changedPeptide = changedPeptide.Insert(firstIndex, 
"E");
 
  901                changedPeptide = changedPeptide.Remove(firstIndex + 1, 1);
 
  902                info.Sequence = changedPeptide;
 
  903                CalculateMass(info, currentReplacements);
 
  904                changedPeptide = changedPeptide.Insert(firstIndex, 
"Q");
 
  905                changedPeptide = changedPeptide.Remove(firstIndex + 1, 1);
 
  906                currentReplacements[firstIndex] = 
'Q';
 
  907                info.Sequence = changedPeptide;
 
  908                CalculateMass(info, currentReplacements);
 
  917        private void FinalCalculation(DBPeptide info)
 
  919            double mass = ChemicalUtils.CalculatePeptideMass(info.Sequence, _useMonoisotopicMass);
 
  923        private void GenerateCombinationsForX(DBPeptide info,
 
  924          List<char> replacements)
 
  926            foreach (
char aa 
in ChemicalUtils.AminoAcids.Keys)
 
  928                if (aa != 
'^' && aa != 
'$' && aa != 
'J')
 
  930                    int firstIndex = info.Sequence.IndexOf(
'X');
 
  931                    string changedPeptide = info.Sequence;
 
  932                    changedPeptide = changedPeptide.Insert(firstIndex, aa.ToString());
 
  933                    changedPeptide = changedPeptide.Remove(firstIndex + 1, 1);
 
  937                    List<char> currentReplacement = 
new List<char>(replacements.ToArray())
 
  943                    if (changedPeptide.Contains(
'X'))
 
  949                        var newPep = 
new DBPeptide
 
  951                            MissedCleavages = info.MissedCleavages,
 
  952                            Sequence = changedPeptide,
 
  953                            SequenceOriginal = info.SequenceOriginal,
 
  954                            ProteinStartFlag = info.ProteinStartFlag,
 
  955                            DbProtRefs = info.DbProtRefs
 
  957                        CalculateMass(newPep, currentReplacement);