Skip to content

Commit 898b7ca

Browse files
author
Stefan S
committed
2 parents 4c25358 + 32f2a5e commit 898b7ca

File tree

5 files changed

+240
-84
lines changed

5 files changed

+240
-84
lines changed

InternalLogic/AnalysisEngine.cs

+97 -15
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,7 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
145145

146146
// add proteins with unique peptides to the parsimony dictionary before applying parsimony algorithm (more efficient)
147147
Dictionary<Protein, HashSet<CompactPeptide>> parsimonyDict = new Dictionary<Protein, HashSet<CompactPeptide>>();
148+
HashSet<Protein> proteinsWithUniquePeptides = new HashSet<Protein>();
148149
HashSet<CompactPeptide> usedPeptides = new HashSet<CompactPeptide>();
149150
HashSet<string> usedBaseSequences = new HashSet<string>();
150151

@@ -162,6 +163,7 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
162163

163164
if (proteinContainsUniquePeptide)
164165
{
166+
proteinsWithUniquePeptides.Add(kvp.Key);
165167
parsimonyDict.Add(kvp.Key, kvp.Value);
166168
foreach (var peptide in kvp.Value)
167169
{
@@ -175,9 +177,7 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
175177
// greedy algorithm adds the next protein that will account for the most unaccounted-for peptides
176178
HashSet<CompactPeptide> bestProteinPeptideList = new HashSet<CompactPeptide>();
177179
Protein bestProtein = null;
178-
179-
var initialDictCompactPeptides = new HashSet<CompactPeptide>(compactPeptideToProteinPeptideMatching.Keys.Distinct().ToList());
180-
int startingPeptides = initialDictCompactPeptides.Count;
180+
int startingPeptides = compactPeptideToProteinPeptideMatching.Keys.Count;
181181
bool currentBestPeptidesIsOne = false;
182182

183183
// as long as there are peptides that have not been accounted for, keep going
@@ -299,6 +299,7 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
299299
proteinListHere.Add(kvp.Key);
300300
proteinGroups.Add(new ProteinGroup(proteinListHere, kvp.Value, uniquePeptidesHere));
301301
}
302+
302303

303304
// grab indistinguishable proteins
304305
foreach (var proteinGroup in proteinGroups)
@@ -307,14 +308,17 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
307308
{
308309
foreach (var kvp in newDict)
309310
{
310-
if (!parsimonyDict.ContainsKey(kvp.Key))
311+
if (!proteinsWithUniquePeptides.Contains(kvp.Key))
311312
{
312-
if (kvp.Value.Count == proteinGroup.PeptideList.Count)
313+
if (!parsimonyDict.ContainsKey(kvp.Key))
313314
{
314-
if (kvp.Value.SetEquals(proteinGroup.PeptideList))
315+
if (kvp.Value.Count == proteinGroup.PeptideList.Count)
315316
{
316-
proteinGroup.Proteins.Add(kvp.Key);
317-
parsimonyDict.Add(kvp.Key, kvp.Value);
317+
if (kvp.Value.SetEquals(proteinGroup.PeptideList))
318+
{
319+
proteinGroup.Proteins.Add(kvp.Key);
320+
parsimonyDict.Add(kvp.Key, kvp.Value);
321+
}
318322
}
319323
}
320324
}
@@ -335,13 +339,13 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
335339
HashSet<PeptideWithSetModifications> newPeptides = new HashSet<PeptideWithSetModifications>();
336340
HashSet<Protein> proteinListHere;
337341

338-
// get the peptide's protein group after parsimony
342+
// get the CompactPeptide's protein list after parsimony
339343
peptideProteinListMatch.TryGetValue(peptide, out proteinListHere);
340344

341-
// find peptide's original (unparsimonious) virtual peptide matches
345+
// find CompactPeptide's original (unparsimonious) peptide matches
342346
compactPeptideToProteinPeptideMatching.TryGetValue(peptide, out oldPeptides);
343347

344-
// get the virtual peptides that belong to the post-parsimony protein(s) only
348+
// get the peptides that belong to the post-parsimony protein(s) only
345349
foreach (var peptide1 in oldPeptides)
346350
{
347351
if (proteinListHere.Contains(peptide1.Protein))
@@ -350,7 +354,7 @@ public void ApplyProteinParsimony(out List<ProteinGroup> proteinGroups)
350354
}
351355
}
352356

353-
// make new dictionary using only virtual peptides from parsimonious protein list
357+
// make new dictionary using only peptides from parsimonious protein list
354358
answer.Add(peptide, newPeptides);
355359
}
356360
}
@@ -367,6 +371,9 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
367371

368372
Dictionary<string, List<NewPsmWithFdr>> peptideBaseSeqToPsmMatching = new Dictionary<string, List<NewPsmWithFdr>>();
369373
Dictionary<CompactPeptide, NewPsmWithFdr> peptideToBestPsmMatching = new Dictionary<CompactPeptide, NewPsmWithFdr>();
374+
Dictionary<CompactPeptide, HashSet<ProteinGroup>> peptideToProteinGroupMatching = new Dictionary<CompactPeptide, HashSet<ProteinGroup>>();
375+
HashSet<CompactPeptide> allRazorPeptides = new HashSet<CompactPeptide>();
376+
HashSet<ProteinGroup> proteinGroupsToRemove = new HashSet<ProteinGroup>();
370377

371378
// match the peptide base sequence to all of its PSMs
372379
foreach (var psm in psmList)
@@ -388,6 +395,25 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
388395
}
389396
}
390397

398+
// add every psm that corresponds to the protein group's peptides to the group
399+
foreach(var proteinGroup in proteinGroups)
400+
{
401+
foreach(var peptide in proteinGroup.PeptideList)
402+
{
403+
string peptideBaseSequence = string.Join("", peptide.BaseSequence.Select(b => char.ConvertFromUtf32(b)));
404+
List<NewPsmWithFdr> psmListForThisBaseSeq = new List<NewPsmWithFdr>();
405+
406+
peptideBaseSeqToPsmMatching.TryGetValue(peptideBaseSequence, out psmListForThisBaseSeq);
407+
foreach(var psm in psmListForThisBaseSeq)
408+
{
409+
if (!proteinGroup.TotalPsmList.Contains(psm))
410+
{
411+
proteinGroup.TotalPsmList.Add(psm);
412+
}
413+
}
414+
}
415+
}
416+
391417
// find the best psm per base sequence
392418
foreach (var kvp in peptideBaseSeqToPsmMatching)
393419
{
@@ -424,12 +450,12 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
424450
thisProteinGroupsPsmList.Add(psm);
425451
}
426452
}
427-
proteinGroup.PsmList = thisProteinGroupsPsmList;
453+
proteinGroup.BestPsmList = thisProteinGroupsPsmList;
428454

429455
// remove CompactPeptides that are not associated with the best psm per base sequence from the group
430456
HashSet<CompactPeptide> newPeptideList = new HashSet<CompactPeptide>();
431457
HashSet<CompactPeptide> newUniquePeptideList = new HashSet<CompactPeptide>();
432-
foreach (var psm in proteinGroup.PsmList)
458+
foreach (var psm in proteinGroup.BestPsmList)
433459
{
434460
CompactPeptide peptide = psm.thisPSM.newPsm.GetCompactPeptide(variableModifications, localizeableModifications);
435461

@@ -440,11 +466,67 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
440466
newUniquePeptideList.Add(peptide);
441467
}
442468
}
469+
470+
// for finding razor peptides later
471+
foreach (var peptide in proteinGroup.PeptideList)
472+
{
473+
HashSet<ProteinGroup> proteinGroupsHere = new HashSet<ProteinGroup>();
474+
if (peptideToProteinGroupMatching.ContainsKey(peptide))
475+
{
476+
peptideToProteinGroupMatching.TryGetValue(peptide, out proteinGroupsHere);
477+
proteinGroupsHere.Add(proteinGroup);
478+
}
479+
else
480+
{
481+
proteinGroupsHere.Add(proteinGroup);
482+
peptideToProteinGroupMatching.Add(peptide, proteinGroupsHere);
483+
}
484+
}
485+
443486
proteinGroup.PeptideList = newPeptideList;
444487
proteinGroup.UniquePeptideList = newUniquePeptideList;
445488

446489
// score the group (scoring algorithm defined in the ProteinGroup class)
447-
proteinGroup.scoreThisProteinGroup();
490+
proteinGroup.ScoreThisProteinGroup();
491+
492+
// remove empty protein groups (peptides were too poor quality and group doesn't exist anymore)
493+
if (proteinGroup.proteinGroupScore == 0)
494+
proteinGroupsToRemove.Add(proteinGroup);
495+
}
496+
497+
foreach(var proteinGroup in proteinGroupsToRemove)
498+
{
499+
proteinGroups.Remove(proteinGroup);
500+
}
501+
502+
// build razor peptide list (peptides that have >1 protein groups in the final protein group list)
503+
foreach(var kvp in peptideToProteinGroupMatching)
504+
{
505+
if (kvp.Value.Count > 1)
506+
allRazorPeptides.Add(kvp.Key);
507+
}
508+
509+
foreach (var proteinGroup in proteinGroups)
510+
{
511+
foreach (var peptide in proteinGroup.PeptideList)
512+
{
513+
// build razor peptide list for each protein group
514+
if (allRazorPeptides.Contains(peptide))
515+
{
516+
proteinGroup.RazorPeptideList.Add(peptide);
517+
}
518+
519+
// build PeptideWithSetMod list to calc sequence coverage
520+
HashSet<PeptideWithSetModifications> peptidesWithSetMods = null;
521+
compactPeptideToProteinPeptideMatching.TryGetValue(peptide, out peptidesWithSetMods);
522+
foreach(var pep in peptidesWithSetMods)
523+
{
524+
proteinGroup.PeptideWithSetModsList.Add(pep);
525+
}
526+
}
527+
528+
// calculate sequence coverage for each protein in the group
529+
proteinGroup.CalculateSequenceCoverage();
448530
}
449531
}
450532

InternalLogic/AnalysisResults.cs

+2 -2
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ protected override string StringForOutput
3333
var sb = new StringBuilder();
3434
sb.Append("\t\tAll PSMS within 1% FDR: " + string.Join(", ", AllResultingIdentifications.Select(b => b.Count(c => c.qValue <= 0.01))));
3535

36-
if (ProteinGroups != null)
37-
sb.Append("\n\t\tAll proteins within 1% FDR: " + string.Join(", ", ProteinGroups.Count(c => c.QValue <= 0.01)));
36+
if (ProteinGroups != null)
37+
sb.Append("\n\t\tAll proteins within 1% FDR: " + string.Join(", ", ProteinGroups.Count(c => ((c.QValue <= 0.01) && (c.isDecoy == false)))));
3838

3939
return sb.ToString();
4040
}

0 commit comments

Comments
 (0)