Skip to content

Commit 96f078e

Browse files
author
stefanks
authored
Merge pull request #175 from rmillikin/master
merge
2 parents b9d82c6 + 664c4a5 commit 96f078e

File tree

3 files changed

+124
-28
lines changed

3 files changed

+124
-28
lines changed

InternalLogic/AnalysisEngine.cs

+15-4
Original file line numberDiff line numberDiff line change
@@ -486,8 +486,8 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
486486
proteinGroup.UniquePeptideList = newUniquePeptideList;
487487

488488
// score the group (scoring algorithm defined in the ProteinGroup class)
489-
proteinGroup.scoreThisProteinGroup();
490-
489+
proteinGroup.ScoreThisProteinGroup();
490+
491491
// remove empty protein groups (peptides were too poor quality and group doesn't exist anymore)
492492
if (proteinGroup.proteinGroupScore == 0)
493493
proteinGroupsToRemove.Add(proteinGroup);
@@ -504,17 +504,28 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
504504
if (kvp.Value.Count > 1)
505505
allRazorPeptides.Add(kvp.Key);
506506
}
507-
508-
// build razor peptide list for each protein group
507+
509508
foreach (var proteinGroup in proteinGroups)
510509
{
511510
foreach (var peptide in proteinGroup.PeptideList)
512511
{
512+
// build razor peptide list for each protein group
513513
if (allRazorPeptides.Contains(peptide))
514514
{
515515
proteinGroup.RazorPeptideList.Add(peptide);
516516
}
517+
518+
// build PeptideWithSetMod list to calc sequence coverage
519+
HashSet<PeptideWithSetModifications> peptidesWithSetMods = null;
520+
compactPeptideToProteinPeptideMatching.TryGetValue(peptide, out peptidesWithSetMods);
521+
foreach(var pep in peptidesWithSetMods)
522+
{
523+
proteinGroup.PeptideWithSetModsList.Add(pep);
524+
}
517525
}
526+
527+
// calculate sequence coverage for each protein in the group
528+
proteinGroup.CalculateSequenceCoverage();
518529
}
519530
}
520531

InternalLogic/ProteinGroup.cs

+38-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public class ProteinGroup
1111
#region Public Fields
1212

1313
public readonly bool isDecoy;
14-
public double proteinGroupScore;
14+
public double proteinGroupScore { get; private set; }
1515

1616
#endregion Public Fields
1717

@@ -25,6 +25,7 @@ internal ProteinGroup(HashSet<Protein> proteins, HashSet<CompactPeptide> peptide
2525
BestPsmList = null;
2626
TotalPsmList = new List<NewPsmWithFdr>();
2727
RazorPeptideList = new HashSet<CompactPeptide>();
28+
PeptideWithSetModsList = new HashSet<PeptideWithSetModifications>();
2829
proteinGroupScore = 0;
2930
QValue = 0;
3031
isDecoy = false;
@@ -53,6 +54,7 @@ public static string TabSeparatedHeader
5354
sb.Append("Razor peptides" + '\t');
5455
sb.Append("Number of peptides" + '\t');
5556
sb.Append("Number of unique peptides" + '\t');
57+
sb.Append("Sequence coverage" + '\t');
5658
sb.Append("Number of PSMs" + '\t');
5759
sb.Append("Summed MetaMorpheus Score" + '\t');
5860
sb.Append("Decoy?" + '\t');
@@ -68,6 +70,8 @@ public static string TabSeparatedHeader
6870
public List<NewPsmWithFdr> TotalPsmList { get; set; }
6971
public HashSet<CompactPeptide> PeptideList { get; set; }
7072
public HashSet<CompactPeptide> UniquePeptideList { get; set; }
73+
public HashSet<PeptideWithSetModifications> PeptideWithSetModsList { get; set; }
74+
public List<double> sequenceCoverage { get; private set; }
7175
public HashSet<CompactPeptide> RazorPeptideList { get; set; }
7276
public double QValue { get; set; }
7377
public int cumulativeTarget { get; set; }
@@ -128,7 +132,17 @@ public override string ToString()
128132
sb.Append("" + UniquePeptideList.Count());
129133
sb.Append("\t");
130134

131-
// number of PSMs for final base sequences
135+
// sequence coverage
136+
foreach (double coverage in sequenceCoverage)
137+
{
138+
double coverage1 = coverage * 100;
139+
string str = string.Format("{0:0}", coverage1);
140+
141+
sb.Append("" + str + "% ;; ");
142+
}
143+
sb.Append("\t");
144+
145+
// number of PSMs for listed peptides
132146
sb.Append("" + TotalPsmList.Count());
133147
sb.Append("\t");
134148

@@ -155,7 +169,7 @@ public override string ToString()
155169
return sb.ToString();
156170
}
157171

158-
public void scoreThisProteinGroup()
172+
public void ScoreThisProteinGroup()
159173
{
160174
// score the protein group
161175
foreach (var psm in BestPsmList)
@@ -164,6 +178,27 @@ public void scoreThisProteinGroup()
164178
}
165179
}
166180

181+
/// <summary>
/// Rebuilds <see cref="sequenceCoverage"/> with one entry per protein in
/// <c>Proteins</c>, in enumeration order. Each entry is the fraction
/// (covered residue positions / protein length) contributed by the peptides
/// in <c>PeptideWithSetModsList</c> that belong to that protein.
/// </summary>
/// <remarks>
/// Any previously stored coverage values are discarded. Overlapping peptides
/// are counted once per residue position because positions are collected in a set.
/// </remarks>
public void CalculateSequenceCoverage()
{
    sequenceCoverage = new List<double>();

    foreach (var protein in Proteins)
    {
        HashSet<int> coveredResidues = new HashSet<int>();

        foreach (var peptide in PeptideWithSetModsList)
        {
            // The list holds peptides for every protein in the group; residue
            // indices are only meaningful relative to the peptide's own protein,
            // so peptides from other proteins must not count toward this one.
            if (!object.Equals(peptide.Protein, protein))
            {
                continue;
            }

            // Start/end are one-based and inclusive (per the property names).
            for (int i = peptide.OneBasedStartResidueInProtein; i <= peptide.OneBasedEndResidueInProtein; i++)
            {
                coveredResidues.Add(i);
            }
        }

        // Guard the division so a zero-length protein reports 0 coverage
        // instead of NaN/Infinity leaking into the tab-separated output.
        double sequenceCoverageHere = protein.Length > 0
            ? (double)coveredResidues.Count / protein.Length
            : 0;

        sequenceCoverage.Add(sequenceCoverageHere);
    }
}
201+
167202
#endregion Public Methods
168203

169204
}

Test/RobTest.cs

+71-21
Original file line numberDiff line numberDiff line change
@@ -29,32 +29,35 @@ public static void TestParsimony()
2929
var totalVirtualPeptideList = new HashSet<PeptideWithSetModifications>();
3030

3131
var p1 = new Protein(sequence1, "1", temp1, temp3, temp3, null, "Test1", "TestFullName1", 0, false, false);
32-
var p2 = new Protein(sequence2, "2", temp1, temp3, temp3, null, "Test2", "TestFullName2", 0, false, false);
32+
var p2 = new Protein(sequence2, "2", temp1, temp3, temp3, null, "DECOY_Test2", "DECOY_TestFullName2", 0, true, false);
3333
var p3 = new Protein(sequence3, "3", temp1, temp3, temp3, null, "Test3", "TestFullName3", 0, false, false);
3434

3535
IEnumerable<PeptideWithPossibleModifications> digestedList1 = p1.Digest(protease, 2, InitiatorMethionineBehavior.Variable);
3636
IEnumerable<PeptideWithPossibleModifications> digestedList2 = p2.Digest(protease, 2, InitiatorMethionineBehavior.Variable);
3737
IEnumerable<PeptideWithPossibleModifications> digestedList3 = p3.Digest(protease, 2, InitiatorMethionineBehavior.Variable);
38+
IEnumerable<PeptideWithSetModifications> peptides1 = null;
39+
IEnumerable<PeptideWithSetModifications> peptides2 = null;
40+
IEnumerable<PeptideWithSetModifications> peptides3 = null;
3841

3942
foreach (var protein in digestedList1)
4043
{
41-
IEnumerable<PeptideWithSetModifications> peptides1 = protein.GetPeptideWithSetModifications(temp2, 4098, 3);
44+
peptides1 = protein.GetPeptideWithSetModifications(temp2, 4098, 3);
4245

4346
foreach (var peptide in peptides1)
4447
totalVirtualPeptideList.Add(peptide);
4548
}
4649

4750
foreach (var protein in digestedList2)
4851
{
49-
IEnumerable<PeptideWithSetModifications> peptides2 = protein.GetPeptideWithSetModifications(temp2, 4098, 3);
52+
peptides2 = protein.GetPeptideWithSetModifications(temp2, 4098, 3);
5053

5154
foreach (var peptide in peptides2)
5255
totalVirtualPeptideList.Add(peptide);
5356
}
5457

5558
foreach (var protein in digestedList3)
5659
{
57-
IEnumerable<PeptideWithSetModifications> peptides3 = protein.GetPeptideWithSetModifications(temp2, 4098, 3);
60+
peptides3 = protein.GetPeptideWithSetModifications(temp2, 4098, 3);
5861

5962
foreach (var peptide in peptides3)
6063
totalVirtualPeptideList.Add(peptide);
@@ -95,11 +98,18 @@ public static void TestParsimony()
9598
dictionary.Add(peptides[i], virtualPeptideSets[i]);
9699
}
97100
}
98-
List<ProteinGroup> pg = new List<ProteinGroup>();
101+
List<ProteinGroup> proteinGroups = new List<ProteinGroup>();
99102
AnalysisEngine ae = new AnalysisEngine(null, dictionary, null, null, null, null, null, null, null, null, null, null, null, true, 0, 0, false, new List<ProductType> { ProductType.B, ProductType.Y });
100103

104+
Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> initialDictionary = new Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>>();
105+
foreach(var kvp in dictionary)
106+
{
107+
initialDictionary.Add(kvp.Key, kvp.Value);
108+
}
109+
110+
101111
// apply parsimony to initial dictionary
102-
ae.ApplyProteinParsimony(out pg);
112+
ae.ApplyProteinParsimony(out proteinGroups);
103113

104114
var parsimonyProteinList = new List<Protein>();
105115
string[] parsimonyBaseSequences = new string[3];
@@ -121,50 +131,90 @@ public static void TestParsimony()
121131
}
122132
}
123133

134+
135+
136+
List<NewPsmWithFdr> psms = new List<NewPsmWithFdr>();
137+
138+
foreach(var kvp in dictionary)
139+
{
140+
foreach(var peptide in kvp.Value)
141+
{
142+
HashSet<PeptideWithSetModifications> hashSet = new HashSet<PeptideWithSetModifications>();
143+
hashSet.Add(peptide);
144+
145+
switch (peptide.BaseSequence)
146+
{
147+
case "AK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 1), hashSet, null, null, null), 1, 0, 0.29)); break;
148+
case "CK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 9), hashSet, null, null, null), 1, 0, 0.0)); break;
149+
case "BK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 8), hashSet, null, null, null), 1, 0, 0.0)); break;
150+
case "AKCK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 7), hashSet, null, null, null), 1, 0, 0.0)); break;
151+
case "CKBK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 6), hashSet, null, null, null), 1, 0, 0.0)); break;
152+
case "AKCKBK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 5), hashSet, null, null, null), 1, 0, 0.0)); break;
153+
case "DK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 4), hashSet, null, null, null), 1, 0, 0.2)); break;
154+
case "DKCK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 3), hashSet, null, null, null), 1, 0, 0.4)); break;
155+
case "AAAAK": psms.Add(new NewPsmWithFdr(new PSMwithProteinHashSet(new ClassicSpectrumMatch(peptide, null, 0, 0, 0, 0, 0, 0, 0, 0, 2), hashSet, null, null, null), 1, 0, 0.33)); break;
156+
}
157+
}
158+
}
159+
160+
ae.ScoreProteinGroups(proteinGroups, psms);
161+
ae.DoProteinFdr(proteinGroups);
162+
163+
124164
/*
125165
// prints initial dictionary
126166
List<Protein> proteinList = new List<Protein>();
127167
128-
Console.WriteLine("----Initial Dictionary----");
129-
Console.WriteLine("PEPTIDE\t\t\tPROTEIN\t\t\tPeptideWithSetModifications");
168+
System.Console.WriteLine("----Initial Dictionary----");
169+
System.Console.WriteLine("PEPTIDE\t\t\tPROTEIN\t\t\tPeptideWithSetModifications");
130170
foreach (var kvp in initialDictionary)
131171
{
132172
proteinList = new List<Protein>();
133-
Console.Write(string.Join("", kvp.Key.BaseSequence.Select(b => char.ConvertFromUtf32(b))) + " \t\t\t ");
173+
System.Console.Write(string.Join("", kvp.Key.BaseSequence.Select(b => char.ConvertFromUtf32(b))) + " \t\t\t ");
134174
foreach (var peptide in kvp.Value)
135175
{
136-
if (!proteinList.Contains(peptide.protein))
176+
if (!proteinList.Contains(peptide.Protein))
137177
{
138-
Console.Write(peptide.protein.BaseSequence + " ;; ");
139-
proteinList.Add(peptide.protein);
178+
System.Console.Write(peptide.Protein.BaseSequence + " ;; ");
179+
proteinList.Add(peptide.Protein);
140180
}
141181
}
142-
Console.WriteLine();
182+
System.Console.WriteLine();
143183
}
144184
145185
// prints parsimonious dictionary
146-
Console.WriteLine("----Parsimonious Dictionary----");
147-
Console.WriteLine("PEPTIDE\t\t\tPROTEIN\t\t\tPeptideWithSetModifications");
148-
foreach (var kvp in parsimonyTest)
186+
System.Console.WriteLine("----Parsimonious Dictionary----");
187+
System.Console.WriteLine("PEPTIDE\t\t\tPROTEIN\t\t\tPeptideWithSetModifications");
188+
foreach (var kvp in dictionary)
149189
{
150190
proteinList = new List<Protein>();
151-
Console.Write(string.Join("", kvp.Key.BaseSequence.Select(b => char.ConvertFromUtf32(b))) + " \t\t\t ");
191+
System.Console.Write(string.Join("", kvp.Key.BaseSequence.Select(b => char.ConvertFromUtf32(b))) + " \t\t\t ");
152192
foreach (var peptide in kvp.Value)
153193
{
154-
if (!proteinList.Contains(peptide.protein))
194+
if (!proteinList.Contains(peptide.Protein))
155195
{
156-
Console.Write(peptide.protein.BaseSequence + " ;; ");
157-
proteinList.Add(peptide.protein);
196+
System.Console.Write(peptide.Protein.BaseSequence + " ;; ");
197+
proteinList.Add(peptide.Protein);
158198
}
159199
}
160-
Console.WriteLine();
200+
System.Console.WriteLine();
201+
}
202+
203+
204+
// prints protein groups after scoring/fdr
205+
System.Console.WriteLine(ProteinGroup.TabSeparatedHeader);
206+
foreach (var proteinGroup in proteinGroups)
207+
{
208+
System.Console.WriteLine(proteinGroup);
161209
}
162210
*/
163211

164212
Assert.That(parsimonyProteinList.Count == 3);
165213
Assert.That(parsimonyBaseSequences.Contains(sequence1));
166214
Assert.That(parsimonyBaseSequences.Contains(sequence2));
167215
Assert.That(parsimonyBaseSequences.Contains(sequence3));
216+
Assert.That(proteinGroups.Count == 2);
217+
Assert.That(proteinGroups.First().proteinGroupScore > 10);
168218
}
169219

170220
#endregion Public Methods

0 commit comments

Comments
 (0)