Skip to content

Commit a0be807

Browse files
author
stefanks
authored
Merge branch 'master' into master
2 parents 839675e + bcc707e commit a0be807

File tree

8 files changed

+61
-33
lines changed

8 files changed

+61
-33
lines changed

InternalLogic/AnalysisEngine.cs

+34-11
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ public class AnalysisEngine : MyEngine
1616

1717
#region Private Fields
1818

19-
private const double comboThresholdMultiplier = 3;
2019
private const int max_mods_for_peptide = 3;
2120
private readonly double binTol;
2221
private readonly int maximumMissedCleavages;
@@ -281,7 +280,6 @@ public Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> ApplyPro
281280
proteinListHere.Add(kvp.Key);
282281
proteinGroups.Add(new ProteinGroup(proteinListHere, kvp.Value, uniquePeptidesHere));
283282
}
284-
285283

286284
// grab indistinguishable proteins
287285
foreach (var proteinGroup in proteinGroups)
@@ -409,15 +407,14 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
409407
}
410408

411409
// add every psm that corresponds to the protein group's peptides to the group
412-
foreach(var proteinGroup in proteinGroups)
410+
foreach (var proteinGroup in proteinGroups)
413411
{
414-
foreach(var peptide in proteinGroup.PeptideList)
412+
foreach (var peptide in proteinGroup.PeptideList)
415413
{
416414
string peptideBaseSequence = string.Join("", peptide.BaseSequence.Select(b => char.ConvertFromUtf32(b)));
417415
List<NewPsmWithFdr> psmListForThisBaseSeq;
418416

419417
peptideBaseSeqToPsmMatching.TryGetValue(peptideBaseSequence, out psmListForThisBaseSeq);
420-
421418
if (psmListForThisBaseSeq != null)
422419
{
423420
foreach (var psm in psmListForThisBaseSeq)
@@ -511,26 +508,26 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
511508

512509
// score the group (scoring algorithm defined in the ProteinGroup class)
513510
proteinGroup.ScoreThisProteinGroup();
514-
511+
515512
// remove empty protein groups (peptides were too poor quality and group doesn't exist anymore)
516513
if (proteinGroup.proteinGroupScore == 0)
517514
proteinGroupsToRemove.Add(proteinGroup);
518515
}
519516

520-
foreach(var proteinGroup in proteinGroupsToRemove)
517+
foreach (var proteinGroup in proteinGroupsToRemove)
521518
{
522519
proteinGroups.Remove(proteinGroup);
523520
}
524521

525522
// build razor peptide list (peptides that have >1 protein groups in the final protein group list)
526-
foreach(var kvp in peptideToProteinGroupMatching)
523+
foreach (var kvp in peptideToProteinGroupMatching)
527524
{
528525
if (kvp.Value.Count > 1)
529526
{
530527
allRazorPeptides.Add(kvp.Key);
531528
}
532529
}
533-
530+
534531
foreach (var proteinGroup in proteinGroups)
535532
{
536533
foreach (var peptide in proteinGroup.PeptideList)
@@ -544,7 +541,7 @@ public void ScoreProteinGroups(List<ProteinGroup> proteinGroups, List<NewPsmWith
544541
// build PeptideWithSetMod list to calc sequence coverage
545542
HashSet<PeptideWithSetModifications> peptidesWithSetMods = null;
546543
compactPeptideToProteinPeptideMatching.TryGetValue(peptide, out peptidesWithSetMods);
547-
foreach(var pep in peptidesWithSetMods)
544+
foreach (var pep in peptidesWithSetMods)
548545
{
549546
proteinGroup.PeptideWithSetModsList.Add(pep);
550547
}
@@ -659,6 +656,28 @@ protected override MyResults RunSpecific()
659656

660657
#region Private Methods
661658

659+
/// <summary>
/// For every final bin, increments <c>Overlapping</c> once per non-decoy unique PSM
/// whose last positive B entry plus last positive Y entry in the matched-ion lists
/// exceeds the PSM's peptide monoisotopic mass.
/// </summary>
/// <param name="myTreeStructure">Tree structure whose <c>FinalBins</c> are updated in place.</param>
private static void OverlappingIonSequences(BinTreeStructure myTreeStructure)
{
    foreach (Bin currentBin in myTreeStructure.FinalBins)
    {
        foreach (var psmEntry in currentBin.uniquePSMs)
        {
            var psmWithFdr = psmEntry.Value.Item3;

            // Only target (non-decoy) PSMs contribute to the count.
            if (psmWithFdr.IsDecoy)
            {
                continue;
            }

            var matchedIons = psmWithFdr.thisPSM.newPsm.matchedIonsList;

            // Both ion series must be present before they can be compared.
            if (!matchedIons.ContainsKey(ProductType.B) || !matchedIons.ContainsKey(ProductType.Y))
            {
                continue;
            }

            var bSeries = matchedIons[ProductType.B];
            var ySeries = matchedIons[ProductType.Y];

            // Each series needs at least one positive entry
            // (presumably positive marks a matched ion - TODO confirm against MatchIons).
            if (!bSeries.Any(mass => mass > 0) || !ySeries.Any(mass => mass > 0))
            {
                continue;
            }

            var lastPositiveB = bSeries.Last(mass => mass > 0);
            var lastPositiveY = ySeries.Last(mass => mass > 0);

            if (lastPositiveB + lastPositiveY > psmWithFdr.thisPSM.PeptideMonoisotopicMass)
            {
                currentBin.Overlapping++;
            }
        }
    }
}
672+
673+
/// <summary>
/// For every final bin, stores in <c>FracWithMaxMods</c> the number of non-decoy unique
/// PSMs whose <c>NumVariableMods</c> equals <c>max_mods_for_peptide</c>, divided by the
/// bin's <c>CountTarget</c>.
/// </summary>
/// <param name="myTreeStructure">Tree structure whose <c>FinalBins</c> are updated in place.</param>
private static void IdentifyPsmsWithMaxMods(BinTreeStructure myTreeStructure)
{
    foreach (Bin currentBin in myTreeStructure.FinalBins)
    {
        int targetsAtModLimit = 0;
        foreach (var psmInfo in currentBin.uniquePSMs.Values)
        {
            if (!psmInfo.Item3.IsDecoy && psmInfo.Item3.thisPSM.NumVariableMods == max_mods_for_peptide)
            {
                targetsAtModLimit++;
            }
        }

        // Floating-point division: a zero CountTarget gives NaN/Infinity rather than throwing.
        currentBin.FracWithMaxMods = (double)targetsAtModLimit / currentBin.CountTarget;
    }
}
680+
662681
private static void IdentifyAAsInCommon(BinTreeStructure myTreeStructure)
663682
{
664683
foreach (Bin bin in myTreeStructure.FinalBins)
@@ -796,7 +815,7 @@ private static void IdentifyCombos(BinTreeStructure myTreeStructure, double v)
796815
var ok = new HashSet<Tuple<double, double, double>>();
797816
foreach (var bin in myTreeStructure.FinalBins.Where(b => Math.Abs(b.MassShift) > v))
798817
foreach (var bin2 in myTreeStructure.FinalBins.Where(b => Math.Abs(b.MassShift) > v))
799-
if (bin.CountTarget * bin2.CountTarget >= totalTargetCount * comboThresholdMultiplier)
818+
if (bin.CountTarget * bin2.CountTarget >= totalTargetCount)
800819
ok.Add(new Tuple<double, double, double>(bin.MassShift, bin2.MassShift, Math.Min(bin.CountTarget, bin2.CountTarget)));
801820

802821
foreach (var bin in myTreeStructure.FinalBins)
@@ -900,6 +919,10 @@ private static BinTreeStructure MyAnalysis(List<NewPsmWithFdr> limitedpsms_with_
900919

901920
IdentifyMine(myTreeStructure, binTol);
902921

922+
IdentifyPsmsWithMaxMods(myTreeStructure);
923+
924+
OverlappingIonSequences(myTreeStructure);
925+
903926
return myTreeStructure;
904927
}
905928

InternalLogic/Bin.cs

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ namespace InternalLogicEngineLayer
66
{
77
public class Bin
88
{
9+
910
#region Public Fields
1011

1112
public string UnimodId = "-";
@@ -70,6 +71,8 @@ public int LocalizeableTarget
7071

7172
public string Mine { get; internal set; }
7273
public Dictionary<char, int> AAsInCommon { get; internal set; }
74+
public double FracWithMaxMods { get; internal set; }
75+
public int Overlapping { get; internal set; }
7376

7477
#endregion Public Properties
7578

@@ -97,5 +100,6 @@ internal void Add(NewPsmWithFdr ok)
97100
}
98101

99102
#endregion Internal Methods
103+
100104
}
101105
}

InternalLogic/PSMwithProteinHashSet.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ public override string ToString()
174174

175175
#region Internal Methods
176176

177-
internal static double MatchIons(IMsDataScan<IMzSpectrum<MzPeak>> thisScan, Tolerance product_mass_tolerance_value, double[] sorted_theoretical_product_masses_for_this_peptide, double[] matchedIonsList)
177+
internal static double MatchIons(IMsDataScan<IMzSpectrum<MzPeak>> thisScan, Tolerance product_mass_tolerance_value, double[] sorted_theoretical_product_masses_for_this_peptide, double[] matchedIonMassesList)
178178
{
179179
var TotalProductsHere = sorted_theoretical_product_masses_for_this_peptide.Length;
180180
if (TotalProductsHere == 0)
@@ -199,12 +199,12 @@ internal static double MatchIons(IMsDataScan<IMzSpectrum<MzPeak>> thisScan, Tole
199199
{
200200
MatchingProductsHere++;
201201
MatchingIntensityHere += experimental_intensities[i];
202-
matchedIonsList[theoreticalIndex] = nextTheoreticalMass;
202+
matchedIonMassesList[theoreticalIndex] = nextTheoreticalMass;
203203
}
204204
else if (currentExperimentalMZ < nextTheoreticalMZ)
205205
continue;
206206
else
207-
matchedIonsList[theoreticalIndex] = -nextTheoreticalMass;
207+
matchedIonMassesList[theoreticalIndex] = -nextTheoreticalMass;
208208
i--;
209209
// Passed a theoretical! Move counter forward
210210
theoreticalIndex++;

InternalLogicWithFileIO/Parent/MyTaskEngine.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -368,14 +368,13 @@ protected IEnumerable<Protein> GetProteins(bool onTheFlyDecoys, IDictionary<stri
368368
while (true)
369369
{
370370
string line = fasta.ReadLine();
371-
372371

373372
if (line.StartsWith(">"))
374373
{
375374
// fasta protein only has accession, fullname, sequence (no mods)
376375
string[] delimiters = { ">", "|", " OS=" };
377376
string[] output = line.Split(delimiters, StringSplitOptions.None);
378-
if(output.Length > 4)
377+
if (output.Length > 4)
379378
{
380379
accession = output[2];
381380
name = accession;
@@ -387,11 +386,10 @@ protected IEnumerable<Protein> GetProteins(bool onTheFlyDecoys, IDictionary<stri
387386
full_name = line.Substring(1);
388387
accession = "";
389388
}
390-
389+
391390
// new protein
392391
sequence = "";
393392
}
394-
395393
else
396394
{
397395
sequence += line.Trim();
@@ -489,7 +487,7 @@ protected void WriteTree(BinTreeStructure myTreeStructure, string output_folder,
489487
var writtenFile = Path.Combine(output_folder, fileName + ".mytsv");
490488
using (StreamWriter output = new StreamWriter(writtenFile))
491489
{
492-
output.WriteLine("MassShift\tCount\tCountDecoy\tCountTarget\tCountLocalizeableTarget\tCountNonLocalizeableTarget\tFDR\tArea 0.01t\tArea 0.255\tFracLocalizeableTarget\tMine\tUnimodID\tUnimodFormulas\tAA\tCombos\tModsInCommon\tAAsInCommon\tResidues\tNtermLocFrac\tCtermLocFrac\tUniprot");
490+
output.WriteLine("MassShift\tCount\tCountDecoy\tCountTarget\tCountLocalizeableTarget\tCountNonLocalizeableTarget\tFDR\tArea 0.01t\tArea 0.255\tFracLocalizeableTarget\tMine\tUnimodID\tUnimodFormulas\tAA\tCombos\tModsInCommon\tAAsInCommon\tResidues\tNtermLocFrac\tCtermLocFrac\tFracWithMaxMods\tOverlappingFrac\tUniprot");
493491
foreach (Bin bin in myTreeStructure.FinalBins.OrderByDescending(b => b.Count))
494492
{
495493
output.WriteLine(bin.MassShift.ToString("F3", CultureInfo.InvariantCulture)
@@ -512,6 +510,8 @@ protected void WriteTree(BinTreeStructure myTreeStructure, string output_folder,
512510
+ "\t" + string.Join(",", bin.residueCount.OrderByDescending(b => b.Value).Select(b => b.Key + ":" + b.Value))
513511
+ "\t" + (bin.LocalizeableTarget == 0 ? double.NaN : (double)bin.NlocCount / bin.LocalizeableTarget).ToString("F3", CultureInfo.InvariantCulture)
514512
+ "\t" + (bin.LocalizeableTarget == 0 ? double.NaN : (double)bin.ClocCount / bin.LocalizeableTarget).ToString("F3", CultureInfo.InvariantCulture)
513+
+ "\t" + (bin.FracWithMaxMods).ToString("F3", CultureInfo.InvariantCulture)
514+
+ "\t" + ((double)bin.Overlapping / bin.CountTarget).ToString("F3", CultureInfo.InvariantCulture)
515515
+ "\t" + bin.uniprotID);
516516
}
517517
}

OldInternalLogic/OldInternalLogic.csproj

+5-5
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,10 @@
116116
</ItemGroup>
117117
<ItemGroup>
118118
<Content Include="combos.txt">
119-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
119+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
120120
</Content>
121121
<Content Include="f.txt">
122-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
122+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
123123
</Content>
124124
<Content Include="glyco.txt">
125125
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
@@ -128,13 +128,13 @@
128128
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
129129
</Content>
130130
<Content Include="r.txt">
131-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
131+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
132132
</Content>
133133
<Content Include="s.txt">
134-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
134+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
135135
</Content>
136136
<Content Include="v.txt">
137-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
137+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
138138
</Content>
139139
</ItemGroup>
140140
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />

OldInternalLogic/ProteomeDatabaseReader.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
using System.Collections.Generic;
44
using System.IO;
55
using System.IO.Compression;
6-
using System.Text;
76
using System.Xml;
87

98
namespace OldInternalLogic
@@ -28,6 +27,7 @@ static ProteomeDatabaseReader()
2827
modificationTypeCodes.Add("Peptide N-terminal.", ModificationType.PeptideNTerminus);
2928
modificationTypeCodes.Add("Peptide C-terminal.", ModificationType.PeptideCTerminus);
3029
modificationTypeCodes.Add("Anywhere.", ModificationType.AminoAcidResidue);
30+
modificationTypeCodes.Add("Protein core.", ModificationType.AminoAcidResidue);
3131

3232
aminoAcidCodes = new Dictionary<string, char>();
3333
aminoAcidCodes.Add("Alanine", 'A');
@@ -124,7 +124,7 @@ public static IEnumerable<MorpheusModification> ReadModFile(string v)
124124
case "PP":
125125
if (feature_type == "MOD_RES")
126126
{
127-
modificationTypeCodes.TryGetValue(line.Substring(5), out modification_type);
127+
modification_type = modificationTypeCodes[line.Substring(5)];
128128
}
129129
break;
130130

@@ -156,9 +156,9 @@ public static IEnumerable<MorpheusModification> ReadModFile(string v)
156156
if (Math.Abs(monoisotopic_mass_shift - ye.MonoisotopicMass) > 1e-3)
157157
throw new InvalidDataException("In file" + v + " Modification " + description + " mass formula mismatch");
158158
if (labileOrSticky.Equals("Labile") || labileOrSticky.Equals("Both"))
159-
yield return new MorpheusModification(description, modification_type, amino_acid_residue, Path.GetFileNameWithoutExtension(v), database_name, prevAA, alternative_mass, true, ye);
159+
yield return new MorpheusModification(description, modification_type, amino_acid_residue, Path.GetFileNameWithoutExtension(v), database_name, prevAA, alternative_mass, true, ye);
160160
if (labileOrSticky.Equals("Sticky") || labileOrSticky.Equals("Both"))
161-
yield return new MorpheusModification(description, modification_type, amino_acid_residue, Path.GetFileNameWithoutExtension(v), database_name, prevAA, alternative_mass, false, ye);
161+
yield return new MorpheusModification(description, modification_type, amino_acid_residue, Path.GetFileNameWithoutExtension(v), database_name, prevAA, alternative_mass, false, ye);
162162
}
163163
description = null;
164164
feature_type = null;

OldInternalLogic/combos.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@
1212
-17.02655 15.994915
1313
-17.026549 0.984016
1414
21.981943 21.981943
15-
14.015650 0.984016
15+
14.015650 0.984016
16+
79.966331 79.966331

OldInternalLogic/m.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ CF H-3 N-1
7474
ID Loss of two hydrogens from N term M
7575
FT MOD_RES
7676
TG Methionine
77-
PP Protein N-terminal.
77+
PP N-terminal.
7878
SL Sticky
7979
MM -2.016
8080
CF H-2
@@ -469,7 +469,7 @@ CF C6 H12 N2 O1
469469
ID Acetylation+Methionine
470470
FT MOD_RES
471471
TG Any
472-
PP Protein N-terminal.
472+
PP N-terminal.
473473
SL Sticky
474474
MM 173.051055
475475
CF C7 H11 N1 O2 S
@@ -485,7 +485,7 @@ CF H24 C15
485485
ID Myristoylation
486486
FT MOD_RES
487487
TG Glycine
488-
PP Protein N-terminal.
488+
PP N-terminal.
489489
SL Both
490490
MM 210.198366
491491
CF H26 C14 O1
@@ -541,7 +541,7 @@ CF H30 C16 O1
541541
ID Palmitoylation
542542
FT MOD_RES
543543
TG Any
544-
PP Protein N-terminal.
544+
PP N-terminal.
545545
SL Both
546546
MM 238.229666
547547
CF H30 C16 O1

0 commit comments

Comments
 (0)