Skip to content

Commit 32fa2f0

Browse files
author
stefanks
authored
Merge pull request #252 from smith-chem-wisc/StefanBranch
Stefan branch
2 parents d6b87b7 + df5c14b commit 32fa2f0

37 files changed

+428 -225 lines changed

CMD/CMD.csproj

+18 -27
Original file line number | Diff line number | Diff line change
@@ -35,41 +35,34 @@
3535
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
3636
</PropertyGroup>
3737
<ItemGroup>
38-
<Reference Include="Chemistry, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
39-
<HintPath>..\packages\mzLib.1.0.117\lib\Chemistry.dll</HintPath>
40-
<Private>True</Private>
38+
<Reference Include="Chemistry, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
39+
<HintPath>..\packages\mzLib.1.0.121\lib\Chemistry.dll</HintPath>
4140
</Reference>
42-
<Reference Include="MassSpectrometry, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
43-
<HintPath>..\packages\mzLib.1.0.117\lib\MassSpectrometry.dll</HintPath>
44-
<Private>True</Private>
41+
<Reference Include="MassSpectrometry, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
42+
<HintPath>..\packages\mzLib.1.0.121\lib\MassSpectrometry.dll</HintPath>
4543
</Reference>
4644
<Reference Include="MathNet.Numerics, Version=3.17.0.0, Culture=neutral, processorArchitecture=MSIL">
4745
<HintPath>..\packages\MathNet.Numerics.3.17.0\lib\net40\MathNet.Numerics.dll</HintPath>
4846
<Private>True</Private>
4947
</Reference>
50-
<Reference Include="mzIdentML, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
51-
<HintPath>..\packages\mzLib.1.0.117\lib\mzIdentML.dll</HintPath>
52-
<Private>True</Private>
48+
<Reference Include="mzIdentML, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
49+
<HintPath>..\packages\mzLib.1.0.121\lib\mzIdentML.dll</HintPath>
5350
</Reference>
54-
<Reference Include="MzLibUtil, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
55-
<HintPath>..\packages\mzLib.1.0.117\lib\MzLibUtil.dll</HintPath>
56-
<Private>True</Private>
51+
<Reference Include="MzLibUtil, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
52+
<HintPath>..\packages\mzLib.1.0.121\lib\MzLibUtil.dll</HintPath>
5753
</Reference>
58-
<Reference Include="MzML, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
59-
<HintPath>..\packages\mzLib.1.0.117\lib\MzML.dll</HintPath>
60-
<Private>True</Private>
54+
<Reference Include="MzML, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
55+
<HintPath>..\packages\mzLib.1.0.121\lib\MzML.dll</HintPath>
6156
</Reference>
6257
<Reference Include="NetSerializer, Version=4.1.0.0, Culture=neutral, processorArchitecture=MSIL">
6358
<HintPath>..\packages\NetSerializer.4.1.0\lib\net45\NetSerializer.dll</HintPath>
6459
<Private>True</Private>
6560
</Reference>
66-
<Reference Include="Proteomics, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
67-
<HintPath>..\packages\mzLib.1.0.117\lib\Proteomics.dll</HintPath>
68-
<Private>True</Private>
61+
<Reference Include="Proteomics, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
62+
<HintPath>..\packages\mzLib.1.0.121\lib\Proteomics.dll</HintPath>
6963
</Reference>
70-
<Reference Include="Spectra, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
71-
<HintPath>..\packages\mzLib.1.0.117\lib\Spectra.dll</HintPath>
72-
<Private>True</Private>
64+
<Reference Include="Spectra, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
65+
<HintPath>..\packages\mzLib.1.0.121\lib\Spectra.dll</HintPath>
7366
</Reference>
7467
<Reference Include="System" />
7568
<Reference Include="System.Core" />
@@ -80,13 +73,11 @@
8073
<Reference Include="System.Data" />
8174
<Reference Include="System.Net.Http" />
8275
<Reference Include="System.Xml" />
83-
<Reference Include="Thermo, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
84-
<HintPath>..\packages\mzLib.1.0.117\lib\Thermo.dll</HintPath>
85-
<Private>True</Private>
76+
<Reference Include="Thermo, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
77+
<HintPath>..\packages\mzLib.1.0.121\lib\Thermo.dll</HintPath>
8678
</Reference>
87-
<Reference Include="UsefulProteomicsDatabases, Version=1.0.117.0, Culture=neutral, processorArchitecture=AMD64">
88-
<HintPath>..\packages\mzLib.1.0.117\lib\UsefulProteomicsDatabases.dll</HintPath>
89-
<Private>True</Private>
79+
<Reference Include="UsefulProteomicsDatabases, Version=1.0.121.0, Culture=neutral, processorArchitecture=AMD64">
80+
<HintPath>..\packages\mzLib.1.0.121\lib\UsefulProteomicsDatabases.dll</HintPath>
9081
</Reference>
9182
<Reference Include="Zlib.Portable, Version=1.11.0.0, Culture=neutral, processorArchitecture=MSIL">
9283
<HintPath>..\packages\Zlib.Portable.1.11.0\lib\portable-net4+sl5+wp8+win8+wpa81+MonoTouch+MonoAndroid\Zlib.Portable.dll</HintPath>

CMD/packages.config

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<packages>
33
<package id="MathNet.Numerics" version="3.17.0" targetFramework="net451" />
4-
<package id="mzLib" version="1.0.117" targetFramework="net451" />
4+
<package id="mzLib" version="1.0.121" targetFramework="net451" />
55
<package id="NetSerializer" version="4.1.0" targetFramework="net451" />
66
<package id="Zlib.Portable" version="1.11.0" targetFramework="net451" />
77
</packages>

EngineLayer/Analysis/AnalysisEngine.cs

+36 -4
Original file line number | Diff line number | Diff line change
@@ -512,7 +512,7 @@ public void RunQuantification(List<NewPsmWithFdr> psms, double rtTolerance, doub
512512
// calculate apex intensity
513513
var rt1 = kvp.Value.Select(r => r.thisPSM.newPsm.scanRetentionTime).Min();
514514
var rt2 = kvp.Value.Select(r => r.thisPSM.newPsm.scanRetentionTime).Max();
515-
515+
516516
double theoreticalMz = Chemistry.ClassExtensions.ToMz(kvp.Value.First().thisPSM.PeptideMonoisotopicMass, kvp.Value.First().thisPSM.newPsm.scanPrecursorCharge);
517517

518518
double mzTol = ((ppmTolerance / 1e6) * kvp.Value.First().thisPSM.PeptideMonoisotopicMass) / kvp.Value.First().thisPSM.newPsm.scanPrecursorCharge;
@@ -534,10 +534,10 @@ public void RunQuantification(List<NewPsmWithFdr> psms, double rtTolerance, doub
534534
}
535535

536536
double apexIntensity = 0;
537-
if(intensities.Any())
537+
if (intensities.Any())
538538
apexIntensity = intensities.Max();
539539

540-
foreach(var p in kvp.Value)
540+
foreach (var p in kvp.Value)
541541
p.thisPSM.newPsm.apexIntensity = apexIntensity;
542542

543543
// calculate full width half max (peak quality)
@@ -658,6 +658,8 @@ protected override MyResults RunSpecific()
658658
}
659659

660660
List<NewPsmWithFdr>[] allResultingIdentifications = new List<NewPsmWithFdr>[searchModes.Count];
661+
Dictionary<string, int>[] allModsSeen = new Dictionary<string, int>[searchModes.Count];
662+
Dictionary<string, int>[] allModsOnPeptides = new Dictionary<string, int>[searchModes.Count];
661663

662664
for (int j = 0; j < searchModes.Count; j++)
663665
{
@@ -676,6 +678,35 @@ protected override MyResults RunSpecific()
676678
Status("Running FDR analysis...", nestedIds);
677679
var orderedPsmsWithFDR = DoFalseDiscoveryRateAnalysis(orderedPsmsWithPeptides, searchModes[j]);
678680

681+
Status("Running modification analysis...", nestedIds);
682+
683+
Dictionary<string, int> modsSeen = new Dictionary<string, int>();
684+
Dictionary<string, int> modsOnPeptides = new Dictionary<string, int>();
685+
686+
// For now analyze only psms with a single option
687+
foreach (var highConfidencePSM in orderedPsmsWithFDR.Where(b => (b.qValue <= 0.01 && b.thisPSM.peptidesWithSetModifications.Count == 1)))
688+
{
689+
var singlePeptide = highConfidencePSM.thisPSM.peptidesWithSetModifications.First();
690+
var modsIdentified = singlePeptide.allModsOneIsNterminus;
691+
foreach (var modSeen in modsIdentified)
692+
{
693+
if (modsSeen.ContainsKey(modSeen.Value.id))
694+
modsSeen[modSeen.Value.id]++;
695+
else
696+
modsSeen.Add(modSeen.Value.id, 1);
697+
}
698+
var modsInProtein = singlePeptide.Protein.OneBasedPossibleLocalizedModifications.Where(b => b.Key >= singlePeptide.OneBasedStartResidueInProtein && b.Key <= singlePeptide.OneBasedEndResidueInProtein).SelectMany(b => b.Value);
699+
foreach (var modInProtein in modsInProtein)
700+
{
701+
if (modsOnPeptides.ContainsKey(modInProtein.id))
702+
modsOnPeptides[modInProtein.id]++;
703+
else
704+
modsOnPeptides.Add(modInProtein.id, 1);
705+
}
706+
}
707+
allModsSeen[j] = modsSeen;
708+
allModsOnPeptides[j] = modsOnPeptides;
709+
679710
if (quantify)
680711
{
681712
Status("Quantifying peptides...", nestedIds);
@@ -695,7 +726,6 @@ protected override MyResults RunSpecific()
695726
writeHistogramPeaksAction(myTreeStructure, searchModes[j].FileNameAddition);
696727
}
697728
}
698-
699729
else
700730
{
701731
Status("Running FDR analysis on unique peptides...", nestedIds);
@@ -715,6 +745,8 @@ protected override MyResults RunSpecific()
715745

716746
myAnalysisResults.AllResultingIdentifications = allResultingIdentifications;
717747
myAnalysisResults.ProteinGroups = proteinGroups;
748+
myAnalysisResults.allModsSeen = allModsSeen;
749+
myAnalysisResults.allModsOnPeptides = allModsOnPeptides;
718750
return myAnalysisResults;
719751
}
720752

EngineLayer/Analysis/AnalysisResults.cs

+16
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,13 @@ namespace EngineLayer.Analysis
88
public class AnalysisResults : MyResults
99
{
1010

11+
#region Internal Fields
12+
13+
internal Dictionary<string, int>[] allModsOnPeptides;
14+
internal Dictionary<string, int>[] allModsSeen;
15+
16+
#endregion Internal Fields
17+
1118
#region Private Fields
1219

1320
private string output;
@@ -54,6 +61,15 @@ public override string ToString()
5461

5562
sb.AppendLine("All proteins within 1% FDR: " + string.Join(", ", numProteinsList));
5663
}
64+
65+
for (int i = 0; i < allModsOnPeptides.Length; i++)
66+
{
67+
sb.AppendLine("Search mode " + i + " Mods seen:");
68+
sb.AppendLine(string.Join(Environment.NewLine, allModsSeen[i].OrderBy(b => -b.Value).Select(b => b.Key + " : " + b.Value)));
69+
sb.AppendLine("Search mode " + i + " Mods on proteins:");
70+
sb.AppendLine(string.Join(Environment.NewLine, allModsOnPeptides[i].OrderBy(b => -b.Value).Select(b => b.Key + " : " + b.Value)));
71+
}
72+
5773
sb.Append(output);
5874

5975
return sb.ToString();

EngineLayer/Analysis/Bin.cs

+1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ public class Bin
1313
public string psimodID = "-";
1414
public string uniprotID = "-";
1515
public string UnimodFormulas = "-";
16+
public string UnimodDiffs = "-";
1617
public string AA = "-";
1718
public string combos = "-";
1819
public Dictionary<char, int> residueCount;

EngineLayer/Analysis/BinTreeStructure.cs

+10 -2
Original file line number | Diff line number | Diff line change
@@ -292,17 +292,20 @@ private void IdentifyUnimodBins(double v)
292292
{
293293
var ok = new HashSet<string>();
294294
var okformula = new HashSet<string>();
295+
var okDiff = new HashSet<double>();
295296
foreach (var hm in MyEngine.UnimodDeserialized)
296297
{
297298
var theMod = hm as ModificationWithMassAndCf;
298299
if (Math.Abs(theMod.monoisotopicMass - bin.MassShift) <= v)
299300
{
300301
ok.Add(hm.id);
301302
okformula.Add(theMod.chemicalFormula.Formula);
303+
okDiff.Add(theMod.monoisotopicMass - bin.MassShift);
302304
}
303305
}
304306
bin.UnimodId = string.Join(" or ", ok);
305307
bin.UnimodFormulas = string.Join(" or ", okformula);
308+
bin.UnimodDiffs = string.Join(" or ", okDiff);
306309
}
307310
}
308311

@@ -379,16 +382,21 @@ private void IdentifyMine(double v)
379382
myInfos.Add(new MyInfo(0, "Exact match!"));
380383
myInfos.Add(new MyInfo(-48.128629, "Phosphorylation-Lysine: Probably reverse is the correct match"));
381384
myInfos.Add(new MyInfo(-76.134779, "Phosphorylation-Arginine: Probably reverse is the correct match"));
382-
myInfos.Add(new MyInfo(1.003, "1 MM"));
385+
myInfos.Add(new MyInfo(1.0025, "1 MM"));
383386
myInfos.Add(new MyInfo(2.005, "2 MM"));
384-
myInfos.Add(new MyInfo(3.008, "3 MM"));
387+
myInfos.Add(new MyInfo(3.0075, "3 MM"));
385388
myInfos.Add(new MyInfo(173.051055, "Acetylation + Methionine: Usually on protein N terminus"));
386389
myInfos.Add(new MyInfo(-91.009185, "neg Carbamidomethylation - H2S: Usually on cysteine."));
387390
myInfos.Add(new MyInfo(-32.008456, "oxidation and then loss of oxidized M side chain"));
388391
myInfos.Add(new MyInfo(-79.966331, "neg Phosphorylation."));
389392
myInfos.Add(new MyInfo(189.045969, "Carboxymethylated + Methionine. Usually on protein N terminus"));
390393
myInfos.Add(new MyInfo(356.20596, "Lysine+V+E or Lysine+L+D"));
391394
myInfos.Add(new MyInfo(239.126988, "Lysine+H(5) C(5) N O(2), possibly Nmethylmaleimide"));
395+
myInfos.Add(new MyInfo(-105.02484, "Methionine loss then acetaldehyde"));
396+
myInfos.Add(new MyInfo(52.911464, "Fe[III]"));
397+
398+
399+
392400
foreach (Bin bin in FinalBins)
393401
{
394402
bin.Mine = "";

EngineLayer/ClassicSearch/ClassicSearchEngine.cs

+6 -3
Original file line number | Diff line number | Diff line change
@@ -39,11 +39,13 @@ public class ClassicSearchEngine : MyEngine
3939
private readonly List<ProductType> lp;
4040
private readonly List<string> nestedIds;
4141

42+
private readonly bool conserveMemory;
43+
4244
#endregion Private Fields
4345

4446
#region Public Constructors
4547

46-
public ClassicSearchEngine(LocalMS2Scan[] arrayOfSortedMS2Scans, int myMsDataFileNumSpectra, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, Tolerance productMassTolerance, Protease protease, List<SearchMode> searchModes, int maximumMissedCleavages, int maximumVariableModificationIsoforms, string fileName, List<ProductType> lp, List<string> nestedIds)
48+
public ClassicSearchEngine(LocalMS2Scan[] arrayOfSortedMS2Scans, int myMsDataFileNumSpectra, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, Tolerance productMassTolerance, Protease protease, List<SearchMode> searchModes, int maximumMissedCleavages, int maximumVariableModificationIsoforms, string fileName, List<ProductType> lp, List<string> nestedIds, bool conserveMemory)
4749
{
4850
this.arrayOfSortedMS2Scans = arrayOfSortedMS2Scans;
4951
this.myScanPrecursorMasses = arrayOfSortedMS2Scans.Select(b => b.MonoisotopicPrecursorMass).ToArray();
@@ -59,6 +61,7 @@ public ClassicSearchEngine(LocalMS2Scan[] arrayOfSortedMS2Scans, int myMsDataFil
5961
this.fileName = fileName;
6062
this.lp = lp;
6163
this.nestedIds = nestedIds;
64+
this.conserveMemory = conserveMemory;
6265
}
6366

6467
#endregion Public Constructors
@@ -101,7 +104,7 @@ protected override MyResults RunSpecific()
101104
if (peptide.Length <= 1)
102105
continue;
103106

104-
if (peptide.numLocMods == 0)
107+
if (peptide.numLocMods == 0 && !conserveMemory)
105108
{
106109
var hc = peptide.BaseLeucineSequence;
107110
var observed = level3_observed.Contains(hc);
@@ -119,7 +122,7 @@ protected override MyResults RunSpecific()
119122
var ListOfModifiedPeptides = peptide.GetPeptidesWithSetModifications(variableModifications, maximumVariableModificationIsoforms, max_mods_for_peptide).ToList();
120123
foreach (var yyy in ListOfModifiedPeptides)
121124
{
122-
if (peptide.numLocMods > 0)
125+
if (peptide.numLocMods > 0 && !conserveMemory)
123126
{
124127
var hc = yyy.Sequence;
125128
var observed = level4_observed.Contains(hc);

EngineLayer/Data/combos.txt

+4 -1
Original file line number | Diff line number | Diff line change
@@ -13,4 +13,7 @@
1313
-17.026549 0.984016
1414
21.981943 21.981943
1515
14.015650 0.984016
16-
79.966331 79.966331
16+
79.966331 79.966331
17+
15.994915 -18.010565
18+
42.010565 21.981943
19+
-17.026549 21.981943

0 commit comments

Comments
 (0)