Skip to content

Commit fc72929

Browse files
authored
Revert "PrSM output for top-down (#2062)" (#2079)
This reverts commit 97335a4.
1 parent 97335a4 commit fc72929

11 files changed

+27
-4602
lines changed

EngineLayer/GlobalVariables.cs

+2-8
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,7 @@ public static class GlobalVariables
3232
private static char[] _InvalidAminoAcids;
3333

3434
// this affects output labels, etc. and can be changed to "Proteoform" for top-down searches
35-
public static Analyte AnalyteType;
36-
37-
public enum Analyte
38-
{
39-
Peptide,
40-
Proteoform
41-
}
35+
public static string AnalyteType;
4236

4337
public static List<string> ErrorsReadingMods;
4438

@@ -69,7 +63,7 @@ public static void SetUpGlobalVariables()
6963
Loaders.LoadElements();
7064
AcceptedDatabaseFormats = new List<string> { ".fasta", ".fa", ".xml", ".msp" };
7165
AcceptedSpectraFormats = new List<string> { ".raw", ".mzml", ".mgf" };
72-
AnalyteType = Analyte.Peptide;
66+
AnalyteType = "Peptide";
7367
_InvalidAminoAcids = new char[] { 'X', 'B', 'J', 'Z', ':', '|', ';', '[', ']', '{', '}', '(', ')', '+', '-' };
7468
ExperimentalDesignFileName = "ExperimentalDesign.tsv";
7569
SeparationTypes = new List<string> { { "HPLC" }, { "CZE" } };

EngineLayer/MetaMorpheusEngine.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -216,4 +216,4 @@ private void FinishedSingleEngine(MetaMorpheusEngineResults myResults)
216216
FinishedSingleEngineHandler?.Invoke(this, new SingleEngineFinishedEventArgs(myResults));
217217
}
218218
}
219-
}
219+
}

GUI/TaskWindows/GPTMDTaskWindow.xaml.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ private void CancelButton_Click(object sender, RoutedEventArgs e)
240240
DialogResult = false;
241241
CustomFragmentationWindow.Close();
242242
}
243-
243+
244244
private void ProteaseSpecificUpdate(object sender, SelectionChangedEventArgs e)
245245
{
246246
string proteaseName = ((Protease)ProteaseComboBox.SelectedItem).Name;
@@ -456,7 +456,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e)
456456
dissociationType: dissociationType,
457457
scoreCutoff: double.Parse(MinScoreAllowed.Text, CultureInfo.InvariantCulture),
458458
precursorMassTolerance: precursorMassTolerance,
459-
productMassTolerance: productMassTolerance,
459+
productMassTolerance: productMassTolerance,
460460
trimMs1Peaks: TrimMs1Peaks,
461461
trimMsMsPeaks: TrimMsMsPeaks,
462462
numberOfPeaksToKeepPerWindow: numPeaksToKeep,

TaskLayer/MetaMorpheusTask.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -946,11 +946,11 @@ public static void DetermineAnalyteType(CommonParameters commonParameters)
946946
&& commonParameters.DigestionParams.Protease != null
947947
&& commonParameters.DigestionParams.Protease.Name == "top-down")
948948
{
949-
GlobalVariables.AnalyteType = GlobalVariables.Analyte.Proteoform;
949+
GlobalVariables.AnalyteType = "Proteoform";
950950
}
951951
else
952952
{
953-
GlobalVariables.AnalyteType = GlobalVariables.Analyte.Peptide;
953+
GlobalVariables.AnalyteType = "Peptide";
954954
}
955955
}
956956

TaskLayer/SearchTask/PostSearchAnalysisTask.cs

+14-17
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,11 @@ private void ProteinAnalysis()
128128
List<PeptideSpectralMatch> psmsForProteinParsimony = Parameters.AllPsms;
129129

130130
// run parsimony
131-
ProteinParsimonyResults proteinAnalysisResults = (ProteinParsimonyResults)(new ProteinParsimonyEngine(psmsForProteinParsimony, Parameters.SearchParameters.ModPeptidesAreDifferent, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run());
131+
ProteinParsimonyResults proteinAnalysisResults = (ProteinParsimonyResults)(new ProteinParsimonyEngine(psmsForProteinParsimony, Parameters.SearchParameters.ModPeptidesAreDifferent, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run());
132132

133133
// score protein groups and calculate FDR
134134
ProteinScoringAndFdrResults proteinScoringAndFdrResults = (ProteinScoringAndFdrResults)new ProteinScoringAndFdrEngine(proteinAnalysisResults.ProteinGroups, psmsForProteinParsimony,
135-
Parameters.SearchParameters.NoOneHitWonders, Parameters.SearchParameters.ModPeptidesAreDifferent, true, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run();
135+
Parameters.SearchParameters.NoOneHitWonders, Parameters.SearchParameters.ModPeptidesAreDifferent, true, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }).Run();
136136

137137
ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups;
138138

@@ -516,10 +516,7 @@ private void HistogramAnalysis()
516516

517517
private void WritePsmResults()
518518
{
519-
//if doing proteoform analysis, then output is proteoform-spectrum match (PrSM) instead of peptide-spectrum match (PSM)
520-
string analyteString = GlobalVariables.AnalyteType == GlobalVariables.Analyte.Proteoform ? "PrSM" : "PSM";
521-
522-
Status("Writing " + analyteString + " results...", Parameters.SearchTaskId);
519+
Status("Writing PSM results...", Parameters.SearchTaskId);
523520
var FilteredPsmListForOutput = Parameters.AllPsms
524521
.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter
525522
&& p.FdrInfo.QValueNotch <= CommonParameters.QValueOutputFilter).ToList();
@@ -534,18 +531,18 @@ private void WritePsmResults()
534531
}
535532

536533
// write PSMs
537-
string writtenFile = Path.Combine(Parameters.OutputFolder, "All" + analyteString + "s.psmtsv");
534+
string writtenFile = Path.Combine(Parameters.OutputFolder, "AllPSMs.psmtsv");
538535
WritePsmsToTsv(FilteredPsmListForOutput, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
539536
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });
540537

541538
// write PSMs for percolator
542539
// percolator native read format is .tab
543-
writtenFile = Path.Combine(Parameters.OutputFolder, "All" + analyteString + "s_FormattedForPercolator.tab");
540+
writtenFile = Path.Combine(Parameters.OutputFolder, "AllPSMs_FormattedForPercolator.tab");
544541
WritePsmsForPercolator(FilteredPsmListForOutput, writtenFile);
545542
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });
546543

547544
// write summary text
548-
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + analyteString + "s within 1% FDR: " + Parameters.AllPsms.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
545+
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target PSMS within 1% FDR: " + Parameters.AllPsms.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
549546
if (Parameters.SearchParameters.DoParsimony)
550547
{
551548
Parameters.SearchTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy)
@@ -562,7 +559,7 @@ private void WritePsmResults()
562559

563560
Parameters.SearchTaskResults.AddTaskSummaryText("MS2 spectra in " + strippedFileName + ": " + Parameters.NumMs2SpectraPerFile[strippedFileName][0]);
564561
Parameters.SearchTaskResults.AddTaskSummaryText("Precursors fragmented in " + strippedFileName + ": " + Parameters.NumMs2SpectraPerFile[strippedFileName][1]);
565-
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + analyteString + "s within 1% FDR in " + strippedFileName + ": " + psmsForThisFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
562+
Parameters.SearchTaskResults.AddTaskSummaryText("Target PSMs within 1% FDR in " + strippedFileName + ": " + psmsForThisFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
566563

567564
// writes all individual spectra file search results to subdirectory
568565
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)
@@ -571,12 +568,12 @@ private void WritePsmResults()
571568
Directory.CreateDirectory(Parameters.IndividualResultsOutputFolder);
572569

573570
// write PSMs
574-
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_" + analyteString + "s.psmtsv");
571+
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_PSMs.psmtsv");
575572
WritePsmsToTsv(psmsForThisFile, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
576573
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", file.First().FullFilePath });
577574

578575
// write PSMs for percolator
579-
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_" + analyteString + "sFormattedForPercolator.tab");
576+
writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + "_PSMsFormattedForPercolator.tab");
580577
WritePsmsForPercolator(psmsForThisFile, writtenFile);
581578
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", file.First().FullFilePath });
582579
}
@@ -656,10 +653,10 @@ private void WriteProteinResults()
656653
{
657654
mzidFilePath = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + ".mzID");
658655
}
659-
MzIdentMLWriter.WriteMzIdentMl(psmsForThisFile.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter),
656+
MzIdentMLWriter.WriteMzIdentMl(psmsForThisFile.Where(p => p.FdrInfo.QValue <= CommonParameters.QValueOutputFilter),
660657
subsetProteinGroupsForThisFile, Parameters.VariableModifications, Parameters.FixedModifications, Parameters.SearchParameters.SilacLabels,
661658
new List<Protease> { CommonParameters.DigestionParams.Protease }, CommonParameters.QValueOutputFilter, CommonParameters.ProductMassTolerance,
662-
CommonParameters.PrecursorMassTolerance, CommonParameters.DigestionParams.MaxMissedCleavages, mzidFilePath,
659+
CommonParameters.PrecursorMassTolerance, CommonParameters.DigestionParams.MaxMissedCleavages, mzidFilePath,
663660
Parameters.SearchParameters.IncludeModMotifInMzid);
664661

665662
FinishedWritingFile(mzidFilePath, new List<string> { Parameters.SearchTaskId, "Individual Spectra Files", fullFilePath });
@@ -1005,16 +1002,16 @@ private void WritePeptideResults()
10051002
WritePsmsToTsv(peptides, writtenFile, Parameters.SearchParameters.ModsToWriteSelection);
10061003
FinishedWritingFile(writtenFile, new List<string> { Parameters.SearchTaskId });
10071004

1008-
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + GlobalVariables.AnalyteType.ToString().ToLower() + "s within 1% FDR: " + peptides.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
1005+
Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText("All target " + GlobalVariables.AnalyteType.ToLower() + "s within 1% FDR: " + peptides.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy));
10091006

10101007
foreach (var file in PsmsGroupedByFile)
10111008
{
10121009
// write summary text
10131010
var psmsForThisFile = file.ToList();
10141011
string strippedFileName = Path.GetFileNameWithoutExtension(file.First().FullFilePath);
10151012
var peptidesForFile = psmsForThisFile.GroupBy(b => b.FullSequence).Select(b => b.FirstOrDefault()).OrderByDescending(b => b.Score).ToList();
1016-
new FdrAnalysisEngine(peptidesForFile, Parameters.NumNotches, CommonParameters, FileSpecificParameters, new List<string> { Parameters.SearchTaskId }, "Peptide").Run();
1017-
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + GlobalVariables.AnalyteType.ToString().ToLower() + "s within 1% FDR in " + strippedFileName + ": " + peptidesForFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
1013+
new FdrAnalysisEngine(peptidesForFile, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { Parameters.SearchTaskId }, "Peptide").Run();
1014+
Parameters.SearchTaskResults.AddTaskSummaryText("Target " + GlobalVariables.AnalyteType.ToLower() + "s within 1% FDR in " + strippedFileName + ": " + peptidesForFile.Count(a => a.FdrInfo.QValue <= 0.01 && !a.IsDecoy) + Environment.NewLine);
10181015

10191016
// writes all individual spectra file search results to subdirectory
10201017
if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles)

Test/MyTaskTest.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ public static void MakeSureFdrDoesntSkip()
274274

275275
// RUN!
276276
var theStringResult = task.RunTask(outputFolder, new List<DbForTask> { new DbForTask(xmlName, false) }, new List<string> { mzmlName }, "taskId1").ToString();
277-
Assert.IsTrue(theStringResult.Contains("All target PSMs within 1% FDR: 1"));
277+
Assert.IsTrue(theStringResult.Contains("All target PSMS within 1% FDR: 1"));
278278
Directory.Delete(outputFolder, true);
279279
File.Delete(xmlName);
280280
File.Delete(mzmlName);

Test/SilacTest.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ public static void TestSilacQuantification()
207207
Directory.CreateDirectory(outputFolder);
208208
var theStringResult = task.RunTask(outputFolder, new List<DbForTask> { new DbForTask(xmlName, false) }, new List<string> { mzmlName, mzmlName2 }, "taskId1").ToString();
209209

210-
Assert.IsTrue(theStringResult.Contains("All target PSMs within 1% FDR: 2")); //it's not a psm, it's a MBR feature. 2 because there are two files, but not 4 because MBR != psm
210+
Assert.IsTrue(theStringResult.Contains("All target PSMS within 1% FDR: 2")); //it's not a psm, it's a MBR feature. 2 because there are two files, but not 4 because MBR != psm
211211

212212
///Normal Peptide
213213
//test proteins

Test/Test.csproj

-6
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,6 @@
277277
<None Update="TestVariantPep.mzML">
278278
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
279279
</None>
280-
<None Update="TopDownTestData\ProteoformAmbiguity.fasta">
281-
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
282-
</None>
283-
<None Update="TopDownTestData\ProteoformAmbiguity.mzML">
284-
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
285-
</None>
286280
<None Update="TopDownTestData\slicedTDYeast.mzML">
287281
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
288282
</None>

Test/TopDownTest.cs Test/TestTopDown.cs

+4-54
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
namespace Test
1717
{
1818
[TestFixture]
19-
public class TopDownTest
19+
public class TestTopDown
2020
{
2121
[Test]
2222
public static void TestClassicSearchEngineTopDown()
@@ -29,7 +29,7 @@ public static void TestClassicSearchEngineTopDown()
2929
MetaMorpheusTask.DetermineAnalyteType(CommonParameters);
3030

3131
// test output file name (should be proteoform and not peptide)
32-
Assert.That(GlobalVariables.AnalyteType == GlobalVariables.Analyte.Proteoform);
32+
Assert.That(GlobalVariables.AnalyteType == "Proteoform");
3333

3434
var variableModifications = new List<Modification>();
3535
var fixedModifications = new List<Modification>();
@@ -47,63 +47,13 @@ public static void TestClassicSearchEngineTopDown()
4747
var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();
4848

4949
PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
50-
new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
50+
new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
5151
proteinList, searchMode, CommonParameters, null, null, new List<string>()).Run();
5252

5353
var psm = allPsmsArray.Where(p => p != null).FirstOrDefault();
5454
Assert.That(psm.MatchedFragmentIons.Count == 47);
5555
}
5656

57-
/// <summary>
58-
/// TODO: MetaMorpheus currently reports ambiguity at the PrSM level, but starts tossing things when we get to the proteoform/protein level. See issue #2061
59-
/// Example 1: a base sequence is needed for parsimony, but an ambiguous sequence means the base sequence is null.
60-
/// Example 2: a full sequence is needed for determining which peptides/proteoforms are unique, but ambiguous localization means the full sequence is null.
61-
/// </summary>
62-
[Test]
63-
public static void TestAmbiguousProteoformOutput()
64-
{
65-
CommonParameters commonParameters = new CommonParameters(
66-
digestionParams: new DigestionParams(protease: "top-down"),
67-
scoreCutoff: 1,
68-
useProvidedPrecursorInfo: false,
69-
deconvolutionMaxAssumedChargeState: 60,
70-
trimMsMsPeaks: false,
71-
listOfModsVariable: new List<(string, string)> { ("Common Variable", "Oxidation on M"), ("Common Biological", "Acetylation on K"), ("Common Biological", "Trimethylation on K") },
72-
listOfModsFixed: new List<(string, string)> { ("Common Fixed", "Carbamidomethyl on C") }
73-
);
74-
75-
SearchParameters searchParameters = new SearchParameters
76-
{
77-
DoQuantification = false
78-
};
79-
80-
81-
SearchTask searchTask = new SearchTask
82-
{
83-
CommonParameters = commonParameters,
84-
SearchParameters = searchParameters
85-
};
86-
87-
List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", searchTask) };
88-
string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestProteoformAmbiguity");
89-
string mzmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData/ProteoformAmbiguity.mzML");
90-
string fastaName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData/ProteoformAmbiguity.fasta");
91-
// RUN!
92-
var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, outputFolder);
93-
engine.Run();
94-
95-
//There are 8 PrSMs, each with a unique proteoform and protein
96-
//check that all 8 PrSMs are reported, all 8 unique proteoforms, and all 8 proteins
97-
string[] prsmLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllPrSMs.psmtsv"));
98-
Assert.AreEqual(prsmLines.Length, 9); //8 + header
99-
string[] proteoformLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllProteoforms.psmtsv"));
100-
Assert.AreEqual(proteoformLines.Length, 4); //3 + header, five of the PrSMs have ambiguous full sequences, which are needed to determine individuality
101-
string[] proteinLines = File.ReadAllLines(Path.Combine(outputFolder, "task1/AllProteinGroups.tsv"));
102-
Assert.AreEqual(proteinLines.Length, 7); //6 + header, two of the PrSMs have ambiguous base sequences, which prevents their use in parsimony
103-
104-
Directory.Delete(outputFolder, true);
105-
}
106-
10757
[Test]
10858
public static void TestModernSearchEngineTopDown()
10959
{
@@ -139,4 +89,4 @@ public static void TestModernSearchEngineTopDown()
13989
Assert.That(psm.MatchedFragmentIons.Count == 47);
14090
}
14191
}
142-
}
92+
}

0 commit comments

Comments
 (0)