diff --git a/Changelog b/Changelog index 22b7ef9b..e1d184f8 100644 --- a/Changelog +++ b/Changelog @@ -1,12 +1,7 @@ ### Added -- Adjusted the peptide-to-protein mapping due to I/L replacement rules -- Adjusted the protein FDR caclulation rules. + ### Changed -- Removed the normalization done at the protein level in labelquant -- Improved the protein filter for small scale analyses -### Fixed -- Adjusted the msstats file format, added Purity to all plex sizes -- Fixed the protein coverage calculations -- Fixed repeated isobaric values that would eventually show up in the protein table \ No newline at end of file + +### Fixed \ No newline at end of file diff --git a/Makefile b/Makefile index 1a43643c..9e7ea8b5 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,8 @@ BINARY = philosopher VERSION = $(shell date +%Y%m%d) BUILD = $(shell date +%Y%m%d%H%M) -TAG = v4.7.0 -RC = RC-6 +TAG = v4.7.1 +RC = RC-2 LDFLAGS = -ldflags "-w -s -extldflags -static -X main.version=${TAG} -X main.build=${BUILD}" diff --git a/lib/id/pep.go b/lib/id/pep.go index cd1b3f0e..bb456b01 100644 --- a/lib/id/pep.go +++ b/lib/id/pep.go @@ -90,7 +90,7 @@ type PeptideIdentification struct { Intensity float64 PrevAA []byte NextAA []byte - AlternativeProteins map[string]int + AlternativeProteins map[string]string MSFragerLoc *MSFraggerLoc PTM *PTM Modifications mod.ModificationsSlice @@ -399,7 +399,7 @@ func ReadPepXMLInput(xmlFile, decoyTag, temp string, models bool) (PepIDListPtrs func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag, FileName string) PeptideIdentification { var psm PeptideIdentification - psm.AlternativeProteins = make(map[string]int) + psm.AlternativeProteins = make(map[string]string) psm.Index = sq.Index psm.SpectrumFile = FileName @@ -462,7 +462,7 @@ func processSpectrumQuery(sq spc.SpectrumQuery, mods mod.Modifications, decoyTag } for _, j := range i.AlternativeProteins { - psm.AlternativeProteins[string(j.Protein)]++ + psm.AlternativeProteins[string(j.Protein)] = string(j.PepPrevAA) + "#" + string(j.PepNextAA) } for _, j := range i.Score { @@ -643,21 +643,21 @@ func (p *PepXML4Serialiazation) PromoteProteinIDs() { var current string var alt string - var list = make(map[string]int) + var list = make(map[string]string) var isUniProt bool if strings.Contains(p.PeptideIdentification[i].Protein, p.DecoyTag) { current = p.PeptideIdentification[i].Protein - for j := range p.PeptideIdentification[i].AlternativeProteins { + for k, v := range p.PeptideIdentification[i].AlternativeProteins { - if strings.Contains(j, "sp|") { + if strings.Contains(k, "sp|") { isUniProt = true } - if !strings.HasPrefix(j, p.DecoyTag) { - list[j]++ + if !strings.HasPrefix(k, p.DecoyTag) { + list[k] = v } } @@ -665,40 +665,52 @@ func (p *PepXML4Serialiazation) PromoteProteinIDs() { if len(list) > 0 { + var prevAA string + var nextAA string + // if a Uniprot database is used we give preference to SwissProt proteins if isUniProt { - for k := range list { + for k, v := range list { + + pna := strings.Split(v, "#") + if strings.HasPrefix(k, "sp|") { alt = k + prevAA = pna[0] + nextAA = pna[1] break } else { alt = k + prevAA = pna[0] + nextAA = pna[1] } } - p.PeptideIdentification[i].Protein = alt - - // remove the replaces protein from the alternative proteins list - //p.PeptideIdentification[i].AlternativeProteins[list[alt]] = p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1] - //p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1] = "" - //p.PeptideIdentification[i].AlternativeProteins = p.PeptideIdentification[i].AlternativeProteins[:len(p.PeptideIdentification[i].AlternativeProteins)-1] // add the replaces current to the list - p.PeptideIdentification[i].AlternativeProteins[current]++ + p.PeptideIdentification[i].AlternativeProteins[current] = string(p.PeptideIdentification[i].PrevAA) + "#" + string(p.PeptideIdentification[i].NextAA) + + p.PeptideIdentification[i].Protein = alt + p.PeptideIdentification[i].PrevAA = []byte(prevAA) + p.PeptideIdentification[i].NextAA = []byte(nextAA) } else { - for k := range list { + for k, v := range list { + + pna := strings.Split(v, "#") + alt = k + prevAA = pna[0] + nextAA = pna[1] break } - p.PeptideIdentification[i].Protein = alt - - // remove the replaces protein from the alternative proteins list - //p.PeptideIdentification[i].AlternativeProteins[list[alt]] = p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1] - //p.PeptideIdentification[i].AlternativeProteins[len(p.PeptideIdentification[i].AlternativeProteins)-1] = "" - //p.PeptideIdentification[i].AlternativeProteins = p.PeptideIdentification[i].AlternativeProteins[:len(p.PeptideIdentification[i].AlternativeProteins)-1] // add the replaces current to the list - p.PeptideIdentification[i].AlternativeProteins[current]++ + p.PeptideIdentification[i].AlternativeProteins[current] = string(p.PeptideIdentification[i].PrevAA) + "#" + string(p.PeptideIdentification[i].NextAA) + + p.PeptideIdentification[i].Protein = alt + p.PeptideIdentification[i].PrevAA = []byte(prevAA) + p.PeptideIdentification[i].NextAA = []byte(nextAA) + } } diff --git a/lib/inf/inf.go b/lib/inf/inf.go index 0c657b30..dc0704d3 100644 --- a/lib/inf/inf.go +++ b/lib/inf/inf.go @@ -197,7 +197,7 @@ func ProteinInference(psm id.PepIDList) (id.PepIDList, map[string]string, map[st if pt != psm[i].Protein { - psm[i].AlternativeProteins[psm[i].Protein]++ + psm[i].AlternativeProteins[psm[i].Protein] = string(psm[i].PrevAA) + "#" + string(psm[i].NextAA) var toRemove string for j := range psm[i].AlternativeProteins { @@ -207,7 +207,7 @@ func ProteinInference(psm id.PepIDList) (id.PepIDList, map[string]string, map[st } } - psm[i].AlternativeProteins[psm[i].Protein]++ + psm[i].AlternativeProteins[psm[i].Protein] = string(psm[i].PrevAA) + "#" + string(psm[i].NextAA) delete(psm[i].AlternativeProteins, toRemove) psm[i].Protein = pt diff --git a/lib/rep/ion.go b/lib/rep/ion.go index 69a202a2..0f3db31f 100644 --- a/lib/rep/ion.go +++ b/lib/rep/ion.go @@ -53,18 +53,18 @@ func (evi *Evidence) AssembleIonReport(ion id.PepIDList, decoyTag string) { for idx, i := range ion { pr := &evi.Ions[idx] - //pr.IonForm() = fmt.Sprintf("%s#%d#%.4f", i.Peptide, i.AssumedCharge, i.CalcNeutralPepMass) - pr.Spectra = make(map[id.SpectrumType]int) pr.MappedGenes = make(map[string]struct{}) pr.MappedProteins = make(map[string]int) - //pr.Modifications.Index = make(map[string]mod.Modification) - pr.Sequence = i.Peptide pr.ModifiedSequence = i.ModifiedPeptide pr.MZ = uti.Round(((i.CalcNeutralPepMass + (float64(i.AssumedCharge) * bio.Proton)) / float64(i.AssumedCharge)), 5, 4) pr.ChargeState = i.AssumedCharge pr.PeptideMass = i.CalcNeutralPepMass + + pr.PrevAA = string(i.PrevAA) + pr.NextAA = string(i.NextAA) + if v, ok := psmIonMap[pr.IonForm()]; ok { for _, j := range v { pr.Spectra[j]++ @@ -92,6 +92,7 @@ func (evi *Evidence) AssembleIonReport(ion id.PepIDList, decoyTag string) { } } pr.Modifications = prModifications.ToSlice() + // is this bservation a decoy ? if cla.IsDecoyPSM(i, decoyTag) { pr.IsDecoy = true @@ -146,7 +147,7 @@ func (evi IonEvidenceList) IonReport(workspace, brand, decoyTag string, channels var headerIndex int for i := range printSet { - if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.Name) > 0 { + if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.CustomName) > 0 { headerIndex = i break } diff --git a/lib/rep/peptide.go b/lib/rep/peptide.go index f056cee4..e97a9b3e 100644 --- a/lib/rep/peptide.go +++ b/lib/rep/peptide.go @@ -23,12 +23,14 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { var pepSeqMap = make(map[string]bool) //is this a decoy var pepCSMap = make(map[string][]uint8) - var pepInt = make(map[string]float64) var pepProt = make(map[string]string) - var spectra = make(map[string][]id.SpectrumType) var mappedGenes = make(map[string][]string) var mappedProts = make(map[string][]string) + var pepInt = make(map[string]float64) var bestProb = make(map[string]float64) + var prevAA = make(map[string]string) + var nextAA = make(map[string]string) + var spectra = make(map[string][]id.SpectrumType) var pepMods = make(map[string][]mod.Modification) for _, i := range pep { @@ -42,6 +44,8 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { pepCSMap[i.Peptide] = append(pepCSMap[i.Peptide], i.AssumedCharge) spectra[i.Peptide] = append(spectra[i.Peptide], i.SpectrumFileName()) pepProt[i.Peptide] = i.Protein + prevAA[i.Peptide] = i.PrevAA + nextAA[i.Peptide] = i.NextAA if i.Intensity > pepInt[i.Peptide] { pepInt[i.Peptide] = i.Intensity @@ -83,6 +87,9 @@ func (evi *Evidence) AssemblePeptideReport(pep id.PepIDList, decoyTag string) { pep.Probability = bestProb[k] + pep.PrevAA = prevAA[k] + pep.NextAA = nextAA[k] + for _, i := range spectra[k] { pep.Spectra[i] = 0 } @@ -166,7 +173,7 @@ func (evi PeptideEvidenceList) PeptideReport(workspace, brand, decoyTag string, var headerIndex int for i := range printSet { - if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.Name) > 0 { + if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.CustomName) > 0 { headerIndex = i break } diff --git a/lib/rep/protein.go b/lib/rep/protein.go index beacdcc1..bd39b84c 100644 --- a/lib/rep/protein.go +++ b/lib/rep/protein.go @@ -282,7 +282,7 @@ func (eviProteins ProteinEvidenceList) ProteinReport(workspace, brand, decoyTag var headerIndex int for i := range printSet { - if printSet[i].UniqueLabels != nil && len(printSet[i].UniqueLabels.Channel1.Name) > 0 { + if printSet[i].UniqueLabels != nil && len(printSet[i].UniqueLabels.Channel1.CustomName) > 0 { headerIndex = i break } diff --git a/lib/rep/psm.go b/lib/rep/psm.go index 862cad73..38c3c0b0 100644 --- a/lib/rep/psm.go +++ b/lib/rep/psm.go @@ -269,8 +269,9 @@ func (evi PSMEvidenceList) PSMReport(workspace, brand, decoyTag string, channels var headerIndex int for i := range printSet { - if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.Name) > 0 { + if printSet[i].Labels != nil && len(printSet[i].Labels.Channel1.CustomName) > 0 { headerIndex = i + //fmt.Println(headerIndex, printSet[i].Labels.Channel1.Name, printSet[i].Labels.Channel1.CustomName) break } } diff --git a/lib/rep/updater.go b/lib/rep/updater.go index 72d93f28..3d02e9eb 100644 --- a/lib/rep/updater.go +++ b/lib/rep/updater.go @@ -427,12 +427,6 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { } } - type prevNextAA struct { - prev string - next string - } - var pepPrevNextAA = make(map[string]prevNextAA) - replacerIL := strings.NewReplacer("L", "I") for i := range evi.PSM { @@ -449,19 +443,30 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { } } - peptide := string(evi.PSM[i].PrevAA) + replacerIL.Replace(evi.PSM[i].Peptide) + string(evi.PSM[i].NextAA) + var adjustStart = 0 + var adjustEnd = 0 + peptide := replacerIL.Replace(evi.PSM[i].Peptide) - // map the peptide to the protein - mstart := strings.Index(replacerIL.Replace(rec.Sequence), peptide) - mend := mstart + len(evi.PSM[i].Peptide) + if evi.PSM[i].PrevAA != "-" && len(evi.PSM[i].PrevAA) == 1 { + peptide = replacerIL.Replace(evi.PSM[i].PrevAA) + peptide + adjustStart = +2 + } - if mstart != -1 { - evi.PSM[i].ProteinStart = mstart + 2 - evi.PSM[i].ProteinEnd = mend + 1 + if evi.PSM[i].PrevAA == "-" && len(evi.PSM[i].PrevAA) == 1 { + adjustStart = +1 } - pepPrevNextAA[evi.PSM[i].Peptide] = prevNextAA{evi.PSM[i].PrevAA, evi.PSM[i].NextAA} + if evi.PSM[i].NextAA != "-" && len(evi.PSM[i].NextAA) == 1 { + peptide = peptide + replacerIL.Replace(evi.PSM[i].NextAA) + adjustEnd = -1 + } + + // map the peptide to the protein + mstart := strings.Index(replacerIL.Replace(rec.Sequence), peptide) + mend := mstart + len(peptide) + evi.PSM[i].ProteinStart = mstart + adjustStart + evi.PSM[i].ProteinEnd = mend + adjustEnd } for i := range evi.Ions { @@ -483,9 +488,6 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { evi.Ions[i].MappedGenes[recordMap[k].GeneNames] = struct{}{} } } - pnAA := pepPrevNextAA[evi.Ions[i].Sequence] - evi.Ions[i].PrevAA = pnAA.prev - evi.Ions[i].NextAA = pnAA.next } for i := range evi.Peptides { @@ -506,9 +508,6 @@ func (evi *Evidence) UpdateLayerswithDatabase(decoyTag string) { evi.Peptides[i].MappedGenes[recordMap[k].GeneNames] = struct{}{} } } - pnAA := pepPrevNextAA[evi.Peptides[i].Sequence] - evi.Peptides[i].PrevAA = pnAA.prev - evi.Peptides[i].NextAA = pnAA.next } }