Skip to content

Commit 826ec27

Browse files
committed
Fixed a bug causing rare seg-faults with --peOverlap* options and chimeric detection.
1 parent 2cf245c commit 826ec27

18 files changed

+46
-30
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
* Fixed a bug causing rare seg-faults with --peOverlap* options and chimeric detection.
2+
* Implemented SAM attributes sS and sQ to output sequence and quality of the entire barcode read, and sM to output the barcode match status.
3+
* Implemented complex barcodes in STARsolo with --soloType CB_UMI_Complex, --soloCBmatchWLtype, --soloAdapterSequence, --soloAdapterMismatchesNmax, --soloCBposition, --soloUMIposition.
14
* Fixed a problem in STARsolo with unmapped reads counts in Solo.out/*.stats files.
25
* Fixed a bug in STARsolo with counting reads for splice junctions. Solo.out/matrixSJ.mtx output is slightly changed.
36
* Implemented CB/UB/GX/GN BAM tags for STARsolo.

bin/Linux_x86_64/STAR

35.9 KB
Binary file not shown.

bin/Linux_x86_64/STARlong

35.7 KB
Binary file not shown.

bin/Linux_x86_64_static/STAR

35.7 KB
Binary file not shown.

bin/Linux_x86_64_static/STARlong

35.6 KB
Binary file not shown.

source/BAMbinSortByCoordinate.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "BAMfunctions.h"
55
#include "SequenceFuns.h"
66

7-
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, SoloFeature &soloFeat) {
7+
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, Solo &solo) {
88

99
if (binS==0) return; //nothing to do for empty bins
1010
//allocate arrays
@@ -66,7 +66,10 @@ void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, st
6666
for (uint ia=0;ia<binN;ia++) {
6767
char* bam0=bamIn+startPos[ia*3+2];
6868
uint32 size0=*((uint32*) bam0)+sizeof(uint32);
69-
soloFeat.addBAMtags(bam0,size0,bam1);
69+
70+
if (solo.pSolo.samAttrYes)
71+
solo.soloFeat[solo.pSolo.samAttrFeature]->addBAMtags(bam0,size0,bam1);
72+
7073
bgzf_write(bgzfBin, bam0, size0);
7174
};
7275

source/BAMbinSortByCoordinate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
#include "IncludeDefine.h"
44
#include "Parameters.h"
55
#include "Genome.h"
6-
#include "SoloFeature.h"
6+
#include "Solo.h"
77

88
#include SAMTOOLS_BGZF_H
99

10-
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, SoloFeature &soloFeat);
10+
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, Solo &solo);
1111

1212
#endif

source/BAMbinSortUnmapped.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include "ErrorWarning.h"
33
#include "BAMfunctions.h"
44

5-
void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, SoloFeature &soloFeat) {
5+
void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, Solo &solo) {
66

77
BGZF *bgzfBin;
88
bgzfBin=bgzf_open((dirBAMsort+"/b"+to_string((uint) iBin)).c_str(),("w"+to_string((long long) P.outBAMcompression)).c_str());
@@ -53,7 +53,9 @@ void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, Parameter
5353
//add extra tags to the BAM record
5454
char* bam0=bamIn[it];
5555
uint32 size0=bamSize.at(it);
56-
soloFeat.addBAMtags(bam0,size0,bam1);
56+
57+
if (solo.pSolo.samAttrYes)
58+
solo.soloFeat[solo.pSolo.samAttrFeature]->addBAMtags(bam0,size0,bam1);
5759

5860
bgzf_write(bgzfBin, bam0, size0);
5961
bamInStream[it].read(bamIn[it],sizeof(int32));//read record size

source/BAMbinSortUnmapped.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
#include "IncludeDefine.h"
44
#include "Parameters.h"
55
#include "Genome.h"
6-
#include "SoloFeature.h"
6+
#include "Solo.h"
77

88
#include SAMTOOLS_BGZF_H
99

10-
void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, SoloFeature &soloFeat);
10+
void BAMbinSortUnmapped(uint32 iBin, uint nThreads, string dirBAMsort, Parameters &P, Genome &mapGen, Solo &solo);
1111

1212
#endif

source/ParametersSolo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ void ParametersSolo::initialize(Parameters *pPin)
1313
{
1414
pP=pPin;
1515

16+
yes = true;
1617
if (typeStr=="None") {
17-
type=0;
18+
type = 0;
19+
yes = false;
20+
samAttrYes = false;
1821
//solo SAM attributes not allowed
1922
if (pP->outSAMattrPresent.CR || pP->outSAMattrPresent.CY || pP->outSAMattrPresent.UR || pP->outSAMattrPresent.UY || pP->outSAMattrPresent.CB || pP->outSAMattrPresent.UB) {
2023
ostringstream errOut;

source/ParametersSolo.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,10 @@ class ParametersSolo {
1010
//chemistry, library etc
1111
string typeStr;
1212
int type;
13+
bool yes;
1314
string strandStr;
1415
int32 strand;
1516

16-
17-
1817
//simple barcodes
1918
uint32 cbS,cbL; //cell barcode start,length
2019
uint32 umiS,umiL; //umi start,length

source/ReadAlign_waspMap.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,10 @@ void ReadAlign::copyRead(ReadAlign &r) {//copy read information only
9595
readLengthPairOriginal=r.readLengthPairOriginal;
9696
outFilterMismatchNmaxTotal=r.outFilterMismatchNmaxTotal;
9797
readName=r.readName;
98-
98+
iReadAll=r.iReadAll;
99+
readFilter=r.readFilter;
100+
readFilesIndex=r.readFilesIndex;
101+
99102
for (uint ii=0;ii<=2;ii++)
100103
memcpy(Read1[ii],r.Read1[ii],Lread);//need to copy since it will be changed
101104
Qual1=r.Qual1;

source/STAR.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ int main(int argInN, char* argIn[]) {
383383
RAchunk[0]->chunkFilesCat(P.inOut->outSAM, P.outFileTmp + "/Aligned.out.sam.chunk", g_threadChunks.chunkOutN);
384384
};
385385

386-
bamSortByCoordinate(P, RAchunk, mainGenome, *soloMain.soloFeat[soloMain.pSolo.samAttrFeature]);
386+
bamSortByCoordinate(P, RAchunk, mainGenome, soloMain);
387387

388388
//wiggle output
389389
if (P.outWigFlags.yes) {

source/SoloFeature_addBAMtags.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44

55
void SoloFeature::addBAMtags(char *&bam0, uint32 &size0, char *bam1)
66
{//add extra tags to the BAM record
7-
8-
if (!pSolo.samAttrYes)
9-
return;
107

118
uint64 iread = * ((uint64*) (bam0+size0));
129
iread = iread >> 32; //iRead was encoded in the upper 32 bits

source/SoloReadBarcode_getCBandUMI.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,12 @@ bool SoloReadBarcode::convertCheckUMI()
129129
{//check UMIs, return if bad UMIs
130130
if (convertNuclStrToInt32(umiSeq,umiB)!=-1) {//convert and check for Ns
131131
stats.V[stats.nNinUMI]++;//UMIs are not allowed to have Ns
132+
cbMatch=-23;
132133
return false;
133134
};
134135
if (umiB==homoPolymer[0] || umiB==homoPolymer[1] || umiB==homoPolymer[2] || umiB==homoPolymer[3]) {
135136
stats.V[stats.nUMIhomopolymer]++;
137+
cbMatch=-24;
136138
return false;
137139
};
138140
return true;
@@ -167,6 +169,8 @@ void SoloReadBarcode::getCBandUMI(string &readNameExtra)
167169

168170
cbSeq="";
169171
cbQual="";
172+
umiSeq="";
173+
umiQual="";
170174

171175
uint32 adapterStart=0;
172176
if (pSolo.adapterYes) {
@@ -183,19 +187,21 @@ void SoloReadBarcode::getCBandUMI(string &readNameExtra)
183187
return;
184188
};
185189

186-
if (!convertCheckUMI())
187-
return;
188-
189190
bool cbMatchGood=true;
191+
if (!convertCheckUMI())
192+
cbMatchGood=false;//CB matching will not be done, just extract the sequences
193+
190194
cbMatchInd={0};
191195
for (auto &cb : pSolo.cbV) {//cycle over multiple barcodes
192196

193197
string cbSeq1, cbQual1;
194-
if (!cb.extractBarcode(bSeq, bQual, adapterStart, cbSeq1, cbQual1)
195-
|| cbSeq1.size() < cb.minLen || cbSeq1.size() >= cb.wl.size() || cb.wl[cbSeq1.size()].size()==0) {
196-
//no match possible for this barcode, or no match for previous barcodes
197-
cbMatchGood=false;
198-
cbMatch=-11;
198+
if ( !cb.extractBarcode(bSeq, bQual, adapterStart, cbSeq1, cbQual1)
199+
|| cbSeq1.size() < cb.minLen || cbSeq1.size() >= cb.wl.size() || cb.wl[cbSeq1.size()].size()==0 ) {
200+
//barcode cannot be extracted
201+
if (cbMatchGood) {
202+
cbMatch=-11;
203+
cbMatchGood=false;
204+
};
199205
};
200206
cbSeq += cbSeq1 + "_";
201207
cbQual += cbQual1 + "_";

source/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
#define STAR_VERSION "2.7.2x_0708_soloBamTags"
1+
#define STAR_VERSION "2.7.2x_0719_soloComplexBarcodes"

source/bamSortByCoordinate.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "ErrorWarning.h"
66
#include "bam_cat.h"
77

8-
void bamSortByCoordinate (Parameters &P, ReadAlignChunk **RAchunk, Genome &mainGenome, SoloFeature &soloFeat) {
8+
void bamSortByCoordinate (Parameters &P, ReadAlignChunk **RAchunk, Genome &mainGenome, Solo &solo) {
99
if (P.outBAMcoord) {//sort BAM if needed
1010
*P.inOut->logStdOut << timeMonthDayTime() << " ..... started sorting BAM\n" <<flush;
1111
P.inOut->logMain << timeMonthDayTime() << " ..... started sorting BAM\n" <<flush;
@@ -54,7 +54,7 @@ void bamSortByCoordinate (Parameters &P, ReadAlignChunk **RAchunk, Genome &mainG
5454
if (binS==0) continue; //empty bin
5555

5656
if (ibin == nBins-1) {//last bin for unmapped reads
57-
BAMbinSortUnmapped(ibin,P.runThreadN,P.outBAMsortTmpDir, P, mainGenome, soloFeat);
57+
BAMbinSortUnmapped(ibin,P.runThreadN,P.outBAMsortTmpDir, P, mainGenome, solo);
5858
} else {
5959
uint newMem=binS+binN*24;
6060
bool boolWait=true;
@@ -66,7 +66,7 @@ void bamSortByCoordinate (Parameters &P, ReadAlignChunk **RAchunk, Genome &mainG
6666
};
6767
sleep(0.1);
6868
};
69-
BAMbinSortByCoordinate(ibin,binN,binS,P.runThreadN,P.outBAMsortTmpDir, P, mainGenome, soloFeat);
69+
BAMbinSortByCoordinate(ibin,binN,binS,P.runThreadN,P.outBAMsortTmpDir, P, mainGenome, solo);
7070
#pragma omp critical
7171
totalMem-=newMem;//"release" RAM
7272
};

source/bamSortByCoordinate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include "Parameters.h"
55
#include "ReadAlignChunk.h"
66
#include "Genome.h"
7-
#include "SoloFeature.h"
7+
#include "Solo.h"
88

9-
void bamSortByCoordinate(Parameters &P, ReadAlignChunk **RAchunk, Genome &mainGenome, SoloFeature &soloFeat) ;
9+
void bamSortByCoordinate(Parameters &P, ReadAlignChunk **RAchunk, Genome &mainGenome, Solo &solo) ;
1010

1111
#endif

0 commit comments

Comments
 (0)