Skip to content

Commit c4f176d

Browse files
author
philres
committed
Fixed extension of LMPs accross chromosome borders
1 parent b907e04 commit c4f176d

6 files changed

+64
-9
lines changed

.vscode/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/c_cpp_properties.json

src/AlignmentBuffer.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ char const * const AlignmentBuffer::extractReferenceSequenceForAlignment(loc con
211211
//TODO: check why decoded/SequenceProvider writes outside of refSeqLen: This makes + 100 necessary
212212
refSeq = new char[refSeqLength + 100];
213213
//decode reference refSeqLength
214+
verbose(0, true, "RefStart: %llu, RefLength: %d", onRefStart, refSeqLength);
214215
if (!SequenceProvider.DecodeRefSequenceExact(refSeq, onRefStart, refSeqLength, 0)) {
215216
//Log.Warning("Could not decode reference for alignment (read: %s): %llu, %d", cur_read->Scores[scoreID].Location.m_Location - (corridor >> 1), cur_read->length + corridor, cur_read->name);
216217
Log.Warning("Could not decode reference for alignment");
@@ -429,7 +430,7 @@ Align * AlignmentBuffer::computeAlignment(Interval const * interval,
429430
delete alignFast; alignFast = 0;
430431
#endif
431432
} else {
432-
Log.Error("Could not align reference sequence for read %s.", read->name);
433+
Log.Error("Could not extract reference sequence for read %s.", read->name);
433434
validAlignment = false;
434435
}
435436

@@ -760,15 +761,16 @@ bool AlignmentBuffer::canSpanDeletionInsertion(Interval const * a, Interval cons
760761
verbose(3, true, "DistOnRef: %llu, DistOnRead: %d, Corridor: %f", distanceOnRef, distanceOnRead, corridorSize);
761762
if(distanceOnRead < (2 * readPartLength)) {
762763
//Possible deletion
763-
merge = (distanceOnRef - distanceOnRead) < corridorSize;
764+
merge = abs(distanceOnRef - distanceOnRead) < corridorSize;
764765
} else if(distanceOnRef < (2 * readPartLength)) {
765766
//Possible insertion
766-
merge = (distanceOnRead - distanceOnRef) < corridorSize;
767+
merge = abs(distanceOnRead - distanceOnRef) < corridorSize;
767768
} else {
768769
/**
769770
* Do nothing, intervals show a larger gap. Probably, poor quality read.
770771
* Better to merge than to risk fragmented alignments
771772
*/
773+
merge = abs(distanceOnRead - distanceOnRef) < corridorSize;
772774
}
773775
return merge;
774776
}
@@ -2378,22 +2380,28 @@ void AlignmentBuffer::verbose(int const tabs, bool const newLine, char const * c
23782380
bool AlignmentBuffer::extendIntervalStop(Interval * interval, int const readBp, int const readLength) {
23792381
bool extended = false;
23802382

2383+
_SequenceProvider::Chromosome chr = SequenceProvider.getChrBorders(interval->onRefStart, interval->onRefStop);
2384+
2385+
verbose(0, true, "extendIntervalStop - Located on chr %llu %llu", chr.start, chr.end);
2386+
2387+
23812388
double lengthRatio = std::min(1.0f, interval->lengthOnRead() * 1.0f / interval->lengthOnRef() * 1.0f);
23822389
// lengthRatio = 1.0;
23832390

23842391
int extendOnRead = std::min(readLength - interval->onReadStop, readBp);
23852392
int extendOnRef = (int) round(extendOnRead / lengthRatio);
23862393

2387-
loc maxExtendOnRef = SequenceProvider.GetConcatRefLen() - interval->onRefStop;
2394+
loc maxExtendOnRef = interval->onRefStop > chr.end ? 0 : chr.end - interval->onRefStop;
23882395
if (interval->isReverse) {
2389-
maxExtendOnRef = interval->onRefStop;
2396+
maxExtendOnRef = interval->onRefStop < chr.start ? 0 : interval->onRefStop - chr.start;
23902397
}
23912398

23922399
if (extendOnRef > maxExtendOnRef) {
23932400
extendOnRef = maxExtendOnRef;
23942401
extendOnRead = std::min(extendOnRead, std::max(0, (int) round(extendOnRef * lengthRatio) - 1));
23952402
}
23962403

2404+
verbose(1, true, "Min/Max extend on ref: %d/%lld", extendOnRef, maxExtendOnRef);
23972405
interval->onReadStop += extendOnRead;
23982406
extended = true;
23992407
if (interval->isReverse) {
@@ -2408,16 +2416,22 @@ bool AlignmentBuffer::extendIntervalStop(Interval * interval, int const readBp,
24082416
bool AlignmentBuffer::extendIntervalStart(Interval * interval, int const readBp) {
24092417
bool extended = false;
24102418

2419+
_SequenceProvider::Chromosome chr = SequenceProvider.getChrBorders(interval->onRefStart, interval->onRefStop);
2420+
2421+
verbose(0, true, "extendIntervalStart - Located on chr %llu %llu", chr.start, chr.end);
2422+
2423+
24112424
double lengthRatio = std::min(1.0f, interval->lengthOnRead() * 1.0f / interval->lengthOnRef() * 1.0f);
24122425
// lengthRatio = 1.0;
24132426

24142427
int extendOnRead = std::min(interval->onReadStart, readBp);
24152428
int extendOnRef = (int) round(extendOnRead / lengthRatio);
24162429

2417-
loc maxExtendOnRef = interval->onRefStart;
2430+
loc maxExtendOnRef = interval->onRefStart < chr.start ? 0 : interval->onRefStart - chr.start;
24182431
if (interval->isReverse) {
2419-
maxExtendOnRef = SequenceProvider.GetConcatRefLen() - interval->onRefStart;
2432+
maxExtendOnRef = interval->onRefStart > chr.end ? 0 : chr.end - interval->onRefStart;
24202433
}
2434+
verbose(1, true, "Min/Max extend on ref: %d/%lld", extendOnRef, maxExtendOnRef);
24212435
if (extendOnRef > maxExtendOnRef) {
24222436
extendOnRef = maxExtendOnRef;
24232437
extendOnRead = std::min(extendOnRead, std::max(0, (int) round(extendOnRef * lengthRatio) - 1));
@@ -2466,6 +2480,8 @@ bool AlignmentBuffer::shortenIntervalEnd(Interval * interval, int const readBp)
24662480

24672481
int refBp = (int) round(readBp / lengthRatio);
24682482

2483+
verbose(1, true, "%llu %d", interval->onRefStop, refBp);
2484+
24692485
if (readBp < interval->lengthOnRead() && refBp < interval->lengthOnRef()) {
24702486

24712487
interval->onReadStop = interval->onReadStop - readBp;
@@ -3344,6 +3360,7 @@ void AlignmentBuffer::processLongReadLIS(ReadGroup * group) {
33443360
currentInterval->onRefStop = tmp;
33453361
}
33463362

3363+
verbose(0, "Aligning interval: ", currentInterval);
33473364
if (!Config.getSkipalign()) {
33483365
alignSingleOrMultipleIntervals(read, currentInterval, tmpLocationScores, tmpAlingments, nTempAlignments);
33493366
} else {

src/AlignmentMatrixFast.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ bool AlignmentMatrixFast::prepare(int const width, int const height,
4444
}
4545
if ((ulong) (matrixSize * sizeof(char) / 1000.0f / 1000.0f) < maxMatrixSizeMB) {
4646
if (matrixSize > privateMatrixSize) {
47-
fprintf(stderr, "Reallocating matrix for alignment (size %llu)\n\n", (long long) (matrixSize * sizeof(char) / 1000.0f / 1000.0f));
47+
// fprintf(stderr, "Reallocating matrix for alignment (size %llu)\n\n", (long long) (matrixSize * sizeof(char) / 1000.0f / 1000.0f));
4848
delete[] directionMatrix;
4949
directionMatrix = 0;
5050
directionMatrix = new char[matrixSize];

src/ConvexAlignFast.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ void ConvexAlignFast::checkMdBufferLength(int md_offset, Align& result, int cons
105105
delete[] result.pQry;
106106
result.pQry = tmp;
107107
tmp = 0;
108-
fprintf(stderr, "Reallocating MD buffer (%d)\n", result.maxMdBufferLength);
108+
// fprintf(stderr, "Reallocating MD buffer (%d)\n", result.maxMdBufferLength);
109109
}
110110
}
111111

src/SequenceProvider.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,42 @@ static inline char dec4Low(unsigned char c) {
112112
return dec4(c & 0xF);
113113
}
114114

115+
_SequenceProvider::Chromosome _SequenceProvider::getChrBorders(
116+
loc start, loc stop) {
117+
118+
if(start > stop) {
119+
loc tmp = start;
120+
start = stop;
121+
stop = tmp;
122+
}
123+
124+
if(start < 1000) {
125+
start = 1001;
126+
}
127+
128+
// Log.Message("Detecting chromosome for: %lld %lld", start, stop);
129+
130+
uloc * upperStart = std::upper_bound(refStartPos, refStartPos + (refCount / 2) + 1, start);
131+
if ((*upperStart - start) < 1000) {
132+
upperStart += 1;
133+
}
134+
uloc * upperStop= std::upper_bound(refStartPos, refStartPos + (refCount / 2) + 1, stop);
135+
// if ((*upperStop - stop) < 1000) don't do anything. Probably still same chromosome
136+
137+
// Log.Message("Upper: %llu - %llu", upperStart, upperStop);
138+
139+
Chromosome chr;
140+
if(upperStart == upperStop) {
141+
chr.start = *(upperStart - 1);
142+
chr.end = *(upperStart) - 1000;
143+
} else {
144+
Log.Error("Could not determine chromosome for interval.");
145+
throw "Could not determine chromosome for interval.";
146+
}
147+
148+
return chr;
149+
}
150+
115151
_SequenceProvider::Chromosome _SequenceProvider::getChrStart(
116152
uloc const position) {
117153
if (position < 1000) {

src/SequenceProvider.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class _SequenceProvider {
4040

4141
bool convert(SequenceLocation & m_Location);
4242

43+
Chromosome getChrBorders(loc start, loc stop);
4344
Chromosome getChrStart(uloc const position);
4445

4546
private:

0 commit comments

Comments
 (0)