Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit ea7bb2d

Browse files
authored Feb 14, 2017
Merge branch 'master' into feature/atomic-nibble
2 parents 8a5bf98 + 6dd8430 commit ea7bb2d

8 files changed

+74
-28
lines changed
 

‎.travis.yml

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ notifications:
1212
# for a PR. So we disable build-on-push for all branches
1313
# except the ones whitelisted here.
1414
branches:
15-
only:
15+
only:
1616
- master
1717

1818
matrix:
@@ -69,6 +69,7 @@ script:
6969

7070
# generate all the diagnostic reports
7171
after_success:
72+
- make clean
7273
- PYTEST_ADDOPTS=-qqq make coverage-gcovr.xml coverage.xml
7374
# Fix suggested by http://diff-cover.readthedocs.io/en/latest/#troubleshooting
7475
- git fetch origin master:refs/remotes/origin/master

‎Makefile

+1
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,7 @@ clean: FORCE
136136
rm -f diff-cover.html
137137
rm -Rf build dist
138138
rm -rf __pycache__/ .eggs/ khmer.egg-info/
139+
-rm *.gcov
139140

140141
debug: FORCE
141142
export CFLAGS="-pg -fprofile-arcs -D_GLIBCXX_DEBUG_PEDANTIC \

‎khmer/_khmer.cc

+23-5
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,8 @@ static bool convert_Pytablesizes_to_vector(PyListObject * sizes_list_o,
284284
}
285285

286286

287+
static FastxParserPtr& _PyObject_to_khmer_ReadParser(PyObject * py_object);
288+
287289
/***********************************************************************/
288290

289291
//
@@ -791,6 +793,16 @@ ReadParser_iter_read_pairs(PyObject * self, PyObject * args )
791793
}
792794

793795

796+
PyObject *
797+
ReadParser_close(PyObject * self, PyObject * args)
798+
{
799+
FastxParserPtr& rparser = _PyObject_to_khmer_ReadParser(self);
800+
rparser->close();
801+
802+
Py_INCREF(Py_None);
803+
return Py_None;
804+
}
805+
794806
static PyMethodDef _ReadParser_methods [ ] = {
795807
{
796808
"iter_reads", (PyCFunction)ReadParser_iter_reads,
@@ -800,6 +812,10 @@ static PyMethodDef _ReadParser_methods [ ] = {
800812
"iter_read_pairs", (PyCFunction)ReadParser_iter_read_pairs,
801813
METH_VARARGS, "Iterates over paired reads as pairs."
802814
},
815+
{
816+
"close", (PyCFunction)ReadParser_close,
817+
METH_NOARGS, "Close associated files."
818+
},
803819
{ NULL, NULL, 0, NULL } // sentinel
804820
};
805821

@@ -866,7 +882,8 @@ void _init_ReadParser_Type_constants()
866882

867883
// Place pair mode constants into class dictionary.
868884
int result;
869-
PyObject *value = PyLong_FromLong(ReadParser<FastxReader>::PAIR_MODE_IGNORE_UNPAIRED);
885+
PyObject *value = PyLong_FromLong(
886+
ReadParser<FastxReader>::PAIR_MODE_IGNORE_UNPAIRED);
870887
if (value == NULL) {
871888
Py_DECREF(cls_attrs_DICT);
872889
return;
@@ -2306,7 +2323,7 @@ CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KHashtable_Object")
23062323
= {
23072324
PyVarObject_HEAD_INIT(NULL, 0) /* init & ob_size */
23082325
"_khmer.KHashtable ", /*tp_name*/
2309-
sizeof(khmer_KHashtable_Object) , /*tp_basicsize*/
2326+
sizeof(khmer_KHashtable_Object), /*tp_basicsize*/
23102327
0, /*tp_itemsize*/
23112328
0, /*tp_dealloc*/
23122329
0, /*tp_print*/
@@ -2905,7 +2922,7 @@ labelhash_consume_fasta_and_tag_with_labels(khmer_KGraphLabels_Object * me,
29052922
//Py_BEGIN_ALLOW_THREADS
29062923
try {
29072924
hb->consume_fasta_and_tag_with_labels<FastxReader>(filename, total_reads,
2908-
n_consumed);
2925+
n_consumed);
29092926
} catch (khmer_file_exception &exc) {
29102927
exc_string = exc.what();
29112928
file_exception = exc_string.c_str();
@@ -3735,8 +3752,9 @@ static PyObject * hllcounter_consume_fasta(khmer_KHLLCounter_Object * me,
37353752
unsigned long long n_consumed = 0;
37363753
unsigned int total_reads = 0;
37373754
try {
3738-
me->hllcounter->consume_fasta<FastxReader>(filename, stream_records, total_reads,
3739-
n_consumed);
3755+
me->hllcounter->consume_fasta<FastxReader>(filename, stream_records,
3756+
total_reads,
3757+
n_consumed);
37403758
} catch (khmer_file_exception &exc) {
37413759
PyErr_SetString(PyExc_OSError, exc.what());
37423760
return NULL;

‎lib/kmer_hash.cc

+6
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,12 @@ HashIntoType _hash_murmur(const std::string& kmer,
184184
h = out[0];
185185

186186
std::string rev = khmer::_revcomp(kmer);
187+
if (rev == kmer) {
188+
// self complement kmer, can't use bitwise XOR
189+
r = out[0];
190+
return h;
191+
}
192+
187193
MurmurHash3_x64_128((void *)rev.c_str(), rev.size(), seed, &out);
188194
r = out[0];
189195

‎lib/read_parsers.cc

+26-17
Original file line number | Diff line number | Diff line change
@@ -209,11 +209,9 @@ ReadPair ReadParser<SeqIO>::get_next_read_pair(uint8_t mode)
209209
{
210210
if (mode == ReadParser<SeqIO>::PAIR_MODE_IGNORE_UNPAIRED) {
211211
return _get_next_read_pair_in_ignore_mode();
212-
}
213-
else if (mode == ReadParser<SeqIO>::PAIR_MODE_ERROR_ON_UNPAIRED) {
212+
} else if (mode == ReadParser<SeqIO>::PAIR_MODE_ERROR_ON_UNPAIRED) {
214213
return _get_next_read_pair_in_error_mode();
215-
}
216-
else {
214+
} else {
217215
std::ostringstream oss;
218216
oss << "Unknown pair reading mode: " << mode;
219217
throw UnknownPairReadingMode(oss.str());
@@ -232,6 +230,12 @@ bool ReadParser<SeqIO>::is_complete()
232230
return _parser->is_complete();
233231
}
234232

233+
template<typename SeqIO>
234+
void ReadParser<SeqIO>::close()
235+
{
236+
_parser->close();
237+
}
238+
235239
void FastxReader::_init()
236240
{
237241
seqan::open(_stream, _filename.c_str());
@@ -248,25 +252,25 @@ void FastxReader::_init()
248252
}
249253

250254
FastxReader::FastxReader()
251-
: _filename("-"), _spin_lock(0), _num_reads(0), _have_qualities(false)
255+
: _filename("-"), _spin_lock(0), _num_reads(0), _have_qualities(false)
252256
{
253257
_init();
254258
}
255259

256260
FastxReader::FastxReader(const std::string& infile)
257-
: _filename(infile),
258-
_spin_lock(0),
259-
_num_reads(0),
260-
_have_qualities(false)
261+
: _filename(infile),
262+
_spin_lock(0),
263+
_num_reads(0),
264+
_have_qualities(false)
261265
{
262266
_init();
263267
}
264268

265269
FastxReader::FastxReader(FastxReader& other)
266-
: _filename(other._filename),
267-
_spin_lock(other._spin_lock),
268-
_num_reads(other._num_reads),
269-
_have_qualities(other._have_qualities)
270+
: _filename(other._filename),
271+
_spin_lock(other._spin_lock),
272+
_num_reads(other._num_reads),
273+
_have_qualities(other._have_qualities)
270274
{
271275
_stream = std::move(other._stream);
272276
}
@@ -286,6 +290,11 @@ size_t FastxReader::get_num_reads()
286290
return _num_reads;
287291
}
288292

293+
void FastxReader::close()
294+
{
295+
seqan::close(_stream);
296+
}
297+
289298
Read FastxReader::get_next_read()
290299
{
291300
Read read;
@@ -335,10 +344,10 @@ template<typename SeqIO>
335344
ReadParserPtr<SeqIO> get_parser(const std::string& filename)
336345
{
337346
return ReadParserPtr<SeqIO>(
338-
new ReadParser<SeqIO>(
339-
std::unique_ptr<SeqIO>(new SeqIO(filename))
340-
)
341-
);
347+
new ReadParser<SeqIO>(
348+
std::unique_ptr<SeqIO>(new SeqIO(filename))
349+
)
350+
);
342351
}
343352

344353
// All template instantiations used in the codebase must be declared here.

‎lib/read_parsers.hh

+3-2
Original file line number | Diff line number | Diff line change
@@ -93,8 +93,7 @@ struct InvalidReadPair : public khmer_value_exception {
9393
unsigned char _to_valid_dna(const unsigned char c);
9494

9595

96-
struct Read
97-
{
96+
struct Read {
9897
std::string name;
9998
std::string description;
10099
std::string sequence;
@@ -167,6 +166,7 @@ public:
167166

168167
size_t get_num_reads();
169168
bool is_complete();
169+
void close();
170170
}; // class ReadParser
171171

172172

@@ -189,6 +189,7 @@ public:
189189
Read get_next_read();
190190
bool is_complete();
191191
size_t get_num_reads();
192+
void close();
192193
}; // class FastxReader
193194

194195

‎scripts/trim-low-abund.py

+10-3
Original file line number | Diff line number | Diff line change
@@ -414,7 +414,8 @@ def main():
414414
# so pairs will stay together if not orphaned. This is in contrast
415415
# to the first loop. Hence, force_single=True below.
416416

417-
paired_iter = broken_paired_reader(ReadParser(pass2filename),
417+
read_parser = ReadParser(pass2filename)
418+
paired_iter = broken_paired_reader(read_parser,
418419
min_length=K,
419420
force_single=True)
420421

@@ -432,15 +433,21 @@ def main():
432433
written_reads += 1
433434
written_bp += len(read)
434435

436+
read_parser.close()
437+
435438
log_info('removing {pass2}', pass2=pass2filename)
436439
os.unlink(pass2filename)
437440

438441
# if we created our own trimfps, close 'em.
439442
if not args.output:
440443
trimfp.close()
441444

442-
log_info('removing temp directory & contents ({temp})', temp=tempdir)
443-
shutil.rmtree(tempdir)
445+
try:
446+
log_info('removing temp directory & contents ({temp})', temp=tempdir)
447+
shutil.rmtree(tempdir)
448+
except OSError as oe:
449+
log_info('WARNING: unable to remove {temp} (probably an NFS issue); '
450+
'please remove manually', temp=tempdir)
444451

445452
trimmed_reads = trimmer.trimmed_reads
446453

‎tests/test_functions.py

+3
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,9 @@ def test_hash_murmur3():
153153
assert khmer.hash_murmur3('TTTT') == 526240128537019279
154154
assert khmer.hash_murmur3('CCCC') == 14391997331386449225
155155
assert khmer.hash_murmur3('GGGG') == 14391997331386449225
156+
assert khmer.hash_murmur3('TATATATATATATATATATA') != 0
157+
assert khmer.hash_murmur3('TTTTGCAAAA') != 0
158+
assert khmer.hash_murmur3('GAAAATTTTC') != 0
156159

157160

158161
def test_hash_no_rc_murmur3():

0 commit comments

Comments
 (0)
Please sign in to comment.