Skip to content

Commit 9be5c9f

Browse files
committed
Fix gener_sample_fasta.py to ensure at least one sequence is selected in probability mode
1 parent a8d1e12 commit 9be5c9f

1 file changed

Lines changed: 22 additions & 4 deletions

File tree

src/scripts/fasta/gener_sample_fasta.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -123,9 +123,27 @@ def count_sequences_in_fasta(fasta_file_path):
123123
if args.output_non_sample:
124124
args.output_non_sample.write(">{0}\n{1}\n".format(header, format_seq(sequence)))
125125
else:
126-
for header, sequence in read_fasta_file_handle(args.input_fasta):
126+
# When using probability mode
127+
sequences = list(read_fasta_file_handle(args.input_fasta))
128+
selected_sequences = []
129+
130+
# Apply probability sampling
131+
for header, sequence in sequences:
127132
if random.random() <= args.proba:
128-
args.output_sample.write(">{0}\n{1}\n".format(header, format_seq(sequence)))
129-
else:
130-
if args.output_non_sample:
133+
selected_sequences.append((header, sequence))
134+
135+
# If no sequences were selected by chance but there were input sequences,
136+
# select at least one random sequence to ensure the output isn't empty
137+
if not selected_sequences and sequences:
138+
random_seq = random.choice(sequences)
139+
selected_sequences.append(random_seq)
140+
141+
# Write selected sequences to output file
142+
for header, sequence in selected_sequences:
143+
args.output_sample.write(">{0}\n{1}\n".format(header, format_seq(sequence)))
144+
145+
# Write non-selected sequences if requested
146+
if args.output_non_sample:
147+
for header, sequence in sequences:
148+
if (header, sequence) not in selected_sequences:
131149
args.output_non_sample.write(">{0}\n{1}\n".format(header, format_seq(sequence)))

0 commit comments

Comments
 (0)