@@ -123,9 +123,27 @@ def count_sequences_in_fasta(fasta_file_path):
123123 if args .output_non_sample :
124124 args .output_non_sample .write (">{0}\n {1}\n " .format (header , format_seq (sequence )))
125125 else :
126- for header , sequence in read_fasta_file_handle (args .input_fasta ):
126+ # When using probability mode
127+ sequences = list (read_fasta_file_handle (args .input_fasta ))
128+ selected_sequences = []
129+
130+ # Apply probability sampling
131+ for header , sequence in sequences :
127132 if random .random () <= args .proba :
128- args .output_sample .write (">{0}\n {1}\n " .format (header , format_seq (sequence )))
129- else :
130- if args .output_non_sample :
133+ selected_sequences .append ((header , sequence ))
134+
135+ # If no sequences were selected by chance but there were input sequences,
136+ # select at least one random sequence to ensure the output isn't empty
137+ if not selected_sequences and sequences :
138+ random_seq = random .choice (sequences )
139+ selected_sequences .append (random_seq )
140+
141+ # Write selected sequences to output file
142+ for header , sequence in selected_sequences :
143+ args .output_sample .write (">{0}\n {1}\n " .format (header , format_seq (sequence )))
144+
145+ # Write non-selected sequences if requested
146+ if args .output_non_sample :
147+ for header , sequence in sequences :
148+ if (header , sequence ) not in selected_sequences :
131149 args .output_non_sample .write (">{0}\n {1}\n " .format (header , format_seq (sequence )))
0 commit comments