@@ -387,18 +387,21 @@ def resolve_split_genes(tmp_size_filtered, transcript_gene_map, resolved_df, unf
387387 """
388388 Use localNearBest algorithm to determine split genes and populate that field
389389 """
390- with tools .fileOps .TemporaryFilePath () as local_tmp :
391- cmd = [['sed' , 's/\-[0-9]\+//' , tmp_size_filtered ], # strip unique identifiers for comparative filters
392- ['pslCDnaFilter' , '-localNearBest=0.05' ,
393- '-minCover=0.1' , '-verbose=0' ,
394- '-minSpan=0.2' , '/dev/stdin' , '/dev/stdout' ]]
390+ with tools .fileOps .TemporaryFilePath () as local_tmp , tools .fileOps .TemporaryFilePath () as stripped_tmp :
391+ with open (stripped_tmp , 'w' ) as outf :
392+ for rec in tools .psl .psl_iterator (tmp_size_filtered ):
393+ rec .q_name = tools .nameConversions .strip_alignment_numbers (rec .q_name )
394+ tools .fileOps .print_row (outf , rec .psl_string ())
395+ cmd = ['pslCDnaFilter' , '-localNearBest=0.05' ,
396+ '-minCover=0.1' , '-verbose=0' ,
397+ '-minSpan=0.2' , stripped_tmp , '/dev/stdout' ]
395398 tools .procOps .run_proc (cmd , stdout = local_tmp )
396399 filtered_alns = list (tools .psl .psl_iterator (local_tmp ))
397400
398401 # remove alignments that we didn't resolve
399402 resolved_ids = set (resolved_df .TranscriptId )
400403 filtered_alns = [x for x in filtered_alns if x .q_name in resolved_ids ]
401- grouped = tools .psl .group_alignments_by_qname (filtered_alns )
404+ grouped = tools .psl .group_alignments_by_qname (filtered_alns , strip = False )
402405
403406 # construct the transcript interval for resolved transcripts
404407 tx_intervals = {tx_id : unfiltered_tx_dict [aln_id ].interval for
@@ -418,3 +421,4 @@ def resolve_split_genes(tmp_size_filtered, transcript_gene_map, resolved_df, unf
418421 'Number of intra-contig split genes' : len (split_gene_data ['intra' ])}
419422
420423 return merged , split_gene_metrics
424+
0 commit comments