-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbedCount.c
More file actions
639 lines (568 loc) · 21.4 KB
/
bedCount.c
File metadata and controls
639 lines (568 loc) · 21.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
/* This program demonstrates how to generate pileup from multiple BAMs * simutaneously, to achieve random access and to use the BED interface.
* To compile this program separately, you may:
*
* gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -L. -lbam -lz
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>
#include <float.h>
#include <math.h>
#include <pthread.h>
#include "bam.h"
#include "functions.h"
#define NOCHROM 2
#define TOOSHORT 4
#define BED_READ_LENGTH 3000
#define DEBUG 0
//assuming chr name <200 characters
#define MAX_CHR_LENGTH 200
typedef struct {
int32_t nNames;
int32_t nBuffers;
char** names;
} nameBuffer;
#define NEW_NAME_BUFFER(X) nameBuffer X = {.nNames=0, .nBuffers=0}
void addBuffersToNameBuffer(nameBuffer* buffer, int32_t n){
int32_t newN;
newN=buffer->nBuffers+n;
char** newBuffer;
int ii;
newBuffer=(char **)malloc(newN*sizeof(char *));
for(ii=0;ii<buffer->nNames;ii++)newBuffer[ii]=buffer->names[ii];
if(buffer->nNames>0)free(buffer->names);
buffer->names=newBuffer;
buffer->nBuffers=newN;
}
void clearBuffer(nameBuffer* buffer){
int ii;
for(ii=0;ii<buffer->nNames;ii++)free(buffer->names[ii]);
buffer->nNames=0;
}
void addNameToBuffer(nameBuffer* buffer, const char * name){
if(buffer->nNames+1>buffer->nBuffers){
addBuffersToNameBuffer(buffer,10000);
}
//don't need nNames+1 because 0 based array
buffer->names[buffer->nNames]=(char *)malloc((strlen(name))*sizeof(char));
strcpy(buffer->names[buffer->nNames],name);
buffer->nNames++;
}
void copyBufferToBuffer(nameBuffer* buffer1,nameBuffer* buffer2){
int ii;
for(ii=0;ii<buffer1->nNames;ii++){
addNameToBuffer(buffer2,buffer1->names[ii]);
}
}
void destroyNameBuffer(nameBuffer* buffer){
if(buffer->nBuffers>0){
clearBuffer(buffer);
free(buffer->names);
buffer->nBuffers=0;
}
}
int cstring_cmp(const void *a, const void *b){
const char **ia = (const char **)a;
const char **ib = (const char **)b;
return strcmp(*ia, *ib);
}
int32_t countUniqueNamesInBuffer(nameBuffer* buffer){
int ii;
//don't need to do anything if 0 or 1 names
if(buffer->nNames<2)return(buffer->nNames);
//sort the names in buffer
qsort(buffer->names,buffer->nNames,sizeof(char*),cstring_cmp);
//count uniques in sorted buffer
int32_t uniqCount=1;
//for(ii=0;ii<buffer->nNames;ii++)printf("%d %s\n",ii,buffer->names[ii]);
for(ii=1;ii<buffer->nNames;ii++){
//printf("%s==%s\n",buffer->names[ii-1],buffer->names[ii]);
if(strcmp(buffer->names[ii-1],buffer->names[ii])!=0)uniqCount++;
}
return(uniqCount);
}
typedef struct { // auxiliary data structure
bamFile fp; // the file handler
bam_iter_t iter; // NULL if a region not specified
} aux_t;
typedef struct {
char chr[MAX_CHR_LENGTH];
int start, tStart;
int end;
char strand;
char name[MAX_CHR_LENGTH];
int32_t tid;
} region;
void setRegionStart(region *reg,int start){
reg->start=start;
reg->tStart=start-1;
}
int sprintRegion(char *str,const region *reg){
sprintf(str,"%s:%d-%d",reg->chr,reg->start,reg->end);
return(0);
}
char* printRegion(const region *reg){//careful with this one when parallel
static char text[BED_READ_LENGTH];
sprintRegion(text,reg);
return(text);
}
struct regionArray{
region* regions;
int num;
};
int destroyRegionArray(struct regionArray x){
free(x.regions);
return(0);
}
typedef struct {
int *startStop[2];
int num;
} startStopArray;
void destroyStartStopArray(startStopArray *startStops){
int ii;
for(ii=0;ii<2;ii++)free(startStops->startStop[ii]);
}
struct regionList{
region reg;
struct regionList* next;
};
typedef struct {
char reg[300];//assuming chrx:98987898-78998789 less than 300 characters
int *counts;
} regAndCounts;
typedef struct {
char parent[300];
int exonNum;
int countNum;
regAndCounts *exonCounts;
} exonCountArray;
int fprintExonCountArray(FILE *fp,exonCountArray *exons,int numGenes){
int ii,jj,kk;
for(ii=0;ii<numGenes;ii++){
for(jj=0;jj<exons[ii].exonNum;jj++){
fprintf(fp, "%s\t%s",exons[ii].parent,exons[ii].exonCounts[jj].reg);
for(kk=0;kk<exons[ii].countNum;kk++){
fprintf(fp, "\t%d",exons[ii].exonCounts[jj].counts[kk]);
}
fprintf(fp,"\n");
}
}
return(0);
}
int getBedLine(FILE *fp,region* out){
char buffer[BED_READ_LENGTH]; //set this outside so we don't have to create every time?
char tmp[BED_READ_LENGTH];
//should deal with empty lines here
if(fgets(buffer,BED_READ_LENGTH,fp) == NULL){
if(feof(fp))return(1);
fprintf(stderr,"Problem reading file %s\n",strerror(ferror(fp)));
return(2);
}
//counters
int ii;
int tmpPos=0;
int field=0;
for(ii=0;ii<strlen(buffer);ii++){
if(buffer[ii]=='\t'||buffer[ii]=='\n'){
tmp[tmpPos]='\0';
if(tmpPos>MAX_CHR_LENGTH)tmp[MAX_CHR_LENGTH]='\0'; //avoid buffer overflow by truncating
switch (field) {
case 0: strcpy(out->chr,tmp); break;
case 1: setRegionStart(out, atoi(tmp)+1); break; //+1 to deal with ucsc 0-based starts
case 2: out->end = atoi(tmp); break;
case 3: strcpy(out->name,tmp); break;
case 4: out->strand = tmp[0]; break;
}
field++;
tmpPos=0;
if(field>4)break;
}else{
tmp[tmpPos]=buffer[ii];
tmpPos++;
}
}
//make sure we get the whole line (discarding anything in other fields)
while(buffer[strlen(buffer)-1]!='\n' && fgets(buffer,BED_READ_LENGTH,fp) != NULL){}
//deal with empty
if(field<3){ fprintf(stderr,"Incomplete bed line"); exit(9); }
if(field<4)sprintRegion(out->name,out);
if(field<5)out->strand ='*';
return(0);
}
int readBed(FILE *bedFile, struct regionArray* out){
region location;
struct regionList *head=NULL, *current=NULL, *new;
int count=0;
int ii;
while(getBedLine(bedFile,&location)==0){
new=(struct regionList*)malloc(sizeof(struct regionList));
new->reg=location;
new->next=NULL;
if(count==0) head=new;
else current->next=new;
current=new;
count++;
}
if(head==NULL||count==0)return(1);
out->regions=(region*)malloc(count*sizeof(region));
out->num=count;
current=head;
for(ii=0;ii<count;ii++){
out->regions[ii]=current->reg;
new=current->next;
free(current);
current=new;
}
return(0);
}
int assignTidToLocations(struct regionArray *locations, bam_header_t *h){
int anyAssigned=0;
int dummy1, dummy2;
int ii;
for(ii=0;ii<locations->num;ii++){
if(bam_parse_region(h, printRegion(&locations->regions[ii]), &locations->regions[ii].tid, &dummy1, &dummy2)!=0){
if(DEBUG)fprintf(stderr,"Couldn't figure out %s. Called %d\n",printRegion(&locations->regions[ii]),locations->regions[ii].tid);
//return(-1);
}else{
anyAssigned=1;
}
}
return(!anyAssigned);
}
//data structure to pass into bam_fetch
typedef struct {
int *counter;
struct regionArray regionArray;
nameBuffer names;
int singleEnd;
int min_mapQ;
} fetchData;
#define NEW_FETCH_DATA(X) fetchData X;X.names.nNames=0;X.names.nBuffers=0;X.singleEnd=1;
// callback for bam_fetch()
int fetchFunc(const bam1_t *b, void *data){
//convert data into regionArray
fetchData *regionArrayAndCounter=(fetchData*)data;
struct regionArray *region=®ionArrayAndCounter->regionArray;
int singleEnd=regionArrayAndCounter->singleEnd;
int mapQ=regionArrayAndCounter->min_mapQ;
int ii,jj;
uint32_t operation,length;
uint32_t *cigarBuffer=bam1_cigar(b);
int genomePos=(int)b->core.pos;
int operationEnd;
int isInsideTarget=0;
//flag 256 => not primary alignment. samtools depth appears to ignore these by default so I guess I'll follow
//flag 1 => read paired. Only keep pairs (could add option)
//flag 2 => read mapped in proper pair. Only keep proper pairs
if(b->core.flag & BAM_FSECONDARY || ( !singleEnd && (!(b->core.flag & BAM_FPAIRED) || !(b->core.flag & BAM_FPROPER_PAIR))))return(0);
//mapping quality too poor
if(b->core.qual < mapQ)return(0);
for(ii=0;ii<b->core.n_cigar;ii++){
operation=((cigarBuffer[ii])&0xf);
length=((cigarBuffer[ii])>>4);
operationEnd=genomePos-1;
if(operation==0||operation==2||operation==3||operation==7||operation==8){
operationEnd+=length;
}
if(operation==6){fprintf(stderr,"SAM operations 6 padding undefined in this code but found here");exit(12);}
if((operation==0||operation==8||operation==7)&&length>0){
for(jj=0;jj<region->num;jj++){
if(region->regions[jj].start <= operationEnd && region->regions[jj].end >= genomePos && checkStrand(region->regions[jj].strand,b->core.flag,singleEnd)){
//printf("%d/%d %d-%d:%d\n",ii+1,b->core.n_cigar,genomePos,operationEnd,operation);
isInsideTarget=1;
break;
}
}
if(isInsideTarget)break;
}
genomePos=operationEnd+1;
}
if(isInsideTarget)*regionArrayAndCounter->counter=*regionArrayAndCounter->counter+1;
if(isInsideTarget)addNameToBuffer(®ionArrayAndCounter->names,bam1_qname(b));
//free(cigarBuffer); //I think bam1_cigar actually returns a pointer to something inside b and things crash if we free it. let bam take care of it
return(0);
}
int getUniqueReadsFromFiles(aux_t **data, bam_index_t **indices, const startStopArray *breaks, const region *location, int nFiles, int **exonCounts, int singleEnd,int min_mapQ, nameBuffer *nameStores,int reportGlobalUnique){
int ii,jj;
//fetchData regionArrayAndCounter;
NEW_FETCH_DATA(regionArrayAndCounter);
regionArrayAndCounter.regionArray.regions=(region*)malloc(sizeof(region));
regionArrayAndCounter.regionArray.regions[0].tid=location->tid;
regionArrayAndCounter.regionArray.num=1;
regionArrayAndCounter.singleEnd=singleEnd;
regionArrayAndCounter.min_mapQ=min_mapQ;
strcpy(regionArrayAndCounter.regionArray.regions[0].chr,location->chr);
strcpy(regionArrayAndCounter.regionArray.regions[0].name,location->name);
regionArrayAndCounter.regionArray.regions[0].strand=location->strand;
for(ii=0;ii<breaks->num;ii++){
if(breaks->startStop[1][ii]<breaks->startStop[0][ii]){
fprintf(stderr,"Start greater than end when finding unique reads in breaks:%s\n",printRegion(location));
exit(9);
}
setRegionStart(®ionArrayAndCounter.regionArray.regions[0],location->start+breaks->startStop[0][ii]);
regionArrayAndCounter.regionArray.regions[0].end=location->start+breaks->startStop[1][ii];
if(DEBUG)fprintf(stderr,"%d - %d,%s, tid:%d ",breaks->startStop[0][ii],breaks->startStop[1][ii],printRegion(®ionArrayAndCounter.regionArray.regions[0]),regionArrayAndCounter.regionArray.regions[0].tid);
for(jj=0;jj<nFiles;jj++){
regionArrayAndCounter.counter=&exonCounts[jj][ii];
*regionArrayAndCounter.counter=0;
bam_fetch(data[jj]->fp,indices[jj],regionArrayAndCounter.regionArray.regions[0].tid,regionArrayAndCounter.regionArray.regions[0].tStart,regionArrayAndCounter.regionArray.regions[0].end, ®ionArrayAndCounter, fetchFunc);
if(DEBUG)fprintf(stderr," Naive count%d: %d All names%d: %d Unique names%d: %d\n",jj,exonCounts[jj][ii],jj,regionArrayAndCounter.names.nNames,jj,countUniqueNamesInBuffer(&(regionArrayAndCounter.names)));
//could put an option here or not calculate naive count
exonCounts[jj][ii]=countUniqueNamesInBuffer(&(regionArrayAndCounter.names));
//copy names to global buffer here
if(reportGlobalUnique)copyBufferToBuffer(&(regionArrayAndCounter.names),&nameStores[jj]);
//int kk;
//for(kk=0;kk<regionArrayAndCounter.names.nNames;kk++)printf("%s ",regionArrayAndCounter.names.names[kk]);
clearBuffer(&(regionArrayAndCounter.names));
}
destroyNameBuffer(®ionArrayAndCounter.names);
free(regionArrayAndCounter.regionArray.regions);
if(DEBUG)fprintf(stderr,"\n");
}
return(0);
}
int storeExonCounts(exonCountArray *exonStoreCell,const startStopArray *breaks, int * const*exonCounts,int nFiles, const region *location){
int ii,jj;
sprintRegion(exonStoreCell->parent,location);
exonStoreCell->countNum=nFiles;
exonStoreCell->exonNum=breaks->num;
if(exonStoreCell->exonNum==0)return(0);
exonStoreCell->exonCounts=(regAndCounts *)malloc(exonStoreCell->exonNum*sizeof(regAndCounts));
for(ii=0;ii<exonStoreCell->exonNum;ii++){
exonStoreCell->exonCounts[ii].counts=(int *)malloc(nFiles*sizeof(int));
sprintf(exonStoreCell->exonCounts[ii].reg,"%s:%d-%d",location->chr,location->start+breaks->startStop[0][ii],location->start+breaks->startStop[1][ii]);
for(jj=0;jj<nFiles;jj++){
exonStoreCell->exonCounts[ii].counts[jj]=exonCounts[jj][ii];
}
}
return(0);
}
int destroyExonCountArray(exonCountArray *exonStore, int num){
int ii,jj;
for(ii=0;ii<num;ii++){
if(exonStore[ii].exonNum==0)continue;
for(jj=0;jj<exonStore[ii].exonNum;jj++){
free(exonStore[ii].exonCounts[jj].counts);
}
free(exonStore[ii].exonCounts);
}
return(0);
}
double getCountForRegion(region location, bam_index_t **indices, aux_t **data, int nFiles, int breakPadding, exonCountArray *exonStoreCell, int singleEnd, nameBuffer *nameStores,int reportGlobalUnique, int min_mapQ){
int nBases;
int ii;
int isTooShort=0;
startStopArray breaks;
if(location.tid<0)return(NOCHROM);
nBases=location.end-location.start+1;
if(nBases<=2*breakPadding)isTooShort=1; //don't return here since we want to store zeros
breaks.num=1;
for(ii=0;ii<2;ii++)breaks.startStop[ii]=calloc(1,sizeof(int));
breaks.startStop[0][0]=breakPadding;
breaks.startStop[1][0]=nBases-breakPadding-1;
int **exonCounts=calloc(nFiles,sizeof(int*));
for(ii=0;ii<nFiles;ii++)exonCounts[ii]=calloc(breaks.num,sizeof(int));
if(!isTooShort)getUniqueReadsFromFiles(data, indices, &breaks, &location, nFiles, exonCounts, singleEnd, min_mapQ, nameStores,reportGlobalUnique);
//Store break counts for later output
storeExonCounts(exonStoreCell,&breaks,exonCounts,nFiles,&location);
for(ii=0;ii<nFiles;ii++)free(exonCounts[ii]);
free(exonCounts);
//Clean up break finding
destroyStartStopArray(&breaks);
//might want to return break coords and counts too
if(isTooShort)return(TOOSHORT);
else return(0);
}
struct scoreArgs{
struct regionArray locations;
bam_index_t **indices;
aux_t **data;
int nFiles;
int stepSize;
int start;
int breakPadding;
int singleEnd;
exonCountArray *exonStore;
nameBuffer *nameStore;
int reportGlobalUnique;
int vocal;
int min_mapQ;
};
void* getScoreParallel(void *getScoreArgs){
int ii;
struct scoreArgs *args=(struct scoreArgs *)getScoreArgs;
//if(DEBUG)fprintf(stderr,"Thread started. start: %d, stepsize:%d\n",args->start,args->stepSize);
for(ii=args->start;ii<args->locations.num;ii+=args->stepSize){
if(args->vocal){
fprintf(stderr,".");
fflush(stderr);
}
//if(DEBUG)fprintf(stderr,"Thread %d working on region %d (%s)\n",args->start,ii,printRegion(&args->locations.regions[ii]));
getCountForRegion(args->locations.regions[ii], args->indices, args->data, args->nFiles, args->breakPadding, &args->exonStore[ii],args->singleEnd,args->nameStore,args->reportGlobalUnique,args->min_mapQ);
//if(DEBUG)fprintf(stderr,"Thread %d finished region %d. Score: %.3f\n",args->start,ii,args->scores[ii]);
}
pthread_exit(NULL);
}
#ifdef _MAIN_BAM2DEPTH
int main(int argc, char *argv[])
#else
int main_depth(int argc, char *argv[])
#endif
{
int ii, jj, nFiles;
int mapQ = 0;
bam_header_t *h=0; // BAM header of the 1st input
aux_t ***data;
bam_index_t ***indices;
char *bed = NULL;
FILE *bedFile;
struct regionArray locations;
pthread_t *threads;
struct scoreArgs *args;
int nThreads=1;
int breakPadding=15;
int singleEnd=0;
int *globalCounts;
int reportGlobalUnique=0;
int vocal=0;
//storing read counts in exons
exonCountArray *exonStore;
nameBuffer **nameStore;
char usage[50000];
sprintf(usage,"Usage: %.1000s [-r reg] [-q baseQthres] [-Q mapQthres] [-b in.bed] [-s] [-G] [-v] [-h] <in1.bam> [...]\nOutput: streams to standard out a tab separated file with columns; region, trimmedRegion, count1, count2 ...\n where countX is is the count for that region in the Xth file argument\nArguments:\n first and additional arguments: bam files to be parsed\n -Q: only count reads with a map quality greater than or equal this (default:0) \n -B: don't count reads only falling within this number of bases of the borders of a region (default: 15)\n -t: number of threads to use (default: 1)\n -s: Data is single end reads. Do not filter out reads not in a well mapped pair. Assume strandedness behaves as first read (default: paired reads)\n -G report the total unique reads combined over all the regions\n -v: increase verbosity\n -h: display this message and exit\n",argv[0]);
// parse the command line
while ((ii = getopt(argc, argv, "b:Q:B:t:sGvh")) >= 0) {
switch (ii) {
case 'b': bed = strdup(optarg); break; // BED or position list file can be parsed now
case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold
case 'B': breakPadding = atoi(optarg); break; //don't count reads only falling within this number of bases within break
case 't': nThreads = atoi(optarg); break; //number of threads to use
case 's': singleEnd = 1; break; //do not report only report good pairs
case 'G': reportGlobalUnique = 1; break; //report the total unique reads in all regions
case 'v': vocal = 1; break; //report progress to stderr
case 'h':
fprintf(stdout,"%s",usage);
return(0);
}
}
if(bed==NULL){
fprintf(stderr,"Must specify a bed file\n");
return(3);
}
if (optind == argc) {
fprintf(stderr,"%s",usage);
return(1);
}
nFiles = argc - optind; // the number of BAMs on the command lined
// initialize the auxiliary data structures
data = calloc(nThreads, sizeof(aux_t**)); // data[i] for the j-th thread
for(ii=0;ii<nThreads;ii++)data[ii]=calloc(nFiles, sizeof(aux_t*)); // data[i] for the i-th input in j-th thread
indices =calloc(nThreads,sizeof(bam_index_t**));
for(ii=0;ii<nThreads;ii++)indices[ii] = calloc(nFiles,sizeof(bam_index_t*));
for(jj = 0; jj < nThreads; jj++){ //may not need to duplicate all these
for (ii = 0; ii < nFiles; ++ii) {
bam_header_t *htmp;
data[jj][ii] = calloc(1, sizeof(aux_t));
data[jj][ii]->fp = bam_open(argv[optind+ii], "r"); // open BAM
if(data[jj][ii]->fp == NULL){
fprintf(stderr,"Problem reading file %s\n",argv[optind+ii]);
return(5);
}
htmp = bam_header_read(data[jj][ii]->fp); // read the BAM header
if (ii == 0) {
h = htmp; // keep the header of the 1st BAM
} else bam_header_destroy(htmp); // if not the 1st BAM, trash the header
indices[jj][ii] = bam_index_load(argv[optind+ii]); // load the index
}
}
//read bed file
bedFile=fopen(bed,"rt");
if(bedFile == NULL){
fprintf(stderr,"Problem reading file %s\n",bed);
return(2);
}
free(bed);
readBed(bedFile,&locations);
fclose(bedFile);
if(locations.num<1){
fprintf(stderr,"No lines in file %s\n",bed);
return(3);
}
if(assignTidToLocations(&locations,h)!=0){
fprintf(stderr,"No known chromosomes in file");
return(4);
}
threads=(pthread_t *)malloc(nThreads*sizeof(pthread_t));
args=(struct scoreArgs *)malloc(nThreads*sizeof(struct scoreArgs));
exonStore=(exonCountArray *)malloc(locations.num*sizeof(exonCountArray));
for(ii=0;ii<locations.num;ii++)exonStore[ii].exonNum=0;
nameStore=(nameBuffer **)malloc(nThreads*sizeof(nameBuffer*));
for(ii=0;ii<nThreads;ii++){
nameStore[ii]=(nameBuffer *)malloc(nFiles*sizeof(nameBuffer));
for(jj=0;jj<nFiles;jj++){
nameStore[ii][jj].nNames=0;
nameStore[ii][jj].nBuffers=0;
if(reportGlobalUnique)addBuffersToNameBuffer(&nameStore[ii][jj],200000); //save a bunch of room
}
}
for(ii=0;ii<nThreads;ii++){
//set up args
args[ii].locations=locations;
args[ii].indices=indices[ii];
args[ii].data=data[ii];
args[ii].nFiles=nFiles;
args[ii].breakPadding=breakPadding;
args[ii].stepSize=nThreads;
args[ii].start=ii;
args[ii].exonStore=exonStore;
args[ii].singleEnd=singleEnd;
args[ii].nameStore=nameStore[ii];
args[ii].reportGlobalUnique=reportGlobalUnique;
args[ii].vocal=vocal;
args[ii].min_mapQ=mapQ;
if(pthread_create(&threads[ii],NULL,getScoreParallel,&args[ii])){fprintf(stderr,"Couldn't create thread");exit(9);}
}
for(ii=0;ii<nThreads;ii++){
if(pthread_join(threads[ii],NULL)){fprintf(stderr,"Couldn't join thread");exit(10);}
}
free(threads);
fprintExonCountArray(stdout,exonStore,locations.num);
if(reportGlobalUnique){
//find global uniques
globalCounts=(int *)malloc(nFiles*sizeof(int));
fprintf(stdout, "GLOBAL\tGLOBAL");
for(jj=0;jj<nFiles;jj++){
for(ii=1;ii<nThreads;ii++){ //use name[ii][0] as base
copyBufferToBuffer(&nameStore[ii][jj],&nameStore[0][jj]);
destroyNameBuffer(&nameStore[ii][jj]);
}
globalCounts[jj]=countUniqueNamesInBuffer(&nameStore[0][jj]);
fprintf(stdout, "\t%d",globalCounts[jj]);
destroyNameBuffer(&nameStore[0][jj]);
}
fprintf(stdout, "\n");
free(globalCounts);
}
for(ii=0;ii<nThreads;ii++)free(nameStore[ii]);
free(nameStore);
free(args);
destroyExonCountArray(exonStore,locations.num);
destroyRegionArray(locations);
free(exonStore);
for(jj=0;jj<nThreads;jj++){
for (ii = 0; ii < nFiles; ++ii) {
bam_close(data[jj][ii]->fp);
free(data[jj][ii]);
bam_index_destroy(indices[jj][ii]);
}
free(indices[jj]);
free(data[jj]);
}
bam_header_destroy(h);
free(indices);
free(data);
fprintf(stderr,"All done. Thanks\n----------------\n");
return 0;
}