[go: nahoru, domu]

Skip to content

Commit

Permalink
Convert uses of legacy samtools API to use HTSlib API instead
Browse files Browse the repository at this point in the history
Change to include htslib/*.h headers; use HTSlib-style type and
function names; use sam_hdr_read() and pass bam_hdr_t headers
explicitly to sam_read1().

Remove references to libbam.a and don't build it. The only remaining
use of submodules/samtools is in the install-samtools rule.

Use HTSlib's kseq.h instead of a local copy.
  • Loading branch information
jmarshall committed Oct 31, 2019
1 parent 16ca66c commit a3b57ad
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 313 deletions.
23 changes: 8 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,23 @@ CUNIT=/usr/include/CUnit

all: $(B)/extractHAIRS $(B)/HAPCUT2

# if samtools makefile not present, then submodules have not yet been downloaded (init & updated)
# if HTSlib makefile not present, then submodules have not yet been downloaded (init & updated)
# first check if git present, else print error message
# credit to lhunath for the one-line check for git below http://stackoverflow.com/questions/592620/check-if-a-program-exists-from-a-bash-script
$(SAMTOOLS)/Makefile:
$(HTSLIB)/Makefile:
@hash git 2>/dev/null || { echo >&2 "Git not installed (required for auto-download of required submodules). Install git and retry, or manually download samtools 1.2 and htslib 1.2.1 and place the unzipped folders in \"submodules\" directory with names matching their Makefile variables (default \"samtools\" and \"htslib\")"; exit 1; }
git submodule init
git submodule update

# just call the samtools makefile target because it downloads both htslib and samtools submodules
$(HTSLIB)/Makefile: $(SAMTOOLS)/Makefile

$(SAMTOOLS)/libbam.a: $(SAMTOOLS)/Makefile
echo "Building Samtools bam library..."
make -C $(SAMTOOLS) lib

$(HTSLIB)/libhts.a: $(HTSLIB)/Makefile
echo "Building HTSlib libraries..."
make -C $(HTSLIB) lib-static

# BUILD HAIRS

#temporarily removed -O2 flag after -I$(HTSLIB)
$(B)/extractHAIRS: $(B)/bamread.o $(B)/hashtable.o $(B)/readvariant.o $(B)/readfasta.o $(B)/hapfragments.o $(H)/extracthairs.c $(SAMTOOLS)/libbam.a $(HTSLIB)/libhts.a $(H)/parsebamread.c $(H)/realignbamread.c $(H)/nw.c $(H)/realign_pairHMM.c $(H)/estimate_hmm_params.c | $(B)
$(CC) -I$(SAMTOOLS) -I$(HTSLIB) -g $(B)/bamread.o $(B)/hapfragments.o $(B)/hashtable.o $(B)/readfasta.o $(B)/readvariant.o -o $(B)/extractHAIRS $(H)/extracthairs.c -L$(SAMTOOLS) -L$(HTSLIB) -pthread -lhts -lbam -lm -lz
$(B)/extractHAIRS: $(B)/bamread.o $(B)/hashtable.o $(B)/readvariant.o $(B)/readfasta.o $(B)/hapfragments.o $(H)/extracthairs.c $(HTSLIB)/libhts.a $(H)/parsebamread.c $(H)/realignbamread.c $(H)/nw.c $(H)/realign_pairHMM.c $(H)/estimate_hmm_params.c | $(B)
$(CC) -I$(HTSLIB) -g $(B)/bamread.o $(B)/hapfragments.o $(B)/hashtable.o $(B)/readfasta.o $(B)/readvariant.o -o $(B)/extractHAIRS $(H)/extracthairs.c -L$(HTSLIB) -pthread -lhts -lm -lz
#temporarily removed -O2 flag after -I$(HTSLIB)

$(B)/hapfragments.o: $(H)/hapfragments.c $(H)/hapfragments.h $(H)/readvariant.h | $(B)
Expand All @@ -49,19 +42,19 @@ $(B)/hapfragments.o: $(H)/hapfragments.c $(H)/hapfragments.h $(H)/readvariant.h
$(B)/readvariant.o: $(H)/readvariant.c $(H)/readvariant.h $(H)/hashtable.h $(H)/hashtable.c | $(B)
$(CC) -c -I$(HTSLIB) $(H)/readvariant.c -o $(B)/readvariant.o

$(B)/bamread.o: $(H)/bamread.h $(H)/bamread.c $(H)/readfasta.h $(H)/readfasta.c $(SAMTOOLS)/libbam.a $(HTSLIB)/libhts.a | $(B)
$(CC) -I$(SAMTOOLS) -I$(HTSLIB) -c $(H)/bamread.c -o $(B)/bamread.o
$(B)/bamread.o: $(H)/bamread.h $(H)/bamread.c $(H)/readfasta.h $(H)/readfasta.c $(HTSLIB)/libhts.a | $(B)
$(CC) -I$(HTSLIB) -c $(H)/bamread.c -o $(B)/bamread.o

$(B)/hashtable.o: $(H)/hashtable.h $(H)/hashtable.c | $(B)
$(CC) -c $(H)/hashtable.c -o $(B)/hashtable.o

$(B)/readfasta.o: $(H)/readfasta.c $(H)/readfasta.h | $(B)
$(CC) -c $(H)/readfasta.c -o $(B)/readfasta.o
$(CC) -I$(HTSLIB) -c $(H)/readfasta.c -o $(B)/readfasta.o

# BUILD HAPCUT2

$(B)/HAPCUT2: $(B)/fragmatrix.o $(B)/readinputfiles.o $(B)/pointerheap.o $(B)/common.o $(X)/hapcut2.c $(X)/find_maxcut.c $(X)/post_processing.c| $(B)
$(CC) $(B)/common.o $(B)/fragmatrix.o $(B)/readinputfiles.o $(B)/pointerheap.o -o $(B)/HAPCUT2 -lm $(X)/hapcut2.c -L$(HTSLIB) -lhts
$(CC) $(B)/common.o $(B)/fragmatrix.o $(B)/readinputfiles.o $(B)/pointerheap.o -o $(B)/HAPCUT2 -lm $(X)/hapcut2.c -L$(HTSLIB) -lhts

$(B)/common.o: $(X)/common.h $(X)/common.c | $(B)
$(CC) -c $(X)/common.c -o $(B)/common.o
Expand Down
14 changes: 7 additions & 7 deletions hairs-src/bamread.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ void print_read_debug(struct alignedread* read)
}

int fetch_func(const bam1_t *b, void *data, struct alignedread* read) {
samfile_t *fp = (samfile_t*) data;
uint32_t *cigar = bam1_cigar(b);
bam_hdr_t *hdr = (bam_hdr_t*) data;
uint32_t *cigar = bam_get_cigar(b);
const bam1_core_t *c = &b->core;
int i, op, ol;
read->cigs = 0;
Expand All @@ -28,9 +28,9 @@ int fetch_func(const bam1_t *b, void *data, struct alignedread* read) {
read->sequence = (char*) malloc(b->core.l_qseq + 1);
read->quality = (char*) malloc(b->core.l_qseq + 1);

uint8_t* sequence = bam1_seq(b);
uint8_t* quality = bam1_qual(b);
for (i = 0; i < b->core.l_qseq; i++) read->sequence[i] = bam_nt16_rev_table[bam1_seqi(sequence, i)];
uint8_t* sequence = bam_get_seq(b);
uint8_t* quality = bam_get_qual(b);
for (i = 0; i < b->core.l_qseq; i++) read->sequence[i] = seq_nt16_str[bam_seqi(sequence, i)];
read->sequence[i] = '\0';
if (quality[0] == 255) // quality string is missing, 01/29/2014, quality is set to minimum quality value specified using --minq
{
Expand Down Expand Up @@ -99,9 +99,9 @@ int fetch_func(const bam1_t *b, void *data, struct alignedread* read) {
for (i = 0; i < c->l_qname; i++) read->readid[i] = qs[i];
read->readid[i] = '\0';

if (c->tid >= 0) read->chrom = fp->header->target_name[c->tid];
if (c->tid >= 0) read->chrom = hdr->target_name[c->tid];
else read->chrom = NULL;
if (c->mtid >= 0) read->matechrom = fp->header->target_name[c->mtid];
if (c->mtid >= 0) read->matechrom = hdr->target_name[c->mtid];
else read->matechrom = NULL;
read->tid = c->tid;
read->mtid = c->mtid;
Expand Down
2 changes: 1 addition & 1 deletion hairs-src/bamread.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include<string.h>
#include<ctype.h>
#include "readfasta.h"
#include "sam.h"
#include "htslib/sam.h"
#include "htslib/hts.h"

extern int QVoffset;
Expand Down
33 changes: 20 additions & 13 deletions hairs-src/estimate_hmm_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,18 +183,26 @@ int realignment_params(char* bamfile,REFLIST* reflist,char* regions,Align_Params
// 0 = M | I =1, D = 2 from cigar encoding
char* newregion = calloc(4096,sizeof(char));

samfile_t *fp;
samFile *fp;
bam_hdr_t *hdr;
bam1_t *b;
bam_iter_t iter;
bam_index_t *idx; // bam file index
int ret; int ref=-1,beg=0,end=0;
hts_itr_t *iter = NULL;
hts_idx_t *idx; // bam file index
int ret;

if ((fp = samopen(bamfile, "rb", 0)) == 0)
if ((fp = sam_open(bamfile, "r")) == NULL)
{
fprintf(stderr, "Fail to open BAM file %s\n", bamfile);
return -1;
}

if ((hdr = sam_hdr_read(fp)) == NULL)
{
fprintf(stderr, "Fail to read header from file %s\n", bamfile);
sam_close(fp);
return -1;
}

if (regions != NULL)
{
strcpy(newregion,regions);
Expand All @@ -205,12 +213,11 @@ int realignment_params(char* bamfile,REFLIST* reflist,char* regions,Align_Params
newregion[i+1] = '\0';
//fprintf(stderr,"newrion %s %s \n",regions,newregion);
if ((idx = bam_index_load(bamfile)) ==0) { fprintf(stderr,"unable to load bam index for file %s\n",bamfile); return -1; }
bam_parse_region(fp->header,newregion,&ref,&beg,&end);
if (ref < 0) { fprintf(stderr,"invalid region for bam file %s \n",regions); return -1; }
iter = bam_iter_query(idx,ref,beg,end);
iter = sam_itr_querys(idx,hdr,newregion);
if (iter == NULL) { fprintf(stderr,"invalid region for bam file %s \n",regions); return -1; }
}

b = bam_init1(); // samread(fp,b)
b = bam_init1(); // sam_read1(fp,hdr,b)

int prevtid =-1;
int reads=0,ureads=0,useful=0;
Expand All @@ -219,10 +226,10 @@ int realignment_params(char* bamfile,REFLIST* reflist,char* regions,Align_Params

while (reads < 10000)
{
if (ref < 0) ret = samread(fp,b); // read full bam file
else ret = bam_iter_read(fp->x.bam,iter,b); // specific region
if (!iter) ret = sam_read1(fp,hdr,b); // read full bam file
else ret = sam_itr_next(fp,iter,b); // specific region
if (ret < 0) break;
fetch_func(b, fp, read);
fetch_func(b, hdr, read);
if ((read->flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP | 2048)) || read->mquality < MIN_MQ)
{
free_readmemory(read); continue;
Expand Down Expand Up @@ -250,6 +257,6 @@ int realignment_params(char* bamfile,REFLIST* reflist,char* regions,Align_Params
fprintf(stderr,"using %d reads to estimate realignment parameters for HMM \n",reads);
if (ureads > 1000) print_error_params(emission_counts,trans_counts,indel_lengths,AP);

bam_destroy1(b); samclose(fp); free(newregion);
bam_destroy1(b); bam_hdr_destroy(hdr); sam_close(fp); free(newregion);

}
31 changes: 17 additions & 14 deletions hairs-src/extracthairs.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "hashtable.h"
#include "readfasta.h"
#include "bamread.h"
#include "sam.h"
#include "htslib/sam.h"
#include "readvariant.h"
#include "hapfragments.h"

Expand Down Expand Up @@ -144,35 +144,38 @@ int parse_bamfile_sorted(char* bamfile, HASHTABLE* ht, CHROMVARS* chromvars, VAR
fragment.variants = 0;
fragment.alist = (allele*) malloc(sizeof (allele)*16184);

samfile_t *fp;
samFile *fp;
bam_hdr_t *hdr;
bam1_t *b;
int ref=-1,beg=0,end=0;
int ret; // for reading indexed bam files
bam_index_t *idx; // bam file index
bam_iter_t iter;
hts_idx_t *idx; // bam file index
hts_itr_t *iter = NULL;

if ((fp = samopen(bamfile, "rb", 0)) == 0) {
if ((fp = sam_open(bamfile, "r")) == NULL) {
fprintf(stderr, "Fail to open BAM file %s\n", bamfile);
return -1;
}
if ((hdr = sam_hdr_read(fp)) == NULL) {
fprintf(stderr, "Fail to read header from file %s\n", bamfile);
sam_close(fp);
return -1;
}
if (regions != NULL && (idx = bam_index_load(bamfile)) ==0) { fprintf(stderr,"unable to load bam index for file %s\n",bamfile); return -1; }
if (regions == NULL) b = bam_init1(); // samread(fp,b)
if (regions == NULL) b = bam_init1(); // sam_read1(fp,hdr,b)
else
{
bam_parse_region(fp->header,regions,&ref,&beg,&end);
if (ref < 0) { fprintf(stderr,"invalid region for bam file %s \n",regions); return -1; }
fprintf(stderr,"parsing region %s of bam file %d %d-%d\n",regions,ref,beg,end); //return -1;
b = bam_init1();
iter = bam_iter_query(idx,ref,beg,end);
iter = sam_itr_querys(idx,hdr,regions);
if (iter == NULL) { fprintf(stderr,"invalid region for bam file %s \n",regions); return -1; }
}


while (1) {
if (ref < 0) ret = samread(fp,b); // read full bam file
else ret = bam_iter_read(fp->x.bam,iter,b); // specific region
if (!iter) ret = sam_read1(fp,hdr,b); // read full bam file
else ret = sam_itr_next(fp,iter,b); // specific region
//fprintf(stderr,"here %d %d\n",ret,iter);
if (ret < 0) break;
fetch_func(b, fp, read);
fetch_func(b, hdr, read);
if ((read->flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) || read->mquality < MIN_MQ || (USE_SUPP_ALIGNMENTS == 0 && (read->flag & 2048))) {
free_readmemory(read);
continue;
Expand Down
18 changes: 13 additions & 5 deletions hairs-src/fosmidbam_hairs.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include<stdint.h>
int MIN_CLUSTER_DISTANCE = 10000;
#include "print_clusters.c"
#include "sam.h"
#include "htslib/sam.h"
int BLOCK_SIZE = 100; // 100 bp blocks
int BF = 10;
#define BS1 20
Expand Down Expand Up @@ -265,16 +265,22 @@ int parse_bamfile_fosmid(char* bamfile, HASHTABLE* ht, CHROMVARS* chromvars, VAR
int prevtid = -1; //int prevposition = -1; // position of previous read in sorted bam file
int lastread = 0;

samfile_t *fp;
if ((fp = samopen(bamfile, "rb", 0)) == 0) {
samFile *fp;
if ((fp = sam_open(bamfile, "r")) == NULL) {
fprintf(stderr, "Fail to open BAM file %s\n", bamfile);
return -1;
}
bam_hdr_t *hdr;
if ((hdr = sam_hdr_read(fp)) == NULL) {
fprintf(stderr, "Fail to read header from file %s\n", bamfile);
sam_close(fp);
return -1;
}
bam1_t *b = bam_init1();

while (samread(fp, b) >= 0) {
while (sam_read1(fp, hdr, b) >= 0) {
//readlist[r] = calloc(1,sizeof(struct alignedread));
fetch_func(b, fp, readlist[r]);
fetch_func(b, hdr, readlist[r]);
if ((readlist[r]->flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP))) // unmapped reads, PCR/optical dups are ignored
{
free_readmemory(readlist[r]);
Expand Down Expand Up @@ -326,6 +332,8 @@ int parse_bamfile_fosmid(char* bamfile, HASHTABLE* ht, CHROMVARS* chromvars, VAR
for (reads = 0; reads < MAX_READS; reads++) free(readlist[reads]);
free(readlist);
bam_destroy1(b);
bam_hdr_destroy(hdr);
sam_close(fp);
return 0;
}

Loading

0 comments on commit a3b57ad

Please sign in to comment.