From 66b0e143192955888ed67a3b750ead60b835bf19 Mon Sep 17 00:00:00 2001
From: vasudeva8 <vasudeva.sarma@sanger.ac.uk>
Date: Mon, 21 Aug 2023 17:42:26 +0100
Subject: [PATCH 1/9] a few more functionalities

---
 samples/DEMO.md           | 167 ++++++++++++++++++
 samples/Makefile          |  12 +-
 samples/README.md         |  26 ++-
 samples/qtask_thread.c    | 344 ++++++++++++++++++++++++++++++++++++++
 samples/read_fast_index.c | 163 ++++++++++++++++++
 samples/read_with_tabix.c | 124 ++++++++++++++
 samples/sample.ref.fq     |  16 ++
 samples/write_fast.c      |  18 +-
 8 files changed, 864 insertions(+), 6 deletions(-)
 create mode 100644 samples/qtask_thread.c
 create mode 100644 samples/read_fast_index.c
 create mode 100644 samples/read_with_tabix.c
 create mode 100644 samples/sample.ref.fq

diff --git a/samples/DEMO.md b/samples/DEMO.md
index 911792899..c6966613b 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -100,6 +100,11 @@ alignment read.
 Read_multireg - This application showcases the usage of mulitple regionn
 specification in alignment read.
 
+Read_fast_index - This application showcases the fasta/fastq data read using
+index.
+
+Read_tbx - This application showcases the tabix index usage with sam files.
+
 Pileup - This application showcases the pileup api, where all alignments
 covering a reference position are accessed together. It displays the bases
 covering each position on standard output.
@@ -131,6 +136,12 @@ handling. It saves the read1 and read2 as separate files in given directory,
 one as sam and other as bam. A pool of 4 threads is created and shared for both
 read and write.
 
+Qtask_thread - This application shocases the use of mulitple queues, scheduling
+of different tasks and getting results in orderered and unordered fashion. It
+saves the read1 and read2 as separate files in given directory, one as sam and
+other as bam. The samfile/read1 can have unordered data and bamfile/read2 will
+have ordered data.
+
 
 ## Building the sample apps
 
@@ -777,6 +788,7 @@ index, the min shift has to be 0.
 
 At the end of write, sam_idx_save api need to be invoked to save the index.
 
+    ...
     //write header
     if (sam_hdr_write(outfile, in_samhdr)) {
     ...
@@ -803,6 +815,24 @@ The sam_index_build2 api takes the index file path as well and gives more
 control than the previous one.  The sam_index_build3 api provides an option to
 configure the number of threads in index creation.
 
+Index for a fasta/fastq file can be created using fai_build3 api after saving
+of the same. It takes the filename and creates index files, fai and gzi based
+on compression status of input file. When fai/gzi path are NULL, they are
+created along with fasta/q file.
+
+    ...
+    if (fai_build3(outname, NULL, NULL) == -1) {
+    ...
+Refer: write_fast.c
+
+A tabix index can be created for compressed sam/vcf and other data using
+tbx_index_build.
+
+    ...
+                if (tbx_index_build3(inname, NULL, shift, 1, &tbx_conf_sam) == -1) {
+    ...
+Refer: read_with_tabix.c
+
 
 ### Read with iterators
 
@@ -921,6 +951,75 @@ hts_idx_destroy. The hts_reglist_t* array passed is destroyed by the library
 on iterator destroy. The regions array (array of char array/string) needs to be
 destroyed by the user itself.
 
+For fasta/fastq files, the index has to be loaded using fai_load3_format which
+takes the file, index file names and format. With single region specification
+fai_fetch64 can be used to get bases, and fai_fetchqual64 for quality in case
+of fastq data. With multiple region specification, with comma separattion,
+faidx_fetch_seq64 and faidx_fetch_qual64 does the job. Regions has to be parsed
+using fai_parse_region in case of multiregion specifications. fai_adjust_region
+is used to adjust the start-end points based on available data.
+
+Below excerpt shows fasta/q access with single and multiregions,
+
+    ...
+    //load index
+    if (!(idx = fai_load3_format(inname, NULL, NULL, FAI_CREATE, fmt))) {
+    ...
+        //get data from given region
+        if (!(data = fai_fetch64(idx, region, &len))) {
+    ...
+        else {
+            printf("Data: %"PRId64" %s\n", len, data);
+            free((void*)data);
+            //get quality for fastq type
+            if (fmt == FAI_FASTQ) {
+                if (!(data = fai_fetchqual64(idx, region, &len))) {
+    ...
+        //parse, get each region and get data for each
+        while ((remaining = fai_parse_region(idx, region, &tid, &beg, &end, HTS_PARSE_LIST))) {     //here expects regions as csv
+            //parsed the region, correct end points based on actual data
+            if (fai_adjust_region(idx, tid, &beg, &end) == -1) {
+    ...
+            //get data for given region
+            if (!(data = faidx_fetch_seq64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
+    ...
+                printf("Data: %"PRIhts_pos" %s\n", len, data);
+                free((void*)data);
+                data = NULL;
+
+                //get quality data for fastq
+                if (fmt == FAI_FASTQ) {
+                    if (!(data = faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
+    ...
+                        printf("Qual: %"PRIhts_pos" %s\n", len, data);
+                        free((void*)data);
+    ...
+            region = remaining;                                     //parse remaining region defs
+    ...
+    if (idx) {
+        fai_destroy(idx);
+    ...
+Refer: read_fast_index.c
+
+Tabix index can be loaded using tbx_index_load. Data can be retrieved using
+iterators, tbx_itr_querys / tbx_itr_queryi. It need to be destroyed using
+tbx_destroy at the end.
+
+Below excerpt shows usage of tabix index,
+
+    ...
+        //load index
+        if (!(idx = tbx_index_load3(inname, NULL, HTS_IDX_SILENT_FAIL))) {
+    ...
+        //read using index and region
+        if (!(iter = tbx_itr_querys(idx, region))) {
+    ...
+        while ((c = tbx_itr_next(infile, idx, iter, &data)) >= 0) {
+    ...
+            tbx_destroy(idx);
+    ...
+Refer: read_with_tabix.c
+
 
 ### Pileup and MPileup
 
@@ -1345,6 +1444,74 @@ Below excerpt shows thread pool shared across files,
     ...
 Refer: split_thread2.c
 
+Task to be performed on data can be scheduled to a queue and a thread pool can
+be associated to it. There can be multiple pools and queues(processes). There
+are 2 type of queues, one where the result is queued back and retrieved in
+ordered fashion and other where the result may be unordered based on processing
+speed and number of threads. Explicitly created threads can also be used along
+with hts thread pool.
+
+hts_tpool_process_init initializes the queue / process associates a queue with
+thread pool and reserves space for given number of tasks. It takes a parameter
+indicating whether it is for unordered processing or for ordered processing
+where it has to queue the result back. Usually 2 times the number of threads
+are reserved. hts_tpool_dispatch3 queues the data to the queue along with the
+processing function, pool and clean up routines. The cleanup routines will be
+executed when hts_tpool_process_reset api is invoked, to reset the queue to
+have a restart. hts_tpool_process_flush can ensure that all the piledup tasks
+are processed, in case the queueing and processing happen at different speed.
+hts_tpool_process_shutdown api stops the processing of queue.
+
+Below excerpt shows the usage of 2 queues, one where the processed result may
+be unordered and writes as sam file. Other queue shows the processing and 
+queueing back, for ordered retrieval and saving as bam file,
+
+    ...
+    //thread pool
+    if (!(pool = hts_tpool_init(cnt))) {
+    ...
+    //queue to use with thread pool, no queuing of results for Q1 and results queued for Q2
+    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) || !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
+    ...
+    //open output files - w write as SAM, wb  write as BAM
+    outfile1 = sam_open(file1, "w"); outfile2 = sam_open(file2, "wb");
+    ...
+    //start output writer thread for ordered processing
+    twritedata.outfile = outfile2; twritedata.queue = queue2; twritedata.done = &stopcond; twritedata.lock = &stopcondsynch;
+    twritedata.samhdr = in_samhdr;
+    if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
+    ...
+    //check flags and schedule
+    while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
+        if (bamdata->core.flag & BAM_FREAD1) {
+            //read1, scheduling for unordered output, using Q1; processing occurs in order of thread execution
+            td_unordered *tdata = calloc(1, sizeof(td_unordered));
+            tdata->bamdata = bamdata; tdata->outfile = outfile1; tdata->lock = &filesynch; tdata->samhdr = in_samhdr;
+            if (hts_tpool_dispatch3(pool, queue1, thread_unordered_proc, tdata, (void (*)(void *))&free, (void (*)(void*))&bam_destroy1, 0) == -1) {
+    ...
+        else if (bamdata->core.flag & BAM_FREAD2) {
+            //read2, scheduling for ordered output, using Q2; proces and result queueing occurs in order of thread execution and result retrieval
+            //showcases the threaded execution and ordered result handling on read2
+            if (hts_tpool_dispatch3(pool, queue2, thread_ordered_proc, bamdata, NULL, (void (*)(void*))&bam_destroy1, 0) == -1) {
+    ...
+        //EOF read
+        //clear the queues of any tasks; NOTE: will be blocked until queue is cleared
+        if (hts_tpool_process_flush(queue1) == -1 || hts_tpool_process_flush(queue2) == -1) {
+    ...
+    //trigger exit for ordered write thread
+    if (thread) {
+        //shutown queues to exit the result wait
+        hts_tpool_process_shutdown(queue1);
+        hts_tpool_process_shutdown(queue2);
+    ...
+    if (queue1) {
+        hts_tpool_process_destroy(queue1);
+    ...
+    if (thread) {
+        pthread_join(thread, NULL);
+    ...
+Refer: qtask_thread.c
+
 
 ## More Information
 
diff --git a/samples/Makefile b/samples/Makefile
index 40991d78f..1c3e2caac 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -13,7 +13,8 @@ LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_sta
 PRGS = flags split split2 cram read_fast read_header read_ref read_bam \
 	read_aux dump_aux add_header rem_header update_header mod_bam mod_aux \
 	mod_aux_ba write_fast idx_on_write read_reg read_multireg pileup \
-	mpileup modstate pileup_mod flags_field split_t1 split_t2
+	mpileup modstate pileup_mod flags_field split_t1 split_t2 qtask read_tbx\
+	read_fast_i
 
 all: $(PRGS)
 
@@ -77,6 +78,12 @@ read_reg:
 read_multireg:
 	$(CC) $(CFLAGS) -I $(HTS_DIR) index_multireg_read.c -o $@  $(LDFLAGS)
 
+read_tbx:
+	$(CC) $(CFLAGS) -I $(HTS_DIR) read_with_tabix.c -o $@  $(LDFLAGS)
+
+read_fast_i:
+	$(CC) $(CFLAGS) -I $(HTS_DIR) read_fast_index.c -o $@  $(LDFLAGS)
+
 pileup:
 	$(CC) $(CFLAGS) -I $(HTS_DIR) pileup.c -o $@  $(LDFLAGS)
 
@@ -98,6 +105,9 @@ split_t1:
 split_t2:
 	$(CC) $(CFLAGS) -I $(HTS_DIR) split_thread2.c -o $@  $(LDFLAGS)
 
+qtask:
+	$(CC) $(CFLAGS) -I $(HTS_DIR) qtask_thread.c -o $@  $(LDFLAGS)
+
 clean:
 	find . -name "*.o" | xargs rm -rf
 	find . -name "*.dSYM" | xargs rm -rf
diff --git a/samples/README.md b/samples/README.md
index ab5481dea..34ed35eff 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -61,7 +61,7 @@ indexed.
 
 [Read_fast][Read_fast]
 
-  This application showcases the fasta/fastq data read.
+  This application showcases fasta/fastq data read without using index.
 
 [Read_header][Read_header]
 
@@ -130,7 +130,7 @@ indexed.
 [Write_fast][Write_fast]
 
   This application showcases the fasta/fastq data write. It appends a dummy
-  data to given file.
+  data to given file and creates an index file for it.
 
 [Index_write][Index_write]
 
@@ -147,6 +147,17 @@ indexed.
   This application showcases the usage of mulitple region specification in
   alignment read.
 
+[Read_fast_index][Read_fast_index]
+
+  This application showcases the fasta/fastq data read using index. It takes a
+  region (reference name[:start-end]) and gets data from that region.
+
+[Read_tbx][Read_tbx]
+
+  This application showcases the tabix index usage with sam files. It takes a
+  shift value and region (refname[:start-end]) and gets data from that region.
+  The shift will be used to create index file if not present already.
+    
 [Pileup][Pileup]:
 
   This application showcases the pileup api, where all alignments covering a
@@ -191,6 +202,14 @@ indexed.
   and other as bam. A pool of 4 threads is created and shared for both read
   and write.
 
+[Qtask_thread][Qtask_thread]
+
+  This application shocases the use of mulitple queues, scheduling of different
+  tasks and getting results in orderered and unordered fashion. It saves the
+  read1 and read2 as separate files in given directory, one as sam and other
+  as bam. The samfile/read1 can have unordered data and bamfile/read2 will have
+  ordered data.
+
 ### More Information
 
 More detailed documentation is available in the [DEMO.md][DEMO] with worked
@@ -217,6 +236,8 @@ examples per demonstration tool.
 [Index_write]: index_write.c
 [Read_reg]: index_reg_read.c
 [Read_multireg]: index_multireg_read.c
+[Read_fast_index]: read_fast_index.c
+[Read_tbx]: read_with_tabix.c
 [Pileup]: pileup.c
 [Mpileup]: mpileup.c
 [Modstate]: modstate.c
@@ -224,4 +245,5 @@ examples per demonstration tool.
 [Flags_field]: flags_htsopt_field.c
 [Split_thread1]: split_thread1.c
 [Split_thread2]: split_thread2.c
+[Qtask_thread]: qtask_thread.c
 [DEMO]: DEMO.md
diff --git a/samples/qtask_thread.c b/samples/qtask_thread.c
new file mode 100644
index 000000000..6c48d814d
--- /dev/null
+++ b/samples/qtask_thread.c
@@ -0,0 +1,344 @@
+/*  split_thread3.c --  showcases the htslib api usage
+
+    Copyright (C) 2023 Genome Research Ltd.
+
+    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+*/
+
+/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <htslib/sam.h>
+#include <htslib/thread_pool.h>
+
+//thread specific data
+typedef struct td_unordered {
+    samFile *outfile;           //output file handle
+    sam_hdr_t *samhdr;          //header used to write data
+    bam1_t *bamdata;            //data to be written
+
+    pthread_mutex_t *lock;      //to synchronize write to file
+} td_unordered;
+
+typedef struct td_orderedwrite {
+    samFile *outfile;           //output file handle
+    sam_hdr_t *samhdr;          //header used to write data
+    hts_tpool_process *queue;   //queue from which results to be retrieved
+
+    pthread_cond_t *done;       //indicates exit condition
+    pthread_mutex_t *lock;      //to synchronize queue access
+} td_orderedwrite;
+
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
+returns nothing
+*/
+static void print_usage(FILE *fp)
+{
+    fprintf(fp, "Usage: qtask infile threadcount outdir\n\
+Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\
+GC ratio - sum(G,C) / sum(A,T,C,G,N) - is calculated and added on each alignment as xr:f aux tag.\n");
+    return;
+}
+
+/// addcount - calculates and adds the aux tag
+/** @param bamdata pointer to bam data
+returns 0 on success and -1 for failure
+*/
+int addcount(bam1_t *bamdata)
+{
+    int pos = 0, gc = 0, ret = 0;
+    float gcratio = 0;
+    uint8_t *data = bam_get_seq(bamdata);
+    for (pos = 0; pos < bamdata->core.l_qseq; ++pos) {
+        switch(seq_nt16_str[bam_seqi(data, pos)]) {
+            case 'G':   //fall through
+            case 'C':
+                gc++;
+            break;
+        }
+    }
+    gcratio = gc / (float) bamdata->core.l_qseq;
+
+    if (bam_aux_append(bamdata, "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
+        printf("Failed to add aux tag xr, errno: %d\n", errno);
+        ret = -1;
+    }
+    return ret;
+}
+
+
+/// thread_unordered_proc - does the processing of task in queue1 (Read1) and writes output
+/** @param args pointer to data specific for the thread
+returns none
+the output could be unordered based on the number of threads in use
+*/
+void *thread_unordered_proc(void *args)
+{
+    td_unordered *tdata = (td_unordered*)args;
+
+    //add count
+    if (addcount(tdata->bamdata) < 0) {
+        printf("Failed to calculate gc data\n");
+    }
+
+    //lock to have exclusive write access to outfile
+    pthread_mutex_lock(tdata->lock);
+    if (sam_write1(tdata->outfile, tdata->samhdr, tdata->bamdata) < 0) {
+        printf("Failed to write output data\n");
+    }
+    pthread_mutex_unlock(tdata->lock);
+
+    return NULL;
+}
+
+/// thread_ordered_proc - does the processing of task in queue2 (Read2) and queues the output back
+/** @param argss pointer to data specific for the thread
+returns the processed bamdata
+the processing could be unordered based on the number of threads in use but read of output from queue
+will be in order
+*/
+void *thread_ordered_proc(void *args)
+{
+    bam1_t *bamdata = (bam1_t*)args;
+    //add count
+    if (addcount(bamdata) < 0) {
+        printf("Failed to calculate gc data\n");
+    }
+    return bamdata;
+}
+
+/// thread_ordered_proc - thread that read the output from queue2 (Read2) and writes
+/** @param argss pointer to data specific for the thread
+returns NULL
+*/
+void *threadfn_orderedwrite(void *args)
+{
+    td_orderedwrite *tdata = (td_orderedwrite*)args;
+    hts_tpool_result *r = NULL;
+    bam1_t *bamdata = NULL;
+
+    struct timeval now;
+    struct timespec timeout;
+
+    //get time to set the wait time on exit condition check
+    gettimeofday(&now, NULL);
+    timeout.tv_sec = now.tv_sec + 0;
+    timeout.tv_nsec = 0;
+
+    pthread_mutex_lock(tdata->lock);                //lock to check the exit condition
+    while (pthread_cond_timedwait(tdata->done, tdata->lock, &timeout) == ETIMEDOUT) {
+        //exit not set, get result and write; wait if no result is in queue - until shutdown of queue
+        while ((r = hts_tpool_next_result_wait(tdata->queue))) {
+            bamdata = (bam1_t*) hts_tpool_result_data(r);
+            if (bamdata && sam_write1(tdata->outfile, tdata->samhdr, bamdata) < 0) {
+                printf("Failed to write output data\n");
+            }
+            hts_tpool_delete_result(r, 0);          //release the result memory
+            if (bamdata) {
+                bam_destroy1(bamdata);              //clear the bamdata;
+                bamdata = NULL;
+            }
+        }
+        //no more data in queues, check and wait till exit is triggered
+        gettimeofday(&now, NULL);
+        timeout.tv_sec = now.tv_sec + 1;            //atmost 1 sec wait
+        timeout.tv_nsec = 0;
+    }
+    pthread_mutex_unlock(tdata->lock);
+    return NULL;
+}
+
+/// main_demo - start of the demo
+/** @param argc - count of arguments
+ *  @param argv - pointer to array of arguments
+returns 1 on failure 0 on success
+*/
+int main(int argc, char *argv[])
+{
+    const char *inname = NULL, *outdir = NULL;
+    char *file1 = NULL, *file2 = NULL;
+    int c = 0, ret = EXIT_FAILURE, size = 0, cnt = 0;
+    samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL;
+    sam_hdr_t *in_samhdr = NULL;
+    bam1_t *bamdata = NULL;
+    pthread_mutex_t filesynch = PTHREAD_MUTEX_INITIALIZER, stopcondsynch = PTHREAD_MUTEX_INITIALIZER;
+    pthread_cond_t stopcond = PTHREAD_COND_INITIALIZER;
+    pthread_t thread = 0;
+    td_orderedwrite twritedata = {0};
+    hts_tpool *pool = NULL;
+    hts_tpool_process *queue1 = NULL, *queue2 = NULL;
+
+    //split_t3 infile threadcount outdir
+    if (argc != 4) {
+        print_usage(stdout);
+        goto end;
+    }
+    inname = argv[1];
+    cnt = atoi(argv[2]);
+    outdir = argv[3];
+
+    if (cnt < 1) {
+        cnt = 1;
+    }
+
+    //allocate space for output
+    size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination
+    file1 = malloc(size); file2 = malloc(size);
+    if (!file1 || !file2) {
+        printf("Failed to set output path\n");
+        goto end;
+    }
+
+    //output file names
+    snprintf(file1, size, "%s/1.sam", outdir);  //for SAM output
+    snprintf(file2, size, "%s/2.bam", outdir);  //for BAM output
+    //bam data storage
+    if (!(bamdata = bam_init1())) {
+        printf("Failed to initialize bamdata\n");
+        goto end;
+    }
+    //thread pool
+    if (!(pool = hts_tpool_init(cnt))) {
+        printf("Failed to create thread pool\n");
+        goto end;
+    }
+    //queue to use with thread pool, no queuing of results for Q1 and results queued for Q2
+    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) || !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
+        printf("Failed to create queue\n");
+        goto end;
+    }
+    //open input file - r reading
+    if (!(infile = sam_open(inname, "r"))) {
+        printf("Could not open %s\n", inname);
+        goto end;
+    }
+
+    //open output files - w write as SAM, wb  write as BAM
+    outfile1 = sam_open(file1, "w"); outfile2 = sam_open(file2, "wb");
+    if (!outfile1 || !outfile2) {
+        printf("Could not open output file\n");
+        goto end;
+    }
+    //read header, required to resolve the target names to proper ids
+    if (!(in_samhdr = sam_hdr_read(infile))) {
+        printf("Failed to read header from file!\n");
+        goto end;
+    }
+    //start output writer thread for ordered processing
+    twritedata.outfile = outfile2; twritedata.queue = queue2; twritedata.done = &stopcond; twritedata.lock = &stopcondsynch;
+    twritedata.samhdr = in_samhdr;
+    if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
+        printf("Failed to create writer thread\n");
+        goto end;
+    }
+    //write header
+    if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) {
+        printf("Failed to write header\n");
+        goto end;
+    }
+    //check flags and schedule
+    while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
+        if (bamdata->core.flag & BAM_FREAD1) {
+            //read1, scheduling for unordered output, using Q1; processing occurs in order of thread execution
+            td_unordered *tdata = calloc(1, sizeof(td_unordered));
+            tdata->bamdata = bamdata; tdata->outfile = outfile1; tdata->lock = &filesynch; tdata->samhdr = in_samhdr;
+            if (hts_tpool_dispatch3(pool, queue1, thread_unordered_proc, tdata, (void (*)(void *))&free, (void (*)(void*))&bam_destroy1, 0) == -1) {
+                printf("Failed to schedule unordered processing\n");
+                goto end;
+            }
+        }
+        else if (bamdata->core.flag & BAM_FREAD2) {
+            //read2, scheduling for ordered output, using Q2; proces and result queueing occurs in order of thread execution and result retrieval
+            //showcases the threaded execution and ordered result handling on read2
+            if (hts_tpool_dispatch3(pool, queue2, thread_ordered_proc, bamdata, NULL, (void (*)(void*))&bam_destroy1, 0) == -1) {
+                printf("Failed to schedule ordered processing\n");
+                goto end;
+            }
+        }
+        bamdata = bam_init1();                  //for next read
+    }
+    if (-1 == c) {
+        //EOF read
+        //clear the queues of any tasks; NOTE: will be blocked until queue is cleared
+        if (hts_tpool_process_flush(queue1) == -1 || hts_tpool_process_flush(queue2) == -1) {
+            printf("Failed to flush queues\n");
+            goto end;
+        }
+        ret = EXIT_SUCCESS;
+    }
+    else {
+        printf("Error in reading data\n");
+    }
+
+    //trigger exit for ordered write thread
+    if (thread) {
+        //shutown queues to exit the result wait
+        hts_tpool_process_shutdown(queue1);
+        hts_tpool_process_shutdown(queue2);
+
+        pthread_mutex_lock(twritedata.lock);
+        pthread_cond_signal(twritedata.done);
+        pthread_mutex_unlock(twritedata.lock);
+        pthread_join(thread, NULL);
+        thread = NULL;
+    }
+end:
+    //cleanup
+    if (in_samhdr) {
+        sam_hdr_destroy(in_samhdr);
+    }
+    if (infile) {
+        sam_close(infile);
+    }
+    if (bamdata) {
+        bam_destroy1(bamdata);
+    }
+    if (file1) {
+        free(file1);
+    }
+    if (file2) {
+        free(file2);
+    }
+    if (outfile1) {
+        sam_close(outfile1);
+    }
+    if (outfile2) {
+        sam_close(outfile2);
+    }
+    if (queue1) {
+        hts_tpool_process_destroy(queue1);
+    }
+    if (queue2) {
+        hts_tpool_process_destroy(queue2);
+    }
+    if (pool) {
+        hts_tpool_destroy(pool);
+    }
+    if (thread) {
+        pthread_join(thread, NULL);
+    }
+    return ret;
+}
diff --git a/samples/read_fast_index.c b/samples/read_fast_index.c
new file mode 100644
index 000000000..aa6e90da2
--- /dev/null
+++ b/samples/read_fast_index.c
@@ -0,0 +1,163 @@
+/*  read_fast_index.c --  showcases the htslib api usage
+
+    Copyright (C) 2023 Genome Research Ltd.
+
+    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+*/
+
+/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <htslib/sam.h>
+#include <htslib/faidx.h>
+
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
+returns nothing
+*/
+static void print_usage(FILE *fp)
+{
+    fprintf(fp, "Usage: read_fast_i <infile> A/Q 0/1 regiondef\n\
+Reads the fasta/fastq file using index and shows the content.\n\
+For fasta files use A and Q for fastq files.\n\
+Region can be 1 or more of <reference name>[:start-end] entries separated by comma.\n\
+For single region, give regcount as 0 and non 0 for multi-regions.\n");
+    return;
+}
+
+/// main_demo - start of the demo
+/** @param argc - count of arguments
+ *  @param argv - pointer to array of arguments
+returns 1 on failure 0 on success
+*/
+int main(int argc, char *argv[])
+{
+    const char *inname = NULL, *region = NULL, *data = NULL, *remaining = NULL;
+    int ret = EXIT_FAILURE, tid = -1, usemulti = 0;
+    faidx_t *idx = NULL;
+    enum fai_format_options fmt = FAI_FASTA;
+    hts_pos_t len = 0, beg = 0, end = 0;
+
+    //read_fast_i infile A/Q regcount region
+    if (argc != 5) {
+        print_usage(stdout);
+        goto end;
+    }
+    inname = argv[1];
+    if (argv[2][0] == 'Q') {
+        fmt = FAI_FASTQ;
+    }
+    usemulti = atoi(argv[3]);
+    region = argv[4];
+
+    //load index
+    if (!(idx = fai_load3_format(inname, NULL, NULL, FAI_CREATE, fmt))) {
+        printf("Failed to load index\n");
+        goto end;
+    }
+
+    if (!usemulti) {
+        //get data from given region
+        if (!(data = fai_fetch64(idx, region, &len))) {
+            if (-1 == len) {
+                printf("Failed to get data\n");                 //failure
+                goto end;
+            }
+            else {
+                printf("Data not found for given region\n");    //no data
+            }
+        }
+        else {
+            printf("Data: %"PRId64" %s\n", len, data);
+            free((void*)data);
+            //get quality for fastq type
+            if (fmt == FAI_FASTQ) {
+                if (!(data = fai_fetchqual64(idx, region, &len))) {
+                    if (len == -1) {
+                        printf("Failed to get data\n");
+                        goto end;
+                    }
+                    else {
+                        printf("Data not found for given region\n");
+                    }
+                }
+                else {
+                    printf("Qual: %"PRId64" %s\n", len, data);
+                    free((void*)data);
+                }
+            }
+        }
+    }
+    else {
+        //parse, get each region and get data for each
+        while ((remaining = fai_parse_region(idx, region, &tid, &beg, &end, HTS_PARSE_LIST))) {     //here expects regions as csv
+            //parsed the region, correct end points based on actual data
+            if (fai_adjust_region(idx, tid, &beg, &end) == -1) {
+                printf("Error in adjusting region for tid %d\n", tid);
+                goto end;
+            }
+            //get data for given region
+            if (!(data = faidx_fetch_seq64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
+                if (len == -1) {
+                    printf("Failed to get data\n");                 //failure
+                    goto end;
+                }
+                else {
+                    printf("No data found for given region\n");     //no data
+                }
+            }
+            else {
+                printf("Data: %"PRIhts_pos" %s\n", len, data);
+                free((void*)data);
+                data = NULL;
+
+                //get quality data for fastq
+                if (fmt == FAI_FASTQ) {
+                    if (!(data = faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
+                        if (len == -1) {
+                            printf("Failed to get qual data\n");
+                            goto end;
+                        }
+                        else {
+                            printf("No data found for given region\n");
+                        }
+                    }
+                    else {
+                        printf("Qual: %"PRIhts_pos" %s\n", len, data);
+                        free((void*)data);
+                        data = NULL;
+                    }
+                }
+            }
+            region = remaining;                                     //parse remaining region defs
+        }
+    }
+
+    ret = EXIT_SUCCESS;
+end:
+    //clean up
+    if (idx) {
+        fai_destroy(idx);
+    }
+    return ret;
+}
diff --git a/samples/read_with_tabix.c b/samples/read_with_tabix.c
new file mode 100644
index 000000000..de43cb342
--- /dev/null
+++ b/samples/read_with_tabix.c
@@ -0,0 +1,124 @@
+/*  read_with_tabix.c --  showcases the htslib api usage
+
+    Copyright (C) 2023 Genome Research Ltd.
+
+    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+*/
+
+/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <htslib/sam.h>
+#include <htslib/tbx.h>
+
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
+returns nothing
+*/
+static void print_usage(FILE *fp)
+{
+    fprintf(fp, "Usage: read_with_tbx <infile> <shift> <region>\n\
+Reads sam.gz file using tabix index.\n");
+    return;
+}
+
+/// main_demo - start of the demo
+/** @param argc - count of arguments
+ *  @param argv - pointer to array of arguments
+returns 1 on failure 0 on success
+*/
+int main(int argc, char *argv[])
+{
+    const char *inname = NULL, *region = NULL;
+    int ret = EXIT_FAILURE, built = 0, shift = 0, c = 0;
+    tbx_t *idx = NULL;
+    hts_itr_t *iter = NULL;
+    samFile *infile = NULL;
+    kstring_t data = KS_INITIALIZE;
+
+    //read_with_tbx infile shift region
+    if (argc != 4) {
+        print_usage(stdout);
+        goto end;
+    }
+    inname = argv[1];
+    shift = atoi(argv[2]);
+    region = argv[3];
+
+    //open file for read
+    if (!(infile = sam_open(inname, "r"))) {
+        printf("Failed to open file\n");
+        goto end;
+    }
+load:
+    //load index
+    if (!(idx = tbx_index_load3(inname, NULL, HTS_IDX_SILENT_FAIL))) {
+        if (!built) {                   //try to build index and then load
+            built = 1;                  //reset flag to avoid repeated attempt
+            if (tbx_index_build3(inname, NULL, shift, 1, &tbx_conf_sam) == -1) {
+                printf("Failed to create index\n");
+            }
+            else {
+                goto load;              //built, try again to load
+            }
+        /*tbx_conf_sam for compressed sam, tbx_conf_vcf for compressed vcf,
+        tbx_conf_bed for bed ...*/
+        }
+        printf("Failed to load index\n");
+        goto end;
+    }
+
+    //read using index and region
+    if (!(iter = tbx_itr_querys(idx, region))) {
+        printf("Failed to get iterator\n");
+        goto end;
+    }
+    while ((c = tbx_itr_next(infile, idx, iter, &data)) >= 0) {
+        printf("%s\n", data.s);
+    }
+    if (c != -1) {
+        printf("Failed to read all data\n");
+        goto end;
+    }
+    //tabix doesnt support multi region description but application can explicitly extract region
+    //from such queries and use on tbx_itr_querys
+    /*while (rem = hts_parse_region(..region.)) {
+        if(rem[-1] == ',') rem[-1]='\0';
+        iter = tbx_itr_querys(..region);
+        ....
+        region = rem;
+    }*/
+
+    ret = EXIT_SUCCESS;
+end:
+    //clean up
+    ks_free(&data);
+
+    if (idx) {
+        tbx_destroy(idx);
+    }
+    if (infile) {
+        sam_close(infile);
+    }
+    return ret;
+}
diff --git a/samples/sample.ref.fq b/samples/sample.ref.fq
new file mode 100644
index 000000000..18b2b9617
--- /dev/null
+++ b/samples/sample.ref.fq
@@ -0,0 +1,16 @@
+@T1
+AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT
++
+AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT
+@T2
+TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT
++
+TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT
+@T3
+TTTTGGGGACTGTTAACAGT
++
+TTTTGGGGACTGTTAACAGT
+@T4
+TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTGGGGACTGTTAACAGT
++
+TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTGGGGACTGTTAACAGT
diff --git a/samples/write_fast.c b/samples/write_fast.c
index ef7817683..c8fb1d15a 100644
--- a/samples/write_fast.c
+++ b/samples/write_fast.c
@@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
+#include <htslib/faidx.h>
 
 /// print_usage - show flags_demo usage
 /** @param fp pointer to the file / terminal to which demo_usage to be dumped
@@ -37,7 +38,7 @@ returns nothing
 static void print_usage(FILE *fp)
 {
     fprintf(fp, "Usage: write_fast <file>\n\
-Appends a fasta/fastq file.\n");
+Appends a fasta/fastq file and indexes it.\n");
     return;
 }
 
@@ -71,10 +72,16 @@ int main(int argc, char *argv[])
         goto end;
     }
     //open output file
-    if (!(outfile = sam_open(outname, mode))) {
+    if (!(outfile = sam_open(outname, mode))) {         //expects the name to have correct extension!
         printf("Could not open %s\n", outname);
         goto end;
     }
+    /* if the file name extension is not appropriate to the content, inconsistent data will be present in output.
+    if required, htsFormat and sam_open_format can be explicitly used to ensure appropriateness of content.
+    htsFormat fmt = {sequence_data, fastq_format / fasta_format};
+    sam_open_format(outname, mode, fmt);
+    */
+
     //dummy data
     if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0) < 0) {
         printf("Failed to set data\n");
@@ -84,7 +91,12 @@ int main(int argc, char *argv[])
         printf("Failed to write data\n");
         goto end;
     }
-
+    //close and index it
+    sam_close(outfile); outfile = NULL;
+    if (fai_build3(outname, NULL, NULL) == -1) {
+        printf("Indexing failed with %d\n", errno);
+        goto end;
+    }
     ret = EXIT_SUCCESS;
 end:
     //clean up

From a238a75d14276572bab88ea88ccb4bac7d07ce78 Mon Sep 17 00:00:00 2001
From: vasudeva8 <vasudeva.sarma@sanger.ac.uk>
Date: Wed, 25 Oct 2023 19:41:19 +0100
Subject: [PATCH 2/9] merge

---
 samples/DEMO.md           |  90 ++++++++++++++-------------
 samples/Makefile          |   5 +-
 samples/README.md         |  13 +---
 samples/qtask_thread.c    |  34 +++++++----
 samples/read_fast.c       |   3 +
 samples/read_with_tabix.c | 124 --------------------------------------
 samples/split_thread1.c   |   4 +-
 samples/write_fast.c      |  22 +++++--
 8 files changed, 98 insertions(+), 197 deletions(-)
 delete mode 100644 samples/read_with_tabix.c

diff --git a/samples/DEMO.md b/samples/DEMO.md
index c6966613b..7be54b297 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -88,7 +88,7 @@ alignment. It adds count of ATCGN base as an array in auxiliary data, BA:I.
 Modified data is written on standard output.
 
 Write_fast - This application showcases the fasta/fastq data write. It appends
-a dummy data to given file.
+data to given file.
 
 Index_write - This application showcases the creation of index along with
 output creation. Based on file type and shift, it creates bai, csi or crai
@@ -103,8 +103,6 @@ specification in alignment read.
 Read_fast_index - This application showcases the fasta/fastq data read using
 index.
 
-Read_tbx - This application showcases the tabix index usage with sam files.
-
 Pileup - This application showcases the pileup api, where all alignments
 covering a reference position are accessed together. It displays the bases
 covering each position on standard output.
@@ -348,6 +346,8 @@ or explicit format text. This mode buffer can be used with sam_open or can be
 used with sam_open_format with explicit format information in htsFormat
 structure.
 
+It is the FASTA format which is mainly in use to store the reference data.
+
     ...
     if (!(bamdata = bam_init1())) {
     ...
@@ -358,6 +358,7 @@ structure.
     if (!(in_samhdr = sam_hdr_read(infile))) {
     ...
     while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
+    ...
         printf("\nsequence: ");
         for (c = 0; c < bamdata->core.l_qseq; ++c) {
             printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]);
@@ -376,7 +377,7 @@ Refer: read_fast.c
     ...
     if (!(outfile = sam_open(outname, mode))) {
     ...
-    if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0)
+    if (bam_set1(bamdata, strlen(name), name, BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, strlen(data), data, qual, 0)
      < 0) {
     ...
     if (sam_write1(outfile, out_samhdr, bamdata) < 0) {
@@ -772,14 +773,14 @@ can be read easily. There are different type of indices, BAI, CSI, CRAI, TBI,
 FAI etc. and are usually used with iterators.
 
 Indexing of plain/textual files are not supported, compressed SAM&FASTA/Q, BAM,
-and CRAM files can be indexed. CRAM files are indexed as .crai and the other two
-can be indexed as .bai or .csi files. Each of these types have different
-internal representations of the index information. Bai uses a fixed
-configuration values where as csi has them dynamically updated based on the
-alignment data.
+and CRAM files can be indexed. CRAM files are indexed as .crai and the others
+as .bai, .csi, .fai etc. Each of these types have different internal
+representations of the index information. Bai uses a fixed configuration values
+where as csi has them dynamically updated based on the alignment data.
 
 Indexes can be created either with save of alignment data or explicitly by
-read of existing alignment file.
+read of existing alignment file for alignment data (SAM/BAM/CRAM). For reference
+data it has to be explicitly created (FASTA).
 
 To create index along with alignment write, the sam_idx_init api need to be
 invoked before the start of alignment data write. This api takes the output
@@ -815,23 +816,18 @@ The sam_index_build2 api takes the index file path as well and gives more
 control than the previous one.  The sam_index_build3 api provides an option to
 configure the number of threads in index creation.
 
-Index for a fasta/fastq file can be created using fai_build3 api after saving
-of the same. It takes the filename and creates index files, fai and gzi based
-on compression status of input file. When fai/gzi path are NULL, they are
-created along with fasta/q file.
+Index for a fasta file / reference data can be created using fai_build3 api
+after save of the same. It takes the filename and creates index files, fai and
+gzi based on compression status of input file. When fai/gzi path are NULL, they
+are created along with input file.
 
     ...
     if (fai_build3(outname, NULL, NULL) == -1) {
     ...
 Refer: write_fast.c
 
-A tabix index can be created for compressed sam/vcf and other data using
-tbx_index_build.
-
-    ...
-                if (tbx_index_build3(inname, NULL, shift, 1, &tbx_conf_sam) == -1) {
-    ...
-Refer: read_with_tabix.c
+A tabix index can be created for compressed vcf/sam/bed and other data using
+tbx_index_build. It is mainly used with vcf and non-sam type files.
 
 
 ### Read with iterators
@@ -986,7 +982,6 @@ Below excerpt shows fasta/q access with single and multiregions,
                 printf("Data: %"PRIhts_pos" %s\n", len, data);
                 free((void*)data);
                 data = NULL;
-
                 //get quality data for fastq
                 if (fmt == FAI_FASTQ) {
                     if (!(data = faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
@@ -1406,6 +1401,10 @@ generic pool can be created and shared across multiple files. Another way to
 use thread pool is to schedule tasks explicitly to queues which gets executed
 using threads in pool.
 
+The samples are trivial ones to showcase the usage of api and the number of
+threads to use has to be identified based on complexity and parallelism of the
+job.
+
 To have a thread pool specific for a file, hts_set_opt api can be used with the
 file pointer, HTS_OPT_NTHREADS and the number of threads to use in the pool.
 Closure of file releases the thread pool as well. To have a thread pool which
@@ -1419,15 +1418,15 @@ Below excerpt shows file specific thread pool,
 
     ...
     //create file specific threads
-    if (hts_set_opt(infile, HTS_OPT_NTHREADS, 2) < 0 ||     //2 thread specific for reading
+    if (hts_set_opt(infile, HTS_OPT_NTHREADS, 1) < 0 ||     //1 thread specific for reading
     hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 ||       //1 thread specific for sam write
-    hts_set_opt(outfile2, HTS_OPT_NTHREADS, 1) < 0) {       //1 thread specific for bam write
+    hts_set_opt(outfile2, HTS_OPT_NTHREADS, 2) < 0) {       //2 thread specific for bam write
         printf("Failed to set thread options\n");
         goto end;
     }
 Refer: split_thread1.c
 
-Below excerpt shows thread pool shared across files,
+Below excerpt shows a thread pool shared across files, 
 
     ...
     //create a pool of 4 threads
@@ -1444,6 +1443,11 @@ Below excerpt shows thread pool shared across files,
     ...
 Refer: split_thread2.c
 
+Note that it is important to analyze the task in hand to decide the number of
+threads to be used. As an example, if the number of threads for reading is set
+to 2 and bam write to 1, keeping total number of threads the same, the
+performance may decrease as decoding is easier than encoding.
+
 Task to be performed on data can be scheduled to a queue and a thread pool can
 be associated to it. There can be multiple pools and queues(processes). There
 are 2 type of queues, one where the result is queued back and retrieved in
@@ -1455,26 +1459,30 @@ hts_tpool_process_init initializes the queue / process associates a queue with
 thread pool and reserves space for given number of tasks. It takes a parameter
 indicating whether it is for unordered processing or for ordered processing
 where it has to queue the result back. Usually 2 times the number of threads
-are reserved. hts_tpool_dispatch3 queues the data to the queue along with the
-processing function, pool and clean up routines. The cleanup routines will be
-executed when hts_tpool_process_reset api is invoked, to reset the queue to
-have a restart. hts_tpool_process_flush can ensure that all the piledup tasks
-are processed, in case the queueing and processing happen at different speed.
-hts_tpool_process_shutdown api stops the processing of queue.
+are reserved. hts_tpool_dispatch queues the data to the queue. If blocking /
+non-blocking control is required, hts_tpool_dispatch2 can be used instead. If
+hts_tpool_dispatch3 is used instead, it takes cleanup routines as well. The
+cleanup routines will be executed when hts_tpool_process_reset api is invoked,
+to reset the queue to have a restart. hts_tpool_process_flush can ensure that
+all the piledup tasks are processed, in case the queueing and processing happen
+at different speed. hts_tpool_process_shutdown api stops the processing of queue.
 
-Below excerpt shows the usage of 2 queues, one where the processed result may
-be unordered and writes as sam file. Other queue shows the processing and 
-queueing back, for ordered retrieval and saving as bam file,
+Below excerpt shows the usage of 2 queues with a trivial sample, one where the
+processed result may be unordered - based on execution order - and writes as sam
+file. Other queue shows the processing and queueing back, for ordered retrieval
+and save as bam file,
 
     ...
     //thread pool
     if (!(pool = hts_tpool_init(cnt))) {
     ...
     //queue to use with thread pool, no queuing of results for Q1 and results queued for Q2
-    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) || !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
+    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) || 
+        !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
     ...
     //open output files - w write as SAM, wb  write as BAM
-    outfile1 = sam_open(file1, "w"); outfile2 = sam_open(file2, "wb");
+    outfile1 = sam_open(file1, "w");
+    outfile2 = sam_open(file2, "wb");
     ...
     //start output writer thread for ordered processing
     twritedata.outfile = outfile2; twritedata.queue = queue2; twritedata.done = &stopcond; twritedata.lock = &stopcondsynch;
@@ -1487,12 +1495,12 @@ queueing back, for ordered retrieval and saving as bam file,
             //read1, scheduling for unordered output, using Q1; processing occurs in order of thread execution
             td_unordered *tdata = calloc(1, sizeof(td_unordered));
             tdata->bamdata = bamdata; tdata->outfile = outfile1; tdata->lock = &filesynch; tdata->samhdr = in_samhdr;
-            if (hts_tpool_dispatch3(pool, queue1, thread_unordered_proc, tdata, (void (*)(void *))&free, (void (*)(void*))&bam_destroy1, 0) == -1) {
+            if (hts_tpool_dispatch(pool, queue1, thread_unordered_proc, tdata) == -1) {
     ...
         else if (bamdata->core.flag & BAM_FREAD2) {
             //read2, scheduling for ordered output, using Q2; proces and result queueing occurs in order of thread execution and result retrieval
             //showcases the threaded execution and ordered result handling on read2
-            if (hts_tpool_dispatch3(pool, queue2, thread_ordered_proc, bamdata, NULL, (void (*)(void*))&bam_destroy1, 0) == -1) {
+            if (hts_tpool_dispatch(pool, queue2, thread_ordered_proc, bamdata) == -1) {
     ...
         //EOF read
         //clear the queues of any tasks; NOTE: will be blocked until queue is cleared
@@ -1588,9 +1596,9 @@ be destroyed as many times with sam_hdr_destroy api.
 ### Index
 
 Indices need the data to be sorted by position.  They can be of different
-types with extension .bai, .csi or .tbi for compressed SAM/BAM files and .crai
-for CRAM files.  The index name can be passed along with the alignment file
-itself by appending a specific character sequence. The apis can detect this
+types with extension .bai, .csi or .tbi for compressed SAM/BAM/VCF files and
+.crai for CRAM files.  The index name can be passed along with the alignment
+file itself by appending a specific character sequence. The apis can detect this
 sequence and extract the index path. ##idx## is the sequence which separates
 the file path and index path.
 
diff --git a/samples/Makefile b/samples/Makefile
index 1c3e2caac..4821b9dd7 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -13,7 +13,7 @@ LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_sta
 PRGS = flags split split2 cram read_fast read_header read_ref read_bam \
 	read_aux dump_aux add_header rem_header update_header mod_bam mod_aux \
 	mod_aux_ba write_fast idx_on_write read_reg read_multireg pileup \
-	mpileup modstate pileup_mod flags_field split_t1 split_t2 qtask read_tbx\
+	mpileup modstate pileup_mod flags_field split_t1 split_t2 qtask \
 	read_fast_i
 
 all: $(PRGS)
@@ -78,9 +78,6 @@ read_reg:
 read_multireg:
 	$(CC) $(CFLAGS) -I $(HTS_DIR) index_multireg_read.c -o $@  $(LDFLAGS)
 
-read_tbx:
-	$(CC) $(CFLAGS) -I $(HTS_DIR) read_with_tabix.c -o $@  $(LDFLAGS)
-
 read_fast_i:
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_fast_index.c -o $@  $(LDFLAGS)
 
diff --git a/samples/README.md b/samples/README.md
index 34ed35eff..6043847d4 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -129,8 +129,8 @@ indexed.
 
 [Write_fast][Write_fast]
 
-  This application showcases the fasta/fastq data write. It appends a dummy
-  data to given file and creates an index file for it.
+  This application showcases the fasta/fastq data write. It appends given data
+  file and creates an index file for it.
 
 [Index_write][Index_write]
 
@@ -152,12 +152,6 @@ indexed.
   This application showcases the fasta/fastq data read using index. It takes a
   region (reference name[:start-end]) and gets data from that region.
 
-[Read_tbx][Read_tbx]
-
-  This application showcases the tabix index usage with sam files. It takes a
-  shift value and region (refname[:start-end]) and gets data from that region.
-  The shift will be used to create index file if not present already.
-    
 [Pileup][Pileup]:
 
   This application showcases the pileup api, where all alignments covering a
@@ -192,8 +186,7 @@ indexed.
 
   This application showcases the use of threads in file handling. It saves
   the read1 and read2 as separate files in given directory, one as sam and
-  other as bam. 2 threads are used for read and 1 each dedicated for each
-  output file.
+  other as bam. 1 thread is used for read, 1 for sam write and 2 for bam write.
 
 [Split_thread2][Split_thread2]
 
diff --git a/samples/qtask_thread.c b/samples/qtask_thread.c
index 6c48d814d..558a90c86 100644
--- a/samples/qtask_thread.c
+++ b/samples/qtask_thread.c
@@ -111,6 +111,8 @@ void *thread_unordered_proc(void *args)
     }
     pthread_mutex_unlock(tdata->lock);
 
+    bam_destroy1(tdata->bamdata);
+    free(tdata);
     return NULL;
 }
 
@@ -180,7 +182,8 @@ int main(int argc, char *argv[])
 {
     const char *inname = NULL, *outdir = NULL;
     char *file1 = NULL, *file2 = NULL;
-    int c = 0, ret = EXIT_FAILURE, size = 0, cnt = 0;
+    int c = 0, ret = EXIT_FAILURE, cnt = 0;
+    size_t size = 0;
     samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL;
     sam_hdr_t *in_samhdr = NULL;
     bam1_t *bamdata = NULL;
@@ -205,7 +208,7 @@ int main(int argc, char *argv[])
     }
 
     //allocate space for output
-    size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination
+    size = (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination
     file1 = malloc(size); file2 = malloc(size);
     if (!file1 || !file2) {
         printf("Failed to set output path\n");
@@ -226,7 +229,8 @@ int main(int argc, char *argv[])
         goto end;
     }
     //queue to use with thread pool, no queuing of results for Q1 and results queued for Q2
-    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) || !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
+    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) ||
+        !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
         printf("Failed to create queue\n");
         goto end;
     }
@@ -237,7 +241,8 @@ int main(int argc, char *argv[])
     }
 
     //open output files - w write as SAM, wb  write as BAM
-    outfile1 = sam_open(file1, "w"); outfile2 = sam_open(file2, "wb");
+    outfile1 = sam_open(file1, "w");
+    outfile2 = sam_open(file2, "wb");
     if (!outfile1 || !outfile2) {
         printf("Could not open output file\n");
         goto end;
@@ -248,14 +253,18 @@ int main(int argc, char *argv[])
         goto end;
     }
     //start output writer thread for ordered processing
-    twritedata.outfile = outfile2; twritedata.queue = queue2; twritedata.done = &stopcond; twritedata.lock = &stopcondsynch;
+    twritedata.outfile = outfile2;
+    twritedata.queue = queue2;
+    twritedata.done = &stopcond;
+    twritedata.lock = &stopcondsynch;
     twritedata.samhdr = in_samhdr;
     if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
         printf("Failed to create writer thread\n");
         goto end;
     }
     //write header
-    if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) {
+    if ((sam_hdr_write(outfile1, in_samhdr) == -1) ||
+        (sam_hdr_write(outfile2, in_samhdr) == -1)) {
         printf("Failed to write header\n");
         goto end;
     }
@@ -263,9 +272,12 @@ int main(int argc, char *argv[])
     while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
         if (bamdata->core.flag & BAM_FREAD1) {
             //read1, scheduling for unordered output, using Q1; processing occurs in order of thread execution
-            td_unordered *tdata = calloc(1, sizeof(td_unordered));
-            tdata->bamdata = bamdata; tdata->outfile = outfile1; tdata->lock = &filesynch; tdata->samhdr = in_samhdr;
-            if (hts_tpool_dispatch3(pool, queue1, thread_unordered_proc, tdata, (void (*)(void *))&free, (void (*)(void*))&bam_destroy1, 0) == -1) {
+            td_unordered *tdata = malloc(sizeof(td_unordered));
+            tdata->bamdata = bamdata;
+            tdata->outfile = outfile1;
+            tdata->lock = &filesynch;
+            tdata->samhdr = in_samhdr;
+            if (hts_tpool_dispatch(pool, queue1, thread_unordered_proc, tdata) == -1) {
                 printf("Failed to schedule unordered processing\n");
                 goto end;
             }
@@ -273,7 +285,7 @@ int main(int argc, char *argv[])
         else if (bamdata->core.flag & BAM_FREAD2) {
             //read2, scheduling for ordered output, using Q2; proces and result queueing occurs in order of thread execution and result retrieval
             //showcases the threaded execution and ordered result handling on read2
-            if (hts_tpool_dispatch3(pool, queue2, thread_ordered_proc, bamdata, NULL, (void (*)(void*))&bam_destroy1, 0) == -1) {
+            if (hts_tpool_dispatch(pool, queue2, thread_ordered_proc, bamdata) == -1) {
                 printf("Failed to schedule ordered processing\n");
                 goto end;
             }
@@ -303,7 +315,7 @@ int main(int argc, char *argv[])
         pthread_cond_signal(twritedata.done);
         pthread_mutex_unlock(twritedata.lock);
         pthread_join(thread, NULL);
-        thread = NULL;
+        thread = 0;
     }
 end:
     //cleanup
diff --git a/samples/read_fast.c b/samples/read_fast.c
index f74b25515..dc24a1167 100644
--- a/samples/read_fast.c
+++ b/samples/read_fast.c
@@ -83,6 +83,8 @@ int main(int argc, char *argv[])
 
     //read data
     while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
+        printf("\nname: ");
+        printf("%s", bam_get_qname(bamdata));
         printf("\nsequence: ");
         for (c = 0; c < bamdata->core.l_qseq; ++c) {
             printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]);
@@ -94,6 +96,7 @@ int main(int argc, char *argv[])
             }
         }
     }
+    printf("\n");
     if (c != -1) {
         //error
         printf("Failed to get data\n");
diff --git a/samples/read_with_tabix.c b/samples/read_with_tabix.c
deleted file mode 100644
index de43cb342..000000000
--- a/samples/read_with_tabix.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*  read_with_tabix.c --  showcases the htslib api usage
-
-    Copyright (C) 2023 Genome Research Ltd.
-
-    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE
-
-*/
-
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
-
-#include <getopt.h>
-#include <unistd.h>
-#include <htslib/sam.h>
-#include <htslib/tbx.h>
-
-/// print_usage - show usage
-/** @param fp pointer to the file / terminal to which usage to be dumped
-returns nothing
-*/
-static void print_usage(FILE *fp)
-{
-    fprintf(fp, "Usage: read_with_tbx <infile> <shift> <region>\n\
-Reads sam.gz file using tabix index.\n");
-    return;
-}
-
-/// main_demo - start of the demo
-/** @param argc - count of arguments
- *  @param argv - pointer to array of arguments
-returns 1 on failure 0 on success
-*/
-int main(int argc, char *argv[])
-{
-    const char *inname = NULL, *region = NULL;
-    int ret = EXIT_FAILURE, built = 0, shift = 0, c = 0;
-    tbx_t *idx = NULL;
-    hts_itr_t *iter = NULL;
-    samFile *infile = NULL;
-    kstring_t data = KS_INITIALIZE;
-
-    //read_with_tbx infile shift region
-    if (argc != 4) {
-        print_usage(stdout);
-        goto end;
-    }
-    inname = argv[1];
-    shift = atoi(argv[2]);
-    region = argv[3];
-
-    //open file for read
-    if (!(infile = sam_open(inname, "r"))) {
-        printf("Failed to open file\n");
-        goto end;
-    }
-load:
-    //load index
-    if (!(idx = tbx_index_load3(inname, NULL, HTS_IDX_SILENT_FAIL))) {
-        if (!built) {                   //try to build index and then load
-            built = 1;                  //reset flag to avoid repeated attempt
-            if (tbx_index_build3(inname, NULL, shift, 1, &tbx_conf_sam) == -1) {
-                printf("Failed to create index\n");
-            }
-            else {
-                goto load;              //built, try again to load
-            }
-        /*tbx_conf_sam for compressed sam, tbx_conf_vcf for compressed vcf,
-        tbx_conf_bed for bed ...*/
-        }
-        printf("Failed to load index\n");
-        goto end;
-    }
-
-    //read using index and region
-    if (!(iter = tbx_itr_querys(idx, region))) {
-        printf("Failed to get iterator\n");
-        goto end;
-    }
-    while ((c = tbx_itr_next(infile, idx, iter, &data)) >= 0) {
-        printf("%s\n", data.s);
-    }
-    if (c != -1) {
-        printf("Failed to read all data\n");
-        goto end;
-    }
-    //tabix doesnt support multi region description but application can explicitly extract region
-    //from such queries and use on tbx_itr_querys
-    /*while (rem = hts_parse_region(..region.)) {
-        if(rem[-1] == ',') rem[-1]='\0';
-        iter = tbx_itr_querys(..region);
-        ....
-        region = rem;
-    }*/
-
-    ret = EXIT_SUCCESS;
-end:
-    //clean up
-    ks_free(&data);
-
-    if (idx) {
-        tbx_destroy(idx);
-    }
-    if (infile) {
-        sam_close(infile);
-    }
-    return ret;
-}
diff --git a/samples/split_thread1.c b/samples/split_thread1.c
index 40d2dfdc2..fd90e4b19 100644
--- a/samples/split_thread1.c
+++ b/samples/split_thread1.c
@@ -94,9 +94,9 @@ int main(int argc, char *argv[])
     }
 
     //create file specific threads
-    if (hts_set_opt(infile, HTS_OPT_NTHREADS, 2) < 0 ||     //2 thread specific for reading
+    if (hts_set_opt(infile, HTS_OPT_NTHREADS, 1) < 0 ||     //1 thread specific for reading
     hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 ||       //1 thread specific for sam write
-    hts_set_opt(outfile2, HTS_OPT_NTHREADS, 1) < 0) {       //1 thread specific for bam write
+    hts_set_opt(outfile2, HTS_OPT_NTHREADS, 2) < 0) {       //2 thread specific for bam write
         printf("Failed to set thread options\n");
         goto end;
     }
diff --git a/samples/write_fast.c b/samples/write_fast.c
index c8fb1d15a..5bfcd73f0 100644
--- a/samples/write_fast.c
+++ b/samples/write_fast.c
@@ -28,6 +28,7 @@ DEALINGS IN THE SOFTWARE
 
 #include <getopt.h>
 #include <unistd.h>
+#include <time.h>
 #include <htslib/sam.h>
 #include <htslib/faidx.h>
 
@@ -37,8 +38,8 @@ returns nothing
 */
 static void print_usage(FILE *fp)
 {
-    fprintf(fp, "Usage: write_fast <file>\n\
-Appends a fasta/fastq file and indexes it.\n");
+    fprintf(fp, "Usage: write_fast <file> <sequence> [<qualities]\n\
+Appends a fasta/fastq file.\n");
     return;
 }
 
@@ -55,12 +56,22 @@ int main(int argc, char *argv[])
     sam_hdr_t *out_samhdr = NULL;           //header of file
     bam1_t *bamdata = NULL;                 //to hold the read data
     char mode[4] = "a";
+    const char *data = NULL, *qual = NULL;  //ref data and quality
+    char name[256] = {0};
 
-    if (argc != 2) {
+    if (argc > 4 || argc < 3) {
         print_usage(stdout);
         goto end;
     }
     outname = argv[1];
+    data = argv[2];
+    if (argc == 4) {    //fastq data
+        qual = argv[3];
+        if (strlen(data) != strlen(qual)) {     //check for proper length of data and quality values
+            printf("Incorrect reference and quality data\n");
+            goto end;
+        }
+    }
 
     //initialize
     if (!(bamdata = bam_init1())) {
@@ -82,8 +93,9 @@ int main(int argc, char *argv[])
     sam_open_format(outname, mode, fmt);
     */
 
-    //dummy data
-    if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0) < 0) {
+    snprintf(name, sizeof(name), "Test_%ld", time(NULL));
+    //data
+    if (bam_set1(bamdata, strlen(name), name, BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, strlen(data), data, qual, 0) < 0) {
         printf("Failed to set data\n");
         goto end;
     }

From a73aaeabc5c4f0184a03fe3bd5ed07a728c03117 Mon Sep 17 00:00:00 2001
From: vasudeva8 <vasudeva.sarma@sanger.ac.uk>
Date: Thu, 26 Oct 2023 17:07:09 +0100
Subject: [PATCH 3/9] update based on reviews

update with correction

fixed issues
---
 samples/DEMO.md               | 280 ++++++++++++++++----------
 samples/Makefile              |  14 +-
 samples/README.md             |  37 ++--
 samples/add_header.c          |   8 +-
 samples/cram.c                |   6 +-
 samples/dump_aux.c            |   8 +-
 samples/flags_demo.c          |   6 +-
 samples/flags_htsopt_field.c  |   6 +-
 samples/index_fasta.c         |  72 +++++++
 samples/index_multireg_read.c |   6 +-
 samples/index_reg_read.c      |   8 +-
 samples/index_write.c         |   6 +-
 samples/mod_aux.c             |   6 +-
 samples/mod_aux_ba.c          |   6 +-
 samples/mod_bam.c             |   6 +-
 samples/modstate.c            |   6 +-
 samples/mpileup.c             |   6 +-
 samples/pileup.c              |   6 +-
 samples/pileup_mod.c          |   6 +-
 samples/qtask_ordered.c       | 367 ++++++++++++++++++++++++++++++++++
 samples/qtask_thread.c        | 356 ---------------------------------
 samples/qtask_unordered.c     | 311 ++++++++++++++++++++++++++++
 samples/read_aux.c            |  10 +-
 samples/read_bam.c            |   6 +-
 samples/read_fast.c           |   6 +-
 samples/read_fast_index.c     |   2 +-
 samples/read_header.c         |   6 +-
 samples/read_refname.c        |   6 +-
 samples/rem_header.c          |   8 +-
 samples/sample.bed            |   4 +
 samples/sample.sam            |   2 +-
 samples/split.c               |   6 +-
 samples/split2.c              |   8 +-
 samples/split_thread1.c       |   6 +-
 samples/split_thread2.c       |   6 +-
 samples/update_header.c       |   6 +-
 samples/write_fast.c          |  12 +-
 37 files changed, 1050 insertions(+), 577 deletions(-)
 create mode 100644 samples/index_fasta.c
 create mode 100644 samples/qtask_ordered.c
 delete mode 100644 samples/qtask_thread.c
 create mode 100644 samples/qtask_unordered.c
 create mode 100644 samples/sample.bed

diff --git a/samples/DEMO.md b/samples/DEMO.md
index 7be54b297..f2a281445 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -94,10 +94,13 @@ Index_write - This application showcases the creation of index along with
 output creation. Based on file type and shift, it creates bai, csi or crai
 files.
 
+Index_fast - This application showcases the index creation on fasta/fastq
+reference files.
+
 Read_reg - This application showcases the usage of region specification in
 alignment read.
 
-Read_multireg - This application showcases the usage of mulitple regionn
+Read_multireg - This application showcases the usage of multiple region
 specification in alignment read.
 
 Read_fast_index - This application showcases the fasta/fastq data read using
@@ -134,12 +137,15 @@ handling. It saves the read1 and read2 as separate files in given directory,
 one as sam and other as bam. A pool of 4 threads is created and shared for both
 read and write.
 
-Qtask_thread - This application shocases the use of mulitple queues, scheduling
-of different tasks and getting results in orderered and unordered fashion. It
-saves the read1 and read2 as separate files in given directory, one as sam and
-other as bam. The samfile/read1 can have unordered data and bamfile/read2 will
-have ordered data.
+Qtask_ordered - This application showcases the use of queues and threads for
+custom processing. In this, alignments in input file are updated with GC ratio
+on a custom aux tag. The processing may occur in any order and the result is
+retrieved in same order as it was queued and saved to disk.
 
+Qtask_unordered - This application showcases the use of queues and threads for
+custom processing. In this, alignments in input files are split to different
+files for requested regions. The processing may occur in any order and results
+may appear in the order completion of processing.
 
 ## Building the sample apps
 
@@ -182,7 +188,7 @@ sam_read1 api. samFile pointer, header and bam storage are to be passed as
 argument and it returns 0 on success, -1 on end of file and < -1 in case of
 errors.
 
-The bam storage has to be initialised using bam_init1 api before the call and
+The bam storage has to be initialized using bam_init1 api before the call and
 can be reused for successive reads. Once done, it needs to be destroyed using
 bam_destroy1.  The member field named core - bam1_core_t - in bam storage,
 bam1_t, has the sequence data in an easily accessible way. Using the fields
@@ -488,7 +494,7 @@ indexing the seq_nt16_str array.
         for (i = 0; i < bamdata->core.l_qseq ; ++i) {       //sequence length
             printf("%c", seq_nt16_str[bam_seqi(data, i)]);  //retrieves the base from (internal compressed) sequence data
         ...
-            printf("%c", bam_get_qual(bamdata)[i]+33);      //retrives the quality value
+            printf("%c", bam_get_qual(bamdata)[i]+33);      //retrieves the quality value
         ...
 Refer: read_bam.c
 
@@ -816,15 +822,16 @@ The sam_index_build2 api takes the index file path as well and gives more
 control than the previous one.  The sam_index_build3 api provides an option to
 configure the number of threads in index creation.
 
-Index for a fasta file / reference data can be created using fai_build3 api
-after save of the same. It takes the filename and creates index files, fai and
-gzi based on compression status of input file. When fai/gzi path are NULL, they
-are created along with input file.
+Index for reference data can be created using fai_build3 api. This creates
+index file with .fai extension. If the file is bgzip-ped, a .gzi file is
+created as well. It takes the path to input file and that of fai and gzi files.
+When fai/gzi path are NULL, they are created along with input file.
+These index files will be useful for reference data access.
 
     ...
-    if (fai_build3(outname, NULL, NULL) == -1) {
+    if (fai_build3(filename, NULL, NULL) == -1) {
     ...
-Refer: write_fast.c
+Refer: index_fast.c
 
 A tabix index can be created for compressed vcf/sam/bed and other data using
 tbx_index_build. It is mainly used with vcf and non-sam type files.
@@ -950,7 +957,7 @@ destroyed by the user itself.
 For fasta/fastq files, the index has to be loaded using fai_load3_format which
 takes the file, index file names and format. With single region specification
 fai_fetch64 can be used to get bases, and fai_fetchqual64 for quality in case
-of fastq data. With multiple region specification, with comma separattion,
+of fastq data. With multiple region specification, with comma separation,
 faidx_fetch_seq64 and faidx_fetch_qual64 does the job. Regions has to be parsed
 using fai_parse_region in case of multiregion specifications. fai_adjust_region
 is used to adjust the start-end points based on available data.
@@ -996,32 +1003,13 @@ Below excerpt shows fasta/q access with single and multiregions,
     ...
 Refer: read_fast_index.c
 
-Tabix index can be loaded using tbx_index_load. Data can be retrieved using
-iterators, tbx_itr_querys / tbx_itr_queryi. It need to be destroyed using
-tbx_destroy at the end.
-
-Below excerpt shows usage of tabix index,
-
-    ...
-        //load index
-        if (!(idx = tbx_index_load3(inname, NULL, HTS_IDX_SILENT_FAIL))) {
-    ...
-        //read using index and region
-        if (!(iter = tbx_itr_querys(idx, region))) {
-    ...
-        while ((c = tbx_itr_next(infile, idx, iter, &data)) >= 0) {
-    ...
-            tbx_destroy(idx);
-    ...
-Refer: read_with_tabix.c
-
 
 ### Pileup and MPileup
 
 Pileup shows the transposed view of the SAM alignment data, i.e. it shows the
-the reference positions and bases which cover that position through different
-reads side by side. MPileup facilitates the piling up of multiple sam files
-against each other and same reference at the same time.
+reference positions and bases which cover that position through different reads
+side by side. MPileup facilitates the piling up of multiple sam files against
+each other and same reference at the same time.
 
 Mpileup has replaced the pileup. The input expects the data to be sorted by
 position.
@@ -1397,9 +1385,10 @@ Refer: flags_htsopt_field.c
 
 The HTSLib api supports thread pooling for better performance. There are a few
 ways in which this can be used. The pool can be made specific for a file or a
-generic pool can be created and shared across multiple files. Another way to
-use thread pool is to schedule tasks explicitly to queues which gets executed
-using threads in pool.
+generic pool can be created and shared across multiple files. Thread pool can
+also be used to execute user defined tasks. The tasks are to be queued on a
+queue and threads in pool executes them and results can be queued back if
+required.
 
 The samples are trivial ones to showcase the usage of api and the number of
 threads to use has to be identified based on complexity and parallelism of the
@@ -1426,7 +1415,7 @@ Below excerpt shows file specific thread pool,
     }
 Refer: split_thread1.c
 
-Below excerpt shows a thread pool shared across files, 
+Below excerpt shows a thread pool shared across files,
 
     ...
     //create a pool of 4 threads
@@ -1446,80 +1435,155 @@ Refer: split_thread2.c
 Note that it is important to analyze the task in hand to decide the number of
 threads to be used. As an example, if the number of threads for reading is set
 to 2 and bam write to 1, keeping total number of threads the same, the
-performance may decrease as decoding is easier than encoding.
-
-Task to be performed on data can be scheduled to a queue and a thread pool can
-be associated to it. There can be multiple pools and queues(processes). There
-are 2 type of queues, one where the result is queued back and retrieved in
-ordered fashion and other where the result may be unordered based on processing
-speed and number of threads. Explicitly created threads can also be used along
-with hts thread pool.
-
-hts_tpool_process_init initializes the queue / process associates a queue with
-thread pool and reserves space for given number of tasks. It takes a parameter
-indicating whether it is for unordered processing or for ordered processing
-where it has to queue the result back. Usually 2 times the number of threads
-are reserved. hts_tpool_dispatch queues the data to the queue. If blocking /
-non-blocking control is required, hts_tpool_dispatch2 can be used instead. If
-hts_tpool_dispatch3 is used instead, it takes cleanup routines as well. The
-cleanup routines will be executed when hts_tpool_process_reset api is invoked,
-to reset the queue to have a restart. hts_tpool_process_flush can ensure that
-all the piledup tasks are processed, in case the queueing and processing happen
-at different speed. hts_tpool_process_shutdown api stops the processing of queue.
-
-Below excerpt shows the usage of 2 queues with a trivial sample, one where the
-processed result may be unordered - based on execution order - and writes as sam
-file. Other queue shows the processing and queueing back, for ordered retrieval
-and save as bam file,
-
-    ...
-    //thread pool
-    if (!(pool = hts_tpool_init(cnt))) {
-    ...
-    //queue to use with thread pool, no queuing of results for Q1 and results queued for Q2
-    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) || 
-        !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
-    ...
-    //open output files - w write as SAM, wb  write as BAM
-    outfile1 = sam_open(file1, "w");
-    outfile2 = sam_open(file2, "wb");
-    ...
-    //start output writer thread for ordered processing
-    twritedata.outfile = outfile2; twritedata.queue = queue2; twritedata.done = &stopcond; twritedata.lock = &stopcondsynch;
-    twritedata.samhdr = in_samhdr;
-    if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
-    ...
-    //check flags and schedule
-    while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
-        if (bamdata->core.flag & BAM_FREAD1) {
-            //read1, scheduling for unordered output, using Q1; processing occurs in order of thread execution
-            td_unordered *tdata = calloc(1, sizeof(td_unordered));
-            tdata->bamdata = bamdata; tdata->outfile = outfile1; tdata->lock = &filesynch; tdata->samhdr = in_samhdr;
-            if (hts_tpool_dispatch(pool, queue1, thread_unordered_proc, tdata) == -1) {
+performance may decrease as bam decoding is easier than encoding.
+
+Custom task / user defined functions can be performed on data using thread pool
+and for that, the task has to be scheduled to a queue. Thread pool associated
+with the queue will perform the task. There can be multiple pools and queues.
+The order of execution of threads are decided based on many factors and load on
+each task may vary, so the completion of the tasks may not be in the order of
+their queueing. The queues can be used in two different ways, one where the
+result is enqueued to queue again to be read in same order as initial queueing,
+two where the resuls is not enqueued and is readily available, possibly in a
+different order than initial queueing. Explicitly created threads can also be
+used along with hts thread pool usage.
+
+hts_tpool_process_init initializes the queue / process, associates a queue with
+thread pool and reserves space for given number of tasks on queue. It takes a
+parameter indicating whether the result need to be enqueued for retrieval or
+not. If the result is enqueued, it is retrieved in the order of scheduling of
+task. Another parameter sets the maximum number of slots for tasks in queue,
+usually 2 times the number of threads are used. The input and output have their
+own queues and they grow as required upto the max set. hts_tpool_dispatch api
+enqueues the task to the queue. The api blocks when there is no space in queue.
+This behavior can be controlled with hts_tpool_dispatch2 api. The queue can be
+reset using hts_tpool_process_reset api where all tasks are discarded. The api
+hts_tpool_dispatch3 supports configuring cleanup routines which are to be run
+when reset occurs on the queue. hts_tpool_process_flush api can ensure that
+all the piled up tasks are processed, a possible case when the queueing and
+processing happen at different speeds. hts_tpool_process_shutdown api stops the
+processing of queue.
+
+The order of execution of tasks depends on the number of threads involved and
+how the threads are scheduled by operating system. When the results are enqueued
+back to queue, they are read in same order of enqueueing of task and in that
+case the order of execution will not be noticed. When the results are not
+enqueued the results are available right away and the order of execution may be
+noticeable. Based on the nature of task and need of order maintenance, users
+can select either of the queueing.
+
+Below excerpts shows the usage of queues and threads in both cases. In the 1st,
+alignments are updated with an aux tag indicating GC ratio. The order of data
+has to be maintained even after update, hence the result queueing is used to
+ensure same order as initial. A number of alignments are bunched together for
+optimal processing.
+    ...
+    void *thread_ordered_proc(void *args)
+    {
+    ...
+        for ( i = 0; i < bamdata->count; ++i) {
+            //add count
+            if (addcount(bamdata->bamarray[i]) < 0) {
+    ...
+        return bamdata;
+    ...
+    void *threadfn_orderedwrite(void *args)
+    {
+        ...
+        do {
+            //get result and write; wait if no result is in queue - until shutdown of queue
+            while ((r = hts_tpool_next_result(tdata->queue))) {
+                bamdata = (data*) hts_tpool_result_data(r);
+                for (i = 0; i < bamdata->count; ++i) {
+                    if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
+        ...
+                hts_tpool_delete_result(r, 0);          //release the result memory
+        ..
+        } while (pthread_cond_timedwait(tdata->done, tdata->lock, &timeout) == ETIMEDOUT);
+    ...
+    int main(int argc, char *argv[])
+    {
+    ...
+        if (!(pool = hts_tpool_init(cnt))) {                //thread pool
+    ...
+        //queue to use with thread pool, for task and results
+        if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) {
+    ...
+        if (!(infile = sam_open(inname, "r"))) {
     ...
-        else if (bamdata->core.flag & BAM_FREAD2) {
-            //read2, scheduling for ordered output, using Q2; proces and result queueing occurs in order of thread execution and result retrieval
-            //showcases the threaded execution and ordered result handling on read2
-            if (hts_tpool_dispatch(pool, queue2, thread_ordered_proc, bamdata) == -1) {
+        if (!(outfile = sam_open(file, "w"))) {
     ...
-        //EOF read
-        //clear the queues of any tasks; NOTE: will be blocked until queue is cleared
-        if (hts_tpool_process_flush(queue1) == -1 || hts_tpool_process_flush(queue2) == -1) {
+        //start output writer thread for ordered processing
+        if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
     ...
-    //trigger exit for ordered write thread
-    if (thread) {
-        //shutown queues to exit the result wait
-        hts_tpool_process_shutdown(queue1);
-        hts_tpool_process_shutdown(queue2);
+        while (c >= 0) {
+            bamdata = getbamstorage(chunk);
+            //read alignments, upto max size for this lot
+            for (cnt = 0; cnt < bamdata->size; ++cnt) {
     ...
-    if (queue1) {
-        hts_tpool_process_destroy(queue1);
+                //queue the lot for processing
+                if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
+                    bamdata) == -1) {
     ...
-    if (thread) {
-        pthread_join(thread, NULL);
+        if (-1 == c) {
+            //EOF read, trigger processing for anything pending, NOTE: will be blocked until queue is cleared
+            if (hts_tpool_process_flush(queue) == -1) {
     ...
-Refer: qtask_thread.c
+        usleep(WAIT * 1000000);                     //to ensure result check is done atleast once after processing
+        //trigger exit for ordered write thread
+        pthread_cond_signal(twritedata.done);
+    ...
+        //shutdown queues to exit the result wait
+        hts_tpool_process_shutdown(queue);
+    ...
+Refer: qtask_ordered.c
 
+In this 2nd, the alignments from input are split to different files containing
+alignment specific to given regions. The order in which the output files are
+created doesnt matter and hence no result queueing is used.
+    ...
+    void * splittoregions(void *args)
+    {
+    ...
+        if (!(infile = sam_open(data->infile, "r"))) {
+    ...
+        if (!(in_samhdr = sam_hdr_read(infile))) {
+    ...
+        if (!(bamdata = bam_init1())) {
+    ...
+        if (!(idx = sam_index_load(infile, data->infile))) {
+    ...
+        if (!(iter = sam_itr_querys(idx, in_samhdr, region))) {
+    ...
+        if (!(outfile = sam_open(file, "w"))) {
+    ...
+        if (sam_hdr_write(outfile, in_samhdr) < 0) {
+    ...
+        while ((ret = sam_itr_next(infile, iter, bamdata)) >= 0) {  //read and write relevant data
+            if (sam_write1(outfile, in_samhdr, bamdata) < 0) {
+    ...
+    int main(int argc, char *argv[])
+    {
+    ...
+        //get regions from bedfile
+        if ((regcnt = readbedfile(bedfile, &regions)) < 0) {
+    ...
+        if (!(pool = hts_tpool_init(cnt))) {                //thread pool
+    ...
+        if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) {
+    ...
+        for (c = 0; c < regcnt; ++c) {
+    ...
+            //schedule jobs to run in parallel
+            if (hts_tpool_dispatch(pool, queue, splittoregions, task) < 0) {
+    ...
+        //trigger processing for anything pending, NOTE: will be blocked until queue is cleared
+        if (hts_tpool_process_flush(queue) == -1) {
+    ...
+        //shutdown queues to exit the result wait
+        hts_tpool_process_shutdown(queue);
+    ...
+Refer: qtask_unordered.c
 
 ## More Information
 
diff --git a/samples/Makefile b/samples/Makefile
index 4821b9dd7..9c51e8edc 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -13,8 +13,8 @@ LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_sta
 PRGS = flags split split2 cram read_fast read_header read_ref read_bam \
 	read_aux dump_aux add_header rem_header update_header mod_bam mod_aux \
 	mod_aux_ba write_fast idx_on_write read_reg read_multireg pileup \
-	mpileup modstate pileup_mod flags_field split_t1 split_t2 qtask \
-	read_fast_i
+	mpileup modstate pileup_mod flags_field split_t1 split_t2 \
+	read_fast_i qtask_ordered qtask_unordered index_fasta
 
 all: $(PRGS)
 
@@ -102,8 +102,14 @@ split_t1:
 split_t2:
 	$(CC) $(CFLAGS) -I $(HTS_DIR) split_thread2.c -o $@  $(LDFLAGS)
 
-qtask:
-	$(CC) $(CFLAGS) -I $(HTS_DIR) qtask_thread.c -o $@  $(LDFLAGS)
+index_fasta:
+	$(CC) $(CFLAGS) -I $(HTS_DIR) index_fasta.c -o $@  $(LDFLAGS)
+
+qtask_ordered:
+	$(CC) $(CFLAGS) -I $(HTS_DIR) qtask_ordered.c -o $@  $(LDFLAGS)
+
+qtask_unordered:
+	$(CC) $(CFLAGS) -I $(HTS_DIR) qtask_unordered.c -o $@  $(LDFLAGS)
 
 clean:
 	find . -name "*.o" | xargs rm -rf
diff --git a/samples/README.md b/samples/README.md
index 6043847d4..6ede74007 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -4,7 +4,7 @@ data, and is the core library used by [samtools][2] and [bcftools][3].
 
 A set of sample programs are available which showcases the usage of APIs in HTSlib.
 They are based on version 1.17 of HTSLib and are mainly for demonstration of API usage.
-Further optimization and error handling might be required for actual usage.
+Further optimisation and error handling might be required for actual usage.
 
 
 [1]: http://samtools.github.io/hts-specs/
@@ -72,7 +72,7 @@ indexed.
 [Read_ref][Read_ref]
 
   This application showcases the read and access of header data. It shows
-  all reference names which has length equal or greather to given input.
+  all reference names which has length equal or greater to given input.
 
 [Read_bam][Read_bam]
 
@@ -129,14 +129,18 @@ indexed.
 
 [Write_fast][Write_fast]
 
-  This application showcases the fasta/fastq data write. It appends given data
-  file and creates an index file for it.
+  This application showcases the fasta/fastq data write. It appends data on
+  given file.
 
 [Index_write][Index_write]
 
   This application showcases the creation of index along with output
   creation. Based on file type and shift, it creates bai, csi or crai files.
 
+[Index_fast][Index_fast]
+
+  This application showcases index creation on fasta/fastq reference data.
+
 [Read_reg][Read_reg]:
 
   This application showcases the usage of region specification in alignment
@@ -144,7 +148,7 @@ indexed.
 
 [Read_multireg][Read_multireg]:
 
-  This application showcases the usage of mulitple region specification in
+  This application showcases the usage of multiple region specification in
   alignment read.
 
 [Read_fast_index][Read_fast_index]
@@ -195,13 +199,19 @@ indexed.
   and other as bam. A pool of 4 threads is created and shared for both read
   and write.
 
-[Qtask_thread][Qtask_thread]
+[Qtask_ordered][Qtask_ordered]
+
+  This application showcases the use of queues and threads for custom
+  processing. In this, alignments in input file are updated with GC ratio on a
+  custom aux tag. The processing may occur in any order and the result is
+  retrieved in same order as it was queued and saved to disk.
+
+[Qtask_unordered][Qtask_unordered]
 
-  This application shocases the use of mulitple queues, scheduling of different
-  tasks and getting results in orderered and unordered fashion. It saves the
-  read1 and read2 as separate files in given directory, one as sam and other
-  as bam. The samfile/read1 can have unordered data and bamfile/read2 will have
-  ordered data.
+  This application showcases the use of queues and threads for custom
+  processing. In this, alignments in input files are split to different files
+  for requested regions. The processing may occur in any order and results may
+  appear in the order completion of processing.
 
 ### More Information
 
@@ -227,10 +237,10 @@ examples per demonstration tool.
 [Mod_aux_ba]: mod_aux_ba.c
 [Write_fast]: write_fast.c
 [Index_write]: index_write.c
+[Index_fasta]: index_fasta.c
 [Read_reg]: index_reg_read.c
 [Read_multireg]: index_multireg_read.c
 [Read_fast_index]: read_fast_index.c
-[Read_tbx]: read_with_tabix.c
 [Pileup]: pileup.c
 [Mpileup]: mpileup.c
 [Modstate]: modstate.c
@@ -238,5 +248,6 @@ examples per demonstration tool.
 [Flags_field]: flags_htsopt_field.c
 [Split_thread1]: split_thread1.c
 [Split_thread2]: split_thread2.c
-[Qtask_thread]: qtask_thread.c
+[Qtask_ordered]: qtask_ordered.c
+[Qtask_unordered]: qtask_unordered.c
 [DEMO]: DEMO.md
diff --git a/samples/add_header.c b/samples/add_header.c
index d1a2fc13c..066b1d438 100644
--- a/samples/add_header.c
+++ b/samples/add_header.c
@@ -24,20 +24,20 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
 {
     fprintf(fp, "Usage: add_header infile\n\
-Adds new header lines of SQ, RG, PG and CO typs\n");
+Adds new header lines of SQ, RG, PG and CO types\n");
     return;
 }
 
diff --git a/samples/cram.c b/samples/cram.c
index 5f55e65d2..7b1342377 100644
--- a/samples/cram.c
+++ b/samples/cram.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/dump_aux.c b/samples/dump_aux.c
index 49251fe04..3caa16027 100644
--- a/samples/dump_aux.c
+++ b/samples/dump_aux.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
@@ -92,7 +92,7 @@ int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data)
         fprintf(fp, "%c", auxBType);
         for (i = 0; i < auxBcnt; ++i) {                                                     //iterate the array
             fprintf(fp, ",");
-            //calling recurssively  with index to reuse a few lines
+            //calling recursively  with index to reuse a few lines
             if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) {
                 return EXIT_FAILURE;
             }
diff --git a/samples/flags_demo.c b/samples/flags_demo.c
index e03fc6cd8..ac26be86c 100644
--- a/samples/flags_demo.c
+++ b/samples/flags_demo.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/flags_htsopt_field.c b/samples/flags_htsopt_field.c
index 4b64445e3..40a0affc4 100644
--- a/samples/flags_htsopt_field.c
+++ b/samples/flags_htsopt_field.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/index_fasta.c b/samples/index_fasta.c
new file mode 100644
index 000000000..ba0489094
--- /dev/null
+++ b/samples/index_fasta.c
@@ -0,0 +1,72 @@
+/*  index_fasta.c --  showcases the htslib api usage
+
+    Copyright (C) 2024 Genome Research Ltd.
+
+    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+*/
+
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <time.h>
+#include <htslib/sam.h>
+#include <htslib/faidx.h>
+
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
+returns nothing
+*/
+static void print_usage(FILE *fp)
+{
+    fprintf(fp, "Usage: index_fasta <file>\n\
+Indexes a fasta/fastq file and saves along with source.\n");
+    return;
+}
+
+/// main - indexes fasta/fastq file
+/** @param argc - count of arguments
+ *  @param argv - pointer to array of arguments
+returns 1 on failure 0 on success
+*/
+int main(int argc, char *argv[])
+{
+    const char *filename = NULL;             //file name
+    int ret = EXIT_FAILURE;
+
+    if (argc != 2) {
+        print_usage(stdout);
+        goto end;
+    }
+    filename = argv[1];
+
+    // index the file
+    if (fai_build3(filename, NULL, NULL) == -1) {
+        printf("Indexing failed with %d\n", errno);
+        goto end;
+    }
+    //this creates an .fai file. If the file is bgzipped, a .gzi file will be created along with .fai
+    ret = EXIT_SUCCESS;
+end:
+    //clean up
+    return ret;
+}
diff --git a/samples/index_multireg_read.c b/samples/index_multireg_read.c
index dbe8f15f9..7bb864990 100644
--- a/samples/index_multireg_read.c
+++ b/samples/index_multireg_read.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the print_usage
-/** @param fp pointer to the file / terminal to which print_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/index_reg_read.c b/samples/index_reg_read.c
index 346d5428f..dec684933 100644
--- a/samples/index_reg_read.c
+++ b/samples/index_reg_read.c
@@ -24,19 +24,19 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the print_usage
-/** @param fp pointer to the file / terminal to which print_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
 {
-    fprintf(fp, "Usage: readreg infile idxfile region\n\
+    fprintf(fp, "Usage: read_reg infile idxfile region\n\
 Reads alignments matching to a specific region\n\
 \\. from start of file\n\
 \\* only unmapped reads\n\
diff --git a/samples/index_write.c b/samples/index_write.c
index 8fd2bc968..9ec63d4ad 100644
--- a/samples/index_write.c
+++ b/samples/index_write.c
@@ -24,15 +24,15 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <libgen.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/mod_aux.c b/samples/mod_aux.c
index d5ed18cde..da6a4b08d 100644
--- a/samples/mod_aux.c
+++ b/samples/mod_aux.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/mod_aux_ba.c b/samples/mod_aux_ba.c
index 8ef90ee1e..836a3d39c 100644
--- a/samples/mod_aux_ba.c
+++ b/samples/mod_aux_ba.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/mod_bam.c b/samples/mod_bam.c
index 9f1eb324e..4dd2f31df 100644
--- a/samples/mod_bam.c
+++ b/samples/mod_bam.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/modstate.c b/samples/modstate.c
index 976391684..4d5f67635 100644
--- a/samples/modstate.c
+++ b/samples/modstate.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/mpileup.c b/samples/mpileup.c
index fe933748e..ecab70584 100644
--- a/samples/mpileup.c
+++ b/samples/mpileup.c
@@ -24,15 +24,15 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <ctype.h>
 #include <htslib/sam.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/pileup.c b/samples/pileup.c
index 11e2fb02f..be7aad801 100644
--- a/samples/pileup.c
+++ b/samples/pileup.c
@@ -24,15 +24,15 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <ctype.h>
 #include <htslib/sam.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/pileup_mod.c b/samples/pileup_mod.c
index 24d6cf539..81ac5a540 100644
--- a/samples/pileup_mod.c
+++ b/samples/pileup_mod.c
@@ -24,15 +24,15 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <ctype.h>
 #include <htslib/sam.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/qtask_ordered.c b/samples/qtask_ordered.c
new file mode 100644
index 000000000..f080209bc
--- /dev/null
+++ b/samples/qtask_ordered.c
@@ -0,0 +1,367 @@
+/*  qtask_ordered.c --  showcases the htslib api usage
+
+    Copyright (C) 2024 Genome Research Ltd.
+
+    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+*/
+
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <htslib/sam.h>
+#include <htslib/thread_pool.h>
+
+typedef struct orderedwrite {
+    samFile *outfile;           //output file handle
+    sam_hdr_t *samhdr;          //header used to write data
+    hts_tpool_process *queue;   //queue from which results to be retrieved
+
+    pthread_cond_t *done;       //indicates exit condition
+    pthread_mutex_t *lock;      //to synchronise queue access
+} orderedwrite;
+
+typedef struct data {
+    int count;                  //used up size
+    int size;                   //max size
+    bam1_t **bamarray;          //bam1_t array for optimal queueing
+} data;
+
+#define WAIT 1                  //1 sec
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
+returns nothing
+*/
+static void print_usage(FILE *fp)
+{
+    fprintf(fp, "Usage: qtask_ordered infile threadcount outdir [chunksize]\n\
+Calculates GC ratio - sum(G,C) / sum(A,T,C,G,N) - and adds to each alignment\n\
+as xr:f aux tag. Output is saved in outdir.\n\
+chunksize [100] sets the number of alignments clubbed together to process.\n");
+    return;
+}
+
+/// addcount - calculates and adds the aux tag
+/** @param bamdata pointer to bam data
+returns 0 on success and -1 for failure
+*/
+int addcount(bam1_t *bamdata)
+{
+    int pos = 0, gc = 0, ret = 0;
+    float gcratio = 0;
+    uint8_t *data = bam_get_seq(bamdata);
+    for (pos = 0; pos < bamdata->core.l_qseq; ++pos) {
+        switch(seq_nt16_str[bam_seqi(data, pos)]) {
+            case 'G':   //fall through
+            case 'C':
+                gc++;
+            break;
+        }
+    }
+    gcratio = gc / (float) bamdata->core.l_qseq;
+
+    if (bam_aux_append(bamdata, "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
+        printf("Failed to add aux tag xr, errno: %d\n", errno);
+        ret = -1;
+    }
+    return ret;
+}
+
+
+/// getbamstorage - allocates storage for alignments to queue
+/** @param chunk no of alignments to be queued together
+returns allocated data.
+*/
+data* getbamstorage(int chunk)
+{
+    int i = 0;
+    data *bamdata = malloc(sizeof(data));
+    if (!bamdata) {
+        return NULL;
+    }
+    bamdata->bamarray = malloc(chunk * sizeof(bam1_t*));
+    if (!bamdata->bamarray) {
+        return NULL;
+    }
+    for (i = 0; i < chunk; ++i) {
+        bamdata->bamarray[i] = bam_init1();
+    }
+    bamdata->count = 0;
+    bamdata->size = chunk;
+
+    return bamdata;
+}
+
+/// thread_ordered_proc - does the processing of task in queue and queues the output back
+/** @param args pointer to set of data to be processed
+returns the processed data
+the processing could be in any order based on the number of threads in use but read of output
+from queue will be in order
+*/
+void *thread_ordered_proc(void *args)
+{
+    int i = 0;
+    data *bamdata = (data*)args;
+    for ( i = 0; i < bamdata->count; ++i) {
+        //add count
+        if (addcount(bamdata->bamarray[i]) < 0) {
+            printf("Failed to calculate gc data\n");
+            break;
+        }
+    }
+    return bamdata;
+}
+
+/// threadfn_orderedwrite - thread that read the output from queue and writes
+/** @param args pointer to data specific for the thread
+returns NULL
+*/
+void *threadfn_orderedwrite(void *args)
+{
+    orderedwrite *tdata = (orderedwrite*)args;
+    hts_tpool_result *r = NULL;
+    data *bamdata = NULL;
+    int i = 0;
+
+    struct timeval now;
+    struct timespec timeout;
+    long usec = 0;
+
+    pthread_mutex_lock(tdata->lock);                //lock to check the exit condition
+    do {
+        //get result and write; wait if no result is in queue - until shutdown of queue
+        while ((r = hts_tpool_next_result(tdata->queue))) {
+            bamdata = (data*) hts_tpool_result_data(r);
+            for (i = 0; i < bamdata->count; ++i) {
+                if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
+                    printf("Failed to write output data\n");
+                    break;
+                }
+            }
+            hts_tpool_delete_result(r, 0);          //release the result memory
+            if (bamdata) {
+                for (i = 0; i < bamdata->size; ++i) {
+                    bam_destroy1(bamdata->bamarray[i]); //clear the bamdata;
+                }
+                free(bamdata->bamarray);
+                free(bamdata);
+            }
+        }
+        //no more data in queues, check and wait till exit is triggered
+        gettimeofday(&now, NULL);
+        usec = now.tv_usec + 100000;    //+100msec
+        if (usec >= 1000000) {          //overflow
+            usec %= 1000000;
+            now.tv_sec++;
+        }
+        timeout.tv_sec = now.tv_sec;
+        timeout.tv_nsec = usec * 1000;
+
+    } while (pthread_cond_timedwait(tdata->done, tdata->lock, &timeout) == ETIMEDOUT);
+
+    pthread_mutex_unlock(tdata->lock);
+
+    return NULL;
+}
+
+/// main_demo - start of the demo
+/** @param argc - count of arguments
+ *  @param argv - pointer to array of arguments
+returns 1 on failure 0 on success
+*/
+int main(int argc, char *argv[])
+{
+    const char *inname = NULL, *outdir = NULL;
+    char *file = NULL;
+    int c = 0, ret = EXIT_FAILURE, cnt = 0, clearthread = 0, chunk = 0;
+    size_t size = 0;
+    samFile *infile = NULL, *outfile = NULL;
+    sam_hdr_t *in_samhdr = NULL;
+    pthread_mutex_t stopcondsynch = PTHREAD_MUTEX_INITIALIZER;
+    pthread_cond_t stopcond = PTHREAD_COND_INITIALIZER;
+    pthread_t thread;
+    orderedwrite twritedata = {0};
+    hts_tpool *pool = NULL;
+    hts_tpool_process *queue = NULL;
+    htsThreadPool tpool = {NULL, 0};
+    data *bamdata = NULL;
+
+    //qtask infile threadcount outdir [chunksize]
+    if (argc != 4 && argc != 5) {
+        print_usage(stdout);
+        goto end;
+    }
+    inname = argv[1];
+    cnt = atoi(argv[2]);
+    outdir = argv[3];
+    if (argc == 5) {
+        chunk = atoi(argv[4]);
+    }
+
+    if (cnt < 1) {
+        cnt = 1;
+    }
+    if (chunk < 1) {
+        chunk = 10000;
+    }
+
+    //allocate space for output
+    size = (strlen(outdir) + sizeof("/out.sam") + 1); //space for output file name and null termination
+    file = malloc(size);
+    if (!file) {
+        printf("Failed to set output path\n");
+        goto end;
+    }
+    snprintf(file, size, "%s/out.sam", outdir);         //output file name
+    if (!(pool = hts_tpool_init(cnt))) {                //thread pool
+        printf("Failed to create thread pool\n");
+        goto end;
+    }
+    tpool.pool = pool;      //to share the pool for file read and write as well
+    //queue to use with thread pool, for task and results
+    if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) {
+        printf("Failed to create queue\n");
+        goto end;
+    }
+    //open input file - r reading
+    if (!(infile = sam_open(inname, "r"))) {
+        printf("Could not open %s\n", inname);
+        goto end;
+    }
+    //open output files - w write as SAM, wb  write as BAM
+    if (!(outfile = sam_open(file, "w"))) {
+        printf("Could not open output file\n");
+        goto end;
+    }
+    //share the thread pool with i/o files
+    if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 ||
+          hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0) {
+        printf("Failed to set threads to i/o files\n");
+        goto end;
+    }
+    //read header, required to resolve the target names to proper ids
+    if (!(in_samhdr = sam_hdr_read(infile))) {
+        printf("Failed to read header from file!\n");
+        goto end;
+    }
+    /*tasks are queued, worker threads get them and processes in parallel;
+    the results are queued and they are to be removed in parallel as well*/
+    //start output writer thread for ordered processing
+    twritedata.outfile = outfile;
+    twritedata.queue = queue;
+    twritedata.done = &stopcond;
+    twritedata.lock = &stopcondsynch;
+    twritedata.samhdr = in_samhdr;
+    if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
+        printf("Failed to create writer thread\n");
+        goto end;
+    }
+    clearthread = 1;
+    //write header
+    if ((sam_hdr_write(outfile, in_samhdr) == -1)) {
+        printf("Failed to write header\n");
+        goto end;
+    }
+
+    c = 0;
+    while (c >= 0) {
+        bamdata = getbamstorage(chunk);
+        //read alignments, upto max size for this lot
+        for (cnt = 0; cnt < bamdata->size; ++cnt) {
+            c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
+            if (c >= 0) {
+                continue;   //read next
+            }
+            else {
+                break;      //failure
+            }
+        }
+        if (c >= -1 ) {
+            //max size data or reached EOF
+            bamdata->count = ( c >= 0 )? bamdata->size : cnt;
+            //queue the lot for processing
+            if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
+                bamdata) == -1) {
+                printf("Failed to schedule processing\n");
+                goto end;
+            }
+            bamdata = NULL;
+        }
+        else {
+            printf("Error in reading data\n");
+            break;
+        }
+    }
+    if (-1 == c) {
+        //EOF read, trigger processing for anything pending, NOTE: will be blocked until queue is cleared
+        if (hts_tpool_process_flush(queue) == -1) {
+            printf("Failed to flush queues\n");
+            goto end;
+        }
+        //all tasks done, check for processing completion
+        while (1) {
+            if (!hts_tpool_process_empty(queue)) {
+                usleep(WAIT * 1000000);     //results yet to be empty, check again
+                continue;
+            }
+            break;
+        }
+        ret = EXIT_SUCCESS;
+    }
+
+    //trigger exit for ordered write thread
+    pthread_mutex_lock(twritedata.lock);
+    pthread_cond_signal(twritedata.done);
+    pthread_mutex_unlock(twritedata.lock);
+end:
+    //cleanup
+    if (clearthread) {
+        pthread_join(thread, NULL);
+    }
+    if (in_samhdr) {
+        sam_hdr_destroy(in_samhdr);
+    }
+    if (infile) {
+        sam_close(infile);
+    }
+    if (bamdata) {
+        for (cnt = 0; cnt < bamdata->size; ++cnt) {
+            bam_destroy1(bamdata->bamarray[cnt]);
+        }
+        free(bamdata);
+    }
+    if (file) {
+        free(file);
+    }
+    if (outfile) {
+        sam_close(outfile);
+    }
+    if (queue) {
+        hts_tpool_process_destroy(queue);
+    }
+    if (pool) {
+        hts_tpool_destroy(pool);
+    }
+    return ret;
+}
diff --git a/samples/qtask_thread.c b/samples/qtask_thread.c
deleted file mode 100644
index 558a90c86..000000000
--- a/samples/qtask_thread.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*  split_thread3.c --  showcases the htslib api usage
-
-    Copyright (C) 2023 Genome Research Ltd.
-
-    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE
-
-*/
-
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
-
-#include <getopt.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <sys/time.h>
-#include <htslib/sam.h>
-#include <htslib/thread_pool.h>
-
-//thread specific data
-typedef struct td_unordered {
-    samFile *outfile;           //output file handle
-    sam_hdr_t *samhdr;          //header used to write data
-    bam1_t *bamdata;            //data to be written
-
-    pthread_mutex_t *lock;      //to synchronize write to file
-} td_unordered;
-
-typedef struct td_orderedwrite {
-    samFile *outfile;           //output file handle
-    sam_hdr_t *samhdr;          //header used to write data
-    hts_tpool_process *queue;   //queue from which results to be retrieved
-
-    pthread_cond_t *done;       //indicates exit condition
-    pthread_mutex_t *lock;      //to synchronize queue access
-} td_orderedwrite;
-
-/// print_usage - print the usage
-/** @param fp pointer to the file / terminal to which usage to be dumped
-returns nothing
-*/
-static void print_usage(FILE *fp)
-{
-    fprintf(fp, "Usage: qtask infile threadcount outdir\n\
-Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\
-GC ratio - sum(G,C) / sum(A,T,C,G,N) - is calculated and added on each alignment as xr:f aux tag.\n");
-    return;
-}
-
-/// addcount - calculates and adds the aux tag
-/** @param bamdata pointer to bam data
-returns 0 on success and -1 for failure
-*/
-int addcount(bam1_t *bamdata)
-{
-    int pos = 0, gc = 0, ret = 0;
-    float gcratio = 0;
-    uint8_t *data = bam_get_seq(bamdata);
-    for (pos = 0; pos < bamdata->core.l_qseq; ++pos) {
-        switch(seq_nt16_str[bam_seqi(data, pos)]) {
-            case 'G':   //fall through
-            case 'C':
-                gc++;
-            break;
-        }
-    }
-    gcratio = gc / (float) bamdata->core.l_qseq;
-
-    if (bam_aux_append(bamdata, "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
-        printf("Failed to add aux tag xr, errno: %d\n", errno);
-        ret = -1;
-    }
-    return ret;
-}
-
-
-/// thread_unordered_proc - does the processing of task in queue1 (Read1) and writes output
-/** @param args pointer to data specific for the thread
-returns none
-the output could be unordered based on the number of threads in use
-*/
-void *thread_unordered_proc(void *args)
-{
-    td_unordered *tdata = (td_unordered*)args;
-
-    //add count
-    if (addcount(tdata->bamdata) < 0) {
-        printf("Failed to calculate gc data\n");
-    }
-
-    //lock to have exclusive write access to outfile
-    pthread_mutex_lock(tdata->lock);
-    if (sam_write1(tdata->outfile, tdata->samhdr, tdata->bamdata) < 0) {
-        printf("Failed to write output data\n");
-    }
-    pthread_mutex_unlock(tdata->lock);
-
-    bam_destroy1(tdata->bamdata);
-    free(tdata);
-    return NULL;
-}
-
-/// thread_ordered_proc - does the processing of task in queue2 (Read2) and queues the output back
-/** @param argss pointer to data specific for the thread
-returns the processed bamdata
-the processing could be unordered based on the number of threads in use but read of output from queue
-will be in order
-*/
-void *thread_ordered_proc(void *args)
-{
-    bam1_t *bamdata = (bam1_t*)args;
-    //add count
-    if (addcount(bamdata) < 0) {
-        printf("Failed to calculate gc data\n");
-    }
-    return bamdata;
-}
-
-/// thread_ordered_proc - thread that read the output from queue2 (Read2) and writes
-/** @param argss pointer to data specific for the thread
-returns NULL
-*/
-void *threadfn_orderedwrite(void *args)
-{
-    td_orderedwrite *tdata = (td_orderedwrite*)args;
-    hts_tpool_result *r = NULL;
-    bam1_t *bamdata = NULL;
-
-    struct timeval now;
-    struct timespec timeout;
-
-    //get time to set the wait time on exit condition check
-    gettimeofday(&now, NULL);
-    timeout.tv_sec = now.tv_sec + 0;
-    timeout.tv_nsec = 0;
-
-    pthread_mutex_lock(tdata->lock);                //lock to check the exit condition
-    while (pthread_cond_timedwait(tdata->done, tdata->lock, &timeout) == ETIMEDOUT) {
-        //exit not set, get result and write; wait if no result is in queue - until shutdown of queue
-        while ((r = hts_tpool_next_result_wait(tdata->queue))) {
-            bamdata = (bam1_t*) hts_tpool_result_data(r);
-            if (bamdata && sam_write1(tdata->outfile, tdata->samhdr, bamdata) < 0) {
-                printf("Failed to write output data\n");
-            }
-            hts_tpool_delete_result(r, 0);          //release the result memory
-            if (bamdata) {
-                bam_destroy1(bamdata);              //clear the bamdata;
-                bamdata = NULL;
-            }
-        }
-        //no more data in queues, check and wait till exit is triggered
-        gettimeofday(&now, NULL);
-        timeout.tv_sec = now.tv_sec + 1;            //atmost 1 sec wait
-        timeout.tv_nsec = 0;
-    }
-    pthread_mutex_unlock(tdata->lock);
-    return NULL;
-}
-
-/// main_demo - start of the demo
-/** @param argc - count of arguments
- *  @param argv - pointer to array of arguments
-returns 1 on failure 0 on success
-*/
-int main(int argc, char *argv[])
-{
-    const char *inname = NULL, *outdir = NULL;
-    char *file1 = NULL, *file2 = NULL;
-    int c = 0, ret = EXIT_FAILURE, cnt = 0;
-    size_t size = 0;
-    samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL;
-    sam_hdr_t *in_samhdr = NULL;
-    bam1_t *bamdata = NULL;
-    pthread_mutex_t filesynch = PTHREAD_MUTEX_INITIALIZER, stopcondsynch = PTHREAD_MUTEX_INITIALIZER;
-    pthread_cond_t stopcond = PTHREAD_COND_INITIALIZER;
-    pthread_t thread = 0;
-    td_orderedwrite twritedata = {0};
-    hts_tpool *pool = NULL;
-    hts_tpool_process *queue1 = NULL, *queue2 = NULL;
-
-    //split_t3 infile threadcount outdir
-    if (argc != 4) {
-        print_usage(stdout);
-        goto end;
-    }
-    inname = argv[1];
-    cnt = atoi(argv[2]);
-    outdir = argv[3];
-
-    if (cnt < 1) {
-        cnt = 1;
-    }
-
-    //allocate space for output
-    size = (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination
-    file1 = malloc(size); file2 = malloc(size);
-    if (!file1 || !file2) {
-        printf("Failed to set output path\n");
-        goto end;
-    }
-
-    //output file names
-    snprintf(file1, size, "%s/1.sam", outdir);  //for SAM output
-    snprintf(file2, size, "%s/2.bam", outdir);  //for BAM output
-    //bam data storage
-    if (!(bamdata = bam_init1())) {
-        printf("Failed to initialize bamdata\n");
-        goto end;
-    }
-    //thread pool
-    if (!(pool = hts_tpool_init(cnt))) {
-        printf("Failed to create thread pool\n");
-        goto end;
-    }
-    //queue to use with thread pool, no queuing of results for Q1 and results queued for Q2
-    if (!(queue1 = hts_tpool_process_init(pool, cnt * 2, 1)) ||
-        !(queue2 = hts_tpool_process_init(pool, cnt * 2, 0))) {
-        printf("Failed to create queue\n");
-        goto end;
-    }
-    //open input file - r reading
-    if (!(infile = sam_open(inname, "r"))) {
-        printf("Could not open %s\n", inname);
-        goto end;
-    }
-
-    //open output files - w write as SAM, wb  write as BAM
-    outfile1 = sam_open(file1, "w");
-    outfile2 = sam_open(file2, "wb");
-    if (!outfile1 || !outfile2) {
-        printf("Could not open output file\n");
-        goto end;
-    }
-    //read header, required to resolve the target names to proper ids
-    if (!(in_samhdr = sam_hdr_read(infile))) {
-        printf("Failed to read header from file!\n");
-        goto end;
-    }
-    //start output writer thread for ordered processing
-    twritedata.outfile = outfile2;
-    twritedata.queue = queue2;
-    twritedata.done = &stopcond;
-    twritedata.lock = &stopcondsynch;
-    twritedata.samhdr = in_samhdr;
-    if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
-        printf("Failed to create writer thread\n");
-        goto end;
-    }
-    //write header
-    if ((sam_hdr_write(outfile1, in_samhdr) == -1) ||
-        (sam_hdr_write(outfile2, in_samhdr) == -1)) {
-        printf("Failed to write header\n");
-        goto end;
-    }
-    //check flags and schedule
-    while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
-        if (bamdata->core.flag & BAM_FREAD1) {
-            //read1, scheduling for unordered output, using Q1; processing occurs in order of thread execution
-            td_unordered *tdata = malloc(sizeof(td_unordered));
-            tdata->bamdata = bamdata;
-            tdata->outfile = outfile1;
-            tdata->lock = &filesynch;
-            tdata->samhdr = in_samhdr;
-            if (hts_tpool_dispatch(pool, queue1, thread_unordered_proc, tdata) == -1) {
-                printf("Failed to schedule unordered processing\n");
-                goto end;
-            }
-        }
-        else if (bamdata->core.flag & BAM_FREAD2) {
-            //read2, scheduling for ordered output, using Q2; proces and result queueing occurs in order of thread execution and result retrieval
-            //showcases the threaded execution and ordered result handling on read2
-            if (hts_tpool_dispatch(pool, queue2, thread_ordered_proc, bamdata) == -1) {
-                printf("Failed to schedule ordered processing\n");
-                goto end;
-            }
-        }
-        bamdata = bam_init1();                  //for next read
-    }
-    if (-1 == c) {
-        //EOF read
-        //clear the queues of any tasks; NOTE: will be blocked until queue is cleared
-        if (hts_tpool_process_flush(queue1) == -1 || hts_tpool_process_flush(queue2) == -1) {
-            printf("Failed to flush queues\n");
-            goto end;
-        }
-        ret = EXIT_SUCCESS;
-    }
-    else {
-        printf("Error in reading data\n");
-    }
-
-    //trigger exit for ordered write thread
-    if (thread) {
-        //shutown queues to exit the result wait
-        hts_tpool_process_shutdown(queue1);
-        hts_tpool_process_shutdown(queue2);
-
-        pthread_mutex_lock(twritedata.lock);
-        pthread_cond_signal(twritedata.done);
-        pthread_mutex_unlock(twritedata.lock);
-        pthread_join(thread, NULL);
-        thread = 0;
-    }
-end:
-    //cleanup
-    if (in_samhdr) {
-        sam_hdr_destroy(in_samhdr);
-    }
-    if (infile) {
-        sam_close(infile);
-    }
-    if (bamdata) {
-        bam_destroy1(bamdata);
-    }
-    if (file1) {
-        free(file1);
-    }
-    if (file2) {
-        free(file2);
-    }
-    if (outfile1) {
-        sam_close(outfile1);
-    }
-    if (outfile2) {
-        sam_close(outfile2);
-    }
-    if (queue1) {
-        hts_tpool_process_destroy(queue1);
-    }
-    if (queue2) {
-        hts_tpool_process_destroy(queue2);
-    }
-    if (pool) {
-        hts_tpool_destroy(pool);
-    }
-    if (thread) {
-        pthread_join(thread, NULL);
-    }
-    return ret;
-}
diff --git a/samples/qtask_unordered.c b/samples/qtask_unordered.c
new file mode 100644
index 000000000..7e659fd3a
--- /dev/null
+++ b/samples/qtask_unordered.c
@@ -0,0 +1,311 @@
+/*  qtask_unordered.c --  showcases the htslib api usage
+
+    Copyright (C) 2024 Genome Research Ltd.
+
+    Author: Vasudeva Sarma <vasudeva.sarma@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+
+*/
+
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <htslib/hfile.h>
+#include <htslib/sam.h>
+#include <htslib/thread_pool.h>
+
+typedef struct beddata {
+    char *name;                 //chromosome name
+    hts_pos_t start;            //start position
+    hts_pos_t end;              //end position
+} beddata;
+
+typedef struct splitdata {
+    beddata *region;            //region information
+    const char *infile;         //input file
+    const char *outdir;         //output path
+} splitdata;
+
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
+returns nothing
+*/
+static void print_usage(FILE *fp)
+{
+    fprintf(fp, "Usage: qtask_unordered infile threadcount outdir bedfile\n\
+Splits the input to files specific for regions given in bedfile. Output is\n\
+saved in outdir. Expects the index file to be present along with infile.\n");
+    return;
+}
+
+/// readbedfile - read the bedfile and return region array
+/** @param file bed file name
+ *  @param data - output, pointer to array of beddata data, to hold bed regions
+returns number of regions in data
+*/
+int readbedfile(const char *file, struct beddata **data)
+{
+    int ret = -1, cnt = 0 , max = 0;
+    kstring_t line = KS_INITIALIZE;
+    htsFile *bedfile = NULL;
+    char *sptr = NULL, *token = NULL;
+    const char *sep ="\t";
+    beddata *bedtoks = NULL;
+
+    if (!data || *data) {
+        printf("Invalid argument\n");
+        goto fail;
+    }
+    if (!(bedfile = hts_open(file, "r"))) {
+        printf("Failed to open bedfile\n");
+        goto fail;
+    }
+    //get lines one by one and get region details
+    while (!(ret = kgetline(&line, (kgets_func*)hgets, bedfile->fp.hfile))) {
+        if (!line.l) {
+            continue;               //skip empty line
+        }
+        if  (line.s[0] == '#' || !strncmp("track ", line.s, sizeof("track ") - 1)) {
+            ks_clear(&line);
+            continue;               //ignore track lines and comments
+        }
+        token = strtok_r(line.s, sep, &sptr);
+        if (token) {                //allocate memory for regions
+            if ((cnt+1) > max) {
+                max += 100;         //for another 100 regions
+                bedtoks = realloc(bedtoks, sizeof(beddata) * max);
+            }
+        }
+        else {
+            break;
+        }
+        bedtoks[cnt].name = strdup(token);  //chromosome name
+        token = strtok_r(NULL, sep, &sptr);
+        if (!token) {
+            break;
+        }                                   //start position
+        bedtoks[cnt].start = token ? atoll(token) : 0;
+        token = strtok_r(NULL, sep, &sptr);
+        if (!token) {
+            break;
+        }                                   //end position
+        bedtoks[cnt++].end = token ? atoll(token) : 0;
+        ks_clear(&line);
+    }
+    if (ret != EOF) {
+        goto fail;
+    }
+    ret = cnt;
+    if (bedfile) {
+        hts_close(bedfile);
+    }
+    ks_free(&line);
+    if (!cnt) {
+        goto fail;
+    }
+    *data = bedtoks;
+    return ret;
+fail:
+    if (bedfile) {
+        hts_close(bedfile);
+    }
+    ks_free(&line);
+    if (bedtoks) {
+        for (max = cnt, cnt = 0; cnt < max; ++cnt) {
+            free(bedtoks[cnt].name);
+        }
+        free(bedtoks);
+    }
+    bedtoks = NULL;
+    return 0;
+}
+
+/// splittoregions - saves the relevant data to separate file
+/** @param args pointer to set of data to be processed
+returns NULL
+the processing could be in any order based on the number of threads in use
+*/
+void * splittoregions(void *args)
+{
+    samFile *infile = NULL, *outfile = NULL;
+    sam_hdr_t *in_samhdr = NULL;
+    bam1_t *bamdata = NULL;
+    hts_itr_t *iter = NULL;
+    hts_idx_t *idx = NULL;
+    splitdata *data = (splitdata*)args;
+    char *file = NULL, *region = NULL;
+    int size = 0, ret = 0;
+    if (!(infile = sam_open(data->infile, "r"))) {
+        printf("Failed to open input file\n");
+        goto end;
+    }
+    if (!(in_samhdr = sam_hdr_read(infile))) {
+        printf("Failed to read header data\n");
+        goto end;
+    }
+    if (!(bamdata = bam_init1())) {
+        printf("Failed to initialize bamdata\n");
+        goto end;
+    }
+    size = strlen(data->region->name) + 50;         //region specification
+    if (!(region = malloc(size))) {
+        printf("Failed to allocate memory\n");
+        goto end;
+    }
+    snprintf(region, size, "%s:%"PRIhts_pos"-%"PRIhts_pos, data->region->name, data->region->start, data->region->end);
+    size += strlen(data->outdir);                   //output file with path
+    if (!(file = malloc(size))) {
+        printf("Failed to allocate memory\n");
+        goto end;
+    }
+    snprintf(file, size, "%s/%s_%"PRIhts_pos"_%"PRIhts_pos".sam", data->outdir, data->region->name, data->region->start, data->region->end);
+    if (!(idx = sam_index_load(infile, data->infile))) {
+        printf("Failed to load index\n");
+        goto end;
+    }
+    if (!(iter = sam_itr_querys(idx, in_samhdr, region))) {
+        printf("Failed to create iterator\n");
+        goto end;
+    }
+    if (!(outfile = sam_open(file, "w"))) {
+        printf("Failed to open output file\n");
+        goto end;
+    }
+    if (sam_hdr_write(outfile, in_samhdr) < 0) {
+        printf("Failed to write header\n");
+        goto end;
+    }
+    while ((ret = sam_itr_next(infile, iter, bamdata)) >= 0) {  //read and write relevant data
+        if (sam_write1(outfile, in_samhdr, bamdata) < 0) {
+            printf("Failed to write data\n");
+            goto end;
+        }
+    }
+    if (ret != -1) {
+        printf("Failed to get all data\n");
+    }
+
+end:
+    free(data);
+    if (infile) {
+        sam_close(infile);
+    }
+    if (outfile) {
+        sam_close(outfile);
+    }
+    if (iter) {
+        sam_itr_destroy(iter);
+    }
+    if (idx) {
+        hts_idx_destroy(idx);
+    }
+    if (bamdata) {
+        bam_destroy1(bamdata);
+    }
+    if (in_samhdr) {
+        sam_hdr_destroy(in_samhdr);
+    }
+    if (file) {
+        free(file);
+    }
+    if (region) {
+        free(region);
+    }
+    return NULL;
+}
+
+/// main - splits the data to region specific files
+/** @param argc - count of arguments
+ *  @param argv - pointer to array of arguments
+returns 1 on failure 0 on success
+*/
+int main(int argc, char *argv[])
+{
+    const char *inname = NULL, *outdir = NULL, *bedfile = NULL;
+    int c = 0, ret = EXIT_FAILURE, cnt = 0, regcnt = 0;
+    hts_tpool *pool = NULL;
+    hts_tpool_process *queue = NULL;
+    beddata *regions = NULL;
+
+    //qtask infile threadcount outdir [chunksize]
+    if (argc != 5) {
+        print_usage(stdout);
+        goto end;
+    }
+    inname = argv[1];
+    cnt = atoi(argv[2]);
+    outdir = argv[3];
+    bedfile = argv[4];
+    //get regions from bedfile
+    if ((regcnt = readbedfile(bedfile, &regions)) <= 0) {
+        printf("Failed to get bed data\n");
+        goto end;
+    }
+    if (cnt < 1) {
+        cnt = 1;
+    }
+    if (!(pool = hts_tpool_init(cnt))) {                //thread pool
+        printf("Failed to create thread pool\n");
+        goto end;
+    }
+    //queue to use with thread pool, for tasks
+    if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) {
+        printf("Failed to create queue\n");
+        goto end;
+    }
+    for (c = 0; c < regcnt; ++c) {
+        struct splitdata *task = malloc(sizeof(splitdata));
+        task->infile = inname;
+        task->outdir = outdir;
+        task->region = regions + c;
+        //schedule jobs to run in parallel
+        if (hts_tpool_dispatch(pool, queue, splittoregions, task) < 0) {
+            printf("Failed to schedule processing\n");
+            goto end;
+        }
+    }
+    //trigger processing for anything pending, NOTE: will be blocked until queue is cleared
+    if (hts_tpool_process_flush(queue) == -1) {
+        printf("Failed to flush queues\n");
+        goto end;
+    }
+    ret = EXIT_SUCCESS;
+
+    //shutdown queues to exit the result wait
+    hts_tpool_process_shutdown(queue);
+
+end:
+    //cleanup
+    for (c = 0; c < regcnt; ++c) {
+        free(regions[c].name);
+    }
+    free(regions);
+
+    if (queue) {
+        hts_tpool_process_destroy(queue);
+    }
+    if (pool) {
+        hts_tpool_destroy(pool);
+    }
+    return ret;
+}
diff --git a/samples/read_aux.c b/samples/read_aux.c
index cbf972b98..efd6f3651 100644
--- a/samples/read_aux.c
+++ b/samples/read_aux.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
@@ -92,7 +92,7 @@ int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data)
         fprintf(fp, "%c", auxBType);
         for (i = 0; i < auxBcnt; ++i) {                                                     //iterate the array
             fprintf(fp, ",");
-            //calling recurssively  with index to reuse a few lines
+            //calling recursively  with index to reuse a few lines
             if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) {
                 return EXIT_FAILURE;
             }
@@ -166,7 +166,7 @@ int main(int argc, char *argv[])
         else {
             //option 2 - get raw data
             if (!(data = bam_aux_get(bamdata, tag))) {
-                //tag data not returned, errono gives the reason
+                //tag data not returned, errno gives the reason
                 if (errno == ENOENT) {
                     printf("Tag not present\n");
                 }
diff --git a/samples/read_bam.c b/samples/read_bam.c
index 7fca8c55d..30bedf81c 100644
--- a/samples/read_bam.c
+++ b/samples/read_bam.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/read_fast.c b/samples/read_fast.c
index dc24a1167..3ed6ba1f8 100644
--- a/samples/read_fast.c
+++ b/samples/read_fast.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/read_fast_index.c b/samples/read_fast_index.c
index aa6e90da2..97076630a 100644
--- a/samples/read_fast_index.c
+++ b/samples/read_fast_index.c
@@ -24,7 +24,7 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
diff --git a/samples/read_header.c b/samples/read_header.c
index eb14daea5..54b07e736 100644
--- a/samples/read_header.c
+++ b/samples/read_header.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which susage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/read_refname.c b/samples/read_refname.c
index adbc71183..9b4918ded 100644
--- a/samples/read_refname.c
+++ b/samples/read_refname.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/rem_header.c b/samples/rem_header.c
index a0b6510fb..852d5f055 100644
--- a/samples/rem_header.c
+++ b/samples/rem_header.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
@@ -124,7 +124,7 @@ int main(int argc, char *argv[])
     ret = EXIT_SUCCESS;
     //bam data write to follow....
 end:
-    //cleanupq
+    //cleanup
     if (in_samhdr) {
         sam_hdr_destroy(in_samhdr);
     }
diff --git a/samples/sample.bed b/samples/sample.bed
new file mode 100644
index 000000000..2ae458fd5
--- /dev/null
+++ b/samples/sample.bed
@@ -0,0 +1,4 @@
+T1	1	2
+T1	30	35
+T2	10	15
+T2	30	40
diff --git a/samples/sample.sam b/samples/sample.sam
index e56efd69f..58515c976 100644
--- a/samples/sample.sam
+++ b/samples/sample.sam
@@ -9,7 +9,7 @@
 @CO	1234567890123456789012345678901234567890
 @CO	AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT	T1
 @CO	TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT	T2
-@CO	ITR1-ITR2M, ITR2-ITR2M are proper pairs in T1 and T2, UNMP1 is partly mapped and pair is unmapped, UNMP2 & 3 are unmappped
+@CO	ITR1-ITR2M, ITR2-ITR2M are proper pairs in T1 and T2, UNMP1 is partly mapped and pair is unmapped, UNMP2 & 3 are unmapped
 @CO	A1-A2, A4-A3 are proper pairs with A4-A3 in different read order. A5 is secondary alignment
 ITR1	99	T1	5	40	4M	=	33	10	ACTG	()()
 ITR2	147	T2	23	49	2M	=	35	-10	TT	**
diff --git a/samples/split.c b/samples/split.c
index 2eb9e6b79..c51dbd385 100644
--- a/samples/split.c
+++ b/samples/split.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/split2.c b/samples/split2.c
index 2354abfe3..33fabbd67 100644
--- a/samples/split2.c
+++ b/samples/split2.c
@@ -24,19 +24,19 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
 {
-    fprintf(fp, "Usage: split infile outdir\n\
+    fprintf(fp, "Usage: split2 infile outdir\n\
 Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\
 Shows file type selection through name and format api\n");
     return;
diff --git a/samples/split_thread1.c b/samples/split_thread1.c
index fd90e4b19..551c7f093 100644
--- a/samples/split_thread1.c
+++ b/samples/split_thread1.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/split_thread2.c b/samples/split_thread2.c
index dab897b5f..dc8bc9f31 100644
--- a/samples/split_thread2.c
+++ b/samples/split_thread2.c
@@ -24,15 +24,15 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 #include <htslib/thread_pool.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/update_header.c b/samples/update_header.c
index f6b1680cd..237d5c4df 100644
--- a/samples/update_header.c
+++ b/samples/update_header.c
@@ -24,14 +24,14 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
 #include <htslib/sam.h>
 
-/// print_usage - print the demo_usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - print the usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
diff --git a/samples/write_fast.c b/samples/write_fast.c
index 5bfcd73f0..626c693f6 100644
--- a/samples/write_fast.c
+++ b/samples/write_fast.c
@@ -24,7 +24,7 @@ DEALINGS IN THE SOFTWARE
 
 */
 
-/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */
+/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */
 
 #include <getopt.h>
 #include <unistd.h>
@@ -32,8 +32,8 @@ DEALINGS IN THE SOFTWARE
 #include <htslib/sam.h>
 #include <htslib/faidx.h>
 
-/// print_usage - show flags_demo usage
-/** @param fp pointer to the file / terminal to which demo_usage to be dumped
+/// print_usage - show usage
+/** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
 */
 static void print_usage(FILE *fp)
@@ -103,12 +103,6 @@ int main(int argc, char *argv[])
         printf("Failed to write data\n");
         goto end;
     }
-    //close and index it
-    sam_close(outfile); outfile = NULL;
-    if (fai_build3(outname, NULL, NULL) == -1) {
-        printf("Indexing failed with %d\n", errno);
-        goto end;
-    }
     ret = EXIT_SUCCESS;
 end:
     //clean up

From c553f408bf843f48f5f48212555a2260af6ba43c Mon Sep 17 00:00:00 2001
From: Rob Davies <rmd+git@sanger.ac.uk>
Date: Fri, 7 Jun 2024 18:04:28 +0100
Subject: [PATCH 4/9] Improve qtask_ordered sample program

Use hts_tpool_next_result_wait() in threadfn_orderedwrite() so it
blocks properly when it has nothing to do.  Remove lock and
conditional that are no longer needed.  Use a sentinel job to
tell threadfn_orderedwrite() that there is no more data and it can
terminate.

Improve error handling.  Add a data::result field so that
threadfn_orderedwrite() can report back to the main thread if
it encountered a problem.  Ensure everything is cleaned up
correctly if either the main thread or threadfn_orderedwrite()
fail, and in the right order.  Set the return value to EXIT_FAILURE
if either sam_close() or threadfn_orderedwrite() report failure.

Ensure error messages are written to stderr.
---
 samples/qtask_ordered.c | 241 +++++++++++++++++++++++-----------------
 1 file changed, 141 insertions(+), 100 deletions(-)

diff --git a/samples/qtask_ordered.c b/samples/qtask_ordered.c
index f080209bc..731587e97 100644
--- a/samples/qtask_ordered.c
+++ b/samples/qtask_ordered.c
@@ -37,9 +37,7 @@ typedef struct orderedwrite {
     samFile *outfile;           //output file handle
     sam_hdr_t *samhdr;          //header used to write data
     hts_tpool_process *queue;   //queue from which results to be retrieved
-
-    pthread_cond_t *done;       //indicates exit condition
-    pthread_mutex_t *lock;      //to synchronise queue access
+    int result;                 //result code returned by writer thread
 } orderedwrite;
 
 typedef struct data {
@@ -48,7 +46,6 @@ typedef struct data {
     bam1_t **bamarray;          //bam1_t array for optimal queueing
 } data;
 
-#define WAIT 1                  //1 sec
 /// print_usage - print the usage
 /** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
@@ -82,7 +79,7 @@ int addcount(bam1_t *bamdata)
     gcratio = gc / (float) bamdata->core.l_qseq;
 
     if (bam_aux_append(bamdata, "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
-        printf("Failed to add aux tag xr, errno: %d\n", errno);
+        fprintf(stderr, "Failed to add aux tag xr, errno: %d\n", errno);
         ret = -1;
     }
     return ret;
@@ -113,6 +110,28 @@ data* getbamstorage(int chunk)
     return bamdata;
 }
 
+/// cleanup_bamstorage - frees a bamdata struct plus contents
+/** @param arg Pointer to data to free
+
+    @p arg has type void * so it can be used as a callback passed
+    to hts_tpool_dispatch3().
+ */
+void cleanup_bamstorage(void *arg)
+{
+    data *bamdata = (data *) arg;
+
+    if (!bamdata)
+        return;
+    if (bamdata->bamarray) {
+        int i;
+        for (i = 0; i < bamdata->size; i++) {
+            bam_destroy1(bamdata->bamarray[i]);
+        }
+        free(bamdata->bamarray);
+    }
+    free(bamdata);
+}
+
 /// thread_ordered_proc - does the processing of task in queue and queues the output back
 /** @param args pointer to set of data to be processed
 returns the processed data
@@ -123,10 +142,14 @@ void *thread_ordered_proc(void *args)
 {
     int i = 0;
     data *bamdata = (data*)args;
+
+    if (bamdata == NULL)
+        return NULL; // Indicates no more input
+
     for ( i = 0; i < bamdata->count; ++i) {
         //add count
         if (addcount(bamdata->bamarray[i]) < 0) {
-            printf("Failed to calculate gc data\n");
+            fprintf(stderr, "Failed to calculate gc data\n");
             break;
         }
     }
@@ -143,44 +166,39 @@ void *threadfn_orderedwrite(void *args)
     hts_tpool_result *r = NULL;
     data *bamdata = NULL;
     int i = 0;
+    int count = 0;
 
     struct timeval now;
     struct timespec timeout;
     long usec = 0;
 
-    pthread_mutex_lock(tdata->lock);                //lock to check the exit condition
-    do {
-        //get result and write; wait if no result is in queue - until shutdown of queue
-        while ((r = hts_tpool_next_result(tdata->queue))) {
-            bamdata = (data*) hts_tpool_result_data(r);
-            for (i = 0; i < bamdata->count; ++i) {
-                if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
-                    printf("Failed to write output data\n");
-                    break;
-                }
-            }
-            hts_tpool_delete_result(r, 0);          //release the result memory
-            if (bamdata) {
-                for (i = 0; i < bamdata->size; ++i) {
-                    bam_destroy1(bamdata->bamarray[i]); //clear the bamdata;
-                }
-                free(bamdata->bamarray);
-                free(bamdata);
-            }
-        }
-        //no more data in queues, check and wait till exit is triggered
-        gettimeofday(&now, NULL);
-        usec = now.tv_usec + 100000;    //+100msec
-        if (usec >= 1000000) {          //overflow
-            usec %= 1000000;
-            now.tv_sec++;
+    tdata->result = 0;
+
+    //get result and write; wait if no result is in queue - until shutdown of queue
+    while (tdata->result == 0 &&
+           (r = hts_tpool_next_result_wait(tdata->queue)) != NULL) {
+        bamdata = (data*) hts_tpool_result_data(r);
+
+        if (bamdata == NULL) {
+            // Indicator for no more input. Time to stop.
+            hts_tpool_delete_result(r, 0);
+            break;
         }
-        timeout.tv_sec = now.tv_sec;
-        timeout.tv_nsec = usec * 1000;
 
-    } while (pthread_cond_timedwait(tdata->done, tdata->lock, &timeout) == ETIMEDOUT);
+        for (i = 0; i < bamdata->count; ++i) {
+            if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
+                fprintf(stderr, "Failed to write output data\n");
+                tdata->result = -1;
+                break;
+            }
+        }
+        hts_tpool_delete_result(r, 0);          //release the result memory
+        cleanup_bamstorage(bamdata);
+    }
 
-    pthread_mutex_unlock(tdata->lock);
+    // Shut down the process queue.  If we stopped early due to a write failure,
+    // this will signal to the other end that something has gone wrong.
+    hts_tpool_process_shutdown(tdata->queue);
 
     return NULL;
 }
@@ -194,12 +212,10 @@ int main(int argc, char *argv[])
 {
     const char *inname = NULL, *outdir = NULL;
     char *file = NULL;
-    int c = 0, ret = EXIT_FAILURE, cnt = 0, clearthread = 0, chunk = 0;
+    int c = 0, ret = EXIT_FAILURE, cnt = 0, started_thread = 0, chunk = 0;
     size_t size = 0;
     samFile *infile = NULL, *outfile = NULL;
     sam_hdr_t *in_samhdr = NULL;
-    pthread_mutex_t stopcondsynch = PTHREAD_MUTEX_INITIALIZER;
-    pthread_cond_t stopcond = PTHREAD_COND_INITIALIZER;
     pthread_t thread;
     orderedwrite twritedata = {0};
     hts_tpool *pool = NULL;
@@ -230,59 +246,61 @@ int main(int argc, char *argv[])
     size = (strlen(outdir) + sizeof("/out.sam") + 1); //space for output file name and null termination
     file = malloc(size);
     if (!file) {
-        printf("Failed to set output path\n");
+        fprintf(stderr, "Failed to set output path\n");
         goto end;
     }
     snprintf(file, size, "%s/out.sam", outdir);         //output file name
     if (!(pool = hts_tpool_init(cnt))) {                //thread pool
-        printf("Failed to create thread pool\n");
+        fprintf(stderr, "Failed to create thread pool\n");
         goto end;
     }
     tpool.pool = pool;      //to share the pool for file read and write as well
     //queue to use with thread pool, for task and results
     if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) {
-        printf("Failed to create queue\n");
+        fprintf(stderr, "Failed to create queue\n");
         goto end;
     }
     //open input file - r reading
     if (!(infile = sam_open(inname, "r"))) {
-        printf("Could not open %s\n", inname);
+        fprintf(stderr, "Could not open %s\n", inname);
         goto end;
     }
     //open output files - w write as SAM, wb  write as BAM
     if (!(outfile = sam_open(file, "w"))) {
-        printf("Could not open output file\n");
+        fprintf(stderr, "Could not open output file\n");
         goto end;
     }
     //share the thread pool with i/o files
     if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 ||
           hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0) {
-        printf("Failed to set threads to i/o files\n");
+        fprintf(stderr, "Failed to set threads to i/o files\n");
         goto end;
     }
     //read header, required to resolve the target names to proper ids
     if (!(in_samhdr = sam_hdr_read(infile))) {
-        printf("Failed to read header from file!\n");
+        fprintf(stderr, "Failed to read header from file!\n");
         goto end;
     }
-    /*tasks are queued, worker threads get them and processes in parallel;
-    the results are queued and they are to be removed in parallel as well*/
-    //start output writer thread for ordered processing
+
+    //write header
+    if ((sam_hdr_write(outfile, in_samhdr) == -1)) {
+        fprintf(stderr, "Failed to write header\n");
+        goto end;
+    }
+
+    /*tasks are queued, worker threads get them and process in parallel;
+    the results are queued and they are to be removed in parallel as well */
+
+    // start output writer thread for ordered processing
     twritedata.outfile = outfile;
     twritedata.queue = queue;
-    twritedata.done = &stopcond;
-    twritedata.lock = &stopcondsynch;
     twritedata.samhdr = in_samhdr;
+    twritedata.result = 0;
     if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
-        printf("Failed to create writer thread\n");
-        goto end;
-    }
-    clearthread = 1;
-    //write header
-    if ((sam_hdr_write(outfile, in_samhdr) == -1)) {
-        printf("Failed to write header\n");
+        fprintf(stderr, "Failed to create writer thread\n");
         goto end;
     }
+    started_thread = 1;
 
     c = 0;
     while (c >= 0) {
@@ -290,76 +308,99 @@ int main(int argc, char *argv[])
         //read alignments, upto max size for this lot
         for (cnt = 0; cnt < bamdata->size; ++cnt) {
             c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
-            if (c >= 0) {
-                continue;   //read next
-            }
-            else {
-                break;      //failure
+            if (c < 0) {
+                break;      // EOF or failure
             }
         }
         if (c >= -1 ) {
             //max size data or reached EOF
-            bamdata->count = ( c >= 0 )? bamdata->size : cnt;
-            //queue the lot for processing
-            if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
-                bamdata) == -1) {
-                printf("Failed to schedule processing\n");
+            bamdata->count = cnt;
+            // Queue the data for processing.  hts_tpool_dispatch3() is
+            // used here as it allows in-flight data to be cleaned up
+            // properly when stopping early due to errors.
+            if (hts_tpool_dispatch3(pool, queue, thread_ordered_proc, bamdata,
+                                    cleanup_bamstorage, cleanup_bamstorage,
+                                    0) == -1) {
+                fprintf(stderr, "Failed to schedule processing\n");
                 goto end;
             }
             bamdata = NULL;
         }
         else {
-            printf("Error in reading data\n");
+            fprintf(stderr, "Error in reading data\n");
             break;
         }
     }
-    if (-1 == c) {
-        //EOF read, trigger processing for anything pending, NOTE: will be blocked until queue is cleared
-        if (hts_tpool_process_flush(queue) == -1) {
-            printf("Failed to flush queues\n");
-            goto end;
-        }
-        //all tasks done, check for processing completion
-        while (1) {
-            if (!hts_tpool_process_empty(queue)) {
-                usleep(WAIT * 1000000);     //results yet to be empty, check again
-                continue;
+
+    ret = EXIT_SUCCESS;
+
+ end:
+    // Tidy up after having dispatched all of the data.
+
+    // Note that the order here is important.  In particular, we need
+    // to join the thread that was started earlier before freeing anything
+    // to avoid any use-after-free errors.
+
+    // It's also possible to get here early due to various error conditions,
+    // so we need to carefully check which parts of the program state have
+    // been created before trying to clean them up.
+
+    if (queue) {
+        if (-1 == c) {
+            // EOF read, send a marker to tell the threadfn_orderedwrite()
+            // function to shut down.
+            if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
+                                   NULL) == -1) {
+                fprintf(stderr, "Failed to schedule processing\n");
+                ret = EXIT_FAILURE;
             }
-            break;
+
+            // trigger processing for anything pending
+            // NOTE: will be blocked until queue is cleared
+            if (hts_tpool_process_flush(queue) == -1) {
+                fprintf(stderr, "Failed to flush queues\n");
+                ret = EXIT_FAILURE;
+            }
+        } else {
+            // Error or we never wrote anything.  Shut down the queue to
+            // ensure threadfn_orderedwrite() wakes up and terminates.
+            hts_tpool_process_shutdown(queue);
         }
-        ret = EXIT_SUCCESS;
     }
 
-    //trigger exit for ordered write thread
-    pthread_mutex_lock(twritedata.lock);
-    pthread_cond_signal(twritedata.done);
-    pthread_mutex_unlock(twritedata.lock);
-end:
-    //cleanup
-    if (clearthread) {
+    // Wait for threadfn_orderedwrite to finish.
+    if (started_thread) {
         pthread_join(thread, NULL);
+
+        // Once the writer thread has finished, check the result it sent back
+        if (twritedata.result != 0)
+            ret = EXIT_FAILURE;
     }
+
+    if (queue) {
+        // Once threadfn_orderedwrite has stopped, the queue can be
+        // cleaned up.
+        hts_tpool_process_destroy(queue);
+    }
+
     if (in_samhdr) {
         sam_hdr_destroy(in_samhdr);
     }
     if (infile) {
-        sam_close(infile);
+        if (sam_close(infile) != 0)
+            ret = EXIT_FAILURE;
     }
+    if (outfile) {
+        if (sam_close(outfile) != 0)
+            ret = EXIT_FAILURE;
+    }
+
     if (bamdata) {
-        for (cnt = 0; cnt < bamdata->size; ++cnt) {
-            bam_destroy1(bamdata->bamarray[cnt]);
-        }
-        free(bamdata);
+        cleanup_bamstorage(bamdata);
     }
     if (file) {
         free(file);
     }
-    if (outfile) {
-        sam_close(outfile);
-    }
-    if (queue) {
-        hts_tpool_process_destroy(queue);
-    }
     if (pool) {
         hts_tpool_destroy(pool);
     }

From 9ce3917b10ade945794c3e303079c44dc1ca4e80 Mon Sep 17 00:00:00 2001
From: vasudeva8 <vasudeva.sarma@sanger.ac.uk>
Date: Tue, 2 Jul 2024 16:02:01 +0100
Subject: [PATCH 5/9] update to make faster

---
 samples/DEMO.md           | 197 ++++++++++--------
 samples/README.md         |   5 +-
 samples/qtask_ordered.c   | 191 +++++++++--------
 samples/qtask_unordered.c | 419 +++++++++++++++++++-------------------
 4 files changed, 435 insertions(+), 377 deletions(-)

diff --git a/samples/DEMO.md b/samples/DEMO.md
index f2a281445..0344b0f45 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -766,7 +766,9 @@ The array aux fields can be updated using bam_aux_update_array api.
 Refer: mod_aux_ba.c
 
 Shows the records updated with an array of integers, containing count of ACGT
-and N in that order.
+and N in that order. The bases are decoded before count for the sake of
+simplicity. Refer qtask_ordered.c for a better counting where decoding is made
+outside the loop.
 
     ./mod_aux_ba samtools/test/mpileup/mpileup.1.bam
 
@@ -1386,22 +1388,21 @@ Refer: flags_htsopt_field.c
 The HTSLib api supports thread pooling for better performance. There are a few
 ways in which this can be used. The pool can be made specific for a file or a
 generic pool can be created and shared across multiple files. Thread pool can
-also be used to execute user defined tasks. The tasks are to be queued on a
-queue and threads in pool executes them and results can be queued back if
-required.
-
-The samples are trivial ones to showcase the usage of api and the number of
-threads to use has to be identified based on complexity and parallelism of the
-job.
+also be used to execute user defined tasks. The tasks are to be added to queue,
+threads in pool executes them and results can be queued back if required.
 
 To have a thread pool specific for a file, hts_set_opt api can be used with the
-file pointer, HTS_OPT_NTHREADS and the number of threads to use in the pool.
-Closure of file releases the thread pool as well. To have a thread pool which
-can be shared across different files, it needs to be initialized using
-hts_tpool_init api, passing number of threads as argument. This thread pool can
-be associated with a file using hts_set_opt api. The file pointer,
-HTS_OPT_THREAD_POOL and the thread pool address are to be passed as arguments
-to api. The thread pool has to be released with hts_tpool_destroy.
+file pointer, HTS_OPT_NTHREADS and the number of threads to be in the pool.
+Thread pool is released on closure of file. To have a thread pool which can be
+shared across different files, it needs to be initialized using hts_tpool_init
+api, passing number of threads as an argument. This thread pool can be
+associated with a file using hts_set_opt api. The file pointer,
+HTS_OPT_THREAD_POOL and the thread pool address are to be passed as arguments to
+the api. The thread pool has to be released with hts_tpool_destroy.
+
+The samples are trivial ones to showcase the usage of api. The number of threads
+to use for different tasks has to be identified based on complexity and
+parallelism of the task.
 
 Below excerpt shows file specific thread pool,
 
@@ -1444,9 +1445,9 @@ The order of execution of threads are decided based on many factors and load on
 each task may vary, so the completion of the tasks may not be in the order of
 their queueing. The queues can be used in two different ways, one where the
 result is enqueued to queue again to be read in same order as initial queueing,
-two where the resuls is not enqueued and is readily available, possibly in a
-different order than initial queueing. Explicitly created threads can also be
-used along with hts thread pool usage.
+second where the resuls are not enqueued and completed possibly in a different
+order than initial queueing. Explicitly created threads can also be used along
+with hts thread pool usage.
 
 hts_tpool_process_init initializes the queue / process, associates a queue with
 thread pool and reserves space for given number of tasks on queue. It takes a
@@ -1464,124 +1465,156 @@ all the piled up tasks are processed, a possible case when the queueing and
 processing happen at different speeds. hts_tpool_process_shutdown api stops the
 processing of queue.
 
+There are a few apis which let the user to check the status of processing. The
+api hts_tpool_process_empty shows whether all the tasks are completed or not.
+The api hts_tpool_process_sz gives the number of tasks, at different states of
+processing. The api hts_tpool_process_len gives the number of results in output
+queue waiting to be collected.
+
 The order of execution of tasks depends on the number of threads involved and
 how the threads are scheduled by operating system. When the results are enqueued
 back to queue, they are read in same order of enqueueing of task and in that
 case the order of execution will not be noticed. When the results are not
 enqueued the results are available right away and the order of execution may be
-noticeable. Based on the nature of task and need of order maintenance, users
+noticeable. Based on the nature of task and the need of order maintenance, users
 can select either of the queueing.
 
 Below excerpts shows the usage of queues and threads in both cases. In the 1st,
 alignments are updated with an aux tag indicating GC ratio. The order of data
 has to be maintained even after update, hence the result queueing is used to
-ensure same order as initial. A number of alignments are bunched together for
-optimal processing.
+ensure same order as initial. A number of alignments are bunched together and
+reuse of allocated memory is made to make it perform better. A sentinel job is
+used to identify the completion of all tasks at the result collection side.
     ...
     void *thread_ordered_proc(void *args)
     {
     ...
         for ( i = 0; i < bamdata->count; ++i) {
-            //add count
-            if (addcount(bamdata->bamarray[i]) < 0) {
     ...
-        return bamdata;
+            for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) {
+                count[bam_seqi(data,pos)]++;
+    ...
+            gcratio = (count[2] /*C*/ + count[4] /*G*/) / (float) (count[1] /*A*/ + count[8] /*T*/ + count[2] + count[4]);
+
+            if (bam_aux_append(bamdata->bamarray[i], "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
     ...
     void *threadfn_orderedwrite(void *args)
     {
-        ...
-        do {
-            //get result and write; wait if no result is in queue - until shutdown of queue
-            while ((r = hts_tpool_next_result(tdata->queue))) {
-                bamdata = (data*) hts_tpool_result_data(r);
-                for (i = 0; i < bamdata->count; ++i) {
-                    if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
-        ...
-                hts_tpool_delete_result(r, 0);          //release the result memory
-        ..
-        } while (pthread_cond_timedwait(tdata->done, tdata->lock, &timeout) == ETIMEDOUT);
+    ...
+        //get result and write; wait if no result is in queue - until shutdown of queue
+        while (tdata->result == 0 &&
+            (r = hts_tpool_next_result_wait(tdata->queue)) != NULL) {
+            bamdata = (data*) hts_tpool_result_data(r);
+    ...
+            for (i = 0; i < bamdata->count; ++i) {
+                if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
+    ...
+            hts_tpool_delete_result(r, 0);              //release the result memory
+
+        // Shut down the process queue.  If we stopped early due to a write failure,
+        // this will signal to the other end that something has gone wrong.
+        hts_tpool_process_shutdown(tdata->queue);
     ...
     int main(int argc, char *argv[])
     {
     ...
         if (!(pool = hts_tpool_init(cnt))) {                //thread pool
     ...
+        tpool.pool = pool;      //to share the pool for file read and write as well
         //queue to use with thread pool, for task and results
         if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) {
     ...
-        if (!(infile = sam_open(inname, "r"))) {
-    ...
-        if (!(outfile = sam_open(file, "w"))) {
+        //share the thread pool with i/o files
+        if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 ||
+            hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0) {
     ...
-        //start output writer thread for ordered processing
         if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
     ...
         while (c >= 0) {
-            bamdata = getbamstorage(chunk);
-            //read alignments, upto max size for this lot
-            for (cnt = 0; cnt < bamdata->size; ++cnt) {
+            if (!(bamdata = getbamstorage(chunk, &bamcache))) {
+    ...
+            for (cnt = 0; cnt < bamdata->maxsize; ++cnt) {
+                c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
+    ...
+                if (hts_tpool_dispatch3(pool, queue, thread_ordered_proc, bamdata,
+                                        cleanup_bamstorage, cleanup_bamstorage,
+                                        0) == -1) {
     ...
-                //queue the lot for processing
+        if (queue) {
+            if (-1 == c) {
+                // EOF read, send a marker to tell the threadfn_orderedwrite()
+                // function to shut down.
                 if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
-                    bamdata) == -1) {
+                                    NULL) == -1) {
     ...
-        if (-1 == c) {
-            //EOF read, trigger processing for anything pending, NOTE: will be blocked until queue is cleared
-            if (hts_tpool_process_flush(queue) == -1) {
+                hts_tpool_process_shutdown(queue);
     ...
-        usleep(WAIT * 1000000);                     //to ensure result check is done atleast once after processing
-        //trigger exit for ordered write thread
-        pthread_cond_signal(twritedata.done);
+        // Wait for threadfn_orderedwrite to finish.
+        if (started_thread) {
+            pthread_join(thread, NULL);
     ...
-        //shutdown queues to exit the result wait
-        hts_tpool_process_shutdown(queue);
+        if (queue) {
+            // Once threadfn_orderedwrite has stopped, the queue can be
+            // cleaned up.
+            hts_tpool_process_destroy(queue);
+        }
     ...
 Refer: qtask_ordered.c
 
-In this 2nd, the alignments from input are split to different files containing
-alignment specific to given regions. The order in which the output files are
-created doesnt matter and hence no result queueing is used.
+In this 2nd, the bases are counted and GC ratio of whole file is calculated.
+Order in which bases are counted is not relevant and no result queue required.
+The queue is created as input only.
     ...
-    void * splittoregions(void *args)
+    void *thread_unordered_proc(void *args)
     {
     ...
-        if (!(infile = sam_open(data->infile, "r"))) {
-    ...
-        if (!(in_samhdr = sam_hdr_read(infile))) {
-    ...
-        if (!(bamdata = bam_init1())) {
-    ...
-        if (!(idx = sam_index_load(infile, data->infile))) {
-    ...
-        if (!(iter = sam_itr_querys(idx, in_samhdr, region))) {
-    ...
-        if (!(outfile = sam_open(file, "w"))) {
+        for ( i = 0; i < bamdata->count; ++i) {
+            data = bam_get_seq(bamdata->bamarray[i]);
+            for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) {
     ...
-        if (sam_hdr_write(outfile, in_samhdr) < 0) {
+                counts[bam_seqi(data, pos)]++;
     ...
-        while ((ret = sam_itr_next(infile, iter, bamdata)) >= 0) {  //read and write relevant data
-            if (sam_write1(outfile, in_samhdr, bamdata) < 0) {
+        //update result and add the memory block for reuse
+        pthread_mutex_lock(&bamdata->cache->lock);
+        for (i = 0; i < 16; i++) {
+            bamdata->bases->counts[i] += counts[i];
+        }
+
+        bamdata->next = bamdata->cache->list;
+        bamdata->cache->list = bamdata;
+        pthread_mutex_unlock(&bamdata->cache->lock);
     ...
     int main(int argc, char *argv[])
     {
-    ...
-        //get regions from bedfile
-        if ((regcnt = readbedfile(bedfile, &regions)) < 0) {
-    ...
-        if (!(pool = hts_tpool_init(cnt))) {                //thread pool
     ...
         if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) {
     ...
-        for (c = 0; c < regcnt; ++c) {
+        c = 0;
+        while (c >= 0) {
+    ...
+            for (cnt = 0; cnt < bamdata->maxsize; ++cnt) {
+                c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
     ...
-            //schedule jobs to run in parallel
-            if (hts_tpool_dispatch(pool, queue, splittoregions, task) < 0) {
+            if (c >= -1 ) {
     ...
-        //trigger processing for anything pending, NOTE: will be blocked until queue is cleared
-        if (hts_tpool_process_flush(queue) == -1) {
+                if (hts_tpool_dispatch3(pool, queue, thread_unordered_proc, bamdata,
+                                        cleanup_bamstorage, cleanup_bamstorage,
+                                        0) == -1) {
     ...
-        //shutdown queues to exit the result wait
-        hts_tpool_process_shutdown(queue);
+        if (-1 == c) {
+            // EOF read, ensure all are processed, waits for all to finish
+            if (hts_tpool_process_flush(queue) == -1) {
+                fprintf(stderr, "Failed to flush queue\n");
+            } else { //all done
+                //refer seq_nt16_str to find position of required bases
+                fprintf(stdout, "GCratio: %f\nBase counts:\n",
+                    (gccount.counts[2] /*C*/ + gccount.counts[4] /*G*/) / (float)
+                        (gccount.counts[1] /*A*/ + gccount.counts[8] /*T*/ +
+                            gccount.counts[2] + gccount.counts[4]));
+    ...
+        if (queue) {
+            hts_tpool_process_destroy(queue);
+        }
     ...
 Refer: qtask_unordered.c
 
diff --git a/samples/README.md b/samples/README.md
index 6ede74007..c68354329 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -209,9 +209,8 @@ indexed.
 [Qtask_unordered][Qtask_unordered]
 
   This application showcases the use of queues and threads for custom
-  processing. In this, alignments in input files are split to different files
-  for requested regions. The processing may occur in any order and results may
-  appear in the order completion of processing.
+  processing. The count of bases and GC ratio are calculated and displayed.
+  The order of counting is irrelevant and hence ordered retrieval is not used.
 
 ### More Information
 
diff --git a/samples/qtask_ordered.c b/samples/qtask_ordered.c
index 731587e97..a76d59826 100644
--- a/samples/qtask_ordered.c
+++ b/samples/qtask_ordered.c
@@ -33,19 +33,27 @@ DEALINGS IN THE SOFTWARE
 #include <htslib/sam.h>
 #include <htslib/thread_pool.h>
 
+typedef struct data {
+    int count;                  //used up size
+    int maxsize;                //max size per data chunk
+    bam1_t **bamarray;          //bam1_t array for optimal queueing
+    struct data *next;          //pointer to next one - to reuse earlier allocations
+} data;
+
+typedef struct datacache
+{
+    pthread_mutex_t lock;       //synchronizes the access to cache
+    data *list;                 //data storage
+} datacache;
+
 typedef struct orderedwrite {
     samFile *outfile;           //output file handle
     sam_hdr_t *samhdr;          //header used to write data
     hts_tpool_process *queue;   //queue from which results to be retrieved
+    datacache *cache;           //to re-use allocated storage
     int result;                 //result code returned by writer thread
 } orderedwrite;
 
-typedef struct data {
-    int count;                  //used up size
-    int size;                   //max size
-    bam1_t **bamarray;          //bam1_t array for optimal queueing
-} data;
-
 /// print_usage - print the usage
 /** @param fp pointer to the file / terminal to which usage to be dumped
 returns nothing
@@ -53,78 +61,71 @@ returns nothing
 static void print_usage(FILE *fp)
 {
     fprintf(fp, "Usage: qtask_ordered infile threadcount outdir [chunksize]\n\
-Calculates GC ratio - sum(G,C) / sum(A,T,C,G,N) - and adds to each alignment\n\
+Calculates GC ratio - sum(G,C) / sum(A,T,C,G) - and adds to each alignment\n\
 as xr:f aux tag. Output is saved in outdir.\n\
-chunksize [100] sets the number of alignments clubbed together to process.\n");
+chunksize [4096] sets the number of alignments clubbed together to process.\n");
     return;
 }
 
-/// addcount - calculates and adds the aux tag
-/** @param bamdata pointer to bam data
-returns 0 on success and -1 for failure
-*/
-int addcount(bam1_t *bamdata)
-{
-    int pos = 0, gc = 0, ret = 0;
-    float gcratio = 0;
-    uint8_t *data = bam_get_seq(bamdata);
-    for (pos = 0; pos < bamdata->core.l_qseq; ++pos) {
-        switch(seq_nt16_str[bam_seqi(data, pos)]) {
-            case 'G':   //fall through
-            case 'C':
-                gc++;
-            break;
-        }
-    }
-    gcratio = gc / (float) bamdata->core.l_qseq;
-
-    if (bam_aux_append(bamdata, "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
-        fprintf(stderr, "Failed to add aux tag xr, errno: %d\n", errno);
-        ret = -1;
-    }
-    return ret;
-}
-
-
 /// getbamstorage - allocates storage for alignments to queue
-/** @param chunk no of alignments to be queued together
-returns allocated data.
+/** @param chunk number of bam data to allocate
+ * @param bamcache cached storage
+returns already allocated data storage if one is available, otherwise allocates new
 */
-data* getbamstorage(int chunk)
+data* getbamstorage(int chunk, datacache *bamcache)
 {
     int i = 0;
-    data *bamdata = malloc(sizeof(data));
-    if (!bamdata) {
+    data *bamdata = NULL;
+
+    if (!bamcache) {
         return NULL;
     }
+    //get from cache if there is an already allocated storage
+    if (pthread_mutex_lock(&bamcache->lock)) {
+        return NULL;
+    }
+    if (bamcache->list) {                   //available
+        bamdata = bamcache->list;
+        bamcache->list = bamdata->next;     //remove and set next one as available
+        bamdata->next = NULL;               //remove link
+        bamdata->count = 0;
+        goto end;
+    }
+    //allocate and use
+    if (!(bamdata = malloc(sizeof(data)))) {
+        goto end;
+    }
     bamdata->bamarray = malloc(chunk * sizeof(bam1_t*));
     if (!bamdata->bamarray) {
-        return NULL;
+        free(bamdata);
+        bamdata = NULL;
+        goto end;
     }
     for (i = 0; i < chunk; ++i) {
         bamdata->bamarray[i] = bam_init1();
     }
+    bamdata->maxsize = chunk;
     bamdata->count = 0;
-    bamdata->size = chunk;
+    bamdata->next = NULL;
 
+end:
+    pthread_mutex_unlock(&bamcache->lock);
     return bamdata;
 }
 
 /// cleanup_bamstorage - frees a bamdata struct plus contents
 /** @param arg Pointer to data to free
-
     @p arg has type void * so it can be used as a callback passed
     to hts_tpool_dispatch3().
  */
 void cleanup_bamstorage(void *arg)
 {
     data *bamdata = (data *) arg;
-
     if (!bamdata)
         return;
     if (bamdata->bamarray) {
         int i;
-        for (i = 0; i < bamdata->size; i++) {
+        for (i = 0; i < bamdata->maxsize; i++) {
             bam_destroy1(bamdata->bamarray[i]);
         }
         free(bamdata->bamarray);
@@ -137,19 +138,31 @@ void cleanup_bamstorage(void *arg)
 returns the processed data
 the processing could be in any order based on the number of threads in use but read of output
 from queue will be in order
+a null data indicates the end of input and a null is returned to be added back to result queue
 */
 void *thread_ordered_proc(void *args)
 {
-    int i = 0;
+    int i = 0, pos = 0;
     data *bamdata = (data*)args;
+    float gcratio = 0;
+    uint8_t *data = NULL;
 
     if (bamdata == NULL)
         return NULL; // Indicates no more input
 
     for ( i = 0; i < bamdata->count; ++i) {
         //add count
-        if (addcount(bamdata->bamarray[i]) < 0) {
-            fprintf(stderr, "Failed to calculate gc data\n");
+        uint64_t count[16] = {0};
+        data = bam_get_seq(bamdata->bamarray[i]);
+        for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) {
+            count[bam_seqi(data,pos)]++;
+        }
+        /*it is faster to count all and use offset to get required counts rather than select
+        require ones inside the loop*/
+        gcratio = (count[2] /*C*/ + count[4] /*G*/) / (float) (count[1] /*A*/ + count[8] /*T*/ + count[2] + count[4]);
+
+        if (bam_aux_append(bamdata->bamarray[i], "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
+            fprintf(stderr, "Failed to add aux tag xr, errno: %d\n", errno);
             break;
         }
     }
@@ -166,11 +179,6 @@ void *threadfn_orderedwrite(void *args)
     hts_tpool_result *r = NULL;
     data *bamdata = NULL;
     int i = 0;
-    int count = 0;
-
-    struct timeval now;
-    struct timespec timeout;
-    long usec = 0;
 
     tdata->result = 0;
 
@@ -192,8 +200,12 @@ void *threadfn_orderedwrite(void *args)
                 break;
             }
         }
-        hts_tpool_delete_result(r, 0);          //release the result memory
-        cleanup_bamstorage(bamdata);
+        hts_tpool_delete_result(r, 0);              //release the result memory
+
+        pthread_mutex_lock(&tdata->cache->lock);
+        bamdata->next = tdata->cache->list;         //make current list as next
+        tdata->cache->list = bamdata;               //set as current to reuse
+        pthread_mutex_unlock(&tdata->cache->lock);
     }
 
     // Shut down the process queue.  If we stopped early due to a write failure,
@@ -222,6 +234,7 @@ int main(int argc, char *argv[])
     hts_tpool_process *queue = NULL;
     htsThreadPool tpool = {NULL, 0};
     data *bamdata = NULL;
+    datacache bamcache = {PTHREAD_MUTEX_INITIALIZER, NULL};
 
     //qtask infile threadcount outdir [chunksize]
     if (argc != 4 && argc != 5) {
@@ -231,25 +244,23 @@ int main(int argc, char *argv[])
     inname = argv[1];
     cnt = atoi(argv[2]);
     outdir = argv[3];
-    if (argc == 5) {
+    if (argc == 5) {    //chunk size present
         chunk = atoi(argv[4]);
     }
-
-    if (cnt < 1) {
+    if (cnt < 1) {      //set proper thread count
         cnt = 1;
     }
-    if (chunk < 1) {
-        chunk = 10000;
+    if (chunk < 1) {    //set valid  chunk size
+        chunk = 4096;
     }
 
     //allocate space for output
-    size = (strlen(outdir) + sizeof("/out.sam") + 1); //space for output file name and null termination
-    file = malloc(size);
-    if (!file) {
+    size = (strlen(outdir) + sizeof("/out.bam") + 1);   //space for output file name and null termination
+    if (!(file = malloc(size))) {
         fprintf(stderr, "Failed to set output path\n");
         goto end;
     }
-    snprintf(file, size, "%s/out.sam", outdir);         //output file name
+    snprintf(file, size, "%s/out.bam", outdir);         //output file name
     if (!(pool = hts_tpool_init(cnt))) {                //thread pool
         fprintf(stderr, "Failed to create thread pool\n");
         goto end;
@@ -266,7 +277,7 @@ int main(int argc, char *argv[])
         goto end;
     }
     //open output files - w write as SAM, wb  write as BAM
-    if (!(outfile = sam_open(file, "w"))) {
+    if (!(outfile = sam_open(file, "wb"))) {
         fprintf(stderr, "Could not open output file\n");
         goto end;
     }
@@ -281,21 +292,21 @@ int main(int argc, char *argv[])
         fprintf(stderr, "Failed to read header from file!\n");
         goto end;
     }
-
     //write header
     if ((sam_hdr_write(outfile, in_samhdr) == -1)) {
         fprintf(stderr, "Failed to write header\n");
         goto end;
     }
 
-    /*tasks are queued, worker threads get them and process in parallel;
+    /* tasks are queued, worker threads get them and process in parallel;
     the results are queued and they are to be removed in parallel as well */
 
     // start output writer thread for ordered processing
     twritedata.outfile = outfile;
-    twritedata.queue = queue;
-    twritedata.samhdr = in_samhdr;
-    twritedata.result = 0;
+    twritedata.samhdr  = in_samhdr;
+    twritedata.result  = 0;
+    twritedata.queue   = queue;
+    twritedata.cache   = &bamcache;
     if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
         fprintf(stderr, "Failed to create writer thread\n");
         goto end;
@@ -304,9 +315,12 @@ int main(int argc, char *argv[])
 
     c = 0;
     while (c >= 0) {
-        bamdata = getbamstorage(chunk);
+        if (!(bamdata = getbamstorage(chunk, &bamcache))) {
+            fprintf(stderr, "Failed to allocate memory\n");
+            break;
+        }
         //read alignments, upto max size for this lot
-        for (cnt = 0; cnt < bamdata->size; ++cnt) {
+        for (cnt = 0; cnt < bamdata->maxsize; ++cnt) {
             c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
             if (c < 0) {
                 break;      // EOF or failure
@@ -325,8 +339,7 @@ int main(int argc, char *argv[])
                 goto end;
             }
             bamdata = NULL;
-        }
-        else {
+        } else {
             fprintf(stderr, "Error in reading data\n");
             break;
         }
@@ -351,14 +364,7 @@ int main(int argc, char *argv[])
             // function to shut down.
             if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
                                    NULL) == -1) {
-                fprintf(stderr, "Failed to schedule processing\n");
-                ret = EXIT_FAILURE;
-            }
-
-            // trigger processing for anything pending
-            // NOTE: will be blocked until queue is cleared
-            if (hts_tpool_process_flush(queue) == -1) {
-                fprintf(stderr, "Failed to flush queues\n");
+                fprintf(stderr, "Failed to schedule sentinel job\n");
                 ret = EXIT_FAILURE;
             }
         } else {
@@ -373,8 +379,9 @@ int main(int argc, char *argv[])
         pthread_join(thread, NULL);
 
         // Once the writer thread has finished, check the result it sent back
-        if (twritedata.result != 0)
+        if (twritedata.result != 0) {
             ret = EXIT_FAILURE;
+        }
     }
 
     if (queue) {
@@ -387,17 +394,27 @@ int main(int argc, char *argv[])
         sam_hdr_destroy(in_samhdr);
     }
     if (infile) {
-        if (sam_close(infile) != 0)
+        if (sam_close(infile) != 0) {
             ret = EXIT_FAILURE;
+        }
     }
     if (outfile) {
-        if (sam_close(outfile) != 0)
+        if (sam_close(outfile) != 0) {
             ret = EXIT_FAILURE;
+        }
     }
 
-    if (bamdata) {
-        cleanup_bamstorage(bamdata);
+    pthread_mutex_lock(&bamcache.lock);
+    if (bamcache.list) {
+        struct data *tmp = NULL;
+        while (bamcache.list) {
+            tmp = bamcache.list;
+            bamcache.list = bamcache.list->next;
+            cleanup_bamstorage(tmp);
+        }
     }
+    pthread_mutex_unlock(&bamcache.lock);
+
     if (file) {
         free(file);
     }
diff --git a/samples/qtask_unordered.c b/samples/qtask_unordered.c
index 7e659fd3a..05fe50346 100644
--- a/samples/qtask_unordered.c
+++ b/samples/qtask_unordered.c
@@ -1,4 +1,4 @@
-/*  qtask_unordered.c --  showcases the htslib api usage
+/*  qtask_ordered.c --  showcases the htslib api usage
 
     Copyright (C) 2024 Genome Research Ltd.
 
@@ -30,21 +30,30 @@ DEALINGS IN THE SOFTWARE
 #include <unistd.h>
 #include <pthread.h>
 #include <sys/time.h>
-#include <htslib/hfile.h>
 #include <htslib/sam.h>
 #include <htslib/thread_pool.h>
 
-typedef struct beddata {
-    char *name;                 //chromosome name
-    hts_pos_t start;            //start position
-    hts_pos_t end;              //end position
-} beddata;
+struct datacache;
 
-typedef struct splitdata {
-    beddata *region;            //region information
-    const char *infile;         //input file
-    const char *outdir;         //output path
-} splitdata;
+typedef struct basecount {
+    uint64_t counts[16];        //count of all bases
+} basecount;
+
+typedef struct data {
+    int count;                  //used up size
+    int maxsize;                //max size per data chunk
+    bam1_t **bamarray;          //bam1_t array for optimal queueing
+
+    struct datacache *cache;
+    basecount *bases;           //count of all possible bases
+    struct data *next;          //pointer to next one - to reuse earlier allocations
+} data;
+
+typedef struct datacache
+{
+    pthread_mutex_t lock;       //synchronizes the access to cache
+    data *list;                 //data storage
+} datacache;
 
 /// print_usage - print the usage
 /** @param fp pointer to the file / terminal to which usage to be dumped
@@ -52,258 +61,258 @@ returns nothing
 */
 static void print_usage(FILE *fp)
 {
-    fprintf(fp, "Usage: qtask_unordered infile threadcount outdir bedfile\n\
-Splits the input to files specific for regions given in bedfile. Output is\n\
-saved in outdir. Expects the index file to be present along with infile.\n");
+    fprintf(fp, "Usage: qtask_unordered infile threadcount [chunksize]\n\
+Shows the base counts and calculates GC ratio - sum(G,C) / sum(A,T,C,G)\n\
+chunksize [4096] sets the number of alignments clubbed together to process.\n");
     return;
 }
 
-/// readbedfile - read the bedfile and return region array
-/** @param file bed file name
- *  @param data - output, pointer to array of beddata data, to hold bed regions
-returns number of regions in data
+/// getbamstorage - allocates storage for alignments to queue
+/** @param chunk number of bam data to allocate
+ * @param bases storage of result
+ * @param bamcache cached storage
+returns already allocated data storage if one is available, otherwise allocates new
 */
-int readbedfile(const char *file, struct beddata **data)
+data* getbamstorage(int chunk, basecount *bases, datacache *bamcache)
 {
-    int ret = -1, cnt = 0 , max = 0;
-    kstring_t line = KS_INITIALIZE;
-    htsFile *bedfile = NULL;
-    char *sptr = NULL, *token = NULL;
-    const char *sep ="\t";
-    beddata *bedtoks = NULL;
-
-    if (!data || *data) {
-        printf("Invalid argument\n");
-        goto fail;
-    }
-    if (!(bedfile = hts_open(file, "r"))) {
-        printf("Failed to open bedfile\n");
-        goto fail;
-    }
-    //get lines one by one and get region details
-    while (!(ret = kgetline(&line, (kgets_func*)hgets, bedfile->fp.hfile))) {
-        if (!line.l) {
-            continue;               //skip empty line
-        }
-        if  (line.s[0] == '#' || !strncmp("track ", line.s, sizeof("track ") - 1)) {
-            ks_clear(&line);
-            continue;               //ignore track lines and comments
-        }
-        token = strtok_r(line.s, sep, &sptr);
-        if (token) {                //allocate memory for regions
-            if ((cnt+1) > max) {
-                max += 100;         //for another 100 regions
-                bedtoks = realloc(bedtoks, sizeof(beddata) * max);
-            }
-        }
-        else {
-            break;
-        }
-        bedtoks[cnt].name = strdup(token);  //chromosome name
-        token = strtok_r(NULL, sep, &sptr);
-        if (!token) {
-            break;
-        }                                   //start position
-        bedtoks[cnt].start = token ? atoll(token) : 0;
-        token = strtok_r(NULL, sep, &sptr);
-        if (!token) {
-            break;
-        }                                   //end position
-        bedtoks[cnt++].end = token ? atoll(token) : 0;
-        ks_clear(&line);
-    }
-    if (ret != EOF) {
-        goto fail;
+    int i = 0;
+    data *bamdata = NULL;
+
+    if (!bamcache || !bases) {
+        return NULL;
+    }
+    //get from cache if there is an already allocated storage
+    if (pthread_mutex_lock(&bamcache->lock)) {
+        return NULL;
+    }
+    if (bamcache->list) {                   //available
+        bamdata = bamcache->list;
+        bamcache->list = bamdata->next;     //remove and set next one as available
+        bamdata->next = NULL;               //remove link
+        bamdata->count = 0;
+
+        bamdata->bases = bases;
+        bamdata->cache = bamcache;
+        goto end;
     }
-    ret = cnt;
-    if (bedfile) {
-        hts_close(bedfile);
+    //allocate and use
+    if (!(bamdata = malloc(sizeof(data)))) {
+        goto end;
     }
-    ks_free(&line);
-    if (!cnt) {
-        goto fail;
+    bamdata->bamarray = malloc(chunk * sizeof(bam1_t*));
+    if (!bamdata->bamarray) {
+        free(bamdata);
+        bamdata = NULL;
+        goto end;
     }
-    *data = bedtoks;
-    return ret;
-fail:
-    if (bedfile) {
-        hts_close(bedfile);
+    for (i = 0; i < chunk; ++i) {
+        bamdata->bamarray[i] = bam_init1();
     }
-    ks_free(&line);
-    if (bedtoks) {
-        for (max = cnt, cnt = 0; cnt < max; ++cnt) {
-            free(bedtoks[cnt].name);
+    bamdata->maxsize = chunk;
+    bamdata->count = 0;
+    bamdata->next = NULL;
+
+    bamdata->bases = bases;
+    bamdata->cache = bamcache;
+
+end:
+    pthread_mutex_unlock(&bamcache->lock);
+    return bamdata;
+}
+
+/// cleanup_bamstorage - frees a bamdata struct plus contents
+/** @param arg Pointer to data to free
+    @p arg has type void * so it can be used as a callback passed
+    to hts_tpool_dispatch3().
+ */
+void cleanup_bamstorage(void *arg)
+{
+    data *bamdata = (data *) arg;
+    if (!bamdata)
+        return;
+    if (bamdata->bamarray) {
+        int i;
+        for (i = 0; i < bamdata->maxsize; i++) {
+            bam_destroy1(bamdata->bamarray[i]);
         }
-        free(bedtoks);
+        free(bamdata->bamarray);
     }
-    bedtoks = NULL;
-    return 0;
+    free(bamdata);
 }
 
-/// splittoregions - saves the relevant data to separate file
+/// thread_unordered_proc - does the processing of task in queue and updates result
 /** @param args pointer to set of data to be processed
 returns NULL
 the processing could be in any order based on the number of threads in use
 */
-void * splittoregions(void *args)
+void *thread_unordered_proc(void *args)
 {
-    samFile *infile = NULL, *outfile = NULL;
-    sam_hdr_t *in_samhdr = NULL;
-    bam1_t *bamdata = NULL;
-    hts_itr_t *iter = NULL;
-    hts_idx_t *idx = NULL;
-    splitdata *data = (splitdata*)args;
-    char *file = NULL, *region = NULL;
-    int size = 0, ret = 0;
-    if (!(infile = sam_open(data->infile, "r"))) {
-        printf("Failed to open input file\n");
-        goto end;
-    }
-    if (!(in_samhdr = sam_hdr_read(infile))) {
-        printf("Failed to read header data\n");
-        goto end;
-    }
-    if (!(bamdata = bam_init1())) {
-        printf("Failed to initialize bamdata\n");
-        goto end;
-    }
-    size = strlen(data->region->name) + 50;         //region specification
-    if (!(region = malloc(size))) {
-        printf("Failed to allocate memory\n");
-        goto end;
-    }
-    snprintf(region, size, "%s:%"PRIhts_pos"-%"PRIhts_pos, data->region->name, data->region->start, data->region->end);
-    size += strlen(data->outdir);                   //output file with path
-    if (!(file = malloc(size))) {
-        printf("Failed to allocate memory\n");
-        goto end;
-    }
-    snprintf(file, size, "%s/%s_%"PRIhts_pos"_%"PRIhts_pos".sam", data->outdir, data->region->name, data->region->start, data->region->end);
-    if (!(idx = sam_index_load(infile, data->infile))) {
-        printf("Failed to load index\n");
-        goto end;
-    }
-    if (!(iter = sam_itr_querys(idx, in_samhdr, region))) {
-        printf("Failed to create iterator\n");
-        goto end;
-    }
-    if (!(outfile = sam_open(file, "w"))) {
-        printf("Failed to open output file\n");
-        goto end;
-    }
-    if (sam_hdr_write(outfile, in_samhdr) < 0) {
-        printf("Failed to write header\n");
-        goto end;
-    }
-    while ((ret = sam_itr_next(infile, iter, bamdata)) >= 0) {  //read and write relevant data
-        if (sam_write1(outfile, in_samhdr, bamdata) < 0) {
-            printf("Failed to write data\n");
-            goto end;
+    int i = 0;
+    data *bamdata = (data*)args;
+    uint64_t pos = 0;
+    uint8_t *data = NULL;
+    uint64_t counts[16] = {0};
+    for ( i = 0; i < bamdata->count; ++i) {
+        data = bam_get_seq(bamdata->bamarray[i]);
+        for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) {
+            /* it is faster to count all bases and select required ones later
+            compared to select and count here */
+            counts[bam_seqi(data, pos)]++;
         }
     }
-    if (ret != -1) {
-        printf("Failed to get all data\n");
+    //update result and add the memory block for reuse
+    pthread_mutex_lock(&bamdata->cache->lock);
+    for (i = 0; i < 16; i++) {
+        bamdata->bases->counts[i] += counts[i];
     }
 
-end:
-    free(data);
-    if (infile) {
-        sam_close(infile);
-    }
-    if (outfile) {
-        sam_close(outfile);
-    }
-    if (iter) {
-        sam_itr_destroy(iter);
-    }
-    if (idx) {
-        hts_idx_destroy(idx);
-    }
-    if (bamdata) {
-        bam_destroy1(bamdata);
-    }
-    if (in_samhdr) {
-        sam_hdr_destroy(in_samhdr);
-    }
-    if (file) {
-        free(file);
-    }
-    if (region) {
-        free(region);
-    }
+    bamdata->next = bamdata->cache->list;
+    bamdata->cache->list = bamdata;
+    pthread_mutex_unlock(&bamdata->cache->lock);
+
     return NULL;
 }
 
-/// main - splits the data to region specific files
+/// main - start of the demo
 /** @param argc - count of arguments
  *  @param argv - pointer to array of arguments
 returns 1 on failure 0 on success
 */
 int main(int argc, char *argv[])
 {
-    const char *inname = NULL, *outdir = NULL, *bedfile = NULL;
-    int c = 0, ret = EXIT_FAILURE, cnt = 0, regcnt = 0;
+    const char *inname = NULL;
+    int c = 0, ret = EXIT_FAILURE, cnt = 0, chunk = 0;
+    samFile *infile = NULL;
+    sam_hdr_t *in_samhdr = NULL;
     hts_tpool *pool = NULL;
     hts_tpool_process *queue = NULL;
-    beddata *regions = NULL;
+    htsThreadPool tpool = {NULL, 0};
+    data *bamdata = NULL;
+    basecount gccount = {{0}};
+    datacache bamcache = {PTHREAD_MUTEX_INITIALIZER, NULL};
 
-    //qtask infile threadcount outdir [chunksize]
-    if (argc != 5) {
+    //qtask infile threadcount [chunksize]
+    if (argc != 3 && argc != 4) {
         print_usage(stdout);
         goto end;
     }
     inname = argv[1];
     cnt = atoi(argv[2]);
-    outdir = argv[3];
-    bedfile = argv[4];
-    //get regions from bedfile
-    if ((regcnt = readbedfile(bedfile, &regions)) <= 0) {
-        printf("Failed to get bed data\n");
-        goto end;
+    if (argc == 4) {
+        chunk = atoi(argv[3]);
     }
     if (cnt < 1) {
         cnt = 1;
     }
-    if (!(pool = hts_tpool_init(cnt))) {                //thread pool
-        printf("Failed to create thread pool\n");
+    if (chunk < 1) {
+        chunk = 4096;
+    }
+
+    if (!(pool = hts_tpool_init(cnt))) {
+        fprintf(stderr, "Failed to create thread pool\n");
         goto end;
     }
+    tpool.pool = pool;      //to share the pool for file read and write as well
     //queue to use with thread pool, for tasks
     if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) {
-        printf("Failed to create queue\n");
+        fprintf(stderr, "Failed to create queue\n");
         goto end;
     }
-    for (c = 0; c < regcnt; ++c) {
-        struct splitdata *task = malloc(sizeof(splitdata));
-        task->infile = inname;
-        task->outdir = outdir;
-        task->region = regions + c;
-        //schedule jobs to run in parallel
-        if (hts_tpool_dispatch(pool, queue, splittoregions, task) < 0) {
-            printf("Failed to schedule processing\n");
-            goto end;
-        }
+    //open input file - r reading
+    if (!(infile = sam_open(inname, "r"))) {
+        fprintf(stderr, "Could not open %s\n", inname);
+        goto end;
     }
-    //trigger processing for anything pending, NOTE: will be blocked until queue is cleared
-    if (hts_tpool_process_flush(queue) == -1) {
-        printf("Failed to flush queues\n");
+    //share the thread pool with i/o files
+    if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0) {
+        fprintf(stderr, "Failed to set threads to i/o files\n");
+        goto end;
+    }
+    //read header, required to resolve the target names to proper ids
+    if (!(in_samhdr = sam_hdr_read(infile))) {
+        fprintf(stderr, "Failed to read header from file!\n");
         goto end;
     }
-    ret = EXIT_SUCCESS;
 
-    //shutdown queues to exit the result wait
-    hts_tpool_process_shutdown(queue);
+    /*tasks are queued, worker threads get them and process in parallel;
+    all bases are counted instead of counting atcg alone as it is faster*/
 
-end:
-    //cleanup
-    for (c = 0; c < regcnt; ++c) {
-        free(regions[c].name);
+    c = 0;
+    while (c >= 0) {
+        //use cached storage to avoid allocate/deallocate overheads
+        if (!(bamdata = getbamstorage(chunk, &gccount, &bamcache))) {
+            fprintf(stderr, "Failed to allocate memory\n");
+            break;
+        }
+        //read alignments, upto max size for this lot
+        for (cnt = 0; cnt < bamdata->maxsize; ++cnt) {
+            c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
+            if (c < 0) {
+                break;      // EOF or failure
+            }
+        }
+        if (c >= -1 ) {
+            //max size data or reached EOF
+            bamdata->count = cnt;
+            // Queue the data for processing.  hts_tpool_dispatch3() is
+            // used here as it allows in-flight data to be cleaned up
+            // properly when stopping early due to errors.
+            if (hts_tpool_dispatch3(pool, queue, thread_unordered_proc, bamdata,
+                                    cleanup_bamstorage, cleanup_bamstorage,
+                                    0) == -1) {
+                fprintf(stderr, "Failed to schedule processing\n");
+                goto end;
+            }
+            bamdata = NULL;
+        } else {
+            fprintf(stderr, "Error in reading data\n");
+            break;
+        }
     }
-    free(regions);
 
+     if (-1 == c) {
+        // EOF read, ensure all are processed, waits for all to finish
+        if (hts_tpool_process_flush(queue) == -1) {
+            fprintf(stderr, "Failed to flush queue\n");
+        } else { //all done
+            //refer seq_nt16_str to find position of required bases
+            fprintf(stdout, "GCratio: %f\nBase counts:\n",
+                (gccount.counts[2] /*C*/ + gccount.counts[4] /*G*/) / (float)
+                    (gccount.counts[1] /*A*/ + gccount.counts[8] /*T*/ +
+                        gccount.counts[2] + gccount.counts[4]));
+
+            for (cnt = 0; cnt < 16; ++cnt) {
+                fprintf(stdout, "%c: %"PRIu64"\n", seq_nt16_str[cnt], gccount.counts[cnt]);
+            }
+
+            ret = EXIT_SUCCESS;
+        }
+    }
+ end:
     if (queue) {
         hts_tpool_process_destroy(queue);
     }
+
+    if (in_samhdr) {
+        sam_hdr_destroy(in_samhdr);
+    }
+    if (infile) {
+        if (sam_close(infile) != 0) {
+            ret = EXIT_FAILURE;
+        }
+    }
+
+    pthread_mutex_lock(&bamcache.lock);
+    if (bamcache.list) {
+        struct data *tmp = NULL;
+        while (bamcache.list) {
+            tmp = bamcache.list;
+            bamcache.list = bamcache.list->next;
+            cleanup_bamstorage(tmp);
+        }
+    }
+    pthread_mutex_unlock(&bamcache.lock);
+
     if (pool) {
         hts_tpool_destroy(pool);
     }

From 2492496be4933860c02561bc03f98f3ab497607a Mon Sep 17 00:00:00 2001
From: vasudeva8 <vasudeva.sarma@sanger.ac.uk>
Date: Fri, 5 Jul 2024 12:18:32 +0100
Subject: [PATCH 6/9] update

---
 samples/Makefile | 64 ++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/samples/Makefile b/samples/Makefile
index 9c51e8edc..ecbede4c5 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -2,7 +2,7 @@ HTS_DIR = ../
 include $(HTS_DIR)/htslib_static.mk
 
 CC = gcc
-CFLAGS = -Wall -g -O0
+CFLAGS = -Wall -O2
 
 #to statically link to libhts
 LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_static_LIBS)
@@ -18,97 +18,97 @@ PRGS = flags split split2 cram read_fast read_header read_ref read_bam \
 
 all: $(PRGS)
 
-flags:
+flags: flags_demo.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) flags_demo.c -o $@  $(LDFLAGS)
 
-split:
+split: split.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) split.c -o $@  $(LDFLAGS)
 
-split2:
+split2: split2.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) split2.c -o $@  $(LDFLAGS)
 
-cram:
+cram: cram.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) cram.c -o $@  $(LDFLAGS)
 
-read_fast:
+read_fast: read_fast.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_fast.c -o $@  $(LDFLAGS)
 
-read_header:
+read_header: read_header.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_header.c -o $@  $(LDFLAGS)
 
-read_ref:
+read_ref: read_refname.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_refname.c -o $@  $(LDFLAGS)
 
-read_bam:
+read_bam: read_bam.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_bam.c -o $@  $(LDFLAGS)
 
-read_aux:
+read_aux: read_aux.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_aux.c -o $@  $(LDFLAGS)
 
-dump_aux:
+dump_aux: dump_aux.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) dump_aux.c -o $@  $(LDFLAGS)
 
-add_header:
+add_header: add_header.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) add_header.c -o $@  $(LDFLAGS)
 
-rem_header:
+rem_header: rem_header.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) rem_header.c -o $@  $(LDFLAGS)
 
-update_header:
+update_header: update_header.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) update_header.c -o $@  $(LDFLAGS)
 
-mod_bam:
+mod_bam: mod_bam.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) mod_bam.c -o $@  $(LDFLAGS)
 
-mod_aux:
+mod_aux: mod_aux.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux.c -o $@  $(LDFLAGS)
 
-mod_aux_ba:
+mod_aux_ba: mod_aux_ba.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux_ba.c -o $@  $(LDFLAGS)
 
-write_fast:
+write_fast: write_fast.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) write_fast.c -o $@  $(LDFLAGS)
 
-idx_on_write:
+idx_on_write: index_write.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) index_write.c -o $@  $(LDFLAGS)
 
-read_reg:
+read_reg: index_reg_read.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) index_reg_read.c -o $@  $(LDFLAGS)
 
-read_multireg:
+read_multireg: index_multireg_read.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) index_multireg_read.c -o $@  $(LDFLAGS)
 
-read_fast_i:
+read_fast_i: read_fast_index.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) read_fast_index.c -o $@  $(LDFLAGS)
 
-pileup:
+pileup: pileup.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) pileup.c -o $@  $(LDFLAGS)
 
-mpileup:
+mpileup: mpileup.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) mpileup.c -o $@  $(LDFLAGS)
 
-modstate:
+modstate: modstate.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) modstate.c -o $@  $(LDFLAGS)
 
-pileup_mod:
+pileup_mod: pileup_mod.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) pileup_mod.c -o $@  $(LDFLAGS)
 
-flags_field:
+flags_field: flags_htsopt_field.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) flags_htsopt_field.c -o $@  $(LDFLAGS)
 
-split_t1:
+split_t1: split_thread1.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) split_thread1.c -o $@  $(LDFLAGS)
 
-split_t2:
+split_t2: split_thread2.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) split_thread2.c -o $@  $(LDFLAGS)
 
-index_fasta:
+index_fasta: index_fasta.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) index_fasta.c -o $@  $(LDFLAGS)
 
-qtask_ordered:
+qtask_ordered: qtask_ordered.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) qtask_ordered.c -o $@  $(LDFLAGS)
 
-qtask_unordered:
+qtask_unordered: qtask_unordered.c
 	$(CC) $(CFLAGS) -I $(HTS_DIR) qtask_unordered.c -o $@  $(LDFLAGS)
 
 clean:

From d1c9efa8c4a5f260cb5741f562f0055a98a3c3f9 Mon Sep 17 00:00:00 2001
From: James Bonfield <jkb@sanger.ac.uk>
Date: Wed, 17 Jul 2024 09:54:30 +0100
Subject: [PATCH 7/9] Minor samples/*.md doc updates

---
 samples/DEMO.md   | 12 ++++++------
 samples/README.md |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/samples/DEMO.md b/samples/DEMO.md
index 0344b0f45..31886bd53 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -138,14 +138,14 @@ one as sam and other as bam. A pool of 4 threads is created and shared for both
 read and write.
 
 Qtask_ordered - This application showcases the use of queues and threads for
-custom processing. In this, alignments in input file are updated with GC ratio
-on a custom aux tag. The processing may occur in any order and the result is
+custom processing. Alignments in input file are updated with their GC ratio
+on a custom aux tag. The processing may occur in any order but the result is
 retrieved in same order as it was queued and saved to disk.
 
-Qtask_unordered - This application showcases the use of queues and threads for
-custom processing. In this, alignments in input files are split to different
-files for requested regions. The processing may occur in any order and results
-may appear in the order completion of processing.
+Qtask_unordered - This application showcases the use of queues and threads
+for custom processing. The count of bases and GC ratio are calculated and
+displayed.  The order of counting is irrelevant and hence ordered retrieval is
+not used.
 
 ## Building the sample apps
 
diff --git a/samples/README.md b/samples/README.md
index c68354329..6f90c0c3f 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -202,8 +202,8 @@ indexed.
 [Qtask_ordered][Qtask_ordered]
 
   This application showcases the use of queues and threads for custom
-  processing. In this, alignments in input file are updated with GC ratio on a
-  custom aux tag. The processing may occur in any order and the result is
+  processing. Alignments in input file are updated with their GC ratio on a
+  custom aux tag. The processing may occur in any order but the results are
   retrieved in same order as it was queued and saved to disk.
 
 [Qtask_unordered][Qtask_unordered]

From cecc7a7938a8657430ab4c96102d00c3c629548d Mon Sep 17 00:00:00 2001
From: James Bonfield <jkb@sanger.ac.uk>
Date: Wed, 17 Jul 2024 11:54:08 +0100
Subject: [PATCH 8/9] Improve indentation and program flow in DEMO.md

The "..." to indicate clipped out code is useful as it removes a lot
of the unnecessary boiler plate, especially error checking, and allows
us to drill down to the essense of what we're trying to teach.
However without indentation it's not clear what bits are being
removed.

Also there are times when the code removed is an error handler plus
a subsequent else clause, which gives the false impression with the
indentation that the if-clause has been negated.  Eg:

    ...
    //select required field alone, this is useful for CRAM alone
    if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) {
    ...
       //read header
       in_samhdr = sam_hdr_read(infile);
    ...

Clearly the `sam_hdr_read` is not within the if-clause of
`hts_set_opt` returning < 0, but it's how it reads because "..."
hid too much.  (In the above case just indenting the ... and
un-indenting the hdr read call reverses the mental image.)

My approach here is to replace all `...` error handlers with `... // error`
and to indent them accordingly.

Also where we only have a one line thing, removing open curly braces
if we've cut the close curly brace gives a more sensible view.

None of this is really changing the code we're showing, but presenting
it in a more obvious manner to those that don't understand what is and
isn't an error case.
---
 samples/DEMO.md | 510 +++++++++++++++++++++++++-----------------------
 1 file changed, 271 insertions(+), 239 deletions(-)

diff --git a/samples/DEMO.md b/samples/DEMO.md
index 31886bd53..cefc004b7 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -200,30 +200,31 @@ and macros, data can easily be read from it.
     {
         ...
         //initialize
-        if (!(bamdata = bam_init1())) {
-        ...
+        if (!(bamdata = bam_init1()))
+           ... // error
         //open input files - r reading
-        if (!(infile = sam_open(inname, "r"))) {
-        ...
+        if (!(infile = sam_open(inname, "r")))
+           ... // error
         //read header
-        if (!(in_samhdr = sam_hdr_read(infile))) {
-        ...
+        if (!(in_samhdr = sam_hdr_read(infile)))
+           ... // error
+
         //read data, check flags and update count
         while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
-            if (bamdata->core.flag & BAM_FREAD1) {
+            if (bamdata->core.flag & BAM_FREAD1)
                 cntread1++;
-            }
-        ...
+            ...
+
         //clean up
-        if (in_samhdr) {
+        if (in_samhdr)
             sam_hdr_destroy(in_samhdr);
-        }
-        if (infile) {
+
+        if (infile)
             sam_close(infile);
-        }
-        if (bamdata) {
+
+        if (bamdata)
             bam_destroy1(bamdata);
-        }
+
         return ret;
     }
 Refer: flags_demo.c
@@ -270,21 +271,23 @@ set the reference name in the alignment. It returns -ve value on error.
     int main(int argc, char *argv[])
     {
         ...
-        if (!(infile = sam_open(inname, "r"))) {
-        ...
+        if (!(infile = sam_open(inname, "r")))
+           ... // error
         outfile1 = sam_open(file1, "w");            //as SAM
         outfile2 = sam_open(file2, "wb");           //as BAM
         ...
-        if (!(in_samhdr = sam_hdr_read(infile))) {
-        ...
+        if (!(in_samhdr = sam_hdr_read(infile)))
+           ... // error
+
         //write header
         if ((sam_hdr_write(outfile1, in_samhdr) == -1) ||
-         (sam_hdr_write(outfile2, in_samhdr) == -1)) {
-        ...
+         (sam_hdr_write(outfile2, in_samhdr) == -1))
+           ... // error
+
         while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
             if (bamdata->core.flag & BAM_FREAD1) {
                 if (sam_write1(outfile1, in_samhdr, bamdata) < 0) {
-        ...
+                    ... // error
     }
 Refer: split.c
 
@@ -299,10 +302,11 @@ Below code excerpt shows sam_open_mode api usage.
         ...
         //set file open mode based on file name for 1st and as explicit for 2nd
         if ((sam_open_mode(mode1+1, file1, NULL) == -1) ||
-         (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) {
-        ...
-        if (!(infile = sam_open(inname, "r"))) {
-        ...
+         (sam_open_mode(mode2+1, file2, "sam.gz") == -1))
+           ... // error
+        if (!(infile = sam_open(inname, "r")))
+           ... // error
+
         //open output files
         outfile1 = sam_open(file1, mode1);                          //as compressed SAM through sam_open
         outfile2 = sam_open_format(file2, mode2, NULL);             //as compressed SAM through sam_open_format
@@ -336,7 +340,7 @@ api and used with sam_open_format api to create appropriate CRAM file.
             hts_parse_format(&fmt2, reffmt2) == -1 ||               //embed the reference internally
             hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 ||    //embed autogenerated reference
             hts_parse_format(&fmt4, "cram,no_ref=1") == -1) {       //no reference data encoding at all
-    ...
+       ... // error
     outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2);
     ...
 Refer: cram.c
@@ -355,16 +359,17 @@ structure.
 It is the FASTA format which is mainly in use to store the reference data.
 
     ...
-    if (!(bamdata = bam_init1())) {
-    ...
-    if (!(infile = sam_open(inname, "r"))) {
-    ...
-    if (infile->format.format != fasta_format && infile->format.format != fastq_format) {
-    ...
-    if (!(in_samhdr = sam_hdr_read(infile))) {
-    ...
-    while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
-    ...
+    if (!(bamdata = bam_init1()))
+      ... // error
+    if (!(infile = sam_open(inname, "r")))
+       ... // error
+    if (infile->format.format != fasta_format && infile->format.format != fastq_format)
+       ... // error
+    if (!(in_samhdr = sam_hdr_read(infile)))
+       ... // error
+
+    while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0)
+       ... // error
         printf("\nsequence: ");
         for (c = 0; c < bamdata->core.l_qseq; ++c) {
             printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]);
@@ -379,16 +384,15 @@ Refer: read_fast.c
     ...
     char mode[4] = "a";
     ...
-    if (sam_open_mode(mode + 1, outname, NULL) < 0) {
-    ...
-    if (!(outfile = sam_open(outname, mode))) {
-    ...
-    if (bam_set1(bamdata, strlen(name), name, BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, strlen(data), data, qual, 0)
-     < 0) {
-    ...
+    if (sam_open_mode(mode + 1, outname, NULL) < 0)
+       ... // error
+    if (!(outfile = sam_open(outname, mode)))
+       ... // error
+    if (bam_set1(bamdata, strlen(name), name, BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, strlen(data), data, qual, 0) < 0)
+       ... // error
     if (sam_write1(outfile, out_samhdr, bamdata) < 0) {
         printf("Failed to write data\n");
-    ...
+        ...
 Refer: write_fast.c
 
 
@@ -406,17 +410,21 @@ line can be retrieved using sam_hdr_find_line_pos or sam_hdr_line_id with
 position and unique identifier values respectively.
 
     ...
-    if (!(in_samhdr = sam_hdr_read(infile))) {
-    ...
-            ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data);
-    ...
-            ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data);
-    ...
-        linecnt = sam_hdr_count_lines(in_samhdr, header);
+    if (!(in_samhdr = sam_hdr_read(infile)))
+        ... // error
+
+    if (tag)
+        ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data);
+    else
+        ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data);
     ...
-            ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data);
+
+    linecnt = sam_hdr_count_lines(in_samhdr, header);
     ...
-            ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data);
+    if (tag)
+        ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data);
+    else
+        ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data);
     ...
 Refer: read_header.c
 
@@ -435,16 +443,19 @@ Below code excerpt shows the reference names which has length above given value.
     ...
     //iterate and check each reference's length
     for (pos = 1, c = 0; c < linecnt; ++c) {
-        if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2)) {
-    ...
+        if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2))
+            ... // error
+
         size = atoll(data.s);
         if (size < minsize) {
             //not required
             continue;
         }
-        if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) {
-            //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same!
-    ...
+
+        //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same!
+        if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c)))
+            ... // error
+
         printf("%d,%s,%s\n", pos, id, data.s);
     ...
 Refer: read_refname.c
@@ -483,8 +494,8 @@ indexing the seq_nt16_str array.
         printf("MQUAL: %d\n", bamdata->core.qual);                              //map quality value
         cigar = bam_get_cigar(bamdata);                                         //retrieves the cigar data
         for (i = 0; i < bamdata->core.n_cigar; ++i) {                           //no. of cigar data entries
-            printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i]));   //the macros gives the count of operation
-             and the symbol of operation for given cigar entry
+            printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i]));
+            //the macros gives the count of operation and the symbol of operation for given cigar entry
         }
         printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize);
         data = bam_get_seq(bamdata);
@@ -493,7 +504,7 @@ indexing the seq_nt16_str array.
         ...
         for (i = 0; i < bamdata->core.l_qseq ; ++i) {       //sequence length
             printf("%c", seq_nt16_str[bam_seqi(data, i)]);  //retrieves the base from (internal compressed) sequence data
-        ...
+            ...
             printf("%c", bam_get_qual(bamdata)[i]+33);      //retrieves the quality value
         ...
 Refer: read_bam.c
@@ -534,15 +545,13 @@ given position of the array.
 
     ...
     while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
-        if (i % 2) {    //use options alternatively to demonstrate both
-            //option 1 - get data as string with tag and type
-            if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) {
-                printf("%s\n",sdata.s);
-    ...
-            //option 2 - get raw data
-            if (!(data = bam_aux_get(bamdata, tag))) {
-    ...
-                if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) {
+        //option 1 - get data as string with tag and type
+        if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) {
+            printf("%s\n",sdata.s);
+        ...
+        //option 2 - get raw data
+        if ((data = bam_aux_get(bamdata, tag)) != NULL) {
+            printauxdata(stdout, bam_aux_type(data), -1, data);
     ...
 Refer: read_aux.c
 
@@ -557,8 +566,8 @@ Shows the MD aux tag from alignments.
             printf("%.2s:%c:", bam_aux_tag(data), NULL != strchr("cCsSiI", bam_aux_type(data)) ? 'i' : bam_aux_type(data));
               //macros gets the tag and type of aux data
             //dump the data
-            if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) {
-    ...
+            printauxdata(stdout, bam_aux_type(data), -1, data);
+            ...
             data = bam_aux_next(bamdata, data);                                     //get the next aux data
     ...
 Refer: dump_aux.c
@@ -581,19 +590,22 @@ sam_hdr_write api does the write of the header data to file.
 
     ...
     //add SQ line with SN as TR1 and TR2
-    if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) {                                      //length as 0 for NULL terminated data
-    ...
+    if (sam_hdr_add_lines(in_samhdr, &sq[0], 0))                                        //length as 0 for NULL terminated data
+        ... // error
+
     //add RG line with ID as RG1
-    if (sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) {
-    ...
-    //add pg line
-    if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) {    //NULL is to indicate end of args
-    ...
-    if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) {                         //NULL is to indicate end of args
-    ...
+    if (sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL))
+        ... // error
+
+    //add PG/CO lines
+    if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL))      //NULL is to indicate end of args
+        ... // error
+    if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL))                           //NULL is to indicate end of args
+        ... // error
+
     //write output
-    if (sam_hdr_write(outfile, in_samhdr) < 0) {
-    ...
+    if (sam_hdr_write(outfile, in_samhdr) < 0)
+        ... // error
 Refer: add_header.c
 
 Not all type of header data can be removed but where it is possible, either a
@@ -603,14 +615,14 @@ to be used. To remove all lines of a type, header type and unique identifier
 field tag are to be used.
 
     ...
-        //remove specific line
-        if (sam_hdr_remove_line_id(in_samhdr, header, id, idval)) {
-    ...
-        //remove multiple lines of a header type
-        if (sam_hdr_remove_lines(in_samhdr, header, id, NULL)) {
-    ...
-    if (sam_hdr_write(outfile, in_samhdr) < 0) {
-    ...
+
+    //remove specific line
+    if (sam_hdr_remove_line_id(in_samhdr, header, id, idval) < 0)
+        ... // error
+
+    //remove multiple lines of a header type
+    if (sam_hdr_remove_lines(in_samhdr, header, id, NULL) < 0)
+        ... // error
 Refer: rem_header.c
 
 Shows the file content after removing SQ line with SN 2.
@@ -658,13 +670,12 @@ be easier than update of existing record.
             break;
             case 3:// RNAME
             case 7:// RNEXT
-                if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) {
-    ...
+                if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0)
+                    ... // error
                 if (field == 3) {
                     //reference
                     bamdata->core.tid = ret;
-                }
-                else {
+                } else {
                     //mate reference
                     bamdata->core.mtid = ret;
                 }
@@ -677,20 +688,21 @@ be easier than update of existing record.
             break;
             case 6:// CIGAR
             {
-    ...
+                ...
                 //get cigar array and set all data in new bam record
-                if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) {
-    ...
+                if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0)
+                    ... // error
                 if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid,
                  bamdata->core.pos, bamdata->core.qual, ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos,
                   bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata),
-                   (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) {
-    ...
+                   (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0)
+                    ... // error
+
                 //correct sequence data as input is expected in ascii format and not as compressed inside bam!
                 memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2);
                 //copy the aux data
                 memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata));
-    ...
+            ...
             break;
             case 8:// PNEXT
                 bamdata->core.mpos = atoll(val);
@@ -699,18 +711,16 @@ be easier than update of existing record.
                 bamdata->core.isize = atoll(val);
             break;
             case 10:// SEQ
-    ...
+                ...
                 for( c = 0; c < i; ++c) {
                     bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]);
                 }
             break;
             case 11:// QUAL
-    ...
-                for (c = 0; c < i; ++c) {
+                ...
+                for (c = 0; c < i; ++c)
                     val[c] -= 33;               //phred score from ascii value
-                }
                 memcpy(bam_get_qual(bamdata), val, i);
-    ...
 Refer: mod_bam.c
 
 Shows data with RNAME modified to T2.
@@ -725,33 +735,32 @@ present at all, it can be appended using bam_aux_append.
     //matched to qname, update aux
     if (!(data = bam_aux_get(bamdata, tag))) {
         //tag not present append
-    ...
-        if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) {
-    ...
-    else {
-        char auxtype = bam_aux_type(data);
+        ... // cut: computed length and val based on tag type
+        if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val))
+            ... // error
+    } else {
         //update the tag with newer value
+        char auxtype = bam_aux_type(data);
         switch (type) {
             case 'f':
             case 'd':
-    ...
-                if (bam_aux_update_float(bamdata, tag, atof(val))) {
-    ...
+                ...
+                if (bam_aux_update_float(bamdata, tag, atof(val)))
+                    ... // error
             case 'C':
             case 'S':
             case 'I':
-    ...
-                if (bam_aux_update_int(bamdata, tag, atoll(val))) {
-    ...
+                ...
+                if (bam_aux_update_int(bamdata, tag, atoll(val)))
+                    ... // error
             case 'Z':
-    ...
-                if (bam_aux_update_str(bamdata, tag, length, val)) {
-    ...
+                ...
+                if (bam_aux_update_str(bamdata, tag, length, val))
+                    ... // error
             case 'A':
-    ...
+                ...
                 //update the char data directly on buffer
                 *(data+1) = val[0];
-    ...
 Refer: mod_aux.c
 
 Shows the given record's MD tag set to Test.
@@ -761,8 +770,8 @@ Shows the given record's MD tag set to Test.
 The array aux fields can be updated using bam_aux_update_array api.
 
     ...
-    if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) {
-    ...
+    if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt))
+        ... // error
 Refer: mod_aux_ba.c
 
 Shows the records updated with an array of integers, containing count of ACGT
@@ -799,15 +808,15 @@ At the end of write, sam_idx_save api need to be invoked to save the index.
 
     ...
     //write header
-    if (sam_hdr_write(outfile, in_samhdr)) {
-    ...
+    if (sam_hdr_write(outfile, in_samhdr))
+        ... // error
     // initialize indexing, before start of write
-    if (sam_idx_init(outfile, in_samhdr, size, fileidx)) {
-    ...
-        if (sam_write1(outfile, in_samhdr, bamdata) < 0) {
-    ...
-    if (sam_idx_save(outfile)) {
-    ...
+    if (sam_idx_init(outfile, in_samhdr, size, fileidx))
+        ... // error
+        if (sam_write1(outfile, in_samhdr, bamdata) < 0)
+            ... // error
+    if (sam_idx_save(outfile))
+        ... // error
 Refer:index_write.c
 
 Creates mpileup.1.bam and mpileup.1.bam.bai in /tmp/.
@@ -831,8 +840,8 @@ When fai/gzi path are NULL, they are created along with input file.
 These index files will be useful for reference data access.
 
     ...
-    if (fai_build3(filename, NULL, NULL) == -1) {
-    ...
+    if (fai_build3(filename, NULL, NULL) == -1)
+        ... // error
 Refer: index_fast.c
 
 A tabix index can be created for compressed vcf/sam/bed and other data using
@@ -884,18 +893,19 @@ sam_itr_destroy and hts_idx_destroy apis does this.
 
     ...
     //load index file
-    if (!(idx = sam_index_load2(infile, inname, idxfile))) {
-    ...
+    if (!(idx = sam_index_load2(infile, inname, idxfile)))
+        ... // error
     //create iterator
-    if (!(iter = sam_itr_querys(idx, in_samhdr, region))) {
-    ...
+    if (!(iter = sam_itr_querys(idx, in_samhdr, region)))
+        ... // error
+
     //read using iterator
-    while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) {
-    ...
-    if (iter) {
+    while ((c = sam_itr_next(infile, iter, bamdata)) >= 0)
+        ... // error
+
+    if (iter)
         sam_itr_destroy(iter);
-    }
-    if (idx) {
+    if (idx)
         hts_idx_destroy(idx);
     ...
 Refer:index_reg_read.c
@@ -926,19 +936,20 @@ itself.
 
     ...
     //load index file, assume it to be present in same location
-    if (!(idx = sam_index_load(infile, inname))) {
-    ...
+    if (!(idx = sam_index_load(infile, inname)))
+        ... // error
     //create iterator
-    if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) {
-    ...
+    if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt)))
+        ... // error
     if (regions) {
         //can be freed as it is no longer required
         free(regions);
         regions = NULL;
     }
+
     //get required area
-    while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) {
-    ...
+    while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0))
+        ... // process bamdata
 Refer:index_multireg_read.c
 
 With compressed sample.sam and 2 regions from reference T1 (30 to 32) and 1
@@ -968,37 +979,45 @@ Below excerpt shows fasta/q access with single and multiregions,
 
     ...
     //load index
-    if (!(idx = fai_load3_format(inname, NULL, NULL, FAI_CREATE, fmt))) {
-    ...
-        //get data from given region
-        if (!(data = fai_fetch64(idx, region, &len))) {
-    ...
-        else {
-            printf("Data: %"PRId64" %s\n", len, data);
-            free((void*)data);
-            //get quality for fastq type
-            if (fmt == FAI_FASTQ) {
-                if (!(data = fai_fetchqual64(idx, region, &len))) {
+    if (!(idx = fai_load3_format(inname, NULL, NULL, FAI_CREATE, fmt)))
+        ... // error
+
     ...
+    if (!usemulti) {
+        //get data from single given region
+        if (!(data = fai_fetch64(idx, region, &len)))
+            ... // region not found
+
+        printf("Data: %"PRId64" %s\n", len, data);
+        free((void*)data);
+        //get quality for fastq type
+        if (fmt == FAI_FASTQ) {
+            if (!(data = fai_fetchqual64(idx, region, &len)))
+                ... // region not found
+        ...
+
+    } else { // usemulti
         //parse, get each region and get data for each
         while ((remaining = fai_parse_region(idx, region, &tid, &beg, &end, HTS_PARSE_LIST))) {     //here expects regions as csv
             //parsed the region, correct end points based on actual data
-            if (fai_adjust_region(idx, tid, &beg, &end) == -1) {
-    ...
+            if (fai_adjust_region(idx, tid, &beg, &end) == -1)
+                ... // error
             //get data for given region
-            if (!(data = faidx_fetch_seq64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
-    ...
-                printf("Data: %"PRIhts_pos" %s\n", len, data);
+            if (!(data = faidx_fetch_seq64(idx, faidx_iseq(idx, tid), beg, end, &len)))
+                ... // region not found
+
+            printf("Data: %"PRIhts_pos" %s\n", len, data);
+            free((void*)data);
+            data = NULL;
+            //get quality data for fastq
+            if (fmt == FAI_FASTQ) {
+                if (!(data = faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len)))
+                    ... // error
+                printf("Qual: %"PRIhts_pos" %s\n", len, data);
                 free((void*)data);
-                data = NULL;
-                //get quality data for fastq
-                if (fmt == FAI_FASTQ) {
-                    if (!(data = faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len))) {
-    ...
-                        printf("Qual: %"PRIhts_pos" %s\n", len, data);
-                        free((void*)data);
-    ...
+            ...
             region = remaining;                                     //parse remaining region defs
+
     ...
     if (idx) {
         fai_destroy(idx);
@@ -1062,8 +1081,8 @@ above the cache limit are discarded.
 Once done, the pileup iterator to be discarded by sam_plp_destroy api.
 
     ...
-    if (!(plpiter = bam_plp_init(readdata, &conf))) {
-    ...
+    if (!(plpiter = bam_plp_init(readdata, &conf)))
+        ... // error
     //set constructor destructor callbacks
     bam_plp_constructor(plpiter, plpconstructor);
     bam_plp_destructor(plpiter, plpdestructor);
@@ -1095,7 +1114,7 @@ Once done, the pileup iterator to be discarded by sam_plp_destroy api.
                     printf("?");
                 }
     ...
-    if (plpiter) {
+    if (plpiter)
         bam_plp_destroy(plpiter);
     ...
 Refer:pileup.c
@@ -1151,8 +1170,8 @@ above the cache limit are discarded.
 Once done, the pileup iterator to be discarded by sam_mplp_destroy api.
 
     ...
-    if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) {
-    ...
+    if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf)))
+        ... // error
     //set constructor destructor callbacks
     bam_mplp_constructor(mplpiter, plpconstructor);
     bam_mplp_destructor(mplpiter, plpdestructor);
@@ -1218,13 +1237,13 @@ end of processing, the state need to be released using hts_base_mod_state_free
 api.
 
     ...
-    if (!(ms = hts_base_mod_state_alloc())) {
-    ...
+    if (!(ms = hts_base_mod_state_alloc()))
+        ... // error
     while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0)
     {
-    ...
-        if (bam_parse_basemod(bamdata, ms)) {
-    ...
+        ...
+        if (bam_parse_basemod(bamdata, ms))
+            ... // error
         bm = bam_mods_recorded(ms, &cnt);
         for (k = 0; k < cnt; ++k) {
             printf("%c", bm[k]);
@@ -1275,7 +1294,7 @@ api.
             }
         }
     ...
-    if (ms) {
+    if (ms)
         hts_base_mod_state_free(ms);
     ...
 Refer:modstate.c
@@ -1305,7 +1324,7 @@ api.
     {
     ...
     if (!(plpiter = bam_plp_init(readdata, &conf))) {
-    ...
+        ... // error
     //set constructor destructor callbacks
     bam_plp_constructor(plpiter, plpconstructor);
     bam_plp_destructor(plpiter, plpdestructor);
@@ -1322,11 +1341,11 @@ api.
             }
             /*invoke bam mods_mods_at_qpos before bam_plp_insertion_mod that the base modification
             is retrieved before change in pileup pos thr' plp_insertion_mod call*/
-            if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) {
-    ...
+            if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1)
+                ... // error
             //use plp_insertion/_mod to get insertion and del at the same position
-            if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) {
-    ...
+            if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1)
+                ... // error
             //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered
             //base and modification
             printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) :
@@ -1344,7 +1363,7 @@ api.
                     printf("-%d", dellen);
                     for (k = 0; k < dellen; ++k) {
                         printf("?");
-    ...
+                ...
             else if (plp[j].indel < 0) {
                 //deletion
                 printf("%d", plp[j].indel);
@@ -1369,17 +1388,18 @@ data and a combination of flags for the required fields can be passed with
 CRAM_OPT_REQUIRED_FIELDS to this api.
 
     ...
-       //select required field alone, this is useful for CRAM alone
-       if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) {
-    ...
-       //read header
-       in_samhdr = sam_hdr_read(infile);
+    //select required field alone, this is useful for CRAM alone
+    if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0)
+        ... // error
+
+    //read header
+    in_samhdr = sam_hdr_read(infile);
     ...
     //read data, check flags and update count
     while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) {
-        if (bamdata->core.flag & BAM_FREAD1) {
+        if (bamdata->core.flag & BAM_FREAD1)
             cntread1++;
-    ...
+        ...
 Refer: flags_htsopt_field.c
 
 
@@ -1420,16 +1440,19 @@ Below excerpt shows a thread pool shared across files,
 
     ...
     //create a pool of 4 threads
-    if (!(tpool.pool = hts_tpool_init(4))) {
-    ...
+    if (!(tpool.pool = hts_tpool_init(4)))
+        ... // error
     //share the pool with all the 3 files
     if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 ||
     hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 ||
     hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) {
-    ...
-    if (tpool.pool) {
+        ... // error
+
+    ... // do something
+
+    //tidy up at end
+    if (tpool.pool)
         hts_tpool_destroy(tpool.pool);
-    }
     ...
 Refer: split_thread2.c
 
@@ -1488,71 +1511,78 @@ used to identify the completion of all tasks at the result collection side.
     ...
     void *thread_ordered_proc(void *args)
     {
-    ...
+        ...
         for ( i = 0; i < bamdata->count; ++i) {
-    ...
-            for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) {
+            ...
+            for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos)
                 count[bam_seqi(data,pos)]++;
-    ...
+            ...
             gcratio = (count[2] /*C*/ + count[4] /*G*/) / (float) (count[1] /*A*/ + count[8] /*T*/ + count[2] + count[4]);
 
             if (bam_aux_append(bamdata->bamarray[i], "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) {
+
     ...
     void *threadfn_orderedwrite(void *args)
     {
-    ...
+        ...
         //get result and write; wait if no result is in queue - until shutdown of queue
         while (tdata->result == 0 &&
             (r = hts_tpool_next_result_wait(tdata->queue)) != NULL) {
             bamdata = (data*) hts_tpool_result_data(r);
-    ...
+            ...
             for (i = 0; i < bamdata->count; ++i) {
                 if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) {
-    ...
+                    ... // error
+            ...
             hts_tpool_delete_result(r, 0);              //release the result memory
+            ...
 
         // Shut down the process queue.  If we stopped early due to a write failure,
         // this will signal to the other end that something has gone wrong.
         hts_tpool_process_shutdown(tdata->queue);
+
     ...
     int main(int argc, char *argv[])
     {
-    ...
-        if (!(pool = hts_tpool_init(cnt))) {                //thread pool
-    ...
+        ...
+        if (!(pool = hts_tpool_init(cnt)))                  //thread pool
+            ... // error
         tpool.pool = pool;      //to share the pool for file read and write as well
         //queue to use with thread pool, for task and results
         if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) {
     ...
         //share the thread pool with i/o files
         if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 ||
-            hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0) {
-    ...
-        if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) {
-    ...
+            hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0)
+            ... // error
+        if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata))
+            ... // error
         while (c >= 0) {
-            if (!(bamdata = getbamstorage(chunk, &bamcache))) {
-    ...
+            if (!(bamdata = getbamstorage(chunk, &bamcache)))
+                ... // error
             for (cnt = 0; cnt < bamdata->maxsize; ++cnt) {
                 c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
-    ...
+                ...
                 if (hts_tpool_dispatch3(pool, queue, thread_ordered_proc, bamdata,
                                         cleanup_bamstorage, cleanup_bamstorage,
-                                        0) == -1) {
-    ...
+                                        0) == -1)
+                    ... // error
+        ...
         if (queue) {
             if (-1 == c) {
                 // EOF read, send a marker to tell the threadfn_orderedwrite()
                 // function to shut down.
                 if (hts_tpool_dispatch(pool, queue, thread_ordered_proc,
                                     NULL) == -1) {
-    ...
+                    ... // error
                 hts_tpool_process_shutdown(queue);
-    ...
+
+        ...
         // Wait for threadfn_orderedwrite to finish.
         if (started_thread) {
             pthread_join(thread, NULL);
-    ...
+
+        ...
         if (queue) {
             // Once threadfn_orderedwrite has stopped, the queue can be
             // cleaned up.
@@ -1567,13 +1597,13 @@ The queue is created as input only.
     ...
     void *thread_unordered_proc(void *args)
     {
-    ...
+        ...
         for ( i = 0; i < bamdata->count; ++i) {
             data = bam_get_seq(bamdata->bamarray[i]);
-            for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) {
-    ...
+            for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos)
                 counts[bam_seqi(data, pos)]++;
-    ...
+
+        ...
         //update result and add the memory block for reuse
         pthread_mutex_lock(&bamdata->cache->lock);
         for (i = 0; i < 16; i++) {
@@ -1583,24 +1613,27 @@ The queue is created as input only.
         bamdata->next = bamdata->cache->list;
         bamdata->cache->list = bamdata;
         pthread_mutex_unlock(&bamdata->cache->lock);
+
     ...
     int main(int argc, char *argv[])
     {
-    ...
-        if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) {
-    ...
+        ...
+        if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1)))
+            ... // error
         c = 0;
         while (c >= 0) {
-    ...
+            ...
             for (cnt = 0; cnt < bamdata->maxsize; ++cnt) {
                 c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]);
-    ...
+
+            ...
             if (c >= -1 ) {
-    ...
+                ...
                 if (hts_tpool_dispatch3(pool, queue, thread_unordered_proc, bamdata,
                                         cleanup_bamstorage, cleanup_bamstorage,
-                                        0) == -1) {
-    ...
+                                        0) == -1)
+                    ... // error
+        ...
         if (-1 == c) {
             // EOF read, ensure all are processed, waits for all to finish
             if (hts_tpool_process_flush(queue) == -1) {
@@ -1611,11 +1644,10 @@ The queue is created as input only.
                     (gccount.counts[2] /*C*/ + gccount.counts[4] /*G*/) / (float)
                         (gccount.counts[1] /*A*/ + gccount.counts[8] /*T*/ +
                             gccount.counts[2] + gccount.counts[4]));
-    ...
+        ...
         if (queue) {
             hts_tpool_process_destroy(queue);
         }
-    ...
 Refer: qtask_unordered.c
 
 ## More Information

From 7d0c19e50b449e64b3e08a40a8fa62f553862423 Mon Sep 17 00:00:00 2001
From: vasudeva8 <vasudeva.sarma@sanger.ac.uk>
Date: Thu, 18 Jul 2024 00:56:12 +0100
Subject: [PATCH 9/9] update based on reviews

---
 samples/DEMO.md     | 25 ++++++++++++-------------
 samples/read_fast.c |  2 +-
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/samples/DEMO.md b/samples/DEMO.md
index cefc004b7..98c9981b8 100644
--- a/samples/DEMO.md
+++ b/samples/DEMO.md
@@ -377,7 +377,7 @@ It is the FASTA format which is mainly in use to store the reference data.
         if (infile->format.format == fastq_format) {
             printf("\nquality: ");
             for (c = 0; c < bamdata->core.l_qseq; ++c) {
-                printf("%c", bam_get_qual(bamdata)[c]);
+                printf("%c", bam_get_qual(bamdata)[c] + 33);
     ...
 Refer: read_fast.c
 
@@ -412,20 +412,19 @@ position and unique identifier values respectively.
     ...
     if (!(in_samhdr = sam_hdr_read(infile)))
         ... // error
-
-    if (tag)
-        ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data);
-    else
-        ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data);
-    ...
-
-    linecnt = sam_hdr_count_lines(in_samhdr, header);
     ...
-    if (tag)
-        ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data);
-    else
-        ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data);
+      if (tag)
+          ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data);
+      else
+          ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data);
     ...
+        linecnt = sam_hdr_count_lines(in_samhdr, header);
+        ...
+            if (tag)
+                ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data);
+            else
+                ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data);
+        ...
 Refer: read_header.c
 
 This will show the VN tag's value from HD header.
diff --git a/samples/read_fast.c b/samples/read_fast.c
index 3ed6ba1f8..10f807b69 100644
--- a/samples/read_fast.c
+++ b/samples/read_fast.c
@@ -92,7 +92,7 @@ int main(int argc, char *argv[])
         if (infile->format.format == fastq_format) {
             printf("\nquality: ");
             for (c = 0; c < bamdata->core.l_qseq; ++c) {
-                printf("%c", bam_get_qual(bamdata)[c]);
+                printf("%c", bam_get_qual(bamdata)[c] + 33);
             }
         }
     }