#!/bin/bash
#
# MergeBam.sh - combine the sorted BAM files named in a list file into a
# single BAM and index it.
#
# Usage:
#   MergeBam.sh AssayType STVersion RunName MaxCores FileBamFilesList
#
# Arguments:
#   AssayType         "ATAC" concatenates the inputs with `samtools cat`;
#                     any other value merges them with `samtools merge`.
#   STVersion         "None" leaves the output name unprefixed; any other
#                     value prefixes it with "Combined_".
#   RunName           Base name of the output BAM.
#   MaxCores          Thread count handed to samtools via -@.
#   FileBamFilesList  File listing the input BAMs (passed to samtools -b);
#                     its first line is also used to inspect the header.
#
# Output:
#   [Combined_]<RunName>_ATAC.bam or [Combined_]<RunName>_Bioproduct.bam,
#   followed by `samtools index` on that file.
#   Every samtools command line is echoed to stderr before it runs.
#
# Exit status: 255 on bad usage; otherwise non-zero as soon as any command
# (including any stage of a pipeline) fails.

# -e: stop on the first failing command
# -u: treat unset variables as errors
# -o pipefail: a pipeline fails if ANY stage fails, so a broken
#              `samtools merge` is not hidden by a succeeding `reheader`
set -euo pipefail

if [ $# -lt 5 ]
then
    echo -e "
    Invalid user input.\n
    Please run as: MergeBam.sh AssayType STVersion RunName MaxCores FileBamFilesList \n
    Exiting...\n" >&2
    # 255 is the status bash reports for 'exit -1'; spelled out because
    # negative exit statuses are not portable.
    exit 255
fi

assay=$1
st_version=$2
run_name=$3
max_cores=$4
file_bam_files_list=$5

# log_and_run CMD [ARG...]
# Echo the command line to stderr, then execute it with its arguments intact
# (no eval, so paths containing spaces or glob characters are passed verbatim).
log_and_run() {
    printf '%s\n' "$*" >&2
    "$@"
}

# If any contig in the BAM files exceeds 500M in size add a switch to create a csi index instead of bai
# Only the header of the first listed BAM is inspected.
# NOTE(review): presumably all inputs share the same reference - confirm.
first_bam=$(head -n 1 -- "$file_bam_files_list")
# awk reads the whole header (no early exit) so `samtools view` is never
# killed by SIGPIPE, which would trip pipefail. The LN tag is located by name
# rather than by position, and "+ 0" forces a numeric comparison.
csi_index_switch=$(samtools view -H "$first_bam" | awk -F '\t' '
    /^@SQ/ {
        for (i = 2; i <= NF; i++)
            if ($i ~ /^LN:/ && substr($i, 4) + 0 > 500000000)
                needs_csi = 1
    }
    END { if (needs_csi) print "-c" }')

output_bam="${run_name}"
if [[ "$st_version" != "None" ]]
then
    output_bam="Combined_${run_name}"
fi

if [[ "$assay" == "ATAC" ]]
then
    output_bam+="_ATAC.bam"

    # Use cat subcommand to concat the sorted bams - this is very fast
    log_and_run samtools cat -@ "$max_cores" -b "$file_bam_files_list" -o "$output_bam"
else
    # Use merge subcommand to combine the sorted bams - this can take a while for large bams
    output_bam="${output_bam}_Bioproduct.bam"

    # Header filter handed to `samtools reheader -c`: keeps the first @CO line
    # and drops every later @CO line, to remove redundant @CO lines.
    # NOTE(review): this discards later @CO lines even when their text differs
    # from the first one - confirm that is intended.
    dedup_co_prog='/^@CO/ {if (!seen++) print; next} {print}'
    reheader_filter="awk '${dedup_co_prog}'"

    merge_cmd=(samtools merge -c -p -@ "$max_cores" -b "$file_bam_files_list" -o -)
    reheader_cmd=(samtools reheader - -c "$reheader_filter")

    printf '%s\n' "${merge_cmd[*]} | ${reheader_cmd[*]} > $output_bam" >&2
    "${merge_cmd[@]}" | "${reheader_cmd[@]}" > "$output_bam"
fi

# Add the csi switch only when it was set, so no empty argument reaches samtools.
index_cmd=(samtools index)
if [[ -n "$csi_index_switch" ]]
then
    index_cmd+=("$csi_index_switch")
fi
index_cmd+=(-@ "$max_cores" "$output_bam")
log_and_run "${index_cmd[@]}"
