#!/usr/bin/env bash
#
# Refreshes the IgBLAST executable, its internal/optional data directories,
# the IMGT release note and the per-species BLAST indexes that live next to
# this script.
#
# Usage:
#   bash update_igblast.sh <IMGT_REF_DIR> <IMGT_VER_FILE> <IGBLAST_RELEASE_DIR>
#   bash update_igblast.sh        # downloads all three inputs itself
#
# This script needs Bash: it uses associative arrays, pushd/popd and process
# substitution.
set -euo pipefail

# Absolute path of this script and the directory that contains it; every
# output of the script is written underneath BASE_IGBLAST_DIR.
THIS_SCRIPT_PATH=$(realpath "$0")
BASE_IGBLAST_DIR=$(dirname "$THIS_SCRIPT_PATH")
# Constant-region FASTA files are read from a sibling of the script directory.
CONSTANT_REGION_DIR="$(dirname "$BASE_IGBLAST_DIR")/control_files/VDJ"

# Uncompressed IMGT V-QUEST reference directory. Download the archive from
# https://www.imgt.org/download/V-QUEST/IMGT_V-QUEST_reference_directory.zip
IMGT_REF_DIR=

# IMGT release note, available at
# https://www.imgt.org/download/V-QUEST/IMGT_vquest_release.txt
IMGT_VER_FILE=

# Uncompressed IgBLAST release. The latest archive is published at
# https://ftp.ncbi.nih.gov/blast/executables/igblast/release/LATEST/ncbi-igblast-{ver}-x64-linux.tar.gz
IGBLAST_RELEASE_DIR=

# If this variable is set, then the directory will be removed at the end of this script.
temporary_directory=

# Resolve IMGT_REF_DIR, IMGT_VER_FILE and IGBLAST_RELEASE_DIR.
#   no arguments    -> download and unpack everything into a scratch directory
#   three arguments -> use the caller-supplied paths
#   anything else   -> print usage and exit with a non-zero status
if [[ $# -eq 0 ]]; then
    # mktemp is left to print its own diagnostics if it fails.
    temporary_directory=$(mktemp -d)
    # Remove the scratch directory on every exit path, including an early
    # abort caused by a failing download or extraction.
    trap 'rm -rf -- "${temporary_directory}"' EXIT

    pushd "${temporary_directory}" > /dev/null

    wget https://www.imgt.org/download/V-QUEST/IMGT_V-QUEST_reference_directory.zip
    wget https://www.imgt.org/download/V-QUEST/IMGT_vquest_release.txt

    # Scrape the directory listing for the quoted name of the Linux archive.
    # Only the first hit is kept; awk still consumes the whole listing so the
    # upstream wget never sees a closed pipe.
    igblast_url=https://ftp.ncbi.nih.gov/blast/executables/igblast/release/LATEST
    igblast_tgz=$(wget "${igblast_url}/" -O - 2>/dev/null \
        | awk '!found && match($0, /"ncbi-igblast-[^"]*-x64-linux\.tar\.gz"/) {
                   print substr($0, RSTART + 1, RLENGTH - 2)
                   found = 1
               }')
    if [[ -z "${igblast_tgz}" ]]; then
        echo "Could not find ncbi-igblast-*-x64-linux.tar.gz in ${igblast_url}/" >&2
        exit 1
    fi
    wget "${igblast_url}/${igblast_tgz}"

    unzip IMGT_V-QUEST_reference_directory.zip
    tar xzf "${igblast_tgz}"

    IMGT_REF_DIR=${temporary_directory}/IMGT_V-QUEST_reference_directory
    IMGT_VER_FILE=${temporary_directory}/IMGT_vquest_release.txt
    # The archive is expected to unpack into a directory named like the
    # archive without its "-x64-linux.tar.gz" suffix.
    IGBLAST_RELEASE_DIR=${temporary_directory}/${igblast_tgz%-x64-linux.tar.gz}

    popd > /dev/null
elif [[ $# -eq 3 ]]; then
    # The two directories are made absolute because the script changes its
    # working directory later on.
    IMGT_REF_DIR=$(readlink -f "$1")
    IMGT_VER_FILE=$2
    IGBLAST_RELEASE_DIR=$(readlink -f "$3")
else
    echo "Run as : bash update_igblast.sh <IMGT_REF_DIR> <IMGT_VER_FILE> <IGBLAST_RELEASE_DIR>" >&2
    echo " or as : bash update_igblast.sh" >&2
    # 255 is the status Bash reports for an out-of-range "exit -1".
    exit 255
fi


# Maps the short species name used for output folders and file prefixes to
# the species directory name used underneath IMGT_REF_DIR.
declare -A species_map=([human]=Homo_sapiens [mouse]=Mus_musculus)

# Copy over igblast exe and internal files.
# Every path is quoted so that locations containing whitespace work.
echo -e "Copying over igblast executable and internal data\n"
cp -- "$IGBLAST_RELEASE_DIR/bin/igblastn" "$BASE_IGBLAST_DIR/igblastn_linux"
# The data directories are replaced wholesale so that no stale file from a
# previous release survives.
rm -rf "$BASE_IGBLAST_DIR/internal_data"
cp -r "$IGBLAST_RELEASE_DIR/internal_data" "$BASE_IGBLAST_DIR/"
rm -rf "$BASE_IGBLAST_DIR/optional_file"
cp -r "$IGBLAST_RELEASE_DIR/optional_file" "$BASE_IGBLAST_DIR/"

# Copy IMGT version info
cp -- "$IMGT_VER_FILE" "$BASE_IGBLAST_DIR/"

# Concat and prune imgt files then create igblast index.
# For every species (human, mouse), chain family (TR, IG) and segment
# (V, D, J, C) a pruned FASTA file named <species>_<TCR|Ig>_<segment> is
# written to BASE_IGBLAST_DIR/<TCR|Ig>/<species>/ and indexed with makeblastdb.
for species in human mouse; do
    echo -e "\nGenerating igblast index file for $species:"
    echo -e "----------------------------------------------------------------"
    species_folder=${species_map[$species]}
    for chain in TR IG; do
        if [[ "$chain" == "TR" ]]; then
            output_folder="${BASE_IGBLAST_DIR}/TCR/${species}/"
            prefix="${species}_TCR"
        else
            output_folder="${BASE_IGBLAST_DIR}/Ig/${species}/"
            prefix="${species}_Ig"
        fi

        mkdir -p "$output_folder"
        # The wildcard delete below must never run in the wrong directory.
        cd "$output_folder" || exit 1
        echo "    Deleting existing index files"
        rm -f ./*
        for segment in V D J C; do
            output_prefix="${prefix}_${segment}"
            echo "    Creating index for chain: $chain and segment: $segment"

            if [[ "$segment" == "C" ]]; then
                # Keep every record whose header starts with ">$chain"
                # (case-insensitive). Selecting whole records avoids the "--"
                # group separators that grep -A inserts and also keeps
                # sequences that span more than one line.
                awk -v want=">${chain}" '
                    /^>/ { keep = (toupper(substr($0, 1, length(want))) == toupper(want)) }
                    keep
                ' "${CONSTANT_REGION_DIR}/${species}_c_fragments.fasta" > tmp
                if [[ ! -s tmp ]]; then
                    echo "    No ${chain} constant region records found for ${species}" >&2
                    exit 1
                fi
            else
                # concat fastas; an unmatched glob stays literal, so cat fails
                # and the script aborts instead of building an empty index.
                cat "$IMGT_REF_DIR/$species_folder/$chain/"*"$segment.fasta" > tmp
            fi

            # perl script to prune seq ids; the awk filter then keeps only the
            # first record for each sequence id. stderr of the whole pipeline
            # is indented to line up with the progress messages.
            {
                perl "$IGBLAST_RELEASE_DIR/bin/edit_imgt_file.pl" tmp \
                    | awk '/^>/{f=!d[$1];d[$1]=1}f' > "$output_prefix"
            } 2> >(sed 's/^/    /' >&2)
            rm tmp

            # create igblast index
            "$IGBLAST_RELEASE_DIR/bin/makeblastdb" -parse_seqids -dbtype nucl -in "$output_prefix" \
                2>&1 | sed 's/^/    /'
            echo  "    Done creating index for chain: $chain and segment: $segment"
        done
    done
    echo -e "----------------------------------------------------------------"
done

# Clean up the temporary directory if it's present.
# temporary_directory is only non-empty when the inputs were downloaded by
# this script. The path is quoted and preceded by "--" so that whitespace or
# a leading dash cannot change what rm removes.
if [[ -n "${temporary_directory:-}" ]]; then
    rm -rf -- "${temporary_directory}"
fi

echo "Done updating igblast and all index files."
