#!/bin/bash

echo "Called $0 with arguments: $*" 1>&2

#######################################
# Split the command line into the argument lists for the downstream tools.
# Globals (all written):
#   ALIGNMENT_ANALYSIS_ARGS       flags forwarded to AlignmentAnalysis
#   COUNT_CELLS_BIOPRODUCTS_ARGS  flags forwarded to mist_count_cells_bioproducts.py
#   hasRunMetadata, hasAssay, hasGTF, hasR2Bam, hasQualMetrics
#                                 set to true once the matching flag is seen
#   hasAllRequiredArguments       reset to true
# Arguments: the script's command line ("$@")
# Returns:   0 on success, 1 when the command line cannot be parsed
#######################################
parse_arguments() {
	local options

	hasRunMetadata=false
	hasAssay=false
	hasGTF=false
	hasR2Bam=false
	hasQualMetrics=false
	hasAllRequiredArguments=true

	ALIGNMENT_ANALYSIS_ARGS=()
	COUNT_CELLS_BIOPRODUCTS_ARGS=()

	# 'options' is declared on its own line so that the status tested here is
	# getopt's and not the status of 'local'
	if ! options=$(getopt -o '' -l transcript-length:,threads:,run-metadata:,gtf:,assay:,extra-seqs:,r2-bam:,quality-metrics:,exclude-intronic-reads -- "$@")
	then
		echo "Could not parse the command line arguments" 1>&2
		return 1
	fi
	# getopt prints a shell-quoted argument list that ends with '--'
	eval set -- "$options"

	# Stop as soon as the arguments run out so that an unexpected argument
	# list can never keep the loop spinning
	while [ $# -gt 0 ]
	do
		case "$1" in
		--transcript-length|--threads|--extra-seqs)
			# Optional flags with a value: forwarded unchanged
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" "$2" )
			shift
			;;
		--run-metadata)
			# The only flag that both downstream tools receive
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" "$2" )
			COUNT_CELLS_BIOPRODUCTS_ARGS+=( "$1" "$2" )
			hasRunMetadata=true
			shift
			;;
		--assay)
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" "$2" )
			hasAssay=true
			shift
			;;
		--gtf)
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" "$2" )
			hasGTF=true
			shift
			;;
		--r2-bam)
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" "$2" )
			hasR2Bam=true
			shift
			;;
		--quality-metrics)
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" "$2" )
			hasQualMetrics=true
			shift
			;;
		--exclude-intronic-reads)
			# Boolean switch: no value follows
			ALIGNMENT_ANALYSIS_ARGS+=( "$1" )
			;;
		--)
			# End of options
			shift
			break
			;;
		*)
			echo "Unexpected argument '$1'" 1>&2
			return 1
			;;
		esac
		shift
	done
}

parse_arguments "$@" || exit 1

# Check that we have the required arguments
#######################################
# Report every required flag whose indicator variable is "false".
# Globals:
#   hasRunMetadata, hasAssay, hasGTF, hasR2Bam, hasQualMetrics (read)
#   hasAllRequiredArguments (set to false when a flag is missing)
# Outputs:   one "Missing required flag '<flag>'" line per missing flag, on stderr
# Returns:   0 when nothing is missing, 1 otherwise
#######################################
check_required_arguments() {
	local entry flagName flagSeen

	# Each entry pairs a flag name with the current value of its indicator
	for entry in \
		"--run-metadata=${hasRunMetadata}" \
		"--assay=${hasAssay}" \
		"--gtf=${hasGTF}" \
		"--r2-bam=${hasR2Bam}" \
		"--quality-metrics=${hasQualMetrics}"
	do
		flagName=${entry%%=*}
		flagSeen=${entry#*=}
		# Compare as a string instead of executing the variable as a command
		if [ "${flagSeen}" = false ]
		then
			echo "Missing required flag '${flagName}'" 1>&2
			hasAllRequiredArguments=false
		fi
	done

	[ "${hasAllRequiredArguments}" != false ]
}

if ! check_required_arguments
then
	echo "Exiting" 1>&2
	exit 1
fi

# Run AlignmentAnalysis with the flags collected for it. A non-zero status
# ends this script with that same status.
AlignmentAnalysisExitCode=0
AlignmentAnalysis "${ALIGNMENT_ANALYSIS_ARGS[@]}" || AlignmentAnalysisExitCode=$?

if (( AlignmentAnalysisExitCode != 0 ))
then
	exit "${AlignmentAnalysisExitCode}"
fi

#######################################
# Build all_bioproduct_counts.txt and all_cellID_readCounts.txt from every
# *_Sorted_Valid_Reads*.gz file found below the current directory.
# Outputs:   the two all_*.txt files in the current directory; per-file
#            intermediates are written to ./counts, which is removed at the end
# Returns:   non-zero when the scratch directory cannot be created
#######################################
collect_read_counts() {
	local workers
	# Fall back to a single worker when nproc is not available
	workers=$(nproc 2>/dev/null) || workers=1

	# Start from an empty scratch directory so that files left behind by an
	# interrupted run are never concatenated into the combined results
	rm -rf counts/
	mkdir counts || return 1

	# In both pipelines the file name reaches the inner shell as "$1" instead of
	# being pasted into the program text, so quotes in a name cannot break or
	# alter the command. The output name is the path relative to '.', with each
	# '/' replaced by '__', so files in subdirectories also land in ./counts.

	echo "Getting bioproduct counts" 1>&2
	# In parallel for each _Sorted_Valid_Reads file:
	    # Extract 3rd column (bioproduct), run uniq, then counts lines
	    #  Assumes sorted_valid_reads file is already sorted by bioproduct (done in AlignmentAnalysis)
	# *bioproduct_count.txt will contain a single line for each sorted_valid_reads file, with a count of the number of bioproducts
	find . -name "*_Sorted_Valid_Reads*.gz" -print0 | \
		xargs -0 -I {} -P "${workers}" bash -c \
		'rel=${1#./}; gunzip -c "$1" | cut -d "," -f 3 | uniq | wc -l > "./counts/${rel//\//__}_bioproduct_count.txt"' _ {}

	echo "Getting cellID read counts" 1>&2
	# In parallel for each _Sorted_Valid_Reads file:
	    # Extract the 1st column (cellID), sort numerically, uniq with count - will list how many times each cell ID was seen
	    # We are only checking the first 50,000,000 reads from each file, but should be enough to identify putative cells
	# *cellID_readCounts.txt will contain a line for each cellID, with # of reads and cellID, for each sorted_valid_reads
	find . -name "*_Sorted_Valid_Reads*.gz" -print0 | \
		xargs -0 -I {} -P "${workers}" bash -c \
		'rel=${1#./}; gunzip -c "$1" | head -50000000 | cut -d "," -f 1 | sort -n | uniq -c > "./counts/${rel//\//__}_cellID_readCounts.txt"' _ {}

	# Combine all counts - mist_count_cells_bioproducts.py will look for these files specifically
	cat ./counts/*_bioproduct_count.txt > all_bioproduct_counts.txt
	cat ./counts/*_cellID_readCounts.txt > all_cellID_readCounts.txt
	rm -rf counts/
}

collect_read_counts || exit 1

# Run mist_count_cells_bioproducts.py with the flags collected for it.
# The status is saved immediately: inside the 'then' branch $? would hold the
# status of the '[' test (0), not the status of the tool.
mist_count_cells_bioproducts.py "${COUNT_CELLS_BIOPRODUCTS_ARGS[@]}"

CountCellsBioproductsExitCode=$?

if [ 0 -ne ${CountCellsBioproductsExitCode} ]
then
	exit ${CountCellsBioproductsExitCode}
fi

# This regex contains errors from tar that we're okay with
# ignoring so that the pipeline doesn't break
IGNORE_ERROR="^tar:.*(Removing leading|socket ignored|file changed as we read it)"

# Bundle every *.log and *_Logs.tar.gz entry of the current directory into one
# archive; --remove-files deletes the originals once they have been added.
# The globs are expanded directly instead of parsing `ls` output, so names
# containing whitespace stay intact and a directory named *.log is archived as
# a directory rather than being replaced by a listing of its contents.
# nullglob drops a pattern that matches nothing so tar is never handed a
# literal "*.log". With no match at all tar is called without members; GNU tar
# refuses to create an empty archive and that error is reported below.
shopt -s nullglob
LogFiles=( *.log *_Logs.tar.gz )
shopt -u nullglob
tarMessages=$(tar czf "AlignmentAnalysis-logs.tar.gz" --remove-files "${LogFiles[@]}" 2>&1)
tarExitCode=$?
if [ ${tarExitCode} -ne 0 ]
then
	# Keep only the tar diagnostics that are not on the ignore list
	seriousErrors=$(echo "${tarMessages}" | grep "^tar:" | grep -Ev "${IGNORE_ERROR:?}")
	if [ -n "${seriousErrors}" ]
	then
		echo "${seriousErrors}"
		exit ${tarExitCode}
	fi
fi

exit 0
