"""
ATAC Generate Fragments Files
"""

import argparse
import multiprocessing
import os
import shlex
from pathlib import Path
from typing import List

from sinto import fragments

import mist.lib.MistLogger as logging
from mist.lib.MistShellUtils import shell_command_log_stderr


def cli():
    """
    Parse the command line for the ATAC fragments step.

    Two options are defined, both mandatory:
        --library-name:  stored as a string under the key 'library_name'
        --bam-fp:        converted to a pathlib.Path and stored under 'bam_fp'

    Returns:
        A dict mapping each option's destination name to its parsed value.
    """
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        '--library-name',
        help='The library name for this experiment',
        required=True,
    )
    arg_parser.add_argument(
        '--bam-fp',
        dest='bam_fp',
        type=Path,
        action='store',
        help='An ATAC sorted BAM file',
        required=True,
    )

    parsed_namespace = arg_parser.parse_args()
    # vars() exposes the namespace's attribute dict, i.e. {dest: value, ...}
    return vars(parsed_namespace)


def generate_fragments_tsv(
    output_fragments_tsv_fp: str, bam_fp: str, contigs_include_regex: str, threads: int = multiprocessing.cpu_count()
):
    """
    ATAC: Run sinto's fragment caller on a BAM file to produce a fragments tsv.

    Args:
        output_fragments_tsv_fp:    Path of the fragments tsv file to write
        bam_fp:                     Sorted BAM file (expects index file in same directory)
        contigs_include_regex:      Only contigs whose name matches this regex are processed
        threads:                    Number of processes handed to sinto (its `nproc` option);
                                    the default is the CPU count read when this module is loaded
    """
    for message in ('ATAC: Generating Fragment tsv file...', f'Using {threads} CPU threads...'):
        logging.info(message)

    # TODO: should any of these params be customizable?
    # Everything except `nproc` and `chromosomes` is a fixed setting of this pipeline.
    sinto_options = {
        'min_mapq': 30,
        'nproc': threads,
        'cellbarcode': 'CB',
        'chromosomes': contigs_include_regex,
        'readname_barcode': None,
        'cells': None,
        'max_distance': 5000,
        'min_distance': 10,
        'chunksize': 500000,
        'shifts': [4, -5],
        'collapse_within': True,
    }
    fragments.fragments(bam_fp, output_fragments_tsv_fp, **sinto_options)


def post_process_fragments_tsv(
    output_fragments_tsv_fp: str, run_name: str, threads: int = multiprocessing.cpu_count()
) -> List[str]:
    """Process sinto's fragments tsv file to generate fragments, transposase_sites and fragment_ends bed files.

    The steps, each executed as a shell command, are:
        1. Sort the fragments tsv by chrom, start and end into `<run_name>_ATAC_Fragments.bed`.
        2. Split every fragment into its left-most base ("+" strand, written directly by awk)
           and right-most base ("-" strand, re-sorted because ordering by end differs from
           ordering by start).
        3. Merge the two sorted cut-site files; the stream with the strand column becomes the
           fragment-ends bed, and the same stream without it becomes the transposase-sites bed.
        4. bgzip and tabix-index the three bed files.
        5. Delete the temporary left/right cut-site files.

    Every file path is shell-quoted before being placed in a command line, so run names and
    input paths containing spaces or shell metacharacters are handled safely.

    Args:
        output_fragments_tsv_fp: The output fragments tsv file obtained by running sinto.
            This input file is not deleted here.
        run_name: Run name of the experiment. Used as the prefix of every output file name.
        threads: The number of threads to use for sorting and bgzf compression.

    Returns:
        The six final output paths, in this order: transposase sites `.bed.gz` and `.tbi`,
        fragments `.bed.gz` and `.tbi`, fragment ends `.bed.gz` and `.tbi`.
    """
    output_fragments_bed_fp = f'{run_name}_ATAC_Fragments.bed'
    output_fragments_bgzip = f'{output_fragments_bed_fp}.gz'
    output_fragments_tabix = f'{output_fragments_bgzip}.tbi'
    output_transposase_sites_left = f'{run_name}_ATAC_Transposase_Sites_Left.bed'
    output_transposase_sites_right = f'{run_name}_ATAC_Transposase_Sites_Right.bed'
    output_transposase_sites_merged = f'{run_name}_ATAC_Transposase_Sites.bed'
    output_transposase_sites_bgzip = f'{output_transposase_sites_merged}.gz'
    output_transposase_sites_tabix = f'{output_transposase_sites_bgzip}.tbi'
    output_fragment_ends_bed = f'{run_name}_ATAC_Fragment_Ends.bed'
    output_fragment_ends_bgzip = f'{output_fragment_ends_bed}.gz'
    output_fragment_ends_tabix = f'{output_fragment_ends_bgzip}.tbi'

    # list of final outputs to return
    final_outputs = [
        output_transposase_sites_bgzip,
        output_transposase_sites_tabix,
        output_fragments_bgzip,
        output_fragments_tabix,
        output_fragment_ends_bgzip,
        output_fragment_ends_tabix,
    ]

    # Shell-safe spellings of every path used on a command line.
    # str() allows the input to be given as a pathlib.Path as well.
    q_tsv = shlex.quote(str(output_fragments_tsv_fp))
    q_fragments_bed = shlex.quote(output_fragments_bed_fp)
    q_left = shlex.quote(output_transposase_sites_left)
    q_right = shlex.quote(output_transposase_sites_right)
    q_merged = shlex.quote(output_transposase_sites_merged)
    q_fragment_ends = shlex.quote(output_fragment_ends_bed)

    # Sort by chrom, start and end
    logging.info('Getting fragments BED file...')
    cmd = f'sort --parallel {threads} -S20G -k1,1V -k2,2n -k3,3n {q_tsv} -s > {q_fragments_bed}'
    shell_command_log_stderr(cmd, shell=True)

    # Create two bed files with entries for just the base
    # at each end of the fragments.
    # The left file name is handed to awk as a variable (-v) instead of being
    # spliced into the awk program text, so quote characters in the name cannot
    # terminate the program string early.
    logging.info('Getting left and right transposase cut sites...')
    cutsites_bed_cmd = (
        f'<{q_fragments_bed} awk -v OFS="\\t" -v left_fp={q_left} '
        "' { "
        'print $1, $2, $2 + 1, $4, $5, "+" > left_fp ; '
        'print $1, $3 - 1, $3, $4, $5, "-" '
        "} ' "
        '| '
        f'sort -k1,1V -k2,2n -k3,3n -s > {q_right}'
    )
    shell_command_log_stderr(cutsites_bed_cmd, shell=True)

    # Merge-sort the left and right cut sites to save space on the sort
    # Tee the merge into the fragends file containing strand info.
    logging.info('Merge sort left and right transposase cut sites...')
    merge_sort_cmd = (
        'sort -k1,1V -k2,2n -k3,3n --merge -s '
        f'{q_left} {q_right} '
        f'| tee {q_fragment_ends} '
        '| awk -v OFS="\\t" \' { print $1, $2, $3, $4, $5 } \' '
        f'> {q_merged}'
    )
    shell_command_log_stderr(merge_sort_cmd, shell=True)

    # Compress and index each bed file; the order matches `final_outputs`.
    to_compress = [
        ('transposase cut sites', output_transposase_sites_merged, output_transposase_sites_bgzip),
        ('fragments BED file', output_fragments_bed_fp, output_fragments_bgzip),
        ('fragment ends BED file', output_fragment_ends_bed, output_fragment_ends_bgzip),
    ]
    for label, bed_fp, bgzip_fp in to_compress:
        logging.info(f'bgzip {label}...')
        shell_command_log_stderr(
            f'bgzip -f {shlex.quote(bed_fp)} -@ {threads}',
            shell=True,
        )

        logging.info(f'tabix {label}...')
        shell_command_log_stderr(
            f'tabix -p bed {shlex.quote(bgzip_fp)}',
            shell=True,
        )

    # remove temporary files
    for fp in [output_transposase_sites_left, output_transposase_sites_right]:
        os.remove(fp)

    logging.info('ATAC: Done Generating Fragments Files')

    return final_outputs


def main():
    """Main method to generate ATAC Generate Fragments Files.

    Parses the command line with cli() and forwards the resulting dict, as
    keyword arguments, to generate_fragments_tsv().

    NOTE(review): cli() produces the keys 'library_name' and 'bam_fp', while
    generate_fragments_tsv() requires 'output_fragments_tsv_fp' and
    'contigs_include_regex' and has no 'library_name' parameter, so this call
    looks like it raises TypeError as written. Confirm how the output path and
    the contig regex are meant to be supplied before relying on this entry point.
    """
    cli_kwargs = cli()
    generate_fragments_tsv(**cli_kwargs)


# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
