#!/usr/bin/env Rscript

library(Matrix)
library(Signac)
library(Seurat)
library(patchwork)
library(ggplot2)
library(jsonlite)
library(rtracklayer)
library(R.utils)
library(zip)
library(Matrix)

# Source the logger script located at <root>/<RHAPSODY_HOME>/rscript/Mist_R_Logger.R,
# where RHAPSODY_HOME is read from the environment.
# NOTE(review): log_info()/log_warn()/log_error() are used throughout this file
# but are not defined in it; presumably they come from this script -- confirm.
source(file.path(.Platform$file.sep, Sys.getenv(c("RHAPSODY_HOME")), "rscript", "Mist_R_Logger.R"))

# Compile the final ATAC outputs for one run.
#
# Steps, in order:
#   1. validate the required options and parse the JSON inputs;
#   2. unpack the metrics tarball and make sure the ATAC metrics folder exists;
#   3. load the initial Seurat object, repoint its fragment file and flag the
#      putative cells listed in the cell order JSON;
#   4. add a "gene_activity" assay computed with GeneActivity();
#   5. for putative cells: TSS enrichment plot, fragment length plot, cell
#      annotation and (when Cell_Calling_Data >= 3) dimensionality reduction;
#   6. assemble and write the ATAC metrics JSON;
#   7. export the unfiltered and putative MEX archives, save the putative-cell
#      Seurat object and archive the metrics folder as "metrics-files.tar.gz".
#
# @param opt List-like object of parsed options. Required fields:
#   run_metadata, initial_seurat, cell_order, unified_metrics,
#   feature_name_json, fragments, input_metrics_tar, total_fragment_metrics,
#   genome_size. Also read: genome_contig_lengths, biop_putative_data_table,
#   cell_order_subsampled.
# @return No meaningful value; called for the files it writes to the working
#   directory. Stops with an error when a required option is missing.
atac_compile_results <- function(opt) {

    # NOTE(review): every warning raised inside this body is discarded.
    suppressWarnings({

        # For local testing
        #opt$base_name <- "Local_Testing"
        #opt$initial_seurat <- "/data/ATAC_Cell_By_Peak/Local_Testing_Initial_Seurat.rds"
        #opt$cell_order <- "/data/ATAC_Cell_By_Peak/Test_cell_order.json"
        #opt$unified_metrics <- "/data/ATAC_Compile_Results/UnifiedMetrics.json"
        #opt$total_fragment_metrics <- "/data/ATAC_Cell_By_Peak/Local_Testing_Total_Fragment_Metrics.json"
        #opt$fragments <- "/data/ATAC_Compile_Results/AM053_3_S3_chr21.fragments.bed.gz"
        #opt$input_metrics_tar <- "/data/ATAC_Compile_Results/metrics-files.tar"


        required_inputs <- list(
            "metadata json" = opt$run_metadata,
            "ATAC initial Seurat object" = opt$initial_seurat,
            "putative cell order json" = opt$cell_order,
            "unified metrics json" = opt$unified_metrics,
            "feature_name_map json" = opt$feature_name_json,
            "ATAC fragments file" = opt$fragments,
            "Metrics file" = opt$input_metrics_tar,
            "ATAC total fragment metrics json" = opt$total_fragment_metrics,
            "Genome size" = opt$genome_size
        )

        # An option can be missing as NULL / zero-length (never set) or as NA.
        # is.na(NULL) is logical(0), which would make a plain `if (is.na(x))`
        # fail with "argument is of length zero", so test the length first.
        for (input_name in names(required_inputs)) {
            input_value <- required_inputs[[input_name]]
            if (length(input_value) == 0 || anyNA(input_value)) {
                log_error("The {input_name} is a required input. Skipping ATAC_Compile_Results node. ")
                stop("Missing required input: ", input_name)
            }
        }

        # parse metadata json
        run_metadata <- jsonlite::fromJSON(opt$run_metadata)
        putative_cell_call <- as.integer(run_metadata$Cell_Calling_Data)

        # parse feature_map_json
        feature_name_map <- jsonlite::fromJSON(opt$feature_name_json)

        # Extract metrics tar.gz
        # NOTE(review): assumes the archive unpacks into "Metrics-files" -- confirm.
        metrics_dir <- "Metrics-files"
        untar(opt$input_metrics_tar)

        # create a output directory inside the metrics folder for atac metrics
        atac_metrics_dir <- file.path(metrics_dir, "ATAC")
        if (!dir.exists(atac_metrics_dir)) {
            log_info("Creating ATAC metrics directory: {atac_metrics_dir} ...")
            dir.create(atac_metrics_dir)
        }

        # load the initial seurat object
        log_info("Loading initial seurat object...")
        seurat_object <- readRDS(opt$initial_seurat)

        # update the fragments object with the new file path
        log_info("Updating fragment object...")
        seurat_object <- update_fragments_object(seurat_object, c(opt$fragments))

        # load the unified metrics
        log_info("Loading unified read quality metrics...")
        unified_metrics_file <- opt$unified_metrics
        unified_metrics <- jsonlite::fromJSON(unified_metrics_file)

        # copy the unified metrics file to Atac_metrics folder
        file.copy(unified_metrics_file, atac_metrics_dir)

        # load the fragment metrics
        log_info("Loading fragment metrics...")
        total_fragment_metrics <- jsonlite::fromJSON(opt$total_fragment_metrics)

        # load the cell order json
        log_info("Loading cells...")
        cell_order <- jsonlite::fromJSON(opt$cell_order)
        all_cells <- cell_order$Unfiltered
        putative_cells <- cell_order$Filtered
        #log_info("Cell Labels from GDT... {head(all_cells)}")
        #log_info("Putative Cells from GDT.. {head(putative_cells)}")

        # We are calling cells based on Tn5sites in peaks
        # rather than raw counts of all Tn5sites
        seurat_object$ATAC_called_cell <- (
            Cells(seurat_object) %in% putative_cells
        )

        # Compute the gene activity matrix
        log_info("   Saving a cell x gene matrix containing the Gene Activity counts...")
        gene_activity_matrix <- GeneActivity(seurat_object, extend.upstream = 2000, extend.downstream = 0)
        gene_activity_matrix <- gene_activity_matrix[order(rownames(gene_activity_matrix)), ]
        seurat_object[["gene_activity"]] <- CreateAssayObject(counts = gene_activity_matrix)

        # Get the seurat object for just the putative cells
        log_info("Number of cell labels = {length(Cells(seurat_object))}")
        if (length(putative_cells) > 0) {
            putative_seurat_object <- subset(
                x = seurat_object,
                subset = ATAC_called_cell == TRUE
            )
            exist_putative_cells <- TRUE
            log_info("Number of putative cells = {length(Cells(putative_seurat_object))}")
        } else {
            putative_seurat_object <- NA
            exist_putative_cells <- FALSE
        }

        if (exist_putative_cells) {
            # plot the TSS Enrichment
            log_info("Plotting TSS enrichment plot...")
            # Read contig lengths for coordinate validation
            contig_lengths <- read_contig_lengths(opt$genome_contig_lengths)
            tss_putative_peak_value <- plot_tss_enrichment(putative_seurat_object, atac_metrics_dir, "Putative_Cells", contig_lengths)

            # plot the Fragment Length distribution
            log_info("Plotting fragment length distribution plot...")
            plot_fragment_dist(putative_seurat_object, atac_metrics_dir, "Putative_Cells")

            # read bioproduct putative data table
            log_info("Try reading bioproduct putative data table...")
            read_biop_putative_dt_result <- read_biop_putative_data_table(opt$biop_putative_data_table)
            exists_biop_gene_by_cell <- read_biop_putative_dt_result$exists_biop_gene_by_cell
            biop_gene_by_cell <- read_biop_putative_dt_result$biop_gene_by_cell
            rm(read_biop_putative_dt_result)

            # cell annotation (best effort: a failure is logged, not fatal)
            log_info("Annotating cell types...")
            tryCatch(
                {
                    cell_annotation(putative_seurat_object, exists_biop_gene_by_cell, biop_gene_by_cell, putative_cell_call, atac_metrics_dir)
                },
                error = function(e) {
                    log_error(paste("... Failed cell annotation: ", e))
                }
            )

            # plot dimensionality reduction (best effort as well)
            if (putative_cell_call >= 3) {
                log_info("Performing dimensionality reduction...")
                tryCatch(
                    {
                        perform_dimReduction(
                            putative_seurat_object,
                            opt$cell_order_subsampled,
                            exists_biop_gene_by_cell,
                            biop_gene_by_cell,
                            putative_cell_call,
                            metrics_dir
                        )
                    },
                    error = function(e) {
                        log_error(paste("Failed dimensionality reduction: ", e))
                    }
                )
            } else {
                log_info("Skipping dimensionality reduction here, as it was previously done in GetDataTable node")
            }

        } else {
            # no putative cells: report a TSS enrichment score of 0
            tss_putative_peak_value <- c("TSS_Enrichment_Score" = 0)
        }

        # get the fragment metrics
        log_info("Getting fragment metrics...")
        fragment_metrics <- get_fragment_metrics(seurat_object, unified_metrics, total_fragment_metrics, opt$fragments)
        #log_info("Fragment metrics... {toJSON(fragment_metrics, pretty=TRUE)}")

        # get the peak metrics
        log_info("Getting peak metrics...")
        peak_metrics <- get_peak_metrics(seurat_object, unified_metrics, total_fragment_metrics, opt$fragments, opt$genome_size)
        #log_info("Peak metrics... {toJSON(peak_metrics, pretty=TRUE)}")

        # get the putative cell metrics
        log_info("Getting putative cell metrics...")
        cell_metrics <- get_cell_metrics(seurat_object, putative_seurat_object, unified_metrics, total_fragment_metrics, exist_putative_cells)
        #log_info("Putative cell metrics... {toJSON(cell_metrics, pretty=TRUE)}")

        # output the final metrics
        log_info("Getting final metrics json...")
        final_metrics <- get_final_metrics(fragment_metrics, peak_metrics, cell_metrics, tss_putative_peak_value)
        log_info("Final metrics... {toJSON(final_metrics, pretty=TRUE)}")

        atac_metrics_json_file <- file.path(atac_metrics_dir, paste0(seurat_object@project.name, "_ATAC_Metrics.json"))
        log_info("Saving ATAC Metrics JSON: {atac_metrics_json_file}")
        write_json(final_metrics, atac_metrics_json_file, pretty=TRUE, auto_unbox=TRUE)

        # generate the unfiltered cell by peak matrix
        unfiltered_peak_mex_zipname <- paste0(seurat_object@project.name, "_ATAC_Cell_by_Peak_Unfiltered_MEX")
        log_info("Writing ATAC unfiltered cell x peak datatable to ./{unfiltered_peak_mex_zipname}.zip")
        # Rename peaks of the chromatin assay to use original seqnames. Don't do this inside v3 Seurat objects.
        peaks_counts <- seurat_object@assays$peaks@counts
        row.names(peaks_counts) <- Signac::GRangesToString(seurat_object@assays$peaks@ranges)
        write_MEX(peaks_counts, unfiltered_peak_mex_zipname, "Peaks")

        # generate the unfiltered cell by gene matrix
        unfiltered_gene_activity_mex_zipname <- paste0(seurat_object@project.name, "_ATAC_Cell_by_Gene_Activity_Unfiltered_MEX")
        log_info("Writing ATAC unfiltered cell x gene datatable to ./{unfiltered_gene_activity_mex_zipname}.zip")
        write_MEX(seurat_object@assays$gene_activity@counts, unfiltered_gene_activity_mex_zipname, "Gene_Activity", feature_name_map)

        log_info("...Done writing ATAC unfiltered datatables")

        # generate the cell by peak matrix with putative cells
        if (exist_putative_cells) {
            putative_peak_mex_zipname <- paste0(seurat_object@project.name, "_ATAC_Cell_by_Peak_MEX")
            log_info("Writing ATAC putative cell x peak datatable to ./{putative_peak_mex_zipname}.zip")
            # Rename peaks of the chromatin assay to use original seqnames. Don't do this inside v3 Seurat objects.
            peaks_counts <- putative_seurat_object@assays$peaks@counts
            row.names(peaks_counts) <- Signac::GRangesToString(putative_seurat_object@assays$peaks@ranges)
            write_MEX(peaks_counts, putative_peak_mex_zipname, "Peaks")

            putative_gene_activity_mex_zipname <- paste0(seurat_object@project.name, "_ATAC_Cell_by_Gene_Activity_MEX")
            log_info("Writing ATAC putative cell x gene datatable to ./{putative_gene_activity_mex_zipname}.zip")
            write_MEX(putative_seurat_object@assays$gene_activity@counts, putative_gene_activity_mex_zipname, "Gene_Activity", feature_name_map)
            log_info("...Done writing ATAC putative datatable")

            # Copy the fragments file and its .tbi index next to the saved
            # object and store the bare file name as the fragment path.
            log_info("Changing the file path of fragment file")
            new_fragment_file_path <- basename(opt$fragments)
            file.copy(opt$fragments, getwd())
            file.copy(paste0(opt$fragments, ".tbi"), getwd())
            putative_seurat_object <- update_fragments_object(putative_seurat_object, c(new_fragment_file_path))

            output_file <- paste0(seurat_object@project.name, "_ATAC_Seurat.rds")
            log_info("Writing ATAC Seurat object to disk: {output_file}")
            saveRDS(putative_seurat_object, file = output_file)
        }

        # tar archive the metrics folder
        log_info("Zipping metrics folder...")
        tar("metrics-files.tar.gz", metrics_dir, compression = 'gzip')
    })
}


# Write a counts matrix as a zipped MEX bundle, "<zipname>.zip", in the
# working directory. The bundle holds atac-barcodes.tsv.gz,
# atac-features.tsv.gz and atac-matrix.mtx.gz.
#
# @param counts_matrix Feature x cell matrix. Column names must be coercible to
#   numbers because the columns are re-ordered by numeric cell index.
# @param zipname Output archive name without the ".zip" extension.
# @param feature_type Label written as the third column of the features file.
#   "Peaks" and "Gene_Activity" are the values handled when a feature map is
#   supplied.
# @param feature_name_map Optional named list keyed by feature name whose
#   entries carry "gene_id" and "gene_name". When NULL, or for "Peaks", the row
#   names themselves are used (with "chr-start-end" rewritten as
#   "chr:start-end").
# @return Invisibly, TRUE when all temporary .gz files were removed.
write_MEX <- function(counts_matrix, zipname, feature_type, feature_name_map = NULL) {

    barcodes_file_name <- "atac-barcodes.tsv"
    features_file_name <- "atac-features.tsv"
    matrix_file_name <- "atac-matrix.mtx"
    mex_files <- c(barcodes_file_name, features_file_name, matrix_file_name)

    # re-order the matrix based on the cell index; drop = FALSE keeps a
    # single-cell input as a one-column matrix instead of a bare vector
    counts_matrix <- counts_matrix[, order(as.numeric(colnames(counts_matrix))), drop = FALSE]

    # barcodes
    write(colnames(counts_matrix), file = barcodes_file_name)

    # features
    use_raw_feature_names <- is.null(feature_name_map) || feature_type == "Peaks"
    if (!use_raw_feature_names && feature_type != "Gene_Activity") {
        # Previously this case failed later with "object 'this_gene_id' not found".
        stop(
            "Unsupported feature_type '", feature_type,
            "': expected \"Peaks\" or \"Gene_Activity\" when a feature_name_map is supplied"
        )
    }

    # vapply (not sapply) so that a matrix without features still yields a
    # character vector that writeLines() accepts
    feature_lines <- vapply(rownames(counts_matrix), function(this_featureName) {
        if (use_raw_feature_names) {
            this_gene_id <- gsub('(.*)-([0-9]+)-([0-9]+)$', '\\1:\\2-\\3', this_featureName)
            this_gene_name <- this_gene_id
        } else if (! is.null(feature_name_map[[this_featureName]])) {
            # Check that we have seen this feature in the GTF, sometimes the feature names can have things appended.
            gene_id <- feature_name_map[[this_featureName]][["gene_id"]]
            gene_name <- feature_name_map[[this_featureName]][["gene_name"]]
            this_gene_id <- paste0(gene_id, "_activity")
            this_gene_name <- paste0(gene_name, "_activity")
        } else {
            # We have a feature map but this feature is not in it: fall back
            # to the feature name itself.
            #log_info(paste0("Feature ", this_featureName, " not found in feature_name_map."))
            this_gene_id <- paste0(this_featureName, "_activity")
            this_gene_name <- paste0(this_featureName, "_activity")
        }
        paste0(c(this_gene_id, this_gene_name, feature_type), collapse = "\t")
    }, character(1), USE.NAMES = FALSE)

    writeLines(feature_lines, features_file_name)

    # matrix
    # writeMM requires a sparse matrix. The chromvar matrix can be dense, so
    # we must convert it to a sparse format before writing.
    if (!is(counts_matrix, "sparseMatrix")) {
        counts_matrix <- as(counts_matrix, "sparseMatrix")
    }
    writeMM(counts_matrix, file = matrix_file_name)

    # compress the files here; fail loudly rather than at the zip step
    for (mex_file in mex_files) {
        exit_status <- system(paste("pigz -f", shQuote(mex_file)))
        if (exit_status != 0) {
            stop("pigz failed (exit status ", exit_status, ") while compressing ", mex_file)
        }
    }

    # Put all files into a zip folder
    files_to_zip <- paste0(mex_files, ".gz")

    zip(zipfile = paste0(zipname, '.zip'),
        files = files_to_zip)

    # the .gz intermediates are only needed inside the archive
    invisible(all(file.remove(files_to_zip)))
}

update_fragments_object <- function(seurat_object, fragments_file_arr) {
    # Each Fragment object records the location of its fragments file. The
    # objects are first built in the ATAC_Cell_by_Peak node, and the file can
    # live somewhere else in the current node, so every stored path is replaced
    # with the matching entry of fragments_file_arr (matched by position).

    # Pull the Fragment objects out of the Seurat object ...
    fragment_objects <- Fragments(seurat_object)

    # ... and clear them from it so the updated ones can be attached afresh.
    Fragments(seurat_object) <- NULL

    # Point the i-th Fragment object at the i-th supplied path.
    for (frag_idx in seq_along(fragment_objects)) {
        fragment_objects[[frag_idx]] <- update_frag_path(
            fragment_objects[[frag_idx]],
            new.path = fragments_file_arr[frag_idx]
        )
    }

    # Re-attach the updated Fragment objects.
    Fragments(seurat_object) <- fragment_objects

    seurat_object
}


get_fragment_metrics <- function(unfiltered_seurat, unified_metrics, total_fragment_metrics, fragments_file) {
    # Build the "Fragments" section of the ATAC metrics from the unified read
    # metrics, the total fragment metrics and the per-cell `fragments` values
    # of the unfiltered object. `fragments_file` is accepted but not used.

    # Percentage of `numerator` in `denominator`, rounded to two decimals.
    as_pct <- function(numerator, denominator) {
        round(numerator / denominator * 100, 2)
    }

    alignment_stats <- unified_metrics[["Alignment_Categories"]][["Combined_stats"]]
    sequencing_stats <- unified_metrics[["Sequencing_Quality"]][["Combined_stats"]]

    nuclear_reads <- alignment_stats[["Total_Nuclear_Fragment_Reads"]]
    proper_pairs <- alignment_stats[["Reads_Aligned_Properly_Paired"]]
    input_reads <- sequencing_stats[["Total_Reads_in_FASTQ"]]
    nondup_fragments <- total_fragment_metrics[["Total_Fragments"]]

    # Fragments attributed to any cell label (putative or not).
    cell_label_fragments <- sum(unfiltered_seurat$fragments)

    list(
        "Total_Nuclear_Fragment_Reads" = nuclear_reads,
        "Pct_Nuclear_Fragments" = alignment_stats[["Pct_Nuclear_Fragments"]],
        "Total_Nonduplicate_Fragments" = nondup_fragments,
        # duplicates = nuclear fragment reads that did not become a unique fragment
        "Pct_Duplicate_Fragments" = as_pct(nuclear_reads - nondup_fragments, nuclear_reads),
        "Nonduplicate_Fragments_from_Cell_Labels" = cell_label_fragments,
        "Pct_Nonduplicate_Fragments_from_Cell_Labels" = as_pct(cell_label_fragments, nondup_fragments),
        "Pct_Nonduplicate_Fragments_to_Input_Reads" = as_pct(nondup_fragments, input_reads),
        "Pct_Nonduplicate_Fragments_to_Proper_Reads" = as_pct(nondup_fragments, proper_pairs),
        "Pct_Nonduplicate_Fragments_with_NFR_Lengths" = as_pct(
            total_fragment_metrics[["Total_Nucleosome_Free_Fragments"]],
            nondup_fragments
        ),
        "Pct_Nonduplicate_Fragments_with_Mononucleosomal_Lengths" = as_pct(
            total_fragment_metrics[["Total_Mononucleosomal_Fragments"]],
            nondup_fragments
        )
    )
}


get_peak_metrics <- function(unfiltered_seurat, unified_metrics, total_fragment_metrics, fragments_file, genome_size) {
    # Build the "Peaks" section of the ATAC metrics from the peak ranges of the
    # unfiltered object and the total fragment metrics. `unified_metrics` and
    # `fragments_file` are accepted but do not contribute to the output.

    # Percentage of `numerator` in `denominator`, rounded to two decimals.
    as_pct <- function(numerator, denominator) {
        round(numerator / denominator * 100, 2)
    }

    peak_ranges <- unfiltered_seurat@assays[["peaks"]]@ranges
    nondup_fragments <- total_fragment_metrics[["Total_Fragments"]]
    all_transposase_sites <- total_fragment_metrics[["Total_Transposase_Sites"]]

    # Merge the peak ranges with reduce() before summing widths so that
    # overlapping peaks are counted once.
    merged_peaks <- GenomicRanges::reduce(peak_ranges)
    peak_basepairs <- sum(GenomicRanges::width(merged_peaks))

    list(
        "Total_Nonduplicate_Fragments" = nondup_fragments,
        "Total_Transposase_Sites" = all_transposase_sites,
        "Total_Peaks" = length(peak_ranges),
        "Total_Peak_Basepairs" = peak_basepairs,
        "Pct_Genome_Within_Peaks" = round((peak_basepairs / as.numeric(genome_size)) * 100, 2),
        # Despite the "Fraction" in the key names, both values are percentages.
        "Fraction_of_Fragments_Overlapping_Peaks" = as_pct(
            sum(unfiltered_seurat$fragments_overlapping_peaks),
            nondup_fragments
        ),
        "Fraction_of_Transposase_Sites_in_Peaks" = as_pct(
            sum(unfiltered_seurat$transposase_sites_in_peaks),
            all_transposase_sites
        )
    )
}


get_cell_metrics <- function(unfiltered_seurat, filtered_seurat, unified_metrics, total_fragment_metrics, exist_putative_cells) {
    # Build the "Cells" section of the ATAC metrics. With putative cells the
    # values come from the filtered object; without them every metric is 0.
    # `unfiltered_seurat` is accepted but not used.

    # Percentage of `numerator` in `denominator`, rounded to two decimals.
    as_pct <- function(numerator, denominator) {
        round(numerator / denominator * 100, 2)
    }

    if (exist_putative_cells) {
        # Cells
        n_putative_cells <- length(Cells(filtered_seurat))
        input_reads <- unified_metrics[["Sequencing_Quality"]][["Combined_stats"]][["Total_Reads_in_FASTQ"]]

        # Fragments
        nondup_fragments <- total_fragment_metrics[["Total_Fragments"]]
        putative_fragments <- sum(filtered_seurat$fragments)

        return(list(
            "Total_Putative_Cells" = n_putative_cells,
            "Pct_Reads_from_Putative_Cells" = as_pct(sum(filtered_seurat$read_pairs), input_reads),
            # mean over ALL input reads, not only those assigned to putative cells
            "Mean_Reads_per_Cell" = round(input_reads / n_putative_cells, 2),
            "Median_Reads_per_Cell" = median(filtered_seurat$read_pairs),
            "Median_Pct_Nonredundant_Reads_per_Cell" = round(
                median(filtered_seurat$ratio_fragments_to_readpairs) * 100,
                2
            ),

            "Total_Nonduplicate_Fragments" = nondup_fragments,
            "Median_Nonduplicate_Fragments_per_Cell" = median(filtered_seurat$fragments),
            "Total_Nonduplicate_Fragments_from_Putative_Cells" = putative_fragments,
            "Pct_Nonduplicate_Fragments_from_Putative_Cells" = as_pct(putative_fragments, nondup_fragments),

            "Pct_Cellular_Fragments_Overlapping_Peaks" = as_pct(
                sum(filtered_seurat$fragments_overlapping_peaks),
                putative_fragments
            ),
            "Pct_Cellular_Transposase_Sites_in_Peaks" = as_pct(
                sum(filtered_seurat$transposase_sites_in_peaks),
                sum(filtered_seurat$transposase_sites)
            )
        ))
    }

    # No putative cells: same keys, same order, all zero.
    list(
        "Total_Putative_Cells" = 0,
        "Pct_Reads_from_Putative_Cells" = 0,
        "Mean_Reads_per_Cell" = 0,
        "Median_Reads_per_Cell" = 0,
        "Median_Pct_Nonredundant_Reads_per_Cell" = 0,

        "Total_Nonduplicate_Fragments" = 0,
        "Median_Nonduplicate_Fragments_per_Cell" = 0,
        "Total_Nonduplicate_Fragments_from_Putative_Cells" = 0,
        "Pct_Nonduplicate_Fragments_from_Putative_Cells" = 0,

        "Pct_Cellular_Fragments_Overlapping_Peaks" = 0,
        "Pct_Cellular_Transposase_Sites_in_Peaks" = 0
    )
}

get_final_metrics <- function(fragment_metrics, peak_metrics, cell_metrics, tss_scores) {
    # Assemble the three metric groups into the final structure. The TSS
    # enrichment score(s) are appended to the peak metrics so they are reported
    # under "Peaks".
    list(
        "Fragments" = fragment_metrics,
        "Peaks" = c(peak_metrics, tss_scores),
        "Cells" = cell_metrics
    )
}

# Helper function to read chromosome/contig lengths.
#
# @param contig_lengths_file Path to a JSON file holding the contig lengths.
#   May be NULL, NA or a path that does not exist; all of these are treated as
#   "no file supplied".
# @return The parsed JSON flattened with unlist(), or NULL when no usable file
#   was supplied or the file could not be read. Problems are logged, never
#   raised to the caller.
read_contig_lengths <- function(contig_lengths_file) {
    # Check type and length before is.na()/file.exists(): an unset option
    # arrives as NULL, for which is.na() returns logical(0) and `||` fails.
    path_is_usable <- is.character(contig_lengths_file) &&
        length(contig_lengths_file) == 1 &&
        !is.na(contig_lengths_file) &&
        file.exists(contig_lengths_file)
    if (!path_is_usable) {
        log_error("No contig lengths file provided or file does not exist")
        return(NULL)
    }

    tryCatch({
        # Read the contig lengths file (JSON format from RunQualCLAlign.py)
        contig_data <- jsonlite::fromJSON(contig_lengths_file)

        if (!is.list(contig_data)) {
            log_error("DEBUG read_contig_lengths: Expected JSON to be a list/object, got: {class(contig_data)}")
            # stop() does not interpolate "{...}", so build the message explicitly
            stop("Invalid JSON format in contig lengths file: ", contig_lengths_file)
        }

        unlist(contig_data)
    }, error = function(e) {
        log_error("Failed to read contig lengths file: {e$message}")
        log_error("DEBUG read_contig_lengths: Full error details: {toString(e)}")
        NULL
    })
}

## Adapted from Signac function `TSSEnrichment`.
#' Compute bulk TSS enrichment profile
#'
#' Compute the transcription start site (TSS) enrichment profile, as defined
#' by ENCODE (\url{https://www.encodeproject.org/data-standards/terms/}), with
#' the insertion counts of all requested cells collapsed together.
#'
#' Each TSS is extended by \code{region_extension} in both directions. Start
#' coordinates below 1 are set to 1 and, when \code{contig_lengths} is given,
#' end coordinates past the contig end are set to the contig length. Regions
#' whose width is then not \code{2 * region_extension + 1} are dropped. The
#' per-position sums are divided by the mean of the outermost 100 positions on
#' each flank.
#'
#' @param object A Seurat object
#' @param assay Name of assay to use
#' @param tss.positions A GRanges object containing the TSS positions. If NULL,
#' use the genomic annotations stored in the assay.
#' @param n Number of TSS positions to use. This will select the first _n_
#' TSSs from the set. If NULL, use all TSSs (slower).
#' @param cells A vector of cells to include. If NULL (default), use all cells
#' in the object
#' @param verbose Display messages
#' @param region_extension Distance extended upstream and downstream from TSS
#' in which to calculate enrichment and background.
#' @param contig_lengths Optional numeric vector of contig lengths named by
#' contig, used to clip regions that run past a contig end. If NULL, no
#' clipping at contig ends is done.
#'
#' @importFrom IRanges IRanges
#' @importFrom GenomicRanges start width strand
#' @importFrom SeuratObject DefaultAssay
#'
#' @return The normalized values around the TSSs, one per position. When no
#' region has the expected width, a vector of ones named
#' \code{-region_extension} to \code{region_extension} is returned instead.
#' @export
#' @concept qc
#' @examples
#' \dontrun{
#' fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac")
#' Fragments(atac_small) <- CreateFragmentObject(
#'   path = fpath,
#'   cells = colnames(atac_small),
#'   tolerance = 0.5
#' )
#' bulkTSSEnrichment(object = atac_small)
#' }
bulkTSSEnrichment <- function(
  object,
  tss.positions = NULL,
  n = NULL,
  assay = NULL,
  cells = NULL,
  verbose = FALSE,
  region_extension = 2000,
  contig_lengths = NULL
) {
  assay <- Signac:::SetIfNull(x = assay, y = DefaultAssay(object = object))
  if (!inherits(x = object[[assay]], what = "ChromatinAssay")) {
    log_error("The requested assay is not a ChromatinAssay")
    stop("The requested assay is not a ChromatinAssay")
  }
  # first check that fragments are present
  frags <- Fragments(object = object[[assay]])
  if (length(x = frags) == 0) {
    log_error("No fragment files present in assay")
    stop("No fragment files present in assay")
  }
  if (is.null(x = tss.positions)) {
    if (verbose) {
        log_info("Extracting TSS positions")
    }
    # work out TSS positions from gene annotations
    # These annotations must have a "gene_biotype" column.
    annotations <- Annotation(object = object[[assay]])
    if (is.null(x = annotations)) {
        log_error("No gene annotations present in object")
        stop("No gene annotations present in object")
    }
    tss.positions <- GetTSSPositions(ranges = annotations)
  } else {
    log_info("Using provided TSS positions: {length(tss.positions)}")
  }

  if (!is.null(x = n)) {
    if (n > length(x = tss.positions)) {
      n <- length(x = tss.positions)
    }
    # seq_len() rather than 1:n so that n = 0 selects nothing
    tss.positions <- tss.positions[seq_len(n), ]
  }

  tss.positions <- Extend(
    x = tss.positions,
    upstream = region_extension,
    downstream = region_extension,
    from.midpoint = TRUE
  )

  # Check for invalid coordinates before any filtering
  invalid_coords <- start(tss.positions) < 1
  if (any(invalid_coords)) {
    log_warn("Found {sum(invalid_coords)} TSS regions with start positions < 1")
    log_warn("Example invalid start positions: {paste(start(tss.positions)[invalid_coords][1:min(3, sum(invalid_coords))], collapse=', ')}")
    # Correct negative start positions to 1
    start(tss.positions)[invalid_coords] <- 1
    log_warn("Corrected negative start coordinates to 1, total TSS positions: {length(tss.positions)}")
  }

  # Correct TSS regions beyond chromosome ends if contig lengths are available
  if (!is.null(contig_lengths)) {
    tss_chroms <- as.character(seqnames(tss.positions))
    tss_ends <- end(tss.positions)

    # Vectorized approach: check if any TSS regions extend beyond chromosome ends and correct them
    # Contigs missing from contig_lengths yield NA and are left untouched.
    chr_lengths <- contig_lengths[tss_chroms]  # Get corresponding lengths for each TSS chromosome
    beyond_end <- !is.na(chr_lengths) & tss_ends > chr_lengths  # Find positions beyond chromosome ends

    if (any(beyond_end)) {
      corrected_count <- sum(beyond_end)
      log_warn("Correcting {corrected_count} TSS regions extending beyond chromosome ends")

      # Vectorized correction: set end positions to chromosome lengths where needed
      end(tss.positions)[beyond_end] <- chr_lengths[beyond_end]
      log_info("Total TSS positions after correction: {length(tss.positions)}")
    }
  }

  # Filter TSS regions to have consistent width AFTER correcting invalid coordinates
  # (a region clipped above no longer has the expected width and is dropped here)
  region_widths <- width(tss.positions)
  expected_width <- (2 * region_extension) + 1

  # Find regions with the expected width
  if (any(region_widths != expected_width)) {
    log_warn("Filtering out TSS regions due to incorrect width")
    valid_width_regions <- region_widths == expected_width
    original_tss_count <- length(tss.positions)
    filtered_out_count <- sum(!valid_width_regions)
    kept_count <- sum(valid_width_regions)
    log_warn("TSS region filtering results:")
    log_warn("  - Original TSS regions: {original_tss_count}")
    log_warn("  - Regions with expected width ({expected_width} bp): {kept_count}")
    log_warn("  - Regions filtered out due to incorrect width: {filtered_out_count}")
    log_warn("  - Percentage kept: {round(kept_count / original_tss_count * 100, 2)}%")
    # Filter to only regions with consistent width
    tss.positions <- tss.positions[valid_width_regions]

    if (kept_count == 0) {
      log_warn("No TSS regions have the expected width of {expected_width} bp!")
      log_warn("Available widths: {paste(unique(region_widths), collapse=', ')}")
      log_warn("Returning default TSS enrichment values - TSS plot will be empty")

      # Return a flat normalized region vector with length equal to expected region width
      # This will result in a flat line at y=1 in the TSS plot, indicating no enrichment
      default_region <- rep(1, expected_width)
      names(default_region) <- seq(-region_extension, region_extension)
      return(default_region)
    }
  }

  # TODO this could probably be a lot more efficient
  cutmatrix <- Signac:::CreateRegionPileupMatrix(
    object = object,
    regions = tss.positions,
    assay = assay,
    cells = cells,
    verbose = verbose
  )
  # Collapse the cut matrix for all cells together.
  regionsums <- colSums(cutmatrix)

  # compute the mean insertion count over the outermost 100 bp of each flank
  # (200 bp total averaged)
  if (verbose) {
    log_info("Computing mean insertion frequency in flanking regions")
  }
  total_region_length <- (2 * region_extension) + 1
  right_flank <- seq.int(from = (total_region_length - 99), to = total_region_length)
  normalizer <- mean(x = regionsums[c(1:100, right_flank)])

  # compute fold change at each position relative to the flanking mean
  # NOTE(review): a flank mean of 0 makes this division produce Inf/NaN values.
  if (verbose) {
    log_info("Normalizing TSS score")
  }

  normalizedregion <- regionsums / normalizer
  return(normalizedregion)
}

plot_tss_enrichment <- function(seurat_object, output_dir, cell_type, contig_lengths = NULL) {
    # Compute the bulk TSS enrichment profile of `seurat_object`, save it as
    # "<project>_<cell_type>_TSS_Plot.png" inside `output_dir`, and return the
    # profile maximum (rounded to two decimals) as the named value
    # "TSS_Enrichment_Score".
    log_info("Starting TSS enrichment calculation for {seurat_object@project.name}")

    # TSS positions come from the object's annotation, without biotype filtering.
    log_info("Getting TSS positions from annotations...")
    tss_positions <- Signac::GetTSSPositions(ranges = Annotation(seurat_object), biotypes = NULL)
    log_info("Number of TSS positions: {length(tss_positions)}")

    # Normalized profile over +/- 2000 bp around the TSSs.
    enrichment_profile <- bulkTSSEnrichment(
        seurat_object,
        tss.positions = tss_positions,
        region_extension = 2000,
        contig_lengths = contig_lengths
    )
    peak_value <- round(max(enrichment_profile), 2)
    log_info("Calculated peak TSS enrichment value: {peak_value}")

    # The profile is named by position relative to the TSS.
    profile_df <- data.frame(
        position = as.numeric(names(enrichment_profile)),
        norm.value = enrichment_profile
    )
    project_name <- seurat_object@project.name

    enrichment_plot <- ggplot(data = profile_df, mapping = aes(x = position, y = norm.value)) +
        geom_line(stat = "identity", linewidth = 0.2) +
        xlab("Distance from TSS (bp)") +
        ylab(label = "Normalized Tn5 activity") +
        theme_classic() +
        theme(legend.position = "none", strip.background = element_blank()) +
        labs(
            title = paste(project_name, "TSS Enrichment"),
            subtitle = paste("Peak height:", peak_value)
        ) +
        NoLegend()

    plot_path <- file.path(output_dir, paste0(project_name, "_", cell_type, "_TSS_Plot.png"))
    ggsave(filename = plot_path, plot = enrichment_plot, width = 7, height = 7, units = "in", bg = "white")

    c("TSS_Enrichment_Score" = peak_value)
}


# Plot the fragment length distribution of the first fragment file attached
# to a Seurat object and save it as a PNG.
#
# Args:
#   seurat_object: Seurat object; the path of its first Fragment object is
#                  read, and its project name is used in title and file name.
#   output_dir:    directory the PNG is written to.
#   cell_type:     label embedded in the output file name.
#
# Returns:
#   The value of ggsave() (called for its side effect of writing the PNG).
plot_fragment_dist <- function(seurat_object, output_dir, cell_type) {

    fragment_path <- GetFragmentData(object = Fragments(seurat_object)[[1]], slot = "path")

    # Count all the fragments seen of each length, using very fast awk pipe.
    # Each output line is "<length> <count>", where length = column 3 - column 2.
    awk_program <- "awk '{ lengths[$3 - $2]++; } END { for (i in lengths) print i, lengths[i] }'"

    # system2() pastes `args` verbatim into a shell command line, so the path
    # must be quoted or a path containing spaces/metacharacters breaks the pipe.
    length_count_lines <- system2(
        command = "zcat",
        args = c(shQuote(fragment_path), "|", awk_program),
        stdout = TRUE
    )

    fragment_length_matrix <- stringr::str_split_fixed(
        length_count_lines,
        pattern = " ",
        n = 2
    )

    # Convert the character matrix to numeric in place, keeping its dimensions
    storage.mode(fragment_length_matrix) <- "double"

    # Convert to data.frame for ggplot2
    fragment_length_counts <- as.data.frame(fragment_length_matrix)
    colnames(fragment_length_counts) <- c("fragment_length", "Count")

    # Plot using geom_col instead of geom_hist because we already have counts
    frag_dist_plot <- ggplot(fragment_length_counts, aes(x = fragment_length, y = Count)) +
        geom_col(width = 1) +
        xlim(c(0, 800)) +
        theme_classic() +
        theme(
            legend.position = "none",
            strip.background = element_blank()
        ) +
        xlab("Fragment length (bp)") +
        ylab("Count") +
        ggtitle(paste(seurat_object@project.name, "Fragment Length Distribution")) +
        NoLegend()

    output_file <- paste0(seurat_object@project.name, "_", cell_type, "_Frag_Dist_Plot.png")
    ggsave(
        filename = file.path(output_dir, output_file),
        plot = frag_dist_plot,
        width = 7,
        height = 7,
        units = "in",
        bg = "white"
    )
}

# Read the optional bioproduct putative data table.
#
# Args:
#   biop_putative_data_table: path to a zip archive holding matrix.mtx.gz,
#                             features.tsv.gz and barcodes.tsv.gz, or NA when
#                             no table was supplied.
#
# Returns:
#   list(exists_biop_gene_by_cell, biop_gene_by_cell): a flag telling whether
#   the matrix was read successfully, and the matrix itself (NA otherwise).
#   A read failure is logged and reported through the flag, not raised.
read_biop_putative_data_table <- function(biop_putative_data_table) {

    table_loaded <- FALSE
    gene_by_cell <- NA

    # Nothing supplied: report "no table" right away
    if (is.na(biop_putative_data_table)) {
        return(list(exists_biop_gene_by_cell = table_loaded,
                    biop_gene_by_cell = gene_by_cell))
    }

    extract_dir <- "Biop_putative_data_table"
    tryCatch(
        expr = {
            unzip(biop_putative_data_table, exdir = extract_dir)
            gene_by_cell <- ReadMtx(
                mtx = file.path(extract_dir, "matrix.mtx.gz"),
                features = file.path(extract_dir, "features.tsv.gz"),
                cells = file.path(extract_dir, "barcodes.tsv.gz")
            )
            table_loaded <- TRUE
            log_info("... Done reading bioproduct putative data table")
        },
        error = function(e) {
            log_error(paste("... Failed reading bioproduct putative data table: ", e))
        }
    )

    list(exists_biop_gene_by_cell = table_loaded,
         biop_gene_by_cell = gene_by_cell)
}



# Run tSNE and UMAP dimensionality reduction on the ATAC peaks assay (or
# jointly with bioproduct counts) and write the coordinates to CSV files.
#
# Args:
#   atac_seurat_object:         Seurat object with a "peaks" assay.
#   cell_order_subsampled_file: path to a JSON file whose `Subsampled` entry
#                               lists the cells to keep, or NA to use all cells.
#   exists_biop_gene_by_cell:   TRUE when `biop_gene_by_cell` holds a matrix.
#   biop_gene_by_cell:          bioproduct gene-by-cell count matrix (only used
#                               for the joint reduction).
#   putative_cell_call:         cell calling mode; values 3 and 4 select the
#                               joint reduction when bioproduct data exists.
#   metrics_dir:                directory the coordinate CSV files are written to.
#
# Returns:
#   invisible(NULL) after writing
#   <project>_<Joint|ATAC>_{tSNE,UMAP}_coordinates[_subsampled].csv.
#   If the subsampled cell order JSON cannot be read, the failure is logged,
#   nothing is written and `atac_seurat_object` is returned unchanged.
perform_dimReduction <- function(atac_seurat_object,
                                 cell_order_subsampled_file,
                                 exists_biop_gene_by_cell,
                                 biop_gene_by_cell,
                                 putative_cell_call,
                                 metrics_dir) {
    is_subsampled <- !is.na(cell_order_subsampled_file)
    if (is_subsampled) {
        # The handler's value becomes the value of tryCatch(); a return() inside
        # the handler would only leave the handler, not this function. NULL is
        # therefore used as the failure marker (as.character() never yields NULL).
        subsampled_putative_cell_list <- tryCatch(
            {
                cell_order_subsampled <- jsonlite::fromJSON(cell_order_subsampled_file)
                as.character(cell_order_subsampled$Subsampled)
            },
            error = function(e) {
                log_error(paste("   ... Failed reading subsampled cell order json file. Skipping dimensionliaty reduction: ", e))
                NULL
            }
        )
        if (is.null(subsampled_putative_cell_list)) {
            return(atac_seurat_object)
        }
    }

    # when putative cell call is done either with mrna_and_atac or protein_and_atac,
    # joint dim reduction is performed.
    is_joint_dim_reduction <- exists_biop_gene_by_cell &&
        (putative_cell_call == 3 || putative_cell_call == 4)
    dim_method <- if (is_joint_dim_reduction) "Joint" else "ATAC"

    if (is_subsampled) {
        log_info("   Subset ATAC seurat object with subsampled putative cell list")
        atac_seurat_object$subsampled <- (
            Cells(atac_seurat_object) %in% subsampled_putative_cell_list
        )

        dimReduct_seurat_object <- subset(
            x = atac_seurat_object,
            subset = (subsampled == TRUE)
        )
        file_suffix <- "_subsampled.csv"
    } else {
        dimReduct_seurat_object <- atac_seurat_object
        file_suffix <- ".csv"
    }

    file_prefix <- paste0(dimReduct_seurat_object@project.name, "_", dim_method)
    tSNE_coordinate_fp <- file.path(metrics_dir, paste0(file_prefix, "_tSNE_coordinates", file_suffix))
    UMAP_coordinate_fp <- file.path(metrics_dir, paste0(file_prefix, "_UMAP_coordinates", file_suffix))

    dim_putative_cell_list <- Cells(dimReduct_seurat_object)

    log_info("   Prepare dimensionality reduction for ATAC")
    DefaultAssay(dimReduct_seurat_object) <- "peaks"
    dimReduct_seurat_object <- FindTopFeatures(dimReduct_seurat_object, min.cutoff = 10)
    dimReduct_seurat_object <- RunTFIDF(dimReduct_seurat_object)

    # override number of singular values to compute from the default of 50 for low number of features/cells
    # e.g. of PCA failing when features/cells < n_pcs:
    #   https://github.com/satijalab/seurat/issues/1914
    n_peak_features <- length(dimReduct_seurat_object@assays$peaks@var.features)
    n_cells <- ncol(dimReduct_seurat_object@assays$peaks@counts)
    n_singular_vals_to_compute <- min(
        50,
        max(n_cells - 3, 1), # max against 1 to avoid specifying <= 0 values if n_cells <= 3
        max(n_peak_features - 3, 1)
    )
    dimReduct_seurat_object <- RunSVD(dimReduct_seurat_object, n = n_singular_vals_to_compute)
    # Default value of perplexity in Rtsne is 30 - for low number of cells (< 300) override that value.
    perplexity <- min(30, max(n_cells %/% 10, 1))

    if (is_joint_dim_reduction) {
        log_info("   Prepare dimensionality reduction for bioproduct")
        # make the order the cell names the same
        biop_gene_by_cell <- biop_gene_by_cell[, dim_putative_cell_list]

        dimReduct_seurat_object[["RNA"]] <- CreateAssayObject(
            counts = biop_gene_by_cell,
            assay = "RNA"
        )

        # prepare dim reduction for bioproduct
        log_info("   Prepare the joint dimensionality reduction for bioproduct data..")
        DefaultAssay(dimReduct_seurat_object) <- "RNA"
        dimReduct_seurat_object <- SCTransform(dimReduct_seurat_object, verbose = FALSE)

        # override default npcs - same reason as for RunSVD()
        n_gene_features <- length(dimReduct_seurat_object@assays$SCT@var.features)
        n_pcs_to_compute <- min(
            50,
            max(n_cells - 3, 1),
            max(n_gene_features - 3, 1)
        )
        dimReduct_seurat_object <- RunPCA(dimReduct_seurat_object, npcs = n_pcs_to_compute)

        # build a joint neighbor graph using both assays
        log_info("   Integrate bioproduct data with ATAC data")
        dimReduct_seurat_object <- FindMultiModalNeighbors(
            object = dimReduct_seurat_object,
            reduction.list = list("pca", "lsi"),
            # note: Still possible number of PCs/Singular Vals computed could be less than what was requested
            dims.list = list(
                1:min(50, ncol(dimReduct_seurat_object@reductions$pca)),
                2:min(40, ncol(dimReduct_seurat_object@reductions$lsi))
            ),
            modality.weight.name = list("RNA.weight", "ATAC.weight"),
            verbose = TRUE,
            k.nn = min(
                20,
                max(n_cells - 3, 1)
            ),
            knn.range = min(
                120,
                max(n_cells - 3, 1)
            )
        )

        log_info("   Perform Joint tSNE dimensionality reduction")
        reduction_tsne_name <- "tsne.joint"
        dimReduct_seurat_object <- RunTSNE(
            object = dimReduct_seurat_object,
            perplexity = perplexity,
            nn.name = "weighted.nn",
            assay = "RNA",
            verbose = FALSE,
            reduction.name = reduction_tsne_name,
            reduction.key = "tSNE_"
        )

        log_info("   Perform Joint UMAP dimensionality reduction")
        reduction_umap_name <- "umap.joint"
        dimReduct_seurat_object <- RunUMAP(
            object = dimReduct_seurat_object,
            nn.name = "weighted.nn",
            assay = "RNA",
            verbose = FALSE,
            reduction.name = reduction_umap_name,
            reduction.key = "UMAP_"
        )

    } else { # atac dim reduction
        # the first LSI component is left out of the dims used below
        log_info("   Perform ATAC tSNE dimensionality reduction")
        reduction_tsne_name <- "tsne.atac"
        dimReduct_seurat_object <- RunTSNE(dimReduct_seurat_object,
            reduction = "lsi",
            perplexity = perplexity,
            dims = 2:min(30, ncol(dimReduct_seurat_object@reductions$lsi)),
            reduction.name = reduction_tsne_name,
            reduction.key = "tSNE_",
            check_duplicates = FALSE
        )

        log_info("   Perform ATAC UMAP dimensionality reduction")
        reduction_umap_name <- "umap.atac"
        dimReduct_seurat_object <- RunUMAP(dimReduct_seurat_object,
            reduction = "lsi",
            dims = 2:min(30, ncol(dimReduct_seurat_object@reductions$lsi)),
            reduction.name = reduction_umap_name,
            reduction.key = "UMAP_",
            check_duplicates = FALSE
        )
    }

    # Write one embedding as CSV: Cell_Index first, then the two coordinates
    save_embedding <- function(reduction_name, coordinate_columns, coordinate_fp) {
        embedding_df <- data.frame(Embeddings(object = dimReduct_seurat_object[[reduction_name]]))
        embedding_df$Cell_Index <- rownames(embedding_df)
        embedding_df <- embedding_df[, c("Cell_Index", coordinate_columns)]

        write.table(embedding_df,
                    file = coordinate_fp,
                    sep = ",",
                    quote = FALSE,
                    row.names = FALSE,
                    col.names = TRUE)
    }

    log_info("   Save tSNE coordinates")
    save_embedding(reduction_tsne_name, c("tSNE_1", "tSNE_2"), tSNE_coordinate_fp)

    log_info("   Save UMAP coordinates")
    save_embedding(reduction_umap_name, c("UMAP_1", "UMAP_2"), UMAP_coordinate_fp)

    invisible(NULL)
}

# Export the ATAC gene activity matrix as a cell-by-gene CSV and, when cell
# calling involved ATAC jointly with a bioproduct, run the external cell
# classifier on either the joint or the ATAC-only matrix.
#
# Args:
#   putative_seurat_object:   Seurat object with a "gene_activity" assay.
#   exists_biop_gene_by_cell: TRUE when `biop_gene_by_cell` holds a matrix.
#   biop_gene_by_cell:        bioproduct gene-by-cell count matrix.
#   putative_cell_call:       cell calling mode; <= 2 skips classification,
#                             3 or 4 with bioproduct data selects the joint one.
#   atac_metrics_dir:         directory receiving the zipped ATAC matrix.
#
# Returns:
#   invisible(NULL). Side effects: writes <project>_ATAC_Cell_by_Gene.csv (and
#   optionally <project>_Joint_Cell_by_Gene.csv) to the working directory,
#   archives the former into `atac_metrics_dir`, runs mist_cell_classifier.py
#   and appends its log to mist_atac_compile_results.log.
cell_annotation <- function(putative_seurat_object, exists_biop_gene_by_cell, biop_gene_by_cell, putative_cell_call, atac_metrics_dir) {
    # get the gene activity matrix
    atac_gene_by_cell <- GetAssayData(putative_seurat_object, assay = "gene_activity", slot = "counts")

    # convert atac gene by cell to atac cell by gene
    atac_cell_by_gene_filename <- paste0(putative_seurat_object@project.name, "_ATAC_Cell_by_Gene.csv")
    atac_cell_by_gene <- t(as.matrix(atac_gene_by_cell))
    mode(atac_cell_by_gene) <- "integer"

    # prepend the gene names as a header row labelled "Cell_Index"
    atac_cell_by_gene_colnames <- colnames(atac_cell_by_gene)
    atac_cell_by_gene <- rbind(atac_cell_by_gene_colnames, atac_cell_by_gene)
    rownames(atac_cell_by_gene)[1] <- "Cell_Index"

    log_info("   Saving ATAC cell by gene matrix... : {atac_cell_by_gene_filename}")
    write.table(atac_cell_by_gene,
                file = atac_cell_by_gene_filename,
                sep = ",",
                quote = FALSE,
                col.names = FALSE)

    # zipping atac cell by gene
    # NOTE(review): zip() writes a zip archive although the file is named
    # ".gz" - confirm what downstream readers expect before changing it.
    log_info("   Zipping ATAC cell by gene matrix to {atac_cell_by_gene_filename}.gz")
    zip(zipfile = file.path(atac_metrics_dir, paste0(atac_cell_by_gene_filename, ".gz")), files = atac_cell_by_gene_filename)

    if (putative_cell_call <= 2) {
        log_info("   Skipping cell annotation here, as it was previously done in GetDataTable node")
        return(invisible(NULL))
    }

    # when putative cell call is done either with mrna_and_atac or protein_and_atac,
    # joint annotation is performed.
    is_joint_annotation <- exists_biop_gene_by_cell &&
        (putative_cell_call == 3 || putative_cell_call == 4)

    if (is_joint_annotation) {
        log_info("   Intersect bioproduct and ATAC gene by cell matrix...")
        common_genes <- intersect(rownames(atac_gene_by_cell), rownames(biop_gene_by_cell))
        if (length(common_genes) == 0) {
            log_error("   Skipping joint cell annotation: No common gene is found between bioproduct and ATAC gene by cell matrix.. ")
            return(invisible(NULL))
        }

        # drop = FALSE keeps both objects two-dimensional when only a single
        # gene (or cell) remains; otherwise colnames() below would be NULL.
        atac_gene_by_cell <- atac_gene_by_cell[common_genes, , drop = FALSE]
        # same genes, and cells in the same order as the ATAC matrix
        biop_gene_by_cell <- biop_gene_by_cell[common_genes, colnames(atac_gene_by_cell), drop = FALSE]

        log_info("   Normalize bioproduct and ATAC gene by cell matrix...")
        atac_total_counts <- sum(atac_gene_by_cell)
        biop_total_counts <- sum(biop_gene_by_cell)

        # scale the matrix with the smaller total up to the larger total
        if (atac_total_counts > biop_total_counts) {
            scaler <- atac_total_counts / biop_total_counts
            biop_gene_by_cell <- biop_gene_by_cell * scaler
        } else {
            scaler <- biop_total_counts / atac_total_counts
            atac_gene_by_cell <- atac_gene_by_cell * scaler
        }

        log_info("   Make a joint gene by cell matrix...")
        joined_cell_by_gene <- t(as.matrix(atac_gene_by_cell + biop_gene_by_cell))
        mode(joined_cell_by_gene) <- "integer"

        joined_cell_by_gene_colnames <- colnames(joined_cell_by_gene)
        joined_cell_by_gene <- rbind(joined_cell_by_gene_colnames, joined_cell_by_gene)
        rownames(joined_cell_by_gene)[1] <- "Cell_Index"
        joined_cell_by_gene_filename <- paste0(putative_seurat_object@project.name, "_Joint_Cell_by_Gene.csv")

        write.table(joined_cell_by_gene,
                    file = joined_cell_by_gene_filename,
                    sep = ",",
                    quote = FALSE,
                    col.names = FALSE)

        log_info("   Start cell classification...")
        log_info("   .... Use {dim(atac_gene_by_cell)[1]} genes and {dim(atac_gene_by_cell)[2]} cells for the joint cell type prediction...")
        classifier_input <- joined_cell_by_gene_filename

    } else {
        log_info("   Start cell classification using ATAC data only...")
        log_info("   .... Use {dim(atac_gene_by_cell)[1]} genes and {dim(atac_gene_by_cell)[2]} cells for the ATAC cell type prediction...")
        classifier_input <- atac_cell_by_gene_filename
    }

    # Quote both arguments: system2() pastes them verbatim into a shell command
    classifier_script <- file.path(Sys.getenv("RHAPSODY_HOME"), "pythonRhapsody", "bin", "mist_cell_classifier.py")
    exit_status <- system2("python", args = shQuote(c(classifier_script, classifier_input)))
    if (exit_status != 0) {
        log_error(paste("   Cell classifier exited with non-zero status:", exit_status))
    }

    # Append the classifier log (if any) to this node's log. readLines() on a
    # path opens and closes its own connection, so nothing is left open.
    cell_classifier_log_file <- "mist_cell_classifier.log"
    if (file.exists(cell_classifier_log_file)) {
        node_log_file <- "mist_atac_compile_results.log"
        classifier_log_lines <- readLines(cell_classifier_log_file)
        if (length(classifier_log_lines) > 0) {
            write(classifier_log_lines, node_log_file, append = TRUE)
        }
    }

    invisible(NULL)
}

## Adapted from Signac function `UpdatePath`.
# https://rdrr.io/github/timoast/signac/src/R/fragments.R
# The normalizePath call, which converts a relative file path into an
# absolute file path, is commented out here.

# Point a fragment object at a new fragment file location.
#
# Args:
#   object:   object whose "path" slot is updated.
#   new.path: new location of the fragment file; a "<new.path>.tbi" index
#             file must exist next to it. The path is used as given
#             (it is not normalised to an absolute path).
#   verbose:  forwarded to ValidateHash().
#
# Returns:
#   `object` with its path replaced, or unchanged when the path already
#   matches. Stops when the file or its index is missing, or when
#   ValidateHash() rejects the file at the new location.
update_frag_path <- function(object, new.path, verbose = TRUE) {

    # new.path <- normalizePath(path = new.path, mustWork = TRUE)
    if (!file.exists(new.path)) {
        stop("Fragment file not found")
    }
    tabix_index <- paste0(new.path, ".tbi")
    if (!file.exists(tabix_index)) {
        stop("Fragment file not indexed")
    }

    # Nothing to do when the object already points at this path
    current_path <- GetFragmentData(object = object, slot = "path")
    if (identical(current_path, new.path)) {
        return(object)
    }

    slot(object = object, name = "path") <- new.path
    if (!ValidateHash(object = object, verbose = verbose)) {
        stop("MD5 sum does not match previously computed sum")
    }
    object
}

# Command line entry point: declare the options, parse them and run the node.
# library() (rather than require()) makes a missing optparse fail right here
# instead of later with a "could not find function" error.
suppressPackageStartupMessages(library(optparse))

# Every option is an optional character value that defaults to NA, so the
# options are declared as a flag -> help text table.
option_help <- c(
    "--initial-seurat" = "Initial Seurat object",
    "--cell-order" = "Cell order json",
    "--cell-order-subsampled" = "subsampled Cell order json",
    "--unified-metrics" = "Unified read quality metrics json",
    "--total-fragment-metrics" = "Fragment metrics json",
    "--fragments" = "Fragments file",
    "--input-metrics-tar" = "Metric files (tar.gz) from GetDataTable node",
    "--genome-size" = "Total base pairs in the reference genome",
    "--genome-contig-lengths" = "File containing chromosome/contig lengths",
    "--run-metadata" = "Metadata json file",
    "--feature-name-json" = "JSON file mapping relationship of gene_id and gene_name feature attributes.",
    "--biop-putative-data-table" = "Bioproduct putative data table"
)

option_list <- unname(Map(
    function(flag, help_text) {
        make_option(
            c(flag),
            action = "store",
            default = NA,
            type = "character",
            help = help_text
        )
    },
    names(option_help),
    option_help
))


parser <- OptionParser(option_list = option_list)
# Hyphens in flag names become underscores, e.g. --initial-seurat -> opt$initial_seurat
opt <- parse_args(parser, convert_hyphens_to_underscores = TRUE)
#log_debug("Running with options: {jsonlite::toJSON(opt, auto_unbox = TRUE, pretty = TRUE)}")

atac_compile_results(opt)