#!/usr/bin/Rscript

# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.

# surankco-score: alignment of contigs (FASTA format) and reference genomes 
#                 (FASTA format) and score calculation


# get script path
# Rscript hands the script location to R as a "--file=<path>" argument; the
# directory part of that path is stored in script.path.
args <- commandArgs(trailingOnly = FALSE)
script.arg <- "--file="
# Anchor the pattern at the start of the argument and keep only the first hit,
# so that a user argument which merely contains "--file=" is not mistaken for
# the script location and script.path never becomes a multi-element vector.
script.pattern <- paste0("^", script.arg)
script.match <- grep(script.pattern, args, value = TRUE)
script.name <- sub(script.pattern, "", utils::head(script.match, 1))
script.path <- dirname(script.name)


# testing/debugging
# Uncomment the two overrides below to run the script from an interactive
# session, where no "--file=" argument is available:
# args <- c("--directory=data")
# script.path <- getwd()
# When DEBUGGING is TRUE the script prints the parsed arguments and every
# external command line, and it keeps the temporary files instead of
# deleting them.
DEBUGGING <- FALSE


# sources and libraries
source(paste(script.path, '/r/parameter.R', sep=""))
source(paste(script.path, '/r/scores.R', sep=""))
source(paste(script.path, '/r/expofits.R', sep=""))
source(paste(script.path, '/r/import.R', sep=""))
loadPackages(c("optparse","MASS"), quietly=TRUE)


# parsing parameter
# Announce the stage, parse the command line and pull out the file table.
cat("prepare files\n")
parameters <- parseSurankcoScore()
files <- parameters$files

# In debugging runs, dump the raw arguments and everything that was parsed.
if (DEBUGGING){
  invisible(lapply(list(args, parameters, files), print))
}

# One (initially empty) score file name per row; filled in during scoring.
files$scores <- character(nrow(files))

# external processes
# For every row of `files`: align the assembly against its reference with
# blat, filter the alignments with pslMatchFilter, render them with pslPretty
# and let the Java scorer write a per-row score file. A non-zero exit status
# of any tool aborts the run through complainAndStop().

# Prepare a file name for use inside a shell command line: expand a leading
# "~" (the shell will not do it once the name is quoted) and quote the result,
# so paths with spaces or shell metacharacters reach the tools unchanged.
quotePath <- function(path) {
  shQuote(path.expand(as.character(path)))
}

cat("align and score contigs: ")
# seq_len() instead of 1:nrow(), so an empty file table skips the loop
# rather than iterating over c(1, 0)
for (i in seq_len(nrow(files))){
  cat(paste0(i, " "))
  input.file.assembly <- files[i, parameters$assembly.suffix]
  input.file.reference <- files[i, parameters$reference.suffix]
  output.file.psl <- paste0(rownames(files)[i], ".psl.tmp")
  output.file.filter <- paste0(rownames(files)[i], ".filter.tmp")
  output.file.pretty <- paste0(rownames(files)[i], ".pretty.tmp")
  output.file.scores <- paste0(rownames(files)[i], ".scores.tmp")
  # remember the score file so it can be imported after the loop
  files$scores[i] <- output.file.scores
  
  # build alignments
  blat.call <- paste("blat",
                     quotePath(input.file.reference),
                     quotePath(input.file.assembly),
                     quotePath(output.file.psl),
                     "-minIdentity=0")
  
  if (DEBUGGING){
    print(blat.call)
  }
  
  # system() returns the exit status; anything but 0 is treated as failure
  if(code <- system(blat.call, ignore.stdout=TRUE)){
    complainAndStop("blat not successfully executed", code)
  }
  
  # filter alignments
  filter.call <- paste(quotePath(paste0(script.path, "/r/pslMatchFilter")),
                       quotePath(output.file.psl),
                       quotePath(output.file.filter))
  
  if (DEBUGGING){
    print(filter.call)
  }
  
  if(code <- system(filter.call, ignore.stdout=FALSE)){
    complainAndStop("pslMatchFilter not successfully executed", code)
  }
  
  # extract alignments from psl
  pretty.call <- paste("pslPretty",
                       quotePath(output.file.filter),
                       quotePath(input.file.reference),
                       quotePath(input.file.assembly),
                       quotePath(output.file.pretty),
                       "-long")
  
  if (DEBUGGING){
    print(pretty.call)
  }
  
  if(code <- system(pretty.call, ignore.stdout=FALSE)){
    complainAndStop("pslPretty not successfully executed", code)
  }
  
  # calculate scores in Java and export data for R
  # initial and maximum heap size are both set to parameters$memory gigabytes
  java.call <- paste("java", 
                     paste0("-Xms", parameters$memory, "G"), 
                     paste0("-Xmx", parameters$memory, "G"), 
                     "-cp",
                     quotePath(paste(script.path, "surankco.jar", sep="/")),
                     "de.rki.ng4.surankco.scoring.Main",
                     quotePath(output.file.pretty),
                     quotePath(output.file.scores))
  
  if (DEBUGGING){
    print(java.call)
  }
  
  if(code <- system(java.call, ignore.stdout=FALSE)){
    complainAndStop("java not successfully executed", code)
  }
  
  # intermediate files are only kept for inspection in debugging runs;
  # the score file itself is still needed after the loop
  if (!DEBUGGING){
    unlink(c(output.file.psl, output.file.filter, output.file.pretty))
  }
}
cat("\n")

# import Java score
# The temporary score files are read back together with the matching
# assembly and reference file names.
cat("finalize scores\n")
assembly.files <- files[ , parameters$assembly.suffix]
reference.files <- files[ , parameters$reference.suffix]
scores.raw <- importScores(files$scores, assembly.files, reference.files)

# post process scores
# remove redundant and highly correlating scores
# (open question: this could move to training/prediction, so that a full set
# of scores is exported here)
scores.final <- correctScores(scores.raw)

# calculate fittings
# print score distributions and threshold suggestions
# All score tables are stacked into one before fitting.
cat("export histograms\n")
scores.combined <- do.call(rbind, scores.final)
expoFit(scores.combined, TRUE, parameters$pdf.histograms)

# export scores as tab-separated text
# Writes one "<row name>.scores.txt" per row of `files`, taking the i-th
# element of scores.final for the i-th row.
cat("export scores\n")
# seq_len() instead of 1:nrow(), so an empty file table writes nothing
# rather than iterating over c(1, 0)
for (i in seq_len(nrow(files))){
  output.file.scores <- paste0(rownames(files)[i], ".scores.txt")
  
  write.table(scores.final[[i]], file=output.file.scores, 
              sep="\t", dec = ".", col.names=TRUE, row.names=FALSE)
}

# remove the temporary score files, unless this is a debugging run
if (!DEBUGGING){
  cat("remove temporary files\n")
  unlink(files[["scores"]])
}

# done
cat("surankco-score calculations done\n")
