#!/usr/bin/env Rscript

# Reading in the input parameters.
args = commandArgs(trailingOnly=TRUE)
expFile = args[[1]]
controlCols = args[[2]]
experimentCols = args[[3]]
outPrefix = args[[4]]
expCutOff = 1

# Loading required libraries
library(edgeR)
library(pheatmap)

# Read the input file
cnt=read.table(expFile, header=T, row.names = 1)[,c(controlCols, experimentCols)]
group=c(rep('Control',length(controlCols)),rep('Exp',length(experimentCols)))
d = DGEList(counts = cnt, group = group)

# Filter data
d.norm <- cpm(d)
keep <- rowSums(d.norm > expCutOff) == dim(d.norm)[2] #rowMeans(log2(d.norm+8))>=5
d <- d[keep,]
d$samples$lib.size <- colSums(d$counts)
d <- calcNormFactors(d)
d.norm = cpm(d)

# A method that uses GLM to calculate dispersion.
design.mat <- model.matrix(~ 0 + d$samples$group)
colnames(design.mat) <- levels(d$samples$group)
d2 <- estimateGLMCommonDisp(d,design.mat)
d2 <- estimateGLMTrendedDisp(d2,design.mat, method="bin.spline")
d2 <- estimateGLMTagwiseDisp(d2,design.mat)

fit <- glmFit(d2, design.mat)
# compare (group 2 - group 1) to 0:
# this is equivalent to comparing group 2 to group 1
lrt <- glmLRT(fit, contrast=c(-1,1))  # Exp/Control
de2 <- decideTestsDGE(lrt, adjust.method="BH", p.value = 0.05)

# Differentially expressed genes
de2tags12 <- rownames(d2)[as.logical(de2)]

# Plotting the DE genes

png( paste0(outPrefix, ".ma.png"), width=500, height=500)
plotSmear(lrt, de.tags=de2tags12, ylab = "log2FC (sample2/sample1)")
abline(h = c(-2, 2), col = "blue")
dev.off()

lrt.tbl = topTags(lrt, n=nrow(lrt))$table
lrt.tbl$Row.names = rownames(lrt.tbl)
write.table(lrt.tbl, paste0(outPrefix, ".deg.edgeR.out"), sep="\t", quote = F)

d.norm.zscore = t(apply(d.norm, 1, scale))
deGene.data = d.norm.zscore[ row.names(lrt.tbl[lrt.tbl$FDR < 0.05 & abs(lrt.tbl$logFC) > 1 ,]) , ]
png( paste0(outPrefix, ".heatmap.png"), width=200, height=500)
pheatmap(deGene.data)
dev.off()