#!/usr/bin/env Rscript # Reading in the input parameters. args = commandArgs(trailingOnly=TRUE) expFile = args[[1]] controlCols = args[[2]] experimentCols = args[[3]] outPrefix = args[[4]] expCutOff = 1 # Loading required libraries library(edgeR) library(pheatmap) # Read the input file cnt=read.table(expFile, header=T, row.names = 1)[,c(controlCols, experimentCols)] group=c(rep('Control',length(controlCols)),rep('Exp',length(experimentCols))) d = DGEList(counts = cnt, group = group) # Filter data d.norm <- cpm(d) keep <- rowSums(d.norm > expCutOff) == dim(d.norm)[2] #rowMeans(log2(d.norm+8))>=5 d <- d[keep,] d$samples$lib.size <- colSums(d$counts) d <- calcNormFactors(d) d.norm = cpm(d) # A method that uses GLM to calculate dispersion. design.mat <- model.matrix(~ 0 + d$samples$group) colnames(design.mat) <- levels(d$samples$group) d2 <- estimateGLMCommonDisp(d,design.mat) d2 <- estimateGLMTrendedDisp(d2,design.mat, method="bin.spline") d2 <- estimateGLMTagwiseDisp(d2,design.mat) fit <- glmFit(d2, design.mat) # compare (group 2 - group 1) to 0: # this is equivalent to comparing group 2 to group 1 lrt <- glmLRT(fit, contrast=c(-1,1)) # Exp/Control de2 <- decideTestsDGE(lrt, adjust.method="BH", p.value = 0.05) # Differentially expressed genes de2tags12 <- rownames(d2)[as.logical(de2)] # Plotting the DE genes png( paste0(outPrefix, ".ma.png"), width=500, height=500) plotSmear(lrt, de.tags=de2tags12, ylab = "log2FC (sample2/sample1)") abline(h = c(-2, 2), col = "blue") dev.off() lrt.tbl = topTags(lrt, n=nrow(lrt))$table lrt.tbl$Row.names = rownames(lrt.tbl) write.table(lrt.tbl, paste0(outPrefix, ".deg.edgeR.out"), sep="\t", quote = F) d.norm.zscore = t(apply(d.norm, 1, scale)) deGene.data = d.norm.zscore[ row.names(lrt.tbl[lrt.tbl$FDR < 0.05 & abs(lrt.tbl$logFC) > 1 ,]) , ] png( paste0(outPrefix, ".heatmap.png"), width=200, height=500) pheatmap(deGene.data) dev.off()