#!/usr/bin/env Rscript # Reading in the input parameters. args = commandArgs(trailingOnly=TRUE) file = args[[1]] out = args[[2]] # Loading required libraries library(edgeR) library(ggplot2) # Reading annotation detail mentioned in the first row annotations = read.table(file,row.names = 1)[1,] # Reading in the expression data data = read.table(file,row.names = 1, skip=1) # Filter genes that are not expressed significantly keep <- rowSums(cpm(data)>1) >= 2 data <- data[keep,] # Normalize the gene expression data for PCA analysis normedCounts=t(as.data.frame(log(cpm(data)+0.1, 2))) # Perform PCA analysis using an R package "prcomp" pca=prcomp(normedCounts) # Calculate variance for each principle component. percentVar = round((pca$sdev)^2 / sum(pca$sdev^2)*100) # Prepare a data frame to use for plotting purpose pca_mod = as.data.frame(pca$x) pca_mod$class = as.matrix(annotations)[1,] # Plotting the PCA analysis data png(out, width=500, height=500) ggplot(pca_mod, aes(PC1, PC2, color=class, shape=class)) + geom_point(size=5) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed(ratio = 1) + guides(colour = guide_legend(override.aes = list(shape = 15))) dev.off()