library(data.table) library(RMariaDB); library(parallel); setwd("/local/ftp/pub/hub/dTOX/TFBS"); ## terminal: ## mysql -u root ## show databases; ## use TFBS; ## show tables; ## show create table TFBS; ## select * from TFBS limit 5; ## select count(*) from TFBS; ## show table status from TFBS # Create table TFBS_occupy # dbRemoveTable(con, "TF", temporary = FALSE); #TF <- read.table("./motif_rtfbsdb_hg19_ext/motif.list.tab"); #colnames(TF) <- c("TF", "motif"); #dbWriteTable(con, "TF", TF, temporary = FALSE); # Create table TFBS_occupy # dbRemoveTable(con, "TFBS_occupy", temporary = FALSE); # # > CREATE TABLE `TFBS_occupy` ( # -> `chr` varchar(8), # -> `start` int(11) DEFAULT NULL, # -> `stop` int(11) DEFAULT NULL, # -> `max` double DEFAULT NULL, # -> `max25` double DEFAULT NULL, # -> `min25` double DEFAULT NULL, # -> `avg25` double DEFAULT NULL, # -> `motifs` varchar(255) # -> ) ; # # We dont keep TFBS_occupy table! if(0) { files.bed <- list.files("./motif_rtfbsdb_hg19_ext2", pattern = "\\.bed.gz$") tb <- read.table(paste("./motif_rtfbsdb_hg19_ext2",files.bed[1], sep="/")); colnames(tb) <- c("chr", "start", "stop", "max", "max25", "min25", "avg25", "motifs"); dbWriteTable(con, "TFBS_occupy", tb, temporary = FALSE, append=TRUE); gc(reset=TRUE); mclapply(files.bed[-1], function(f.bed) { con <- dbConnect(RMariaDB::MariaDB(), dbname = "TFBS", username="root") cat(f.bed, "\n"); tb <- read.table(paste("./motif_rtfbsdb_hg19_ext2",f.bed, sep="/")); colnames(tb) <- c("chr", "start", "stop", "max", "max25", "min25", "avg25", "motifs"); dbWriteTable(con, "TFBS_occupy", tb, temporary = FALSE, append=TRUE); rm(tb); gc(reset=TRUE); }, mc.cores=32); } # CREATE TABLE `TFBS` ( # `chr` varchar(2), # `start` int(9), # `stop` int(9) , # `center` int(9) , # `score` float, # `strand` nchar(1), # `motif` varchar(10) ) ; # CREATE UNIQUE INDEX tfbsid ON TFBS(chr,start,stop,motif); # Connect database files.bed <- list.files("./motif_rtfbsdb_hg19_ext", pattern = "\\.bed.gz$") ##For test in mclapply if(0) { tb <- read.table(paste("./motif_rtfbsdb_hg19_ext",files.bed[1], sep="/")); colnames(tb) <- c("chr", "start", "stop", "motif", "score", "strand", "min25", "max25", "avg25"); tb$chr <- substring(as.character(tb$chr), 4); tb$center<-round((tb$start+tb$stop)/2) con <- dbConnect(RMariaDB::MariaDB(), dbname = "TFBS", username="root") dbWriteTable(con, "TFBS", tb[,c("chr", "start", "stop", "center", "score", "strand", "motif")], temporary = FALSE, append=TRUE); dbDisconnect(con); rm(tb); gc(reset=TRUE); } files.bed <- list.files("./motif_rtfbsdb_hg19_ext", pattern = "\\.bed.gz$") L <- mclapply(files.bed, function(f.bed) { file.cat <- paste0("./motif_rtfbsdb_hg19_ext/", f.bed, ".txt"); if(!file.exists(file.cat)) { con <- dbConnect(RMariaDB::MariaDB(), dbname = "TFBS", username="root") cat(f.bed, "\n"); tb <- read.table(paste("./motif_rtfbsdb_hg19_ext",f.bed, sep="/")); colnames(tb) <- c("chr", "start", "stop", "motif", "score", "strand", "min25", "max25", "avg25"); tb$chr <- substring(as.character(tb$chr), 4); tb$center<-round((tb$start+tb$stop)/2) dbWriteTable(con, "TFBS", tb[,c("chr", "start", "stop", "center", "score", "strand", "motif")], temporary = FALSE, append=TRUE); dbDisconnect(con); cat("DONE!", file=file.cat); rm(tb); gc(reset=TRUE); } }, mc.cores=3, mc.preschedule=FALSE); gc(reset=TRUE); # CREATE TABLE `BLOOD1` ( # `chr` varchar(2), # `center` int(9) , # `rfscore` float, # `svmscore` float ) ; # CREATE UNIQUE INDEX BLOOD1_idx ON BLOOD1(chr,center); #fast method to import data # SHOW VARIABLES LIKE "secure_file_priv"; # LOAD data LOCAL INFILE '/local/ftp/pub/hub/dTOX/TFBS/blood2.out.txt' INTO table BLOOD2; # disable INDEX # alter table TFBS disable KEYS; if(0) { # load blood1 tb <- read.table("/home/danko_0001/projects/ayh8/dtox_encode_dnase/blood1/blood1.out.dTOX.bound.bed.gz") colnames(tb) <- c("chr", "center", "stop", "motifs", "max", "strand", "rfscore", "svmscore"); tb$chr <- substring(as.character(tb$chr), 4); con <- dbConnect(RMariaDB::MariaDB(), dbname = "TFBS", username="root") dbWriteTable(con, "BLOOD1", tb[,c(1,2,7,8)], temporary = FALSE, append=TRUE); dbDisconnect(con); }