/*
 * Copyright 2012 K.K.DNAFORM
 * This file is part of idr_paraclu program.
 * Idr_paraclu is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, any later version.
 *
 * Idr_paraclu is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Foobar. If not, see <http://www.gnu.org/licenses/>.
 *
 * FILE: 
 * 		automatedClustering.h
 * USAGE: 
 * 		automatedClustering inputdir outputdir tpm idr outputdir2 project
 * 			inputdir	directory containing input files (You need at least 2 files)
 * 			outputdir	directory containing output files
 * 			tpm			TPM used for a threshold in clustering
 * 			idr		 	IDR used for a threshold
 *			outputdir2	directory containing files for scatter plot of hierarchical stability
 *			project		project name (you like)
 * DESCRIPTION: 
 * 		Executing clustering for all pairs of CAGE data replicates.
 * CREATED:	
 *		2012.04.17
 * REVISION:
 * 		2012.04.23	Adding counting the number of reproducible/irreproducible clusters.
 * 		2012.04.23	Adding creating files for scatter plot of hierarchical stability.
 * 		2012.04.25	Adding creating directory for output files.
 * 		2012.06.25	Adding project name as a prefix of output files.
 */
const char* const	IDR_SCRIPT1[] = {
	"args <- commandArgs(trailingOnly=T)\n",
	"matched.file <- args[1]\n",
	"output.prefix <- args[2]\n",
	"sink(paste(output.prefix, \"-Rout.txt\", sep=\"\"))\n",
	"data.file <- read.table(paste(matched.file, sep=\"\"))\n",
	"data.matched <- list()\n",
	"data.matched$merge1 <- data.frame(id=data.file[,1], sig.value=data.file[,2])\n",
	"data.matched$merge2 <- data.frame(id=data.file[,3], sig.value=data.file[,4])\n",
	"uri.output <- get.uri.matched.RNA(data.matched)\n",
	"cat(paste(\"URI is done\\n\"))\n",
	"save(uri.output, file=paste(output.prefix, \"-uri.sav\", sep=\"\"))\n",
	"cat(paste(\"URI is saved at: \", output.prefix, \"-uri.sav \\n\", sep=\"\"))\n",
	"em.output <- fit.em(data.matched, fix.rho2=T)\n",
	"cat(paste(\"EM is done\\n\\n\"))\n",
	"save(em.output, file=paste(output.prefix, \"-em.sav\", sep=\"\"))\n",
	"cat(paste(\"EM is saved at: \", output.prefix, \"-em.sav \\n\", sep=\"\"))\n",
	"cat(paste(\"EM estimation for the following files\\n\", matched.file))\n",
	"print(em.output$em.fit$para)\n",
	"write.out.data <- data.frame(ID1=em.output$data.pruned$sample1$id,\n",
	"score1=em.output$data.pruned$sample1$sig.value,\n",
	"ID2=em.output$data.pruned$sample2$id,\n",
	"score2=em.output$data.pruned$sample2$sig.value,\n",
	"idr=1-em.output$em.fit$e.z)\n",
	"write.table(write.out.data, file=paste(output.prefix, \"-overlapped-peaks.txt\", sep=\"\"))\n",
	"cat(paste(\"Write overlapped peaks and local idr to: \", output.prefix, \"-overlapped-peaks.txt\\n\", sep=\"\"))\n",
	"IDR.cutoff <- seq(0.01, 0.25, by=0.01)\n",
	"idr.o <- order(write.out.data$idr)\n",
	"idr.ordered <- write.out.data$idr[idr.o]\n",
	"IDR.sum <- cumsum(idr.ordered)/c(1:length(idr.ordered))\n",
	"IDR.count <- c()\n",
	"n.cutoff <- length(IDR.cutoff)\n",
	"idr.thr <- c()\n",
	"for(i in 1:n.cutoff){\n",
	"IDR.count[i] <- sum(IDR.sum <= IDR.cutoff[i])\n",
	"temp <- which(IDR.sum <= IDR.cutoff[i])\n",
	"idr.thr[i] <- max(idr.ordered[temp])\n",
	"}\n",
	"sample1 <- em.output$data.pruned$sample1[idr.o,]\n",
	"sample2 <- em.output$data.pruned$sample2[idr.o,]\n",
	"sig.thr1 <- c()\n",
	"sig.thr2 <- c()\n",
	"for(j in 1:n.cutoff){\n",
	"temp <- which(IDR.sum <= IDR.cutoff[j])\n",
	"sig.thr1[j] <- quantile(sample1$sig.value[temp], prob=0.05)\n",
	"sig.thr2[j] <- quantile(sample2$sig.value[temp], prob=0.05)\n",
	"}\n",
	"idr.cut <- data.frame(matched.file, IDR.cutoff=IDR.cutoff, local.idr.cutoff=idr.thr, IDR.count=IDR.count, sig.thr1=sig.thr1, sig.thr2=sig.thr2)\n",
	"colnames(idr.cut) <- c(\"file\", \"IDR.cutoff\", \"local.idr.cutoff\", \"Number.common.peak.above.IDR\", \"sig.thr1\", \"sig.thr2\")\n",
	"write.table(idr.cut, file=paste(output.prefix, \"-npeaks-aboveIDR.txt\", sep=\"\"), append=T, quote=F, row.names=F, col.names=F)\n",
	"cat(paste(\"Write number of peaks above IDR cutoff [0.01, 0.25]: \",\"npeaks-aboveIDR.txt\\n\", sep=\"\"))\n",
	"mar.mean <- get.mar.mean(em.output$em.fit)\n",
	"cat(paste(\"Marginal mean of two components:\\n\"))\n",
	"print(mar.mean)\n",
	"sink()\n"
};

const char* const	IDR_SCRIPT2[] = {
	"process.narrowpeak <- function(narrow.file, chr.size, half.width=NULL, summit=\"offset\", stop.exclusive=T, broadpeak=F, is.RNAseq=F){\n",
	"aa <- read.table(narrow.file)\n",
	"if(broadpeak){\n",
	"bb.ori <- data.frame(chr=aa$V1, start=aa$V2, stop=aa$V3, signal.value=aa$V7, p.value=aa$V8, q.value=aa$V9)\n",
	"}else{\n",
	"bb.ori <- data.frame(chr=aa$V1, start=aa$V2, stop=aa$V3, signal.value=aa$V7, p.value=aa$V8, q.value=aa$V9, summit=aa$V10)\n",
	"}\n",
	"if(summit==\"summit\"){\n",
	"bb.ori$summit <- bb.ori$summit-bb.ori$start # change summit to offset to avoid error when concatenating chromosomes\n",
	"}\n",
	"bb <- concatenate.chr(bb.ori, chr.size)\n",
	"bb <- bb[bb$start != bb$stop,]\n",
	"if(stop.exclusive==T){\n",
	"bb$stop <- bb$stop-1\n",
	"}\n",
	"if(!is.null(half.width)){\n",
	"bb$start.ori <- bb$start\n",
	"bb$stop.ori <- bb$stop\n",
	"width <- bb$stop-bb$start +1\n",
	"is.wider <- width > 2*half.width\n",
	"if(summit==\"offset\" | summit==\"summit\"){ # if summit is offset from start\n",
	"bb$start[is.wider] <- bb$start.ori[is.wider] + bb$summit[is.wider]-half.width\n",
	"bb$stop[is.wider] <- bb$start.ori[is.wider] + bb$summit[is.wider]+half.width\n",
	"} else {\n",
	"if(summit==\"unknown\"){\n",
	"bb$start[is.wider] <- bb$start.ori[is.wider]+round(width[is.wider]/2) - half.width\n",
	"bb$stop[is.wider] <- bb$start.ori[is.wider]+round(width[is.wider]/2) + half.width\n",
	"}\n",
	"}\n",
	"}\n",
	"if(is.RNAseq)\n",
	"bb <- clean.data(bb)\n",
	"invisible(list(data.ori=bb.ori, data.cleaned=bb))\n",
	"}\n",
	"clean.data <- function(adata){\n",
	"adata <- adata[adata$start != adata$stop,]\n",
	"stop.in.start <- is.element(adata$stop, adata$start)\n",
	"n.fix <- sum(stop.in.start)\n",
	"if(n.fix >0){\n",
	"print(paste(\"Fix\", n.fix, \"stops\\n\"))\n",
	"adata$stop[stop.in.start] <- adata$stop[stop.in.start]-1\n",
	"}\n",
	"return(adata)\n",
	"}\n",
	"concatenate.chr <- function(peaks, chr.size){\n",
	"chr.o <- order(chr.size[,1])\n",
	"chr.size <- chr.size[chr.o,]\n",
	"chr.shift <- cumsum(c(0, chr.size[-nrow(chr.size),2]))\n",
	"chr.size.cum <- data.frame(chr=chr.size[,1], shift=chr.shift)\n",
	"peaks$start.ori <- peaks$start\n",
	"peaks$stop.ori <- peaks$stop\n",
	"for(i in 1:nrow(chr.size)){\n",
	"is.in <- as.character(peaks$chr) == as.character(chr.size.cum$chr[i])\n",
	"if(sum(is.in)>0){\n",
	"peaks[is.in,]$start <- peaks[is.in,]$start + chr.size.cum$shift[i]\n",
	"peaks[is.in,]$stop <- peaks[is.in,]$stop + chr.size.cum$shift[i]\n",
	"}\n",
	"}\n",
	"invisible(peaks)\n",
	"}\n",
	"deconcatenate.chr <- function(peaks, chr.size){\n",
	"chr.o <- order(chr.size[,1])\n",
	"chr.size <- chr.size[chr.o,]\n",
	"chr.shift <- cumsum(c(0, chr.size[-nrow(chr.size),2]))\n",
	"chr.size.cum <- data.frame(chr=chr.size[,1], shift=chr.shift)\n",
	"peaks$chr <- rep(NA, nrow(peaks))\n",
	"for(i in 1:(nrow(chr.size.cum)-1)){\n",
	"is.in <- peaks$start > chr.size.cum[i,2] & peaks$start <= chr.size.cum[i+1, 2]\n",
	"if(sum(is.in)>0){\n",
	"peaks[is.in,]$start <- peaks[is.in,]$start - chr.size.cum[i,2]\n",
	"peaks[is.in,]$stop <- peaks[is.in,]$stop - chr.size.cum[i,2]+1\n",
	"peaks[is.in,]$chr <- chr.size[i,1]\n",
	"}\n",
	"}\n",
	"if(i == nrow(chr.size.cum)){\n",
	"is.in <- peaks$start > chr.size.cum[i, 2]\n",
	"if(sum(is.in)>0){\n",
	"peaks[is.in,]$start <- peaks[is.in,]$start - chr.size.cum[i,2]\n",
	"peaks[is.in,]$stop <- peaks[is.in,]$stop - chr.size.cum[i,2]+1\n",
	"peaks[is.in,]$chr <- chr.size[i,1]\n",
	"}\n",
	"}\n",
	"invisible(peaks)\n",
	"}\n",
	"find.overlap <- function(rep1, rep2){\n",
	"o1 <- order(rep1$start)\n",
	"rep1 <- rep1[o1,]\n",
	"o2 <- order(rep2$start)\n",
	"rep2 <- rep2[o2,]\n",
	"n1 <- length(o1)\n",
	"n2 <- length(o2)\n",
	"id1 <- rep(0, n1) # ID assigned on rep1\n",
	"id2 <- rep(0, n2) # ID assigned on rep2\n",
	"id <- 1 # keep track common id's\n",
	"i <- 1\n",
	"j <- 1\n",
	"while(i <= n1|| j <= n2){\n",
	"if(i > n1 && j < n2){\n",
	"j <- j+1\n",
	"id2[j] <- id\n",
	"id <- id +1\n",
	"next\n",
	"} else{\n",
	"if(j > n2 && i < n1){\n",
	"i <- i+1\n",
	"id1[i] <- id\n",
	"id <- id +1\n",
	"next\n",
	"} else {\n",
	"if(i >= n1 && j >=n2)\n",
	"break\n",
	"}\n",
	"}\n",
	"if(!(rep1$start[i] <= rep2$stop[j] && rep2$start[j] <= rep1$stop[i])){\n",
	"if(id1[i] ==0 && id2[j]==0){\n",
	"if(rep1$stop[i] < rep2$stop[j]){\n",
	"id1[i] <- id\n",
	"} else {\n",
	"id2[j] <- id\n",
	"}\n",
	"} else { # in the middle of the loop, when one is already assigned\n",
	"if(rep1$stop[i] <= rep2$stop[j]){\n",
	"id1[i] <- max(id2[j], id1[i])\n",
	"id2[j] <- id\n",
	"} else {\n",
	"if(rep1$stop[i] > rep2$stop[j]){\n",
	"id2[j] <- max(id1[i], id2[j])\n",
	"id1[i] <- id\n",
	"}\n",
	"}\n",
	"}\n",
	"id <- id +1\n",
	"} else { # if overlap\n",
	"if(id1[i] == 0 && id2[j] == 0){ # not assign label yet\n",
	"id1[i] <- id\n",
	"id2[j] <- id\n",
	"id <- id +1\n",
	"} else { # one peak is already assigned label, the other is 0\n",
	"id1[i] <- max(id1[i], id2[j]) # this is a way to copy the label of the assigned peak without knowing which one is already assigned\n",
	"id2[j] <- id1[i] # syncronize the labels\n",
	"}\n",
	"}\n",
	"if(rep1$stop[i] < rep2$stop[j]){\n",
	"i <- i+1\n",
	"} else {\n",
	"j <- j+1\n",
	"}\n",
	"}\n",
	"invisible(list(id1=id1, id2=id2))\n",
	"}\n",
	"fill.missing.peaks <- function(rep1, rep2, id1, id2, p.value.impute){\n",
	"o1 <- order(rep1$start)\n",
	"rep1 <- rep1[o1,]\n",
	"o2 <- order(rep2$start)\n",
	"rep2 <- rep2[o2,]\n",
	"entry.in1.not2 <- !is.element(id1, id2)\n",
	"entry.in2.not1 <- !is.element(id2, id1)\n",
	"if(sum(entry.in1.not2) > 0){\n",
	"temp1 <- rep1[entry.in1.not2, ]\n",
	"temp1$sig.value <- p.value.impute\n",
	"temp1$signal.value <- p.value.impute\n",
	"temp1$p.value <- p.value.impute\n",
	"temp1$q.value <- p.value.impute\n",
	"rep2.filled <- rbind(rep2, temp1)\n",
	"id2.filled <- c(id2, id1[entry.in1.not2])\n",
	"} else {\n",
	"id2.filled <- id2\n",
	"rep2.filled <- rep2\n",
	"}\n",
	"if(sum(entry.in2.not1) > 0){\n",
	"temp2 <- rep2[entry.in2.not1, ]\n",
	"temp2$sig.value <- p.value.impute\n",
	"temp2$signal.value <- p.value.impute\n",
	"temp2$p.value <- p.value.impute\n",
	"temp2$q.value <- p.value.impute\n",
	"rep1.filled <- rbind(rep1, temp2)\n",
	"id1.filled <- c(id1, id2[entry.in2.not1])\n",
	"} else {\n",
	"id1.filled <- id1\n",
	"rep1.filled <- rep1\n",
	"}\n",
	"o1 <- order(id1.filled)\n",
	"rep1.ordered <- rep1.filled[o1, ]\n",
	"o2 <- order(id2.filled)\n",
	"rep2.ordered <- rep2.filled[o2, ]\n",
	"invisible(list(rep1=rep1.ordered, rep2=rep2.ordered,\n",
	"id1=id1.filled[o1], id2=id2.filled[o2]))\n",
	"}\n",
	"merge.peaks <- function(peak.list, id){\n",
	"i <- 1\n",
	"j <- 1\n",
	"dup.index <- c()\n",
	"sig.value <- c()\n",
	"start.new <- c()\n",
	"stop.new <- c()\n",
	"id.new <- c()\n",
	"chr <- c()\n",
	"start.ori <- c()\n",
	"stop.ori <- c()\n",
	"signal.value <- c()\n",
	"p.value <- c()\n",
	"q.value <- c()\n",
	"while(i < length(id)){\n",
	"if(id[i] == id[i+1]){\n",
	"dup.index <- c(dup.index, i, i+1) # push on dup.index\n",
	"} else {\n",
	"if(length(dup.index)>0){ # pop from dup.index\n",
	"sig.value[j] <- mean(peak.list$sig.value[unique(dup.index)]) # mean of -log(pvalue)\n",
	"start.new[j] <- peak.list$start[min(dup.index)]\n",
	"stop.new[j] <- peak.list$stop[max(dup.index)]\n",
	"id.new[j] <- id[max(dup.index)]\n",
	"signal.value[j] <- mean(peak.list$signal.value[unique(dup.index)]) # mean of -log(pvalue)\n",
	"p.value[j] <- mean(peak.list$p.value[unique(dup.index)]) # mean of -log(pvalue)\n",
	"q.value[j] <- mean(peak.list$q.value[unique(dup.index)]) # mean of -log(pvalue)\n",
	"chr[j] <- as.character(peak.list$chr[min(dup.index)])\n",
	"start.ori[j] <- peak.list$start.ori[min(dup.index)]\n",
	"stop.ori[j] <- peak.list$stop.ori[max(dup.index)]\n",
	"dup.index <- c()\n",
	"} else { # nothing to pop\n",
	"sig.value[j] <- peak.list$sig.value[i]\n",
	"start.new[j] <- peak.list$start[i]\n",
	"stop.new[j] <- peak.list$stop[i]\n",
	"id.new[j] <- id[i]\n",
	"signal.value[j] <- peak.list$signal.value[i]\n",
	"p.value[j] <- peak.list$p.value[i]\n",
	"q.value[j] <- peak.list$q.value[i]\n",
	"chr[j] <- as.character(peak.list$chr[i])\n",
	"start.ori[j] <- peak.list$start.ori[i]\n",
	"stop.ori[j] <- peak.list$stop.ori[i]\n",
	"}\n",
	"j <- j+1\n",
	"}\n",
	"i <- i+1\n",
	"}\n",
	"data.new <- data.frame(id=id.new, sig.value=sig.value, start=start.new, stop=stop.new, signal.value=signal.value, p.value=p.value, q.value=q.value, chr=chr, start.ori=start.ori, stop.ori=stop.ori)\n",
	"invisible(data.new)\n",
	"}\n",
	"pair.peaks <- function(out1, out2, p.value.impute=0){\n",
	"aa <- find.overlap(out1, out2)\n",
	"bb <- fill.missing.peaks(out1, out2, aa$id1, aa$id2, p.value.impute=0)\n",
	"cc1 <- merge.peaks(bb$rep1, bb$id1)\n",
	"cc2 <- merge.peaks(bb$rep2, bb$id2)\n",
	"invisible(list(merge1=cc1, merge2=cc2))\n",
	"}\n",
	"pair.peaks.filter <- function(out1, out2, p.value.impute=0, overlap.ratio=0){\n",
	"aa <- find.overlap(out1, out2)\n",
	"bb <- fill.missing.peaks(out1, out2, aa$id1, aa$id2, p.value.impute=0)\n",
	"cc1 <- merge.peaks(bb$rep1, bb$id1)\n",
	"cc2 <- merge.peaks(bb$rep2, bb$id2)\n",
	"frag12 <- cbind(cc1$start, cc1$stop, cc2$start, cc2$stop)\n",
	"frag.ratio <- apply(frag12, 1, overlap.middle)\n",
	"frag.ratio[cc1$sig.value==p.value.impute | cc2$sig.value==p.value.impute] <- 0\n",
	"cc1$frag.ratio <- frag.ratio\n",
	"cc2$frag.ratio <- frag.ratio\n",
	"merge1 <- cc1[cc1$frag.ratio >= overlap.ratio,]\n",
	"merge2 <- cc2[cc2$frag.ratio >= overlap.ratio,]\n",
	"invisible(list(merge1=merge1, merge2=merge2))\n",
	"}\n",
	"pair.peaks.RNA.chr <- function(out1, out2, p.value.impute=0){\n",
	"start.power <- ceiling(log10(max(out1$start.ori, out2$start.ori)))\n",
	"stop.power <- ceiling(log10(max(out1$stop.ori, out2$stop.ori)))\n",
	"start1.tag <- 10^start.power + out1$start.ori\n",
	"start2.tag <- 10^start.power + out2$start.ori\n",
	"stop1.tag <- 10^stop.power + out1$stop.ori\n",
	"stop2.tag <- 10^stop.power + out2$stop.ori\n",
	"temp1 <- paste(as.character(start1.tag), \".\", as.character(stop1.tag), sep=\"\")\n",
	"temp2 <- paste(as.character(start2.tag), \".\", as.character(stop2.tag), sep=\"\")\n",
	"o1 <- order(temp1)\n",
	"o2 <- order(temp2)\n",
	"temp1 <- temp1[o1]\n",
	"temp2 <- temp2[o2]\n",
	"out1 <- out1[o1,]\n",
	"out2 <- out2[o2,]\n",
	"temp.unique <- union(temp1, temp2)\n",
	"order.temp <- order(temp.unique)\n",
	"temp <- as.character(temp.unique[order.temp])\n",
	"n1 <- length(out1$start.ori)\n",
	"n2 <- length(out2$start.ori)\n",
	"n <- length(temp)\n",
	"index1 <- rep(NA, n1)\n",
	"index2 <- rep(NA, n2)\n",
	"name1 <- colnames(out1)\n",
	"merge1 <- data.frame(matrix(p.value.impute, nrow=n, ncol=length(name1)))\n",
	"colnames(merge1) <- name1\n",
	"name2 <- colnames(out2)\n",
	"merge2 <- data.frame(matrix(p.value.impute, nrow=n, ncol=length(name2)))\n",
	"colnames(merge2) <- name2\n",
	"j <- 1\n",
	"for(i in 1:n1){\n",
	"index1[i] <- which(temp[j:n]==temp1[i])+j-1\n",
	"j <- index1[i]+1\n",
	"cat(\"i=\", i, \"\\t\", \"j=\", j, \"\\n\")\n",
	"}\n",
	"j <- 1\n",
	"for(i in 1:n2){\n",
	"index2[i] <- which(temp[j:n]==temp2[i])+j-1\n",
	"j <- index2[i]+1\n",
	"}\n",
	"for(i in 1:length(name1)){\n",
	"if(name1[i] == \"chr\")\n",
	"merge1[index1, name1[i]] <- as.character(out1[, name1[i]])\n",
	"else\n",
	"merge1[index1, name1[i]] <- out1[, name1[i]]\n",
	"}\n",
	"for(i in 1:length(name2)){\n",
	"if(name2[i] == \"chr\")\n",
	"merge2[index2, name2[i]] <- as.character(out2[, name2[i]])\n",
	"else\n",
	"merge2[index2, name2[i]] <- out2[, name2[i]]\n",
	"}\n",
	"merge1$id <- c(1:length(merge1$start))\n",
	"merge1$start[index2] <- out2$start\n",
	"merge1$stop[index2] <- out2$stop\n",
	"merge1$chr[index2] <- as.character(out2$chr)\n",
	"merge1$start.ori[index2] <-out2$start.ori\n",
	"merge1$stop.ori[index2] <- out2$stop.ori\n",
	"merge2$id <- c(1:length(merge2$start))\n",
	"merge2$start[index1] <- out1$start\n",
	"merge2$stop[index1] <- out1$stop\n",
	"merge2$chr[index1] <- as.character(out1$chr)\n",
	"merge2$start.ori[index1] <- out1$start.ori\n",
	"merge2$stop.ori[index1] <- out1$stop.ori\n",
	"invisible(list(merge1=merge1, merge2=merge2))\n",
	"}\n",
	"pair.peaks.RNA <- function(out1, out2, p.value.impute=0){\n",
	"chr <- sort(unique(c(as.character(out1$chr), as.character(out2$chr))))\n",
	"n.chr <- length(chr)\n",
	"merge1 <- data.frame()\n",
	"merge2 <- data.frame()\n",
	"for(i in 1:n.chr){\n",
	"out1.chr <- out1[as.character(out1$chr) == chr[i],]\n",
	"out2.chr <- out2[as.character(out2$chr) == chr[i],]\n",
	"cat(\"Pairing signals on \", chr[i], \"\\n\")\n",
	"temp <- pair.peaks.RNA.chr(out1.chr, out2.chr, p.value.impute)\n",
	"merge1 <- rbind(merge1, temp$merge1)\n",
	"merge2 <- rbind(merge2, temp$merge2)\n",
	"}\n",
	"invisible(list(merge1=merge1, merge2=merge2))\n",
	"}\n",
	"pair.peaks.RNA.list <- function(out1, out2, p.value.impute=0){\n",
	"o1 <- order(out1$start, out1$stop)\n",
	"o2 <- order(out2$start, out2$stop)\n",
	"n1 <- length(out1$start)\n",
	"n2 <- length(out2$start)\n",
	"index1 <- rep(NA, n1)\n",
	"index2 <- rep(NA, n2)\n",
	"ss1 <- list()\n",
	"for(i in 1:n1){\n",
	"ss1[[i]] <- c(out1$start[o1][i], out1$stop[o1][i])\n",
	"}\n",
	"ss2 <- list()\n",
	"for(i in 1:n2){\n",
	"ss2[[i]] <- c(out2$start[o2][i], out2$stop[o2][i])\n",
	"}\n",
	"temp <- unique(ss1, ss2)\n",
	"name1 <- colnames(out1)\n",
	"merge1 <- data.frame(matrix(p.value.impute, nrow=n, ncol=length(name1)))\n",
	"colnames(merge1) <- name1\n",
	"name2 <- colnames(out2)\n",
	"merge2 <- data.frame(matrix(p.value.impute, nrow=n, ncol=length(name2)))\n",
	"colnames(merge2) <- name2\n",
	"j <- 1\n",
	"for(i in 1:n1){\n",
	"index1[i] <- which(temp[[j:n]][1]==ss1[[i]][1] & temp[[j:n]][2]==ss1[[i]][2])+j-1\n",
	"j <- index1[i]+1\n",
	"cat(\"i=\", i, \"\\t\", \"j=\", j, \"\\n\")\n",
	"}\n",
	"j <- 1\n",
	"for(i in 1:n2){\n",
	"index2[i] <- which(temp[[j:n]][1]==ss2[[i]][1] & temp[[j:n]][2]==ss2[[i]][2])+j-1\n",
	"j <- index2[i]+1\n",
	"}\n",
	"for(i in 1:length(name1)){\n",
	"merge1[index1, name1[i]] <- out1[, name1[i]]\n",
	"}\n",
	"for(i in 1:length(name2)){\n",
	"merge2[index2, name2[i]] <- out2[, name2[i]]\n",
	"}\n",
	"merge1$id <- c(1:length(merge1$start))\n",
	"merge1$start[index2] <- out2$start\n",
	"merge1$stop[index2] <- out2$stop\n",
	"merge1$chr[index2] <- out2$chr\n",
	"merge1$start.ori[index2] <- out2$start.ori\n",
	"merge1$stop.ori[index2] <- out2$stop.ori\n",
	"merge2$id <- c(1:length(merge2$start))\n",
	"merge2$start[index1] <- out1$start\n",
	"merge2$stop[index1] <- out1$stop\n",
	"merge2$chr[index1] <- out1$chr\n",
	"merge2$start.ori[index1] <- out1$start.ori\n",
	"merge2$stop.ori[index1] <- out1$stop.ori\n",
	"invisible(list(merge1=merge1, merge2=merge2))\n",
	"}\n",
	"overlap.middle  <- function(x){\n",
	"x.o <- x[order(x)]\n",
	"f1 <- x[2]-x[1]\n",
	"f2 <- x[4]-x[3]\n",
	"f.overlap <- abs(x.o[3]-x.o[2])\n",
	"f.overlap.ratio <- f.overlap/min(f1, f2)\n",
	"return(f.overlap.ratio)\n",
	"}\n",
	"comp.uri <- function(tv, x){\n",
	"n <- length(x)\n",
	"qt <- quantile(x, prob=1-tv[1]) # tv[1] is t\n",
	"sum(x[1:ceiling(n*tv[2])] >= qt)/n\n",
	"}\n",
	"get.uri.2d <- function(x1, x2, tt, vv, spline.df=NULL){\n",
	"o <- order(x1, x2, decreasing=T)\n",
	"x2.ordered <- x2[o]\n",
	"tv <- cbind(tt, vv)\n",
	"ntotal <- length(x1) # number of peaks\n",
	"uri <- apply(tv, 1, comp.uri, x=x2.ordered)\n",
	"uri.binned <- uri[seq(1, length(uri), by=4)]\n",
	"tt.binned <- tt[seq(1, length(uri), by=4)]\n",
	"uri.slope <- (uri.binned[2:(length(uri.binned))] - uri.binned[1:(length(uri.binned)-1)])/(tt.binned[2:(length(uri.binned))] - tt.binned[1:(length(tt.binned)-1)])\n",
	"short.list.length <- min(sum(x1>0)/length(x1), sum(x2>0)/length(x2))\n",
	"if(short.list.length < max(tt)){\n",
	"jump.left <- which(tt>short.list.length)[1]-1\n",
	"} else {\n",
	"jump.left <- which.max(tt)\n",
	"}\n",
	"if(jump.left < 6){\n",
	"jump.left <- length(tt)\n",
	"}\n",
	"if(is.null(spline.df))\n",
	"uri.spl <- smooth.spline(tt[1:jump.left], uri[1:jump.left], df=6.4)\n",
	"else{\n",
	"uri.spl <- smooth.spline(tt[1:jump.left], uri[1:jump.left], df=spline.df)\n",
	"}\n",
	"uri.der <- predict(uri.spl, tt[1:jump.left], deriv=1)\n",
	"invisible(list(tv=tv, uri=uri,\n",
	"uri.slope=uri.slope, t.binned=tt.binned[2:length(uri.binned)],\n",
	"uri.spl=uri.spl, uri.der=uri.der, jump.left=jump.left,\n",
	"ntotal=ntotal))\n",
	"}\n",
	"scale.t2n <- function(uri){\n",
	"ntotal <- uri$ntotal\n",
	"tv <- uri$tv*uri$ntotal\n",
	"uri.uri <- uri$uri*uri$ntotal\n",
	"jump.left <- uri$jump.left\n",
	"uri.spl <- uri$uri.spl\n",
	"uri.spl$x <- uri$uri.spl$x*uri$ntotal\n",
	"uri.spl$y <- uri$uri.spl$y*uri$ntotal\n",
	"t.binned <- uri$t.binned*uri$ntotal\n",
	"uri.slope <- uri$uri.slope\n",
	"uri.der <- uri$uri.der\n",
	"uri.der$x <- uri$uri.der$x*uri$ntotal\n",
	"uri.der$y <- uri$uri.der$y\n",
	"uri.n <- list(tv=tv, uri=uri.uri, t.binned=t.binned, uri.slope=uri.slope, uri.spl=uri.spl, uri.der=uri.der, ntotal=ntotal, jump.left=jump.left)\n",
	"return(uri.n)\n",
	"}\n",
	"compute.pair.uri <- function(data.1, data.2, sig.value1=\"signal.value\", sig.value2=\"signal.value\", spline.df=NULL, overlap.ratio=0, is.RNAseq=F){\n",
	"tt <- seq(0.01, 1, by=0.01)\n",
	"vv <- tt\n",
	"if(sig.value1==\"signal.value\"){\n",
	"data.1.enrich <- data.frame(chr=data.1$chr, start.ori=data.1$start.ori, stop.ori=data.1$stop.ori, start=data.1$start, stop=data.1$stop, sig.value=data.1$signal.value, signal.value=data.1$signal.value, p.value=data.1$p.value, q.value=data.1$q.value)\n",
	"} else {\n",
	"if(sig.value1==\"p.value\"){\n",
	"data.1.enrich <- data.frame(chr=data.1$chr, start.ori=data.1$start.ori, stop.ori=data.1$stop.ori, start=data.1$start, stop=data.1$stop, sig.value=data.1$p.value, signal.value=data.1$signal.value, p.value=data.1$p.value, q.value=data.1$q.value)\n",
	"} else {\n",
	"if(sig.value1==\"q.value\"){\n",
	"data.1.enrich <- data.frame(chr=data.1$chr, start.ori=data.1$start.ori, stop.ori=data.1$stop.ori, start=data.1$start, stop=data.1$stop, sig.value=data.1$q.value, signal.value=data.1$signal.value, p.value=data.1$p.value, q.value=data.1$q.value)\n",
	"}\n",
	"}\n",
	"}\n",
	"if(sig.value2==\"signal.value\"){\n",
	"data.2.enrich <- data.frame(chr=data.2$chr, start.ori=data.2$start.ori, stop.ori=data.2$stop.ori, start=data.2$start, stop=data.2$stop, sig.value=data.2$signal.value, signal.value=data.2$signal.value, p.value=data.2$p.value, q.value=data.2$q.value)\n",
	"} else {\n",
	"if(sig.value2==\"p.value\"){\n",
	"data.2.enrich <- data.frame(chr=data.2$chr, start.ori=data.2$start.ori, stop.ori=data.2$stop.ori, start=data.2$start, stop=data.2$stop, sig.value=data.2$p.value, signal.value=data.2$signal.value, p.value=data.2$p.value, q.value=data.2$q.value)\n",
	"} else {\n",
	"if(sig.value2==\"q.value\"){\n",
	"data.2.enrich <- data.frame(chr=data.2$chr, start.ori=data.2$start.ori, stop.ori=data.2$stop.ori, start=data.2$start, stop=data.2$stop, sig.value=data.2$q.value, signal.value=data.2$signal.value, p.value=data.2$p.value, q.value=data.2$q.value)\n",
	"}\n",
	"}\n",
	"}\n",
	"if(!is.RNAseq)\n",
	"data12.enrich <- pair.peaks.filter(data.1.enrich, data.2.enrich, p.value.impute=0, overlap.ratio)\n",
	"else\n",
	"data12.enrich <- pair.peaks.RNA(data.1.enrich, data.2.enrich, p.value.impute=0)\n",
	"uri <- get.uri.2d(as.numeric(as.character(data12.enrich$merge1$sig.value)), as.numeric(as.character(data12.enrich$merge2$sig.value)), tt, vv, spline.df=spline.df)\n",
	"uri.n <- scale.t2n(uri)\n",
	"return(list(uri=uri, uri.n=uri.n, data12.enrich=data12.enrich, sig.value1=sig.value1, sig.value2=sig.value2))\n",
	"}\n",
	"get.uri.matched <- function(data12, df=10, select.match=F){\n",
	"tt <- seq(0.01, 1, by=0.01)\n",
	"vv <- tt\n",
	"if(!select.match){\n",
	"uri <- get.uri.2d(data12$sample1$sig.value, data12$sample2$sig.value, tt, vv, spline.df=df)\n",
	"} else {\n",
	"is.match <- (data12$sample1$id != -1) & (data12$sample2$id !=-1)\n",
	"uri <- get.uri.2d(data12$sample1$sig.value[is.match], data12$sample2$sig.value[is.match], tt, vv, spline.df=df)\n",
	"}\n",
	"uri.n <- scale.t2n(uri)\n",
	"return(list(uri=uri, uri.n=uri.n))\n",
	"}\n",
	"get.uri.matched.RNA <- function(data12, df=10, select.match=F){\n",
	"tt <- seq(0.01, 1, by=0.01)\n",
	"vv <- tt\n",
	"if(!select.match){\n",
	"uri <- get.uri.2d(data12$merge1$sig.value, data12$merge2$sig.value, tt, vv, spline.df=df)\n",
	"} else {\n",
	"is.match <- (data12$merge1$id != -1) & (data12$merge2$id !=-1)\n",
	"uri <- get.uri.2d(data12$merge1$sig.value[is.match], data12$merge2$sig.value[is.match], tt, vv, spline.df=df)\n",
	"}\n",
	"uri.n <- scale.t2n(uri)\n",
	"return(list(uri=uri, uri.n=uri.n))\n",
	"}\n",
	"map.sig.value <- function(data.set, map.uv, nominal.value){\n",
	"index.nominal <- which(names(data.set$merge1)==nominal.value)\n",
	"nentry <- nrow(map.uv)\n",
	"map.nominal <- rbind(map.uv[, c(\"sig.value1\", \"sig.value2\")])\n",
	"for(i in 1:nentry){\n",
	"map.nominal[i, \"sig.value1\"] <- data.set$merge1[unique(which.min(abs(data.set$merge1$sig.value-map.uv[i, \"sig.value1\"]))), index.nominal]\n",
	"map.nominal[i, \"sig.value2\"] <- data.set$merge2[unique(which.min(abs(data.set$merge2$sig.value-map.uv[i, \"sig.value2\"]))), index.nominal]\n",
	"}\n",
	"invisible(map.nominal)\n",
	"}\n",
	"plot.uri.group <- function(uri.n.list, plot.dir, file.name=NULL, legend.txt, xlab.txt=\"num of significant peaks\", ylab.txt=\"num of peaks in common\", col.start=0, col.txt=NULL, plot.missing=F, title.txt=NULL){\n",
	"if(is.null(col.txt))\n",
	"col.txt <- c(\"black\", \"red\", \"purple\", \"green\", \"blue\", \"cyan\", \"magenta\", \"orange\", \"grey\")\n",
	"n <- length(uri.n.list)\n",
	"ntotal <- c()\n",
	"for(i in 1:n)\n",
	"ntotal[i] <- uri.n.list[[i]]$ntotal\n",
	"jump.left <- c()\n",
	"jump.left.der <- c()\n",
	"ncommon <- c()\n",
	"for(i in 1:n){\n",
	"jump.left[i] <- uri.n.list[[i]]$jump.left\n",
	"jump.left.der[i] <- jump.left[i]\n",
	"ncommon[i] <- uri.n.list[[i]]$tv[jump.left[i],1]\n",
	"}\n",
	"if(plot.missing){\n",
	"max.peak <- max(ntotal)\n",
	"} else {\n",
	"max.peak <- max(ncommon)*1.05\n",
	"}\n",
	"if(!is.null(file.name)){\n",
	"postscript(paste(plot.dir, \"uri.\", file.name, sep=\"\"))\n",
	"par(mfrow=c(1,1), mar=c(5,5,4,2))\n",
	"}\n",
	"plot(uri.n.list[[1]]$tv[,1], uri.n.list[[1]]$uri, type=\"n\", xlab=xlab.txt, ylab=ylab.txt, xlim=c(0, max.peak), ylim=c(0, max.peak), cex.lab=2)\n",
	"for(i in 1:n){\n",
	"if(plot.missing){\n",
	"points(uri.n.list[[i]]$tv[,1], uri.n.list[[i]]$uri, col=col.txt[i+col.start], cex=0.5 )\n",
	"} else {\n",
	"points(uri.n.list[[i]]$tv[1:jump.left[i],1], uri.n.list[[i]]$uri[1:jump.left[i]], col=col.txt[i+col.start], cex=0.5)\n",
	"}\n",
	"lines(uri.n.list[[i]]$uri.spl, col=col.txt[i+col.start], lwd=4)\n",
	"}\n",
	"abline(coef=c(0,1), lty=3)\n",
	"legend(0, max.peak, legend=legend.txt, col=col.txt[(col.start+1):length(col.txt)], lty=1, lwd=3, cex=2)\n",
	"if(!is.null(title))\n",
	"title(title.txt)\n",
	"if(!is.null(file.name)){\n",
	"dev.off()\n",
	"}\n",
	"if(!is.null(file.name)){\n",
	"postscript(paste(plot.dir, \"duri.\", file.name, sep=\"\"))\n",
	"par(mfrow=c(1,1), mar=c(5,5,4,2))\n",
	"}\n",
	"plot(uri.n.list[[1]]$t.binned, uri.n.list[[1]]$uri.slope, type=\"n\", xlab=xlab.txt, ylab=\"slope\", xlim=c(0, max.peak), ylim=c(0, 1.5), cex.lab=2)\n",
	"for(i in 1:n){\n",
	"lines(uri.n.list[[i]]$uri.der, col=col.txt[i+col.start], lwd=4)\n",
	"}\n",
	"abline(h=1, lty=3)\n",
	"legend(0.5*max.peak, 1.5, legend=legend.txt, col=col.txt[(col.start+1):length(col.txt)], lty=1, lwd=3, cex=2)\n",
	"if(!is.null(title))\n",
	"title(title.txt)\n",
	"if(!is.null(file.name)){\n",
	"dev.off()\n",
	"}\n",
	"}\n",
	"get.ez <- function(p, c1, c2, xd1, yd1, xd2, yd2){\n",
	"return(p*c1*xd1*yd1/(p*c1*xd1*yd1 + (1-p)*c2*xd2*yd2))\n",
	"}\n",
	"gaussian.cop.den <- function(t, s, rho){\n",
	"A <- qnorm(t)^2 + qnorm(s)^2\n",
	"B <- qnorm(t)*qnorm(s)\n",
	"loglik <-  -log(1-rho^2)/2 - rho/(2*(1-rho^2))*(rho*A-2*B)\n",
	"return(exp(loglik))\n",
	"}\n",
	"clayton.cop.den <- function(t, s, rho){\n",
	"if(rho > 0)\n",
	"return(exp(log(rho+1)-(rho+1)*(log(t)+log(s))-(2+1/rho)*log(t^(-rho) + s^(-rho)-1)))\n",
	"if(rho==0)\n",
	"return(1)\n",
	"if(rho<0)\n",
	"stop(\"Incorrect Clayton copula coefficient\")\n",
	"}\n",
	"mle.gaussian.copula <- function(t, s, e.z){\n",
	"l.c <- function(rho, t, s, e.z){\n",
	"sum(e.z*log(gaussian.cop.den(t, s, rho)))}\n",
	"rho.max <- optimize(f=l.c, c(-0.998, 0.998), maximum=T, tol=0.00001, t=t, s=s, e.z=e.z)\n",
	"return(rho.max$m)\n",
	"}\n",
	"mle.clayton.copula <- function(t, s, e.z){\n",
	"l.c <- function(rho, t, s, e.z){\n",
	"lc <- sum(e.z*log(clayton.cop.den(t, s, rho)))\n",
	"return(lc)\n",
	"}\n",
	"rho.max <- optimize(f=l.c, c(0.1, 20), maximum=T, tol=0.00001, t=t, s=s, e.z=e.z)\n",
	"return(rho.max$m)\n",
	"}\n",
	"loglik.2gaussian.copula <- function(x, y, p, rho1, rho2, x.mar, y.mar){\n",
	"px.1 <- get.pdf.cdf(x, x.mar$f1)\n",
	"px.2 <- get.pdf.cdf(x, x.mar$f2)\n",
	"py.1 <- get.pdf.cdf(y, y.mar$f1)\n",
	"py.2 <- get.pdf.cdf(y, y.mar$f2)\n",
	"c1 <- gaussian.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"sum(log(p*c1*px.1$pdf*py.1$pdf + (1-p)*c2*px.2$pdf*py.2$pdf))\n",
	"}\n",
	"loglik.2copula <- function(x, y, p, rho1, rho2, x.mar, y.mar, copula.txt){\n",
	"px.1 <- pdf.cdf$px.1\n",
	"px.2 <- pdf.cdf$px.2\n",
	"py.1 <- pdf.cdf$py.1\n",
	"py.2 <- pdf.cdf$py.2\n",
	"if(copula.txt==\"gaussian\"){\n",
	"c1 <- gaussian.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"} else {\n",
	"if(copula.txt==\"clayton\"){\n",
	"c1 <- clayton.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- clayton.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"}\n",
	"}\n",
	"sum(log(p*c1*px.1$pdf*py.1$pdf + (1-p)*c2*px.2$pdf*py.2$pdf))\n",
	"}\n",
	"est.mar.hist <- function(x, e.z, breaks){\n",
	"binwidth <- c()\n",
	"nbin <- length(breaks)-1\n",
	"nx <- length(x)\n",
	"x1.pdf <- c()\n",
	"x2.pdf <- c()\n",
	"x1.cdf <- c()\n",
	"x2.cdf <- c()\n",
	"x1.pdf.value <- rep(NA, nx)\n",
	"x2.pdf.value <- rep(NA, nx)\n",
	"x1.cdf.value <- rep(NA, nx)\n",
	"x2.cdf.value <- rep(NA, nx)\n",
	"for(i in 1:nbin){\n",
	"binwidth[i] <- breaks[i+1] - breaks[i]\n",
	"if(i < nbin)\n",
	"in.bin <- x>= breaks[i] & x < breaks[i+1]\n",
	"else    # last bin\n",
	"in.bin <- x>= breaks[i] & x <=breaks[i+1]\n",
	"x1.pdf[i] <- (sum(e.z[in.bin])+1)/(sum(e.z)+nbin)/binwidth[i]*(nx+nbin)/(nx+nbin+1)\n",
	"x2.pdf[i] <- (sum(1-e.z[in.bin])+1)/(sum(1-e.z)+nbin)/binwidth[i]*(nx+nbin)/(nx+nbin+1)\n",
	"if(i>1){\n",
	"x1.cdf[i] <- sum(x1.pdf[1:(i-1)]*binwidth[1:(i-1)])\n",
	"x2.cdf[i] <- sum(x2.pdf[1:(i-1)]*binwidth[1:(i-1)])\n",
	"} else{\n",
	"x1.cdf[i] <- 0\n",
	"x2.cdf[i] <- 0\n",
	"}\n",
	"x1.pdf.value[in.bin] <- x1.pdf[i]\n",
	"x2.pdf.value[in.bin] <- x2.pdf[i]\n",
	"x1.cdf.value[in.bin] <- x1.cdf[i] + x1.pdf[i]*(x[in.bin]-breaks[i])\n",
	"x2.cdf.value[in.bin] <- x2.cdf[i] + x2.pdf[i]*(x[in.bin]-breaks[i])\n",
	"}\n",
	"f1 <-list(breaks=breaks, density=x1.pdf, cdf=x1.cdf)\n",
	"f2 <-list(breaks=breaks, density=x2.pdf, cdf=x2.cdf)\n",
	"f1.value <- list(pdf=x1.pdf.value, cdf=x1.cdf.value)\n",
	"f2.value <- list(pdf=x2.pdf.value, cdf=x2.cdf.value)\n",
	"return(list(f1=f1, f2=f2, f1.value=f1.value, f2.value=f2.value))\n",
	"}\n",
	"est.cdf.rank <- function(x, conf.z){\n",
	"x1.cdf <- rank(x[conf.z==1])/(length(x[conf.z==1])+1)\n",
	"x2.cdf <- rank(x[conf.z==0])/(length(x[conf.z==0])+1)\n",
	"return(list(cdf1=x1.cdf, cdf2=x2.cdf))\n",
	"}\n",
	"get.pdf <- function(x, df){\n",
	"if(x < df$breaks[1])\n",
	"cat(\"x is out of the range of df\\n\")\n",
	"index <- which(df$breaks >= x)[1]\n",
	"if(index==1)\n",
	"index <- index +1\n",
	"return(df$density[index-1])\n",
	"}\n",
	"get.cdf <- function(x, df){\n",
	"index <- which(df$breaks >= x)[1]\n",
	"if(index==1)\n",
	"index <- index +1\n",
	"return(df$cdf[index-1])\n",
	"}\n",
	"get.pdf.cdf <- function(x.vec, df){\n",
	"x.pdf <- sapply(x.vec, get.pdf, df=df)\n",
	"x.cdf <- sapply(x.vec, get.cdf, df=df)\n",
	"return(list(cdf=x.cdf, pdf=x.pdf))\n",
	"}\n",
	"e.step.2gaussian <- function(x, y, p, rho1, rho2, x.mar, y.mar){\n",
	"px.1 <- get.pdf.cdf(x, x.mar$f1)\n",
	"px.2 <- get.pdf.cdf(x, x.mar$f2)\n",
	"py.1 <- get.pdf.cdf(y, y.mar$f1)\n",
	"py.2 <- get.pdf.cdf(y, y.mar$f2)\n",
	"c1 <- gaussian.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"return(get.ez(p, c1, c2, px.1$pdf, py.1$pdf, px.2$pdf, py.2$pdf))\n",
	"}\n",
	"e.step.2copula <- function(x, y, p, rho1, rho2, x.mar, y.mar, copula.txt){\n",
	"px.1 <- get.pdf.cdf(x, x.mar$f1)\n",
	"px.2 <- get.pdf.cdf(x, x.mar$f2)\n",
	"py.1 <- get.pdf.cdf(y, y.mar$f1)\n",
	"py.2 <- get.pdf.cdf(y, y.mar$f2)\n",
	"if(copula.txt==\"gaussian\"){\n",
	"c1 <- gaussian.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"} else {\n",
	"if(copula.txt==\"clayton\"){\n",
	"c1 <- clayton.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- clayton.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"}\n",
	"}\n",
	"return(get.ez(p, c1, c2, px.1$pdf, py.1$pdf, px.2$pdf, py.2$pdf))\n",
	"}\n",
	"m.step.2gaussian <- function(x, y, e.z, breaks){\n",
	"x.mar <- est.mar.hist(x, e.z, breaks)\n",
	"y.mar <- est.mar.hist(y, e.z, breaks)\n",
	"px.1 <- get.pdf.cdf(x, x.mar$f1)\n",
	"px.2 <- get.pdf.cdf(x, x.mar$f2)\n",
	"py.1 <- get.pdf.cdf(y, y.mar$f1)\n",
	"py.2 <- get.pdf.cdf(y, y.mar$f2)\n",
	"rho1 <- mle.gaussian.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"rho2 <- mle.gaussian.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"p <- sum(e.z)/length(e.z)\n",
	"return(list(p=p, rho1=rho1, rho2=rho2, x.mar=x.mar, y.mar=y.mar))\n",
	"}\n",
	"m.step.2copula <- function(x, y, e.z, breaks, copula.txt){\n",
	"x.mar <- est.mar.hist(x, e.z, breaks)\n",
	"y.mar <- est.mar.hist(y, e.z, breaks)\n",
	"px.1 <- get.pdf.cdf(x, x.mar$f1)\n",
	"px.2 <- get.pdf.cdf(x, x.mar$f2)\n",
	"py.1 <- get.pdf.cdf(y, y.mar$f1)\n",
	"py.2 <- get.pdf.cdf(y, y.mar$f2)\n",
	"if(copula.txt==\"gaussian\"){\n",
	"rho1 <- mle.gaussian.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"rho2 <- mle.gaussian.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"} else {\n",
	"if(copula.txt==\"clayton\"){\n",
	"rho1 <- mle.clayton.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"rho2 <- mle.clayton.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"}\n",
	"}\n",
	"p <- sum(e.z)/length(e.z)\n",
	"return(list(p=p, rho1=rho1, rho2=rho2, x.mar=x.mar, y.mar=y.mar))\n",
	"}\n",
	"e.step.2gaussian.value <- function(x, y, p, rho1, rho2, pdf.cdf){\n",
	"c1 <- gaussian.cop.den(pdf.cdf$px.1$cdf, pdf.cdf$py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(pdf.cdf$px.2$cdf, pdf.cdf$py.2$cdf, rho2)\n",
	"e.z <- get.ez(p, c1, c2, pdf.cdf$px.1$pdf, pdf.cdf$py.1$pdf,\n",
	"pdf.cdf$px.2$pdf, pdf.cdf$py.2$pdf)\n",
	"return(e.z)\n",
	"}\n",
	"e.step.2copula.value <- function(x, y, p, rho1, rho2, pdf.cdf, copula.txt){\n",
	"if(copula.txt ==\"gaussian\"){\n",
	"c1 <- gaussian.cop.den(pdf.cdf$px.1$cdf, pdf.cdf$py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(pdf.cdf$px.2$cdf, pdf.cdf$py.2$cdf, rho2)\n",
	"} else {\n",
	"if(copula.txt ==\"clayton\"){\n",
	"c1 <- clayton.cop.den(pdf.cdf$px.1$cdf, pdf.cdf$py.1$cdf, rho1)\n",
	"c2 <- clayton.cop.den(pdf.cdf$px.2$cdf, pdf.cdf$py.2$cdf, rho2)\n",
	"}\n",
	"}\n",
	"e.z <- get.ez(p, c1, c2, pdf.cdf$px.1$pdf, pdf.cdf$py.1$pdf,\n",
	"pdf.cdf$px.2$pdf, pdf.cdf$py.2$pdf)\n",
	"return(e.z)\n",
	"}\n",
	"m.step.2gaussian.value <- function(x, y, e.z, breaks, fix.rho2){\n",
	"x.mar <- est.mar.hist(x, e.z, breaks)\n",
	"y.mar <- est.mar.hist(y, e.z, breaks)\n",
	"px.1 <- x.mar$f1.value\n",
	"px.2 <- x.mar$f2.value\n",
	"py.1 <- y.mar$f1.value\n",
	"py.2 <- y.mar$f2.value\n",
	"rho1 <- mle.gaussian.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"if(!fix.rho2)\n",
	"rho2 <- mle.gaussian.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"else\n",
	"rho2 <- 0\n",
	"p <- sum(e.z)/length(e.z)\n",
	"pdf.cdf <- list(px.1=px.1, px.2=px.2, py.1=py.1, py.2=py.2)\n",
	"return(list(p=p, rho1=rho1, rho2=rho2, x.mar=x.mar, y.mar=y.mar,\n",
	"pdf.cdf=pdf.cdf))\n",
	"}\n",
	"m.step.2gaussian.value2 <- function(x, y, e.z, breaks, fix.rho2, x.mar, y.mar){\n",
	"px.1 <- x.mar$f1.value\n",
	"px.2 <- x.mar$f2.value\n",
	"py.1 <- y.mar$f1.value\n",
	"py.2 <- y.mar$f2.value\n",
	"rho1 <- mle.gaussian.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"if(!fix.rho2)\n",
	"rho2 <- mle.gaussian.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"else\n",
	"rho2 <- 0\n",
	"p <- sum(e.z)/length(e.z)\n",
	"pdf.cdf <- list(px.1=px.1, px.2=px.2, py.1=py.1, py.2=py.2)\n",
	"return(list(p=p, rho1=rho1, rho2=rho2, x.mar=x.mar, y.mar=y.mar,\n",
	"pdf.cdf=pdf.cdf))\n",
	"}\n",
	"m.step.2copula.value <- function(x, y, e.z, breaks, fix.rho2, copula.txt){\n",
	"x.mar <- est.mar.hist(x, e.z, breaks)\n",
	"y.mar <- est.mar.hist(y, e.z, breaks)\n",
	"px.1 <- x.mar$f1.value\n",
	"px.2 <- x.mar$f2.value\n",
	"py.1 <- y.mar$f1.value\n",
	"py.2 <- y.mar$f2.value\n",
	"if(copula.txt==\"gaussian\"){\n",
	"rho1 <- mle.gaussian.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"if(!fix.rho2)\n",
	"rho2 <- mle.gaussian.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"else\n",
	"rho2 <- 0\n",
	"} else {\n",
	"if(copula.txt==\"clayton\"){\n",
	"rho1 <- mle.clayton.copula(px.1$cdf, py.1$cdf, e.z)\n",
	"if(!fix.rho2)\n",
	"rho2 <- mle.clayton.copula(px.2$cdf, py.2$cdf, 1-e.z)\n",
	"else\n",
	"rho2 <- 0\n",
	"}\n",
	"}\n",
	"p <- sum(e.z)/length(e.z)\n",
	"pdf.cdf <- list(px.1=px.1, px.2=px.2, py.1=py.1, py.2=py.2)\n",
	"return(list(p=p, rho1=rho1, rho2=rho2, x.mar=x.mar, y.mar=y.mar,\n",
	"pdf.cdf=pdf.cdf))\n",
	"}\n",
	"loglik.2gaussian.copula.value <- function(x, y, p, rho1, rho2, pdf.cdf){\n",
	"px.1 <- pdf.cdf$px.1\n",
	"px.2 <- pdf.cdf$px.2\n",
	"py.1 <- pdf.cdf$py.1\n",
	"py.2 <- pdf.cdf$py.2\n",
	"c1 <- gaussian.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"sum(log(p*c1*px.1$pdf*py.1$pdf + (1-p)*c2*px.2$pdf*py.2$pdf))\n",
	"}\n",
	"loglik.2copula.value <- function(x, y, p, rho1, rho2, pdf.cdf, copula.txt){\n",
	"px.1 <- pdf.cdf$px.1\n",
	"px.2 <- pdf.cdf$px.2\n",
	"py.1 <- pdf.cdf$py.1\n",
	"py.2 <- pdf.cdf$py.2\n",
	"if(copula.txt==\"gaussian\"){\n",
	"c1 <- gaussian.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- gaussian.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"} else {\n",
	"if(copula.txt==\"clayton\"){\n",
	"c1 <- clayton.cop.den(px.1$cdf, py.1$cdf, rho1)\n",
	"c2 <- clayton.cop.den(px.2$cdf, py.2$cdf, rho2)\n",
	"}\n",
	"}\n",
	"sum(log(p*c1*px.1$pdf*py.1$pdf + (1-p)*c2*px.2$pdf*py.2$pdf))\n",
	"}\n",
	"em.2gaussian.quick <- function(x, y, p0, rho1.0, rho2.0, eps, fix.p=F, stoc=T, fix.rho2=T){\n",
	"x <- rank(x, tie=\"random\")\n",
	"y <- rank(y, tie=\"random\")\n",
	"xy.min <- min(x, y)\n",
	"xy.max <- max(x, y)\n",
	"binwidth <- (xy.max-xy.min)/50\n",
	"breaks <- seq(xy.min-binwidth/100, xy.max+binwidth/100, by=(xy.max-xy.min+binwidth/50)/50)\n",
	"e.z <- c(rep(0.9, round(length(x)*p0)), rep(0.1, length(x)-round(length(x)*p0)))\n",
	"if(!stoc)\n",
	"para <- m.step.2gaussian.value(x, y, e.z, breaks, fix.rho2)\n",
	"else\n",
	"para <- m.step.2gaussian.stoc.value(x, y, e.z, breaks, fix.rho2)\n",
	"if(fix.p){\n",
	"p <- p0\n",
	"} else {\n",
	"p <- para$p\n",
	"}\n",
	"if(fix.rho2){\n",
	"rho2 <- rho2.0\n",
	"} else {\n",
	"rho2 <- para$rho2\n",
	"}\n",
	"rho1 <- para$rho1\n",
	"l0 <- loglik.2gaussian.copula.value(x, y, p, rho1, rho2, para$pdf.cdf)\n",
	"loglik.trace <- c()\n",
	"loglik.trace[1] <- l0\n",
	"to.run <- T\n",
	"i <- 2\n",
	"while(to.run){\n",
	"e.z <- e.step.2gaussian.value(x, y, p, rho1, rho2, para$pdf.cdf)\n",
	"if(!stoc)\n",
	"para <- m.step.2gaussian.value(x, y, e.z, breaks, fix.rho2)\n",
	"else\n",
	"para <- m.step.2gaussian.stoc.value(x, y, e.z, breaks, fix.rho2)\n",
	"if(fix.p){\n",
	"p <- p0\n",
	"} else {\n",
	"p <- para$p\n",
	"}\n",
	"if(fix.rho2){\n",
	"rho2 <- rho2.0\n",
	"} else {\n",
	"rho2 <- para$rho2\n",
	"}\n",
	"rho1 <- para$rho1\n",
	"l1 <- loglik.2gaussian.copula.value(x, y, p, rho1, rho2, para$pdf.cdf)\n",
	"loglik.trace[i] <- l1\n",
	"if(i > 2){\n",
	"l.inf <- loglik.trace[i-2] + (loglik.trace[i-1] - loglik.trace[i-2])/(1-(loglik.trace[i]-loglik.trace[i-1])/(loglik.trace[i-1]-loglik.trace[i-2]))\n",
	"to.run <- abs(l.inf - loglik.trace[i]) > eps\n",
	"}\n",
	"i <- i+1\n",
	"}\n",
	"bic <- -2*l1 + (2*(length(breaks)-1+1)+1-fix.p-fix.rho2)*log(length(x)) # parameters\n",
	"return(list(para=list(p=para$p, rho1=rho1, rho2=rho2),\n",
	"loglik=l1, bic=bic, e.z=e.z, conf.z = para$conf.z,\n",
	"loglik.trace=loglik.trace, x.mar=para$x.mar, y.mar=para$y.mar,\n",
	"breaks=breaks))\n",
	"}\n",
	"em.2copula.quick <- function(x, y, p0, rho1.0, rho2.0, eps, fix.p=F, stoc=T, fix.rho2=T, copula.txt, nbin=50){\n",
	"x <- rank(x, tie=\"random\")\n",
	"y <- rank(y, tie=\"random\")\n",
	"xy.min <- min(x, y)\n",
	"xy.max <- max(x, y)\n",
	"binwidth <- (xy.max-xy.min)/50\n",
	"breaks <- seq(xy.min-binwidth/100, xy.max+binwidth/100, by=(xy.max-xy.min+binwidth/50)/nbin)\n",
	"e.z <- c(rep(0.9, round(length(x)*p0)), rep(0.1, length(x)-round(length(x)*p0)))\n",
	"if(!stoc)\n",
	"para <- m.step.2copula.value(x, y, e.z, breaks, fix.rho2, copula.txt)\n",
	"else\n",
	"para <- m.step.2copula.stoc.value(x, y, e.z, breaks, fix.rho2, copula.txt)\n",
	"if(fix.p){\n",
	"p <- p0\n",
	"} else {\n",
	"p <- para$p\n",
	"}\n",
	"if(fix.rho2){\n",
	"rho2 <- rho2.0\n",
	"} else {\n",
	"rho2 <- para$rho2\n",
	"}\n",
	"l0 <- loglik.2copula.value(x, y, p, para$rho1, rho2, para$pdf.cdf, copula.txt)\n",
	"loglik.trace <- c()\n",
	"loglik.trace[1] <- l0\n",
	"to.run <- T\n",
	"i <- 2\n",
	"while(to.run){\n",
	"e.z <- e.step.2copula.value(x, y, p, para$rho1, rho2, para$pdf.cdf, copula.txt)\n",
	"if(!stoc)\n",
	"para <- m.step.2copula.value(x, y, e.z, breaks, fix.rho2, copula.txt)\n",
	"else\n",
	"para <- m.step.2copula.stoc.value(x, y, e.z, breaks, fix.rho2, copula.txt)\n",
	"if(fix.p){\n",
	"p <- p0\n",
	"} else {\n",
	"p <- para$p\n",
	"}\n",
	"if(fix.rho2){\n",
	"rho2 <- rho2.0\n",
	"} else {\n",
	"rho2 <- para$rho2\n",
	"}\n",
	"l1 <- loglik.2copula.value(x, y, p, para$rho1, rho2, para$pdf.cdf, copula.txt)\n",
	"loglik.trace[i] <- l1\n",
	"cat(\"l1=\", l1, \"\\n\")\n",
	"if(i > 2){\n",
	"l.inf <- loglik.trace[i-2] + (loglik.trace[i-1] - loglik.trace[i-2])/(1-(loglik.trace[i]-loglik.trace[i-1])/(loglik.trace[i-1]-loglik.trace[i-2]))\n",
	"to.run <- abs(l.inf - loglik.trace[i]) > eps\n",
	"cat(\"para=\", \"p=\", para$p, \" rho1=\", para$rho1, \" rho2=\", rho2, \"\\n\")\n",
	"}\n",
	"i <- i+1\n",
	"}\n",
	"bic <- -2*l1 + (2*(length(breaks)-1+1)+1-fix.p-fix.rho2)*log(length(x)) # parameters\n",
	"return(list(para=list(p=para$p, rho1=para$rho1, rho2=rho2),\n",
	"loglik=l1, bic=bic, e.z=e.z, conf.z = para$conf.z,\n",
	"loglik.trace=loglik.trace, x.mar=para$x.mar, y.mar=para$y.mar,\n",
	"breaks=breaks))\n",
	"}\n",
	"rm.unmatch <- function(sample1, sample2, p.value.impute=0){\n",
	"sample1.prune <- sample1[sample1$sig.value > p.value.impute & sample2$sig.value > p.value.impute,]\n",
	"sample2.prune <- sample2[sample1$sig.value > p.value.impute & sample2$sig.value > p.value.impute,]\n",
	"invisible(list(sample1=sample1.prune, sample2=sample2.prune))\n",
	"}\n",
	"fit.em <- function(sample12, fix.rho2=T){\n",
	"prune.sample <- rm.unmatch(sample12$merge1, sample12$merge2)\n",
	"em.fit <- em.2gaussian.quick(-prune.sample$sample1$sig.value, -prune.sample$sample2$sig.value,\n",
	"p0=0.5, rho1.0=0.7, rho2.0=0, eps=0.01, fix.p=F, stoc=F, fix.rho2)\n",
	"invisible(list(em.fit=em.fit, data.pruned=prune.sample))\n",
	"}\n",
	"fit.2copula.em <- function(sample12, fix.rho2=T, copula.txt){\n",
	"prune.sample <- rm.unmatch(sample12$merge1, sample12$merge2)\n",
	"para <- list()\n",
	"para$rho <- 0.6\n",
	"para$p <- 0.3\n",
	"para$mu <- 2.5\n",
	"para$sigma <- 1\n",
	"cat(\"EM is running\")\n",
	"em.fit <- em.transform(prune.sample$sample1$sig.value, prune.sample$sample2$sig.value, para$mu, para$sigma, para$rho, para$p, eps=0.01)\n",
	"invisible(list(em.fit=em.fit, data.pruned=prune.sample))\n",
	"}\n",
	"fit.1.component <- function(data.pruned, breaks){\n",
	"gaussian.1 <- fit.gaussian.1(-data.pruned$sample1, -data.pruned$sample2, breaks)\n",
	"clayton.1 <- fit.clayton.1(-data.pruned$sample1, -data.pruned$sample2, breaks)\n",
	"return(list(gaussian.1=gaussian.1, clayton.1=clayton.1))\n",
	"}\n",
	"fit.gaussian.1 <- function(x, y, breaks=NULL){\n",
	"t <- emp.mar.cdf.rank(x)\n",
	"s <- emp.mar.cdf.rank(y)\n",
	"mle.rho <- mle.gaussian.copula(t, s, rep(1, length(t)))\n",
	"c1 <- gaussian.cop.den(t, s, mle.rho)\n",
	"cat(\"c1\", sum(log(c1)), \"\\n\")\n",
	"if(is.null(breaks)){\n",
	"f1 <- emp.mar.pdf.rank(t)\n",
	"f2 <- emp.mar.pdf.rank(s)\n",
	"} else {\n",
	"x.mar <- est.mar.hist(rank(x), rep(1, length(x)), breaks)\n",
	"y.mar <- est.mar.hist(rank(y), rep(1, length(y)), breaks)\n",
	"f1 <- x.mar$f1.value$pdf  # only one component\n",
	"f2 <- y.mar$f1.value$pdf\n",
	"}\n",
	"cat(\"f1\", sum(log(f1)), \"\\n\")\n",
	"cat(\"f2\", sum(log(f2)), \"\\n\")\n",
	"loglik <- sum(log(c1)+log(f1)+log(f2))\n",
	"bic <- -2*loglik + log(length(t))*(1+length(breaks)-1)\n",
	"return(list(rho=mle.rho, loglik=loglik, bic=bic))\n",
	"}\n",
	"fit.clayton.1 <- function(x, y, breaks=NULL){\n",
	"t <- emp.mar.cdf.rank(x)\n",
	"s <- emp.mar.cdf.rank(y)\n",
	"mle.rho <- mle.clayton.copula(t, s, rep(1, length(t)))\n",
	"c1 <- clayton.cop.den(t, s, mle.rho)\n",
	"if(is.null(breaks)){\n",
	"f1 <- emp.mar.pdf.rank(t)\n",
	"f2 <- emp.mar.pdf.rank(s)\n",
	"} else {\n",
	"x.mar <- est.mar.hist(rank(x), rep(1, length(x)), breaks)\n",
	"y.mar <- est.mar.hist(rank(y), rep(1, length(y)), breaks)\n",
	"f1 <- x.mar$f1.value$pdf  # only one component\n",
	"f2 <- y.mar$f1.value$pdf\n",
	"}\n",
	"loglik <- sum(log(c1)+log(f1)+log(f2))\n",
	"bic <- -2*loglik + log(length(t))*(1+length(breaks)-1)\n",
	"return(list(rho=mle.rho, tau=rho/(rho+2), loglik=loglik, bic=bic))\n",
	"}\n",
	"comp.uri.ez <- function(tt, u, v, e.z){\n",
	"u.t <- quantile(u, prob=(1-tt))\n",
	"v.t <- quantile(v, prob=(1-tt))\n",
	"ez <- mean(e.z[u >= u.t & v >=v.t])\n",
	"return(ez)\n",
	"}\n",
	"comp.ez.cutoff <- function(tt, u, v, e.z, boundary.txt){\n",
	"u.t <- quantile(u, prob=(1-tt))\n",
	"v.t <- quantile(v, prob=(1-tt))\n",
	"if(boundary.txt == \"max\"){\n",
	"ez.bound <- max(e.z[u >= u.t & v >=v.t])\n",
	"} else {\n",
	"ez.bound <- min(e.z[u >= u.t & v >=v.t])\n",
	"}\n",
	"return(ez.bound)\n",
	"}\n",
	"get.ez.tt.old  <- function(em.fit, reverse=T, fdr.level=c(0.01, 0.05, 0.1)){\n",
	"u <- em.fit$data.pruned$sample1\n",
	"v <- em.fit$data.pruned$sample2\n",
	"tt <- seq(0.01, 0.99, by=0.01)\n",
	"if(reverse){\n",
	"e.z <-  1-em.fit$em.fit$e.z # this is the error prob\n",
	"uri.ez <- sapply(tt, comp.uri.ez, u=u, v=v, e.z=e.z)\n",
	"ez.bound <- sapply(tt, comp.ez.cutoff, u=u, v=v, e.z=e.z, boundary.txt=\"max\")\n",
	"} else {\n",
	"e.z <-  em.fit$em.fit$e.z\n",
	"uri.ez <- sapply(tt, comp.uri.ez, u=u, v=v, e.z=e.z)\n",
	"ez.bound <- sapply(tt, comp.ez.cutoff, u=u, v=v, e.z=e.z, boundary.txt=\"min\")\n",
	"}\n",
	"u.t <- quantile(u, prob=(1-tt))\n",
	"v.t <- quantile(v, prob=(1-tt))\n",
	"sig.value1 <- c()\n",
	"sig.value2 <- c()\n",
	"error.prob.cutoff <- c()\n",
	"n.selected.match <- c()\n",
	"for(i in 1:length(fdr.level)){\n",
	"index <- which.min(abs(uri.ez - fdr.level[i]))\n",
	"sig.value1[i] <- u.t[index]\n",
	"sig.value2[i] <- v.t[index]\n",
	"error.prob.cutoff[i] <- ez.bound[index]\n",
	"if(reverse){\n",
	"n.selected.match[i] <- sum(e.z<=ez.bound[index])\n",
	"} else {\n",
	"n.selected.match[i] <- sum(e.z>=ez.bound[index])\n",
	"}\n",
	"}\n",
	"map.uv <- cbind(error.prob.cutoff, sig.value1, sig.value2, n.selected.match)\n",
	"return(list(n=tt*length(u), uri.ez=uri.ez, u.t=u.t, v.t=v.t, tt=tt, fdr.level=fdr.level,  map.uv=map.uv, e.z=e.z, error.prob.cutoff=error.prob.cutoff))\n",
	"}\n",
	"get.ez.tt <- function(em.fit, idr.level=c(0.01, 0.05, 0.1)){\n",
	"u <- em.fit$data.pruned$sample1\n",
	"v <- em.fit$data.pruned$sample2\n",
	"e.z <-  1-em.fit$em.fit$e.z # this is the error prob\n",
	"o <- order(e.z)\n",
	"e.z.ordered <- e.z[o]\n",
	"n.select <- c(1:length(e.z))\n",
	"IDR <- cumsum(e.z.ordered)/n.select\n",
	"u.o <- u[o]\n",
	"v.o <- v[o]\n",
	"n.level <- length(idr.level)\n",
	"ez.cutoff <- rep(NA, n.level)\n",
	"n.selected <- rep(NA, n.level)\n",
	"for(i in 1:length(idr.level)){\n",
	"index <- which.min(abs(IDR - idr.level[i]))\n",
	"ez.cutoff[i] <- e.z[index]\n",
	"n.selected[i] <- sum(e.z<=ez.cutoff[i])\n",
	"}\n",
	"map.uv <- cbind(ez.cutoff, n.selected)\n",
	"return(list(n=n.select, IDR=IDR, idr.level=idr.level, map.uv=map.uv))\n",
	"}\n",
	"get.mar.mean <- function(em.out){\n",
	"x.f1 <- em.out$x.mar$f1\n",
	"x.f2 <- em.out$x.mar$f2\n",
	"y.f1 <- em.out$y.mar$f1\n",
	"y.f2 <- em.out$y.mar$f2\n",
	"x.stat1 <- get.hist.mean(x.f1)\n",
	"x.stat2 <- get.hist.mean(x.f2)\n",
	"y.stat1 <- get.hist.mean(y.f1)\n",
	"y.stat2 <- get.hist.mean(y.f2)\n",
	"return(list(x.mean1=x.stat1$mean, x.mean2=x.stat2$mean,\n",
	"y.mean1=y.stat1$mean, y.mean2=y.stat2$mean,\n",
	"x.sd1=x.stat1$sd, x.sd2=x.stat2$sd,\n",
	"y.sd1=y.stat1$sd, y.sd2=y.stat2$sd\n",
	"))\n",
	"}\n",
	"get.hist.mean  <- function(x.f){\n",
	"nbreaks <- length(x.f$breaks)\n",
	"x.bin <- x.f$breaks[-1]-x.f$breaks[-nbreaks]\n",
	"x.mid <- (x.f$breaks[-nbreaks]+x.f$breaks[-1])/2\n",
	"x.mean <- sum(x.mid*x.f$density*x.bin)\n",
	"x.sd <- sqrt(sum(x.mid*x.mid*x.f$density*x.bin)-x.mean^2)\n",
	"return(list(mean=x.mean, sd=x.sd))\n",
	"}\n",
	"get.hist.var <- function(x.f){\n",
	"nbreaks <- length(x.f$breaks)\n",
	"x.bin <- x.f$breaks[-1]-x.f$breaks[-nbreaks]\n",
	"x.mid <- (x.f$breaks[-nbreaks]+x.f$breaks[-1])/2\n",
	"x.mean <- sum(x.mid*x.f$density*x.bin)\n",
	"return(mean=x.mean)\n",
	"}\n",
	"plot.ez.group.old <- function(ez.list, plot.dir, file.name=NULL, legend.txt, y.lim=NULL, xlab.txt=\"num of significant peaks\",  ylab.txt=\"avg posterior prob of being random\", col.txt=NULL, title.txt=NULL){\n",
	"if(is.null(col.txt))\n",
	"col.txt <- c(\"black\", \"red\", \"purple\", \"green\", \"blue\", \"cyan\", \"magenta\", \"orange\", \"grey\")\n",
	"x <- c()\n",
	"y <- c()\n",
	"for(i in 1:length(ez.list)){\n",
	"x <- c(x, ez.list[[i]]$n)\n",
	"y <- c(y, ez.list[[i]]$uri.ez)\n",
	"}\n",
	"if(is.null(y.lim))\n",
	"y.lim <- c(0, max(y))\n",
	"if(!is.null(file.name)){\n",
	"postscript(paste(plot.dir, \"ez.\", file.name, sep=\"\"))\n",
	"par(mfrow=c(1,1), mar=c(5,5,4,2))\n",
	"}\n",
	"plot(x, y, ylim=y.lim, type=\"n\", xlab=xlab.txt, ylab=ylab.txt, lwd=5, cex=5, cex.axis=2, cex.lab=2)\n",
	"for(i in 1:length(ez.list)){\n",
	"lines(ez.list[[i]]$n, ez.list[[i]]$uri.ez, col=col.txt[i], cex=2, lwd=5)\n",
	"}\n",
	"legend(0, y.lim[2], legend=legend.txt, col=col.txt[1:length(col.txt)], lty=1, lwd=5, cex=2)\n",
	"if(!is.null(title))\n",
	"title(title.txt)\n",
	"if(!is.null(file.name)){\n",
	"dev.off()\n",
	"}\n",
	"}\n",
	"plot.ez.group <- function(ez.list, plot.dir, file.name=NULL, legend.txt, y.lim=NULL, xlab.txt=\"num of significant peaks\",  ylab.txt=\"IDR\", col.txt=NULL, title.txt=NULL){\n",
	"if(is.null(col.txt))\n",
	"col.txt <- c(\"black\", \"red\", \"purple\", \"green\", \"blue\", \"cyan\", \"magenta\", \"orange\", \"grey\")\n",
	"n.entry <- length(ez.list)\n",
	"x <- rep(NA, n.entry)\n",
	"y.max <- rep(NA, n.entry)\n",
	"for(i in 1:n.entry){\n",
	"x[i] <- max(ez.list[[i]]$n)\n",
	"y.max[i] <- max(ez.list[[i]]$IDR)\n",
	"}\n",
	"if(is.null(y.lim))\n",
	"y.lim <- c(0, max(y.max))\n",
	"if(!is.null(file.name)){\n",
	"postscript(paste(plot.dir, \"ez.\", file.name, sep=\"\"))\n",
	"par(mfrow=c(1,1), mar=c(5,5,4,2))\n",
	"}\n",
	"plot(c(0, max(x)), y.lim, ylim=y.lim, type=\"n\", xlab=xlab.txt, ylab=ylab.txt, lwd=5, cex=5, cex.axis=2, cex.lab=2)\n",
	"q <- seq(0.01, 0.99, by=0.01)\n",
	"for(i in 1:length(ez.list)){\n",
	"n.plot <- round(quantile(ez.list[[i]]$n, prob=q))\n",
	"IDR.plot <- ez.list[[i]]$IDR[n.plot]\n",
	"lines(n.plot, IDR.plot, col=col.txt[i], cex=2, lwd=5)\n",
	"}\n",
	"legend(0, y.lim[2], legend=legend.txt, col=col.txt[1:length(col.txt)], lty=1, lwd=5, cex=2)\n",
	"if(!is.null(title))\n",
	"title(title.txt)\n",
	"if(!is.null(file.name)){\n",
	"dev.off()\n",
	"}\n",
	"}\n",
	"get.ez.tt.all.old  <- function(em.fit, all.data1, all.data2, idr.level){\n",
	"u <- em.fit$data.pruned$sample1\n",
	"v <- em.fit$data.pruned$sample2\n",
	"tt <- seq(0.01, 0.99, by=0.01)\n",
	"e.z <-  1-em.fit$em.fit$e.z # this is the error prob\n",
	"uri.ez <- sapply(tt, comp.uri.ez, u=u, v=v, e.z=e.z)\n",
	"ez.bound <- sapply(tt, comp.ez.cutoff, u=u, v=v, e.z=e.z, boundary.txt=\"max\")\n",
	"u.t <- quantile(u, prob=(1-tt))\n",
	"v.t <- quantile(v, prob=(1-tt))\n",
	"sig.value1 <- c()\n",
	"sig.value2 <- c()\n",
	"error.prob.cutoff <- c()\n",
	"n.selected.match <- c()\n",
	"npeak.rep1 <- c()\n",
	"npeak.rep2 <- c()\n",
	"for(i in 1:length(idr.level)){\n",
	"index <- which.min(abs(uri.ez - as.numeric(idr.level[i])))\n",
	"sig.value1[i] <- u.t[index]\n",
	"sig.value2[i] <- v.t[index]\n",
	"error.prob.cutoff[i] <- ez.bound[index]\n",
	"n.selected.match[i] <- sum(u>= u.t[index] & v>=v.t[index])\n",
	"npeak.rep1[i] <- sum(all.data1[\"sig.value\"] >= sig.value1[i])\n",
	"npeak.rep2[i] <- sum(all.data2[\"sig.value\"] >= sig.value2[i])\n",
	"}\n",
	"map.uv <- cbind(error.prob.cutoff, sig.value1, sig.value2, n.selected.match, npeak.rep1, npeak.rep2)\n",
	"return(list(n=tt*length(u), uri.ez=uri.ez, u.t=u.t, v.t=v.t, tt=tt, idr.level=idr.level,  map.uv=map.uv, e.z=e.z, error.prob.cutoff=error.prob.cutoff))\n",
	"}\n",
	"get.ez.tt.all <- function(em.fit, all.data1, all.data2, idr.level=c(0.01, 0.05, 0.1)){\n",
	"u <- em.fit$data.pruned$sample1$sig.value\n",
	"v <- em.fit$data.pruned$sample2$sig.value\n",
	"local.idr <-  1-em.fit$em.fit$e.z # this is the error prob\n",
	"o <- order(local.idr)\n",
	"local.idr.ordered <- local.idr[o]\n",
	"n.select <- c(1:length(local.idr))\n",
	"IDR <- cumsum(local.idr.ordered)/n.select\n",
	"u.o <- u[o]\n",
	"v.o <- v[o]\n",
	"n.level <- length(idr.level)\n",
	"local.idr.cutoff <- rep(NA, n.level)\n",
	"n.selected <- rep(NA, n.level)\n",
	"npeak.rep1 <- rep(NA, n.level)\n",
	"npeak.rep2 <- rep(NA, n.level)\n",
	"for(i in 1:length(idr.level)){\n",
	"index <- which.min(abs(IDR - idr.level[i]))\n",
	"local.idr.cutoff[i] <- local.idr.ordered[index]      # fixed on 02/20/10\n",
	"n.selected[i] <- sum(local.idr<=local.idr.cutoff[i])\n",
	"}\n",
	"map.uv <- cbind(local.idr.cutoff, n.selected)\n",
	"return(list(n=n.select, IDR=IDR, idr.level=idr.level, map.uv=map.uv))\n",
	"}\n",
	"pass.threshold <- function(sig.map.list, sig.value.name, combined, idr.level, nrep, chr.size){\n",
	"sig.map <- c()\n",
	"idr.index <- which(rbind(sig.map.list[[1]])[,1] == idr.level)\n",
	"if(length(i) ==0){\n",
	"print(\"no level matches specified idr.level\")\n",
	"return(-1)\n",
	"}\n",
	"for(i in 1:length(sig.map.list))\n",
	"sig.map <- c(sig.map, rbind(sig.map.list[[i]])[idr.index, c(\"sig.value1\", \"sig.value2\")])\n",
	"npeak.tight <- c()\n",
	"npeak.loose <- c()\n",
	"max.sig <- max(sig.map)\n",
	"min.sig <- min(sig.map)\n",
	"selected.sig.tight <- combined[combined[,sig.value.name]>=max.sig, ]\n",
	"selected.sig.loose <- combined[combined[,sig.value.name]>=min.sig, ]\n",
	"selected.sig.tight <- deconcatenate.chr(selected.sig.tight, chr.size)[,c(\"chr\", \"start\", \"stop\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"selected.sig.loose <- deconcatenate.chr(selected.sig.loose, chr.size)[,c(\"chr\", \"start\", \"stop\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"npeak.tight <- nrow(selected.sig.tight)\n",
	"npeak.loose <- nrow(selected.sig.loose)\n",
	"npeak.stat <- list(idr.level=idr.level, max.sig=max.sig, min.sig=min.sig, npeak.tight=npeak.tight, npeak.loose=npeak.loose)\n",
	"invisible(list(npeak.stat=npeak.stat, combined.selected.tight=selected.sig.tight, combined.selected.loose=selected.sig.loose))\n",
	"}\n",
	"pass.region <- function(sig.map.list, uri.output, ez.list, em.output, combined, idr.level, sig.value.impute=0, chr.size){\n",
	"combined <- combined[, c(\"start\", \"stop\", \"sig.value\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"npair <- length(uri.output) # number of pairs of consistency analysis\n",
	"combined.region <- c()\n",
	"idr.index <- which(rbind(sig.map.list[[1]])[,1] == idr.level)\n",
	"if(length(idr.index) ==0){\n",
	"print(\"no level matches specified idr.level\")\n",
	"return(-1)\n",
	"}\n",
	"for(j in 1:npair){\n",
	"above.1 <- uri.output[[j]]$data12.enrich$merge1[\"sig.value\"] >= ez.list[[j]]$map.uv[idr.index,\"sig.value1\"]\n",
	"above.2 <- uri.output[[j]]$data12.enrich$merge1[\"sig.value\"] >= ez.list[[j]]$map.uv[idr.index,\"sig.value2\"]\n",
	"selected.sig.rep1 <- uri.output[[j]]$data12.enrich$merge1[above.1, c(\"start\", \"stop\", \"sig.value\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"selected.sig.rep2 <- uri.output[[j]]$data12.enrich$merge2[above.2, c(\"start\", \"stop\", \"sig.value\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"overlap.1 <- pair.peaks(selected.sig.rep1, combined)$merge2\n",
	"overlap.2 <- pair.peaks(selected.sig.rep2, combined)$merge2\n",
	"combined.in1 <- overlap.1[overlap.1$sig.value > sig.value.impute, c(\"start\", \"stop\", \"sig.value\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"combined.in2 <- overlap.2[overlap.2$sig.value > sig.value.impute, c(\"start\", \"stop\", \"sig.value\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"combined.region <- rbind(combined.region, combined.in1, combined.in2)\n",
	"is.repeated <- duplicated(combined.region$start)\n",
	"combined.region <- combined.region[!is.repeated, c(\"start\", \"stop\", \"sig.value\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"}\n",
	"npeak <- nrow(combined.region)\n",
	"sig.combined <- c(min(combined.region[,\"sig.value\"], na.rm=T), max(combined.region[,\"sig.value\"], na.rm=T))\n",
	"npeak.stat <- list(idr.level=idr.level, npeak=npeak)\n",
	"combined.region <- deconcatenate.chr(combined.region, chr.size)[,c(\"chr\", \"start\", \"stop\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"invisible(list(npeak.stat=npeak.stat, combined.selected=combined.region, sig.combined=sig.combined))\n",
	"}\n",
	"pass.structure <- function(uri.output, em.output, combined, idr.level, sig.value.impute, chr.size, overlap.ratio=0, is.RNAseq=F){\n",
	"columns.keep <- c(\"sig.value\", \"start\", \"stop\", \"signal.value\", \"p.value\", \"q.value\", \"chr\", \"start.ori\", \"stop.ori\")\n",
	"combined <- combined[, columns.keep]\n",
	"combined.selected.all <- c()\n",
	"for(j in 1:npair){\n",
	"sample1 <- uri.output[[j]]$data12.enrich$merge1[, columns.keep]\n",
	"sample2 <- uri.output[[j]]$data12.enrich$merge2[, columns.keep]\n",
	"data.matched <- keep.match(sample1, sample2, sig.value.impute=sig.value.impute)\n",
	"data.matched$sample1 <- data.matched$sample1[, columns.keep]\n",
	"data.matched$sample2 <- data.matched$sample2[, columns.keep]\n",
	"if(!is.RNAseq){\n",
	"overlap.1 <- pair.peaks.filter(data.matched$sample1, combined, p.value.impute=sig.value.impute, overlap.ratio)$merge2\n",
	"overlap.2 <- pair.peaks.filter(data.matched$sample2, combined, p.value.impute=sig.value.impute, overlap.ratio)$merge2\n",
	"} else {\n",
	"overlap.1 <- pair.peaks.RNA(data.matched$sample1, combined, p.value.impute=sig.value.impute)$merge2\n",
	"overlap.2 <- pair.peaks.RNA(data.matched$sample2, combined, p.value.impute=sig.value.impute)$merge2\n",
	"}\n",
	"combined.in1 <- overlap.1[overlap.1$sig.value > sig.value.impute, ]\n",
	"combined.in2 <- overlap.2[overlap.2$sig.value > sig.value.impute, ]\n",
	"combined.region <- rbind(combined.in1, combined.in2)\n",
	"is.repeated <- duplicated(combined.region$start)\n",
	"combined.region <- combined.region[!is.repeated,]\n",
	"rank.combined <- rank(-combined.region$sig.value)\n",
	"npeaks.overlap <- nrow(combined.region)\n",
	"npeaks.consistent <- nrow(cbind(em.output[[j]]$data.pruned$sample1))\n",
	"f1 <- list(breaks=em.output[[j]]$em.fit$x.mar$f1$breaks*npeaks.overlap/npeaks.consistent, density=(em.output[[j]]$em.fit$x.mar$f1$density+em.output[[j]]$em.fit$y.mar$f1$density)/2)\n",
	"f1$breaks[1] <- min(f1$breaks[1], 0.95)\n",
	"f2 <- list(breaks=em.output[[j]]$em.fit$x.mar$f2$breaks*npeaks.overlap/npeaks.consistent, density=(em.output[[j]]$em.fit$x.mar$f2$density+em.output[[j]]$em.fit$y.mar$f2$density)/2)\n",
	"f2$breaks[1] <- min(f2$breaks[1], 0.95)\n",
	"p <- em.output[[j]]$em.fit$para$p\n",
	"errorprob.combined <- get.comp2.prob(rank.combined, p, f1, f2)\n",
	"o <- order(errorprob.combined)\n",
	"idr <- cumsum(errorprob.combined[o])/c(1:length(o))\n",
	"idr.index <- which(idr > idr.level)[1]\n",
	"errorprob.cutoff <- errorprob.combined[o][idr.index]\n",
	"sig.value <- min(combined.region$sig.value[o][1:idr.index])\n",
	"combined.selected <- combined[combined$sig.value >= sig.value,]\n",
	"combined.selected.all <- rbind(combined.selected.all, combined.selected)\n",
	"}\n",
	"is.repeated <- duplicated(combined.selected.all$start)\n",
	"combined.selected.all <- combined.selected.all[!is.repeated,]\n",
	"npeak <- nrow(combined.selected.all)\n",
	"npeak.stat <- list(idr.level=idr.level, npeak=npeak)\n",
	"sig.combined <- c(min(combined.selected.all[,\"sig.value\"], na.rm=T), max(combined.selected.all[,\"sig.value\"], na.rm=T))\n",
	"combined.selected.all <- combined.selected.all[,  c(\"chr\", \"start.ori\", \"stop.ori\", \"signal.value\", \"p.value\", \"q.value\")]\n",
	"colnames(combined.selected.all) <- c(\"chr\", \"start\", \"stop\", \"signal.value\", \"p.value\", \"q.value\")\n",
	"invisible(list(npeak.stat=npeak.stat, combined.selected=combined.selected.all, sig.combined=sig.combined))\n",
	"}\n",
	"get.comp2.prob <- function(x, p, f1, f2){\n",
	"px.1 <- sapply(x, get.pdf, df=f1)\n",
	"px.2 <- sapply(x, get.pdf, df=f2)\n",
	"comp2prob <- 1 - p*px.1/(p*px.1+(1-p)*px.2)\n",
	"return(comp2prob)\n",
	"}\n",
	"keep.match <- function(sample1, sample2, sig.value.impute=0){\n",
	"sample1.prune <- sample1[sample1$sig.value > sig.value.impute & sample2$sig.value > sig.value.impute,]\n",
	"sample2.prune <- sample2[sample1$sig.value > sig.value.impute & sample2$sig.value > sig.value.impute,]\n",
	"invisible(list(sample1=sample1.prune, sample2=sample2.prune))\n",
	"}\n",
	"simu.gaussian.copula <- function(u, rho){\n",
	"n <- length(u)\n",
	"y <- qnorm(u)*rho + rnorm(n)*sqrt(1-rho^2)\n",
	"v <- pnorm(y)\n",
	"invisible(v)\n",
	"}\n",
	"phi.ori <- function(t, s){\n",
	"(t^(-s) -1)/s\n",
	"}\n",
	"phi.inv <- function(y, s){\n",
	"exp(-log(s*y+1)/s)\n",
	"}\n",
	"phi.der <- function(t, s){\n",
	"-t^(-s-1)\n",
	"}\n",
	"phi.der.inv <- function(y, s){\n",
	"exp(log(-y)/(-s-1))\n",
	"}\n",
	"get.w <- function(u, t, s){\n",
	"phi.der.inv(phi.der(u, s)/t, s)\n",
	"}\n",
	"get.v <- function(w, u, s){\n",
	"phi.inv(phi.ori(w, s) - phi.ori(u, s), s)\n",
	"}\n",
	"simu.clayton.copula <- function(u, s){\n",
	"t <- runif(length(u))\n",
	"if(s>0){\n",
	"w <- get.w(u, t, s)\n",
	"v <- get.v(w, u, s)\n",
	"return(v)\n",
	"}\n",
	"if(s==0){\n",
	"return(t)\n",
	"}\n",
	"if(s <0){\n",
	"print(\"Invalid association parameters for clayton copula\")\n",
	"}\n",
	"}\n",
	"simu.copula.2mix <- function(s1, s2, p, n, mu1, mu2, sd1, sd2, copula.txt){\n",
	"n1 <- round(n*p)\n",
	"n2 <- n-n1\n",
	"u1 <- runif(n1)\n",
	"if(copula.txt ==\"clayton\")\n",
	"v1 <- simu.clayton.copula(u1, s1)\n",
	"else{\n",
	"if(copula.txt ==\"gaussian\")\n",
	"v1 <- simu.gaussian.copula(u1, s1)\n",
	"}\n",
	"u2 <- runif(n2)\n",
	"if(copula.txt ==\"clayton\")\n",
	"v2 <- simu.clayton.copula(u2, s2)\n",
	"else{\n",
	"if(copula.txt ==\"gaussian\")\n",
	"v2 <- simu.gaussian.copula(u2, s2)\n",
	"}\n",
	"sample1.1 <- qnorm(u1, mu1, sd1)\n",
	"sample1.2 <- qnorm(v1, mu1, sd1)\n",
	"sample2.1 <- qnorm(u2, mu2, sd2)\n",
	"sample2.2 <- qnorm(v2, mu2, sd2)\n",
	"return(list(u=c(u1, u2), v=c(v1, v2),\n",
	"u.inv=c(sample1.1, sample2.1), v.inv=c(sample1.2, sample2.2),\n",
	"label=c(rep(1, n1), rep(2, n2))))\n",
	"}\n",
	"simu.copula.2mix.inv <- function(s1, s2, p, n, cdf1.x, cdf1.y, cdf2.x, cdf2.y, copula.txt){\n",
	"n1 <- round(n*p)\n",
	"n2 <- n-n1\n",
	"u1 <- runif(n1)\n",
	"if(copula.txt ==\"clayton\")\n",
	"v1 <- simu.clayton.copula(u1, s1)\n",
	"else{\n",
	"if(copula.txt ==\"gaussian\")\n",
	"v1 <- simu.gaussian.copula(u1, s1)\n",
	"}\n",
	"u2 <- runif(n2)\n",
	"if(copula.txt ==\"clayton\")\n",
	"v2 <- simu.clayton.copula(u2, s2)\n",
	"else{\n",
	"if(copula.txt ==\"gaussian\")\n",
	"v2 <- simu.gaussian.copula(u2, s2)\n",
	"}\n",
	"sample1.x <- inv.cdf.vec(u1, cdf1.x)\n",
	"sample1.y <- inv.cdf.vec(v1, cdf1.y)\n",
	"sample2.x <- inv.cdf.vec(u2, cdf2.x)\n",
	"sample2.y <- inv.cdf.vec(v2, cdf2.y)\n",
	"return(list(u=c(u1, u2), v=c(v1, v2),\n",
	"u.inv=c(sample1.x, sample2.x), v.inv=c(sample1.y, sample2.y),\n",
	"label=c(rep(1, n1), rep(2, n2))))\n",
	"}\n",
	"inv.cdf <- function(u, u.cdf){\n",
	"i <- which(u.cdf$cdf> u)[1]\n",
	"q.u  <- (u - u.cdf$cdf[i-1])/(u.cdf$cdf[i] - u.cdf$cdf[i-1])* (u.cdf$breaks[i]-u.cdf$breaks[i-1]) + u.cdf$breaks[i-1]\n",
	"return(q.u)\n",
	"}\n",
	"inv.cdf.vec <- function(u, u.cdf){\n",
	"ncdf <- length(u.cdf$cdf)\n",
	"nbreaks <- length(u.cdf$breaks)\n",
	"if(ncdf == nbreaks-1 & u.cdf$cdf[ncdf]< 1)\n",
	"u.cdf[ncdf] <- 1\n",
	"q.u <- sapply(u, inv.cdf, u.cdf)\n",
	"return(q.u)\n",
	"}\n",
	"simu.test.stat <- function(p, n, mu1, sd1, mu0, sd0, sd.e){\n",
	"n.signal <- round(n*p)\n",
	"n.noise <- n - n.signal\n",
	"labels <- c(rep(1, n.signal), rep(0, n.noise))\n",
	"mu.signal <- rnorm(n.signal, mu1, sd1)\n",
	"x.signal <- mu.signal + rnorm(n.signal, 0, sd.e)\n",
	"x.noise <- rnorm(n.noise, mu0, sd0) + rnorm(n.noise, 0, sd.e)\n",
	"y.signal <- mu.signal + rnorm(n.signal, 0, sd.e)\n",
	"y.noise <- rnorm(n.noise, mu0, sd0) + rnorm(n.noise, 0, sd.e)\n",
	"x <- c(x.signal, x.noise)\n",
	"y <- c(y.signal, y.noise)\n",
	"p.x <- 1-pnorm(x, mu0, sqrt(sd0^2+sd.e^2))\n",
	"p.y <- 1-pnorm(y, mu0, sqrt(sd0^2+sd.e^2))\n",
	"return(list(p.x=p.x, p.y=p.y, x=x, y=y, labels=labels))\n",
	"}\n",
	"forward.decoy.tradeoff.ndecoy <- function(xx, labels, ndecoy){\n",
	"xx <- round(xx, 5)\n",
	"o <- order(xx, decreasing=T)\n",
	"rand <- 1-labels # if rand==0, consistent\n",
	"rand.o <- rand[o]\n",
	"if(sum(rand.o) > ndecoy){\n",
	"index.decoy <- which(cumsum(rand.o)==ndecoy)\n",
	"} else {\n",
	"index.decoy <- which(cumsum(rand.o)==sum(rand.o))\n",
	"}\n",
	"cutoff.decoy <- xx[o][index.decoy]\n",
	"cutoff.unique <- unique(xx[o])\n",
	"cutoff <- cutoff.unique[cutoff.unique >= cutoff.decoy[length(cutoff.decoy)]]\n",
	"get.decoy.count <- function(cut.off){\n",
	"above <- rep(0, length(xx))\n",
	"above[xx >= cut.off] <- 1\n",
	"decoy.count <- sum(above==1 & rand==1)\n",
	"return(decoy.count)\n",
	"}\n",
	"get.forward.count <- function(cut.off){\n",
	"above <- rep(0, length(xx))\n",
	"above[xx >= cut.off] <- 1\n",
	"forward.count <- sum(above==1 & rand==0)\n",
	"return(forward.count)\n",
	"}\n",
	"get.est.fdr <- function(cut.off){\n",
	"above <- rep(0, length(xx))\n",
	"above[xx >= cut.off] <- 1\n",
	"est.fdr <- 1-mean(xx[above==1])\n",
	"return(est.fdr)\n",
	"}\n",
	"get.false.neg.count <- function(cut.off){\n",
	"below <- rep(0, length(xx))\n",
	"below[xx < cut.off] <- 1\n",
	"false.neg.count <- sum(below==1 & rand==0)\n",
	"return(false.neg.count)\n",
	"}\n",
	"get.false.pos.count <- function(cut.off){\n",
	"above <- rep(0, length(xx))\n",
	"above[xx >= cut.off] <- 1\n",
	"false.pos.count <- sum(above==1 & rand==1)\n",
	"return(false.pos.count)\n",
	"}\n",
	"decoy <- sapply(cutoff, get.decoy.count)\n",
	"forward <- sapply(cutoff, get.forward.count)\n",
	"est.fdr <- sapply(cutoff, get.est.fdr)\n",
	"emp.fdr <- decoy/(decoy+forward)\n",
	"false.neg <- sapply(cutoff, get.false.neg.count)\n",
	"false.pos <- sapply(cutoff, get.false.pos.count)\n",
	"true.pos <- sum(rand==0)-false.neg\n",
	"true.neg <- sum(rand==1)-false.pos\n",
	"sensitivity <- true.pos/(true.pos+false.neg)\n",
	"specificity <- true.neg/(true.neg+false.pos)\n",
	"return(list(decoy=decoy, forward=forward, cutoff=cutoff, est.fdr=est.fdr, emp.fdr=emp.fdr, sensitivity=sensitivity, specificity=specificity))\n",
	"}\n",
	"get.emp.jack <- function(a, p0){\n",
	"nobs <- length(a$labels)\n",
	"est <- list()\n",
	"est.all <- list()\n",
	"temp.all <- em.transform(-a$p.x, -a$p.y, mu=1.5, sigma=1.4, rho=0.4, p=0.7, eps=0.01)\n",
	"est.all$p <- temp.all$para$p\n",
	"est.all$rho1 <- temp.all$para$rho1\n",
	"est.all$FDR <- get.FDR(temp.all$e.z)\n",
	"FDR <- list()\n",
	"p <- c()\n",
	"rho1 <- c()\n",
	"for(i in 1:nobs){\n",
	"temp <- em.transform(-a$p.x[-i], -a$p.y[-i], mu=1.5, sigma=1.4, rho=0.4, p=0.7, eps=0.01)\n",
	"est[[i]] <- list(p=temp$para$p, rho1=temp$para$rho1, FDR=get.FDR(temp$e.z))\n",
	"FDR[[i]] <- est[[i]]$FDR # this is the FDR for top n peaks\n",
	"p[i] <- est[[i]]$p\n",
	"rho1[i] <- est[[i]]$rho1\n",
	"}\n",
	"est.jack <- list(FDR=FDR, p=p, rho1=rho1)\n",
	"return(list(est.jack=est.jack, est.all=est.all))\n",
	"}\n",
	"get.FDR.jack <- function(est, FDR.nominal){\n",
	"nobs <- length(est$est.jack$FDR)\n",
	"FDR.all <- c()\n",
	"top.n <- c()\n",
	"for(i in 1:nobs){\n",
	"top.n[i] <- max(which(est$est.jack$FDR[[i]] <= FDR.nominal))\n",
	"FDR.all[i] <- est$est.all$FDR[top.n[i]]\n",
	"}\n",
	"invisible(list(FDR.all=FDR.all, top.n=top.n))\n",
	"}\n",
	"get.emp.IF <- function(a, p0){\n",
	"nobs <- length(a$labels)\n",
	"est <- list()\n",
	"est.all <- list()\n",
	"temp.all <- em.2copula.quick(a$p.x, a$p.y, p0=p0, rho1.0=0.7,\n",
	"rho2.0=0, eps=0.01, fix.p=T, stoc=F, fix.rho2=T, \"gaussian\")\n",
	"est.all$p <- temp.all$para$p\n",
	"est.all$rho1 <- temp.all$para$rho1\n",
	"est.all$FDR <- get.FDR(temp.all$e.z)\n",
	"IF.FDR <- list()\n",
	"IF.p <- c()\n",
	"IF.rho1 <- c()\n",
	"for(i in 1:nobs){\n",
	"temp <- em.2copula.quick(a$p.x[-i], a$p.y[-i], p0=p0, rho1.0=0.7,\n",
	"rho2.0=0, eps=0.01, fix.p=T, stoc=F, fix.rho2=T, \"gaussian\")\n",
	"est[[i]] <- list(p=temp$para$p, rho1=temp$para$rho1, FDR=get.FDR(temp$e.z))\n",
	"IF.FDR[[i]] <- (nobs-1)*(est.all$FDR[-nobs] - est[[i]]$FDR) # this is the FDR for top n peaks\n",
	"IF.p[i] <- (nobs-1)*(est.all$p - est[[i]]$p)\n",
	"IF.rho1[i] <- (nobs-1)*(est.all$rho1 - est[[i]]$rho1)\n",
	"}\n",
	"emp.IF <- list(FDR=IF.FDR, p=IF.p, rho1=IF.rho1)\n",
	"invisible(list(emp.IF=emp.IF, est.all=est.all, est=est))\n",
	"}\n",
	"get.FDR <- function(e.z){\n",
	"e.z.o <- order(1-e.z)\n",
	"FDR <- cumsum(1-e.z[e.z.o])/c(1:length(e.z.o))\n",
	"invisible(FDR)\n",
	"}\n",
	"get.IF.FDR <- function(IF.est, top.n){\n",
	"nobs <- length(IF.est$emp.IF$FDR)\n",
	"FDR <- c()\n",
	"for(i in 1:nobs)\n",
	"FDR[i] <- IF.est$emp.IF$FDR[[i]][top.n]\n",
	"invisible(FDR)\n",
	"}\n",
	"get.IF.FDR.all <- function(IF.est, FDR.size){\n",
	"top.n <- which.min(abs(IF.est$est.all$FDR -FDR.size))\n",
	"nobs <- length(IF.est$est.all$FDR)\n",
	"FDR <- c()\n",
	"for(i in 1:nobs)\n",
	"FDR[i] <- IF.est$emp.IF$FDR[[i]][top.n]\n",
	"invisible(list(FDR=FDR, top.n=top.n))\n",
	"}\n",
	"plot.simu.uri <- function(x, y){\n",
	"tt <- seq(0.01, 0.99, by=0.01)\n",
	"uri <- sapply(tt, comp.uri.prob, u=x, v=y)\n",
	"uri.thin <- uri[seq(1, length(tt), by=3)]\n",
	"tt.thin <- tt[seq(1, length(tt), by=3)]\n",
	"duri <- (uri.thin[-1]-uri.thin[-length(uri.thin)])/(tt.thin[-1]-tt.thin[-length(tt.thin)])\n",
	"uri.spl <- smooth.spline(tt, uri, df=6.4)\n",
	"uri.der <- predict(uri.spl, tt, deriv=1)\n",
	"par(mfrow=c(2,2))\n",
	"plot(x[1:n0], y[1:n0])\n",
	"points(x[(n0+1):n], y[(n0+1):n], col=2)\n",
	"plot(rank(-x)[1:n0], rank(-y)[1:n0])\n",
	"points(rank(-x)[(1+n0):n], rank(-y)[(1+n0):n])\n",
	"plot(tt, uri)\n",
	"lines(c(0,1), c(0,1), lty=2)\n",
	"title(paste(\"rho1=\", rho1, \" rho2=\", rho2, \"p=\", p, sep=\"\"))\n",
	"plot(tt.thin[-1], duri)\n",
	"lines(uri.der)\n",
	"abline(h=1)\n",
	"invisible(list(x=x, y=y, uri=uri, tt=tt, duri=duri, tt.thin=tt.thin, uri.der=uri.der))\n",
	"}\n",
	"get.pseudo.mix <- function(x, mu, sigma, rho, p){\n",
	"nw <- 1000\n",
	"w <- seq(min(-3, mu-3*sigma), max(mu+3*sigma, 3), length=nw)\n",
	"w.cdf <- p*pnorm(w, mean=mu, sd=sigma) + (1-p)*pnorm(w, mean=0, sd=1)\n",
	"i <- 1\n",
	"quan.x <- rep(NA, length(x))\n",
	"for(i in c(1:nw)){\n",
	"index <- which(x >= w.cdf[i] & x < w.cdf[i+1])\n",
	"quan.x[index] <- (x[index]-w.cdf[i])*(w[i+1]-w[i])/(w.cdf[i+1]-w.cdf[i]) +w[i]\n",
	"}\n",
	"index <- which(x < w.cdf[1])\n",
	"if(length(index)>0)\n",
	"quan.x[index] <- w[1]\n",
	"index <- which(x > w.cdf[nw])\n",
	"if(length(index)>0)\n",
	"quan.x[index] <- w[nw]\n",
	"invisible(quan.x)\n",
	"}\n",
	"em.transform <- function(x, y, mu, sigma, rho, p, eps){\n",
	"x.cdf.func <- ecdf(x)\n",
	"y.cdf.func <- ecdf(y)\n",
	"afactor <- length(x)/(length(x)+1)\n",
	"x.cdf <- x.cdf.func(x)*afactor\n",
	"y.cdf <- y.cdf.func(y)*afactor\n",
	"para <- list()\n",
	"para$mu <- mu\n",
	"para$sigma <- sigma\n",
	"para$rho <- rho\n",
	"para$p <- p\n",
	"j <- 1\n",
	"to.run <- T\n",
	"loglik.trace <- c()\n",
	"loglik.inner.trace <- c()\n",
	"z.1 <- get.pseudo.mix(x.cdf, para$mu, para$sigma, para$rho, para$p)\n",
	"z.2 <- get.pseudo.mix(y.cdf, para$mu, para$sigma, para$rho, para$p)\n",
	"while(to.run){\n",
	"i <- 1\n",
	"while(to.run){\n",
	"e.z <- e.step.2normal(z.1, z.2, para$mu, para$sigma, para$rho, para$p)\n",
	"para <- m.step.2normal(z.1, z.2, e.z)\n",
	"if(i > 1)\n",
	"l.old <- l.new\n",
	"l.new <- loglik.2binormal(z.1, z.2, para$mu, para$sigma, para$rho, para$p)\n",
	"loglik.inner.trace[i] <- l.new\n",
	"if(i > 1){\n",
	"to.run <- loglik.inner.trace[i]-loglik.inner.trace[i-1]>eps\n",
	"}\n",
	"cat(\"loglik.inner.trace[\", i, \"]=\", loglik.inner.trace[i], \"\\n\")\n",
	"cat(\"mu=\", para$mu, \"sigma=\", para$sigma, \"p=\", para$p, \"rho=\", para$rho, \"\\n\\n\")\n",
	"i <- i+1\n",
	"}\n",
	"z.1 <- get.pseudo.mix(x.cdf, para$mu, para$sigma, para$rho, para$p)\n",
	"z.2 <- get.pseudo.mix(y.cdf, para$mu, para$sigma, para$rho, para$p)\n",
	"if(j > 1)\n",
	"l.old.outer <- l.new.outer\n",
	"l.new.outer <- loglik.2binormal(z.1, z.2, para$mu, para$sigma, para$rho, para$p)\n",
	"loglik.trace[j] <- l.new.outer\n",
	"if(j == 1)\n",
	"to.run <- T\n",
	"else{ # stop when iteration>100\n",
	"if(j > 100)\n",
	"to.run <- F\n",
	"else\n",
	"to.run <- l.new.outer - l.old.outer > eps\n",
	"}\n",
	"cat(\"loglik.trace[\", j, \"]=\", loglik.trace[j], \"\\n\")\n",
	"cat(\"mu=\", para$mu, \"sigma=\", para$sigma, \"p=\", para$p, \"rho=\", para$rho, \"\\n\")\n",
	"j <- j+1\n",
	"}\n",
	"bic <- -2*l.new + 4*log(length(z.1))\n",
	"return(list(para=list(p=para$p, rho=para$rho, mu=para$mu, sigma=para$sigma),\n",
	"loglik=l.new, bic=bic, e.z=e.z, loglik.trace=loglik.trace))\n",
	"}\n",
	"loglik.2binormal <- function(z.1, z.2, mu, sigma, rho, p){\n",
	"l.m <- sum(d.binormal(z.1, z.2, 0, 1, 0)+log(p*exp(d.binormal(z.1, z.2, mu, sigma, rho)-d.binormal(z.1, z.2, 0, 1, 0))+(1-p)))\n",
	"return(l.m)\n",
	"}\n",
	"d.binormal <- function(z.1, z.2, mu, sigma, rho){\n",
	"loglik <- (-log(2)-log(pi)-2*log(sigma) - log(1-rho^2)/2 - (0.5/(1-rho^2)/sigma^2)*((z.1-mu)^2 -2*rho*(z.1-mu)*(z.2-mu) + (z.2-mu)^2))\n",
	"return(loglik)\n",
	"}\n",
	"e.step.2normal <- function(z.1, z.2, mu, sigma, rho, p){\n",
	"e.z <- p/((1-p)*exp(d.binormal(z.1, z.2, 0, 1, 0)-d.binormal(z.1, z.2, mu, sigma, rho))+ p)\n",
	"invisible(e.z)\n",
	"}\n",
	"m.step.2normal <- function(z.1, z.2, e.z){\n",
	"p <- mean(e.z)\n",
	"mu <- sum((z.1+z.2)*e.z)/2/sum(e.z)\n",
	"sigma <- sqrt(sum(e.z*((z.1-mu)^2+(z.2-mu)^2))/2/sum(e.z))\n",
	"rho <- 2*sum(e.z*(z.1-mu)*(z.2-mu))/(sum(e.z*((z.1-mu)^2+(z.2-mu)^2)))\n",
	"return(list(p=p, mu=mu, sigma=sigma, rho=rho))\n",
	"}\n",
	"init <- function(x, y, x.label){\n",
	"x.o <- order(x)\n",
	"x.ordered <- x[x.o]\n",
	"y.ordered <- y[x.o]\n",
	"x.label.ordered <- x.label[x.o]\n",
	"n <- length(x)\n",
	"p <- sum(x.label)/n\n",
	"rho <- cor(x.ordered[1:ceiling(p*n)], y.ordered[1:ceiling(p*n)])\n",
	"temp <- find.mu.sigma(x.ordered, x.label.ordered)\n",
	"mu <- temp$mu\n",
	"sigma <- temp$sigma\n",
	"invisible(list(mu=mu, sigma=sigma, rho=rho, p=p))\n",
	"}\n",
	"init.dist <- function(f0, f1){\n",
	"index.median.0 <- which(f0$cdf>0.5)[1]\n",
	"q.0.small <- f0$cdf[index.median.0] # because f0 and f1 have the same bins\n",
	"q.1.small <- f1$cdf[index.median.0]\n",
	"index.median.1 <- which(f1$cdf>0.5)[1]\n",
	"q.0.big <- f0$cdf[index.median.1] # because f0 and f1 have the same bins\n",
	"q.1.big <- f1$cdf[index.median.1]\n",
	"pseudo.small.0 <- qnorm(q.0.small, mean=0, sd=1)\n",
	"pseudo.small.1 <- qnorm(q.1.small, mean=0, sd=1)\n",
	"pseudo.big.0 <- qnorm(q.0.big, mean=0, sd=1)\n",
	"pseudo.big.1 <- qnorm(q.1.big, mean=0, sd=1)\n",
	"mu <- (pseudo.small.0*pseudo.big.1 - pseudo.small.1*pseudo.big.0)/(pseudo.big.1-pseudo.small.1)\n",
	"sigma <- (pseudo.small.0-mu)/pseudo.small.1\n",
	"return(list(mu=mu, sigma=sigma))\n",
	"}\n",
	"find.mu.sigma <- function(x, x.label){\n",
	"x.0 <- x[x.label==0]\n",
	"x.1 <- x[x.label==1]\n",
	"n.x0 <- length(x.0)\n",
	"n.x1 <- length(x.1)\n",
	"x.end <- c(min(x.0), min(x.1), max(x.0), max(x.1))\n",
	"o <- order(x.end)\n",
	"x.middle <- x.end[o][c(2,3)]\n",
	"q.1.small <- mean(x.1 <= x.middle[1])*n.x1/(n.x1+1)\n",
	"q.0.small <- mean(x.0 <= x.middle[1])*n.x0/(n.x0+1)\n",
	"q.1.big <- mean(x.1 <= x.middle[2])*n.x1/(n.x1+1)\n",
	"q.0.big <- mean(x.0 <= x.middle[2])*n.x0/(n.x0+1)\n",
	"pseudo.small.0 <- qnorm(q.0.small, mean=0, sd=1)\n",
	"pseudo.small.1 <- qnorm(q.1.small, mean=0, sd=1)\n",
	"pseudo.big.0 <- qnorm(q.0.big, mean=0, sd=1)\n",
	"pseudo.big.1 <- qnorm(q.1.big, mean=0, sd=1)\n",
	"mu <- (pseudo.small.0*pseudo.big.1 - pseudo.small.1*pseudo.big.0)/(pseudo.big.1-pseudo.small.1)\n",
	"sigma <- (pseudo.small.0-mu)/pseudo.small.1\n",
	"return(list(mu=mu, sigma=sigma))\n",
	"}\n"
};

