################################################## ### Author: Natasa Rajicic ### Date: Spring 2011 ### Place: MGH Biostatistics, Boston, Massachusetts ### Title: Functions (R code) supplement to "Analysis of the relationship between longitudinal gene expressions and ordered categorical event data" ### Statist. Med. 2009; 28:2817-2832 ########################## lg=length ########################## re.sample <- function(x, size, ...){ if(length(x) <= 1) { if(!missing(size) && size == 0) x[FALSE] else x } else sample(x, size, ...) } ########################## get.cols= function(test,hybids) { cols= sapply(1:lg(hybids), function(d){seq(1:dim(test)[2])[hybids[d]==names(test)]}) subdata= as.data.frame(test[,cols]) names(subdata)= names(test)[cols] row.names(subdata)= row.names(test) return(subdata) } ########################## get.rows= function(ds,pnames) { rows=NULL for (i in 1:lg(pnames)){rows= rbind(rows,ds[ds[,1]==pnames[i],]) } return(rows) } ########################## get.vc= function(k, ddata, glim) { y= t(as.matrix(get.cols(ddata[k,], glim[,3]))) mat= cbind(glim[,1:2],y) names(mat)=c("pat","day","y") return(mat) } ########################## get.ui= function(i,lgi, y1i,y3i, nrisk,temp,sigs){ tmp= temp[[i]] lgx= lgi[i] d1i= y1i[i,1:lgx] d3i= y3i[i,1:lgx] y1= apply(y1i,2,sum)[1:lgx] y3= apply(y3i,2,sum)[1:lgx] nt= nrisk[1:lgx] p1= y1/nt p3= y3/nt s2= sigs[i,1:lgx] ui= sweep(tmp,2,(1-d3i)*(1-p1),"*")-sweep(tmp,2,(1-d1i)*(1-p3),"*") vi= sweep(s2+tmp^2,2,(1-p1)*(1-p3)*(p1+p3),"*") ui= as.matrix(apply(ui,1,sum)) vi= as.matrix(apply(vi,1,sum)) return(list(ui,vi)) } #################################################### get.rpat= function(s,b,ai){ y1i= y3i= y2i= NULL nrisk= NULL rpat= as.matrix(rep(TRUE, n)) etimes= unique( sort(ceiling(rbeta(50,1,1)*28)) ) d=1 while (sum(rpat[,d])>2) { nrisk[d]= sum(rpat[,d]) alpha= ai[rpat[,d],] et= etimes[d] e1= e1hat[d]/exp(b*(alpha%*%c(1,et))) e1= e1/(1+e1) e2= e2hat[d]/exp(b*(alpha%*%c(1,et))) e2= e2/(1+e2) p1i= e1 p2i= e2- e1 p3i= 1 - e2 events= sapply(1:nrisk[d],function(k){rmultinom(1,1,cbind(p1i,p2i,p3i)[k,])}) y1i= cbind(y1i, replace(rep(FALSE,n),rpat[,d],events[1,]>0)) y2i= cbind(y2i, replace(rep(FALSE,n),rpat[,d],events[2,]>0)) y3i= cbind(y3i, replace(rep(FALSE,n),rpat[,d],events[3,]>0)) rpat= cbind(rpat, y2i[,d]) #if (d==1) { p1=p1i; p3=p3i } d= d+1 } rpat= rpat[,1:dim(y1i)[2]] return(list(y1i,y2i,y3i,rpat,etimes)) } #################################################### get.resp= function(s,indx){ y1i= indx[[1]] y2i= indx[[2]] y3i= indx[[3]] rpat=indx[[4]] etimes= indx[[5]] pats= 1:n resp= data.frame(cbind(pats,rep(-1,n),rep(-1,n))) names(resp)= c("pats","day","cens") etimes= etimes[1:dim(rpat)[2]] died= apply(y1i,1,sum)>0 resp[died,2]=apply(y1i[died,],1,function(k){etimes[k]}) resp[died,3]= 1 alive= apply(y3i,1,sum)>0 resp[alive,2]= apply(y3i[alive,],1,function(k){etimes[k]}) resp[alive,3]= 3 resp[resp[,2]<0,2]= max(etimes) resp[resp[,3]==-1,3]= 2 return(resp) } #################################################### get.Ri= function(i,simd){ y= get.cols(simd,get.rows(glims,i)[,"hybs"]) xmat=cbind(rep(1,dim(y)[2]),times[1:dim(y)[2]]) hmat=xmat%*%solve(t(xmat)%*%xmat)%*%t(xmat) rss= as.matrix(y) %*% (diag(dim(y)[2]) - hmat) return( apply((rss)^2,1,sum) ) } ####### calculate FDR get.fdr= function(j,t2,t2perm){ fdr= sapply(1:nperm,function(d){sum(abs(t2perm[,d])>abs(t2[j]))}) return(mean(fdr)/sum(abs(t2)>=abs(t2[j]))) } #################### ## Efron, JASA 2004 get.efdr0= function(t2,sig0){ ss= smooth.spline(density(t2)$x,density(t2)$y) delta0= density(t2)$x[which(density(t2)$y==max(density(t2)$y))] f0= function(x){dnorm(x,delta0,sig0)} plot(t2,f0(t2), col= 2) lines(density(t2)) lines(density(t2perm),lty=2,col="BLUE") return( f0(t2)/predict(ss,t2)[[2]] ) } get.sig0= function(delta0,ss){ d2= predict(ss,delta0,der=2)[[2]] d1= predict(ss,delta0,der=1)[[2]] fz= predict(ss,delta0)[[2]] return( (d2*fz-d1^2) / fz^2 ) } get.efdr= function(t2){ ss= smooth.spline(density(t2)$x,density(t2)$y) delta0= density(t2)$x[which(density(t2)$y==max(density(t2)$y))] sig0= 1/sqrt(-get.sig0(delta0,ss)) f0= function(x){dnorm(x,delta0,sig0)} plot(t2,f0(t2), col= 2) lines(density(t2)) lines(density(t2perm),lty=2,col="BLUE") return( f0(t2)/predict(ss,t2)[[2]] ) } ##### PLOTS plotgene= function(gnum, simd, resp){ par(cex=.7) gdata= simd[gnum,] yl= max(abs(gdata)) yl= round(seq(-yl,yl,length=4),2) plot(0,0,xlim=c(1,ntp),xaxt="n",yaxt="n",ylab=" ",xlab="Days", cex=.7,col="WHITE") for(i in 1:n){ lgx= 1:sum(times <= resp[i,2]) test= get.rows(glims,i) test= get.cols(gdata,test$hybs) lines(as.numeric(test)[lgx],col= resp[i,3],lwd= 3.5-resp[i,3]) } axis(side=1, at= 1:ntp, labels=as.character(times)) axis(side=2, at= seq(-1,1,0.5),label= seq(-1,1,0.5),cex=.7) mtext(paste("gene",gnum ), cex=.7, line=.7) } get.t2var =function(s, resp, indx,simd) { y1i= indx[[1]] y2i= indx[[2]] y3i= indx[[3]] etimes= indx[[5]] rpat= indx[[4]] nrisk= apply(rpat,2,sum) etimes= etimes[1:dim(rpat)[2]] test= rpat*seq(1:n) r2= lapply(1:lg(etimes),get.re,rpat,nrisk,resp,etimes,simd) r2.names= sapply(1:lg(r2),function(d){test[test[,d]>0,d] }) lgi= apply(test, 1, function(d){sum(d>0)}) subr2= lapply(lgi, function(d){r2[1:d]}) temp= lapply(1:n,function(d){ sapply(1:lgi[d],function(k){subr2[[d]][[k]][,r2.names[[k]]==d]})}) denom= sapply(resp$day, function(d){sum(times<=d)}) Rss= apply(sapply(1:n,get.Ri,simd),1,sum)/sum(denom-2) temp1= sapply(1:lg(etimes), function(d){times[times <= etimes[d]]}) ss= sapply(temp1,function(d){crossprod(d-mean(d)) }) sigs= as.matrix(Rss)%*%t(1/sapply(temp1,lg)+(etimes-sapply(temp1,mean))^2/ss) uis= lapply(1:n, get.ui,lgi, y1i, y3i, nrisk, temp, sigs) ui= sapply(uis,function(d){d[[1]]}) vi= sapply(uis,function(d){d[[2]]}) vars= -get.der2(y1i,y3i,etimes,r2,rpat,nrisk)/n return(list(ui,vars)) } ##################################### get.t2p =function(s, resp, indx,simd) { y1i= indx[[1]] y3i= indx[[3]] etimes= indx[[5]] rpat= indx[[4]] nrisk= apply(rpat,2,sum) etimes= etimes[1:dim(rpat)[2]] test= rpat*seq(1:n) r2= lapply(1:lg(etimes),get.re,rpat,nrisk,resp,etimes,simd) r2.names= sapply(1:lg(r2),function(d){test[test[,d]>0,d] }) lgi= apply(test, 1, function(d){sum(d>0)}) subr2= lapply(lgi, function(d){r2[1:d]}) temp= lapply(1:n,function(d){ sapply(1:lgi[d],function(k){subr2[[d]][[k]][,r2.names[[k]]==d]})}) denom= sapply(resp$day, function(d){sum(times<=d)}) Rss= apply(sapply(1:n,get.Ri,simd),1,sum)/sum(denom-2) temp1= sapply(1:lg(etimes), function(d){times[times <= etimes[d]]}) ss= sapply(temp1,function(d){crossprod(d-mean(d)) }) sigs= as.matrix(Rss)%*%t(1/sapply(temp1,lg)+(etimes-sapply(temp1,mean))^2/ss) uis= lapply(1:n, get.ui,lgi, y1i, y3i, nrisk, temp, sigs) ui= sapply(uis,function(d){d[[1]]}) vi= sapply(uis,function(d){d[[2]]}) # vars= sqrt(-get.der2(y1i,y3i,etimes,r2,rpat,nrisk)) # t2= apply(ui,1,sum)/vars t2= apply(ui,1,sum)/sqrt(apply(vi,1,sum)) t2perm=sapply(1:nperm,get.perm,r2,y1i,y3i,lgi,nrisk,rpat,temp,sigs) return(cbind(t2,t2perm)) } #################################################### get.perm= function(j,r2,y1i,y3i,lgi,nrisk,rpat,temp,sigs){ y1= apply(y1i,2,sum) y3= apply(y3i,2,sum) net= lg(y1) z1i= sapply(1:net,function(d){replace(rep(FALSE,n),res(seq(1:n)[rpat[,d]],y1[d]),TRUE)}) z3i= sapply(1:net,function(d){replace(rep(FALSE,n),res(seq(1:n)[rpat[,d]],y3[d]),TRUE)}) uis= lapply(1:n,get.ui,lgi,z1i,z3i,nrisk,temp,sigs) ui= sapply(uis,function(d){d[[1]]}) vi= sapply(uis,function(d){d[[2]]}) return( apply(ui,1,sum)/sqrt(apply(vi,1,sum)) ) } get.sum= function(f){ return((f[[1]]-f[[2]])/f[1])} #### second derivative variance get.der2= function(v1,v2,etimes,r2,rpat,nrisk){ y1= apply(v1,2,sum) y3= apply(v2,2,sum) sum1= sapply(1:lg(etimes),function(j){r2[[j]][,v1[rpat[,j],j]]}) sum2= sapply(1:lg(etimes),function(j){r2[[j]][,v2[rpat[,j],j]]}) sum3= sapply(r2,apply,1,crossprod) sum1= sapply(sum1,as.matrix) sum2= sapply(sum2,as.matrix) vars= sapply(sum1,apply,1,crossprod) %*% as.matrix((y1*(nrisk-y1)+y3*(nrisk-y3))/nrisk^2 )- sapply(sum2,apply,1,crossprod) %*% as.matrix(y1*(nrisk-y1)/nrisk^2)- sum3 %*% as.matrix(y3*(nrisk-y3)/nrisk^2) return(vars) } get.t2= function(s,resp,indx,simd) { y1i= indx[[1]] y3i= indx[[3]] etimes= indx[[5]] rpat= indx[[4]] nrisk= apply(rpat,2,sum) etimes= etimes[1:dim(rpat)[2]] test= rpat*seq(1:n) r2= lapply(1:lg(etimes),get.re,rpat,nrisk,resp,etimes,simd) r2.names= sapply(1:lg(r2),function(d){test[test[,d]>0,d] }) lgi= apply(test, 1, function(d){sum(d>0)}) subr2= lapply(lgi, function(d){r2[1:d]}) temp= lapply(1:n,function(d){ sapply(1:lgi[d],function(k){subr2[[d]][[k]][,r2.names[[k]]==d]})}) denom= sapply(resp$day, function(d){sum(times<=d)}) Rss= apply(as.matrix(sapply(1:n,get.Ri,simd)),1,sum)/sum(denom-2) temp1= sapply(1:lg(etimes), function(d){times[times <= etimes[d]]}) ss= sapply(temp1,function(d){crossprod(d-mean(d)) }) sigs= as.matrix(Rss)%*%t(1/sapply(temp1,lg)+(etimes-sapply(temp1,mean))^2/ss) uis= lapply(1:n, get.ui,lgi, y1i, y3i, nrisk, temp, sigs) ui= sapply(uis,function(d){d[[1]]}) vi= sapply(uis,function(d){d[[2]]}) #vars= sqrt(-get.der2(y1i,y3i,etimes,r2,rpat,nrisk)) #t2= apply(ui,1,sum)/vars t2= apply(ui,1,sum)/sqrt(apply(vi,1,sum)) return(t2) } #################################################### get.lines= function(ai,err){ sig= round(.10*ng) tmat= cbind(rep(1,ntp), times) simdata= NULL for (g in 1:sig){ error= NULL for(i in 1:n) {error= rbind(error,rnorm(ntp,0,sd=err))} simdata= rbind(simdata, c(t(ai %*% t(tmat) + error)))} for(g in 1:(ng-sig)){ error= NULL ai2= mvrnorm(n, mu=c(0,0), Sigma= varre) for (i in 1:n){error= rbind(error,rnorm(ntp,0,sd= err))} simdata= rbind(simdata, c(t(ai2 %*% t(tmat) + error)))} simdata= as.data.frame(simdata) row.names(simdata)= as.character(1:ng) names(simdata)= as.character(glims[,3]) return(simdata) }