# Change Lane 1: definition of parameters **************************************************** bp0=2 ;minms= 10;sr= 0.0; minf= 0.035; nbin= 15; minht=200 namid=c("fname","lane","dye","marker") nambi =paste(c("bi"),1:nbin,sep=""); nambp =paste(c("sz"),1:nbin,sep="") namht =paste(c("ht"),1:nbin,sep="") namms =paste(c("ms"),1:nbin,sep="") namnl=c("nl") names=c(namid,nambi,nambp,namht,namms,namnl) # Change Lane 2: name of the input file ******************************************************** #f0=read.table(file="phi031.dat", header=F, skip=1, col.names=names,sep="\t") #f0=read.table(file="phi056.dat", header=F, skip=1, col.names=names,sep="\t") #f0=read.table(file="phi062.dat", header=F, skip=1, col.names=names,sep="\t") #f0=read.table(file="phi084.dat", header=F, skip=1, col.names=names,sep="\t") f0=read.table(file="phi115.dat", header=F, skip=1, col.names=names,sep="\t") f1=f0[order(f0$nl, decreasing=T),] pista=c(1:nrow(f1)) # maximum number of pistes maxpis=max(as.numeric(pista)) id=cbind(pista,as.matrix(f1[,1:4])) bi=cbind(pista,as.matrix(f1[,5:(4+nbin)])) bp=cbind(pista,as.matrix(f1[,(4+nbin+1):(4+2*nbin)])) ht=cbind(pista,as.matrix(f1[,(4+2*nbin+1):(4+3*nbin)])) ms=cbind(pista,as.matrix(f1[,(4+3*nbin+1):(4+4*nbin)])) nl=cbind(pista,as.matrix(f1[,(4+4*nbin+1)])) colnames(nl)=c("pista","nl") pist=as.numeric(as.matrix(gl(max(bi[,1]),nbin))) bit=t(bi[,nambi]) bpt=t(bp[,nambp]) htt=t(ht[,namht]) mst=t(ms[,namms]) col=c(1:ncol(bit)) bin=as.matrix(bit[,col[1]]) for(k in 2:ncol(bit)) {bin=rbind(bin,as.matrix(bit[,col[k]]))} bpn=as.matrix(bpt[,col[1]]) for(k in 2:ncol(bpt)) {bpn=rbind(bpn,as.matrix(bpt[,col[k]]))} htn=as.matrix(htt[,col[1]]) for(k in 2:ncol(htt)) {htn=rbind(htn,as.matrix(htt[,col[k]]))} msn=as.matrix(mst[,col[1]]) for(k in 2:ncol(mst)) {msn=rbind(msn,as.matrix(mst[,col[k]]))} new1=cbind(pist,bin,bpn,htn,msn) names1=c("pista","bin","bp","ht","ms") colnames(new1)=names1 #graph is a data.frame, new1 is a matrix graph=merge(id,as.data.frame(new1)) o1graph=graph[order(graph$bin),] # unique values to alleles a=split(o1graph[6:7],o1graph$bin) a1=round(t(sapply(a,mean))) colnames(a1)=c("bin","bpr") a2=as.matrix(sapply(a,nrow)) a3=cbind(a1,a2); colnames(a3)=c("bin","bpr","nobs") a4=merge(o1graph,a3) a5=a4[order(a4$pista,a4$bpr, decreasing=T),] stat1=a5[order(a5$pista),] rm(list=paste(c("a"),1:5,sep="")) #bp=0 and bpr=0 transformed to NA data stat1$bp[stat1$bp==0]=NA; stat1$bpr[stat1$bpr==0]=NA #total on all observations tot=bp0*round(((max(stat1$bpr, na.rm=T)-min(stat1$bpr, na.rm=T))/bp0)+1) a1=min(stat1$bpr, na.rm=T):(min(stat1$bpr, na.rm=T)+tot-1) a2=cbind(rep(c(1:bp0),(tot/bp0)),a1); colnames(a2)=c("step","bpr") a3=a2[order(a2[,2], decreasing=T),] a30=gl(max(as.numeric(stat1$pista)),tot) a31=rep(a3[,1],max(as.numeric(stat1$pista))) a32=rep(a3[,2],max(as.numeric(stat1$pista))) a4=cbind(a30,a31,a32) colnames(a4)=c("pista","step","bpr") a5=merge(stat1, a4, all=T, sort=F) todo=a5[order(a5$pista),] todo$ht[is.na(todo$ht)==T]=0 todo$ht[todo$ms<=minms & todo$ht <= minht]=0 rm(list=paste(c("a"),1:5,sep="")) rm(list=paste(c("a"),30:32,sep="")) todo$aux=-todo$bpr a1=todo[order(todo$pista,todo$step,todo$aux),] a1$unht=a1$ht a1$cht=0 a1[1,15]=a1[1,14] for(i in 2:nrow(a1)) {a1[i,15]=a1[(i-1),14]} a1$adjht=a1$unht-sr*a1$cht a2=a1[c("pista","bpr","bin","fname","lane","dye", "marker","bp","ht","ms","step","adjht")] a2$adjht[a2$adjht<=0]=0 stut=a2[is.na(a2$marker)==F,] rm(list=paste(c("a"),1:2,sep="")) ## begin the frequencies calculus a1=split(stut[,12],as.numeric(stut[,1])) a2=sapply(a1,sum) a3=cbind(unique(stut$pista),a2); colnames(a3)=c("pista","htot") stut$pista=as.numeric(stut$pista) freq1=merge(stut, a3, all=T, sort=F) rm(list=paste(c("a"),1:3,sep="")) for (nn in 1:nrow(freq1)) { if (freq1$htot[nn] != 0) freq1$freq1[nn]=freq1$adjht[nn]/freq1$htot[nn] else freq1$freq1[nn] = 0 } freq1$freq1[freq1$freq1 <= minf]=0 a1=split(freq1[,14],freq1[,1]) a2=sapply(a1,sum) a3=cbind(unique(stut$pista),a2); colnames(a3)=c("pista","ftot") freq2=merge(freq1, a3, all=T, sort=F) rm(list=paste(c("a"),1:3,sep="")) for (nn in 1:nrow(freq2)) { if (freq2$ftot[nn] != 0) freq2$adjfr[nn]=freq2$freq1[nn]/freq2$ftot[nn] else freq2$adjfr[nn] = 0 } freq3=freq2[c("pista","step","bpr","bin","fname","lane", "marker","adjfr")] freq4=freq3[order(freq3$pista,freq3$bpr, decreasing=T),] freq=freq4[order(freq4$pista),] rm(list=paste(c("freq"),1:4,sep="")) ## frequencies by marker fin1=freq[is.na(freq$bpr)==F,] ## names of the observations entnames=as.character(unique(fin1$fname)) nument=(length(entnames)) fin2=split(fin1[,c(3,4,5,7,8)],as.numeric(fin1$pista)) #col1=c(1:length(fin2)) fin3=fin2[[1]] colnames(fin3)=c("bpr","bin",paste(c("fname"),1,sep=""),"marker",paste(c("adjfr"),1,sep="")) for(k in 2:maxpis) {colnames(fin2[[k]])=c("bpr","bin",paste(c("fname"),k,sep=""),"marker",paste(c("adjfr"),k,sep="")) fin3=merge(fin3,fin2[[k]],by=c("bin", "bpr","marker"),all=T,sort=F) } fin4=fin3[order(fin3$bpr),] #namfin4=c("bin","bpr","marker",paste(c("fname","adjfr"),gl(maxpis,2),sep="")) #colnames(fin4)=namfin4 final1=fin4[,c(3,2,4:ncol(fin4))] rm(list=paste(c("fin"),1:4,sep="")) for(k in seq(4, ncol(final1), by=2)) {final1[,k][is.na(final1[,k])==T]=0} final2=final1[,c(1,seq(2, ncol(final1), by=2))] ## delete rows with sum of frequencies = 0 for(k in 1:nrow(final2)) {final2[k,(nument+3)]=sum(final2[k,3:(nument+2)])} final3=final2[(final2[,nument+3] != 0),] final4=final3[,1:(nument+2)] #colnames(final4)=c("marker","bpr",entnames) sum(final4[,3:ncol(final4)],na.rm=T) last1=rbind(entnames,final4[,3:ncol(final4)]) last2=t(last1) last3=last2[order(last2[,1]),] last4=t(last3) entnames1=as.character(last4[1,]) last5=last4[2:nrow(last4),1:ncol(last4)] last6=matrix(as.numeric(last5),nrow(last5),ncol(last5)) final=cbind(final4[,1:2],last6[,1:ncol(last6)]) colnames(final)=c("marker","bpr",entnames1) #Change Lane 3: Name of the output file ****************************************************** #write.csv(final, file="phi031-out.csv", quote=F) #write.csv(final, file="phi056-out.csv", quote=F) #write.csv(final, file="phi062-out.csv", quote=F) #write.csv(final, file="phi084-out.csv", quote=F) write.csv(final, file="phi115-out.csv", quote=F) rm(list=paste(c("final"),1:4,sep="")) rm(list=paste(c("last"),1:6,sep="")) rm(list=ls())