- 昨日の話題は説明変数が2値、従属変数も2値
- 従属変数を3値に拡張
- また、計算を少し速く
- 図は従属変数が3値。灰色は仮説空間。黒い点は複数の仮説が指し示す「最尤モデル」でその強さを点の大きさにしたもの。赤い点は全体総合の「平均」
my.marginal.apply <- function(A,v,fun="sum"){
if(length(v)==0){
return(fun(A))
}else{
return(apply(A,v,fun))
}
}
my.multi.cat <- function(D,P){
n <- length(D[1,])
tmp <- apply(t(D)*2^(0:(n-1)),2,sum)+1
tmp.1 <- tmp[which(P==0)]
tmp.2 <- tmp[which(P==1)]
tab.1 <- tabulate(tmp.1,2^n)
tab.2 <- tabulate(tmp.2,2^n)
return(list(A1=tab.1,A2=tab.2))
}
my.p.seq <- function(p.len){
tmp <- seq(from=0,to=1,length=p.len)
tmp <- tmp[-1]
tmp <- tmp[-length(tmp)]
return(tmp)
}
my.d.beta <- function(D,P,phenotype,p.len=100,pre.p=rep(1/(2^length(D[1,])),2^length(D[1,])),model="all"){
n <- length(D[1,])
if(model=="full"){
X <- rbind(rep(0,n),rep(1,n))
}else if(model=="all"){
X <- expand.grid(rep(list(0:1),n))
}else if(model=="alternative"){
X <- matrix(rep(1,n),nrow=1)
}
As <- my.multi.cat(D,P)
A1 <- array(As[[1]],rep(2,n))
A2 <- array(As[[2]],rep(2,n))
p <- my.p.seq(p.len)
K <- rep(0,length(X[,1]))
prs.all <- list()
for(i in 1:length(phenotype[,1])){
prs.all[[i]] <- matrix(0,length(X[,1]),length(p))
}
for(i in 1:length(X[,1])){
s <- which(X[i,]==1)
tmp1 <- my.marginal.apply(A1,s,sum)
tmp2 <- my.marginal.apply(A2,s,sum)
marg <- tmp1+tmp2
K[i] <- sum(lgamma(marg+2)-lgamma(tmp1+1)-lgamma(tmp2+1))
locs <- rep(1,length(prs.all))
for(j in 1:length(locs)){
if(length(s)>0){
locs[j] <- sum(phenotype[j,s]*2^(0:(length(s)-1)))+1
}
prs.all[[j]][i,] <- dbeta(p,tmp1[locs[j]]+1,tmp2[locs[j]]+1)
}
}
K.2 <- -K+K[length(K)]
pre.p <- rep(1/length(X[,1]),length(X[,1]))
post.p <- pre.p * exp(K.2)
post.p <- post.p/sum(post.p)
PR.all <- matrix(0,length(prs.all),length(p))
for(j in 1:length(phenotype[,1])){
PR <- rep(0,length(p))
for(i in 1:length(pre.p)){
PR <- PR + post.p[i]*prs.all[[j]][i,]
}
PR.all[j,] <- PR
}
return(list(As=As,phenotype=phenotype,p=p,pre.p=pre.p,post.p=post.p,prs=prs.all,LL=K.2,est.pr=PR.all))
}
my.d.beta.models <- function(D,P,phenotype,p.len=100,pre.p=rep(1/(2^length(D[1,])),2^length(D[1,])),models=c("all","full","alt")){
ret <- list()
for(i in 1:length(models)){
ret[[i]] <- my.d.beta(D,P,phenotype,p.len=p.len,pre.p=pre.p,model=models[i])
}
return(list(models=models,result = ret))
}
my.marginal.apply <- function(A,v,fun="sum"){
if(length(v)==0){
return(fun(A))
}else{
return(apply(A,v,fun))
}
}
my.full.model <- function(n){
expand.grid(rep(list(0:1),n))
}
my.convert.cat <- function(D){
n <- length(D[1,])
apply(t(D)*2^(0:(n-1)),2,sum)+1
}
my.tabulate.cat <- function(R,N){
tabulate(R,N)
}
my.count.N <- function(A,n){
X <- my.full.model(n)
N <- 2^n
ret <- list()
for(i in 1:N){
s <- which(X[i,]==1)
ret[[i]] <- my.marginal.apply(A,s,sum)
}
ret
}
my.vars.id <- function(vars){
sum(vars*2^(0:(length(vars)-1)))+1
}
my.locator.N <- function(phenotype,vars,var.id){
s <- which(vars==1)
loc <- 1
if(length(s)>0){
loc <- sum(phenotype[s]*2^(0:(length(s)-1)))+1
}
return(c(var.id,loc))
}
my.locator.phenotype.2 <- function(phenotype,vars){
vars.p <- t(vars)*phenotype
cum.vars <- t(apply(vars,1,cumsum))-1
if(length(vars.p[,1])==1){
tmp <- matrix((vars.p) * (2^cum.vars),ncol=1)
}else{
tmp <- t(vars.p) * (2^cum.vars)
}
apply(tmp,1,sum)+1
}
my.loc.phenotype <- function(phenotype,X=my.full.model(length(phenotype))){
ret <- matrix(0,length(X[,1]),2)
for(i in 1:length(X[,1])){
ret[i, ]<- my.locator.N(phenotype,X[i,],i)
}
ret
}
my.LL <- function(B.level,N){
n.level <- length(B.level)
K <- rep(1,N)
for(i in 1:N){
tmp <- matrix(0,n.level,length(B.level[[1]][i][[1]]))
for(j in 1:n.level){
tmp[j,] <- B.level[[j]][i][[1]]
}
marg <- apply(tmp,2,sum)
K[i] <- sum(sum(lgamma(marg+n.level-1+1))-sum(lgamma(c(tmp)+1)))
}
-K+K[length(K)]
}
my.level.hx <- function(B.level,Loc){
n.level <- length(B.level)
N <- length(Loc)
N.level <- matrix(0,n.level,N)
for(i in 1:n.level){
for(j in 1:N){
N.level[i,j] <- B.level[[i]][j][[1]][Loc[j]]
}
}
N.level
}
my.composit.beta <- function(N.level,K.pr,p){
ret <- rep(0,length(p))
for(i in 1:length(N.level[1,])){
tmp <- dbeta(p,N.level[1,i]+1,N.level[2,i]+1)
ret <- ret + K.pr[i]*tmp
}
ret
}
library(MCMCpack)
my.composit.dirichlet <- function(N.level,K.pr,n.pt=1000){
n.level <- length(N.level[,1])
crds <- rdirichlet(n.pt,rep(1,n.level))
ret <- rep(0,n.pt)
for(i in 1:n.pt){
for(j in 1:length(N.level[1,])){
tmp <- ddirichlet(crds[i,],N.level[,j]+1)
ret[i] <- ret[i] + K.pr[j]*tmp
}
}
return(list(pr=ret,crds=crds))
}
my.composit.dirichlet.exp <- function(N.level,K.pr){
n.level <- length(N.level[,1])
ret.mean <- rep(0,n.level)
ret.mode.list <- matrix(0,length(N.level[1,]),n.level)
ret.mode.dens <- rep(0,length(N.level[1,]))
for(j in 1:length(N.level[1,])){
tmp.mean <- (N.level[,j]+1)/(sum(N.level[,j]+1))
tmp.mode <- (N.level[,j])/(sum(N.level[,j]))
ret.mean <- ret.mean + K.pr[j]*tmp.mean
ret.mode.list[j,] <- tmp.mode
ret.mode.dens[j] <- K.pr[j]*ddirichlet(tmp.mode,N.level[,j]+1)
}
return(list(mean=ret.mean,modes=ret.mode.list,mode.dens=ret.mode.dens))
}
my.CD <- function(R,P,phenotype,n.level,dir.est=FALSE,n.pt=1000){
if(n.level==2){
dir.mc=FALSE
}else{
dir.mc=TRUE
}
n <- length(phenotype)
N <- 2^n
P.levels <- 0:(n.level-1)
R.level <- list()
for(i in 1:n.level){
R.level[[i]] <- my.tabulate.cat(R[which(P==P.levels[i])],N)
}
A.level <- list()
for(i in 1:n.level){
A.level[[i]] <- array(R.level[[i]],rep(2,n))
}
B.level <- list()
for(i in 1:n.level){
B.level[[i]] <- my.count.N(A.level[[i]],n)
}
K <- my.LL(B.level,N)
K.pr <- exp(K)
K.pr <- K.pr/sum(K.pr)
X <- my.full.model(n)
Loc.2 <- my.locator.phenotype.2(phenotype,X)
N.level <- my.level.hx(B.level,Loc.2)
if(dir.est){
return(my.composit.dirichlet.exp(N.level,K.pr))
}else{
if(dir.mc){
Est.D.dirichlet <- my.composit.dirichlet(N.level,K.pr,n.pt=n.pt)
Est.D <- Est.D.dirichlet$pr
p <- Est.D.dirichlet$crds
}else{
p <- seq(from=0,to=1,length=100)
Est.D <- my.composit.beta(N.level,K.pr,p)
p <- cbind(p,1-p)
}
return(list(R.level=R.level,A.level=A.level,B.level=B.level,K.pr=K.pr,Loc=Loc.2,N.level=N.level,p=p,Est.D=Est.D))
}
}
n <- 2
n.level <- 2
N <- 2^n
phenotype <- rep(0,n)
n.samples <- c(10,20)
R <- rep(0,sum(n.samples))
R <- sample(1:N,length(R),replace=TRUE)
P <- c(rep(0,n.samples[1]),rep(1,n.samples[2]))
my.cd.out <- my.CD(R,P,phenotype,n.level)
plot(my.cd.out$p[,1],my.cd.out$Est.D,type="l")
n <- 2
n.level <- 3
N <- 2^n
phenotype <- rep(0,n)
n.samples <- c(10,20,10)
R <- rep(0,sum(n.samples))
R <- sample(1:N,length(R),replace=TRUE)
P <- c(rep(0,n.samples[1]),rep(1,n.samples[2]),rep(2,n.samples[3]))
my.cd.out.2 <- my.CD(R,P,phenotype,n.level,dir.est=TRUE)
my.cd.out.3 <- my.CD(R,P,phenotype,n.level,dir.est=FALSE,n.pt=3000)
M <- matrix(c(1,-1/2,-1/2,0,sqrt(3)/2,-sqrt(3)/2),byrow=TRUE,ncol=3)
crds.3 <- my.cd.out.3$p %*% t(M)
prr <- my.cd.out.3$Est.D
logprr<-log(prr)
plot(crds.3,col=gray((max(prr)-prr)/(max(prr)-min(prr))*0.9),pch=20)
plot(crds.3,col=gray(0.95),pch=20)
MeAn <- M %*% my.cd.out.2[[1]]
Modes <- my.cd.out.2[[2]] %*% t(M)
points(Modes,pch=20,cex=my.cd.out.2[[3]]/max(my.cd.out.2[[3]])*3)
points(MeAn[1],MeAn[2],pch=20,cex=2,col=2)