- Rには固定周辺度数を引数に分割表を乱数発生する関数r2dtable()がある(こちら)
- Patefield's algorithmを用いる
- 地道にやるのとPatefield's でやるのとの違いはどれくらい?
- ほとんど違わない
Nr<-2
Nc<-2
M<-matrix(sample(1:100,Nr*Nc,replace=TRUE),Nr,Nc)
M
rSum<-apply(M,1,sum)
cSum<-apply(M,2,sum)
Lab1<-Lab2<-c()
for(i in 1:Nr){
Lab1<-c(Lab1,rep(i,rSum[i]))
}
for(i in 1:Nc){
Lab2<-c(Lab2,rep(i,cSum[i]))
}
ExpTable<-ExpTable<-outer(rSum,cSum,"*")/sum(M)
Niter<-10000
ChiS<-rep(0,Niter)
ChiP<-rep(0,Niter)
ExPS<-ChiS
ExSS<-ChiS
tPs<-r2dtable(Niter,rSum,cSum)
tSs<-tPs
for(i in 1:Niter){
tmp<-cbind(Lab1,sample(Lab2))
tmpM<-matrix(0,Nr,Nc)
for(j in 1:length(tmp[,1])){
tmpM[tmp[j,1],tmp[j,2]]<-tmpM[tmp[j,1],tmp[j,2]]+1
}
tSs[[i]]<-tmpM
ChiS[i]<-sum((tSs[[i]]-ExpTable)^2/ExpTable)
ChiP[i]<-sum((tPs[[i]]-ExpTable)^2/ExpTable)
ExPS[i]<-fisher.test(tPs[[i]])$p
ExSS[i]<-fisher.test(tSs[[i]])$p
}
plot(pchisq(sort(ChiS),(Nc-1)*(Nr-1)),pchisq(sort(ChiP),(Nc-1)*(Nr-1)))
plot(sort(ExPS))
plot(sort(ExSS))
par(mfcol=c(1,2))
plot(log(ppoints(Niter,a=0)),cex=0.1,pch=19,log(sort(ExPS)),xlim=c(-8,0),ylim=c(-8,0))
abline(0,1,col=3)
plot(log(ppoints(Niter,a=0)),cex=0.1,pch=19,log(sort(ExSS)),col=2,xlim=c(-8,0),ylim=c(-8,0))
abline(0,1,col=3)