p値の和 - ryamadaの遺伝学・遺伝統計学メモ

n個の独立なp値(0-1)があるときにその和は0からnに分布する
その確率密度関数・累積密度関数を作る
こちらが説明
# n個の独立な0-1均一乱数の和の確率密度関数

pdfSum<-function(n){
	library(polynom)
	# n個の変数があると、n個の関数が必要
	# それを格納するリスト
	Ps<-list()
	Ps[[1]]<-list()
	# n=1の場合は指定可能
	Ps[[1]][[1]]<-as.polynomial(c(1))
	# あとは、漸化式
	for(i in 2:n){
		Ps[[i]]<-list()
		Ps[[i]][[1]]<-integral(Ps[[i-1]][[1]])
		tmpcoef<-coef(Ps[[i]][[1]])
		# 0<=T<=1と n-1<=T<=nの部分は、２つの関数にまたがらないので特別
		# この両端の対称性を使う
		Ps[[i]][[i]]<-as.polynomial(tmpcoef*((-1)^(0:(length(tmpcoef)-1))))
		# 原点移動関数を使う
		Ps[[i]][[i]]<-change.origin(Ps[[i]][[i]],-i)
		# 両端以外の関数を作る
		if(i > 2){
			for(j in 2:(i-1)){
				tmp1<-integral(Ps[[i-1]][[j-1]])
				tmp1.2<-change.origin(tmp1,-1)
				tmp2<-integral(Ps[[i-1]][[j]])
				# ２つの定積分のうちTに依存する部分を出して足し合わせる
				coef1<-coef(tmp1)
				coef1.2<--coef(tmp1.2)
				#print(coef1.2)
				coef2<-coef(tmp2)
				#print(coef2)
				coef<-coef1.2+coef2
				# ２つの定積分の定数部分
				C1<-sum((j-1)^(0:(length(coef1)-1))*coef1)
				C2<-sum((j-1)^(0:(length(coef2)-1))*coef2)
				coef[1]<-coef[1]-C2+C1
				Ps[[i]][[j]]<-as.polynomial(coef)
			}
		}
	}
	Ps

}
n<-10
Ps<-pdfSum(n)
# 積分して１になることの確かめ
for(i in 1:n){
	v<-0
	for(j in 1:i){
		tmp<-integral(Ps[[i]][[j]])
		tmpcoef<-coef(tmp)
		v<-v-sum(tmpcoef*(j-1)^(0:(length(tmpcoef)-1)))+sum(tmpcoef*j^(0:(length(tmpcoef)-1)))
		#print(v)
	}
	print(v)
}
# 累積分布関数
cdfSum<-function(n,Ps=NULL,x,lower.tail=TRUE){
	if(is.null(Ps)){
		Ps<-pdfSum(n)
	}
	ret<-0
	upto<-ceiling(x)
	if(upto>0){
		
		uptos<-1:upto
		fromheres<-uptos-1
		uptos[length(uptos)]<-x
		for(i in 1:length(uptos)){
			tmp<-integral(Ps[[n]][[i]])
			tmpcoef<-coef(tmp)
			ret<-ret-sum(tmpcoef*(fromheres[i])^(0:(length(tmpcoef)-1)))+sum(tmpcoef*uptos[i]^(0:(length(tmpcoef)-1)))
		}
	}
	ret2<-ret
	if(!lower.tail)ret2<-1-ret
	ret2
}

n<-4
testns<-seq(from=0,to=n,by=0.01)

cdf.out<-rep(0,length(testns))
for(i in 1:length(cdf.out)){
	cdf.out[i]<-cdfSum(n,x=testns[i],lower.tail=TRUE)
}
plot(testns,cdf.out)