判定条件の検討 - ryamadaの遺伝学・遺伝統計学メモ

スクリーニングという方法がある
「完璧」はありえない
感度を立てれば特異度を失う、そしてその逆も
量的検査値でこれを考えるときにはROCカーブが使える
スクリーニングでは、方法・条件をパラメタ化し、パラメタの値ごとに感度・特異度が決まることがある。その場合、パラメタの大小と個々のサンプルのIN/OUTとが順序対応するとは限らないので、いわゆるROCカーブ関数をそのまま使うのには適さない
そんな話

library(pROC)
# Simulated cohort: 500 truly positive and 4500 truly negative samples.
N.true.positive <- 500
N.true.negative <- 4500
N.all <- sum(N.true.positive, N.true.negative)
# Ground-truth labels: positives (1) come first, negatives (0) follow.
truth <- rep(c(1, 0), times = c(N.true.positive, N.true.negative))

# Case 1: every sample yields a single quantitative value, and the call is
# made by thresholding that value.
# Values are Poisson counts with mean 20 for true negatives and 25 for true
# positives; rpois() is vectorised over lambda, so no per-sample loop is needed.
V <- rpois(N.all, lambda = 20 + truth * 5)

# 2 x 3 grid shared by every figure drawn by this script.
par(mfrow = c(2, 3))
boxplot(V ~ truth)

# State the control/case coding and the comparison direction explicitly
# instead of letting pROC auto-detect them (positives are simulated with the
# larger mean, so controls < cases).
roc.out <- roc(
  response = truth,
  predictor = V,
  levels = c(0, 1),
  direction = "<"
)
plot(roc.out)

# Case 2: each screening-condition parameter gives its own IN (1) / OUT (0)
# call for every sample.
param <- 0:6

# Probability of being screened IN for every sample (row) x condition (column):
#   true positives: 0.8^j * 0.8
#   true negatives: 0.6^j * 0.4
# NOTE(review): the exponent is the column index j (1..7), not param[j]
# (0..6); kept exactly as in the original script -- confirm this is intended.
prob.in <- outer(
  truth, seq_along(param),
  function(is.case, j) ifelse(is.case == 1, 0.8^j * 0.8, 0.6^j * 0.4)
)

# One Bernoulli draw per cell. rbinom() walks prob.in in column-major order,
# which is the same order matrix() uses to fill, so cell [i, j] is drawn with
# probability prob.in[i, j].
Vs <- matrix(
  rbinom(length(prob.in), size = 1, prob = prob.in),
  nrow = N.all,
  ncol = length(param)
)

# Screening performance for every condition (one value per column of Vs).
# Rows 1..N.true.positive are the true positives; the remaining rows up to
# N.all are the true negatives.
case.rows <- 1:N.true.positive
control.rows <- (N.true.positive + 1):N.all

# Per-condition counts of correctly flagged positives / correctly passed
# negatives.
true.pos <- colSums(Vs[case.rows, , drop = FALSE] == 1)
true.neg <- colSums(Vs[control.rows, , drop = FALSE] == 0)

Sens <- true.pos / N.true.positive
Spec <- true.neg / N.true.negative
# Predictive values divide by everything called IN (resp. OUT) in that column.
PPV <- true.pos / colSums(Vs == 1)
NPV <- true.neg / colSums(Vs == 0)
# Point labels: the parameter value of each condition.
cond.labels <- paste("", param)
unit.range <- c(0, 1)

# ROC-style plot: one point per screening condition, labelled with its
# parameter value just to the right of the point.
fpr <- 1 - Spec
plot(
  fpr, Sens,
  xlim = unit.range, ylim = unit.range, type = "b",
  xlab = "1 - Spec", ylab = "Sens"
)
text(x = fpr + 0.05, y = Sens, labels = cond.labels)

# Same layout for the predictive values: PPV against 1 - NPV.
npv.complement <- 1 - NPV
plot(
  npv.complement, PPV,
  xlim = unit.range, ylim = unit.range, type = "b",
  xlab = "1 - NPV", ylab = "PPV"
)
text(x = npv.complement + 0.05, y = PPV, labels = cond.labels)

# All four measures against the condition index, one line per measure.
matplot(cbind(Sens, Spec, PPV, NPV), type = "l")
legend(
  x = 5, y = 0.5,
  legend = c("Sens", "Spec", "PPV", "NPV"),
  col = 1:4, lty = 1:4
)