- 911WTCでも使われたソフトウェア→こちら
- kinshipパッケージのPDF解説文書→こちら
- forensicパッケージのPDF解説文書→こちら
- paramlinkパッケージのPDF解説文書→こちら
MakeFamilyGraph<-function(d){
}
library(kinship)
par(mfcol=c(1,2))
test1 <- data.frame(id =c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14), mom =c(0, 0, 0, 0, 2, 2, 4, 4, 6, 2, 0, 0, 12, 13), dad =c(0, 0, 0, 0, 1, 1, 3, 3, 3, 7, 0, 0, 11, 10),sex =c(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1))
affected<-sample(c(0,1),length(test1$sex),replace=TRUE)
ptemp<-pedigree(id=test1$id,dadid=test1$dad,momid=test1$mom,sex=test1$sex,affected=affected)
plot(ptemp)
test1<-as.matrix(test1)
maxid<-max(test1[,1])
fakem<-maxid+1
fakef<-fakem+1
test2<-matrix(0,length(test1[,1])+2,length(test1[1,]))
test2[,1]<-c(test1[,1],fakem,fakef)
test2[,2]<-c(test1[,2],0,0)
test2[which(test1[,2]==0),2]<-fakem
test2[,3]<-c(test1[,3],0,0)
test2[which(test1[,3]==0),3]<-fakef
test2[,4]<-c(test1[,4],1,0)
affected<-c(affected,0,0)
ptemp2<-pedigree(id=test2[,1],dadid=test2[,3],momid=test2[,2],sex=test2[,4],affected=affected)
plot(ptemp2)
par(mfcol=c(1,1))
NL<-15
Alleles<-NULL
Probs<-NULL
library(MCMCpack)
for(i in 1:NL){
na<-sample(3:8,1)
st<-sample(10:20,1)
tmp<-NULL
for(j in 1:na){
a<-paste("",st+j,sep="")
tmp<-c(tmp,a)
}
Alleles[[i]]<-tmp
Probs[[i]]<-c(rdirichlet(1,rep(1,na)))
print(sum(Probs[[i]]))
}
Alleles
Probs
RandomGenotypeFamily<-function(d,n,Alleles,Probs){
ns<-length(d[,1])
g<-array(0,c(ns,2,n))
noparents<-which(d[,2]==0,d[,3]==0)
for(i in noparents){
for(j in 1:n){
g[i,,j]<-sample(Alleles[[j]],2,replace=TRUE,Probs[[j]])
}
}
given<-rep(0,ns)
given[noparents]<-1
loop<-TRUE
if(sum(given)==ns)loop<-FALSE
while(loop){
yet<-which(given==0)
for(i in yet){
prt1<-d[i,2]
prt2<-d[i,3]
if(given[prt1]+given[prt2]==2){
for(j in 1:n){
g[i,1,j]<-sample(g[prt1,,j],1)
g[i,2,j]<-sample(g[prt2,,j],1)
}
given[i]<-1
}
}
if(sum(given)==ns)loop<-FALSE
}
g
}
G<-RandomGenotypeFamily(test1,NL,Alleles,Probs)
> G
, , 1
[,1] [,2]
[1,] "13" "15"
[2,] "15" "15"
[3,] "13" "15"
[4,] "15" "15"
[5,] "15" "13"
[6,] "15" "15"
[7,] "15" "15"
[8,] "15" "13"
[9,] "15" "15"
[10,] "15" "15"
[11,] "15" "15"
[12,] "15" "15"
[13,] "15" "15"
[14,] "15" "15"
, , 2
[,1] [,2]
[1,] "17" "17"
[2,] "16" "17"
[3,] "17" "16"
[4,] "18" "16"
[5,] "17" "17"
[6,] "16" "17"
...
- ランダムにHWE集団のジェノタイプのサンプルプールを作成しよう
RandomGenotype<-function(N,n,Alleles,Probs){
ns<-N
g<-array(0,c(ns,2,n))
noparents<-1:ns
for(i in noparents){
for(j in 1:n){
g[i,,j]<-sample(Alleles[[j]],2,replace=TRUE,Probs[[j]])
}
}
g
}
Np<-20
Gpool<-RandomGenotype(Np,NL,Alleles,Probs)
MakeSearchPattern<-function(v){
library(gtools)
nt<-length(v)
x<-0
X<-NULL
for(i in 1:nt){
tmp<-combinations(nt,i)
tmp2<-tmp
tmp2[1:length(tmp2)]<-c(v[tmp])
X[[i]]<-tmp2
x<-sum(x,length(X[[i]][,1]))
}
list(X=X,x=x)
}
tobeSearched<-which(Type==2)
SearchPattern<-MakeSearchPattern(tobeSearched)
> SearchPattern
$X
$X[[1]]
[,1]
[1,] 3
[2,] 7
$X[[2]]
[,1] [,2]
[1,] 3 7
$x
[1] 3
- 探索パターン
- 複数の探索個人をプールしたサンプルに対応付けようとするとき、
- 探索個人はサンプルプールに複数の候補があるとする
- 複数の探索個人が、同一のサンプルを候補とする場合もあるとする
- そのときに、対応付けにあっては、探索個人は異なるサンプルに対応付ける必要があるので、
- 探索パターンは、複数の探索個人の候補に関する積パターンのうち、同一サンプルを候補に挙げている場合を除いたものとなる
- それを作る関数が以下
targets<-c(2,6)
candidates<-NULL
for(i in 1:length(targets)){
candidates[[i]]<-sample(1:Npool,3)
}
candidates[[1]][1]<-candidates[[2]][1]
MakeTestPattern<-function(candidates){
gr<-expand.grid(candidates)
n<-length(gr[,1])
ok<-rep(1,n)
for(i in 1:n){
tmp<-unlist(gr[i,])
tmp2<-outer(tmp,tmp,"-")
diag(tmp2)<-1
if(prod(tmp2)==0){
ok[i]<-0
}
}
gr[which(ok==1),]
}
MakeTestPattern(candidates)
- 実際には、探索したい個人のリストがあり、それぞれの個人をプールのどれかに対応付けたいのでMakeSearchPattern()とMakeTestPattern()を組み合わせて探索することになる
for(i in 1:length(SearchPattern$X)){
for(j in 1:length(SearchPattern$X[[i]][,1])){
numtested<-length(SearchPattern$X[[i]][j,])
tmpcandidates<-NULL
ToTestCurrent<-SearchPattern$X[[i]][j,]
for(k in 1:numtested){
selectedToBeTested<-which(tobeSearched==SearchPattern$X[[i]][j,k])
tmpcandidates[[k]]<-candidates[[selectedToBeTested]]
}
tmpTestPattern<-MakeTestPattern(tmpcandidates)
print("to be tested IDs in pedigree")
print(ToTestCurrent)
print("to be tested sampleIDs in pool")
print(tmpTestPattern)
}
}
[1] "to be tested IDs in pedigree"
[1] 3
[1] "to be tested sampleIDs in pool"
[,1]
[1,] 14
[2,] 27
[3,] 26
[1] "to be tested IDs in pedigree"
[1] 7
[1] "to be tested sampleIDs in pool"
[,1]
[1,] 14
[2,] 34
[3,] 30
[1] "to be tested IDs in pedigree"
[1] 3 7
[1] "to be tested sampleIDs in pool"
Var1 Var2
2 27 14
3 26 14
4 14 34
5 27 34
6 26 34
7 14 30
8 27 30
9 26 30
library(kinship)
p<-matrix(
c(1:8,
0,0,1,0,3,3,3,3,
0,0,2,0,4,4,4,4,
1,0,1,0,1,1,0,0,
1,1,2,1,1,1,2,1),
ncol=5)
MakePedigreeFromFamilyInfo<-function(p){
ns<-length(p[,1])
affected<-status<-rep(1,ns)
affected[which(p[,5]==3)]<-0
status[which(p[,5]==1)]<-0
ptemp<-pedigree(id=p[,1],dadid=p[,3],momid=p[,2],sex=p[,4],affected=affected,status=status)
if(sum(ptemp$affected)==0)ptemp$affected<-affected
ptemp
}
ptemp<-MakePedigreeFromFamilyInfo(p)
plot(ptemp)
NL<-15
Alleles<-NULL
Probs<-NULL
library(MCMCpack)
for(i in 1:NL){
na<-sample(3:8,1)
st<-sample(10:20,1)
tmp<-NULL
for(j in 1:na){
a<-paste("",st+j,sep="")
tmp<-c(tmp,a)
}
Alleles[[i]]<-tmp
Probs[[i]]<-c(rdirichlet(1,rep(1,na)))
}
Alleles
Probs
RandomGenotype<-function(N,n,Alleles,Probs){
ns<-N
g<-array(0,c(ns,2,n))
noparents<-1:ns
for(i in noparents){
for(j in 1:n){
g[i,,j]<-sample(Alleles[[j]],2,replace=TRUE,Probs[[j]])
}
}
g
}
Np<-3
Gpool<-RandomGenotype(Np,NL,Alleles,Probs)
RandomGenotypeFamily<-function(d,n,Alleles,Probs){
ns<-length(d[,1])
g<-array(0,c(ns,2,n))
noparents<-which(d[,2]==0,d[,3]==0)
for(i in noparents){
for(j in 1:n){
g[i,,j]<-sample(Alleles[[j]],2,replace=TRUE,Probs[[j]])
}
}
given<-rep(0,ns)
given[noparents]<-1
loop<-TRUE
if(sum(given)==ns)loop<-FALSE
while(loop){
yet<-which(given==0)
for(i in yet){
prt1<-d[i,2]
prt2<-d[i,3]
if(given[prt1]+given[prt2]==2){
for(j in 1:n){
g[i,1,j]<-sample(g[prt1,,j],1)
g[i,2,j]<-sample(g[prt2,,j],1)
}
given[i]<-1
}
}
if(sum(given)==ns)loop<-FALSE
}
g
}
G<-RandomGenotypeFamily(p,NL,Alleles,Probs)
Gpool2<-array(0,c(length(Gpool[,1,1])+length(G[,1,1]),2,NL))
Gpool2[1:length(G[,1,1]),,]<-G
Gpool2[(length(G[,1,1])+1):length(Gpool2[,1,1]),,]<-Gpool
Gpool2
Type<-p[,5]
Npool<-length(Gpool2[,1,1])
G2<-array(0,c(length(p[,1]),2,NL))
G2[which(Type==1),,]<-G[which(Type==1),,]
MakeSearchPattern<-function(v){
library(gtools)
nt<-length(v)
x<-0
X<-NULL
for(i in 1:nt){
tmp<-combinations(nt,i)
tmp2<-tmp
tmp2[1:length(tmp2)]<-c(v[tmp])
X[[i]]<-tmp2
x<-sum(x,length(as.matrix(X[[i]])[,1]))
}
list(X=X,x=x)
}
candidates<-NULL
tobeSearched<-which(p[,5]==2)
for(i in 1:length(tobeSearched)){
candidates[[i]]<-1:Npool
}
MakeTestPattern<-function(candidates){
gr<-expand.grid(candidates)
n<-length(gr[,1])
ok<-rep(1,n)
for(i in 1:n){
tmp<-unlist(gr[i,])
tmp2<-outer(tmp,tmp,"-")
diag(tmp2)<-1
if(prod(tmp2)==0){
ok[i]<-0
}
}
as.matrix(gr[which(ok==1),])
}
OffspringGenotypeProb<-function(p1,p2){
V<-p1%*%t(p2)
U<-V+t(V)
U[lower.tri(U)]<-0
diag(U)<-diag(U)/2
U
}
LogLikelihoodTrio<-function(p1,p2,c){
V<-p1%*%t(p2)
U<-V+t(V)
U[lower.tri(U)]<-0
diag(U)<-diag(U)/2
}
CalcLogLikelihoodFamilyLocus<-function(p,g,A,P){
unknown<-"0"
ns<-length(p[,1])
na<-length(A)
gvector<-array(0,c(ns,na,2))
given<-rep(0,ns)
for(i in 1:ns){
tmp<-0
if(g[i,1]!=unknown){
gvector[i,which(A==g[i,1]),1]<-1
tmp<-tmp+1
}
if(g[i,2]!=unknown){
gvector[i,which(A==g[i,2]),2]<-1
tmp<-tmp+1
}
if(tmp==2)given[i]<-1
}
loop<-TRUE
if(sum(given)==ns)loop<-FALSE
while(loop){
yet<-which(given==0)
for(i in yet){
if(p[i,2]+p[i,3]==0){
gvector[i,,1]<-gvector[i,,2]<-P
given[i]<-1
}else{
tmp1<-p[i,2]
tmp2<-p[i,3]
if(given[tmp1]+given[tmp2]==2){
gvector[i,,1]<-(gvector[tmp1,,1]+gvector[tmp1,,2])/2
gvector[i,,2]<-(gvector[tmp2,,1]+gvector[tmp2,,2])/2
given[i]<-1
}
}
}
if(sum(given)==ns)loop<-FALSE
}
h<-which(p[,5]==2)
for(i in h){
if(g[i,1]==unknown){
gvector[i,,1]<-gvector[i,,2]<-P
}
}
ret<-rep(0,ns)
for(i in 1:ns){
tmp1<-p[i,2]
tmp2<-p[i,3]
if(tmp1*tmp2>0){
P1<-(gvector[tmp1,,1]+gvector[tmp1,,2])/2
P2<-(gvector[tmp2,,1]+gvector[tmp2,,2])/2
FromParents<-OffspringGenotypeProb(P1,P2)
Offspring<-OffspringGenotypeProb(gvector[i,,1],gvector[i,,2])
ret[i]<-log(sum(FromParents*Offspring))
}
}
list(loglikelist=ret,loglikesum=sum(ret))
}
CalcLogLikelihoodFamily<-function(p,g,Alleles,Probs){
nl<-length(g[1,1,])
ret<-rep(0,nl)
for(i in 1:length(g[1,1,])){
tmp<-CalcLogLikelihoodFamilyLocus(p,g[,,i],Alleles[[i]],Probs[[i]])
ret[i]<-tmp$loglikesum
}
list(loglikelist=ret,loglikesum=sum(ret))
}
CalcLogLikelihoodFamily(p,G2,Alleles,Probs)
TestAllPatterns<-function(p,G,Gpool,candidates,Alleles,Probs){
ptemp<-MakePedigreeFromFamilyInfo(p)
tobeSearched<-which(p[,5]==2)
SearchPattern<-MakeSearchPattern(tobeSearched)
ret<-NULL
ret2<-NULL
ret3<-NULL
counter<-1
for(i in 1:length(SearchPattern$X)){
spx<-as.matrix(SearchPattern$X[[i]])
for(j in 1:length(spx[,1])){
ToTestCurrent<-spx[j,]
tmpG2<-G
tobeEvaluated<-rep(0,length(p[,1]))
tobeEvaluated[which(p[,5]==1)]<-1
tobeEvaluated[which(p[,5]==3)]<-1
tobeEvaluated[ToTestCurrent]<-1
tmpp<-p[which(tobeEvaluated==1),]
tmptmpG2<-tmpG2[which(tobeEvaluated==1),,]
print("---")
print(tobeEvaluated)
print(tmpp)
print(tmptmpG2)
ret[[counter]]<-CalcLogLikelihoodFamily(tmpp,tmptmpG2,Alleles,Probs)
ret2[[counter]]<-list(ToTestCurrent)
ret3[[counter]]<-list()
counter<-counter+1
numtested<-length(spx[j,])
tmpcandidates<-NULL
for(k in 1:numtested){
selectedToBeTested<-which(tobeSearched==spx[j,k])
tmpcandidates[[k]]<-candidates[[selectedToBeTested]]
}
tmpTestPattern<-MakeTestPattern(tmpcandidates)
for(k in 1:length(tmpTestPattern[,1])){
tmpG2<-G
tmpG2[ToTestCurrent,,]<-Gpool[tmpTestPattern[k,],,]
tobeEvaluated<-rep(0,length(p[,1]))
tobeEvaluated[which(p[,5]==1)]<-1
tobeEvaluated[which(p[,5]==3)]<-1
tobeEvaluated[ToTestCurrent]<-1
tmpp<-p[which(tobeEvaluated==1),]
tmptmpG2<-tmpG2[which(tobeEvaluated==1),,]
print("---")
print(tobeEvaluated)
print(tmpp)
print(tmptmpG2)
ret[[counter]]<-CalcLogLikelihoodFamily(tmpp,tmptmpG2,Alleles,Probs)
ret2[[counter]]<-list(ToTestCurrent)
ret3[[counter]]<-list(tmpTestPattern[k,])
counter<-counter+1
}
}
}
list(LogLike=ret,SearchedID=ret2,CandidateID=ret3,familyinfo=p,pedigree=ptemp,tobeSearched=tobeSearched,candidates=candidates,offeredGenotype=G,poolGenotype=Gpool,Alleles=Alleles,Probs=Probs)
}
pwithType<-cbind(p,Type)
pwithType
tap<-TestAllPatterns(p=p,G=G2,Gpool=Gpool2,candidates=candidates,Alleles=Alleles,Probs=Probs)
printTAPout<-function(tap){
n<-length(tap$LogLike)
for(i in 1:n){
tmp<-paste("LogLike",tap$LogLike[[i]][2],"SearchedID",c(tap$SearchedID[[i]]),"CandidateID",c(tap$CandidateID[[i]]))
print(tmp)
}
}
printTAPout(tap)
G[,,1]
Gpool2[,,1]