CNV多型をはじめとする多(3以上)アレルのHWE検定
アレルタイプ数をとする。Diploidではジェノタイプは。アレルをとし、その頻度を、観測本数をすると、ジェノタイプの観測人数がと表せる。
今、HWEにおける期待人数は,(ただしを観測人数、)と表される。
カイ自乗検定をするとすれば、、として表せる。
これを自由度で評価すれば、漸近検定になる。
他方、正確検定はどうなるか。
人、本の染色体。ホモの個体数はヘテロの個体数は。
観測ジェノタイプテーブルの生起確率は
書き換えて
これを、すべてのテーブルについて算出し、大小比較して累積することで正確確率が出る。・・・問題は、可能なテーブルすべてを網羅するための計算量ではあるが。昨日のソースにHWE検定を加えたらこう・・・(遅い)
package CNV; public class CNVcalculator { /* * アレル単位での最大コピー数をK * 最小コピー数をkとしたとき * Na=K-k+1 * Ng=Na*(Na+1)/2 * とすること! */ public int Na; //No. allele public int Ng; //No. genotype public double[][] t;//contingency table public int[][] t2;//CopyNumber別ジェノタイプテーブル public int[][] t3;//allele本数テーブル //public double[] w;//weight vector public double N;//No.samples public double R;//No.cases public double S;//No.controls public double[] n;//number vector int df=1;//自由度 public org.apache.commons.math.distribution.ChiSquaredDistributionImpl chidf1 = new org.apache.commons.math.distribution.ChiSquaredDistributionImpl(df); public org.apache.commons.math.distribution.ChiSquaredDistributionImpl chidfX; public double[][] hweChi; public double[][] hweExact; public double[][] assoc;//[0] trend,[1] MWW,[2] CNpool /** * @param args */ public static void main(String[] args) { // TODO 自動生成されたメソッド・スタブ int cnvN=1; int caseN=500; int contN=500; int numallele = 2; int numgenotype = (int)(numallele*(numallele+1)/2); int x=10000000; int seed=(int)(Math.random()*x); Utils.MersenneTwisterFast mz= new Utils.MersenneTwisterFast(seed); double[] sl = StatUtilsX.Fisher.serialLogfact((caseN+contN)*2); for(int i=0;i<cnvN;i++){ int[][] table=new int[2][numgenotype]; double[] weight = new double[numgenotype]; for(int j=0;j<weight.length;j++){ weight[j]=j; } double[] af=new double[numallele]; for(int j=0;j<af.length;j++){ af[j]=mz.nextDouble(); } StatUtilsX.MiscUtil.standard(af); //af=StatUtilsX.MiscUtil.accumstandard(af); /* for(int j=0;j<af.length;j++){ System.out.println(af[j]); } */ //System.out.println("##"); double[] gf = new double[numgenotype]; //System.out.println("numgenotype="+numgenotype); int counter=0; for(int j=0;j<af.length;j++){ for(int k=j;k<af.length;k++){ double tmp = af[j]*af[k]; if(j==k){ gf[counter]=tmp; }else{ gf[counter]=tmp*2; } counter++; } } /* for(int j=0;j<gf.length;j++){ System.out.println(gf[j]+"\t"); } */ gf=StatUtilsX.MiscUtil.accumstandard(gf); /* for(int j=0;j<gf.length;j++){ System.out.println(gf[j]+"\t"); } */ for(int j=0;j<caseN;j++){ double r=mz.nextDouble(); for(int k=0;k<gf.length;k++){ if(r<gf[k]){ table[0][k]++; break; } } } for(int j=0;j<contN;j++){ double r=mz.nextDouble(); for(int k=0;k<gf.length;k++){ if(r<gf[k]){ table[1][k]++; break; } } } double hweexactpCase=StatUtilsX.Fisher.hweFisherAbecasis(table[0]); double hweexactpCont=StatUtilsX.Fisher.hweFisherAbecasis(table[1]); //double hweexactpCase=StatUtilsX.Fisher.hweFisherAbecasis(table[0]); //System.out.print("casehwe\t"+hweexactpCase+"\t"); //System.out.print("conthwe\t"+hweexactpCont+"\t"); CNVcalculator cnvc=new CNVcalculator(table); cnvc.assoc[0]= cnvc.testcnvTrend(weight); cnvc.assoc[2] = cnvc.testCNpool(); boolean cor=false; cnvc.assoc[1] = cnvc.testMWW(cor); cnvc.hweChi = cnvc.hweChicasecontall(); cnvc.hweExact=cnvc.hweExactcasecontall(sl); cnvc.Print("\t", "\t"); } /* int[][] t = {{1,2,3,4,5,6},{2,3,4,5,6,7}}; //int[][] t = {{5,10,3},{8,9,2}}; CNVcalculator cnv = new CNVcalculator(t); double[] w = {1,2,3,4,5,6}; cnv.assoc[0]= cnv.testcnvTrend(w); cnv.assoc[2] = cnv.testCNpool(); boolean cor=false; cnv.assoc[1] = cnv.testMWW(cor); cnv.hweChi = cnv.hweChicasecontall(); cnv.Print("\t", "\n"); */ /* for(int i=0;i<hwe.length;i++){ for(int j=0;j<hwe[i].length;j++){ System.out.print(hwe[i][j]+"\t"); } System.out.println(); } */ } public CNVcalculator(int[][] t_){ t = new double[t_.length][t_[0].length]; for(int i=0;i<t.length;i++){ for(int j=0;j<t[i].length;j++){ t[i][j]=(double)t_[i][j]; } } //t = StatUtilsX.MiscUtil.DeepCopyInt2(t_); Ng=t[0].length; Na=(int)(Math.round((Math.sqrt(1+8*(double)Ng)-1)/2)); //System.out.println(""+Na); N=0; R=0; S=0; n = new double[t[0].length]; for(int i=0;i<t.length;i++){ for(int j=0;j<t[i].length;j++){ N+=t[i][j]; if(i==0){ R+=t[i][j]; }else if(i==1){ S+=t[i][j]; } n[j]+=t[i][j]; } } int maxCN=(Na-1)*2; t2= new int[2][maxCN+1]; t3=new int[2][Na]; int a1=0; int a2=0; for(int i=0;i<t[0].length;i++){ int numcp=a1+a2; t2[0][numcp]+=t[0][i]; t2[1][numcp]+=t[1][i]; t3[0][a1]+=t[0][i]; t3[0][a2]+=t[0][i]; t3[1][a1]+=t[1][i]; t3[1][a2]+=t[1][i]; a2++; if(a2==Na){ a1++; a2=a1; } } int dfX=Ng-Na; chidfX = new org.apache.commons.math.distribution.ChiSquaredDistributionImpl(dfX); hweChi = new double[3][2]; hweExact = new double[3][2]; assoc = new double[3][2]; /* for(int i=0;i<t2[0].length;i++){ System.out.println(t2[0][i]+"\t"+t2[1][i]); } System.out.println(""); for(int i=0;i<t3[0].length;i++){ System.out.println(t3[0][i]+"\t"+t3[1][i]); } */ //w = new double[Ng]; } public CNVcalculator(int[][] t_,String st){ /* * trendテストをするため * t_はアレル数・ジェノタイプ数の基準を満たしていなくてもよい */ t = new double[t_.length][t_[0].length]; for(int i=0;i<t.length;i++){ for(int j=0;j<t[i].length;j++){ t[i][j]=(double)t_[i][j]; } } //t = StatUtilsX.MiscUtil.DeepCopyInt2(t_); Ng=t[0].length; /* Na=(int)(Math.round((Math.sqrt(1+8*(double)Ng)-1)/2)); //System.out.println(""+Na); */ N=0; R=0; S=0; n = new double[t[0].length]; for(int i=0;i<t.length;i++){ for(int j=0;j<t[i].length;j++){ N+=t[i][j]; if(i==0){ R+=t[i][j]; }else if(i==1){ S+=t[i][j]; } n[j]+=t[i][j]; } } /* int maxCN=(Na-1)*2; t2= new int[2][maxCN+1]; t3=new int[2][Na]; int a1=0; int a2=0; for(int i=0;i<t[0].length;i++){ int numcp=a1+a2; t2[0][numcp]+=t[0][i]; t2[1][numcp]+=t[1][i]; t3[0][a1]+=t[0][i]; t3[0][a2]+=t[0][i]; t3[1][a1]+=t[1][i]; t3[1][a2]+=t[1][i]; a2++; if(a2==Na){ a1++; a2=a1; } } chidfX = new org.apache.commons.math.distribution.ChiSquaredDistributionImpl(t3[0].length-1); hweChi = new double[3][2]; assoc = new double[3][2]; */ /* for(int i=0;i<t2[0].length;i++){ System.out.println(t2[0][i]+"\t"+t2[1][i]); } System.out.println(""); for(int i=0;i<t3[0].length;i++){ System.out.println(t3[0][i]+"\t"+t3[1][i]); } */ //w = new double[Ng]; } public void Print(String sep1,String sep2){ System.out.println(StringCNVcalculator(sep1,sep2)); } public String StringCNVcalculator(String sep1,String sep2){ String ret =""; ret += "contTable"+sep2; for(int i=0;i<t.length;i++){ for(int j=0;j<t[i].length;j++){ ret+=t[i][j]+sep1; } ret+=sep2; } for(int i=0;i<n.length;i++){ ret+=n[i]+sep1; } ret +=sep2; ret+="t2"+sep2; for(int i=0;i<t2.length;i++){ for(int j=0;j<t2[i].length;j++){ ret+=t2[i][j]+sep1; } ret+=sep2; } ret+="t3"+sep2; for(int i=0;i<t3.length;i++){ for(int j=0;j<t3[i].length;j++){ ret+=t3[i][j]+sep1; } ret+=sep2; } ret += "R"+sep1+R+sep1+"S"+sep1+S+sep1+"N"+sep1+N+sep2; ret += "Na"+sep1+Na+sep2; ret += "Ng"+sep1+Ng+sep2; ret += "hweChi"+sep2; for(int i=0;i<hweChi.length;i++){ for(int j=0;j<hweChi[i].length;j++){ ret += hweChi[i][j]+sep1; } ret +=sep2; } ret += "hweExact"+sep2; for(int i=0;i<hweExact.length;i++){ for(int j=0;j<hweExact[i].length;j++){ ret += hweExact[i][j]+sep1; } ret +=sep2; } ret+="assoc"+sep2; for(int i=0;i<assoc.length;i++){ for(int j=0;j<assoc[i].length;j++){ ret+=assoc[i][j]+sep1; } ret+=sep2; } return ret; } public double[] testcnvTrend(double[] w){ double[] ret = new double[2]; if(N==0){ return ret; } double[] p = new double[Ng]; double P = R/N; for(int i=0;i<p.length;i++){ if(n[i]!=0){ p[i]=t[0][i]/n[i]; } } double[] wn = new double[Ng]; double W = 0; for(int i=0;i<wn.length;i++){ wn[i]=n[i]*w[i]; W+=wn[i]; } double Wav=W/N; double[] wcor=new double[Ng]; for(int i=0;i<wcor.length;i++){ wcor[i]=w[i]-Wav; } double[] rsqn=new double[Ng]; double Rsqn=0; for(int i=0;i<rsqn.length;i++){ rsqn[i]=t[0][i]*t[0][i]/n[i]; Rsqn+=rsqn[i]; } double[] numer=new double[Ng]; double Numer=0; for(int i=0;i<numer.length;i++){ numer[i]=n[i]*(p[i]-P)*wcor[i]; Numer+=numer[i]; } double[] denom=new double[Ng]; double Denom=0; for(int i=0;i<denom.length;i++){ denom[i]=n[i]*wcor[i]*wcor[i]; Denom+=denom[i]; } ret[0]=Numer*Numer/(Denom*P*(1-P)); /* ret[0]=(N*Math.pow((N*(t[0][1]+2*t[0][2])-R*(n[1]+2*n[2])),2))/ (R*S*(N*(n[1]+4*n[2])-Math.pow((n[1]+2*n[2]),2))); */ try{ ret[1]=1-chidf1.cumulativeProbability(ret[0]); } catch (org.apache.commons.math.MathException ex) { } //double s=Math.pow(3,2); //System.out.println("cnvtrend\t"+ret[0]+"\t"+ret[1]); return ret; } public double[] testMWW(boolean correct){ /* * Mann-Whiteney-Willcoxon */ double[] ret = new double[2]; /* int maxCN=(Na-1)*2; int[][] t2= new int[2][maxCN+1]; int a1=Na-1; int a2=Na-1; for(int i=0;i<t[0].length;i++){ int numcp=a1+a2; t2[0][numcp]+=t[0][i]; t2[1][numcp]+=t[1][i]; a2--; if(a2<0){ a1--; a2=a1; } } */ int count=0; int counted=1; double countedNum=S; if(R>S){ count=1; counted=0; countedNum=R; } double U=0; //System.out.println("countedNum="+countedNum); for(int i=0;i<t2[count].length;i++){ countedNum-=t2[counted][i]; //System.out.println("i="+i+"\t"+countedNum+"\t"+t2[count][i]); U+=t2[count][i]*(countedNum+(double)t2[counted][i]/2); } double tmpU=R*S-U; U=Math.min(U,tmpU); double[] tie=new double[t2[0].length]; double Tsum=0; for(int i=0;i<t2[0].length;i++){ tie[i]=t2[0][i]+t2[1][i]; Tsum+=tie[i]*tie[i]*tie[i]-tie[i]; } double V=R*S*(N*N*N-N-Tsum)/12/(N*N-N); double E=R*S/2; double cor = 0; if(correct){ cor=0.5; }else{ } double Z=(Math.abs(U-E)-cor)/Math.sqrt(V); ret[0]=Z; System.out.print("U=\t"+U+"\t"); System.out.print("V=\t"+V+"\t"); System.out.print("E=\t"+E+"\t"); org.apache.commons.math.distribution.NormalDistributionImpl norm = new org.apache.commons.math.distribution.NormalDistributionImpl(); try{ ret[1]= (1-norm.cumulativeProbability(Z))*2; } catch (org.apache.commons.math.MathException ex) { } //System.out.println("Z\t"+Z+"\t"+"p\t"+ret[1]); return ret; } public double[] testCNpool(){ /* int maxCN=(Na-1)*2; int[][] t2= new int[2][maxCN+1]; int a1=Na-1; int a2=Na-1; for(int i=0;i<t[0].length;i++){ int numcp=a1+a2; t2[0][numcp]+=t[0][i]; t2[1][numcp]+=t[1][i]; a2--; if(a2<0){ a1--; a2=a1; } } */ String st=""; CNVcalculator cnv=new CNVcalculator(t2,st); double[] w= new double[t2[0].length]; w[0]=2*(Na-1); for(int i=1;i<w.length;i++){ w[i]=w[i-1]-1; } double[] ret = cnv.testcnvTrend(w); return ret; } public double[][] hweChicasecontall(){ double[][] ret = new double[3][2]; if(N==0){ return ret; } if(R!=0){ ret[0]=hweChi(t[0],t3[0]); } if(S!=0){ ret[1]=hweChi(t[1],t3[1]); } int[] n3 = new int[t3[0].length]; for(int i=0;i<n3.length;i++){ n3[i]=t3[0][i]+t3[1][i]; } ret[2]=hweChi(n,n3); return ret; } public double[][] hweExactcasecontall(double[] sl){ double[][] ret = new double[3][2]; if(N==0){ return ret; } if(R!=0){ ret[0]=hweExact(t[0],t3[0],sl); } if(S!=0){ ret[1]=hweExact(t[1],t3[1],sl); } int[] n3 = new int[t3[0].length]; for(int i=0;i<n3.length;i++){ n3[i]=t3[0][i]+t3[1][i]; } ret[2]=hweExact(n,n3,sl); return ret; } public double[] hweChi(double[] t,int[] t3){ double[] ret = new double[2]; double[] exp=new double[t.length]; double sum =0; for(int i=0;i<t3.length;i++){ sum+=t3[i]; } int counter=0; for(int i=0;i<t3.length;i++){ for(int j=i;j<t3.length;j++){ double tmp = t3[i]*t3[j]/(2*sum); if(i==j){ exp[counter]=tmp; }else{ exp[counter]=tmp*2; } if(exp[counter]!=0){ ret[0]+=(t[counter]-exp[counter])*(t[counter]-exp[counter])/exp[counter]; } counter++; } } /* for(int i=0;i<exp.length;i++){ System.out.println(t[i]+"\t"+exp[i]); } */ try{ ret[1]=1-chidfX.cumulativeProbability(ret[0]); } catch (org.apache.commons.math.MathException ex) { } return ret; } public double[] hweExact(double[] t,int[] t3,double[] sl){ int numhetero=0; int numchrom=0; int numsubj=0; int[] limit = new int[t.length]; int a1=0; int a2=0; for(int i=0;i<limit.length;i++){ numchrom+=2*t[i]; numsubj+=t[i]; if(a1==a2){ limit[i]=t3[a1]/2; }else{ limit[i]=Math.min(t3[a1], t3[a2]); numhetero+=t[i]; } a2++; if(a2==t3.length){ a1++; a2=a1; } } double prob=0; double commune=-sl[numchrom]+sl[numsubj]; for(int i=0;i<t3.length;i++){ commune+=sl[t3[i]]; } double originalpart=numhetero*sl[2]; for(int i=0;i<t.length;i++){ originalpart-=sl[(int)(t[i])]; } //System.out.print(originalpart+"\t"); /* for(int i=0;i<limit.length;i++){ System.out.println("limit\t"+limit[i]); } */ int[] data=new int[t.length]; boolean yes=true; while(yes){ int tmpnumhetero=0; double tablepart=0; boolean ok=true; int[] allelesum=new int[t3.length]; int b1=0; int b2=0; for(int i=0;i<data.length;i++){ allelesum[b1]+=data[i]; allelesum[b2]+=data[i]; if(allelesum[b1]>t3[b1]){ ok=false; break; } if(allelesum[b2]>t3[b2]){ ok=false; break; } if(b1!=b2){ tmpnumhetero+=data[i]; } b2++; if(b2>=t3.length){ b1++; b2=b1; } } if(ok){ for(int i=0;i<t3.length;i++){ if(allelesum[i]!=t3[i]){ ok=false; } } } //ok=true; if(ok){ /* for(int i=0;i<data.length;i++){ System.out.print(data[i]+"\t"); } */ tablepart = tmpnumhetero*sl[2]; for(int i=0;i<data.length;i++){ tablepart-=sl[(int)(data[i])]; } //System.out.print(tablepart+"\t"); double tmpPr=Math.exp(commune+tablepart); //System.out.print(tmpPr+"\t"); //System.out.println(); if(tablepart<=originalpart){ prob+=Math.exp(commune+tablepart); } } data[0]++; for(int i=0;i<limit.length-1;i++){ if(data[i]>limit[i]){ data[i]=0; data[i+1]++; } } if(data[data.length-1]==limit[data.length-1]+1){ yes=false; } } double[] ret = new double[2]; ret[1]=prob; ret[0]=Math.exp(commune+originalpart); //System.out.println("Prob\t="+ret[0]+"\t"+ret[1]); return ret; } }