データのシミュレーション作成



SNP数の多いハプロタイプのデータを作りたい。

かなりハプロタイプの種類を少なくする→LD強い

HWEを仮定する。

手っ取り早く、それなりのデータを作るためのソース。

SNP数は1万程度までOK。ハプロタイプ数は指定する。

人数は、それなりに。

ただし、SNP数x人数が膨大だとメモリ制限を越える(シミュレーションしてそれを吐き出すだけなら、微修正で対応可能)

ソース


public class FromHaplotype {

/**
* @param args
*/
public int numsnp;
public double[] hapfreqcase;
public double[] hapfreqcont;
public int numcase;
public int numcont;
public int[][] gencase;
public int[][] gencont;
public int[][][] twoBYthree;
public double fraczerohap;
public int numzerohap;
public int numnonzerohap;
public int[] hapint;
public String[][] hapst;
public static void main(String[] args) {
// TODO 自動生成されたメソッド・スタブ
int iteration = 500;
int numsnp = 100;
int numcase = 10000;
int numcont = 10000;
double fraczerohap =0.999;
int numhap=10;
int[] testTypes = {9};
double[] minP=new double[iteration];
for(int x=0;x<iteration;x++){
minP[x]=1;
FromHaplotype fh=new FromHaplotype(numsnp,numcase,numcont,numhap);
double[] serialLogFact = StatUtils.Fisher.serialLogfact(2*(numcase+numcont));
for(int i=0;i<fh.twoBYthree.length;i++){
double[] ps = BBPPhenotypeVector3GC.SingleSNPtest4.PforSelectedTestsOriginal2(fh.twoBYthree[i], serialLogFact, testTypes);

for(int j=0;j<fh.twoBYthree[i].length;j++){
for(int k=0;k<fh.twoBYthree[i][j].length;k++){
//System.out.print(fh.twoBYthree[i][j][k]+"\t");
}
}
for(int j=0;j<testTypes.length;j++){
//System.out.print(ps[testTypes[j]]+"\t");
if(ps[testTypes[j]]<minP[x]){
minP[x]=ps[testTypes[j]];
}
}
//System.out.print("\n");
}
System.out.print(minP[x]+"\n");
}

}

public FromHaplotype(int numsnp_,int numcase_,int numcont_,int numnonzerohap_){
numsnp=numsnp_;
numcase=numcase_;
numcont=numcont_;
numnonzerohap=numnonzerohap_;
//fraczerohap=fraczerohap_;
/*
* numsnpが大きいときはセグメントに分割
*/
int segmentlen = 10;
int numsegment =0;
int resid = numsnp%segmentlen;
//System.out.println("resid="+resid);
if(resid==0){
numsegment = numsnp/segmentlen;
}else{
numsegment = numsnp/segmentlen+1;
}
int[] numslist = new int[numsegment];
hapst = new String[numnonzerohap][numsegment];
if(resid==0){
for(int i=0;i<numslist.length;i++){
numslist[i]=segmentlen;
}
}else{
for(int i=0;i<numslist.length-1;i++){
numslist[i]=segmentlen;
}
numslist[numslist.length-1]=resid;
}
//for(int i=0;i<numslist.length;i++){
//System.out.println("numslist "+numslist[i]);
//}
//numzerohap=numhap-numnonzerohap;
//System.out.println("numzerohap="+numzerohap);
//hapfreqcase = new double[numhap];
//hapfreqcont = new double[numhap];
hapfreqcase = new double[numnonzerohap];
hapfreqcont = new double[numnonzerohap];
hapint = new int[numnonzerohap];
gencase = new int[numcase][numsnp];
gencont = new int[numcont][numsnp];
twoBYthree = new int[numsnp][2][3];
int x = 1000000;
int seed = (int)(Math.random()*x);
Utils.MersenneTwisterFast mz = new Utils.MersenneTwisterFast(seed);

int gensnpcounter=0;
for(int i=0;i<hapfreqcase.length;i++){
hapfreqcase[i]=Math.pow(mz.nextDouble(),2);
}
StatUtils.MiscUtil.standard(hapfreqcase);
//for(int i=0;i<hapfreqcase.length;i++){
//if(hapfreqcase[i]>0){
//String st1 = Integer.toBinaryString(hapint[i]);
//st1 = stbinaryIntFromBottom(hapint[i],numslist[y]);
//System.out.println(i+"\t"+hapfreqcase[i]);
//hapst[i][y]=st1;
//}

//}
double[] hapfreqcaseAccum = StatUtils.MiscUtil.accumstandard(hapfreqcase);
//hapfreqcase=StatUtils.MiscUtil.accumstandard(hapfreqcase);

hapfreqcont= hapfreqcase;
double[] hapfreqcontAccum = hapfreqcaseAccum;

for(int y=0;y<numslist.length;y++){
//int numhap=(int)Math.pow(2,numsnp);
int numhap=(int)Math.pow(2,numslist[y]);
//System.out.println("numhap "+numhap);
//numzerohap= (int)(numhap*fraczerohap);
/*
for(int i=0;i<hapfreqcase.length;i++){
hapfreqcase[i]=mz.nextDouble();
}
*/
int counterX=0;
//System.out.println("A");
if(numslist[y]>=numnonzerohap){
while(counterX<numnonzerohap){
int tmpint = mz.nextInt(numhap);
boolean judge=false;
for(int i=0;i<counterX;i++){
if(tmpint==hapint[i]){
judge=true;
break;

}
}
if(!judge){
hapint[counterX]=tmpint;
counterX++;
}
}
}else{
while(counterX<numnonzerohap){
int tmpint = mz.nextInt(numhap);
boolean judge=false;
//for(int i=0;i<counterX;i++){
//if(tmpint==hapint[i]){
//judge=true;
//break;

//}
//}
//if(!judge){
hapint[counterX]=tmpint;
counterX++;
//}
}
}

//System.out.println("B");


for(int i=0;i<hapfreqcase.length;i++){
//if(hapfreqcase[i]>0){
//String st1 = Integer.toBinaryString(hapint[i]);
String st1 = stbinaryIntFromBottom(hapint[i],numslist[y]);
//System.out.println(i+"\t"+st1);
hapst[i][y]=st1;
//}

}


for(int i=0;i<numcase;i++){
double r1 = mz.nextDouble();
double r2 = mz.nextDouble();
//System.out.print("r1="+r1);
int h1=0;
int h2=0;
boolean judge1 =false;
boolean judge2 =false;
for(int j=0;j<hapfreqcaseAccum.length;j++){
if(!judge1){
if(r1<=hapfreqcaseAccum[j]){
h1=hapint[j];
judge1=true;
}
}
if(!judge2){
if(r2<=hapfreqcaseAccum[j]){
h2=hapint[j];
judge2=true;
}
}

if(judge1 && judge2){
break;
}
}
//gencase[i]=genstring(h1,h2,numsnp);
int[] tmpgencase=genstring(h1,h2,numslist[y]);
//System.out.println("gensnpcounter="+gensnpcounter);
for(int j=0;j<tmpgencase.length;j++){
gencase[i][gensnpcounter+j]=tmpgencase[j];
//System.out.println("tmpgen "+tmpgencase[j]);
}
for(int j=0;j<tmpgencase.length;j++){
twoBYthree[gensnpcounter+j][0][tmpgencase[j]]++;
}
}
for(int i=0;i<numcont;i++){
double r1 = mz.nextDouble();
double r2 = mz.nextDouble();
int h1=0;
int h2=0;
boolean judge1 =false;
boolean judge2 =false;
for(int j=0;j<hapfreqcontAccum.length;j++){
if(!judge1){
if(r1<=hapfreqcontAccum[j]){
h1=hapint[j];
judge1=true;
}
}
if(!judge2){
if(r2<=hapfreqcontAccum[j]){
h2=hapint[j];
judge2=true;
}
}

if(judge1 && judge2){
break;
}
}
//gencont[i]=genstring(h1,h2,numsnp);

int[] tmpgencont=genstring(h1,h2,numslist[y]);
for(int j=0;j<tmpgencont.length;j++){
gencont[i][gensnpcounter+j]=tmpgencont[j];
}
for(int j=0;j<tmpgencont.length;j++){
twoBYthree[gensnpcounter+j][1][tmpgencont[j]]++;
}
//gencont[i]=genstring(h1,h2,numsnp);
//for(int j=0;j<gencont[i].length;j++){
//twoBYthree[j][1][gencont[i][j]]++;
//}
}
gensnpcounter+=numslist[y];
}
/*
for(int i=0;i<hapst.length;i++){
System.out.print(hapfreqcase[i]+"\t"+hapfreqcont[i]+"\t");
for(int j=0;j<hapst[i].length;j++){
System.out.print(hapst[i][j]+" ");
}
System.out.print("\n");
}
*/

}

public static int[] genstring(int h1,int h2,int numsnp){
int[] ret = new int[numsnp];
String st1 = Integer.toBinaryString(h1);
String st2 = Integer.toBinaryString(h2);
String[] t1 = st1.split("");
String[] t2 = st2.split("");
//System.out.println("st1="+st1);
//System.out.println("st2="+st2);
//for(int i=0;i<t1.length;i++){
//System.out.println("t1>"+t1[i]+"<t1");
//}
for(int i=0;i<numsnp;i++){
String s1 ="";
String s2 ="";
if(t1.length-1-i<=0){
s1="0";
}else{
s1=t1[t1.length-1-i];
}
if(t2.length-1-i<=0){
s2="0";
}else{
s2=t2[t2.length-1-i];
}
//System.out.println(">"+s1+"<");
//System.out.println(">"+s2+"<");
if(s1.equals("0") && s2.equals("0")){
ret[i]=0;
}else if(s1.equals("1") && s2.equals("1")){
ret[i]=2;
}else{
ret[i]=1;
}
}
return ret;
}
public static String stbinaryIntFromBottom(int x,int r){
String s = Integer.toBinaryString(x);
String[] t1 = s.split("");
String ret ="";
for(int i=0;i<r;i++){
if(t1.length-1-i>0){
ret+=t1[t1.length-1-i];
}else{
ret+="0";
}
}
return ret;
}
}