Nucleotide diversity



前項のnucleotide polymorphismではDNA配列集団に存在する多型の分布を見たが、nucleotide diversityでは、集団を構成するDNA分子同士の異同箇所を問題にする。

全ペアにつき、異なる塩基箇所を数え、その和を、全ペア数xDNA配列長にて除す。その分散も、ソースに示した計算式で与えられる。

前項の例の場合は

pi : 0.016

V(pi) : 0.000107

下のソースの出力例は


File name seq2.txt
No.seq 5
No.polysites 16
Length 500
S 全長における多型箇所の比率 0.032
theta : the level of nucleotide polymorphismの期待値 0.015360000000000002
V(theta) : the level of nucleotide polymorphismの期待値の分散 9.213050880000004E-5
pi : nucleotide diversityの期待値 0.0158
V(pi) : nucleotide diversityの期待値の分散 1.0733466666666668E-4


public class Heterogeneity {
public String file;
public String[][] seqdata;
public int numseq;
public int numnt;
public double S;
public double theta;
public double thetaVar;
public double pi;
public double piVar;
public int length;

public static void main(String[] args) {
String in = "";
int length =0;
for(int i=0;i<args.length;i++){

if(args[i].equals("-if")){
in = args[i+1];
}
if(args[i].equals("-len")){
length = Integer.parseInt(args[i+1]);
}


}

Heterogeneity het = new Heterogeneity(in,"",length);
if(length==0){
het = new Heterogeneity(in,"");
}

het.PrintHeterogeneity("\t","\n");
}

public Heterogeneity(String in, String delim){
file = in;
seqdata=readFile(in,delim);
numseq = seqdata.length;
numnt = seqdata[0].length;
length = numnt;
S = calcS();
theta = calcTheta();
thetaVar = calcThetaVar();
pi = calcPi();
piVar = calcPiVar();
}
public Heterogeneity(String in, String delim,int len){
file = in;
seqdata=readFile(in,delim);
numseq = seqdata.length;
numnt = seqdata[0].length;
length = len;
S = calcS();
theta = calcTheta();
thetaVar = calcThetaVar();
pi = calcPi();
piVar = calcPiVar();
}
public double calcPi(){
double ret =0;
int numpair = numseq *(numseq-1)/2;
int[] numtype = new int[seqdata[0].length];
int[][] numberType = new int[seqdata[0].length][0];
for(int i=0;i<seqdata[0].length;i++){
int Notype = 0;
String[] tmptype= new String[0];
for(int j=0;j<seqdata.length;j++){
boolean flag = true;
for(int k=0;k<tmptype.length;k++){
if(tmptype[k].equals(seqdata[j][i])){
numberType[i][k]++;
flag = false;
break;
}
}
if(flag){
numtype[i]++;
tmptype = AddElemString1(tmptype,seqdata[j][i]);
numberType[i]=AddElemInt1(numberType[i],1);
}
}
}
for(int i=0;i<numberType.length;i++){

for(int j=0;j<numberType[i].length;j++){
ret += numberType[i][j] *(numseq-numberType[i][j]);
}
}
ret /=(2*numpair*length);

return ret;
}
public double calcPiVar(){
double ret = 0;
double b1 = CalcB1(numseq);
double b2 = CalcB2(numseq);
ret = b1/length * pi + b2 * pi * pi;
return ret;
}
public double CalcB1(int n){
double ret =0;
ret =(double)(n+1)/(double)(3*(n-1));
return ret;
}
public double CalcB2(int n){
double ret =0;
ret =(double)(2*(n*n+n+3))/(double)(9*n*(n-1));
return ret;
}
public double calcTheta(){
double ret =0;
double a1 = CalcA1(numseq-1);

ret = S/a1;
return ret;
}
public double calcThetaVar(){
double ret =0;
double a1 = CalcA1(numseq-1);
double a2 = CalcA2(numseq-1);

ret = theta/((double)length*a1) + a2*theta*theta/(a1*a1);
return ret;
}
public double CalcA1(int n){
double ret =0;
for(int i=1;i<=n;i++){
ret += 1/(double)i;
}
return ret;
}
public double CalcA2(int n){
double ret =0;
for(int i=1;i<=n;i++){
ret += 1/(double)(i*i);
}
return ret;
}
public double calcS(){
double ret =0;
for(int i=0;i<seqdata[0].length;i++){
for(int j=1;j<seqdata.length;j++){
if(!seqdata[0][i].equals(seqdata[j][i])){
ret+=1;
break;
}
}
}

ret /= (double)length;
return ret;
}
public String StringHeterogeneity(String sep1,String sep2){
String ret="";
ret += "File name" + sep1 + file + sep2;
ret += "No.seq" + sep1 + numseq + sep2;
ret += "No.polysites" + sep1 + numnt + sep2;
ret += "Length" + sep1 + length +sep2;
ret += "S 全長における多型箇所の比率" +sep1 + S + sep2;
ret += "theta : the level of nucleotide polymorphismの期待値" + sep1 + theta +sep2;
ret += "V(theta) : the level of nucleotide polymorphismの期待値の分散" +sep1 + thetaVar + sep2;
ret += "pi : nucleotide diversityの期待値" + sep1 + pi + sep2;
ret += "V(pi) : nucleotide diversityの期待値の分散" +sep1 + piVar + sep2;

return ret;

}
public void PrintHeterogeneity(String sep1,String sep2){
String ret=StringHeterogeneity(sep1,sep2);
System.out.println(ret);
}
public static String[][] readFile(String in,String delim){
String[][] records = new String[0][0];
try{


FileReader fr = new FileReader(in);

BufferedReader objData=new BufferedReader(fr);


int linecounter=0;
while(objData.ready()){
String[] line = objData.readLine().split(delim);
line[line.length-1].trim();
if(line[0].equals("")){
String[] tmpline = new String[line.length-1];
for(int i=0;i<tmpline.length;i++){
tmpline[i]=line[i+1];
}
line = MiscUtil.DeepCopyString1(tmpline);
}
records=MiscUtil.AddElemString2(records,line);




}
fr.close();
objData.close();




}catch(IOException e){
System.out.println(e);
}
return records;
}
public static String[] AddElemString1(String[] in,String e){
String[] ret = new String[in.length+1];
for(int i=0;i<in.length;i++){
ret[i]=in[i];
}
ret[in.length]=e;
return ret;
}

public static int[] AddElemInt1(int[] in,int e){
int[] ret = new int[in.length+1];
for(int i=0;i<in.length;i++){
ret[i]=in[i];
}
ret[in.length]=e;
return ret;
}
}

  • 集団のハプロタイプについてその頻度と集団人数とからpiを出すのはこちら

public class HeterogeneityPop {
public String file;
public String fileFreq;
public String[][] seqdata;
public double[] freq;
public int numseq;
public int numnt;
public double S;
public double theta;
public double thetaVar;
public double pi;
public double piVar;
public int length;
public double N;

/*
* -if in.txt -len 500 -N 1000 -infreq infreq.txt
*/
public static void main(String[] args) {
String in = "";
String infreq ="";
int length =0;
double N =0;
for(int i=0;i<args.length;i++){

if(args[i].equals("-if")){
in = args[i+1];
}
if(args[i].equals("-len")){
length = Integer.parseInt(args[i+1]);
}
if(args[i].equals("-N")){
N = Double.parseDouble(args[i+1]);
}
if(args[i].equals("-infreq")){
infreq = args[i+1];
}

}

HeterogeneityPop het = new HeterogeneityPop(in,infreq,"",length,N);


if(length==0){
het = new HeterogeneityPop(in,"");
}

het.PrintHeterogeneity("\t","\n");
}

public HeterogeneityPop(String in, String delim){
file = in;
seqdata=readFile(in,delim);
numseq = seqdata.length;
numnt = seqdata[0].length;
length = numnt;
S = calcS();
theta = calcTheta();
thetaVar = calcThetaVar();
pi = calcPi();
piVar = calcPiVar();
}
public HeterogeneityPop(String in, String infreq,String delim,int len,double N_){
file = in;
seqdata=readFile(in,delim);
fileFreq = infreq;

freq=readFileFreq(infreq);
double sumfreq = 0;
for(int i=0;i<freq.length;i++){
sumfreq+=freq[i];
}
for(int i=0;i<freq.length;i++){
freq[i]/=sumfreq;
}
N=N_;
numseq = seqdata.length;
numnt = seqdata[0].length;
length = len;
S = calcS();
theta = calcTheta();
thetaVar = calcThetaVar();
pi = calcPi();
piVar = calcPiVar();
}
public double calcPi(){
double ret =0;
//int numpair = numseq *(numseq-1)/2;
double numpair = N*(N-1)/2;
int[] numtype = new int[seqdata[0].length];
//int[][] numberType = new int[seqdata[0].length][0];
double[][] numberType = new double[seqdata[0].length][0];
for(int i=0;i<seqdata[0].length;i++){
int Notype = 0;
String[] tmptype= new String[0];
for(int j=0;j<seqdata.length;j++){
boolean flag = true;
for(int k=0;k<tmptype.length;k++){
if(tmptype[k].equals(seqdata[j][i])){
//numberType[i][k]++;
numberType[i][k]+=freq[j];
flag = false;
break;
}
}
if(flag){
numtype[i]++;
tmptype = AddElemString1(tmptype,seqdata[j][i]);
//numberType[i]=AddElemInt1(numberType[i],1);
numberType[i]=MiscUtil.AddElemDouble1(numberType[i],freq[j]);
}
}
}
for(int i=0;i<numberType.length;i++){

for(int j=0;j<numberType[i].length;j++){
//ret += numberType[i][j] *(numseq-numberType[i][j]);
//System.out.println(numberType[i][j]);
ret += numberType[i][j] *(1-numberType[i][j]);
}
}
ret /=(2*numpair*length);
ret *=(N*N);
//ret *=(numpair/length);
return ret;
}
public double calcPiVar(){
double ret = 0;
double b1 = CalcB1(numseq);
double b2 = CalcB2(numseq);
ret = b1/length * pi + b2 * pi * pi;
return ret;
}
public double CalcB1(int n){
double ret =0;
ret =(double)(n+1)/(double)(3*(n-1));
return ret;
}
public double CalcB2(int n){
double ret =0;
ret =(double)(2*(n*n+n+3))/(double)(9*n*(n-1));
return ret;
}
public double calcTheta(){
double ret =0;
double a1 = CalcA1(numseq-1);

ret = S/a1;
return ret;
}
public double calcThetaVar(){
double ret =0;
double a1 = CalcA1(numseq-1);
double a2 = CalcA2(numseq-1);

ret = theta/((double)length*a1) + a2*theta*theta/(a1*a1);
return ret;
}
public double CalcA1(int n){
double ret =0;
for(int i=1;i<=n;i++){
ret += 1/(double)i;
}
return ret;
}
public double CalcA2(int n){
double ret =0;
for(int i=1;i<=n;i++){
ret += 1/(double)(i*i);
}
return ret;
}
public double calcS(){
double ret =0;
for(int i=0;i<seqdata[0].length;i++){
for(int j=1;j<seqdata.length;j++){
if(!seqdata[0][i].equals(seqdata[j][i])){
ret+=1;
break;
}
}
}

ret /= (double)length;
return ret;
}
public String StringHeterogeneity(String sep1,String sep2){
String ret="";
ret += "File name" + sep1 + file + sep2;
ret += "No.seq" + sep1 + numseq + sep2;
ret += "No.polysites" + sep1 + numnt + sep2;
ret += "Length" + sep1 + length +sep2;
ret += "S 全長における多型箇所の比率" +sep1 + S + sep2;
//ret += "theta : the level of nucleotide polymorphismの期待値" + sep1 + theta +sep2;
//ret += "V(theta) : the level of nucleotide polymorphismの期待値の分散" +sep1 + thetaVar + sep2;
ret += "pi : nucleotide diversityの期待値" + sep1 + pi + sep2;
ret += "V(pi) : nucleotide diversityの期待値の分散" +sep1 + piVar + sep2;
ret += "PopSizeN" + sep1 + N + sep2;
ret += "frequency" + sep1 ;
for(int i=0;i<freq.length;i++){
ret += freq[i]+sep1;
}
return ret;

}
public void PrintHeterogeneity(String sep1,String sep2){
String ret=StringHeterogeneity(sep1,sep2);
System.out.println(ret);
}
public static String[][] readFile(String in,String delim){
String[][] records = new String[0][0];
try{


FileReader fr = new FileReader(in);

BufferedReader objData=new BufferedReader(fr);


int linecounter=0;
while(objData.ready()){
String[] line = objData.readLine().split(delim);
line[line.length-1].trim();
if(line[0].equals("")){
String[] tmpline = new String[line.length-1];
for(int i=0;i<tmpline.length;i++){
tmpline[i]=line[i+1];
}
line = MiscUtil.DeepCopyString1(tmpline);
}
records=MiscUtil.AddElemString2(records,line);




}
fr.close();
objData.close();




}catch(IOException e){
System.out.println(e);
}
return records;
}

public static double[] readFileFreq(String in){
double[] records = new double[0];
try{


FileReader fr = new FileReader(in);

BufferedReader objData=new BufferedReader(fr);


int linecounter=0;
while(objData.ready()){
String line = objData.readLine();
line.trim();
double tmp = Double.parseDouble(line);
records=MiscUtil.AddElemDouble1(records,tmp);




}
fr.close();
objData.close();




}catch(IOException e){
System.out.println(e);
}
return records;
}
public static String[] AddElemString1(String[] in,String e){
String[] ret = new String[in.length+1];
for(int i=0;i<in.length;i++){
ret[i]=in[i];
}
ret[in.length]=e;
return ret;
}

public static int[] AddElemInt1(int[] in,int e){
int[] ret = new int[in.length+1];
for(int i=0;i<in.length;i++){
ret[i]=in[i];
}
ret[in.length]=e;
return ret;
}
}