線形回帰

先日エクセルで線形回帰、という記事を書いた(こちら)。
その記事からリンクを張ったエクセルの計算をjavaにやらせたソースがこちら

package StatUtilsX;

public class LinearRegression {

	public int N;
	public double[] df;
	public double[] SumSq;
	public double[] MeanSq;
	public double f;
	public double p;
	public double rPearson;
	public double[] eqcof;
	/**
	 * @param args
	 */
	public static void main(String[] args) {
		// TODO 自動生成されたメソッド・スタブ
		int N=100;
		double[] x= new double[N];
		double[] y= new double[N];
		for(int i=0;i<x.length;i++){
			x[i]=Math.random();
			y[i]=Math.random();
		}
		for(int i=0;i<x.length;i++){
			System.out.println(x[i]+"\t"+y[i]);
		}
		double avx=mean(x);
		double avy=mean(y);
		System.out.println("avx\t"+avx+"\tavy\t"+avy);
		double residx=sxx(x);
		double residy=sxx(y);
		System.out.println("residx\t"+residx+"\tresidy\t"+residy);
		double vx=vx(x);
		double vy=vx(y);
		System.out.println("vx\t"+vx+"\tvy\t"+vy);
		double sxy=sxy(x,y);
		System.out.println("sxy\t"+sxy);
		double rPearson=rPearson(x,y);
		System.out.println("rPearson\t"+rPearson);
		String st =StringLinearRegression(x,y);
		System.out.println(st);
		double tss=sxx(y);
		double ess=ess(x,y);
		double ssr=ssr(x,y);
		System.out.println("tss\t"+tss+"\tess\t"+ess+"\tssr\t"+ssr);
		double s2xy=s2xy(x,y);
		double fstat=fstat(x,y);
		System.out.println("s2xy\t"+s2xy+"\tfstat\t"+fstat);
		double ftestp=ftestLinReg(x,y);
		System.out.println("ftestP\t"+ftestp);
		
		System.out.println("----------------------\n\n");
		LinearRegression lr = new LinearRegression(x,y);
		lr.PrintLinReg("\t","\n");
		System.out.println("----------------------\n\n");
		String log="log";
		LinearRegression lrLog = new LinearRegression(x,y,log);
		lrLog.PrintLinReg("\t","\n");
	}
	
	public LinearRegression(double[] x,double[] y){
		N=x.length;
		df=new double[3];
		SumSq=new double[3];
		MeanSq=new double[2];
		eqcof=new double[2];
		df[0]=1;df[1]=N-2;df[2]=N-1;
		SumSq[0]=ssr(x,y);
		SumSq[1]=ess(x,y);
		SumSq[2]=sxx(y);
		MeanSq[0]=SumSq[0];
		MeanSq[1]=s2xy(x,y);
		f=fstat(x,y);
		p=ftestLinReg(x,y);
		rPearson=rPearson(x,y);
		eqcof=linearRegression(x,y);
		
		
	}
	public LinearRegression(double[] x,double[] y,String log){
		/*
		 * yを対数変換する
		 */
		double[] ylog=new double[y.length];
		for(int i=0;i<y.length;i++){
			ylog[i]=Math.log(y[i]);
		}
		
		N=x.length;
		df=new double[3];
		SumSq=new double[3];
		MeanSq=new double[2];
		eqcof=new double[2];
		df[0]=1;df[1]=N-2;df[2]=N-1;
		SumSq[0]=ssr(x,ylog);
		SumSq[1]=ess(x,ylog);
		SumSq[2]=sxx(ylog);
		MeanSq[0]=SumSq[0];
		MeanSq[1]=s2xy(x,ylog);
		f=fstat(x,ylog);
		p=ftestLinReg(x,ylog);
		rPearson=rPearson(x,ylog);
		eqcof=linearRegression(x,ylog);
		
		
	}
	public void PrintLinReg(String sep1,String sep2){
		String ret = StringLinReg(sep1,sep2);
		System.out.println(ret);
	}
	public String StringLinReg(String sep1,String sep2){
		String ret="Linear Regression Equation"+sep1;
		ret += "y="+eqcof[0]+"*x+"+eqcof[1]+sep2;
		ret += "Pearson's r"+sep1+rPearson+sep2;
		ret += "Ftest"+sep2;
		ret += "df"+sep1;
		ret += df[0]+sep1+df[1]+sep1+df[2]+sep2;
		ret += "SumSq"+sep1+SumSq[0]+sep1+SumSq[1]+sep1+SumSq[2]+sep2;
		ret += "MeanSq"+sep1+MeanSq[0]+sep1+MeanSq[1]+sep2;
		ret += "Fstat"+sep1 + f+sep2;
		ret += "p"+sep1+p+sep2;
		
		return ret;
		
		
	}
	public static double sum(double[] x){
		double ret =0;
		for(int i=0;i<x.length;i++){
			ret+=x[i];
		}
		return ret;
	}
	public static double mean(double[] x){
		double ret=0;
		if(x.length==0){
			ret = -9999;
		}else{
			ret=sum(x);
			ret/=(double)x.length;
		}
		
		return ret;
	}
	public static double sxx(double[] x){
		/*
		 * 平方和
		 */
		double avx=mean(x);
		double ret=0;
		for(int i=0;i<x.length;i++){
			ret+=(x[i]-avx)*(x[i]-avx);
		}
		return ret;
	}
	public static double vx(double[] x){
		/*
		 * 不偏分散
		 */
		double ret = sxx(x);
		ret/=(double)(x.length-1);
		return ret;
	}
	public static double sxy(double[] x,double[] y){
		double avx=mean(x);
		double avy=mean(y);
		double ret =0;
		for(int i=0;i<x.length;i++){
			ret+=(x[i]-avx)*(y[i]-avy);
		}
		return ret;
	}
	public static double rPearson(double[] x,double[] y){
		double ret = sxy(x,y)/Math.sqrt(sxx(x)*sxx(y));
		return ret;
	}
	public static double[] linearRegression(double[] x,double[] y){
		double[] ret = new double[2];
		ret[0]=sxy(x,y)/sxx(x);
		ret[1]=mean(y)-ret[0]*mean(x);
		return ret;
	}
	public static String StringLinearRegression(double[] x, double[] y){
		double[] tmp = linearRegression(x,y);
		String ret = "y="+tmp[0]+"*x+"+tmp[1];
		return ret;
	}
	public static double ess(double[] x,double[] y){
		double[] linreg=linearRegression(x,y);
		double ret =0;
		for(int i=0;i<y.length;i++){
			ret+=(y[i]-(linreg[0]*x[i]+linreg[1]))*(y[i]-(linreg[0]*x[i]+linreg[1]));
		}
		return ret;
	}
	public static double ssr(double[] x,double[] y){
		double[] linreg=linearRegression(x,y);
		double avy=mean(y);
		double ret=0;
		for(int i=0;i<y.length;i++){
			ret+=(linreg[0]*x[i]+linreg[1]-avy)*(linreg[0]*x[i]+linreg[1]-avy);
		}
		return ret;
	}
	public static double s2xy(double[] x, double[] y){
		double ret = ess(x,y)/(double)(x.length-2);
		return ret;
	}
	public static double fstat(double[] x,double[] y){
		double ret = ssr(x,y)/s2xy(x,y);
		return ret;
	}
	
	public static double ftestLinReg(double[] x,double[] y){
		double fstat=fstat(x,y);
		double[] df={1,(double)x.length-2};
		org.apache.commons.math.distribution.FDistributionImpl fd=
			new org.apache.commons.math.distribution.FDistributionImpl(df[0],df[1]);

		double ret=0;
		try{
			ret = 1-fd.cumulativeProbability(fstat);
		} catch (org.apache.commons.math.MathException ex) {
			
		}
		return ret;
	}
	

}