線形回帰
先日エクセルで線形回帰、という記事を書いた(こちら)。
その記事からリンクを張ったエクセルの計算をjavaにやらせたソースがこちら
package StatUtilsX; public class LinearRegression { public int N; public double[] df; public double[] SumSq; public double[] MeanSq; public double f; public double p; public double rPearson; public double[] eqcof; /** * @param args */ public static void main(String[] args) { // TODO 自動生成されたメソッド・スタブ int N=100; double[] x= new double[N]; double[] y= new double[N]; for(int i=0;i<x.length;i++){ x[i]=Math.random(); y[i]=Math.random(); } for(int i=0;i<x.length;i++){ System.out.println(x[i]+"\t"+y[i]); } double avx=mean(x); double avy=mean(y); System.out.println("avx\t"+avx+"\tavy\t"+avy); double residx=sxx(x); double residy=sxx(y); System.out.println("residx\t"+residx+"\tresidy\t"+residy); double vx=vx(x); double vy=vx(y); System.out.println("vx\t"+vx+"\tvy\t"+vy); double sxy=sxy(x,y); System.out.println("sxy\t"+sxy); double rPearson=rPearson(x,y); System.out.println("rPearson\t"+rPearson); String st =StringLinearRegression(x,y); System.out.println(st); double tss=sxx(y); double ess=ess(x,y); double ssr=ssr(x,y); System.out.println("tss\t"+tss+"\tess\t"+ess+"\tssr\t"+ssr); double s2xy=s2xy(x,y); double fstat=fstat(x,y); System.out.println("s2xy\t"+s2xy+"\tfstat\t"+fstat); double ftestp=ftestLinReg(x,y); System.out.println("ftestP\t"+ftestp); System.out.println("----------------------\n\n"); LinearRegression lr = new LinearRegression(x,y); lr.PrintLinReg("\t","\n"); System.out.println("----------------------\n\n"); String log="log"; LinearRegression lrLog = new LinearRegression(x,y,log); lrLog.PrintLinReg("\t","\n"); } public LinearRegression(double[] x,double[] y){ N=x.length; df=new double[3]; SumSq=new double[3]; MeanSq=new double[2]; eqcof=new double[2]; df[0]=1;df[1]=N-2;df[2]=N-1; SumSq[0]=ssr(x,y); SumSq[1]=ess(x,y); SumSq[2]=sxx(y); MeanSq[0]=SumSq[0]; MeanSq[1]=s2xy(x,y); f=fstat(x,y); p=ftestLinReg(x,y); rPearson=rPearson(x,y); eqcof=linearRegression(x,y); } public LinearRegression(double[] x,double[] y,String log){ /* * yを対数変換する */ double[] ylog=new double[y.length]; for(int i=0;i<y.length;i++){ ylog[i]=Math.log(y[i]); } N=x.length; df=new double[3]; SumSq=new double[3]; MeanSq=new double[2]; eqcof=new double[2]; df[0]=1;df[1]=N-2;df[2]=N-1; SumSq[0]=ssr(x,ylog); SumSq[1]=ess(x,ylog); SumSq[2]=sxx(ylog); MeanSq[0]=SumSq[0]; MeanSq[1]=s2xy(x,ylog); f=fstat(x,ylog); p=ftestLinReg(x,ylog); rPearson=rPearson(x,ylog); eqcof=linearRegression(x,ylog); } public void PrintLinReg(String sep1,String sep2){ String ret = StringLinReg(sep1,sep2); System.out.println(ret); } public String StringLinReg(String sep1,String sep2){ String ret="Linear Regression Equation"+sep1; ret += "y="+eqcof[0]+"*x+"+eqcof[1]+sep2; ret += "Pearson's r"+sep1+rPearson+sep2; ret += "Ftest"+sep2; ret += "df"+sep1; ret += df[0]+sep1+df[1]+sep1+df[2]+sep2; ret += "SumSq"+sep1+SumSq[0]+sep1+SumSq[1]+sep1+SumSq[2]+sep2; ret += "MeanSq"+sep1+MeanSq[0]+sep1+MeanSq[1]+sep2; ret += "Fstat"+sep1 + f+sep2; ret += "p"+sep1+p+sep2; return ret; } public static double sum(double[] x){ double ret =0; for(int i=0;i<x.length;i++){ ret+=x[i]; } return ret; } public static double mean(double[] x){ double ret=0; if(x.length==0){ ret = -9999; }else{ ret=sum(x); ret/=(double)x.length; } return ret; } public static double sxx(double[] x){ /* * 平方和 */ double avx=mean(x); double ret=0; for(int i=0;i<x.length;i++){ ret+=(x[i]-avx)*(x[i]-avx); } return ret; } public static double vx(double[] x){ /* * 不偏分散 */ double ret = sxx(x); ret/=(double)(x.length-1); return ret; } public static double sxy(double[] x,double[] y){ double avx=mean(x); double avy=mean(y); double ret =0; for(int i=0;i<x.length;i++){ ret+=(x[i]-avx)*(y[i]-avy); } return ret; } public static double rPearson(double[] x,double[] y){ double ret = sxy(x,y)/Math.sqrt(sxx(x)*sxx(y)); return ret; } public static double[] linearRegression(double[] x,double[] y){ double[] ret = new double[2]; ret[0]=sxy(x,y)/sxx(x); ret[1]=mean(y)-ret[0]*mean(x); return ret; } public static String StringLinearRegression(double[] x, double[] y){ double[] tmp = linearRegression(x,y); String ret = "y="+tmp[0]+"*x+"+tmp[1]; return ret; } public static double ess(double[] x,double[] y){ double[] linreg=linearRegression(x,y); double ret =0; for(int i=0;i<y.length;i++){ ret+=(y[i]-(linreg[0]*x[i]+linreg[1]))*(y[i]-(linreg[0]*x[i]+linreg[1])); } return ret; } public static double ssr(double[] x,double[] y){ double[] linreg=linearRegression(x,y); double avy=mean(y); double ret=0; for(int i=0;i<y.length;i++){ ret+=(linreg[0]*x[i]+linreg[1]-avy)*(linreg[0]*x[i]+linreg[1]-avy); } return ret; } public static double s2xy(double[] x, double[] y){ double ret = ess(x,y)/(double)(x.length-2); return ret; } public static double fstat(double[] x,double[] y){ double ret = ssr(x,y)/s2xy(x,y); return ret; } public static double ftestLinReg(double[] x,double[] y){ double fstat=fstat(x,y); double[] df={1,(double)x.length-2}; org.apache.commons.math.distribution.FDistributionImpl fd= new org.apache.commons.math.distribution.FDistributionImpl(df[0],df[1]); double ret=0; try{ ret = 1-fd.cumulativeProbability(fstat); } catch (org.apache.commons.math.MathException ex) { } return ret; } }