線形回帰
先日エクセルで線形回帰、という記事を書いた(こちら)。
その記事からリンクを張ったエクセルの計算をjavaにやらせたソースがこちら
package StatUtilsX;
public class LinearRegression {
public int N;
public double[] df;
public double[] SumSq;
public double[] MeanSq;
public double f;
public double p;
public double rPearson;
public double[] eqcof;
/**
* @param args
*/
public static void main(String[] args) {
// TODO 自動生成されたメソッド・スタブ
int N=100;
double[] x= new double[N];
double[] y= new double[N];
for(int i=0;i<x.length;i++){
x[i]=Math.random();
y[i]=Math.random();
}
for(int i=0;i<x.length;i++){
System.out.println(x[i]+"\t"+y[i]);
}
double avx=mean(x);
double avy=mean(y);
System.out.println("avx\t"+avx+"\tavy\t"+avy);
double residx=sxx(x);
double residy=sxx(y);
System.out.println("residx\t"+residx+"\tresidy\t"+residy);
double vx=vx(x);
double vy=vx(y);
System.out.println("vx\t"+vx+"\tvy\t"+vy);
double sxy=sxy(x,y);
System.out.println("sxy\t"+sxy);
double rPearson=rPearson(x,y);
System.out.println("rPearson\t"+rPearson);
String st =StringLinearRegression(x,y);
System.out.println(st);
double tss=sxx(y);
double ess=ess(x,y);
double ssr=ssr(x,y);
System.out.println("tss\t"+tss+"\tess\t"+ess+"\tssr\t"+ssr);
double s2xy=s2xy(x,y);
double fstat=fstat(x,y);
System.out.println("s2xy\t"+s2xy+"\tfstat\t"+fstat);
double ftestp=ftestLinReg(x,y);
System.out.println("ftestP\t"+ftestp);
System.out.println("----------------------\n\n");
LinearRegression lr = new LinearRegression(x,y);
lr.PrintLinReg("\t","\n");
System.out.println("----------------------\n\n");
String log="log";
LinearRegression lrLog = new LinearRegression(x,y,log);
lrLog.PrintLinReg("\t","\n");
}
public LinearRegression(double[] x,double[] y){
N=x.length;
df=new double[3];
SumSq=new double[3];
MeanSq=new double[2];
eqcof=new double[2];
df[0]=1;df[1]=N-2;df[2]=N-1;
SumSq[0]=ssr(x,y);
SumSq[1]=ess(x,y);
SumSq[2]=sxx(y);
MeanSq[0]=SumSq[0];
MeanSq[1]=s2xy(x,y);
f=fstat(x,y);
p=ftestLinReg(x,y);
rPearson=rPearson(x,y);
eqcof=linearRegression(x,y);
}
public LinearRegression(double[] x,double[] y,String log){
/*
* yを対数変換する
*/
double[] ylog=new double[y.length];
for(int i=0;i<y.length;i++){
ylog[i]=Math.log(y[i]);
}
N=x.length;
df=new double[3];
SumSq=new double[3];
MeanSq=new double[2];
eqcof=new double[2];
df[0]=1;df[1]=N-2;df[2]=N-1;
SumSq[0]=ssr(x,ylog);
SumSq[1]=ess(x,ylog);
SumSq[2]=sxx(ylog);
MeanSq[0]=SumSq[0];
MeanSq[1]=s2xy(x,ylog);
f=fstat(x,ylog);
p=ftestLinReg(x,ylog);
rPearson=rPearson(x,ylog);
eqcof=linearRegression(x,ylog);
}
public void PrintLinReg(String sep1,String sep2){
String ret = StringLinReg(sep1,sep2);
System.out.println(ret);
}
public String StringLinReg(String sep1,String sep2){
String ret="Linear Regression Equation"+sep1;
ret += "y="+eqcof[0]+"*x+"+eqcof[1]+sep2;
ret += "Pearson's r"+sep1+rPearson+sep2;
ret += "Ftest"+sep2;
ret += "df"+sep1;
ret += df[0]+sep1+df[1]+sep1+df[2]+sep2;
ret += "SumSq"+sep1+SumSq[0]+sep1+SumSq[1]+sep1+SumSq[2]+sep2;
ret += "MeanSq"+sep1+MeanSq[0]+sep1+MeanSq[1]+sep2;
ret += "Fstat"+sep1 + f+sep2;
ret += "p"+sep1+p+sep2;
return ret;
}
public static double sum(double[] x){
double ret =0;
for(int i=0;i<x.length;i++){
ret+=x[i];
}
return ret;
}
public static double mean(double[] x){
double ret=0;
if(x.length==0){
ret = -9999;
}else{
ret=sum(x);
ret/=(double)x.length;
}
return ret;
}
public static double sxx(double[] x){
/*
* 平方和
*/
double avx=mean(x);
double ret=0;
for(int i=0;i<x.length;i++){
ret+=(x[i]-avx)*(x[i]-avx);
}
return ret;
}
public static double vx(double[] x){
/*
* 不偏分散
*/
double ret = sxx(x);
ret/=(double)(x.length-1);
return ret;
}
public static double sxy(double[] x,double[] y){
double avx=mean(x);
double avy=mean(y);
double ret =0;
for(int i=0;i<x.length;i++){
ret+=(x[i]-avx)*(y[i]-avy);
}
return ret;
}
public static double rPearson(double[] x,double[] y){
double ret = sxy(x,y)/Math.sqrt(sxx(x)*sxx(y));
return ret;
}
public static double[] linearRegression(double[] x,double[] y){
double[] ret = new double[2];
ret[0]=sxy(x,y)/sxx(x);
ret[1]=mean(y)-ret[0]*mean(x);
return ret;
}
public static String StringLinearRegression(double[] x, double[] y){
double[] tmp = linearRegression(x,y);
String ret = "y="+tmp[0]+"*x+"+tmp[1];
return ret;
}
public static double ess(double[] x,double[] y){
double[] linreg=linearRegression(x,y);
double ret =0;
for(int i=0;i<y.length;i++){
ret+=(y[i]-(linreg[0]*x[i]+linreg[1]))*(y[i]-(linreg[0]*x[i]+linreg[1]));
}
return ret;
}
public static double ssr(double[] x,double[] y){
double[] linreg=linearRegression(x,y);
double avy=mean(y);
double ret=0;
for(int i=0;i<y.length;i++){
ret+=(linreg[0]*x[i]+linreg[1]-avy)*(linreg[0]*x[i]+linreg[1]-avy);
}
return ret;
}
public static double s2xy(double[] x, double[] y){
double ret = ess(x,y)/(double)(x.length-2);
return ret;
}
public static double fstat(double[] x,double[] y){
double ret = ssr(x,y)/s2xy(x,y);
return ret;
}
public static double ftestLinReg(double[] x,double[] y){
double fstat=fstat(x,y);
double[] df={1,(double)x.length-2};
org.apache.commons.math.distribution.FDistributionImpl fd=
new org.apache.commons.math.distribution.FDistributionImpl(df[0],df[1]);
double ret=0;
try{
ret = 1-fd.cumulativeProbability(fstat);
} catch (org.apache.commons.math.MathException ex) {
}
return ret;
}
}