2013-11-23

ベクトル場での共役事前分布

こちらで点クラウドデータからグラフ化を介して交通量的な評価をして「重要なところ」というものを定量する話を書いた
「重要なところ」は「交通に関する情報量が多いところ」でもある
「情報量が多いところ」では、そこの「交通」についての推定精度が高いことにもつながる
さて、ある空間上の点に「方向情報」が観察されたとする。その情報は観察件数なので、「離散的」。それを統合して、その点における「背景にある真の(母の？)方向に関する確率密度分布」を推定したいとする
２項観察からベータ分布、多項観察からディリクレ分布を推定することがあるが、これは共役事前分布でつながった分布間の関係
では、方向情報から方向母分布へと対応付けるのは、方向母分布に方向情報の尤度で関係づけられた何かを対応付けることだろう
それってどうする？
方向統計学の範疇(こちら)？
それとも、多項を正単体の頂点に対応付けられるようなことを使って、ユークリッドなままで幾何的に扱うことでできる？？？
とはいえ、少し簡単に考えて、方向観察を空間の点(単位ベクトル)としておいて(始点と終点の両方)、それに対してcor()をかけて固有値を取ると、全方位に均一なら、固有値が次元数に均等に分散され、特定の方向に集中していれば、そういうことになる
この固有値法だと「分岐点」という情報を落としてしまうのだが、まあ、何もないよりはまし…

library(geometry)
library(igraph)
library(rgl)
# 点集合
# ドロネーグラフ
# 全ての点ペアの最短経路
# 点とエッジに交通量の重みを計算
# 「舗装」効率
# 「舗装」「未舗装」で交通量は変わらないままで「交通コストのみ」が変わると考える# もしくは「舗装」されるとそこの交通量が増えて交通量の重みも変わると考えるかもしれない
# 実は最小全域木はこの「舗装」の効果を無限大にしたものらしい
my.traffic <- function(X){
	if(!is.matrix(X)){
		X <- matrix(X,ncol=1)
	}
	n.pt <- length(X[,1])
	d <- length(X[1,])
	delaunay.X <- delaunayn(X)
	n.v <- length(delaunay.X[1,])
	n.tri <- length(delaunay.X[,1])
	tmp.n <- n.v*(n.v-1)/2
	tmp.ed <- matrix(0,tmp.n*n.tri,2)
	cnt <- 1
	for(i in 1:(n.v-1)){
		for(j in (i+1):n.v){
			tmp.ed[cnt:(cnt+n.tri-1),] <- cbind(delaunay.X[,i],delaunay.X[,j])
			cnt <- cnt+n.tri
		}
	}
	tmp.ed <- t(apply(tmp.ed,1,sort))

	ed <- as.matrix(unique(as.data.frame(tmp.ed)))
	g <- graph.edgelist(ed,directed =FALSE)
	deg <- degree(g)
	adj <- get.adjlist(g,mode="all")
	m <- list()
	for(i in 1:length(adj)){
		m[[i]] <- matrix(0,length(adj[[i]]),length(adj[[i]]))
	}
	n.e <- length(ed[,1])
	e.weight <- sqrt(apply((X[ed[,1],]-X[ed[,2],])^2,1,sum))
	path.cnt <- rep(0,n.e)
	path.cnt.v <- rep(0,n.pt)
	for(i in 1:n.pt){
		tmp <- get.shortest.paths(g,i,mode="all",output="epath",weight=e.weight)
		tmp.tab <- tabulate(unlist(tmp),n.e)
		path.cnt <- path.cnt+tmp.tab
		tmp.v <- get.shortest.paths(g,i,mode="all",output="vpath",weight=e.weight)
		tmp.tab <- tabulate(unlist(tmp.v),n.pt)
		#print(tmp.tab)
		path.cnt.v <- path.cnt.v + tmp.tab
		for(j2 in 1:length(tmp.v)){
			tmp2 <- tmp.v[[j2]]
			if(length(tmp2)>2){
				for(j in 1:(length(tmp2)-2)){
					target <- tmp2[j+1]
					pre <- tmp2[j]
					post <- tmp2[j+2]
					pre.id <- which(adj[[target]]==pre)
					post.id <- which(adj[[target]]==post)
					m[[target]][pre.id,post.id] <- m[[target]][pre.id,post.id] + 1
				}
			}
		}
	}
	eigen.out <- list()
	for(i in 1:n.pt){
		tmp.X <- matrix(0,0,length(X[1,]))
		for(j1 in 1:(length(m[[i]][,1])-1)){
			for(j2 in (j1+1):length(m[[i]][1,])){
				pre <- adj[[i]][j1]
				post <- adj[[i]][j2]
				tmp.tmp <- X[c(rep(pre,m[[i]][j1,j2]),rep(post,m[[i]][j1,j2])),]-X[c(rep(post,m[[i]][j1,j2]),rep(pre,m[[i]][j1,j2])),]
				tmp.X <- rbind(tmp.X,tmp.tmp)
			}
		}
		if(length(tmp.X[,1])>0){
			eigen.out[[i]] <- eigen(cor(tmp.X))
		}
		
	}
	return(list(X=X,ed.list=ed,path.cnt.e=path.cnt,path.cnt.v=path.cnt.v,e.len=e.weight,deg=deg,adj=adj,m=m,eigen.out=eigen.out))
}
my.traffic.mst <- function(X){
	if(!is.matrix(X)){
		X <- matrix(X,ncol=1)
	}
	n.pt <- length(X[,1])
	d <- length(X[1,])
	delaunay.X <- delaunayn(X)
	n.v <- length(delaunay.X[1,])
	n.tri <- length(delaunay.X[,1])
	tmp.n <- n.v*(n.v-1)/2
	tmp.ed <- matrix(0,tmp.n*n.tri,2)
	cnt <- 1
	for(i in 1:(n.v-1)){
		for(j in (i+1):n.v){
			tmp.ed[cnt:(cnt+n.tri-1),] <- cbind(delaunay.X[,i],delaunay.X[,j])
			cnt <- cnt+n.tri
		}
	}
	ed <- as.matrix(unique(as.data.frame(tmp.ed)))
	g <- graph.edgelist(ed,directed =FALSE)
	n.e <- length(ed[,1])
	e.weight <- sqrt(apply((X[ed[,1],]-X[ed[,2],])^2,1,sum))
	path.cnt <- rep(0,n.e)
	path.cnt.v <- rep(0,n.pt)
	for(i in 1:n.pt){
		tmp <- get.shortest.paths(g,i,mode="all",output="epath",weight=e.weight)
		tmp.tab <- tabulate(unlist(tmp),n.e)
		path.cnt <- path.cnt+tmp.tab
		tmp.v <- get.shortest.paths(g,i,mode="all",output="vpath",weight=e.weight)
		tmp.tab <- tabulate(unlist(tmp.v),n)
		path.cnt.v <- path.cnt.v + tmp.tab
	}
	w.hx <- matrix(0,n.e+1,n.e)
	w.hx[1,] <- e.weight
	ord <- rep(0,n.e)
	for(ii in 1:n.e){
		tmp.cnt <- rep(0,n.e)
		for(i in 1:n.pt){
			tmp <- get.shortest.paths(g,i,mode="all",output="epath",weight=w.hx[ii,])
			tmp.tab <- tabulate(unlist(tmp),n.e)
			tmp.cnt <- tmp.cnt+tmp.tab
		}
		#print(tmp.cnt)
		non.zero <- which(w.hx[ii,]!=0)
		e.weight.per.len <- rep(-1,n.e)
		e.weight.per.len[non.zero] <- tmp.cnt[non.zero]/w.hx[ii,non.zero]
		selected <- which(e.weight.per.len==max(e.weight.per.len))[1]
		ord[ii] <- selected
		w.hx[ii+1,]<-w.hx[ii,]
		w.hx[ii+1,selected] <-0

	}

	return(list(X=X,ed.list=ed,path.cnt.e=path.cnt,path.cnt.v=path.cnt.v,e.len=e.weight,select.hx=ord,w.hx=w.hx))
}
X <- matrix(rnorm(10),ncol=2)
n <- 100
t <- rnorm(n)
t <- t/(max(abs(t))) * pi*4
x <- cos(t)
#y <- sin(t)
y <- t^2
x. <- x + rnorm(n,0,0.05)
y. <- y + rnorm(n,0,0.05)
plot(x.,y.)

X <- cbind(x.,y.)
X <- matrix(rnorm(100),ncol=2)

traffic.out <- my.traffic(X)