- マハラノビス距離のウィキはこちら
- NxM分割表を自由度次元空間に配置する話しがあって、その空間における分割表座標と独立仮説下期待度数表の座標との間のマハラノビス距離がピアソンのカイ自乗統計量である話
> N<-4;M<-6
> T<-matrix(runif(N*M),N,M)
> chisq.test(T)$statistic
X-squared
2.337295
警告メッセージ:
In chisq.test(T) : カイ自乗近似は不正確かもしれません
> Sp<-RegularSpherize(T)
> mahalanobis(Sp$P,center=rep(0,length(Sp$P)),cov=Sp$V,inverted=TRUE)
[1] 2.337295
> RegularSpherize<-
function (O = matrix(round(runif(2 * 3) * 100, 0), 2, 3))
{
EandM <- tableExpAndMarginals(O)
N <- length(O[, 1])
M <- length(O[1, ])
E <- EandM$etable
D <- EandM$dtable
K <- sum(D^2/E)
X <- CategoryVector2D(N, M)
P <- dfCoordinate(D)
V <- EllipseChiMatrix(E)
Xe <- X/c(t(sqrt(E)))
EigenOut <- eigen(V)
U <- EigenOut$vectors
Ui <- solve(U)
Pi <- Ui %*% P
Mi <- diag(sqrt(EigenOut$values))
if (length(EigenOut$values) == 1) {
Mi <- matrix(1, 1, 1)
}
Pii <- Mi %*% Pi
list(O = O, E = E, D = D, K = K, X = X, V = V, Xe = Xe, P = P,
EigenOut = EigenOut, Ui = Ui, Mi = Mi, Pi = Pi, Pii = Pii)
}
> tableExpAndMarginals<-
function (O = matrix(round(runif(N * M) * 100, 0), N, M))
{
mrow <- apply(O, 1, sum)
mcol <- apply(O, 2, sum)
total <- sum(mrow)
etable <- outer(mrow, mcol, FUN = "*")/total
list(mrow = mrow, mcol = mcol, total = total, etable = etable,
dtable = O - etable, df = (length(mrow) - 1) * (length(mcol) -
1))
}
> CategoryVector2D<-
function (N = 3, M = 4)
{
cvN <- CategoryVector(N)
cvM <- CategoryVector(M)
m <- matrix(0, N * M, (N - 1) * (M - 1))
counter1 <- 1
counter2 <- 1
for (i in 1:N) {
for (j in 1:M) {
counter2 <- 1
for (k in 1:(N - 1)) {
for (l in 1:(M - 1)) {
m[counter1, counter2] <- cvN[i, k] * cvM[j,
l]
counter2 <- counter2 + 1
}
}
counter1 <- counter1 + 1
}
}
m
}
CategoryVector<-
function (nc = 3)
{
df <- nc - 1
d <- df + 1
diagval <- 1:d
diagval <- sqrt((df + 1)/df) * sqrt((df - diagval + 1)/(df -
diagval + 2))
others <- -diagval/(df - (0:(d - 1)))
m <- matrix(rep(others, df + 1), nrow = df + 1, byrow = TRUE)
diag(m) <- diagval
m[upper.tri(m)] <- 0
as.matrix(m[, 1:df])
}
dfCoordinate<-
function (D, Obs = FALSE)
{
if (Obs) {
D <- tableExpAndMarginals(D)$dtable
}
X <- CategoryVector2D(length(D[, 1]), length(D[1, ]))
(length(D[, 1]) - 1) * (length(D[1, ]) - 1)/(length(D[, 1]) *
length(D[1, ])) * c(t(X) %*% c(t(D)))
}
EllipseChiMatrix<-
function (E, K = 1, Obs = TRUE)
{
if (Obs) {
E <- tableExpAndMarginals(E)$etable
}
X <- CategoryVector2D(length(E[, 1]), length(E[1, ]))
t(X) %*% diag(1/(c(t(E) * K))) %*% X
}