R -- 一元配置分散分析（exact test）

一元配置分散分析（exact test）　　　　　Last modified: Nov 15, 2011

目的

　正確な P 値を計算する一元配置分散分析（正確確率検定；並べ替え検定 permutation test）である。データによっては計算量が多くなり実用的な時間内で計算が終了できないこともあるので，そのような場合にはモンテカルロ法による近似計算もできる。
　なお，一元配置分散分析は，各群の分散が等しいことを仮定しないものである（ウェルチの方法）。

使用法

exact.onway.test(x, exact=TRUE, hybrid=FALSE, loop=10000)

引数

x      各群のデータをベクトル，それらをリスト要素とするリスト（使用例参照）
exact  正確な P 値を求める場合，またはシミュレーションにより近似的な P 値を求めるときには TRUE（デフォルト）
       FALSE の場合には標本に対して一元配置分散分析のみを行う
hybrid TRUE を指定すれば，シミュレーションにより近似的な P 値を計算する
loop   シミュレーションの回数

ソース

インストールは，以下の 1 行をコピーし，R コンソールにペーストする
source("http://aoki2.si.gunma-u.ac.jp/R/src/exact-oneway-test.R", encoding="euc-jp")

# 一元配置分散分析（並べ替え検定）
exact.oneway.test <- function(       x,                                              # リスト
                                permutation=TRUE,                               # 並べ替え検定を行うかどうか
                                hybrid=FALSE,                                   # TRUE にすれば，シミュレーションによる
                                loop=10000)                                     # シミュレーションの回数
{

        printf <- function(fmt, ...)                                         # 書式付き出力
        {
                cat(sprintf(fmt, ...))
        }

        found <- function()                                                  # 周辺度数が同じ分割表が見つかった
        {
                hh <- perform.test(um)
                if (hh <= p.value+EPSILON) {
                        nprod <- sum(perm_table[rt+1])-sum(perm_table[um+1])
                        nntrue <<- nntrue+exp(nprod-nntrue2*log_expmax)
                        while (nntrue >= EXPMAX) {
                                nntrue <<- nntrue/EXPMAX
                                nntrue2 <<- nntrue2+1
                        }
                }
                ntables <<- ntables+1
        }

        search <- function(x, y)                                             # 分割表の探索
        {
                if (y == 1) {                                                   # 見つかった
                        found()
                }
                else if (x == 1) {
                        if ((t <- um[1, 1]-um[y, 1]) >= 0) {
                                um[1, 1] <<- t
                                search(nc, y-1)
                                um[1, 1] <<- um[1, 1]+um[y, 1]
                        }
                }
                else {
                        search(x-1, y)
                        while (um[y, 1] && um[1, x]) {
                                um[y, x] <<- um[y, x]+1
                                um[y, 1] <<- um[y, 1]-1
                                um[1, x] <<- um[1, x]-1
                                search(x-1, y)
                        }
                        um[y, 1] <<- um[y, 1]+um[y, x]
                        um[1, x] <<- um[1, x]+um[y, x]
                        um[y, x] <<- 0
                }
        }

        permutation.test <- function()                                               # 並べ替え検定
        {
                denom2 <- 0
                denom <- perm_table[n+1]-sum(perm_table[ct+1])
                while (denom > log_expmax) {
                        denom <- denom-log_expmax
                        denom2 <- denom2+1
                }
                denom <- exp(denom)
                um[,1] <<- rt
                um[1,] <<- ct
                search(nc, nr)
                p.value <- nntrue/denom*EXPMAX^(nntrue2-denom2)
                printf("並べ替え検定による P 値 = %.10g\n", p.value)
                printf("査察した分割表の個数は %s 個\n", ntables)
                return(p.value)
        }

        perform.test <- function(u)                                          # 並べ替え検定
        {
                x <- NULL
                for (i in 1:nr) {
                        x <- c(x, rep(score, u[i,]))
                }
                return(oneway.test(x~rep(1:nr, rt))$p.value)
        }
        
        monte.carlo <- function()                                            # モンテカルロ検定
        {
                p.value <- mean(sapply(r2dtable(loop, rt, ct), perform.test) <= p.value+EPSILON)
                printf("%i 回のシミュレーションによる P 値 = %g\n", loop, p.value)
                return(p.value)
        }

        simple.oneway.test <- function()
        {
                printf("観察値による一元配置分散分析の P 値 = %g\n", p.value)
        }

###### 関数本体

        if (is.list(x)) {
                y <- factor(rep(1:length(x), sapply(x, length)))
                t <- table(y, unlist(x))
        }
        else {
                stop("群ごとのデータはリストで与えること")
        }

        EPSILON <- 1e-10
        EXPMAX <- 1e100      
        log_expmax <- log(EXPMAX)
        nr <- nrow(t)                                                                # 分割表の行数
        nc <- ncol(t)                                                                # 分割表の列数
        rt <- rowSums(t)                                                     # 分割表の行和
        ct <- colSums(t)                                                     # 分割表の列和
        n <- sum(t)                                                          # 総和
        q <- cumsum(c(0,ct[-nc]))+(ct+1)*0.5
        half <- (n+1)*0.5
        score <- as.numeric(colnames(t))
        p.value <- perform.test(t)                                           # 観察値による検定
        simple.oneway.test()                                                            # 検定結果を出力
        if (permutation) {
                if (hybrid) {                                                   # モンテカルロ法による検定
                        p.value <- monte.carlo()
                }
                else {                                                          # 並べ替え検定
                        perm_table <- cumsum(c(0, log(1:(n+1))))
                        ntables <- nntrue <- nntrue2 <- 0
                        um <- matrix(0, nr, nc)
                        p.value <- permutation.test()
                }
        }
        invisible(p.value)
}


以下のプログラムは短いが時間がかかりすぎる。（注を参照）
インストールは，以下の 1 行をコピーし，R コンソールにペーストする
source("http://aoki2.si.gunma-u.ac.jp/R/src/permutation-oneway-test.R", encoding="euc-jp")

# usage: permutation.oneway.test(list(c(36.7, 52.4, 65.8), c(45.7, 61.9, 65.3), c(52.6, 76.6, 81.3)))
permutation.oneway.test <- function(x)
{
        n <- sapply(x, length)
        g <- factor(rep(1:length(x), n))
        z <- unlist(x)
        library(e1071)
        perm <- permutations(sum(n))
        p0 <- oneway.test(z~g)$p.value
        ps <- apply(perm, 1, function(i) oneway.test(z[i]~g)$p.value)
        invisible(list(p0=p0, permutation.p=mean(ps < p0+1e-10), ps=ps))
}


使用例

exact.oneway.test(list(c(36.7, 52.4, 65.8), c(45.7, 61.9, 65.3), c(52.6, 76.6, 81.3)))

出力結果例

観察値による一元配置分散分析の P 値 = 0.440037
並べ替え検定による P 値 = 0.4107142857
査察した分割表の個数は 1680 個

n個の中に，p, q, r, ...個同じものがあるとき（n=p+q+r+…），これらを並べてできる順列のリストは，e1071 の permutations が速いので，以下のようにしてやれば，一応はできる。ただし，permutations の戻り値を求める順列に変換することと，重複を除くことに時間がかかるので，ベストではない。

comb2 <- function(n.i)
{
	n <- sum(n.i)
	x <- rep(1:length(n.i), n.i)
	library(e1071)
	a <- permutations(n)
	b <- unique(data.frame(t(apply(a, 1, function(i) x[i]))))
	rownames(b) <- 1:nrow(b)
	return(b)
}

これを実行すると以下のような結果が得られる。

> ans <- comb2(c(3,1,2)) # 順列の個数は (3+1+2)! / 3! / 1! / 2! = 60
> ans
   X1 X2 X3 X4 X5 X6
1   1  1  1  2  3  3
2   1  1  2  1  3  3
3   1  2  1  1  3  3
    途中省略
57  3  3  1  1  1  2
58  3  3  1  1  2  1
59  3  3  1  2  1  1
60  3  3  2  1  1  1

直前のページへ戻る　　

E-mail to Shigenobu AOKI