R -- 一元配置分散分析と多重比較

一元配置分散分析と多重比較　　　　　Last modified: Feb 07, 2025
目的

生データに対して，等分散性の検定，一元配置分散分析と多重比較を行う all in one 関数。

使用法

oneway.ANOVA(x, g, verbose=TRUE)

引数

x       分析対象となる観察値のベクトル
g       観察値がどの群に属するかを表すベクトル
verbose デフォルトでは結果をプリントする

ソース

インストールは，以下の 1 行をコピーし，R コンソールにペーストする
source("http://aoki2.si.gunma-u.ac.jp/R/src/oneway-ANOVA.R", encoding="euc-jp")

oneway.ANOVA <- function(x, g, verbose=TRUE) { # x: データベクトル，g: 群変数ベクトル
# 基礎統計量
  d <- data.frame(x, g, stringsAsFactors = TRUE)
  level <- levels(d$g)
  k <- length(level)
  n <- aggregate(x ~ g, d, length)[,2]
  N <- sum(n)
  Mean <- aggregate(x ~ g, d, mean)[,2]
  Var <- aggregate(x ~ g, d, var)[,2]
  SD <- aggregate(x ~ g, d, sd)[,2]
  SE <- SD/sqrt(n)
  result <- data.frame(水準=level, n, Mean,
                       SD, "Mean-SD"=Mean-SD, "Mean+SD"=Mean+SD,
                       SE, "Mean-SE"=Mean-SE, "Mean+SE"=Mean+SE, check.names=FALSE)
# デザインプロット
  if (verbose) {
    x.pos <- seq_along(level)
    plot(x.pos, Mean, xlim=c(0.5, k+0.5), ylim=range(Mean-SD, Mean+SD), pch=16,
         xlab="", xaxt="n", ylab="")
    axis(1, at=x.pos, labels=level)
    arrows(x.pos, Mean-SD, x.pos, Mean+SD, length=0.10, angle=90, code=3, lty=3)
    arrows(x.pos, Mean-SE, x.pos, Mean+SE, length=0.05, angle=90, code=3, lwd=2)
  }
# 等分散性の検定
  r.bartlett <- bartlett.test(d$x, d$g)
  r.Levene <- Levene.test(d$x, d$g)
  r.eqvar <- data.frame(手法=c("Bartlett", "Levene"), 分布=c("chi square", "F"),
               統計量=c(r.bartlett$statistic, r.Levene$statistic),
               自由度=c(r.bartlett$parameter, 
                        sprintf("(%i, %i)", r.Levene$parameter[1], r.Levene$parameter[2])),
               "P 値"=c(r.bartlett$p.value, r.Levene$p.value))
# 分散分析（等分散性を仮定する）
  r.oneway <- oneway.test(x ~ g, d, var.equal=TRUE)
  gm <- mean(d$x)
  St <- sum((d$x-gm)^2)
  Sb <- sum(n*(Mean-gm)^2)
  Sw <- St-Sb
  SS <- c(Sb, Sw, St)
  df <- c(k-1, N-k, N-1)
  MS <- SS/df
  F <- c(MS[1]/MS[2], NA, NA)
  p <- c(pf(F[1], df[1], df[2], lower.tail=FALSE), NA, NA)
  MS[3] <- NA
  r.ANOVA <- data.frame(要因=c("級間", "級内", "全体"), 平方和=c(Sb, Sw, St),
                        自由度=df, 平均平方=MS,  "F 値"=F, "P 値"=p, check.names=FALSE)
# 分散分析（等分散性を仮定しない）
  r.Welch <- oneway.test(x ~ g, d)
  r.BF <- Brown.Forsythe.test(x, g)
  r.ANOVA2 <- data.frame(手法=c("Welch", "Brown-Forsythe"),
                         "F 値"=c(r.Welch$statistic, r.BF$statistic),
                         "第 1 自由度"=c(k-1, r.BF$parameter[1]),
                         "第 2 自由度"=c(r.Welch$parameter[2], r.BF$parameter[2]),
                         "P 値"=c(r.Welch$p.value, r.BF$p.value), check.names=FALSE)
# 多重比較
  r.tukey <- tukey(d$x, d$g)$Tukey
  lc <- t(sapply(rownames(r.tukey), function(s) as.integer(unlist(strsplit(s, ":")))))
  diff <- Mean[lc[,2]]-Mean[lc[,1]]
  r.GH <- tukey(d$x, d$g, method="G")$Games.Howell
  r.multi <- data.frame(群1=level[lc[,1]], 群2=level[lc[,2]],
                        平均1=Mean[lc[,1]], 平均2=Mean[lc[,2]], 差=diff,
                        差の標準誤差=diff/r.tukey[,1], "t 値(Tukey)"=r.tukey[,1],
                        "P 値"=r.tukey[,2], "t 値(Games-Howell)"=r.GH[,1],
                        "自由度"=r.GH[,2], "P 値"=r.GH[,3], check.names=FALSE)
  if (verbose) {
    cat("\n基礎統計量\n\n")
    print(result, row.names=FALSE)
    cat("\n等分散性の検定\n\n")
    print(r.eqvar, row.names=FALSE)
    cat("\n分散分析（等分散性を仮定する）\n\n")
    print(r.ANOVA, row.names=FALSE)
    cat("\n分散分析（等分散性を仮定しない）\n\n")
    print(r.ANOVA2, row.names=FALSE)
    cat("\n多重比較\n\n")
    print(r.multi, row.names=FALSE)
  }
  invisible(list(result, r.eqvar, r.ANOVA, r.ANOVA2, r.multi))
}


他に，以下の 3 つの関数も必要

インストールは，以下の 1 行をコピーし，R コンソールにペーストする
source("http://aoki2.si.gunma-u.ac.jp/R/src/Brown-Forsythe-test.R", encoding="euc-jp")

Brown.Forsythe.test <- function(x, group) {  # x: データベクトル，group: 群変数ベクトル
  data.name <- paste(deparse(substitute(x)), "~", deparse(substitute(group)))
  OK <- complete.cases(x, group)                             # 欠損値を持つケースを除く
  x <- x[OK]
  group <- as.factor(group[OK])
  group <- group[, drop=TRUE]
  d <- split(x, group)
  df1 <- length(d)-1
  ni <- sapply(d, length)
  mi <- sapply(d, mean)
  ui <- sapply(d, var)
  ci <- (1-ni/sum(ni))*ui
  F.BF <- sum(ni*(mi-mean(x))^2)/sum(ci)
  C <- ci/sum(ci)
  df2 <- 1/sum(C^2/(ni-1))
  p <- pf(F.BF, df1, df2, lower.tail=FALSE)
  method <- "Brown-Forsythe 検定"
  return(structure(list(statistic=c(F=F.BF), parameter=c("df1"=df1, "df2"=df2), "p.value"=p, method=method, data.name=data.name), class="htest"))
}

インストールは，以下の 1 行をコピーし，R コンソールにペーストする
source("http://aoki2.si.gunma-u.ac.jp/R/src/levene-test.R", encoding="euc-jp")

# 等分散性の検定
Levene.test <- function(x,                                   # データベクトル
                        group,                                  # 群変数ベクトル
                        method = c("mean", "median"))           # 検定統計量の計算方法
{
        data.name <- paste(deparse(substitute(x)), "~", deparse(substitute(group)))
        OK <- complete.cases(x, group)                               # 欠損値を持つケースを除く
        x <- x[OK]
        fac <- as.factor(group[OK])
        fac <- fac[, drop=TRUE]
        n <- length(x)                                               # 全体のデータ個数
        n.i <- tapply(x, fac, length)                                # 各群のデータ個数
        k <- length(n.i)                                     # 群の数
        method <- match.arg(method)                          # 引数の補完
        x <- abs(x-tapply(x, fac, method)[fac])                      # 測定値からそのデータが属する群の平均値（または中央値）を差し引く
        sw <- sum((n.i-1)*tapply(x, fac, var))                       # 群内変動
        dfw <- n-k                                           # 群内変動の自由度
        dfb <- k-1                                           # 群間変動の自由度
        f <- ((var(x)*(n-1)-sw)/dfb)/(sw/dfw)                        # 検定統計量
        P <- pf(f, dfb, dfw, lower.tail=FALSE)                       # P 値
        method <- paste("等分散性の検定（", method, " で調整）", sep="")
        return(structure(list(statistic=c(F=f), parameter=c("df b"=dfb, "df w"=dfw), p.value=P,
                method=method, data.name=data.name), class="htest"))
}

インストールは，以下の 1 行をコピーし，R コンソールにペーストする
source("http://aoki2.si.gunma-u.ac.jp/R/src/tukey.R", encoding="euc-jp")

# Tukey の方法による多重比較
# Games-Howell の方法も選択できるように拡張 2009/08/03
tukey <- function(   data,                                   # 観察値ベクトル
                        group,                                  # 群変数ベクトル
                        method=c("Tukey", "Games-Howell"))      # 手法の選択
{
        OK <- complete.cases(data, group)                    # 欠損値を持つケースを除く
        data <- data[OK]
        group <- factor(group[OK])
        n <- tapply(data, group, length)                     # 各群のケース数
        a <- length(n)                                               # 群の数
        phi.e <- sum(n)-a                                    # 誤差分散（群内不偏分散）の自由度
        Mean <- tapply(data, group, mean)                    # 各群の平均値
        Variance <- tapply(data, group, var)                 # 各群の不偏分散
        result1 <- cbind(n, Mean, Variance)                  # 各群の統計量
        rownames(result1) <- paste("Group", 1:a, sep="")
        method <- match.arg(method)
        if (method == "Tukey") {                                # Tukey の方法
                v.e <- sum((n-1)*Variance)/phi.e             # 誤差分散（群内不偏分散）
                t <- combn(a, 2, function(ij)                        # 対比較
                                        abs(diff(Mean[ij]))/sqrt(v.e*sum(1/n[ij])) )
                p <- ptukey(t*sqrt(2), a, phi.e, lower.tail=FALSE)   # 有意確率を計算する
                Tukey <- cbind(t, p)                                 # 対比較の結果
                rownames(Tukey) <- combn(a, 2, paste, collapse=":")
                return(list(result1=result1, Tukey=Tukey, phi=phi.e, v=v.e))
        }
        else {                                                  # Games-Howell の方法
                t.df <- combn(a, 2, function(ij) {           # 対比較
                                        t <- abs(diff(Mean[ij]))/sqrt(sum(Variance[ij]/n[ij]))
                                        df <- sum(Variance[ij]/n[ij])^2/sum((Variance[ij]/n[ij])^2/(n[ij]-1))
                                        return(c(t, df))} )
                t <- t.df[1,]
                df <- t.df[2,]
                p <- ptukey(t*sqrt(2), a, df, lower.tail=FALSE)      # 有意確率を計算する
                Games.Howell <- cbind(t, df, p)                      # 対比較の結果
                rownames(Games.Howell) <- combn(a, 2, paste, collapse=":")
                return(list(result1=result1, Games.Howell=Games.Howell))
        }
}


使用例

> x <- c(
  205, 206, 164, 190, 194, 203,
  201, 221, 197, 185,
  248, 265, 197, 220, 212, 281,
  202, 276, 237, 254, 230
  )
> g <- rep(paste("A", 1:4, sep=""), c(6, 4, 6, 5))
> oneway.ANOVA(x, g)

基礎統計量

 水準 n     Mean       SD  Mean-SD  Mean+SD        SE  Mean-SE  Mean+SE
   A1 6 193.6667 15.88290 177.7838 209.5496  6.484169 187.1825 200.1508
   A2 4 201.0000 14.96663 186.0334 215.9666  7.483315 193.5167 208.4833
   A3 6 237.1667 32.72563 204.4410 269.8923 13.360181 223.8065 250.5268
   A4 5 239.8000 27.58985 212.2101 267.3899 12.338557 227.4614 252.1386

等分散性の検定

     手法       分布  統計量  自由度      P.値
 Bartlett chi square 3.31855       3 0.3450692
   Levene          F 2.38566 (3, 17) 0.1050173

分散分析（等分散性を仮定する）

 要因    平方和 自由度  平均平方     F 値       P 値
 級間  9284.271      3 3094.7571 5.091555 0.01072138
 級内 10332.967     17  607.8216       NA         NA
 全体 19617.238     20        NA       NA         NA

分散分析（等分散性を仮定しない）

           手法     F 値 第 1 自由度 第 2 自由度       P 値
          Welch 4.927451           3    8.860454 0.02766496
 Brown-Forsythe 5.440656           3   13.322350 0.01163382

多重比較

 群1 群2    平均1    平均2        差 差の標準誤差 t 値(Tukey)       P 値 t 値(Games-Howell)
  A1  A2 193.6667 201.0000  7.333333     15.91411   0.4608069 0.96656131          0.7406106
  A1  A3 193.6667 237.1667 43.500000     14.23402   3.0560597 0.03276031          2.9291841
  A1  A4 193.6667 239.8000 46.133333     14.92876   3.0902319 0.03058705          3.3097548
  A2  A3 201.0000 237.1667 36.166667     15.91411   2.2726160 0.14395624          2.3617950
  A2  A4 201.0000 239.8000 38.800000     16.53843   2.3460508 0.12655164          2.6887452
  A3  A4 237.1667 239.8000  2.633333     14.92876   0.1763933 0.99797118          0.1447992
   自由度       P 値
 6.871719 0.87779880
 7.231682 0.07993964
 6.139828 0.05810207
 7.413345 0.16830990
 6.340133 0.12046051
 8.990836 0.99883370
直前のページへ戻る　　
E-mail to Shigenobu AOKI