このスクリプトは,データを切りの良い値に変換する処理や,並べ替えなどの処理を全てまかなっている。また,異常なデータファイルに対してはエラーメッセージを出力するようにしている。
#!/usr/local/bin/gawk -f
# Frequency distribution table
#
# Usage: freq.awk datafile
#
# The data file holds one case per line; fields are separated by commas,
# blanks or tabs.  Every line must have the same number of fields as the
# first line.  For each field (variable) the script picks a "round" class
# width, assigns every value to the class floor(value/width)*width, and
# prints a table with the columns
#     class, freq., rel.freq., cum.freq., cum.rel.freq.
# including zero-frequency rows for empty classes between occupied ones.
# Diagnostics go to /dev/stderr and the exit status is 1 on error.

# Check the command line, read the first line of the data file to learn
# the number of variables (nv), and initialise the per-variable extremes.
BEGIN {
    FS = "[, \t]+"
    if (ARGC != 2) {
        printf "使用法: freq.awk データファイル\n" > "/dev/stderr"
        error = 1
        exit(1)
    }
    printf "★ データファイル: %s ★\n", ARGV[1] > "/dev/stderr"
    # Peek at the first record only to obtain its field count.  A negative
    # return value means the file could not be read at all.
    if ((getline < ARGV[1]) < 0) {
        printf "データファイルを開けません: %s\n", ARGV[1] > "/dev/stderr"
        error = 1
        exit(1)
    }
    nv = NF
    # The main rule reads the file through awk's normal input, which is a
    # separate stream, so this peek handle can be released right away.
    close(ARGV[1])
    for (i = 1; i <= nv; i++) {
        min_[i] = 1e33
        max_[i] = -1e33
    }
}

# First pass (normal awk input): validate the field count of every record
# and track the minimum and maximum of each variable.
{
    if (NF != nv) {
        printf "NF = %i, nv = %i 1ケースあたりの変数の個数が違います!\n\n", NF, nv > "/dev/stderr"
        error = 1
        exit(1)
    }
    for (i = 1; i <= nv; i++) {
        # NOTE(review): 33e33 looks like a missing-value sentinel - confirm.
        # Such values are skipped here but are still counted in the END pass.
        if ($i != 33e33) {
            min_[i] = min(min_[i], $i)
            max_[i] = max(max_[i], $i)
        }
    }
}

# Second pass: for each variable, re-read the data file, count the values
# per class, sort the classes, and print the frequency table.
END {
    if (error) {
        exit(1)
    }
    nc = NR    # number of cases; getline < file below does not change NR
    for (i = 1; i <= nv; i++) {
        # Aim for about 8 classes; if the rounded width yields more than 11,
        # fall back to a width aimed at 6 classes.
        width = round_cross(min_[i], max_[i], 8)
        if ((max_[i]-min_[i])/width > 11) {
            width = round_cross(min_[i], max_[i], 6)
        }
        nxr = 0
        # Rewind: closing the file makes the next getline start from the top.
        close(FILENAME)
        while ((getline < FILENAME) > 0) {
            x = floor($i/width)*width    # lower limit of the value's class
            # One linear search both tests for existence and finds the slot.
            i1 = search(xrin, nxr, x)
            if (i1 < 0) {
                i1 = nxr
                xrin[nxr++] = x
            }
            freq[i1]++
        }
        exchange(freq, xrin, nxr)    # sort classes ascending, counts follow
        cum = 0
        low = xrin[0]
        separate()
        printf "★ Frequency table of Var%02i\n\n", i
        printf "%15s%8s%10s%14s%14s\n", "class", "freq.", "rel.freq.", "cum.freq.", "cum.rel.freq."
        for (j = 0; j < nxr; j++) {
            # Emit zero-frequency rows for empty classes below xrin[j].
            # Comparing against half a class width (instead of a fixed
            # absolute tolerance) guarantees termination even when repeated
            # "low += width" accumulates floating-point error.
            while (xrin[j] - low > width/2) {
                printf "%#15.7g %6i %8.1f %10i %12.1f\n", low, 0, 0, cum, cum/nc*100
                low += width
            }
            cum += freq[j]
            printf "%#15.7g %6i %8.1f %10i %12.1f\n", xrin[j], freq[j], freq[j]/nc*100, cum, cum/nc*100
            low = xrin[j]+width
            delete freq[j]    # reset the counter for the next variable
        }
        printf "%15s %6i %8.1f\n", "total", nc, 100.0
    }
}

# Selection sort of xrin[0..nxr-1] into ascending order; freq[] is permuted
# in parallel so that freq[k] stays the count of class xrin[k].
# i, k, minx, mink, temp are locals.
function exchange(freq, xrin, nxr,    i, k, minx, mink, temp)
{
    for (i = 0; i < nxr-1; i++) {
        minx = xrin[i]
        mink = i
        for (k = i+1; k < nxr; k++) {
            if (xrin[k] < minx) {
                minx = xrin[k]
                mink = k
            }
        }
        if (mink != i) {
            temp = xrin[i]
            xrin[i] = minx
            xrin[mink] = temp
            temp = freq[i]
            freq[i] = freq[mink]
            freq[mink] = temp
        }
    }
}

# Smaller of x and y.
function min(x, y)
{
    return (x < y) ? x : y
}

# Larger of x and y.
function max(x, y)
{
    return (x > y) ? x : y
}

# Absolute value of x.
function abs(x)
{
    if (x < 0.0) {
        return -x
    }
    else {
        return x
    }
}

# Class width for dividing [minimum, maximum] into about nclass classes,
# rounded by round1().  Returns 1 when the range is empty or the rounded
# width is not positive.  width is a local.
function round_cross(minimum, maximum, nclass,    width)
{
    if (maximum <= minimum || (width = round1((maximum-minimum)/nclass)) <= 0.0) {
        return 1
    }
    return width
}

# Round a positive x down to m * 10^i with m in {0.1, 0.2, 0.25, 0.5}.
# Returns 0 for x <= 0: a negative x could never leave the scaling loop
# below, and round_cross() treats a non-positive result as "use width 1".
# i is a local.
function round1(x,    i)
{
    i = 0
    if (x <= 0) {
        return 0.0
    }
    # Scale x into the interval (0.1, 1.0], remembering the exponent in i.
    while (x > 1.0) {
        x /= 10.0
        i++
    }
    while (x <= 0.1) {
        x *= 10.0
        i--
    }
    if (x >= 0.5) {
        x = 0.5
    }
    else if (x >= 0.25) {
        x = 0.25
    }
    else if (x >= 0.2) {
        x = 0.2
    }
    else {
        x = 0.1
    }
    return x*(10^i)
}

# Largest integer not greater than x.  int() truncates toward zero, so
# negative non-integers need one more step down.
function floor(x)
{
    if (x > 0.0)
        return int(x)
    else if (int(x) == x)
        return x
    else
        return int(x)-1.0
}

# Index of the first element of x[0..n-1] equal to y, or -1 if none.
# i is a local.
function search(x, n, y,    i)
{
    for (i = 0; i < n; i++) {
        if (x[i] == y) {
            return i
        }
    }
    return -1
}

# 1 if y occurs in x[0..n-1], otherwise 0.  The extra parameter i is unused
# and kept only so the signature stays unchanged.
function exist(x, n, y,    i)
{
    return (search(x, n, y)) == -1 ? 0 : 1
}

# Print a separator: a blank line, 79 dashes, and another blank line.
# i is a local.
function separate(    i)
{
    printf "\n"
    for (i = 0; i < 79; i++) {
        printf "-"
    }
    printf "\n\n"
}