R을 이용한 통계 프로그래밍 기초 제6장 이변량 데이터

투정이 2009. 3. 20. 14:40

2009. 3. 20. 14:40

프로그램 소스

# R을 이용한 통계 프로그래밍 기초

# 6장 이변량 데이터

# 6.1 범주형 데이터의 이원분할표

x = matrix(c(54,3,7,12), nrow = 2) # 행렬 만들기

rbind(c(54,7), c(3,12)) # 행으로 붙이기

cbind(c(54,3), c(7,12)) # 열로 붙이기

rownames(x) = c("p.buckled", "p.unbuckled")
colnames(x) = c("c.buckled", "c.unbuckled")

margin.table(x, 1) # 1 = 행을 기준으로 주변 분포
margin.table(x, 2) # 2 = 열을 기준으로 주변 분포

rowSums(x) # 행의 합을 구하기
colSums(x) # 열의 합을 구하기

addmargins(x) # 원래 행렬에 주변분포를 포함하기

prop.table(x) # 도수에 대한 비율 구하기

jpeg("06_01.jpg")
op = par(mfrow = c(1,2))
barplot(x, main = "child seat-belt usage") # 막대그래프
barplot(x, main = "child seat-belt usage", legend = TRUE, beside = TRUE) # 막대그래프
par(op)
dev.off()

# 예6.1
nico = read.table(file = "table_6_2.txt", header = TRUE) # 자료 읽기

y = table(nico$nicotin, nico$stopsmoke) # 빈도수 구하기
prop.table(y) # 비율 구하기

# 6.2 상관계수

# 예 6.2

blood <- read.table('table_6_3.csv', header = TRUE, sep = ',')
attach(blood)
rowMeans(blood[,2:3]) # 행의 평균 구하기
colMeans(blood[,2:3]) # 열의 평균 구하기
cor(machine, expert) # machine과 expert의 Pearson 상관계수 구하기

jpeg("06_02.jpg")
plot(machine, expert, main = 'blood pressure measurement')
dev.off()

jpeg("06_03.jpg")
op = par(mfrow=c(1,2)) # 1행 2열의 그래프
boxplot(machine, expert, names = c('machine','expert')) # machine과 expert의 boxplot
plot(density(machine), ylim = c(0,0.04), main = 'density plot')
lines(density(expert), lty = 2)
par(op) # 그래프 끝
dev.off()

cor(machine, expert, method = 'spearman') # Spearman의 순위상관계수
cor.test(machine, expert) # 상관 검정

# 예 6.3 : 산점도에 주변분포 히스토그램 그리기

x <- machine
y <- expert

zones <- matrix(c(2,0,1,3), ncol = 2, byrow = TRUE)
zones
layout(zones, width=c(2/3, 1/3), height=c(1/3, 2/3))
xhist <- hist(x, plot = FALSE)
yhist <- hist(y, plot = FALSE)
xhist
yhist

top <- max(c(xhist$counts, yhist$counts))
xrange = c(min(x), max(x))
yrange = c(min(y), max(y))
par(mar=c(6,6,1,1))
plot(x,y, xlim=xrange, ylim=yrange)
par(mar=c(0,6,1,1))
barplot(xhist$counts, axes=FALSE, ylim=c(0,top), space=0, col='green')
title("Scatterplot with marginal histogram")
par(mar=c(6,0,1,1))
barplot(yhist$counts, axes=FALSE, xlim=c(0,top), space=0, horiz=TRUE, col='yellow')

# 예 6.4 - 신체 데이터에 대한 Pearson 상관계수와 산점도

kid = read.table('table_6_4.csv', header = TRUE, sep = ',')

attach(kid)

cor.test(height, weight) # 상관 검정

jpeg('06_05.jpg')
plot(height, weight, pch=as.character(gender)) # 그래프 그리기, 점표시는 gender의 문자로
dev.off()

# 예6.5 : bubble plot 그리기, girth 값에 따라 원의 크기를 다르게 표시

data(trees)
attach(trees)
N = nrow(trees)
jpeg('06_06.jpg')
with(trees, {
symbols(Height, Volume, circles = Girth/16, inches = FALSE, bg = 1:N, main = 'bubble plot')
palette('default')
})
dev.off()

프로그램 결과

> # R을 이용한 통계 프로그래밍 기초
>
> # 6장 이변량 데이터
>
> # 6.1 범주형 데이터의 이원분할표
>
> x = matrix(c(54,3,7,12), nrow = 2) # 행렬 만들기
Warning messages:
1: Unable to open printer
2: opening device failed
>
> rbind(c(54,7), c(3,12)) # 행으로 붙이기
[,1] [,2]
[1,] 54 7
[2,] 3 12
>
> cbind(c(54,3), c(7,12)) # 열로 붙이기
[,1] [,2]
[1,] 54 7
[2,] 3 12
>
> rownames(x) = c("p.buckled", "p.unbuckled")
> colnames(x) = c("c.buckled", "c.unbuckled")
>
> x
c.buckled c.unbuckled
p.buckled 54 7
p.unbuckled 3 12
>
> margin.table(x, 1) # 1 = 행을 기준으로 주변 분포
p.buckled p.unbuckled
61 15
> margin.table(x, 2) # 2 = 열을 기준으로 주변 분포
c.buckled c.unbuckled
57 19
>
> rowSums(x) # 행의 합을 구하기
p.buckled p.unbuckled
61 15
> colSums(x) # 열의 합을 구하기
c.buckled c.unbuckled
57 19
>
> addmargins(x) # 원래 행렬에 주변분포를 포함하기
c.buckled c.unbuckled Sum
p.buckled 54 7 61
p.unbuckled 3 12 15
Sum 57 19 76
>
> prop.table(x) # 도수에 대한 비율 구하기
c.buckled c.unbuckled
p.buckled 0.71052632 0.09210526
p.unbuckled 0.03947368 0.15789474
>
> jpeg("06_01.jpg")
> op = par(mfrow = c(1,2))
> barplot(x, main = "child seat-belt usage") # 막대그래프
> barplot(x, main = "child seat-belt usage", legend = TRUE, beside = TRUE) # 막대그래프
> par(op)
> dev.off()
windows
2
>

> # 예6.1
> nico = read.table(file = "table_6_2.txt", header = TRUE) # 자료 읽기
>
> y = table(nico$nicotin, nico$stopsmoke) # 빈도수 구하기
> prop.table(y) # 비율 구하기

N Y
N 0.4 0.1
Y 0.2 0.3
>
> # 6.2 상관계수
>
> # 예 6.2
>
> blood <- read.table('table_6_3.csv', header = TRUE, sep = ',')
> attach(blood)

> rowMeans(blood[,2:3]) # 행의 평균 구하기
[1] 70.0 83.0 91.5 103.0 94.5 84.0 80.0 72.0 106.5 88.5 86.0 64.0
[13] 71.5 85.5 96.5
> colMeans(blood[,2:3]) # 열의 평균 구하기
machine expert
85.6 84.6
> cor(machine, expert) # machine과 expert의 Pearson 상관계수 구하기
[1] 0.90686
>
> jpeg("06_02.jpg")
> plot(machine, expert, main = 'blood pressure measurement')
> dev.off()
windows
2
>

> jpeg("06_03.jpg")
> op = par(mfrow=c(1,2)) # 1행 2열의 그래프
> boxplot(machine, expert, names = c('machine','expert')) # machine과 expert의 boxplot
> plot(density(machine), ylim = c(0,0.04), main = 'density plot')
> lines(density(expert), lty = 2)
> par(op) # 그래프 끝
> dev.off()
windows
2
>

> cor(machine, expert, method = 'spearman') # Spearman의 순위상관계수
[1] 0.8878956
> cor.test(machine, expert) # 상관 검정

Pearson's product-moment correlation

data: machine and expert
t = 7.7586, df = 13, p-value = 3.122e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7369457 0.9689817
sample estimates:
cor
0.90686

>
> # 예 6.3 : 산점도에 주변분포 히스토그램 그리기
>
> x <- machine
> y <- expert
>
> zones <- matrix(c(2,0,1,3), ncol = 2, byrow = TRUE)
> zones
[,1] [,2]
[1,] 2 0
[2,] 1 3
> layout(zones, width=c(2/3, 1/3), height=c(1/3, 2/3))
> xhist <- hist(x, plot = FALSE)
> yhist <- hist(y, plot = FALSE)
> xhist
$breaks
[1] 60 70 80 90 100 110

$counts
[1] 2 4 3 4 2

$intensities
[1] 0.01333333 0.02666667 0.02000000 0.02666667 0.01333333

$density
[1] 0.01333333 0.02666667 0.02000000 0.02666667 0.01333333

$mids
[1] 65 75 85 95 105

$xname
[1] "x"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"
> yhist
$breaks
[1] 60 70 80 90 100 110

$counts
[1] 3 1 7 3 1

$intensities
[1] 0.019999996 0.006666667 0.046666667 0.020000000 0.006666667

$density
[1] 0.019999996 0.006666667 0.046666667 0.020000000 0.006666667

$mids
[1] 65 75 85 95 105

$xname
[1] "y"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"
>
> top <- max(c(xhist$counts, yhist$counts))
> xrange = c(min(x), max(x))
> yrange = c(min(y), max(y))
> par(mar=c(6,6,1,1))
> plot(x,y, xlim=xrange, ylim=yrange)
> par(mar=c(0,6,1,1))
> barplot(xhist$counts, axes=FALSE, ylim=c(0,top), space=0, col='green')
> title("Scatterplot with marginal histogram")
> par(mar=c(6,0,1,1))
> barplot(yhist$counts, axes=FALSE, xlim=c(0,top), space=0, horiz=TRUE, col='yellow')
>

> # 예 6.4 - 신체 데이터에 대한 Pearson 상관계수와 산점도
>
> kid = read.table('table_6_4.csv', header = TRUE, sep = ',')
>
> attach(kid)

>
> cor.test(height, weight) # 상관 검정

Pearson's product-moment correlation

data: height and weight
t = 5.6933, df = 13, p-value = 7.375e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5863134 0.9471791
sample estimates:
cor
0.8448334

>
> jpeg('06_05.jpg')
> plot(height, weight, pch=as.character(gender)) # 그래프 그리기, 점표시는 gender의 문자로
> dev.off()
windows
2
>

>
>
> # 예6.5 : bubble plot 그리기, girth 값에 따라 원의 크기를 다르게 표시
>
> data(trees)
> attach(trees)

> N = nrow(trees)
> jpeg('06_06.jpg')
> with(trees, {
+ symbols(Height, Volume, circles = Girth/16, inches = FALSE, bg = 1:N, main = 'bubble plot')
+ palette('default')
+ })
> dev.off()
windows
2
>

>
>

'Programming > R' 카테고리의 다른 글

R을 이용한 통계 프로그래밍 기초 제8장 카이제곱 검정 (4)	2009.03.26
R을 이용한 통계 프로그래밍 기초 제7장 유의성 검정 (3)	2009.03.25
R을 이용한 통계프로그래밍 기초 제5장 일변량 데이터와 기술통계량 (3)	2009.03.18
R을 이용한 통계 프로그래밍 기초 제4장 난수 발생과 모의실험 (3)	2009.03.16
R을 이용한 통계 프로그래밍 기초 제3장 R 그래프 (4)	2009.03.10

Let's Study Animal Breeding

R을 이용한 통계 프로그래밍 기초 제6장 이변량 데이터

'Programming > R' 카테고리의 다른 글

+ Recent posts

티스토리툴바