프로그램 소스


# R을 이용한 통계 프로그래밍 기초

# 9장 회귀분석

# 9.1 Simple linear regression model

# Toy data: ten paired observations.
x <- c(1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 4.6, 1.6, 5.5, 3.4) # predictor
y <- c(0.7, -1.0, -0.2, -1.2, -0.1, 3.4, 0.0, 0.8, 3.7, 2.0) # response

# Fit y on x by least squares and print the standard summary table.
out <- lm(y ~ x)
summary(out)

# Write the scatter plot, with the fitted line overlaid, to 09_01.jpg.
jpeg(filename = "09_01.jpg")
plot(x, y)
abline(out)
dev.off()

# List the components stored in the fitted "lm" object.
attributes(out)

# Predictions at the observed x values (kept in pred_y and printed) ...
pred_y <- predict(out, newdata = data.frame(x = x))
pred_y

# ... at the single new point x = 2.3 ...
predict(out, newdata = data.frame(x = 2.3))

# ... and at several new points x = 1, 2.2, 6.7.
predict(out, newdata = data.frame(x = c(1, 2.2, 6.7)))

# Example 9.1: regression of book weight on book volume

volume <- c(412, 953, 929, 1492, 419, 1010, 595, 1034)
weight <- c(250, 700, 650, 975, 350, 950, 425, 725)

# Fit the model, keep the result in book.lm, and print the summary table.
book.lm <- lm(weight ~ volume)
summary(book.lm)

# Write a 2 x 2 panel figure to 09_02.jpg; only three panels are drawn.
jpeg(filename = "09_02.jpg")
op <- par(mfrow = c(2, 2)) # op holds the previous settings for par(op) below
plot(volume, weight)
# volume is not sorted, but every fitted value lies on the same straight
# line, so joining them in data order still traces the regression line.
lines(volume, fitted(book.lm))
plot(book.lm, which = 1)
plot(book.lm, which = 2, pch = 6)
par(op)
dev.off()

# List the components stored in the fitted "lm" object.
attributes(book.lm)

# Use the extractor functions rather than `$` with an abbreviated name:
# `book.lm$coef` only worked through partial matching of "coefficients".
coef(book.lm) # regression coefficients
residuals(book.lm) # residuals
fitted(book.lm) # fitted values

# Example 9.2: linear regression with an added quadratic term.
# I() protects volume^2 so that `^` is treated as arithmetic, not as a
# formula operator. Uses weight and volume from Example 9.1.
book.lm2 <- lm(weight ~ volume + I(volume^2))
summary(book.lm2)

# Example 9.3: model without an intercept.
# The `0 +` in the formula removes the intercept term. Uses weight and
# volume from Example 9.1.
book.lm3 <- lm(weight ~ 0 + volume)
summary(book.lm3)

# Write the default lm diagnostic plots to 09_03.jpg on a 2 x 2 grid.
jpeg(filename = "09_03.jpg")
op <- par(mfrow = c(2, 2)) # op holds the previous settings for par(op) below
plot(book.lm3)
par(op)
dev.off()

# 9.2 Multiple linear regression model

# Example 9.4: regression of product intensity on temperature and pressure

library(lattice) # provides splom()

temp <- c(195, 179, 205, 204, 167, 184, 187)
pressure <- c(57, 61, 60, 62, 61, 59, 62)
intensity <- c(81.4, 122.2, 170.7, 175.6, 150.3, 96.8, 169.8)

# Bind the three variables column-wise and draw a scatter plot matrix.
# NOTE: the variable `all` shares its name with base::all(); calls to all()
# still resolve to the function, but the name is kept only for compatibility.
all <- cbind(temp, pressure, intensity)
splom(~all, pch = 8)

# Fit the two-predictor linear model and print the summary table.
prod.lm <- lm(intensity ~ temp + pressure)
summary(prod.lm)

# Write the default lm diagnostic plots to 09_04.jpg on a 2 x 2 grid.
jpeg(filename = "09_04.jpg")
op <- par(mfrow = c(2, 2)) # op holds the previous settings for par(op) below
plot(prod.lm)
par(op)
dev.off()

# Predicted intensity at temperature 200 and pressure 63.
predict(prod.lm, newdata = data.frame(temp = 200, pressure = 63))

# Example 9.5: multiple regression with an interaction term.
# `temp:pressure` adds the temp-by-pressure interaction to the two main
# effects. Uses temp, pressure and intensity from Example 9.4.

prod.lm2 <- lm(intensity ~ temp + pressure + temp:pressure)
summary(prod.lm2)

# Example 9.6: regression on the log-transformed variables.
# log() is applied to the response and to both predictors inside the
# formula. Uses temp, pressure and intensity from Example 9.4.

llm <- lm(log(intensity) ~ log(temp) + log(pressure))
summary(llm) # print the summary table


# 9.3 Nonparametric regression models
#
# Draws four fits of dist on speed for the cars data on a 2 x 2 grid and
# writes the figure to 09_05.jpg. Columns are referenced explicitly through
# `data = cars` / `cars$...` instead of attach(cars), so the search path is
# never modified and same-named global variables cannot mask the data.

data(cars) # load the cars data set
jpeg(filename = "09_05.jpg")
op <- par(mfrow = c(2, 2)) # op holds the previous settings for par(op) below

# Panel 1: simple linear regression.
cars.lm <- lm(dist ~ speed, data = cars)
plot(dist ~ speed, data = cars, main = "simple linear regression")
lines(cars$speed, fitted(cars.lm))

# Panel 2: kernel smoothing with a normal kernel at two bandwidths.
plot(dist ~ speed, data = cars, main = "Kernel smoothing")
lines(ksmooth(cars$speed, cars$dist, kernel = "normal", bandwidth = 2),
      col = "red")
lines(ksmooth(cars$speed, cars$dist, kernel = "normal", bandwidth = 5),
      col = "green")

# Panel 3: LOESS (locally weighted regression); predictions are taken at
# the observed speeds, with standard errors kept in pp.
cars.lo <- loess(dist ~ speed, data = cars)
pp <- predict(cars.lo, newdata = data.frame(speed = cars$speed), se = TRUE)
plot(dist ~ speed, data = cars, main = "Local Polynomial regression")
lines(cars$speed, pp$fit)

# Panel 4: cubic spline through the data. spline() warns
# "collapsing to unique 'x' values" here; the call is left unchanged so the
# result and the warning stay as before.
plot(dist ~ speed, data = cars, main = "Cubic spline interpolation")
lines(spline(cars$speed, cars$dist))

par(op)
dev.off()

프로그램 결과


> # R을 이용한 통계 프로그래밍 기초
>
> # 9장 회귀분석
>
> # 9.1 단순 선형회귀모형
>
> x = c(1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 4.6, 1.6, 5.5, 3.4) # 독립변수
> y = c(0.7, -1.0, -0.2, -1.2, -0.1, 3.4, 0.0, 0.8, 3.7, 2.0) # 종속변수
>
> out = lm(y ~ x) # 단순선형회귀
> summary(out) # 기본 분석결과 확인

Call:
lm(formula = y ~ x)

Residuals:
Min 1Q Median 3Q Max
-2.3650 -0.4036 0.3208 0.6613 1.1720

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.7861 0.5418 -1.451 0.18485
x 0.6850 0.1802 3.801 0.00523 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.083 on 8 degrees of freedom
Multiple R-squared: 0.6436, Adjusted R-squared: 0.599
F-statistic: 14.45 on 1 and 8 DF, p-value: 0.005231

> jpeg('09_01.jpg')
> plot(x, y) # plotting
> abline(out) # 회귀선 추가
> dev.off()
windows
2




> attributes(out)
$names
[1] "coefficients" "residuals" "effects" "rank"
[5] "fitted.values" "assign" "qr" "df.residual"
[9] "xlevels" "call" "terms" "model"

$class
[1] "lm"

> pred_y = predict(out, newdata = data.frame(x = x)) # y의 예측값 구하기
> pred_y
1 2 3 4 5 6
0.51543194 -0.23811424 -0.03260165 -0.71764364 -0.85465203 2.22803692
7 8 9 10
2.36504531 0.30991935 2.98158310 1.54299493
> predict(out, newdata = data.frame(x = 2.3)) # x = 2.3에서의 y의 예측값 구하기
1
0.7894487
> predict(out, newdata = data.frame(x = c(1, 2.2, 6.7))) # x = 1, 2.2, 6.7에서의 y의 예측값 구하기
1 2 3
-0.1011058 0.7209445 3.8036335
>
> # 예 9.1 책의 부피에 대한 무게 회귀분석
>
> volume = c(412, 953, 929, 1492, 419, 1010, 595, 1034)
> weight = c(250, 700, 650, 975, 350, 950, 425, 725)
> book.lm = lm(weight ~ volume) # 회귀분석 실시 후 book.lm에 결과 저장
> summary(book.lm) # 기본 분석결과 확인

Call:
lm(formula = weight ~ volume)

Residuals:
Min 1Q Median 3Q Max
-89.674 -39.888 -25.005 9.066 215.910

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 41.3725 97.5588 0.424 0.686293
volume 0.6859 0.1059 6.475 0.000644 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 102.2 on 6 degrees of freedom
Multiple R-squared: 0.8748, Adjusted R-squared: 0.8539
F-statistic: 41.92 on 1 and 6 DF, p-value: 0.0006445

> jpeg('09_02.jpg')
> op = par(mfrow = c(2,2)) # 그래프 분할
> plot(volume, weight)
> lines(volume, book.lm$fitted.values)
> plot(book.lm, which = 1)
> plot(book.lm, which = 2, pch = 6)
> par(op)
> dev.off()
windows
2




> attributes(book.lm) # 분석결과 범주 확인
$names
[1] "coefficients" "residuals" "effects" "rank"
[5] "fitted.values" "assign" "qr" "df.residual"
[9] "xlevels" "call" "terms" "model"

$class
[1] "lm"

> book.lm$coef # 회귀계수
(Intercept) volume
41.3724757 0.6858592
> book.lm$residuals # 잔차
1 2 3 4 5 6 7
-73.94646 5.00373 -28.53565 -89.67436 21.25253 215.90976 -24.45868
8
-25.55086
> book.lm$fitted.values # 추정값
1 2 3 4 5 6 7
323.9465 694.9963 678.5356 1064.6744 328.7475 734.0902 449.4587
8
750.5509
>
> # 예 9.2 : 2차 항이 포함된 선형회귀
> book.lm2 = lm(weight ~ volume + I(volume^2))
> summary(book.lm2)

Call:
lm(formula = weight ~ volume + I(volume^2))

Residuals:
1 2 3 4 5 6 7 8
-33.60 -34.19 -68.42 -10.21 59.30 179.92 -33.29 -59.52

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.930e+02 2.085e+02 -0.926 0.3970
volume 1.297e+00 4.966e-01 2.611 0.0476 *
I(volume^2) -3.400e-04 2.705e-04 -1.257 0.2643
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 97.59 on 5 degrees of freedom
Multiple R-squared: 0.9049, Adjusted R-squared: 0.8668
F-statistic: 23.78 on 2 and 5 DF, p-value: 0.002792

>
> # 예 9.3 : no intercept model
> book.lm3 = lm(weight ~ 0 + volume)
> summary(book.lm3)

Call:
lm(formula = weight ~ 0 + volume)

Residuals:
Min 1Q Median 3Q Max
-110.56 -32.93 -16.92 16.24 215.14

Coefficients:
Estimate Std. Error t value Pr(>|t|)
volume 0.72759 0.03686 19.74 2.14e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 96.03 on 7 degrees of freedom
Multiple R-squared: 0.9823, Adjusted R-squared: 0.9798
F-statistic: 389.6 on 1 and 7 DF, p-value: 2.141e-07

>
> jpeg('09_03.jpg')
> op = par(mfrow = c(2,2))
> plot(book.lm3)
> par(op)
> dev.off()
windows
2




>
> # 9.2 다중 선형 회귀모형
>
> # 예 9.4 온도와 압력에 따른 제품 강도 회귀
>
> temp = c(195, 179, 205, 204, 167, 184, 187)
> pressure = c(57, 61, 60, 62, 61, 59, 62)
> intensity = c(81.4, 122.2, 170.7, 175.6, 150.3, 96.8, 169.8)
> all = cbind(temp, pressure, intensity)
> library(lattice)
> splom(~all, pch = 8) # scatter plot matrix
> prod.lm = lm(intensity ~ temp + pressure) # 다중 회귀 선형모형
> summary(prod.lm) # 분석결과 요약

Call:
lm(formula = intensity ~ temp + pressure)

Residuals:
1 2 3 4 5 6 7
-1.339 -19.650 21.748 -9.870 20.385 -12.511 1.238

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1180.2817 304.1691 -3.880 0.0178 *
temp 0.9945 0.5869 1.695 0.1654
pressure 18.7559 4.4745 4.192 0.0138 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 19.57 on 4 degrees of freedom
Multiple R-squared: 0.8258, Adjusted R-squared: 0.7388
F-statistic: 9.484 on 2 and 4 DF, p-value: 0.03033

> jpeg('09_04.jpg')
> op = par(mfrow = c(2,2))
> plot(prod.lm)
> par(op)
> dev.off()
windows
2




> predict(prod.lm, newdata = data.frame(temp = 200, pressure = 63)) # 온도 200, 압력 63일 때의 y 기대값
1
200.2472
>
> # 예 9.5 : 상호작용이 포함된 다중회귀식
>
> prod.lm2 = lm(intensity ~ temp + pressure + temp:pressure)
> summary(prod.lm2)

Call:
lm(formula = intensity ~ temp + pressure + temp:pressure)

Residuals:
1 2 3 4 5 6 7
-6.2543 -21.5397 12.5716 0.3022 17.1601 3.5026 -5.7425

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.176e+04 8.394e+03 -1.401 0.256
temp 5.564e+01 4.332e+01 1.284 0.289
pressure 1.926e+02 1.379e+02 1.397 0.257
temp:pressure -8.977e-01 7.116e-01 -1.261 0.296

Residual standard error: 18.27 on 3 degrees of freedom
Multiple R-squared: 0.8862, Adjusted R-squared: 0.7724
F-statistic: 7.788 on 3 and 3 DF, p-value: 0.06289

>
> # 예 9.6 : 각 변수의 로그 변환값에 대한 회귀분석
>
> llm = lm(log(intensity) ~ log(temp) + log(pressure)) # 각 변수에 log함수를 이용하여 회귀분석
> summary(llm) # 분석결과 요약

Call:
lm(formula = log(intensity) ~ log(temp) + log(pressure))

Residuals:
1 2 3 4 5 6 7
-0.020177 -0.136979 0.188985 -0.078791 0.152091 -0.095691 -0.009438

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -39.0390 10.1841 -3.833 0.0186 *
log(temp) 1.1831 0.8571 1.380 0.2396
log(pressure) 9.2060 2.0852 4.415 0.0116 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1529 on 4 degrees of freedom
Multiple R-squared: 0.8341, Adjusted R-squared: 0.7511
F-statistic: 10.05 on 2 and 4 DF, p-value: 0.02754

>
>
> # 9.3 비모수적 회귀모형
>
> data(cars) # cars 데이터 이용
> attach(cars)
> jpeg('09_05.jpg')
> op = par(mfrow = c(2,2))
> cars.lm = lm(dist ~ speed) # 단순선형회귀모형
> plot(speed, dist, main = "simple linear regression")
> lines(speed, cars.lm$fitted.values)
> plot(speed, dist, main = "Kernel smoothing")
> lines(ksmooth(speed, dist, "normal", bandwidth = 2), col = "red")
> lines(ksmooth(speed, dist, "normal", bandwidth = 5), col = "green")
>
> cars.lo = loess(dist ~ speed, cars) # LOESS(locally weighted regression)을 이용한 회귀분석
> pp = predict(cars.lo, data.frame(speed), se = TRUE)
> plot(speed, dist, main = "Local Polynomial regression")
> lines(speed, pp$fit)
> plot(speed, dist, main = "Cubic spline interpolation")
> lines(spline(speed, dist))
Warning message:
In spline(speed, dist) : collapsing to unique 'x' values
> par(op)
> dev.off()
windows
2




> detach(cars)
>
>

+ Recent posts