Animal Breeding/R for Genetic Evaluation
R을 이용하여 공통 환경 효과 모형(common environmental effect model)에 대한 blupf90 분석 결과 정리하기
투정이
2020. 7. 1. 13:24
blupf90을 이용하여 공통 환경 효과 모형의 육종가를 구하는 것을 아래에서 설명한 바 있다.
2014/02/25 - [Animal Breeding/BLUPF90] - blupf90으로 common environmental effect 모형의 육종가 구하기
여기서는 blupf90이 구한 결과를 R로 정리하는 것을 설명한다.
blupf90은 육종가(breeding value)와 육종가의 SE(SEP : Standard Error of Prediction)를 제공하는데 SE를 이용하여 정확도(accuracy)를 구해야 한다. 또한 renumber된 개체 ID이기 때문에 원래 ID를 찾아 주어야 한다.
blupf90이 구한 solutions는 다음과 같다.
trait/effect level solution s.e.
1 1 1 91.49314010 5.36561786
1 1 2 75.76444444 4.72581563
1 2 1 -1.66706602 4.14999369
1 2 2 0.54503060 4.14518326
1 2 3 3.92525604 4.12387886
1 2 4 -1.14141063 4.12387886
1 2 5 -1.09755878 4.17520092
1 2 6 0.44787118 4.18886646
1 2 7 -2.33373269 4.14999369
1 2 8 -3.81879549 4.18886646
1 2 9 2.89476329 4.11247660
1 2 10 1.52525604 4.12387886
1 2 11 -1.44077295 4.31713808
1 2 12 1.44077295 4.31713808
1 2 13 1.44077295 4.31713808
1 2 14 -0.26589372 4.31713808
1 2 15 -1.17487923 4.30705522
1 3 1 -1.76231884 3.42624144
1 3 2 2.16115942 3.45467547
1 3 3 -0.39884058 3.45467547
renumber된 혈통과 원래 ID는 renadd02.ped에 나와 있다.
1 11 15 1 0 2 1 0 0 7
2 11 14 1 0 2 1 0 0 14
15 0 0 3 0 0 0 0 3 2
3 13 12 1 0 2 1 0 0 9
12 0 0 3 0 0 0 0 4 4
4 13 12 1 0 2 1 0 0 11
5 11 15 1 0 2 1 0 0 6
6 11 14 1 0 2 1 0 0 13
11 0 0 3 0 0 0 6 0 1
7 11 15 1 0 2 1 0 0 8
8 11 14 1 0 2 1 0 0 15
13 0 0 3 0 0 0 4 0 3
9 13 12 1 0 2 1 0 0 10
14 0 0 3 0 0 0 0 3 5
10 13 12 1 0 2 1 0 0 12
1, 2, 3열이 리넘버된 혈통이고 10열이 원래 ID이다.
공통 환경 효과 모형(common environmental effect model)에 대한 blupf90의 결과로부터 정확도를 계산하고 원래 ID를 붙이는 R 프로그램이다.
# common envrionmental effect model의 blupf90의 분석 결과를 R로 정리하기 - Date :2020-07-01
# blupf90의 육종가와 SE(standard error of prediction)읽어
#정확도를 계산하고, 원래 ID를 붙임
# set working directory
setwd("D:/temp/03_common_environment_blupf90")
# print working directory
getwd()
# sigma_a
sigma_a = 20
sigma_a
# read solutions
solutions = read.table("solutions", skip = 1, sep = "", col.names = c("trait", "effect", "level", "solution", "se"), stringsAsFactors = FALSE)
solutions
# read renumbered pedigree
pedi = read.table("renadd02.ped", sep = "", stringsAsFactors = FALSE)
pedi
# read renumbered data
data = read.table("renf90.dat", sep = "", stringsAsFactors = FALSE)
data
# read parity fixed solutions from solutions
sol_fixed_sex = solutions[solutions$effect == 1, 4]
sol_fixed_sex
# read animal solutions from solutions
sol_animal = solutions[solutions$effect == 2, 3:5]
rownames(sol_animal) = sol_animal[,1]
sol_animal
# read ce(common environmental) solutions from solutions
sol_pe = solutions[solutions$effect == 3, 3:4]
rownames(sol_pe) = sol_pe[,1]
sol_pe
# reliability
rel = 1 - (sol_animal$se^2 / sigma_a)
rel
# accuracy
acc = sqrt(rel)
acc
# 육종가 분할하기, DYD 구하기 생략
# result
repeat_result = data.frame(animal = sol_animal$level, animal_bv = sol_animal$solution, rel = rel, acc = acc, sep = sol_animal$se)
repeat_result
# join original animal id
aid_renum = pedi[,c(1,10)]
colnames(aid_renum) = c("animal", "orig_aid")
aid_renum
# 조인하기 위한 패키지 실행
#install.packages("plyr")
library(plyr)
common_result_final = join(x = repeat_result, y = aid_renum, by = "animal", type = "left")
common_result_final = common_result_final[order(common_result_final$orig_aid),]
common_result_final
# 파일로 출력, 분리자는 ",", 따옴표 없고
output_filename = gsub("[ -]", "", paste("common_result_", Sys.Date(), ".csv"))
write.table(common_result_final, output_filename, sep=",", row.names = FALSE, quote = FALSE)
다음은 실행 결과이다.
> # common envrionmental effect model의 blupf90의 분석 결과를 R로 정리하기 - Date :2020-07-01
>
> # blupf90의 육종가와 SE(standard error of prediction)읽어
>
> #정확도를 계산하고, 원래 ID를 붙임
>
> # set working directory
>
> setwd("D:/temp/03_common_environment_blupf90")
>
> # print working directory
>
> getwd()
[1] "D:/temp/03_common_environment_blupf90"
>
> # sigma_a
>
> sigma_a = 20
>
> sigma_a
[1] 20
>
> # read solutions
>
> solutions = read.table("solutions", skip = 1, sep = "", col.names = c("trait", "effect", "level", "solution", "se"), stringsAsFactors = FALSE)
>
> solutions
trait effect level solution se
1 1 1 1 91.4931401 5.365618
2 1 1 2 75.7644444 4.725816
3 1 2 1 -1.6670660 4.149994
4 1 2 2 0.5450306 4.145183
5 1 2 3 3.9252560 4.123879
6 1 2 4 -1.1414106 4.123879
7 1 2 5 -1.0975588 4.175201
8 1 2 6 0.4478712 4.188866
9 1 2 7 -2.3337327 4.149994
10 1 2 8 -3.8187955 4.188866
11 1 2 9 2.8947633 4.112477
12 1 2 10 1.5252560 4.123879
13 1 2 11 -1.4407729 4.317138
14 1 2 12 1.4407729 4.317138
15 1 2 13 1.4407729 4.317138
16 1 2 14 -0.2658937 4.317138
17 1 2 15 -1.1748792 4.307055
18 1 3 1 -1.7623188 3.426241
19 1 3 2 2.1611594 3.454675
20 1 3 3 -0.3988406 3.454675
>
> # read renumbered pedigree
>
> pedi = read.table("renadd02.ped", sep = "", stringsAsFactors = FALSE)
>
> pedi
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 1 11 15 1 0 2 1 0 0 7
2 2 11 14 1 0 2 1 0 0 14
3 15 0 0 3 0 0 0 0 3 2
4 3 13 12 1 0 2 1 0 0 9
5 12 0 0 3 0 0 0 0 4 4
6 4 13 12 1 0 2 1 0 0 11
7 5 11 15 1 0 2 1 0 0 6
8 6 11 14 1 0 2 1 0 0 13
9 11 0 0 3 0 0 0 6 0 1
10 7 11 15 1 0 2 1 0 0 8
11 8 11 14 1 0 2 1 0 0 15
12 13 0 0 3 0 0 0 4 0 3
13 9 13 12 1 0 2 1 0 0 10
14 14 0 0 3 0 0 0 0 3 5
15 10 13 12 1 0 2 1 0 0 12
>
> # read renumbered data
>
> data = read.table("renf90.dat", sep = "", stringsAsFactors = FALSE)
>
> data
V1 V2 V3 V4
1 90 1 5 1
2 70 2 1 1
3 65 2 7 1
4 98 2 3 2
5 106 1 9 2
6 60 2 4 2
7 80 2 10 2
8 100 1 6 3
9 85 2 2 3
10 68 1 8 3
>
> # read parity fixed solutions from solutions
>
> sol_fixed_sex = solutions[solutions$effect == 1, 4]
>
> sol_fixed_sex
[1] 91.49314 75.76444
>
> # read animal solutions from solutions
>
> sol_animal = solutions[solutions$effect == 2, 3:5]
>
> rownames(sol_animal) = sol_animal[,1]
>
> sol_animal
level solution se
1 1 -1.6670660 4.149994
2 2 0.5450306 4.145183
3 3 3.9252560 4.123879
4 4 -1.1414106 4.123879
5 5 -1.0975588 4.175201
6 6 0.4478712 4.188866
7 7 -2.3337327 4.149994
8 8 -3.8187955 4.188866
9 9 2.8947633 4.112477
10 10 1.5252560 4.123879
11 11 -1.4407729 4.317138
12 12 1.4407729 4.317138
13 13 1.4407729 4.317138
14 14 -0.2658937 4.317138
15 15 -1.1748792 4.307055
>
> # read ce(common environmental) solutions from solutions
>
> sol_pe = solutions[solutions$effect == 3, 3:4]
>
> rownames(sol_pe) = sol_pe[,1]
>
> sol_pe
level solution
1 1 -1.7623188
2 2 2.1611594
3 3 -0.3988406
>
> # reliability
>
> rel = 1 - (sol_animal$se^2 / sigma_a)
>
> rel
[1] 0.13887762 0.14087279 0.14968116 0.14968116 0.12838486 0.12266989 0.13887762 0.12266989
[9] 0.15437681 0.14968116 0.06811594 0.06811594 0.06811594 0.06811594 0.07246377
>
> # accuracy
>
> acc = sqrt(rel)
>
> acc
[1] 0.3726629 0.3753302 0.3868865 0.3868865 0.3583083 0.3502426 0.3726629 0.3502426 0.3929081
[10] 0.3868865 0.2609903 0.2609903 0.2609903 0.2609903 0.2691909
>
> # 육종가 분할하기, DYD 구하기 생략
>
> # result
>
> repeat_result = data.frame(animal = sol_animal$level, animal_bv = sol_animal$solution, rel = rel, acc = acc, sep = sol_animal$se)
>
> repeat_result
animal animal_bv rel acc sep
1 1 -1.6670660 0.13887762 0.3726629 4.149994
2 2 0.5450306 0.14087279 0.3753302 4.145183
3 3 3.9252560 0.14968116 0.3868865 4.123879
4 4 -1.1414106 0.14968116 0.3868865 4.123879
5 5 -1.0975588 0.12838486 0.3583083 4.175201
6 6 0.4478712 0.12266989 0.3502426 4.188866
7 7 -2.3337327 0.13887762 0.3726629 4.149994
8 8 -3.8187955 0.12266989 0.3502426 4.188866
9 9 2.8947633 0.15437681 0.3929081 4.112477
10 10 1.5252560 0.14968116 0.3868865 4.123879
11 11 -1.4407729 0.06811594 0.2609903 4.317138
12 12 1.4407729 0.06811594 0.2609903 4.317138
13 13 1.4407729 0.06811594 0.2609903 4.317138
14 14 -0.2658937 0.06811594 0.2609903 4.317138
15 15 -1.1748792 0.07246377 0.2691909 4.307055
>
> # join original animal id
>
> aid_renum = pedi[,c(1,10)]
>
> colnames(aid_renum) = c("animal", "orig_aid")
>
> aid_renum
animal orig_aid
1 1 7
2 2 14
3 15 2
4 3 9
5 12 4
6 4 11
7 5 6
8 6 13
9 11 1
10 7 8
11 8 15
12 13 3
13 9 10
14 14 5
15 10 12
>
> # 조인하기 위한 패키지 실행
>
> #install.packages("plyr")
>
> library(plyr)
>
> common_result_final = join(x = repeat_result, y = aid_renum, by = "animal", type = "left")
>
> common_result_final = common_result_final[order(common_result_final$orig_aid),]
>
> common_result_final
animal animal_bv rel acc sep orig_aid
11 11 -1.4407729 0.06811594 0.2609903 4.317138 1
15 15 -1.1748792 0.07246377 0.2691909 4.307055 2
13 13 1.4407729 0.06811594 0.2609903 4.317138 3
12 12 1.4407729 0.06811594 0.2609903 4.317138 4
14 14 -0.2658937 0.06811594 0.2609903 4.317138 5
5 5 -1.0975588 0.12838486 0.3583083 4.175201 6
1 1 -1.6670660 0.13887762 0.3726629 4.149994 7
7 7 -2.3337327 0.13887762 0.3726629 4.149994 8
3 3 3.9252560 0.14968116 0.3868865 4.123879 9
9 9 2.8947633 0.15437681 0.3929081 4.112477 10
4 4 -1.1414106 0.14968116 0.3868865 4.123879 11
10 10 1.5252560 0.14968116 0.3868865 4.123879 12
6 6 0.4478712 0.12266989 0.3502426 4.188866 13
2 2 0.5450306 0.14087279 0.3753302 4.145183 14
8 8 -3.8187955 0.12266989 0.3502426 4.188866 15
>
> # 파일로 출력, 분리자는 ",", 따옴표 없고
>
> output_filename = gsub("[ -]", "", paste("common_result_", Sys.Date(), ".csv"))
>
> write.table(common_result_final, output_filename, sep=",", row.names = FALSE, quote = FALSE)
>
다음은 저장한 파일이다.
animal,animal_bv,rel,acc,sep,orig_aid
11,-1.44077295,0.0681159399106954,0.260990306162308,4.31713808,1
15,-1.17487923,0.0724637665935377,0.269190948201342,4.30705522,2
13,1.44077295,0.0681159399106954,0.260990306162308,4.31713808,3
12,1.44077295,0.0681159399106954,0.260990306162308,4.31713808,4
14,-0.26589372,0.0681159399106954,0.260990306162308,4.31713808,5
5,-1.09755878,0.128384863881558,0.358308336327189,4.17520092,6
1,-1.66706602,0.138877618648009,0.372662875328372,4.14999369,7
7,-2.33373269,0.138877618648009,0.372662875328372,4.14999369,8
3,3.92525604,0.149681157402255,0.386886491625457,4.12387886,9
9,2.89476329,0.154376810722622,0.392908145401215,4.1124766,10
4,-1.14141063,0.149681157402255,0.386886491625457,4.12387886,11
10,1.52525604,0.149681157402255,0.386886491625457,4.12387886,12
6,0.44787118,0.122669889014353,0.350242614503652,4.18886646,13
2,0.5450306,0.140872787050789,0.375330237325464,4.14518326,14
8,-3.81879549,0.122669889014353,0.350242614503652,4.18886646,15
csv 파일이므로 엑셀에서 열면 다음과 같다.