blupf90을 이용하여 반복 모형의 해를 구하는 방법을 아래에서 설명한 바 있다.
2014/02/25 - [Animal Breeding/BLUPF90] - blupf90으로 repeatability 모델의 육종가 구하기
여기서는 blupf90이 구한 결과를 R로 정리하는 것을 설명한다.
blupf90은 육종가(breeding value)와 육종가의 SE(SEP : Standard Error of Prediction)를 제공하는데 SE를 이용하여 정확도(accuracy)를 구해야 한다. 또한 renumber된 개체 ID이기 때문에 원래 ID를 찾아 주어야 한다.
blupf90이 구한 solutions는 다음과 같다.
trait/effect level solution s.e.
1 1 1 175.47201349 4.87281841
1 1 2 241.89292349 4.89514134
1 2 1 0.00000000 0.00000000
1 2 2 44.06544643 5.84677204
1 2 3 0.00000000 0.00000000
1 2 4 0.01317142 5.76142116
1 3 1 9.32843242 4.14473900
1 3 2 13.58074302 4.13086417
1 3 3 -18.38678174 4.15844835
1 3 4 24.19361843 4.29701737
1 3 5 -18.20697245 4.06001499
1 3 6 10.14757048 4.23061109
1 3 7 -3.08415295 4.36084294
1 3 8 -7.06341753 4.27063934
1 4 1 -1.38964658 3.02156498
1 4 2 8.41697930 3.03895386
1 4 3 -17.22849691 3.06853681
1 4 4 17.34674038 3.11992378
1 4 5 -7.14557620 3.09509319
1 4 6 0.00000000 3.46410162
1 4 7 0.00000000 3.46410162
1 4 8 0.00000000 3.46410162
renumber된 혈통과 원래 ID는 renadd03.ped에 나와 있다.
1 2 5 1 0 2 1 0 0 7
7 0 0 3 0 0 0 0 2 2
2 6 0 2 0 1 1 1 0 4
3 6 7 1 0 2 1 0 1 6
6 0 0 3 0 0 0 2 0 1
4 8 3 1 0 2 1 0 0 8
8 0 0 3 0 0 0 2 0 3
5 8 7 1 0 2 1 0 1 5
1, 2, 3열이 리넘버된 혈통이고 10열이 원래 ID이다.
반복 모형(repeatabilityl model)에 대한 blupf90의 결과로부터 정확도를 계산하고 원래 ID를 붙이는 R 프로그램이다.
# Repeatability model의 blupf90의 분석 결과를 R로 정리하기 - Date :2020-06-12
# blupf90의 육종가와 SE(standard error of prediction)읽어 정확도를 계산하고, 원래 ID를 붙임
# set working directory
setwd("D:/temp/repeatability_blupf90")
# print working directory
getwd()
# sigma_a
sigma_a = 20
sigma_a
# read solutions
solutions = read.table("solutions", skip = 1, sep = "", col.names = c("trait", "effect", "level", "solution", "se"), stringsAsFactors = FALSE)
solutions
# read renumbered pedigree
pedi = read.table("renadd03.ped", sep = "", stringsAsFactors = FALSE)
pedi
# read renumbered data
data = read.table("renf90.dat", sep = "", stringsAsFactors = FALSE)
data
# read parity fixed solutions from solutions
sol_fixed_parity = solutions[solutions$effect == 1, 4]
sol_fixed_parity
# read hys(herd-year-season) fixed solutions from solutions
sol_fixed_hys = solutions[solutions$effect == 2, 4]
sol_fixed_hys
# read animal solutions from solutions
sol_animal = solutions[solutions$effect == 3, 3:5]
rownames(sol_animal) = sol_animal[,1]
sol_animal
# read pe(permanent environmental) solutions from solutions
sol_pe = solutions[solutions$effect == 4, 3:4]
rownames(sol_pe) = sol_pe[,1]
sol_pe
# reliability
rel = 1 - (sol_animal$se^2 / sigma_a)
rel
# accuracy
acc = sqrt(rel)
acc
# 육종가 분할하기, DYD 구하기 생략
# result
repeat_result = data.frame(animal = sol_animal$level, animal_bv = sol_animal$solution, rel = rel, acc = acc, sep = sol_animal$se, pe = sol_pe$solution)
repeat_result
# join original animal id
aid_renum = pedi[,c(1,10)]
colnames(aid_renum) = c("animal", "orig_aid")
aid_renum
# 조인하기 위한 패키지 실행
#install.packages("plyr")
library(plyr)
repeat_result_final = join(x = repeat_result, y = aid_renum, by = "animal", type = "left")
repeat_result_final = repeat_result_final[order(repeat_result_final$orig_aid),]
repeat_result_final
# 파일로 출력, 분리자는 ",", 따옴표 없고
output_filename = gsub("[ -]", "", paste("repeatability_result_", Sys.Date(), ".csv"))
write.table(repeat_result_final, output_filename, sep=",", row.names = FALSE, quote = FALSE)
다음은 실행 결과이다.
> # Repeatability model의 blupf90의 분석 결과를 R로 정리하기 - Date :2020-06-12
>
> # blupf90의 육종가와 SE(standard error of prediction)읽어
>
> #정확도를 계산하고, 원래 ID를 붙임
>
> # set working directory
>
> setwd("D:/temp/repeatability_blupf90")
>
> # print working directory
>
> getwd()
[1] "D:/temp/repeatability_blupf90"
>
> # sigma_a
>
> sigma_a = 20
>
> sigma_a
[1] 20
>
> # read solutions
>
> solutions = read.table("solutions", skip = 1, sep = "", col.names = c("trait", "effect", "level", "solution", "se"), stringsAsFactors = FALSE)
>
> solutions
trait effect level solution se
1 1 1 1 175.47201349 4.872818
2 1 1 2 241.89292349 4.895141
3 1 2 1 0.00000000 0.000000
4 1 2 2 44.06544643 5.846772
5 1 2 3 0.00000000 0.000000
6 1 2 4 0.01317142 5.761421
7 1 3 1 9.32843242 4.144739
8 1 3 2 13.58074302 4.130864
9 1 3 3 -18.38678174 4.158448
10 1 3 4 24.19361843 4.297017
11 1 3 5 -18.20697245 4.060015
12 1 3 6 10.14757048 4.230611
13 1 3 7 -3.08415295 4.360843
14 1 3 8 -7.06341753 4.270639
15 1 4 1 -1.38964658 3.021565
16 1 4 2 8.41697930 3.038954
17 1 4 3 -17.22849691 3.068537
18 1 4 4 17.34674038 3.119924
19 1 4 5 -7.14557620 3.095093
20 1 4 6 0.00000000 3.464102
21 1 4 7 0.00000000 3.464102
22 1 4 8 0.00000000 3.464102
>
> # read renumbered pedigree
>
> pedi = read.table("renadd03.ped", sep = "", stringsAsFactors = FALSE)
>
> pedi
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 1 8 2 1 0 2 2 0 1 7
2 7 0 0 3 0 0 0 0 2 2
3 2 6 7 1 0 2 2 0 1 4
4 3 6 5 1 0 2 2 0 0 6
5 6 0 0 3 0 0 0 3 0 1
6 4 6 1 1 0 2 2 0 0 8
7 8 0 0 3 0 0 0 2 0 3
8 5 8 7 1 0 2 2 0 1 5
>
> # read renumbered data
>
> data = read.table("renf90.dat", sep = "", stringsAsFactors = FALSE)
>
> data
V1 V2 V3 V4
1 201 1 1 2
2 280 2 3 2
3 150 1 1 5
4 200 2 4 5
5 160 1 2 3
6 190 2 3 3
7 180 1 1 1
8 250 2 3 1
9 285 1 2 4
10 300 2 4 4
>
> # read parity fixed solutions from solutions
>
> sol_fixed_parity = solutions[solutions$effect == 1, 4]
>
> sol_fixed_parity
[1] 175.4720 241.8929
>
> # read hys(herd-year-season) fixed solutions from solutions
>
> sol_fixed_hys = solutions[solutions$effect == 2, 4]
>
> sol_fixed_hys
[1] 0.00000000 44.06544643 0.00000000 0.01317142
>
>
> # read animal solutions from solutions
>
> sol_animal = solutions[solutions$effect == 3, 3:5]
>
> rownames(sol_animal) = sol_animal[,1]
>
> sol_animal
level solution se
1 1 9.328432 4.144739
2 2 13.580743 4.130864
3 3 -18.386782 4.158448
4 4 24.193618 4.297017
5 5 -18.206972 4.060015
6 6 10.147570 4.230611
7 7 -3.084153 4.360843
8 8 -7.063418 4.270639
>
> # read pe(permanent environmental) solutions from solutions
>
> sol_pe = solutions[solutions$effect == 4, 3:4]
>
> rownames(sol_pe) = sol_pe[,1]
>
> sol_pe
level solution
1 1 -1.389647
2 2 8.416979
3 3 -17.228497
4 4 17.346740
5 5 -7.145576
6 6 0.000000
7 7 0.000000
8 8 0.000000
>
> # reliability
>
> rel = 1 - (sol_animal$se^2 / sigma_a)
>
> rel
[1] 0.14105693 0.14679806 0.13536537 0.07678209 0.17581391 0.10509649
[7] 0.04915244 0.08808198
>
> # accuracy
>
> acc = sqrt(rel)
>
> acc
[1] 0.3755755 0.3831424 0.3679203 0.2770958 0.4193017 0.3241859 0.2217035
[8] 0.2967861
>
> # 육종가 분할하기, DYD 구하기 생략
>
> # result
>
> repeat_result = data.frame(animal = sol_animal$level, animal_bv = sol_animal$solution, rel = rel, acc = acc, sep = sol_animal$se, pe = sol_pe$solution)
>
> repeat_result
animal animal_bv rel acc sep pe
1 1 9.328432 0.14105693 0.3755755 4.144739 -1.389647
2 2 13.580743 0.14679806 0.3831424 4.130864 8.416979
3 3 -18.386782 0.13536537 0.3679203 4.158448 -17.228497
4 4 24.193618 0.07678209 0.2770958 4.297017 17.346740
5 5 -18.206972 0.17581391 0.4193017 4.060015 -7.145576
6 6 10.147570 0.10509649 0.3241859 4.230611 0.000000
7 7 -3.084153 0.04915244 0.2217035 4.360843 0.000000
8 8 -7.063418 0.08808198 0.2967861 4.270639 0.000000
>
> # join original animal id
>
> aid_renum = pedi[,c(1,10)]
>
> colnames(aid_renum) = c("animal", "orig_aid")
>
> aid_renum
animal orig_aid
1 1 7
2 7 2
3 2 4
4 3 6
5 6 1
6 4 8
7 8 3
8 5 5
>
> # 조인하기 위한 패키지 실행
>
> #install.packages("plyr")
>
> library(plyr)
>
> repeat_result_final = join(x = repeat_result, y = aid_renum, by = "animal", type = "left")
>
> repeat_result_final = repeat_result_final[order(repeat_result_final$orig_aid),]
>
> repeat_result_final
animal animal_bv rel acc sep pe orig_aid
6 6 10.147570 0.10509649 0.3241859 4.230611 0.000000 1
7 7 -3.084153 0.04915244 0.2217035 4.360843 0.000000 2
8 8 -7.063418 0.08808198 0.2967861 4.270639 0.000000 3
2 2 13.580743 0.14679806 0.3831424 4.130864 8.416979 4
5 5 -18.206972 0.17581391 0.4193017 4.060015 -7.145576 5
3 3 -18.386782 0.13536537 0.3679203 4.158448 -17.228497 6
1 1 9.328432 0.14105693 0.3755755 4.144739 -1.389647 7
4 4 24.193618 0.07678209 0.2770958 4.297017 17.346740 8
>
> # 파일로 출력, 분리자는 ",", 따옴표 없고
>
> output_filename = gsub("[ -]", "", paste("repeatability_result_", Sys.Date(), ".csv"))
>
> write.table(repeat_result_final, output_filename, sep=",", row.names = FALSE, quote = FALSE)
>
다음은 저장한 파일이다.
animal,animal_bv,rel,acc,sep,pe,orig_aid
6,10.14757048,0.105096490258451,0.324185888432008,4.23061109,0,1
7,-3.08415295,0.0491524426326077,0.221703501624597,4.36084294,0,2
8,-7.06341753,0.0880819813822183,0.296786086908093,4.27063934,0,3
2,13.58074302,0.146798060450511,0.383142350113519,4.13086417,8.4169793,4
5,-18.20697245,0.175813914048765,0.419301698122921,4.06001499,-7.1455762,5
3,-18.38678174,0.135365366019114,0.367920325640096,4.15844835,-17.22849691,6
1,9.32843242,0.14105693109395,0.375575466576226,4.144739,-1.38964658,7
4,24.19361843,0.0767820860959143,0.277095806709366,4.29701737,17.34674038,8
CSV 파일이므로 엑셀에서 열면 다음과 같다.
'Animal Breeding > R for Genetic Evaluation' 카테고리의 다른 글
R을 이용하여 공통 환경 효과 모형(common environmental effect model)에 대한 blupf90 분석 결과 정리하기 (0) | 2020.07.01 |
---|---|
R을 이용한 반복 모형(Repeatability Model)의 육종가 구하기 프로그래밍 (0) | 2020.06.14 |
R을 이용하여 단형질 개체 모형(single trait animal model)에 대한 blupf90 분석 결과 정리하기 (0) | 2020.06.03 |
R을 이용한 단형질 개체 모형(Single Trait Animal Model)의 육종가 구하기 프로그래밍 (0) | 2020.05.18 |
R을 이용한 혈통 파일 처리 (0) | 2020.04.10 |