blupf90을 이용하여 반복 모형의 해를 구하는 방법을 아래에서 설명한 바 있다.

 

2014/02/25 - [Animal Breeding/BLUPF90] - blupf90으로 repeatability 모델의 육종가 구하기

 

여기서는 blupf90이 구한 결과를 R로 정리하는 것을 설명한다.

blupf90은 육종가(breeding value)와 육종가의 SE(SEP : Standard Error of Prediction)를 제공하는데 SE를 이용하여 정확도(accuracy)를 구해야 한다. 또한 renumber된 개체 ID이기 때문에 원래 ID를 찾아 주어야 한다.

blupf90이 구한 solutions는 다음과 같다.

trait/effect level  solution          s.e.
   1   1         1        175.47201349          4.87281841
   1   1         2        241.89292349          4.89514134
   1   2         1          0.00000000          0.00000000
   1   2         2         44.06544643          5.84677204
   1   2         3          0.00000000          0.00000000
   1   2         4          0.01317142          5.76142116
   1   3         1          9.32843242          4.14473900
   1   3         2         13.58074302          4.13086417
   1   3         3        -18.38678174          4.15844835
   1   3         4         24.19361843          4.29701737
   1   3         5        -18.20697245          4.06001499
   1   3         6         10.14757048          4.23061109
   1   3         7         -3.08415295          4.36084294
   1   3         8         -7.06341753          4.27063934
   1   4         1         -1.38964658          3.02156498
   1   4         2          8.41697930          3.03895386
   1   4         3        -17.22849691          3.06853681
   1   4         4         17.34674038          3.11992378
   1   4         5         -7.14557620          3.09509319
   1   4         6          0.00000000          3.46410162
   1   4         7          0.00000000          3.46410162
   1   4         8          0.00000000          3.46410162

 

renumber된 혈통과 원래 ID는 renadd03.ped에 나와 있다.

 

1 2 5 1 0 2 1 0 0 7
7 0 0 3 0 0 0 0 2 2
2 6 0 2 0 1 1 1 0 4
3 6 7 1 0 2 1 0 1 6
6 0 0 3 0 0 0 2 0 1
4 8 3 1 0 2 1 0 0 8
8 0 0 3 0 0 0 2 0 3
5 8 7 1 0 2 1 0 1 5

 

1, 2, 3열이 리넘버된 혈통이고 10열이 원래 ID이다.

 

반복 모형(repeatabilityl model)에 대한 blupf90의 결과로부터 정확도를 계산하고 원래 ID를 붙이는 R 프로그램이다.

 

# Repeatability model의 blupf90의 분석 결과를 R로 정리하기 - Date :2020-06-12 

# blupf90의 육종가와 SE(standard error of prediction)읽어 정확도를 계산하고, 원래 ID를 붙임 

# set working directory 

setwd("D:/temp/repeatability_blupf90") 

# print working directory 

getwd() 

# sigma_a 

sigma_a = 20 

sigma_a 

# read solutions 

solutions = read.table("solutions", skip = 1, sep = "", col.names = c("trait", "effect", "level", "solution", "se"), stringsAsFactors = FALSE) 

solutions 

# read renumbered pedigree 

pedi = read.table("renadd03.ped", sep = "", stringsAsFactors = FALSE) 

pedi 

# read renumbered data 

data = read.table("renf90.dat", sep = "", stringsAsFactors = FALSE) 

data 

# read parity fixed solutions from solutions 

sol_fixed_parity = solutions[solutions$effect == 1, 4] 

sol_fixed_parity 

# read hys(herd-year-season) fixed solutions from solutions 

sol_fixed_hys = solutions[solutions$effect == 2, 4] 

sol_fixed_hys 


# read animal solutions from solutions 

sol_animal = solutions[solutions$effect == 3, 3:5] 

rownames(sol_animal) = sol_animal[,1] 

sol_animal 

# read pe(permanent environmental) solutions from solutions 

sol_pe = solutions[solutions$effect == 4, 3:4] 

rownames(sol_pe) = sol_pe[,1] 

sol_pe 

# reliability 

rel = 1 - (sol_animal$se^2 / sigma_a) 

rel 

# accuracy 

acc = sqrt(rel) 

acc 

# 육종가 분할하기, DYD 구하기 생략 

# result 

repeat_result = data.frame(animal = sol_animal$level, animal_bv = sol_animal$solution, rel = rel, acc = acc, sep = sol_animal$se, pe = sol_pe$solution) 

repeat_result 

# join original animal id 

aid_renum = pedi[,c(1,10)] 

colnames(aid_renum) = c("animal", "orig_aid") 

aid_renum 

# 조인하기 위한 패키지 실행 

#install.packages("plyr") 

library(plyr) 

repeat_result_final = join(x = repeat_result, y = aid_renum, by = "animal", type = "left") 

repeat_result_final = repeat_result_final[order(repeat_result_final$orig_aid),] 

repeat_result_final

# 파일로 출력, 분리자는 ",", 따옴표 없고 

output_filename = gsub("[ -]", "", paste("repeatability_result_", Sys.Date(), ".csv")) 

write.table(repeat_result_final, output_filename, sep=",", row.names = FALSE, quote = FALSE)

 

다음은 실행 결과이다.

 

> # Repeatability model의 blupf90의 분석 결과를 R로 정리하기 - Date :2020-06-12 
> 
> # blupf90의 육종가와 SE(standard error of prediction)읽어 
> 
> #정확도를 계산하고, 원래 ID를 붙임 
> 
> # set working directory 
> 
> setwd("D:/temp/repeatability_blupf90") 
> 
> # print working directory 
> 
> getwd() 
[1] "D:/temp/repeatability_blupf90"
> 
> # sigma_a 
> 
> sigma_a = 20 
> 
> sigma_a 
[1] 20
> 
> # read solutions 
> 
> solutions = read.table("solutions", skip = 1, sep = "", col.names = c("trait", "effect", "level", "solution", "se"), stringsAsFactors = FALSE) 
> 
> solutions 
   trait effect level     solution       se
1      1      1     1 175.47201349 4.872818
2      1      1     2 241.89292349 4.895141
3      1      2     1   0.00000000 0.000000
4      1      2     2  44.06544643 5.846772
5      1      2     3   0.00000000 0.000000
6      1      2     4   0.01317142 5.761421
7      1      3     1   9.32843242 4.144739
8      1      3     2  13.58074302 4.130864
9      1      3     3 -18.38678174 4.158448
10     1      3     4  24.19361843 4.297017
11     1      3     5 -18.20697245 4.060015
12     1      3     6  10.14757048 4.230611
13     1      3     7  -3.08415295 4.360843
14     1      3     8  -7.06341753 4.270639
15     1      4     1  -1.38964658 3.021565
16     1      4     2   8.41697930 3.038954
17     1      4     3 -17.22849691 3.068537
18     1      4     4  17.34674038 3.119924
19     1      4     5  -7.14557620 3.095093
20     1      4     6   0.00000000 3.464102
21     1      4     7   0.00000000 3.464102
22     1      4     8   0.00000000 3.464102
> 
> # read renumbered pedigree 
> 
> pedi = read.table("renadd03.ped", sep = "", stringsAsFactors = FALSE) 
> 
> pedi 
  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1  1  8  2  1  0  2  2  0  1   7
2  7  0  0  3  0  0  0  0  2   2
3  2  6  7  1  0  2  2  0  1   4
4  3  6  5  1  0  2  2  0  0   6
5  6  0  0  3  0  0  0  3  0   1
6  4  6  1  1  0  2  2  0  0   8
7  8  0  0  3  0  0  0  2  0   3
8  5  8  7  1  0  2  2  0  1   5
> 
> # read renumbered data 
> 
> data = read.table("renf90.dat", sep = "", stringsAsFactors = FALSE) 
> 
> data 
    V1 V2 V3 V4
1  201  1  1  2
2  280  2  3  2
3  150  1  1  5
4  200  2  4  5
5  160  1  2  3
6  190  2  3  3
7  180  1  1  1
8  250  2  3  1
9  285  1  2  4
10 300  2  4  4
> 
> # read parity fixed solutions from solutions 
> 
> sol_fixed_parity = solutions[solutions$effect == 1, 4] 
> 
> sol_fixed_parity 
[1] 175.4720 241.8929
> 
> # read hys(herd-year-season) fixed solutions from solutions 
> 
> sol_fixed_hys = solutions[solutions$effect == 2, 4] 
> 
> sol_fixed_hys 
[1]  0.00000000 44.06544643  0.00000000  0.01317142
> 
> 
> # read animal solutions from solutions 
> 
> sol_animal = solutions[solutions$effect == 3, 3:5] 
> 
> rownames(sol_animal) = sol_animal[,1] 
> 
> sol_animal 
  level   solution       se
1     1   9.328432 4.144739
2     2  13.580743 4.130864
3     3 -18.386782 4.158448
4     4  24.193618 4.297017
5     5 -18.206972 4.060015
6     6  10.147570 4.230611
7     7  -3.084153 4.360843
8     8  -7.063418 4.270639
> 
> # read pe(permanent environmental) solutions from solutions 
> 
> sol_pe = solutions[solutions$effect == 4, 3:4] 
> 
> rownames(sol_pe) = sol_pe[,1] 
> 
> sol_pe 
  level   solution
1     1  -1.389647
2     2   8.416979
3     3 -17.228497
4     4  17.346740
5     5  -7.145576
6     6   0.000000
7     7   0.000000
8     8   0.000000
> 
> # reliability 
> 
> rel = 1 - (sol_animal$se^2 / sigma_a) 
> 
> rel 
[1] 0.14105693 0.14679806 0.13536537 0.07678209 0.17581391 0.10509649
[7] 0.04915244 0.08808198
> 
> # accuracy 
> 
> acc = sqrt(rel) 
> 
> acc 
[1] 0.3755755 0.3831424 0.3679203 0.2770958 0.4193017 0.3241859 0.2217035
[8] 0.2967861
> 
> # 육종가 분할하기, DYD 구하기 생략 
> 
> # result 
> 
> repeat_result = data.frame(animal = sol_animal$level, animal_bv = sol_animal$solution, rel = rel, acc = acc, sep = sol_animal$se, pe = sol_pe$solution) 
> 
> repeat_result 
  animal  animal_bv        rel       acc      sep         pe
1      1   9.328432 0.14105693 0.3755755 4.144739  -1.389647
2      2  13.580743 0.14679806 0.3831424 4.130864   8.416979
3      3 -18.386782 0.13536537 0.3679203 4.158448 -17.228497
4      4  24.193618 0.07678209 0.2770958 4.297017  17.346740
5      5 -18.206972 0.17581391 0.4193017 4.060015  -7.145576
6      6  10.147570 0.10509649 0.3241859 4.230611   0.000000
7      7  -3.084153 0.04915244 0.2217035 4.360843   0.000000
8      8  -7.063418 0.08808198 0.2967861 4.270639   0.000000
> 
> # join original animal id 
> 
> aid_renum = pedi[,c(1,10)] 
> 
> colnames(aid_renum) = c("animal", "orig_aid") 
> 
> aid_renum 
  animal orig_aid
1      1        7
2      7        2
3      2        4
4      3        6
5      6        1
6      4        8
7      8        3
8      5        5
> 
> # 조인하기 위한 패키지 실행 
> 
> #install.packages("plyr") 
> 
> library(plyr) 
> 
> repeat_result_final = join(x = repeat_result, y = aid_renum, by = "animal", type = "left") 
> 
> repeat_result_final = repeat_result_final[order(repeat_result_final$orig_aid),] 
> 
> repeat_result_final
  animal  animal_bv        rel       acc      sep         pe orig_aid
6      6  10.147570 0.10509649 0.3241859 4.230611   0.000000        1
7      7  -3.084153 0.04915244 0.2217035 4.360843   0.000000        2
8      8  -7.063418 0.08808198 0.2967861 4.270639   0.000000        3
2      2  13.580743 0.14679806 0.3831424 4.130864   8.416979        4
5      5 -18.206972 0.17581391 0.4193017 4.060015  -7.145576        5
3      3 -18.386782 0.13536537 0.3679203 4.158448 -17.228497        6
1      1   9.328432 0.14105693 0.3755755 4.144739  -1.389647        7
4      4  24.193618 0.07678209 0.2770958 4.297017  17.346740        8
> 
> # 파일로 출력, 분리자는 ",", 따옴표 없고 
> 
> output_filename = gsub("[ -]", "", paste("repeatability_result_", Sys.Date(), ".csv")) 
> 
> write.table(repeat_result_final, output_filename, sep=",", row.names = FALSE, quote = FALSE)
> 

 

다음은 저장한 파일이다.

 

animal,animal_bv,rel,acc,sep,pe,orig_aid
6,10.14757048,0.105096490258451,0.324185888432008,4.23061109,0,1
7,-3.08415295,0.0491524426326077,0.221703501624597,4.36084294,0,2
8,-7.06341753,0.0880819813822183,0.296786086908093,4.27063934,0,3
2,13.58074302,0.146798060450511,0.383142350113519,4.13086417,8.4169793,4
5,-18.20697245,0.175813914048765,0.419301698122921,4.06001499,-7.1455762,5
3,-18.38678174,0.135365366019114,0.367920325640096,4.15844835,-17.22849691,6
1,9.32843242,0.14105693109395,0.375575466576226,4.144739,-1.38964658,7
4,24.19361843,0.0767820860959143,0.277095806709366,4.29701737,17.34674038,8

 

CSV 파일이므로 엑셀에서 열면 다음과 같다.

 

 

+ Recent posts