# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.

# Raphael Mrode

# Example 11.2 p183

 

SNP effect를 임의 효과로 다루어 SNP effect를 추정한다. 각 SNP effect의 분산은 1) 상가적 유전 분산을 SNP의 수로 나누어 사용할 수도 있고 2) 상가적 유전 분산을 각 SNP의 2p(1-p)를 합한 값(s2pq)으로 나누어 사용하기도 한다. 두번째 경우가 대립 유전자 빈도의 차이를 고려한다는 점에서 많이 쓰이고 있다. 두번째를 사용할 경우 alpha = sigma_e * s2pq / sigma_a 로 계산하여 사용한다. 여기서는 혈통이 필요없다.

추정한 SNP effect를 이용하여 SNP genotype을 가지고 있는 개체의 DGV를 계산한다.

Data

animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
13 0 0 1 558 9 2 0 1 1 0 0 0 2 1 2
14 0 0 1 722 13.4 1 0 0 0 0 2 0 2 1 0
15 13 4 1 300 12.7 1 1 2 1 1 0 0 2 1 2
16 15 2 1 73 15.4 0 0 2 1 0 1 0 2 2 1
17 15 5 1 52 5.9 0 1 1 2 0 0 0 2 1 2
18 14 6 1 87 7.7 1 1 0 1 0 2 0 2 2 1
19 14 9 1 64 10.2 0 0 1 1 0 2 0 2 2 0
20 14 9 1 103 4.8 0 1 1 0 0 1 0 2 2 0
21 1 3 1 13 7.6 2 0 0 0 0 1 2 2 1 2
22 14 8 1 125 8.8 0 0 0 1 1 2 0 2 0 0
23 14 11 1 93 9.8 0 1 1 0 0 1 0 2 2 1
24 14 10 1 66 9.2 1 0 0 0 1 1 0 2 0 0
25 14 7 1 75 11.5 0 0 0 1 1 2 0 2 1 0
26 14 12 1 33 13.3 1 0 1 1 0 2 0 1 0 0

 

Program

# Mixed Linear Model(SNP-BLUP Model) for Computing SNP Effect, unweighted analysis

# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.

# Raphael Mrode

# Example 11.2 p183

# MASS packages
if (!("MASS" %in% installed.packages())) {
  install.packages("MASS", dependencies = TRUE)
}
library(MASS)

# set working directory 
setwd(".") 

# print working directory 
getwd()

# read data
data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
data

# variance component and ratio
sigma_a = 35.241
sigma_e = 245
sigma_a
sigma_e

# SNP 
M_all = as.matrix(data[, 7:16])
M_all

#  mean of each SNP(allele frequency)
snp_mean = colMeans(M_all) / 2
snp_mean

#  sum of 2pq(heterozygote ratio)
s2pq = 0
for (i in 1:10) {
  s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
}
s2pq

# alpha
alpha = sigma_e * s2pq / sigma_a
alpha

# observation
y = data[1:8, 6]
y

# design matrix of fixed effect(general mean)
X = matrix(rep(1, 8), 8, 1)
X

# design matrix for SNP effect
M = as.matrix(data[1:8, 7:16])
Z = sweep(M, 2, snp_mean * 2)
Z

# Left Hand Side
LHS = rbind(
  cbind(t(X) %*% X, t(X) %*% Z), 
  cbind(t(Z) %*% X, t(Z) %*% Z + diag(10) * alpha))
LHS

# Right Hand Side
RHS = rbind(t(X) %*% y, 
            t(Z) %*% y)
RHS

# Solutions

# generalized inverse of LHS
gi_LHS = ginv(LHS)
gi_LHS

# solution
sol = gi_LHS %*% RHS
round(sol, 3)

# mean effect of solutions
mean_eff = sol[1]
round(mean_eff, 3)

# snp effect of solutions
snp_eff = sol[2:11]
round(snp_eff, 3)

# calculate DGV
# genotype of animals
# design matrix of animals
P2 = sweep(M_all, 2, snp_mean * 2)
round(P2, 3)

# DGV
dgv = P2 %*% snp_eff
round(dgv, 3)

 

RStudio에서 실행하는 화면

 

실행 결과

> # Mixed Linear Model(SNP-BLUP Model) for Computing SNP Effect, unweighted analysis
> 
> # Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
> 
> # Raphael Mrode
> 
> # Example 11.2 p183
> 
> # MASS packages
> if (!("MASS" %in% installed.packages())) {
+   install.packages("MASS", dependencies = TRUE)
+ }
> library(MASS)
> 
> # set working directory 
> setwd(".") 
> 
> # print working directory 
> getwd()
[1] "D:/users/bhpark/2020/job/공부하기/07_Linear Models for the Prediction of Animal Breeding Values_3rd_Mrode/15_mixed linear model SNPBLUP for computing SNP effects_unweighted"
> 
> # read data
> data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
> data
   animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
1      13    0   0    1 558     9.0    2    0    1    1    0    0    0    2    1     2
2      14    0   0    1 722    13.4    1    0    0    0    0    2    0    2    1     0
3      15   13   4    1 300    12.7    1    1    2    1    1    0    0    2    1     2
4      16   15   2    1  73    15.4    0    0    2    1    0    1    0    2    2     1
5      17   15   5    1  52     5.9    0    1    1    2    0    0    0    2    1     2
6      18   14   6    1  87     7.7    1    1    0    1    0    2    0    2    2     1
7      19   14   9    1  64    10.2    0    0    1    1    0    2    0    2    2     0
8      20   14   9    1 103     4.8    0    1    1    0    0    1    0    2    2     0
9      21    1   3    1  13     7.6    2    0    0    0    0    1    2    2    1     2
10     22   14   8    1 125     8.8    0    0    0    1    1    2    0    2    0     0
11     23   14  11    1  93     9.8    0    1    1    0    0    1    0    2    2     1
12     24   14  10    1  66     9.2    1    0    0    0    1    1    0    2    0     0
13     25   14   7    1  75    11.5    0    0    0    1    1    2    0    2    1     0
14     26   14  12    1  33    13.3    1    0    1    1    0    2    0    1    0     0
> 
> # variance component and ratio
> sigma_a = 35.241
> sigma_e = 245
> sigma_a
[1] 35.241
> sigma_e
[1] 245
> 
> # SNP 
> M_all = as.matrix(data[, 7:16])
> M_all
      snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
 [1,]    2    0    1    1    0    0    0    2    1     2
 [2,]    1    0    0    0    0    2    0    2    1     0
 [3,]    1    1    2    1    1    0    0    2    1     2
 [4,]    0    0    2    1    0    1    0    2    2     1
 [5,]    0    1    1    2    0    0    0    2    1     2
 [6,]    1    1    0    1    0    2    0    2    2     1
 [7,]    0    0    1    1    0    2    0    2    2     0
 [8,]    0    1    1    0    0    1    0    2    2     0
 [9,]    2    0    0    0    0    1    2    2    1     2
[10,]    0    0    0    1    1    2    0    2    0     0
[11,]    0    1    1    0    0    1    0    2    2     1
[12,]    1    0    0    0    1    1    0    2    0     0
[13,]    0    0    0    1    1    2    0    2    1     0
[14,]    1    0    1    1    0    2    0    1    0     0
> 
> #  mean of each SNP(allele frequency)
> snp_mean = colMeans(M_all) / 2
> snp_mean
      snp1       snp2       snp3       snp4       snp5       snp6       snp7       snp8       snp9      snp10 
0.32142857 0.17857143 0.35714286 0.35714286 0.14285714 0.60714286 0.07142857 0.96428571 0.57142857 0.39285714 
> 
> #  sum of 2pq(heterozygote ratio)
> s2pq = 0
> for (i in 1:10) {
+   s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
+ }
> s2pq
    snp1 
3.538265 
> 
> # alpha
> alpha = sigma_e * s2pq / sigma_a
> alpha
    snp1 
24.59848 
> 
> # observation
> y = data[1:8, 6]
> y
[1]  9.0 13.4 12.7 15.4  5.9  7.7 10.2  4.8
> 
> # design matrix of fixed effect(general mean)
> X = matrix(rep(1, 8), 8, 1)
> X
     [,1]
[1,]    1
[2,]    1
[3,]    1
[4,]    1
[5,]    1
[6,]    1
[7,]    1
[8,]    1
> 
> # design matrix for SNP effect
> M = as.matrix(data[1:8, 7:16])
> Z = sweep(M, 2, snp_mean * 2)
> Z
        snp1       snp2       snp3       snp4       snp5       snp6       snp7       snp8       snp9      snp10
1  1.3571429 -0.3571429  0.2857143  0.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571  1.2142857
2  0.3571429 -0.3571429 -0.7142857 -0.7142857 -0.2857143  0.7857143 -0.1428571 0.07142857 -0.1428571 -0.7857143
3  0.3571429  0.6428571  1.2857143  0.2857143  0.7142857 -1.2142857 -0.1428571 0.07142857 -0.1428571  1.2142857
4 -0.6428571 -0.3571429  1.2857143  0.2857143 -0.2857143 -0.2142857 -0.1428571 0.07142857  0.8571429  0.2142857
5 -0.6428571  0.6428571  0.2857143  1.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571  1.2142857
6  0.3571429  0.6428571 -0.7142857  0.2857143 -0.2857143  0.7857143 -0.1428571 0.07142857  0.8571429  0.2142857
7 -0.6428571 -0.3571429  0.2857143  0.2857143 -0.2857143  0.7857143 -0.1428571 0.07142857  0.8571429 -0.7857143
8 -0.6428571  0.6428571  0.2857143 -0.7142857 -0.2857143 -0.2142857 -0.1428571 0.07142857  0.8571429 -0.7857143
> 
> # Left Hand Side
> LHS = rbind(
+   cbind(t(X) %*% X, t(X) %*% Z), 
+   cbind(t(Z) %*% X, t(Z) %*% Z + diag(10) * alpha))
> LHS
                        snp1        snp2       snp3        snp4        snp5       snp6        snp7        snp8        snp9      snp10
       8.0000000 -0.14285714  1.14285714  2.2857143  1.28571429 -1.28571429 -1.7142857 -1.14285714  0.57142857  2.85714286  1.7142857
snp1  -0.1428571 28.47603006 -0.52040816 -1.0408163 -0.39795918  0.39795918 -0.9693878  0.02040816 -0.01020408 -1.55102041  1.9693878
snp2   1.1428571 -0.52040816 26.76174435  0.3265306  0.68367347  0.31632653 -1.2448980 -0.16326531  0.08163265  0.40816327  1.2448980
snp3   2.2857143 -1.04081633  0.32653061 29.2515403  1.36734694  0.63265306 -3.4897959 -0.32653061  0.16326531  0.81632653  2.4897959
snp4   1.2857143 -0.39795918  0.68367347  1.3673469 27.68011170 -0.08163265 -2.2755102 -0.18367347  0.09183673 -0.04081633  3.2755102
snp5  -1.2857143  0.39795918  0.31632653  0.6326531 -0.08163265 25.68011170 -0.7244898  0.18367347 -0.09183673 -0.95918367  0.7244898
snp6  -1.7142857 -0.96938776 -1.24489796 -3.4897959 -2.27551020 -0.72448980 30.9658260  0.24489796 -0.12244898  1.38775510 -5.3673469
snp7  -1.1428571  0.02040816 -0.16326531 -0.3265306 -0.18367347  0.18367347  0.2448980 24.76174435 -0.08163265 -0.40816327 -0.2448980
snp8   0.5714286 -0.01020408  0.08163265  0.1632653  0.09183673 -0.09183673 -0.1224490 -0.08163265 24.63929537  0.20408163  0.1224490
snp9   2.8571429 -1.55102041  0.40816327  0.8163265 -0.04081633 -0.95918367  1.3877551 -0.40816327  0.20408163 27.61888721 -1.3877551
snp10  1.7142857  1.96938776  1.24489796  2.4897959  3.27551020  0.72448980 -5.3673469 -0.24489796  0.12244898 -1.38775510 30.9658260
> 
> # Right Hand Side
> RHS = rbind(t(X) %*% y, 
+             t(Z) %*% y)
> RHS
        [,1]
       79.10
snp1    0.95
snp2    2.85
snp3   29.60
snp4   10.30
snp5   -9.90
snp6  -13.25
snp7  -11.30
snp8    5.65
snp9   26.80
snp10  16.15
> 
> # Solutions
> 
> # generalized inverse of LHS
> gi_LHS = ginv(LHS)
> gi_LHS
               [,1]          [,2]          [,3]          [,4]          [,5]          [,6]          [,7]          [,8]          [,9]         [,10]
 [1,]  1.388294e-01 -5.663085e-05 -4.970969e-03 -9.042826e-03 -4.643044e-03  6.969389e-03  5.829371e-03  5.807560e-03 -2.903780e-03 -1.427606e-02
 [2,] -5.663085e-05  3.548365e-02  7.785237e-04  1.479041e-03  7.596326e-04 -4.419903e-04  8.832420e-04  2.243621e-17 -1.284610e-17  1.772102e-03
 [3,] -4.970969e-03  7.785237e-04  3.771277e-02  2.541537e-04 -4.906823e-04 -6.784168e-04  1.063631e-03  2.168404e-18 -4.824700e-18 -1.381252e-04
 [4,] -9.042826e-03  1.479041e-03  2.541537e-04  3.555059e-02 -8.504422e-04 -1.229133e-03  3.172508e-03  1.517883e-18 -4.770490e-18 -3.301998e-04
 [5,] -4.643044e-03  7.596326e-04 -4.906823e-04 -8.504422e-04  3.694391e-02  5.655121e-05  1.781453e-03 -1.338990e-17  6.478108e-18  3.572256e-04
 [6,]  6.969389e-03 -4.419903e-04 -6.784168e-04 -1.229133e-03  5.655121e-05  3.941053e-02  9.383621e-04  2.276825e-18 -4.391019e-18  5.733858e-04
 [7,]  5.829371e-03  8.832420e-04  1.063631e-03  3.172508e-03  1.781453e-03  9.383621e-04  3.414696e-02  7.155734e-18 -3.713392e-18 -2.095373e-03
 [8,]  5.807560e-03  8.548257e-18  2.059984e-18 -1.702197e-17 -6.559423e-18 -1.095044e-17 -1.084202e-18  4.065292e-02  2.574980e-18  7.155734e-18
 [9,] -2.903780e-03  2.554651e-18 -2.141299e-18  7.806256e-18  1.208885e-17  1.040834e-17 -6.803369e-18 -9.215718e-19  4.065292e-02  1.192622e-18
[10,] -1.427606e-02  1.772102e-03 -1.381252e-04 -3.301998e-04  3.572256e-04  5.733858e-04 -2.095373e-03 -2.168404e-18  4.878910e-18  3.802145e-02
[11,] -5.999035e-03 -2.241301e-03 -1.064947e-03 -1.808286e-03 -3.287558e-03 -9.713208e-04  4.937702e-03 -2.547875e-18  5.068645e-18  1.999292e-03
              [,11]
 [1,] -5.999035e-03
 [2,] -2.241301e-03
 [3,] -1.064947e-03
 [4,] -1.808286e-03
 [5,] -3.287558e-03
 [6,] -9.713208e-04
 [7,]  4.937702e-03
 [8,] -2.005774e-18
 [9,] -1.095044e-17
[10,]  1.999292e-03
[11,]  3.427246e-02
> 
> # solution
> sol = gi_LHS %*% RHS
> round(sol, 3)
        [,1]
 [1,]  9.944
 [2,]  0.087
 [3,] -0.311
 [4,]  0.262
 [5,] -0.080
 [6,]  0.110
 [7,]  0.139
 [8,]  0.000
 [9,]  0.000
[10,] -0.061
[11,] -0.016
> 
> # mean effect of solutions
> mean_eff = sol[1]
> round(mean_eff, 3)
[1] 9.944
> 
> # snp effect of solutions
> snp_eff = sol[2:11]
> round(snp_eff, 3)
 [1]  0.087 -0.311  0.262 -0.080  0.110  0.139  0.000  0.000 -0.061 -0.016
> 
> # calculate DGV
> # genotype of animals
> # design matrix of animals
> P2 = sweep(M_all, 2, snp_mean * 2)
> round(P2, 3)
        snp1   snp2   snp3   snp4   snp5   snp6   snp7   snp8   snp9  snp10
 [1,]  1.357 -0.357  0.286  0.286 -0.286 -1.214 -0.143  0.071 -0.143  1.214
 [2,]  0.357 -0.357 -0.714 -0.714 -0.286  0.786 -0.143  0.071 -0.143 -0.786
 [3,]  0.357  0.643  1.286  0.286  0.714 -1.214 -0.143  0.071 -0.143  1.214
 [4,] -0.643 -0.357  1.286  0.286 -0.286 -0.214 -0.143  0.071  0.857  0.214
 [5,] -0.643  0.643  0.286  1.286 -0.286 -1.214 -0.143  0.071 -0.143  1.214
 [6,]  0.357  0.643 -0.714  0.286 -0.286  0.786 -0.143  0.071  0.857  0.214
 [7,] -0.643 -0.357  0.286  0.286 -0.286  0.786 -0.143  0.071  0.857 -0.786
 [8,] -0.643  0.643  0.286 -0.714 -0.286 -0.214 -0.143  0.071  0.857 -0.786
 [9,]  1.357 -0.357 -0.714 -0.714 -0.286 -0.214  1.857  0.071 -0.143  1.214
[10,] -0.643 -0.357 -0.714  0.286  0.714  0.786 -0.143  0.071 -1.143 -0.786
[11,] -0.643  0.643  0.286 -0.714 -0.286 -0.214 -0.143  0.071  0.857  0.214
[12,]  0.357 -0.357 -0.714 -0.714  0.714 -0.214 -0.143  0.071 -1.143 -0.786
[13,] -0.643 -0.357 -0.714  0.286  0.714  0.786 -0.143  0.071 -0.143 -0.786
[14,]  0.357 -0.357  0.286  0.286 -0.286  0.786 -0.143 -0.929 -1.143 -0.786
> 
> # DGV
> dgv = P2 %*% snp_eff
> round(dgv, 3)
        [,1]
 [1,]  0.070
 [2,]  0.111
 [3,]  0.045
 [4,]  0.253
 [5,] -0.495
 [6,] -0.357
 [7,]  0.145
 [8,] -0.224
 [9,]  0.027
[10,]  0.114
[11,] -0.240
[12,]  0.143
[13,]  0.054
[14,]  0.354
> 

 

 

 

 

 

 

+ Recent posts