# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.

# Raphael Mrode

# Example 11.2 p183


간단한 설명은 다음 포스팅을 참고한다.

2020/12/19 - [Animal Breeding/R for Genetic Evaluation] - Mixed Linear Model(SNP-BLUP Model) for Computing SNP Effects, unweighted analysis

 

Mixed Linear Model for Computing SNP Effects(SNP-BLUP Model), unweighted analysis

# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition. # Raphael Mrode # Example 11.2 p183 SNP effect를 임의 효과로 다루어 SNP effect를 추정한다. 각 SNP effect의..

bhpark.tistory.com

Data

animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
13 0 0 1 558 9 2 0 1 1 0 0 0 2 1 2
14 0 0 1 722 13.4 1 0 0 0 0 2 0 2 1 0
15 13 4 1 300 12.7 1 1 2 1 1 0 0 2 1 2
16 15 2 1 73 15.4 0 0 2 1 0 1 0 2 2 1
17 15 5 1 52 5.9 0 1 1 2 0 0 0 2 1 2
18 14 6 1 87 7.7 1 1 0 1 0 2 0 2 2 1
19 14 9 1 64 10.2 0 0 1 1 0 2 0 2 2 0
20 14 9 1 103 4.8 0 1 1 0 0 1 0 2 2 0
21 1 3 1 13 7.6 2 0 0 0 0 1 2 2 1 2
22 14 8 1 125 8.8 0 0 0 1 1 2 0 2 0 0
23 14 11 1 93 9.8 0 1 1 0 0 1 0 2 2 1
24 14 10 1 66 9.2 1 0 0 0 1 1 0 2 0 0
25 14 7 1 75 11.5 0 0 0 1 1 2 0 2 1 0
26 14 12 1 33 13.3 1 0 1 1 0 2 0 1 0 0

 

프로그램

# Mixed Linear Model for Computing SNP Effect, weighted analysis

# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.

# Raphael Mrode

# Example 11.2 p183

# MASS packages
if (!("MASS" %in% installed.packages())) {
  install.packages("MASS", dependencies = TRUE)
}
library(MASS)

# set working directory 
setwd(".") 

# print working directory 
getwd()

# read data
data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
data

# variance component and ratio
sigma_a = 35.241
sigma_e = 245
sigma_a
sigma_e

# SNP 
M_all = as.matrix(data[, 7:16])
M_all

#  mean of each SNP(allele frequency)
snp_mean = colMeans(M_all) / 2
snp_mean

#  sum of 2pq(heterozygote ratio)
s2pq = 0
for (i in 1:10) {
  s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
}
s2pq

# alpha
alpha = sigma_e * s2pq / sigma_a
alpha

# observation
y = data[1:8, 6]
y

# design matrix of fixed effect(general mean)
X = matrix(rep(1, 8), 8, 1)
X

# design matrix for SNP effect
M = as.matrix(data[1:8, 7:16])
Z = sweep(M, 2, snp_mean * 2)
Z

# Weight : R from EDC
R = diag(data[1:8, 5])
R
Rinv = ginv(R)
Rinv

# Left Hand Side
LHS = rbind(
  cbind(t(X) %*% Rinv %*% X, t(X) %*% Rinv %*% Z), 
  cbind(t(Z) %*% Rinv %*% X, t(Z) %*% Rinv %*% Z + diag(10) * alpha))
LHS

# Right Hand Side
RHS = rbind(t(X) %*% Rinv %*% y, 
            t(Z) %*% Rinv %*% y)
RHS

# Solutions

# generalized inverse of LHS
gi_LHS = ginv(LHS)
gi_LHS

# solution
sol = gi_LHS %*% RHS
round(sol, 3)

# mean effect of solutions
mean_eff = sol[1]
round(mean_eff, 3)

# snp effect of solutions
snp_eff = sol[2:11]
round(snp_eff, 3)

# calculate DGV
# genotype of animals
# design matrix of animals
P2 = sweep(M_all, 2, snp_mean * 2)
round(P2, 3)

# DGV
dgv = P2 %*% snp_eff
round(dgv, 3)

 

RStudio에서 실행하는 화면

 

실행 결과

> # Mixed Linear Model for Computing SNP Effect, weighted analysis
> 
> # Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
> 
> # Raphael Mrode
> 
> # Example 11.2 p183
> 
> # MASS packages
> if (!("MASS" %in% installed.packages())) {
+   install.packages("MASS", dependencies = TRUE)
+ }
> library(MASS)
> 
> # set working directory 
> setwd(".") 
> 
> # print working directory 
> getwd()
[1] "D:/users/bhpark/2020/job/공부하기/07_Linear Models for the Prediction of Animal Breeding Values_3rd_Mrode/16_mixed linear model SNPBLUP for computing SNP effects_weighted"
> 
> # read data
> data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
> data
   animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
1      13    0   0    1 558     9.0    2    0    1    1    0    0    0    2    1     2
2      14    0   0    1 722    13.4    1    0    0    0    0    2    0    2    1     0
3      15   13   4    1 300    12.7    1    1    2    1    1    0    0    2    1     2
4      16   15   2    1  73    15.4    0    0    2    1    0    1    0    2    2     1
5      17   15   5    1  52     5.9    0    1    1    2    0    0    0    2    1     2
6      18   14   6    1  87     7.7    1    1    0    1    0    2    0    2    2     1
7      19   14   9    1  64    10.2    0    0    1    1    0    2    0    2    2     0
8      20   14   9    1 103     4.8    0    1    1    0    0    1    0    2    2     0
9      21    1   3    1  13     7.6    2    0    0    0    0    1    2    2    1     2
10     22   14   8    1 125     8.8    0    0    0    1    1    2    0    2    0     0
11     23   14  11    1  93     9.8    0    1    1    0    0    1    0    2    2     1
12     24   14  10    1  66     9.2    1    0    0    0    1    1    0    2    0     0
13     25   14   7    1  75    11.5    0    0    0    1    1    2    0    2    1     0
14     26   14  12    1  33    13.3    1    0    1    1    0    2    0    1    0     0
> 
> # variance component and ratio
> sigma_a = 35.241
> sigma_e = 245
> sigma_a
[1] 35.241
> sigma_e
[1] 245
> 
> # SNP 
> M_all = as.matrix(data[, 7:16])
> M_all
      snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
 [1,]    2    0    1    1    0    0    0    2    1     2
 [2,]    1    0    0    0    0    2    0    2    1     0
 [3,]    1    1    2    1    1    0    0    2    1     2
 [4,]    0    0    2    1    0    1    0    2    2     1
 [5,]    0    1    1    2    0    0    0    2    1     2
 [6,]    1    1    0    1    0    2    0    2    2     1
 [7,]    0    0    1    1    0    2    0    2    2     0
 [8,]    0    1    1    0    0    1    0    2    2     0
 [9,]    2    0    0    0    0    1    2    2    1     2
[10,]    0    0    0    1    1    2    0    2    0     0
[11,]    0    1    1    0    0    1    0    2    2     1
[12,]    1    0    0    0    1    1    0    2    0     0
[13,]    0    0    0    1    1    2    0    2    1     0
[14,]    1    0    1    1    0    2    0    1    0     0
> 
> #  mean of each SNP(allele frequency)
> snp_mean = colMeans(M_all) / 2
> snp_mean
      snp1       snp2       snp3       snp4       snp5       snp6       snp7       snp8       snp9      snp10 
0.32142857 0.17857143 0.35714286 0.35714286 0.14285714 0.60714286 0.07142857 0.96428571 0.57142857 0.39285714 
> 
> #  sum of 2pq(heterozygote ratio)
> s2pq = 0
> for (i in 1:10) {
+   s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
+ }
> s2pq
    snp1 
3.538265 
> 
> # alpha
> alpha = sigma_e * s2pq / sigma_a
> alpha
    snp1 
24.59848 
> 
> # observation
> y = data[1:8, 6]
> y
[1]  9.0 13.4 12.7 15.4  5.9  7.7 10.2  4.8
> 
> # design matrix of fixed effect(general mean)
> X = matrix(rep(1, 8), 8, 1)
> X
     [,1]
[1,]    1
[2,]    1
[3,]    1
[4,]    1
[5,]    1
[6,]    1
[7,]    1
[8,]    1
> 
> # design matrix for SNP effect
> M = as.matrix(data[1:8, 7:16])
> Z = sweep(M, 2, snp_mean * 2)
> Z
        snp1       snp2       snp3       snp4       snp5       snp6       snp7       snp8       snp9      snp10
1  1.3571429 -0.3571429  0.2857143  0.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571  1.2142857
2  0.3571429 -0.3571429 -0.7142857 -0.7142857 -0.2857143  0.7857143 -0.1428571 0.07142857 -0.1428571 -0.7857143
3  0.3571429  0.6428571  1.2857143  0.2857143  0.7142857 -1.2142857 -0.1428571 0.07142857 -0.1428571  1.2142857
4 -0.6428571 -0.3571429  1.2857143  0.2857143 -0.2857143 -0.2142857 -0.1428571 0.07142857  0.8571429  0.2142857
5 -0.6428571  0.6428571  0.2857143  1.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571  1.2142857
6  0.3571429  0.6428571 -0.7142857  0.2857143 -0.2857143  0.7857143 -0.1428571 0.07142857  0.8571429  0.2142857
7 -0.6428571 -0.3571429  0.2857143  0.2857143 -0.2857143  0.7857143 -0.1428571 0.07142857  0.8571429 -0.7857143
8 -0.6428571  0.6428571  0.2857143 -0.7142857 -0.2857143 -0.2142857 -0.1428571 0.07142857  0.8571429 -0.7857143
> 
> # Weight : R from EDC
> R = diag(data[1:8, 5])
> R
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]  558    0    0    0    0    0    0    0
[2,]    0  722    0    0    0    0    0    0
[3,]    0    0  300    0    0    0    0    0
[4,]    0    0    0   73    0    0    0    0
[5,]    0    0    0    0   52    0    0    0
[6,]    0    0    0    0    0   87    0    0
[7,]    0    0    0    0    0    0   64    0
[8,]    0    0    0    0    0    0    0  103
> Rinv = ginv(R)
> Rinv
            [,1]        [,2]        [,3]       [,4]       [,5]       [,6]     [,7]        [,8]
[1,] 0.001792115 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000 0.000000 0.000000000
[2,] 0.000000000 0.001385042 0.000000000 0.00000000 0.00000000 0.00000000 0.000000 0.000000000
[3,] 0.000000000 0.000000000 0.003333333 0.00000000 0.00000000 0.00000000 0.000000 0.000000000
[4,] 0.000000000 0.000000000 0.000000000 0.01369863 0.00000000 0.00000000 0.000000 0.000000000
[5,] 0.000000000 0.000000000 0.000000000 0.00000000 0.01923077 0.00000000 0.000000 0.000000000
[6,] 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.01149425 0.000000 0.000000000
[7,] 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000 0.015625 0.000000000
[8,] 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000 0.000000 0.009708738
> 
> # Left Hand Side
> LHS = rbind(
+   cbind(t(X) %*% Rinv %*% X, t(X) %*% Rinv %*% Z), 
+   cbind(t(Z) %*% Rinv %*% X, t(Z) %*% Rinv %*% Z + diag(10) * alpha))
> LHS
                            snp1          snp2         snp3          snp4          snp5          snp6          snp7          snp8          snp9         snp10
       0.076267880 -0.0292324941  0.0165285648  0.025943492  0.0299278126 -1.845749e-02 -0.0121950399 -0.0108954114  0.0054477057  0.0396312095  1.398055e-02
snp1  -0.029232494 24.6279258974 -0.0028682259 -0.020567675 -0.0149681047  9.542617e-03  0.0095592165  0.0041760706 -0.0020880353 -0.0168110757  7.872053e-04
snp2   0.016528565 -0.0028682259 24.6207119740 -0.004921569  0.0113384107 -2.579590e-03 -0.0160931411 -0.0023612235  0.0011806118  0.0007965454  1.724097e-02
snp3   0.025943492 -0.0205676752 -0.0049215685 24.636989205  0.0111223220 -3.126712e-03 -0.0205867825 -0.0037062131  0.0018531066  0.0129343415  9.602677e-03
snp4   0.029927813 -0.0149681047  0.0113384107  0.011122322 24.6396792508 -7.598423e-03 -0.0258437486 -0.0042754018  0.0021377009  0.0004520377  3.606263e-02
snp5  -0.018457489  0.0095426174 -0.0025795900 -0.003126712 -0.0075984226  2.460613e+01 -0.0005633219  0.0026367842 -0.0013183921 -0.0117993932  5.317478e-05
snp6  -0.012195040  0.0095592165 -0.0160931411 -0.020586783 -0.0258437486 -5.633219e-04 24.6530639051  0.0017421486 -0.0008710743  0.0180342684 -4.347322e-02
snp7  -0.010895411  0.0041760706 -0.0023612235 -0.003706213 -0.0042754018  2.636784e-03  0.0017421486 24.6000355316 -0.0007782437 -0.0056616014 -1.997222e-03
snp8   0.005447706 -0.0020880353  0.0011806118  0.001853107  0.0021377009 -1.318392e-03 -0.0008710743 -0.0007782437 24.5988681661  0.0028308007  9.986111e-04
snp9   0.039631209 -0.0168110757  0.0007965454  0.012934342  0.0004520377 -1.179939e-02  0.0180342684 -0.0056616014  0.0028308007 24.6361259751 -1.650383e-02
snp10  0.013980555  0.0007872053  0.0172409722  0.009602677  0.0360626336  5.317478e-05 -0.0434732183 -0.0019972221  0.0009986111 -0.0165038270  2.465204e+01
> 
> # Right Hand Side
> RHS = rbind(t(X) %*% Rinv %*% y, 
+             t(Z) %*% Rinv %*% y)
> RHS
             [,1]
       0.69592505
snp1  -0.26572369
snp2   0.04235790
snp3   0.34506266
snp4   0.24713577
snp5  -0.15650240
snp6  -0.05461040
snp7  -0.09941786
snp8   0.04970893
snp9   0.40602373
snp10  0.09651420
> 
> # Solutions
> 
> # generalized inverse of LHS
> gi_LHS = ginv(LHS)
> gi_LHS
              [,1]          [,2]          [,3]          [,4]          [,5]          [,6]          [,7]          [,8]          [,9]         [,10]         [,11]
 [1,] 13.147293052  1.556141e-02 -8.807918e-03 -1.380434e-02 -1.592688e-02  9.837634e-03  6.465596e-03  5.807560e-03 -2.903780e-03 -2.113444e-02 -7.423874e-03
 [2,]  0.015561405  4.062281e-02 -5.712222e-06  1.751802e-05  5.776172e-06 -4.076705e-06 -8.056763e-06  8.744768e-18  3.433872e-18  2.674969e-06 -1.014599e-05
 [3,] -0.008807918 -5.712222e-06  4.062216e-02  1.740921e-05 -7.949681e-06 -2.339667e-06  2.211834e-05 -8.721898e-18  5.350707e-18  1.280970e-05 -2.336232e-05
 [4,] -0.013804342  1.751802e-05  1.740921e-05  4.060396e-02 -1.520532e-06 -5.199698e-06  2.706639e-05  4.538403e-18 -2.510182e-18  8.726196e-07 -7.949932e-06
 [5,] -0.015926881  5.776172e-06 -7.949681e-06 -1.520532e-06  4.060441e-02  6.012267e-07  3.457150e-05  1.866691e-17 -2.210756e-19  2.482223e-05 -5.028287e-05
 [6,]  0.009837634 -4.076705e-06 -2.339667e-06 -5.199698e-06  6.012267e-07  4.064766e-02  5.778862e-06  1.549308e-17 -7.883759e-18  3.634628e-06 -5.651215e-06
 [7,]  0.006465596 -8.056763e-06  2.211834e-05  2.706639e-05  3.457150e-05  5.778862e-06  4.056633e-02 -5.221111e-18 -1.382570e-18 -4.006944e-05  6.776774e-05
 [8,]  0.005807560  2.030423e-17 -1.004242e-17  8.459318e-18  1.655272e-17 -1.081492e-17 -8.314475e-18  4.065292e-02  2.593318e-17 -1.023724e-17 -6.425168e-18
 [9,] -0.002903780 -6.247715e-18  3.719745e-18  3.142069e-18 -1.326496e-17 -8.469482e-18  9.578672e-18  9.534626e-18  4.065292e-02 -2.255649e-18  5.814246e-18
[10,] -0.021134439  2.674969e-06  1.280970e-05  8.726196e-07  2.482223e-05  3.634628e-06 -4.006944e-05  5.082198e-20  2.004080e-18  4.062485e-02  3.906646e-05
[11,] -0.007423874 -1.014599e-05 -2.336232e-05 -7.949932e-06 -5.028287e-05 -5.651215e-06  6.776774e-05 -1.295537e-18 -7.271990e-18  3.906646e-05  4.056904e-02
> 
> # solution
> sol = gi_LHS %*% RHS
> round(sol, 3)
        [,1]
 [1,]  9.124
 [2,]  0.000
 [3,] -0.004
 [4,]  0.004
 [5,] -0.001
 [6,]  0.000
 [7,]  0.002
 [8,]  0.000
 [9,]  0.000
[10,]  0.002
[11,] -0.001
> 
> # mean effect of solutions
> mean_eff = sol[1]
> round(mean_eff, 3)
[1] 9.124
> 
> # snp effect of solutions
> snp_eff = sol[2:11]
> round(snp_eff, 3)
 [1]  0.000 -0.004  0.004 -0.001  0.000  0.002  0.000  0.000  0.002 -0.001
> 
> # calculate DGV
> # genotype of animals
> # design matrix of animals
> P2 = sweep(M_all, 2, snp_mean * 2)
> round(P2, 3)
        snp1   snp2   snp3   snp4   snp5   snp6   snp7   snp8   snp9  snp10
 [1,]  1.357 -0.357  0.286  0.286 -0.286 -1.214 -0.143  0.071 -0.143  1.214
 [2,]  0.357 -0.357 -0.714 -0.714 -0.286  0.786 -0.143  0.071 -0.143 -0.786
 [3,]  0.357  0.643  1.286  0.286  0.714 -1.214 -0.143  0.071 -0.143  1.214
 [4,] -0.643 -0.357  1.286  0.286 -0.286 -0.214 -0.143  0.071  0.857  0.214
 [5,] -0.643  0.643  0.286  1.286 -0.286 -1.214 -0.143  0.071 -0.143  1.214
 [6,]  0.357  0.643 -0.714  0.286 -0.286  0.786 -0.143  0.071  0.857  0.214
 [7,] -0.643 -0.357  0.286  0.286 -0.286  0.786 -0.143  0.071  0.857 -0.786
 [8,] -0.643  0.643  0.286 -0.714 -0.286 -0.214 -0.143  0.071  0.857 -0.786
 [9,]  1.357 -0.357 -0.714 -0.714 -0.286 -0.214  1.857  0.071 -0.143  1.214
[10,] -0.643 -0.357 -0.714  0.286  0.714  0.786 -0.143  0.071 -1.143 -0.786
[11,] -0.643  0.643  0.286 -0.714 -0.286 -0.214 -0.143  0.071  0.857  0.214
[12,]  0.357 -0.357 -0.714 -0.714  0.714 -0.214 -0.143  0.071 -1.143 -0.786
[13,] -0.643 -0.357 -0.714  0.286  0.714  0.786 -0.143  0.071 -0.143 -0.786
[14,]  0.357 -0.357  0.286  0.286 -0.286  0.786 -0.143 -0.929 -1.143 -0.786
> 
> # DGV
> dgv = P2 %*% snp_eff
> round(dgv, 3)
        [,1]
 [1,] -0.002
 [2,]  0.002
 [3,] -0.002
 [4,]  0.008
 [5,] -0.008
 [6,] -0.003
 [7,]  0.007
 [8,]  0.001
 [9,] -0.003
[10,] -0.001
[11,]  0.000
[12,] -0.002
[13,]  0.001
[14,]  0.003
> 

 

책의 값과 다른 결과 값이 나온다. EDC의 역수를 가중치로 주어야 하는데 EDC 값을 그대로 주어서 나온 것으로 보인다. EDC의 역수를 가중치로 주자.

 

+ Recent posts