# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
# Raphael Mrode
# Example 11.2 p183
간단한 설명은 다음 포스팅을 참고한다.
Data
animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
13 0 0 1 558 9 2 0 1 1 0 0 0 2 1 2
14 0 0 1 722 13.4 1 0 0 0 0 2 0 2 1 0
15 13 4 1 300 12.7 1 1 2 1 1 0 0 2 1 2
16 15 2 1 73 15.4 0 0 2 1 0 1 0 2 2 1
17 15 5 1 52 5.9 0 1 1 2 0 0 0 2 1 2
18 14 6 1 87 7.7 1 1 0 1 0 2 0 2 2 1
19 14 9 1 64 10.2 0 0 1 1 0 2 0 2 2 0
20 14 9 1 103 4.8 0 1 1 0 0 1 0 2 2 0
21 1 3 1 13 7.6 2 0 0 0 0 1 2 2 1 2
22 14 8 1 125 8.8 0 0 0 1 1 2 0 2 0 0
23 14 11 1 93 9.8 0 1 1 0 0 1 0 2 2 1
24 14 10 1 66 9.2 1 0 0 0 1 1 0 2 0 0
25 14 7 1 75 11.5 0 0 0 1 1 2 0 2 1 0
26 14 12 1 33 13.3 1 0 1 1 0 2 0 1 0 0
프로그램
# Mixed Linear Model for Computing SNP Effect, weighted analysis
# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
# Raphael Mrode
# Example 11.2 p183
# MASS packages
if (!("MASS" %in% installed.packages())) {
install.packages("MASS", dependencies = TRUE)
}
library(MASS)
# set working directory
setwd(".")
# print working directory
getwd()
# read data
data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
data
# variance component and ratio
sigma_a = 35.241
sigma_e = 245
sigma_a
sigma_e
# SNP
M_all = as.matrix(data[, 7:16])
M_all
# mean of each SNP(allele frequency)
snp_mean = colMeans(M_all) / 2
snp_mean
# sum of 2pq(heterozygote ratio)
s2pq = 0
for (i in 1:10) {
s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
}
s2pq
# alpha
alpha = sigma_e * s2pq / sigma_a
alpha
# observation
y = data[1:8, 6]
y
# design matrix of fixed effect(general mean)
X = matrix(rep(1, 8), 8, 1)
X
# design matrix for SNP effect
M = as.matrix(data[1:8, 7:16])
Z = sweep(M, 2, snp_mean * 2)
Z
# Weight : R from EDC
R = diag(data[1:8, 5])
R
Rinv = ginv(R)
Rinv
# Left Hand Side
LHS = rbind(
cbind(t(X) %*% Rinv %*% X, t(X) %*% Rinv %*% Z),
cbind(t(Z) %*% Rinv %*% X, t(Z) %*% Rinv %*% Z + diag(10) * alpha))
LHS
# Right Hand Side
RHS = rbind(t(X) %*% Rinv %*% y,
t(Z) %*% Rinv %*% y)
RHS
# Solutions
# generalized inverse of LHS
gi_LHS = ginv(LHS)
gi_LHS
# solution
sol = gi_LHS %*% RHS
round(sol, 3)
# mean effect of solutions
mean_eff = sol[1]
round(mean_eff, 3)
# snp effect of solutions
snp_eff = sol[2:11]
round(snp_eff, 3)
# calculate DGV
# genotype of animals
# design matrix of animals
P2 = sweep(M_all, 2, snp_mean * 2)
round(P2, 3)
# DGV
dgv = P2 %*% snp_eff
round(dgv, 3)
RStudio에서 실행하는 화면
실행 결과
> # Mixed Linear Model for Computing SNP Effect, weighted analysis
>
> # Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
>
> # Raphael Mrode
>
> # Example 11.2 p183
>
> # MASS packages
> if (!("MASS" %in% installed.packages())) {
+ install.packages("MASS", dependencies = TRUE)
+ }
> library(MASS)
>
> # set working directory
> setwd(".")
>
> # print working directory
> getwd()
[1] "D:/users/bhpark/2020/job/공부하기/07_Linear Models for the Prediction of Animal Breeding Values_3rd_Mrode/16_mixed linear model SNPBLUP for computing SNP effects_weighted"
>
> # read data
> data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
> data
animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
1 13 0 0 1 558 9.0 2 0 1 1 0 0 0 2 1 2
2 14 0 0 1 722 13.4 1 0 0 0 0 2 0 2 1 0
3 15 13 4 1 300 12.7 1 1 2 1 1 0 0 2 1 2
4 16 15 2 1 73 15.4 0 0 2 1 0 1 0 2 2 1
5 17 15 5 1 52 5.9 0 1 1 2 0 0 0 2 1 2
6 18 14 6 1 87 7.7 1 1 0 1 0 2 0 2 2 1
7 19 14 9 1 64 10.2 0 0 1 1 0 2 0 2 2 0
8 20 14 9 1 103 4.8 0 1 1 0 0 1 0 2 2 0
9 21 1 3 1 13 7.6 2 0 0 0 0 1 2 2 1 2
10 22 14 8 1 125 8.8 0 0 0 1 1 2 0 2 0 0
11 23 14 11 1 93 9.8 0 1 1 0 0 1 0 2 2 1
12 24 14 10 1 66 9.2 1 0 0 0 1 1 0 2 0 0
13 25 14 7 1 75 11.5 0 0 0 1 1 2 0 2 1 0
14 26 14 12 1 33 13.3 1 0 1 1 0 2 0 1 0 0
>
> # variance component and ratio
> sigma_a = 35.241
> sigma_e = 245
> sigma_a
[1] 35.241
> sigma_e
[1] 245
>
> # SNP
> M_all = as.matrix(data[, 7:16])
> M_all
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
[1,] 2 0 1 1 0 0 0 2 1 2
[2,] 1 0 0 0 0 2 0 2 1 0
[3,] 1 1 2 1 1 0 0 2 1 2
[4,] 0 0 2 1 0 1 0 2 2 1
[5,] 0 1 1 2 0 0 0 2 1 2
[6,] 1 1 0 1 0 2 0 2 2 1
[7,] 0 0 1 1 0 2 0 2 2 0
[8,] 0 1 1 0 0 1 0 2 2 0
[9,] 2 0 0 0 0 1 2 2 1 2
[10,] 0 0 0 1 1 2 0 2 0 0
[11,] 0 1 1 0 0 1 0 2 2 1
[12,] 1 0 0 0 1 1 0 2 0 0
[13,] 0 0 0 1 1 2 0 2 1 0
[14,] 1 0 1 1 0 2 0 1 0 0
>
> # mean of each SNP(allele frequency)
> snp_mean = colMeans(M_all) / 2
> snp_mean
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
0.32142857 0.17857143 0.35714286 0.35714286 0.14285714 0.60714286 0.07142857 0.96428571 0.57142857 0.39285714
>
> # sum of 2pq(heterozygote ratio)
> s2pq = 0
> for (i in 1:10) {
+ s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
+ }
> s2pq
snp1
3.538265
>
> # alpha
> alpha = sigma_e * s2pq / sigma_a
> alpha
snp1
24.59848
>
> # observation
> y = data[1:8, 6]
> y
[1] 9.0 13.4 12.7 15.4 5.9 7.7 10.2 4.8
>
> # design matrix of fixed effect(general mean)
> X = matrix(rep(1, 8), 8, 1)
> X
[,1]
[1,] 1
[2,] 1
[3,] 1
[4,] 1
[5,] 1
[6,] 1
[7,] 1
[8,] 1
>
> # design matrix for SNP effect
> M = as.matrix(data[1:8, 7:16])
> Z = sweep(M, 2, snp_mean * 2)
> Z
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
1 1.3571429 -0.3571429 0.2857143 0.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571 1.2142857
2 0.3571429 -0.3571429 -0.7142857 -0.7142857 -0.2857143 0.7857143 -0.1428571 0.07142857 -0.1428571 -0.7857143
3 0.3571429 0.6428571 1.2857143 0.2857143 0.7142857 -1.2142857 -0.1428571 0.07142857 -0.1428571 1.2142857
4 -0.6428571 -0.3571429 1.2857143 0.2857143 -0.2857143 -0.2142857 -0.1428571 0.07142857 0.8571429 0.2142857
5 -0.6428571 0.6428571 0.2857143 1.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571 1.2142857
6 0.3571429 0.6428571 -0.7142857 0.2857143 -0.2857143 0.7857143 -0.1428571 0.07142857 0.8571429 0.2142857
7 -0.6428571 -0.3571429 0.2857143 0.2857143 -0.2857143 0.7857143 -0.1428571 0.07142857 0.8571429 -0.7857143
8 -0.6428571 0.6428571 0.2857143 -0.7142857 -0.2857143 -0.2142857 -0.1428571 0.07142857 0.8571429 -0.7857143
>
> # Weight : R from EDC
> R = diag(data[1:8, 5])
> R
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 558 0 0 0 0 0 0 0
[2,] 0 722 0 0 0 0 0 0
[3,] 0 0 300 0 0 0 0 0
[4,] 0 0 0 73 0 0 0 0
[5,] 0 0 0 0 52 0 0 0
[6,] 0 0 0 0 0 87 0 0
[7,] 0 0 0 0 0 0 64 0
[8,] 0 0 0 0 0 0 0 103
> Rinv = ginv(R)
> Rinv
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 0.001792115 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000 0.000000 0.000000000
[2,] 0.000000000 0.001385042 0.000000000 0.00000000 0.00000000 0.00000000 0.000000 0.000000000
[3,] 0.000000000 0.000000000 0.003333333 0.00000000 0.00000000 0.00000000 0.000000 0.000000000
[4,] 0.000000000 0.000000000 0.000000000 0.01369863 0.00000000 0.00000000 0.000000 0.000000000
[5,] 0.000000000 0.000000000 0.000000000 0.00000000 0.01923077 0.00000000 0.000000 0.000000000
[6,] 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.01149425 0.000000 0.000000000
[7,] 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000 0.015625 0.000000000
[8,] 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000 0.000000 0.009708738
>
> # Left Hand Side
> LHS = rbind(
+ cbind(t(X) %*% Rinv %*% X, t(X) %*% Rinv %*% Z),
+ cbind(t(Z) %*% Rinv %*% X, t(Z) %*% Rinv %*% Z + diag(10) * alpha))
> LHS
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
0.076267880 -0.0292324941 0.0165285648 0.025943492 0.0299278126 -1.845749e-02 -0.0121950399 -0.0108954114 0.0054477057 0.0396312095 1.398055e-02
snp1 -0.029232494 24.6279258974 -0.0028682259 -0.020567675 -0.0149681047 9.542617e-03 0.0095592165 0.0041760706 -0.0020880353 -0.0168110757 7.872053e-04
snp2 0.016528565 -0.0028682259 24.6207119740 -0.004921569 0.0113384107 -2.579590e-03 -0.0160931411 -0.0023612235 0.0011806118 0.0007965454 1.724097e-02
snp3 0.025943492 -0.0205676752 -0.0049215685 24.636989205 0.0111223220 -3.126712e-03 -0.0205867825 -0.0037062131 0.0018531066 0.0129343415 9.602677e-03
snp4 0.029927813 -0.0149681047 0.0113384107 0.011122322 24.6396792508 -7.598423e-03 -0.0258437486 -0.0042754018 0.0021377009 0.0004520377 3.606263e-02
snp5 -0.018457489 0.0095426174 -0.0025795900 -0.003126712 -0.0075984226 2.460613e+01 -0.0005633219 0.0026367842 -0.0013183921 -0.0117993932 5.317478e-05
snp6 -0.012195040 0.0095592165 -0.0160931411 -0.020586783 -0.0258437486 -5.633219e-04 24.6530639051 0.0017421486 -0.0008710743 0.0180342684 -4.347322e-02
snp7 -0.010895411 0.0041760706 -0.0023612235 -0.003706213 -0.0042754018 2.636784e-03 0.0017421486 24.6000355316 -0.0007782437 -0.0056616014 -1.997222e-03
snp8 0.005447706 -0.0020880353 0.0011806118 0.001853107 0.0021377009 -1.318392e-03 -0.0008710743 -0.0007782437 24.5988681661 0.0028308007 9.986111e-04
snp9 0.039631209 -0.0168110757 0.0007965454 0.012934342 0.0004520377 -1.179939e-02 0.0180342684 -0.0056616014 0.0028308007 24.6361259751 -1.650383e-02
snp10 0.013980555 0.0007872053 0.0172409722 0.009602677 0.0360626336 5.317478e-05 -0.0434732183 -0.0019972221 0.0009986111 -0.0165038270 2.465204e+01
>
> # Right Hand Side
> RHS = rbind(t(X) %*% Rinv %*% y,
+ t(Z) %*% Rinv %*% y)
> RHS
[,1]
0.69592505
snp1 -0.26572369
snp2 0.04235790
snp3 0.34506266
snp4 0.24713577
snp5 -0.15650240
snp6 -0.05461040
snp7 -0.09941786
snp8 0.04970893
snp9 0.40602373
snp10 0.09651420
>
> # Solutions
>
> # generalized inverse of LHS
> gi_LHS = ginv(LHS)
> gi_LHS
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
[1,] 13.147293052 1.556141e-02 -8.807918e-03 -1.380434e-02 -1.592688e-02 9.837634e-03 6.465596e-03 5.807560e-03 -2.903780e-03 -2.113444e-02 -7.423874e-03
[2,] 0.015561405 4.062281e-02 -5.712222e-06 1.751802e-05 5.776172e-06 -4.076705e-06 -8.056763e-06 8.744768e-18 3.433872e-18 2.674969e-06 -1.014599e-05
[3,] -0.008807918 -5.712222e-06 4.062216e-02 1.740921e-05 -7.949681e-06 -2.339667e-06 2.211834e-05 -8.721898e-18 5.350707e-18 1.280970e-05 -2.336232e-05
[4,] -0.013804342 1.751802e-05 1.740921e-05 4.060396e-02 -1.520532e-06 -5.199698e-06 2.706639e-05 4.538403e-18 -2.510182e-18 8.726196e-07 -7.949932e-06
[5,] -0.015926881 5.776172e-06 -7.949681e-06 -1.520532e-06 4.060441e-02 6.012267e-07 3.457150e-05 1.866691e-17 -2.210756e-19 2.482223e-05 -5.028287e-05
[6,] 0.009837634 -4.076705e-06 -2.339667e-06 -5.199698e-06 6.012267e-07 4.064766e-02 5.778862e-06 1.549308e-17 -7.883759e-18 3.634628e-06 -5.651215e-06
[7,] 0.006465596 -8.056763e-06 2.211834e-05 2.706639e-05 3.457150e-05 5.778862e-06 4.056633e-02 -5.221111e-18 -1.382570e-18 -4.006944e-05 6.776774e-05
[8,] 0.005807560 2.030423e-17 -1.004242e-17 8.459318e-18 1.655272e-17 -1.081492e-17 -8.314475e-18 4.065292e-02 2.593318e-17 -1.023724e-17 -6.425168e-18
[9,] -0.002903780 -6.247715e-18 3.719745e-18 3.142069e-18 -1.326496e-17 -8.469482e-18 9.578672e-18 9.534626e-18 4.065292e-02 -2.255649e-18 5.814246e-18
[10,] -0.021134439 2.674969e-06 1.280970e-05 8.726196e-07 2.482223e-05 3.634628e-06 -4.006944e-05 5.082198e-20 2.004080e-18 4.062485e-02 3.906646e-05
[11,] -0.007423874 -1.014599e-05 -2.336232e-05 -7.949932e-06 -5.028287e-05 -5.651215e-06 6.776774e-05 -1.295537e-18 -7.271990e-18 3.906646e-05 4.056904e-02
>
> # solution
> sol = gi_LHS %*% RHS
> round(sol, 3)
[,1]
[1,] 9.124
[2,] 0.000
[3,] -0.004
[4,] 0.004
[5,] -0.001
[6,] 0.000
[7,] 0.002
[8,] 0.000
[9,] 0.000
[10,] 0.002
[11,] -0.001
>
> # mean effect of solutions
> mean_eff = sol[1]
> round(mean_eff, 3)
[1] 9.124
>
> # snp effect of solutions
> snp_eff = sol[2:11]
> round(snp_eff, 3)
[1] 0.000 -0.004 0.004 -0.001 0.000 0.002 0.000 0.000 0.002 -0.001
>
> # calculate DGV
> # genotype of animals
> # design matrix of animals
> P2 = sweep(M_all, 2, snp_mean * 2)
> round(P2, 3)
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
[1,] 1.357 -0.357 0.286 0.286 -0.286 -1.214 -0.143 0.071 -0.143 1.214
[2,] 0.357 -0.357 -0.714 -0.714 -0.286 0.786 -0.143 0.071 -0.143 -0.786
[3,] 0.357 0.643 1.286 0.286 0.714 -1.214 -0.143 0.071 -0.143 1.214
[4,] -0.643 -0.357 1.286 0.286 -0.286 -0.214 -0.143 0.071 0.857 0.214
[5,] -0.643 0.643 0.286 1.286 -0.286 -1.214 -0.143 0.071 -0.143 1.214
[6,] 0.357 0.643 -0.714 0.286 -0.286 0.786 -0.143 0.071 0.857 0.214
[7,] -0.643 -0.357 0.286 0.286 -0.286 0.786 -0.143 0.071 0.857 -0.786
[8,] -0.643 0.643 0.286 -0.714 -0.286 -0.214 -0.143 0.071 0.857 -0.786
[9,] 1.357 -0.357 -0.714 -0.714 -0.286 -0.214 1.857 0.071 -0.143 1.214
[10,] -0.643 -0.357 -0.714 0.286 0.714 0.786 -0.143 0.071 -1.143 -0.786
[11,] -0.643 0.643 0.286 -0.714 -0.286 -0.214 -0.143 0.071 0.857 0.214
[12,] 0.357 -0.357 -0.714 -0.714 0.714 -0.214 -0.143 0.071 -1.143 -0.786
[13,] -0.643 -0.357 -0.714 0.286 0.714 0.786 -0.143 0.071 -0.143 -0.786
[14,] 0.357 -0.357 0.286 0.286 -0.286 0.786 -0.143 -0.929 -1.143 -0.786
>
> # DGV
> dgv = P2 %*% snp_eff
> round(dgv, 3)
[,1]
[1,] -0.002
[2,] 0.002
[3,] -0.002
[4,] 0.008
[5,] -0.008
[6,] -0.003
[7,] 0.007
[8,] 0.001
[9,] -0.003
[10,] -0.001
[11,] 0.000
[12,] -0.002
[13,] 0.001
[14,] 0.003
>
책의 값과 다른 결과 값이 나온다. EDC의 역수를 가중치로 주어야 하는데 EDC 값을 그대로 주어서 나온 것으로 보인다. EDC의 역수를 가중치로 주자.
'Animal Breeding > R for Genetic Evaluation' 카테고리의 다른 글
SNP-BLUP Model with Polygenic Effects(unweighted analysis) (0) | 2020.12.24 |
---|---|
Mixed Linear Model(GBLUP model) for Computing DGV (0) | 2020.12.21 |
Mixed Linear Model(SNP-BLUP Model) for Computing SNP Effects, unweighted analysis (0) | 2020.12.19 |
Fixed Effect Model for SNP Effects, weighted analysis (0) | 2020.12.16 |
Fixed Effect Model for SNP Effects, unweighted analysis (0) | 2020.12.16 |