# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
# Raphael Mrode
# Example 11.2 p183
SNP effect를 임의 효과로 다루어 SNP effect를 추정한다. 각 SNP effect의 분산은 1) 상가적 유전 분산을 SNP의 수로 나누어 사용할 수도 있고 2) 상가적 유전 분산을 각 SNP의 2p(1-p)를 합한 값(s2pq)으로 나누어 사용하기도 한다. 두번째 경우가 대립 유전자 빈도의 차이를 고려한다는 점에서 많이 쓰이고 있다. 두번째를 사용할 경우 alpha = sigma_e * s2pq / sigma_a 로 계산하여 사용한다. 여기서는 혈통이 필요없다.
추정한 SNP effect를 이용하여 SNP genotype을 가지고 있는 개체의 DGV를 계산한다.
Data
animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
13 0 0 1 558 9 2 0 1 1 0 0 0 2 1 2
14 0 0 1 722 13.4 1 0 0 0 0 2 0 2 1 0
15 13 4 1 300 12.7 1 1 2 1 1 0 0 2 1 2
16 15 2 1 73 15.4 0 0 2 1 0 1 0 2 2 1
17 15 5 1 52 5.9 0 1 1 2 0 0 0 2 1 2
18 14 6 1 87 7.7 1 1 0 1 0 2 0 2 2 1
19 14 9 1 64 10.2 0 0 1 1 0 2 0 2 2 0
20 14 9 1 103 4.8 0 1 1 0 0 1 0 2 2 0
21 1 3 1 13 7.6 2 0 0 0 0 1 2 2 1 2
22 14 8 1 125 8.8 0 0 0 1 1 2 0 2 0 0
23 14 11 1 93 9.8 0 1 1 0 0 1 0 2 2 1
24 14 10 1 66 9.2 1 0 0 0 1 1 0 2 0 0
25 14 7 1 75 11.5 0 0 0 1 1 2 0 2 1 0
26 14 12 1 33 13.3 1 0 1 1 0 2 0 1 0 0
Program
# Mixed Linear Model(SNP-BLUP Model) for Computing SNP Effect, unweighted analysis
# Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
# Raphael Mrode
# Example 11.2 p183
# MASS packages
if (!("MASS" %in% installed.packages())) {
install.packages("MASS", dependencies = TRUE)
}
library(MASS)
# set working directory
setwd(".")
# print working directory
getwd()
# read data
data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
data
# variance component and ratio
sigma_a = 35.241
sigma_e = 245
sigma_a
sigma_e
# SNP
M_all = as.matrix(data[, 7:16])
M_all
# mean of each SNP(allele frequency)
snp_mean = colMeans(M_all) / 2
snp_mean
# sum of 2pq(heterozygote ratio)
s2pq = 0
for (i in 1:10) {
s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
}
s2pq
# alpha
alpha = sigma_e * s2pq / sigma_a
alpha
# observation
y = data[1:8, 6]
y
# design matrix of fixed effect(general mean)
X = matrix(rep(1, 8), 8, 1)
X
# design matrix for SNP effect
M = as.matrix(data[1:8, 7:16])
Z = sweep(M, 2, snp_mean * 2)
Z
# Left Hand Side
LHS = rbind(
cbind(t(X) %*% X, t(X) %*% Z),
cbind(t(Z) %*% X, t(Z) %*% Z + diag(10) * alpha))
LHS
# Right Hand Side
RHS = rbind(t(X) %*% y,
t(Z) %*% y)
RHS
# Solutions
# generalized inverse of LHS
gi_LHS = ginv(LHS)
gi_LHS
# solution
sol = gi_LHS %*% RHS
round(sol, 3)
# mean effect of solutions
mean_eff = sol[1]
round(mean_eff, 3)
# snp effect of solutions
snp_eff = sol[2:11]
round(snp_eff, 3)
# calculate DGV
# genotype of animals
# design matrix of animals
P2 = sweep(M_all, 2, snp_mean * 2)
round(P2, 3)
# DGV
dgv = P2 %*% snp_eff
round(dgv, 3)
RStudio에서 실행하는 화면
실행 결과
> # Mixed Linear Model(SNP-BLUP Model) for Computing SNP Effect, unweighted analysis
>
> # Linear Models for the Prediction of Animal Breeding Values, 3rd Edition.
>
> # Raphael Mrode
>
> # Example 11.2 p183
>
> # MASS packages
> if (!("MASS" %in% installed.packages())) {
+ install.packages("MASS", dependencies = TRUE)
+ }
> library(MASS)
>
> # set working directory
> setwd(".")
>
> # print working directory
> getwd()
[1] "D:/users/bhpark/2020/job/공부하기/07_Linear Models for the Prediction of Animal Breeding Values_3rd_Mrode/15_mixed linear model SNPBLUP for computing SNP effects_unweighted"
>
> # read data
> data = read.table("snp_data.txt", header = TRUE, sep = " ", stringsAsFactors = FALSE)
> data
animal sire dam mean edc fat_dyd snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
1 13 0 0 1 558 9.0 2 0 1 1 0 0 0 2 1 2
2 14 0 0 1 722 13.4 1 0 0 0 0 2 0 2 1 0
3 15 13 4 1 300 12.7 1 1 2 1 1 0 0 2 1 2
4 16 15 2 1 73 15.4 0 0 2 1 0 1 0 2 2 1
5 17 15 5 1 52 5.9 0 1 1 2 0 0 0 2 1 2
6 18 14 6 1 87 7.7 1 1 0 1 0 2 0 2 2 1
7 19 14 9 1 64 10.2 0 0 1 1 0 2 0 2 2 0
8 20 14 9 1 103 4.8 0 1 1 0 0 1 0 2 2 0
9 21 1 3 1 13 7.6 2 0 0 0 0 1 2 2 1 2
10 22 14 8 1 125 8.8 0 0 0 1 1 2 0 2 0 0
11 23 14 11 1 93 9.8 0 1 1 0 0 1 0 2 2 1
12 24 14 10 1 66 9.2 1 0 0 0 1 1 0 2 0 0
13 25 14 7 1 75 11.5 0 0 0 1 1 2 0 2 1 0
14 26 14 12 1 33 13.3 1 0 1 1 0 2 0 1 0 0
>
> # variance component and ratio
> sigma_a = 35.241
> sigma_e = 245
> sigma_a
[1] 35.241
> sigma_e
[1] 245
>
> # SNP
> M_all = as.matrix(data[, 7:16])
> M_all
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
[1,] 2 0 1 1 0 0 0 2 1 2
[2,] 1 0 0 0 0 2 0 2 1 0
[3,] 1 1 2 1 1 0 0 2 1 2
[4,] 0 0 2 1 0 1 0 2 2 1
[5,] 0 1 1 2 0 0 0 2 1 2
[6,] 1 1 0 1 0 2 0 2 2 1
[7,] 0 0 1 1 0 2 0 2 2 0
[8,] 0 1 1 0 0 1 0 2 2 0
[9,] 2 0 0 0 0 1 2 2 1 2
[10,] 0 0 0 1 1 2 0 2 0 0
[11,] 0 1 1 0 0 1 0 2 2 1
[12,] 1 0 0 0 1 1 0 2 0 0
[13,] 0 0 0 1 1 2 0 2 1 0
[14,] 1 0 1 1 0 2 0 1 0 0
>
> # mean of each SNP(allele frequency)
> snp_mean = colMeans(M_all) / 2
> snp_mean
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
0.32142857 0.17857143 0.35714286 0.35714286 0.14285714 0.60714286 0.07142857 0.96428571 0.57142857 0.39285714
>
> # sum of 2pq(heterozygote ratio)
> s2pq = 0
> for (i in 1:10) {
+ s2pq = s2pq + 2 * snp_mean[i] * (1 - snp_mean[i])
+ }
> s2pq
snp1
3.538265
>
> # alpha
> alpha = sigma_e * s2pq / sigma_a
> alpha
snp1
24.59848
>
> # observation
> y = data[1:8, 6]
> y
[1] 9.0 13.4 12.7 15.4 5.9 7.7 10.2 4.8
>
> # design matrix of fixed effect(general mean)
> X = matrix(rep(1, 8), 8, 1)
> X
[,1]
[1,] 1
[2,] 1
[3,] 1
[4,] 1
[5,] 1
[6,] 1
[7,] 1
[8,] 1
>
> # design matrix for SNP effect
> M = as.matrix(data[1:8, 7:16])
> Z = sweep(M, 2, snp_mean * 2)
> Z
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
1 1.3571429 -0.3571429 0.2857143 0.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571 1.2142857
2 0.3571429 -0.3571429 -0.7142857 -0.7142857 -0.2857143 0.7857143 -0.1428571 0.07142857 -0.1428571 -0.7857143
3 0.3571429 0.6428571 1.2857143 0.2857143 0.7142857 -1.2142857 -0.1428571 0.07142857 -0.1428571 1.2142857
4 -0.6428571 -0.3571429 1.2857143 0.2857143 -0.2857143 -0.2142857 -0.1428571 0.07142857 0.8571429 0.2142857
5 -0.6428571 0.6428571 0.2857143 1.2857143 -0.2857143 -1.2142857 -0.1428571 0.07142857 -0.1428571 1.2142857
6 0.3571429 0.6428571 -0.7142857 0.2857143 -0.2857143 0.7857143 -0.1428571 0.07142857 0.8571429 0.2142857
7 -0.6428571 -0.3571429 0.2857143 0.2857143 -0.2857143 0.7857143 -0.1428571 0.07142857 0.8571429 -0.7857143
8 -0.6428571 0.6428571 0.2857143 -0.7142857 -0.2857143 -0.2142857 -0.1428571 0.07142857 0.8571429 -0.7857143
>
> # Left Hand Side
> LHS = rbind(
+ cbind(t(X) %*% X, t(X) %*% Z),
+ cbind(t(Z) %*% X, t(Z) %*% Z + diag(10) * alpha))
> LHS
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
8.0000000 -0.14285714 1.14285714 2.2857143 1.28571429 -1.28571429 -1.7142857 -1.14285714 0.57142857 2.85714286 1.7142857
snp1 -0.1428571 28.47603006 -0.52040816 -1.0408163 -0.39795918 0.39795918 -0.9693878 0.02040816 -0.01020408 -1.55102041 1.9693878
snp2 1.1428571 -0.52040816 26.76174435 0.3265306 0.68367347 0.31632653 -1.2448980 -0.16326531 0.08163265 0.40816327 1.2448980
snp3 2.2857143 -1.04081633 0.32653061 29.2515403 1.36734694 0.63265306 -3.4897959 -0.32653061 0.16326531 0.81632653 2.4897959
snp4 1.2857143 -0.39795918 0.68367347 1.3673469 27.68011170 -0.08163265 -2.2755102 -0.18367347 0.09183673 -0.04081633 3.2755102
snp5 -1.2857143 0.39795918 0.31632653 0.6326531 -0.08163265 25.68011170 -0.7244898 0.18367347 -0.09183673 -0.95918367 0.7244898
snp6 -1.7142857 -0.96938776 -1.24489796 -3.4897959 -2.27551020 -0.72448980 30.9658260 0.24489796 -0.12244898 1.38775510 -5.3673469
snp7 -1.1428571 0.02040816 -0.16326531 -0.3265306 -0.18367347 0.18367347 0.2448980 24.76174435 -0.08163265 -0.40816327 -0.2448980
snp8 0.5714286 -0.01020408 0.08163265 0.1632653 0.09183673 -0.09183673 -0.1224490 -0.08163265 24.63929537 0.20408163 0.1224490
snp9 2.8571429 -1.55102041 0.40816327 0.8163265 -0.04081633 -0.95918367 1.3877551 -0.40816327 0.20408163 27.61888721 -1.3877551
snp10 1.7142857 1.96938776 1.24489796 2.4897959 3.27551020 0.72448980 -5.3673469 -0.24489796 0.12244898 -1.38775510 30.9658260
>
> # Right Hand Side
> RHS = rbind(t(X) %*% y,
+ t(Z) %*% y)
> RHS
[,1]
79.10
snp1 0.95
snp2 2.85
snp3 29.60
snp4 10.30
snp5 -9.90
snp6 -13.25
snp7 -11.30
snp8 5.65
snp9 26.80
snp10 16.15
>
> # Solutions
>
> # generalized inverse of LHS
> gi_LHS = ginv(LHS)
> gi_LHS
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1.388294e-01 -5.663085e-05 -4.970969e-03 -9.042826e-03 -4.643044e-03 6.969389e-03 5.829371e-03 5.807560e-03 -2.903780e-03 -1.427606e-02
[2,] -5.663085e-05 3.548365e-02 7.785237e-04 1.479041e-03 7.596326e-04 -4.419903e-04 8.832420e-04 2.243621e-17 -1.284610e-17 1.772102e-03
[3,] -4.970969e-03 7.785237e-04 3.771277e-02 2.541537e-04 -4.906823e-04 -6.784168e-04 1.063631e-03 2.168404e-18 -4.824700e-18 -1.381252e-04
[4,] -9.042826e-03 1.479041e-03 2.541537e-04 3.555059e-02 -8.504422e-04 -1.229133e-03 3.172508e-03 1.517883e-18 -4.770490e-18 -3.301998e-04
[5,] -4.643044e-03 7.596326e-04 -4.906823e-04 -8.504422e-04 3.694391e-02 5.655121e-05 1.781453e-03 -1.338990e-17 6.478108e-18 3.572256e-04
[6,] 6.969389e-03 -4.419903e-04 -6.784168e-04 -1.229133e-03 5.655121e-05 3.941053e-02 9.383621e-04 2.276825e-18 -4.391019e-18 5.733858e-04
[7,] 5.829371e-03 8.832420e-04 1.063631e-03 3.172508e-03 1.781453e-03 9.383621e-04 3.414696e-02 7.155734e-18 -3.713392e-18 -2.095373e-03
[8,] 5.807560e-03 8.548257e-18 2.059984e-18 -1.702197e-17 -6.559423e-18 -1.095044e-17 -1.084202e-18 4.065292e-02 2.574980e-18 7.155734e-18
[9,] -2.903780e-03 2.554651e-18 -2.141299e-18 7.806256e-18 1.208885e-17 1.040834e-17 -6.803369e-18 -9.215718e-19 4.065292e-02 1.192622e-18
[10,] -1.427606e-02 1.772102e-03 -1.381252e-04 -3.301998e-04 3.572256e-04 5.733858e-04 -2.095373e-03 -2.168404e-18 4.878910e-18 3.802145e-02
[11,] -5.999035e-03 -2.241301e-03 -1.064947e-03 -1.808286e-03 -3.287558e-03 -9.713208e-04 4.937702e-03 -2.547875e-18 5.068645e-18 1.999292e-03
[,11]
[1,] -5.999035e-03
[2,] -2.241301e-03
[3,] -1.064947e-03
[4,] -1.808286e-03
[5,] -3.287558e-03
[6,] -9.713208e-04
[7,] 4.937702e-03
[8,] -2.005774e-18
[9,] -1.095044e-17
[10,] 1.999292e-03
[11,] 3.427246e-02
>
> # solution
> sol = gi_LHS %*% RHS
> round(sol, 3)
[,1]
[1,] 9.944
[2,] 0.087
[3,] -0.311
[4,] 0.262
[5,] -0.080
[6,] 0.110
[7,] 0.139
[8,] 0.000
[9,] 0.000
[10,] -0.061
[11,] -0.016
>
> # mean effect of solutions
> mean_eff = sol[1]
> round(mean_eff, 3)
[1] 9.944
>
> # snp effect of solutions
> snp_eff = sol[2:11]
> round(snp_eff, 3)
[1] 0.087 -0.311 0.262 -0.080 0.110 0.139 0.000 0.000 -0.061 -0.016
>
> # calculate DGV
> # genotype of animals
> # design matrix of animals
> P2 = sweep(M_all, 2, snp_mean * 2)
> round(P2, 3)
snp1 snp2 snp3 snp4 snp5 snp6 snp7 snp8 snp9 snp10
[1,] 1.357 -0.357 0.286 0.286 -0.286 -1.214 -0.143 0.071 -0.143 1.214
[2,] 0.357 -0.357 -0.714 -0.714 -0.286 0.786 -0.143 0.071 -0.143 -0.786
[3,] 0.357 0.643 1.286 0.286 0.714 -1.214 -0.143 0.071 -0.143 1.214
[4,] -0.643 -0.357 1.286 0.286 -0.286 -0.214 -0.143 0.071 0.857 0.214
[5,] -0.643 0.643 0.286 1.286 -0.286 -1.214 -0.143 0.071 -0.143 1.214
[6,] 0.357 0.643 -0.714 0.286 -0.286 0.786 -0.143 0.071 0.857 0.214
[7,] -0.643 -0.357 0.286 0.286 -0.286 0.786 -0.143 0.071 0.857 -0.786
[8,] -0.643 0.643 0.286 -0.714 -0.286 -0.214 -0.143 0.071 0.857 -0.786
[9,] 1.357 -0.357 -0.714 -0.714 -0.286 -0.214 1.857 0.071 -0.143 1.214
[10,] -0.643 -0.357 -0.714 0.286 0.714 0.786 -0.143 0.071 -1.143 -0.786
[11,] -0.643 0.643 0.286 -0.714 -0.286 -0.214 -0.143 0.071 0.857 0.214
[12,] 0.357 -0.357 -0.714 -0.714 0.714 -0.214 -0.143 0.071 -1.143 -0.786
[13,] -0.643 -0.357 -0.714 0.286 0.714 0.786 -0.143 0.071 -0.143 -0.786
[14,] 0.357 -0.357 0.286 0.286 -0.286 0.786 -0.143 -0.929 -1.143 -0.786
>
> # DGV
> dgv = P2 %*% snp_eff
> round(dgv, 3)
[,1]
[1,] 0.070
[2,] 0.111
[3,] 0.045
[4,] 0.253
[5,] -0.495
[6,] -0.357
[7,] 0.145
[8,] -0.224
[9,] 0.027
[10,] 0.114
[11,] -0.240
[12,] 0.143
[13,] 0.054
[14,] 0.354
>
'Animal Breeding > R for Genetic Evaluation' 카테고리의 다른 글
Mixed Linear Model(GBLUP model) for Computing DGV (0) | 2020.12.21 |
---|---|
Mixed Linear Model(SNP-BLUP Model) for Computing SNP effect, weighted analysis (0) | 2020.12.19 |
Fixed Effect Model for SNP Effects, weighted analysis (0) | 2020.12.16 |
Fixed Effect Model for SNP Effects, unweighted analysis (0) | 2020.12.16 |
임의 회귀 모형의 해를 이용한 개체 육종가 그래프 및 305일령 육종가 계산 (0) | 2020.11.26 |