AlphaSimR: Base population and traits
To start a set of simulations in the AlphaSimR
package
[@gaynor2021alphasimr], four steps must be
implemented, as follows:
In this vignette, we will implement and further explore the first two.
First, we clean our working environment and download/load the AlphaSimR package.
# install.packages('AlphaSimR')
require(AlphaSimR)
## Carregando pacotes exigidos: AlphaSimR
## Warning: package 'AlphaSimR' was built under R version 4.3.3
## Carregando pacotes exigidos: R6
To create the founder genome we may use the function
runMacs()
. It uses the method from Chen et al. [@chen2009fast] to simulate a set of haplotypes
and, from that, a species-specific demography. This allows us to
simulate a base genome with characteristics of the target crop that we
are working with.
# Founder genome
founderGenomes = runMacs(nInd = 3, # Number of individuals that compose the base population
nChr = 3, # Number of chromosome pairs of the target species
segSites = 100, # number of segregation sites
species = "MAIZE", # We can use the base parameters and we have available in the package MAIZE, WHEAT, CATTLE, and GENERIC.
ploidy = 2) # Setting the ploidy
# Object created
founderGenomes
## An object of class "MapPop"
## Ploidy: 2
## Individuals: 3
## Chromosomes: 3
## Loci: 300
# Genetic map of the segregation sites
founderGenomes@genMap
## $`1`
## 1_1 1_2 1_3 1_4 1_5 1_6 1_7
## 0.00000000 0.02837015 0.06204803 0.07339700 0.08044982 0.09837414 0.10184805
## 1_8 1_9 1_10 1_11 1_12 1_13 1_14
## 0.12349998 0.15468114 0.19101596 0.20782833 0.21936731 0.22275889 0.22957566
## 1_15 1_16 1_17 1_18 1_19 1_20 1_21
## 0.23631150 0.25513641 0.27870106 0.28101047 0.30915820 0.31636872 0.32954374
## 1_22 1_23 1_24 1_25 1_26 1_27 1_28
## 0.33057522 0.33329006 0.34610576 0.35355402 0.35432379 0.35702597 0.38477560
## 1_29 1_30 1_31 1_32 1_33 1_34 1_35
## 0.39187492 0.39911256 0.41190322 0.43425446 0.44349615 0.46026235 0.46422103
## 1_36 1_37 1_38 1_39 1_40 1_41 1_42
## 0.53139085 0.54782000 0.57319211 0.58041145 0.58442975 0.59284263 0.64447910
## 1_43 1_44 1_45 1_46 1_47 1_48 1_49
## 0.66688473 0.68060523 0.69439783 0.74476851 0.74992437 0.76175479 0.77944388
## 1_50 1_51 1_52 1_53 1_54 1_55 1_56
## 0.79800006 0.91755968 0.95245205 0.96469948 0.99690802 1.00777808 1.02441336
## 1_57 1_58 1_59 1_60 1_61 1_62 1_63
## 1.03972127 1.04557799 1.09261641 1.09315969 1.10024097 1.10548083 1.11927284
## 1_64 1_65 1_66 1_67 1_68 1_69 1_70
## 1.14982830 1.15116527 1.16576716 1.18061177 1.19477834 1.20070795 1.20393348
## 1_71 1_72 1_73 1_74 1_75 1_76 1_77
## 1.25704089 1.27694138 1.29249395 1.30358691 1.38402779 1.40367609 1.40385738
## 1_78 1_79 1_80 1_81 1_82 1_83 1_84
## 1.47606174 1.47957721 1.48938939 1.50696747 1.52910589 1.56570824 1.58070877
## 1_85 1_86 1_87 1_88 1_89 1_90 1_91
## 1.60433924 1.60601297 1.62578110 1.66366388 1.66511607 1.67581085 1.71791374
## 1_92 1_93 1_94 1_95 1_96 1_97 1_98
## 1.72920237 1.75958021 1.77857754 1.77932684 1.81260863 1.83200474 1.83425295
## 1_99 1_100
## 1.83949863 1.85807397
##
## $`2`
## 2_1 2_2 2_3 2_4 2_5 2_6 2_7
## 0.00000000 0.05797398 0.06037806 0.07078029 0.09530634 0.12347324 0.15513628
## 2_8 2_9 2_10 2_11 2_12 2_13 2_14
## 0.19105499 0.20365944 0.22904424 0.23717559 0.23932378 0.25155779 0.27501583
## 2_15 2_16 2_17 2_18 2_19 2_20 2_21
## 0.28060696 0.30838358 0.30999540 0.31143174 0.31958103 0.33153832 0.36296996
## 2_22 2_23 2_24 2_25 2_26 2_27 2_28
## 0.37046494 0.37409022 0.50124062 0.52898961 0.54235298 0.58410300 0.59790408
## 2_29 2_30 2_31 2_32 2_33 2_34 2_35
## 0.60140335 0.60612927 0.61280592 0.61907727 0.63030080 0.63106014 0.64562360
## 2_36 2_37 2_38 2_39 2_40 2_41 2_42
## 0.65810894 0.67972576 0.68215875 0.69028351 0.69081361 0.71305709 0.73348762
## 2_43 2_44 2_45 2_46 2_47 2_48 2_49
## 0.78843759 0.80700978 0.86879364 0.90107771 0.92020924 0.98214140 1.03138579
## 2_50 2_51 2_52 2_53 2_54 2_55 2_56
## 1.08015102 1.10516409 1.14727009 1.16607522 1.23515090 1.27814190 1.31512987
## 2_57 2_58 2_59 2_60 2_61 2_62 2_63
## 1.32310079 1.36802336 1.37088395 1.37520700 1.38825933 1.43754995 1.45192702
## 2_64 2_65 2_66 2_67 2_68 2_69 2_70
## 1.45311276 1.45781511 1.52351257 1.52587200 1.55115309 1.55879597 1.57097051
## 2_71 2_72 2_73 2_74 2_75 2_76 2_77
## 1.57997508 1.59173009 1.63671627 1.70087682 1.70643644 1.73355515 1.73540134
## 2_78 2_79 2_80 2_81 2_82 2_83 2_84
## 1.73913819 1.75396081 1.75468731 1.77013975 1.77367008 1.77406477 1.79955244
## 2_85 2_86 2_87 2_88 2_89 2_90 2_91
## 1.80388061 1.81218269 1.82306857 1.82541434 1.82721422 1.83200499 1.85422939
## 2_92 2_93 2_94 2_95 2_96 2_97 2_98
## 1.86192001 1.88033632 1.91506742 1.91614103 1.92212446 1.94415944 1.94504898
## 2_99 2_100
## 1.96708472 1.97775187
##
## $`3`
## 3_1 3_2 3_3 3_4 3_5 3_6 3_7
## 0.00000000 0.02422119 0.03987279 0.04216642 0.06535351 0.10962746 0.12810352
## 3_8 3_9 3_10 3_11 3_12 3_13 3_14
## 0.16779334 0.18755970 0.19642826 0.22179732 0.25545416 0.28150024 0.31997597
## 3_15 3_16 3_17 3_18 3_19 3_20 3_21
## 0.33037666 0.39319751 0.39740935 0.40439652 0.43682914 0.45930941 0.47749385
## 3_22 3_23 3_24 3_25 3_26 3_27 3_28
## 0.50945573 0.51547441 0.55832692 0.56820330 0.58320341 0.59297739 0.61079790
## 3_29 3_30 3_31 3_32 3_33 3_34 3_35
## 0.62130871 0.63412921 0.65828919 0.66087471 0.66558194 0.67287097 0.68352287
## 3_36 3_37 3_38 3_39 3_40 3_41 3_42
## 0.68407074 0.73932804 0.77225284 0.79243751 0.80542021 0.84110585 0.84574249
## 3_43 3_44 3_45 3_46 3_47 3_48 3_49
## 0.85849518 0.87083421 0.87737317 0.89235973 0.89885974 0.90616422 0.92414201
## 3_50 3_51 3_52 3_53 3_54 3_55 3_56
## 0.93765031 0.96043110 0.98513528 1.00258854 1.06297322 1.06446905 1.09568045
## 3_57 3_58 3_59 3_60 3_61 3_62 3_63
## 1.10571552 1.12600325 1.13584161 1.18553192 1.19717885 1.21398181 1.23224654
## 3_64 3_65 3_66 3_67 3_68 3_69 3_70
## 1.24954551 1.27075360 1.27137476 1.28029865 1.28891809 1.29833942 1.31692184
## 3_71 3_72 3_73 3_74 3_75 3_76 3_77
## 1.32118922 1.33453885 1.35259349 1.35448529 1.36444254 1.36900948 1.37535088
## 3_78 3_79 3_80 3_81 3_82 3_83 3_84
## 1.39430859 1.39526848 1.48549425 1.54484832 1.60956903 1.62134005 1.62463881
## 3_85 3_86 3_87 3_88 3_89 3_90 3_91
## 1.63525162 1.70867116 1.71318687 1.73152479 1.74249122 1.76645422 1.79716402
## 3_92 3_93 3_94 3_95 3_96 3_97 3_98
## 1.80092107 1.83379637 1.86655244 1.89922636 1.92158622 1.92905837 1.93806440
## 3_99 3_100
## 1.95827053 1.96909361
When we choose the species in the argument species, four parameters are automatically adjusted:
# Genome length (i.e. Maize)
genLen = 1.43
# Effective population size (Ne)
Ne = 50
# Demographic bottlenecks
speciesParams = "8E8 -t 4E-7 -r 3.6E-7"
# Species history
speciesHist = "-eN 0.03 1 -eN 0.05 2 -eN 0.10 4 -eN 0.15 6"
Obs: runMacs()
functions offers four
target species with their demography already implemented in the package.
Other than that, we can use runMacs2()
function to set our
own species characteristics (further discussed).
With the founder genomes in perspective, we can proceed to add the characteristics of the target trait or traits to the simulation. AlphaSimR has a vignette that we recommend you take a look at link. It gives, thoroughly, an explanation on the traits of interest and how to interpret its effects.
We will start with a trait with only additive effects. For such, we have the following parameters:
# Global simulation parameters from founder genomes.
SP = SimParam$new(founderGenomes)
# Additive trait
SP$addTraitA(nQtlPerChr = 15, # Number of QTL per chromosome
mean = 10, # Trait mean
var = 10) # Trait variance
# QTL effects (for the traits)
SP$traits[[1]]@addEff
## [1] 5.18369706 -0.04665924 2.67819987 4.18069387 -4.13664613 -2.55717076
## [7] -1.70496758 0.21118005 -0.15590280 -1.49761432 -0.69429385 -0.57762859
## [13] 0.37933696 0.48930026 -3.74719223 -0.99840873 -0.87839424 0.69904237
## [19] 0.64294367 -2.23251035 3.06182941 -0.81670421 1.89213880 -2.52716469
## [25] 0.87736165 0.48373715 -1.62513725 -2.89465910 -4.52812221 1.62640780
## [31] 2.91099670 -2.98542349 4.18165049 -0.48825013 -4.92205730 -0.30079222
## [37] 0.80483252 1.75289095 0.56586107 -3.46094774 -0.82691961 -3.72397542
## [43] 0.23932920 2.87351709 -0.56323985
After simulating the base genome and to set the trait
characteristics, we can create individuals from that base genome for the
trait of interest. We will use the function newPop()
, as
follows:
# Creating individuals/population
basePop = newPop(founderGenomes)
# Gen param
genParam(basePop)
## $varA
## Trait1
## Trait1 10
##
## $varD
## Trait1
## Trait1 0
##
## $varAA
## Trait1
## Trait1 0
##
## $varG
## Trait1
## Trait1 10
##
## $genicVarA
## Trait1
## 97.3069
##
## $genicVarD
## Trait1
## 0
##
## $genicVarAA
## Trait1
## 0
##
## $genicVarG
## Trait1
## 97.3069
##
## $covA_HW
## Trait1
## -39.21087
##
## $covD_HW
## Trait1
## 0
##
## $covAA_HW
## Trait1
## 0
##
## $covG_HW
## Trait1
## -39.21087
##
## $covA_L
## Trait1
## -48.09603
##
## $covD_L
## Trait1
## 0
##
## $covAA_L
## Trait1
## 0
##
## $covAD_L
## Trait1
## 0
##
## $covAAA_L
## Trait1
## 0
##
## $covDAA_L
## Trait1
## 0
##
## $covG_L
## Trait1
## -48.09603
##
## $mu
## Trait1
## 10
##
## $mu_HW
## Trait1
## 10
##
## $gv
## Trait1
## [1,] 6.527762
## [2,] 14.176953
## [3,] 9.295285
##
## $bv
## Trait1
## [1,] -3.4722381
## [2,] 4.1769533
## [3,] -0.7047152
##
## $dd
## Trait1
## [1,] 0
## [2,] 0
## [3,] 0
##
## $aa
## Trait1
## [1,] 0
## [2,] 0
## [3,] 0
##
## $gv_mu
## Trait1
## 23.43341
##
## $gv_a
## Trait1
## [1,] -16.905645
## [2,] -9.256454
## [3,] -14.138122
##
## $gv_d
## Trait1
## [1,] 0
## [2,] 0
## [3,] 0
##
## $gv_aa
## Trait1
## [1,] 0
## [2,] 0
## [3,] 0
# Looking at the population
# haplotypes
popHaplo = pullSegSiteHaplo(basePop)
popHaplo[, 1:10]
## 1_1 1_2 1_3 1_4 1_5 1_6 1_7 1_8 1_9 1_10
## 1_1 0 0 1 1 0 0 1 0 0 1
## 1_2 1 1 0 0 0 1 0 0 1 1
## 2_1 0 1 0 0 0 0 1 0 1 1
## 2_2 0 1 1 1 0 0 0 1 0 1
## 3_1 1 1 1 0 1 0 1 0 0 0
## 3_2 1 1 1 0 1 0 1 0 0 0
# Check the genotypes
popGeno = pullSegSiteGeno(basePop)
popGeno[, 1:10]
## 1_1 1_2 1_3 1_4 1_5 1_6 1_7 1_8 1_9 1_10
## 1 1 1 1 1 0 1 1 0 1 2
## 2 0 2 1 1 0 0 1 1 1 2
## 3 2 2 2 0 2 0 2 0 0 0
Allele frequency describes the proportion of mutations at a locus and can be estimated from the haplotypes (popHaplo object).
# Allele frequency
alleleFreq = colMeans(popHaplo)
plot(alleleFreq)
# Global simulation parameters from founder genomes.
SP = SimParam$new(founderGenomes)
# Additive trait
SP$addTraitAD(nQtlPerChr = 15, # Number of QTLs controlling the trait per chromosome
# nQtlPerChr = c(15, 5), # Alternative: QTL effect by chromosome
mean = 10,
var = 10,
meanDD = 0.1, # dominance degree - it varies from 0-1 (meaning no-dominance and codominance)
varDD = 0.2) # Variance for the degree of dominance
# QTL effects for additive effects
SP$traits[[1]]@addEff
## [1] -0.46914754 -0.58306053 -0.34767036 0.92316884 0.99645004 0.75450683
## [7] 0.32871618 1.22934896 -0.73158922 0.01915808 0.14479955 -0.80122761
## [13] 0.66372963 -0.38717444 -0.14783748 -0.07351241 -0.55890754 -0.31525680
## [19] 0.01173877 0.62706843 -0.21675538 -0.07157586 -0.29985650 -0.84245777
## [25] 0.11111941 -0.83235940 -0.59816762 -0.89828102 -1.10297484 -0.24262468
## [31] -0.22108602 -0.97637903 -0.13412185 1.09174369 0.60027153 0.41014302
## [37] -0.62615157 -0.34913145 0.25782116 0.04477153 0.10525056 0.64306248
## [43] -0.09944761 -0.14942588 -0.53277785
# QTL effects for dominance effects
SP$traits[[1]]@domEff
## [1] 0.2750242034 0.1904098522 0.0363930996 0.3931208598 -0.0388273378
## [6] 1.1170181426 0.3178918892 0.7109197477 -0.0943834545 0.0228228573
## [11] 0.0329670550 0.1562114269 0.4028062570 0.1702880985 0.0390050082
## [16] -0.0118985179 -0.2700778732 0.0095246092 -0.0006421675 0.2888494366
## [21] 0.2740047238 0.0049068146 0.0730821887 0.5340481898 -0.0767003838
## [26] 0.4887583369 0.2491775900 0.5035265059 0.6604663907 0.0717346556
## [31] 0.0987234339 -0.1839668422 -0.1758822918 0.6928498148 -0.1926607890
## [36] -0.0192730709 -0.2381453295 0.1955765744 0.1997986418 0.0168470438
## [41] 0.0130297880 0.0410519645 0.0616899448 0.0346306881 0.0693281580
The implementation of traits in AlphaSimR follows a biological model, which is responsible for converting into a genetic value each individual genotype before created. In a straightforward way, the genetic value is used to create the individuals’ phenotypes. The biological effects presented in AlphaSimR are:
A: additive effect
D: dominance effect
G: genotype by environment interaction effect
E: epistasis effect
So, we can create traits with the combinations of those effects (assuming that all of them as, at least, additive) using the ADGE framework, as it follows:
# Traits that can be created in AlphaSimR:
SP$addTraitA()
SP$addTraitAD()
SP$addTraitADG()
SP$addTraitADEG()
SP$addTraitAG()
SP$addTraitAE()
SP$addTraitAEG()
It is important to have in mind that for dominance effects (D) we set the mean of the dominance degree (0-1) and variance, whereas for genotype by environment effect (G) and epistasis effect (E) we have just to adjust the variance.
The AlphaSimR package allows you to import genomic data and requires a genetic map and phased genotypes.
First, we load the genetic map in the format ‘Marker name, Chromosome, Position (in Morgans)’.
In the example below, we have a diploid species with 20 individuals, 10 chromosomes, each with 10 markers, and these markers are equally spaced along 1 Morgan chromosome.
# Clean the directory
rm(list=ls())
# Load the External Data Set and Change the Directory
load("External_Data.RDATA")
# Genetic map
head(genMap)
## marker chromosome position
## 1 M1 1 0.0000000
## 2 M2 1 0.1111111
## 3 M3 1 0.2222222
## 4 M4 1 0.3333333
## 5 M5 1 0.4444444
## 6 M6 1 0.5555556
dim(genMap) # nrows: Number of markers
## [1] 100 3
If we have phased haplotypes, the file needs to be 40 rows (the number of individuals times the ploidy level), with each individual having 100 loci based on the map above.
# Haplotype file
haplo[1:6,1:6]
## M1 M2 M3 M4 M5 M6
## ID1 0 0 0 0 1 1
## ID1 1 0 0 0 1 0
## ID2 1 0 0 0 0 0
## ID2 0 0 1 0 1 0
## ID3 1 0 0 1 1 0
## ID3 0 0 0 1 0 1
dim(haplo) # nrows: Number of individuals multiplied by ploidy level, and ncols: number of markers
## [1] 40 100
If we don’t have phased haplotypes, we can simulate the haplotype phases multiple times.
# Loading a function for later
sim_haplo_phase <- function(M, ploidy) {
haplo <- NULL
for (i in 1:nrow(M)) {
b <- NULL
for (j in 1:ncol(M)) {
a <- rep(0, ploidy)
if (M[i, j] > 0) {
ones_count <- min(M[i, j], ploidy)
a[1:ones_count] <- 1
}
b <- cbind(b, a)
}
haplo <- rbind(haplo, b)
colnames(haplo)=colnames(M)
}
return(haplo)
}
# SNP matrix
snp[1:6,1:6]
## M1 M2 M3 M4 M5 M6
## ID1 1 2 2 0 1 0
## ID2 0 0 2 1 2 1
## ID3 0 0 2 1 1 1
## ID4 0 2 0 2 2 1
## ID5 0 2 2 0 0 0
## ID6 1 1 2 2 2 2
dim(snp) # nrows: Number of individuals and ncols: number of markers
## [1] 20 100
# Haplotype file simulated
haplo=sim_haplo_phase(M=snp, ploidy=2)
haplo[1:6,1:6]
## M1 M2 M3 M4 M5 M6
## [1,] 1 1 1 0 1 0
## [2,] 0 1 1 0 0 0
## [3,] 0 0 1 1 1 1
## [4,] 0 0 1 0 1 0
## [5,] 0 0 1 1 1 1
## [6,] 0 0 1 0 0 0
We can also import the pedigree file.
# Pedigree file
head(ped)
## id mother father
## 1 ID1 0 0
## 2 ID2 0 0
## 3 ID3 0 0
## 4 ID4 0 0
## 5 ID5 0 0
## 6 ID6 0 0
To create the founder genome using the imported data set, we can
utilize the importHaplo()
function.
# Create the founder population
founderPop = importHaplo(haplo = haplo,
genMap = genMap,
ploidy = 2,
ped = ped)
founderPop
## An object of class "NamedMapPop"
## Ploidy: 2
## Individuals: 20
## Chromosomes: 10
## Loci: 100
# Set simulation parameters
# Initialize parameters with founder haplotypes
SP = SimParam$new(founderPop)
We can also load our own QTL effects or simulated traits using our imported genomic data set.
# Load our own QTL effects
qtlEffects = data.frame(marker = c("M1", "M11","M80"),
aditiveEffect = c(1, -1, 1),
domEffect = c(0.3, 0, -0.3))
# Import in SimParam
SP$importTrait(markerNames = qtlEffects$marker,
addEff = qtlEffects$aditiveEffect,
intercept = 10,
domEff = qtlEffects$domEffect,
name = "Trait1")
# Create a population from the founder haplotypes
pop = newPop(founderPop)
# The population now works like any other AlphaSimR population
gv(pop)[1:6]
## [1] 11.3 7.7 9.0 8.0 9.7 10.0
The AlphaSimR
package allows to simulate a set of traits
for the individuals. For such, two strategies may be used.
The first strategy is to set all traits at once (using
$addTrait
). In this option, a correlation matrix between
each pair of traits, for each effect, should be added, as follows:
rm(list=ls())
# Founder genome
founderGenomes = runMacs(nInd = 3,
nChr = 3,
segSites = 100,
species = "MAIZE",
ploidy = 2)
# From the base population
SP = SimParam$new(founderGenomes)
# Alternative one - Setting the correlation between traits (Additive trait)
SP$addTraitA(nQtlPerChr = 20,
mean = c(0, 10),
var = c(0.5, 2),
corA = matrix(c(1.0, 0.5,
0.5, 1.0), nrow = 2))
# QTL effects for trait one
SP$traits[[1]]@addEff
## [1] -0.142143152 -0.231747279 0.037280501 0.241323211 0.466170017
## [6] -0.020947934 0.268800417 0.300331789 0.163119673 0.161948220
## [11] 0.257923558 0.011057770 -0.242525896 -0.159215346 0.054591340
## [16] 0.037885842 -0.112546003 0.018034556 -0.000680395 0.189771209
## [21] 0.108079096 0.044779875 0.065963816 -0.069411910 -0.296322102
## [26] -0.124295219 -0.366513904 0.002606585 -0.193133240 0.084206880
## [31] -0.096099394 -0.094692383 -0.138634209 0.122174338 -0.007992793
## [36] -0.370329347 -0.248400746 0.173065196 -0.238836860 -0.138694402
## [41] -0.299586730 -0.036479705 0.051676110 -0.096081125 0.101470661
## [46] 0.363502128 -0.114901511 -0.007898188 0.011371373 0.160810591
## [51] 0.087136476 -0.151183493 -0.093444473 -0.094554789 0.093334354
## [56] 0.140026998 -0.277369277 0.191129913 -0.167607060 -0.025963792
# QTL effects for trait two
SP$traits[[2]]@addEff
## [1] 0.125478623 -0.522972651 0.036473845 0.242986657 0.160553220
## [6] 0.254058715 0.314471073 0.307339592 -0.150048147 0.087288005
## [11] -0.360261774 0.002255296 -0.020234138 -0.391117692 -0.042612852
## [16] 0.271774728 -0.500350601 0.036925966 -0.397583544 0.083003958
## [21] 0.076336240 -0.005403775 0.236231812 -0.275502645 -0.303444662
## [26] -0.128764629 -0.526733075 0.452079453 -0.224879176 0.082232063
## [31] -0.291375578 -0.097366871 -0.297301476 -0.153165045 0.030150468
## [36] -0.162587373 -0.406641232 0.448614681 -0.075480501 -0.307897940
## [41] 0.269612222 -0.055447322 0.191399015 -0.424830693 0.325728201
## [46] 0.614474948 -0.113245566 -0.510349787 -0.035595465 0.389646651
## [51] 0.083338905 0.262430661 -0.259451883 0.182699100 -0.049186529
## [56] 0.188103556 0.033536809 0.034519220 -0.086452663 0.016930605
A second strategy in a multi-trait framework is to add each trait individually. In this case, we do not need to add the information of correlation between them. Another positive aspect of this second strategy is the possibility of setting a different number of QTLs controlling each trait simulated.
# From the base population
SP = SimParam$new(founderGenomes)
# Alternative two - One trait at a time
# Trait one
SP$addTraitA(nQtlPerChr = 20,
mean = 0,
var = 0.5)
# Trait two
SP$addTraitA(nQtlPerChr = 5,
mean = 100,
var = 20)
# QTL effects for trait one
SP$traits[[1]]@addEff
## [1] -0.09046418 0.03286128 0.16555261 -0.58137197 0.05974085 -0.04632199
## [7] 0.10228479 0.08651719 0.06673763 0.12032382 -0.28616091 0.19547042
## [13] -0.07984843 -0.07171504 0.18011597 0.05654407 -0.21759435 0.22145362
## [19] 0.26495762 -0.15079060 0.13034380 0.10017915 -0.21751602 0.12914976
## [25] 0.26336063 -0.21864124 0.05236706 0.04529591 -0.10844205 -0.22982458
## [31] -0.24885050 0.16626600 0.21074405 0.04989468 0.23603430 0.27246961
## [37] 0.10822436 -0.19887765 -0.12542202 0.18079796 0.37016491 0.19298600
## [43] 0.16562591 0.20234541 0.08042761 -0.04964470 -0.21009523 -0.05045650
## [49] 0.33827500 0.04505360 -0.20002637 -0.08103277 -0.47208077 0.09044588
## [55] -0.27936894 -0.11201628 -0.21627754 0.11481747 0.23038725 -0.23076059
# QTL effects for trait two
SP$traits[[2]]@addEff
## [1] 0.1560651 -1.7373050 1.0854878 1.4841674 0.6598960 -1.3669863
## [7] -1.7481451 -1.8820693 1.4712278 1.1098782 0.6278460 -1.0113660
## [13] -1.7741940 1.5316312 2.3220156
In breeding programs, such as maize, heterosis is harnessed by exploiting the crosses between heterotic groups. Usually, two distinct groups are used (i.e., maize dent and flint germplasm pools). The genetic divergence of these two pools is explained by their historic geographical separation and adaptation to different environments. It has undergone changes in the population structure and history of the base population.
For simulations that involve population separation in terms of
generations, the AlphaSimR package offers the split
argument in the runMacs()
function. It represents an
optional historic population split in terms of generations ago.
Below, we will simulate three different base genomes. The first one will use the default value (NULL) for the split argument. The second and third genomes will be split 10 and 50 generations ago, respectively. We will examine the base genome genotypes and explore them through a principal component analysis (PCA).
rm(list=ls())
#------------------- Founder genome with no split
founderGenomes = runMacs(nInd = 200,
nChr = 5,
segSites = 100,
species = "MAIZE",
ploidy = 2,
split = NULL)
# From the base population
SP = SimParam$new(founderGenomes)
# Creating individuals/population
basePop = newPop(founderGenomes)
# Check the genotypes
popGeno = pullSegSiteGeno(basePop)
# PCA
pca_noSplit = prcomp(popGeno)
#------------------- Founder genome with split equals to 10
founderGenomes = runMacs(nInd = 200,
nChr = 5,
segSites = 100,
species = "MAIZE",
ploidy = 2,
split = 10)
# From the base population
SP = SimParam$new(founderGenomes)
# Creating individuals/population
basePop = newPop(founderGenomes)
# Check the genotypes
popGeno_10 = pullSegSiteGeno(basePop)
# PCA
pca_Split10 = prcomp(popGeno_10)
#------------------- Founder genome with split equals to 50
founderGenomes = runMacs(nInd = 200,
nChr = 5,
segSites = 100,
species = "MAIZE",
ploidy = 2,
split = 50)
# From the base population
SP = SimParam$new(founderGenomes)
# Creating individuals/population
basePop = newPop(founderGenomes)
# Check the genotypes
popGeno_50 = pullSegSiteGeno(basePop)
# PCA
pca_Split50 = prcomp(popGeno_50)
Plotting the PCA from each base population created.
# Plotting the PCA
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.3.3
## Carregando pacotes exigidos: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
# Plot
par(mar = c(1, 3, .1, .1))
autoplot(pca_noSplit, data = popGeno)
autoplot(pca_Split10, data = popGeno_10)
autoplot(pca_Split50, data = popGeno_50)
Professor - University of Florida, mresende@ufl.edu↩︎
Post doc - University of Florida, deamorimpeixotom@ufl.edu↩︎
Professor - University of Florida, lferrao@ufl.edu↩︎
Professor - Federal University of Viçosa, camila.azevedo@ufv.br↩︎