AlphaSimR: Implementing a breeding program I

## Recurrent selection program - Wheat breeding program. In this vignette, we will present a simulation of a recurrent selection program. It is a implementation of the wheat breeding program described in @gaynor2017two. In addition, the pipeline was made available through the **AlphaSimR** course available at edx platform (for more details, see the [link](https://www.edx.org/learn/animal-breeding/the-university-of-edinburgh-breeding-programme-modelling-with-alphasimr)) We will start by filling the pipeline objects with information that comes from the individuals created from the base genome. In our recurrent selection program, we will first perform 10 years of truncated phenotype selection as described in see Figure 1. The implementation of these 10 years is so that each design or program could be evaluated with an equivalent starting point. It is generally called *burn-in* period. ```{r, fig.align="center", fig.cap="Figure 1: Simulated wheat breeding program with Parents, $F_{1}$ progeny (F1), Headrows (HDRW), Preliminary Yield Trial (PYT), Advanced Yield Trial (AYT), Elite Yield Trial (EYT) and a released Variety. Adapted from Gaynor et al. (2017) and Lara et al. 2022.", echo=FALSE} knitr::include_graphics("Wheat_Scheme.png") ``` After that, we will run another 10 years of the same type of selection (called here conventional breeding program) and a modified breeding program that uses the rapid cycling. The comparison will be in terms of genetic mean and variance. ## Global parameters To begin with the simulations, we need to set the main parameters of the breeding program. For more details of the parameters, please see @gaynor2017two. ```{r} # Clean the working environment rm(list = ls()) library(AlphaSimR) # Number of crosses per year nCrosses = 30 # DH lines per cross nDH = 50 # The maximum number of DH lines per cross to enter the PYT famMax = 10 # Genotypes per yield trial nPYT = 300 nAYT = 25 nEYT = 5 # Effective replication of yield trials in terms of locations repHDRW = 1/4 repPYT = 1 repAYT = 4 repEYT = 8 # Number of QTLs per chromosome nQTLs = 50 ``` ## Founder genome Setting the founder genome based on the parameters before implemented. ```{r} # Simulate founder genomes founderGenomes = runMacs(nInd = 30, nChr = 21, segSites = nQTLs, inbred = TRUE, species = "WHEAT") # Set simulation parameters SP = SimParam$new(founderGenomes) SP$addTraitAG(nQtlPerChr = nQTLs, mean = 4, var = 0.1, varGxE = 0.2) VarE = 0.4 # Founding parents Parents = newPop(founderGenomes) ``` ## Populating the breeding program. Starting with the breeding program with the Fill Pipeline ```{r} # Populate breeding program for (year in 1:7) { # F1 F1 = randCross(Parents, nCrosses) if (year < 7) { # Doubled Haploids DH = makeDH(F1, nDH) } if (year < 6) { # Headrows HDRW = setPheno(DH, varE = VarE, reps = repHDRW) } if (year < 5) { # Preliminary Yield Trial PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno") PYT = selectInd(PYT, nInd = nPYT, use = "pheno") PYT = setPheno(PYT, varE = VarE, reps = repPYT) } if (year < 4) { # Advanced Yield Trial AYT = selectInd(PYT, nInd = nAYT, use = "pheno") AYT = setPheno(AYT, varE = VarE, reps = repAYT) } if (year < 3) { # Elite Yield Trial EYT = selectInd(AYT, nEYT, use = "pheno") EYT = setPheno(EYT, varE = VarE, reps = repEYT) } if (year < 2) { # Selecting Variety Variety = selectInd(EYT, nInd = 1, use = "pheno") } } ``` ## Burn-In period This is the Bunr-in period. We will run it for 10 years and we will save the information of the population at the end. In this way, we can deploy the different scenarios and both will have the same starting point. We will start creating one data frame to store the variables that we want to keep track over the years of breeding. ```{r} # Burnin period BunrInYears = 10 futureYears =10 # Creating empty vectors to store genetic values meanPar = matrix(NA, BunrInYears+futureYears) varPar = matrix(NA, BunrInYears+futureYears) ``` Now, we will run the burn-in in a proper way. ```{r} # Burning for (year in 1:BunrInYears) { # Select Variety Variety = selectInd(EYT, nInd = 1, use = "pheno") # Elite Yield Trial EYT = selectInd(AYT, nInd = nEYT, use = "pheno") EYT = setPheno(EYT, varE = VarE, reps = repEYT) # Advanced Yield Trial AYT = selectInd(PYT, nInd = nAYT, use = "pheno") AYT = setPheno(AYT, varE = VarE, reps = repAYT) # Preliminary Yield Trial PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno") PYT = selectInd(PYT, nInd = nPYT, use = "pheno") PYT = setPheno(PYT, varE = VarE, reps = repPYT) # Headrows HDRW = setPheno(DH, varE = VarE, reps = repHDRW) # Doubled Haploids DH = makeDH(F1, nDH) # F1 and Parents Parents = c(EYT, AYT) F1 = randCross(Parents, nCrosses) # Report results meanPar[year] = meanG(Parents) varPar[year] = varG(Parents) # Save the state of simulation if (year == 10) { save.image(file = "results/year10.RData") } } ``` ## Scenario 1 The first scenario uses phenotypic selection and it use the same pipeline developed in the Burn-in stage. ```{r} # 1. Load state of simulation from year 10 load(file = "results/year10.RData") # 2. Loop for (year in (BunrInYears+1):(BunrInYears+futureYears)) { # Select Variety Variety = selectInd(EYT, nInd = 1, use = "pheno") # Elite Yield Trial EYT = selectInd(AYT, nInd = nEYT, use = "pheno") EYT = setPheno(EYT, varE = VarE, reps = repEYT) # Advanced Yield Trial AYT = selectInd(PYT, nInd = nAYT, use = "pheno") AYT = setPheno(AYT, varE = VarE, reps = repAYT) # Preliminary Yield Trial PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno") PYT = selectInd(PYT, nInd = nPYT, use = "pheno") PYT = setPheno(PYT, varE = VarE, reps = repPYT) # Headrows HDRW = setPheno(DH, varE = VarE, reps = repHDRW) # Doubled Haploids DH = makeDH(F1, nDH) # F1 and Parents Parents = PYT F1 = randCross(Parents, nCrosses) # Report results meanPar[year] = meanG(Parents) varPar[year] = varG(Parents) } # 3. Recording results output1 = data.frame(Year = c(1:20), scenario=rep("Conv", 20), meanPar, varPar, stringsAsFactors=FALSE) # 4. Saving the results as RDS saveRDS(output1,"results/Wheat_Scenario1.rds") ``` ## Scenario 2 In the scenario two of this set of simulations, we will implement the early parental recycling. So, the only change is, instead of using the AYT and EYT as the parents, we will use the individuals from PYT stage. ```{r} # 1. Load state of simulation from year 10 load(file = "results/year10.RData") # 2. Running the loop for (year in (BunrInYears+1):(BunrInYears+futureYears)) { # Select Variety Variety = selectInd(EYT, nInd = 1, use = "pheno") # Elite Yield Trial EYT = selectInd(AYT, nInd = nEYT, use = "pheno") EYT = setPheno(EYT, varE = VarE, reps = repEYT) # Advanced Yield Trial AYT = selectInd(PYT, nInd = nAYT, use = "pheno") AYT = setPheno(AYT, varE = VarE, reps = repAYT) # Preliminary Yield Trial PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno") PYT = selectInd(PYT, nInd = nPYT, use = "pheno") PYT = setPheno(PYT, varE = VarE, reps = repPYT) # Headrows HDRW = setPheno(DH, varE = VarE, reps = repHDRW) # Doubled Haploids DH = makeDH(F1, nDH) # F1 and Parents Parents = c(EYT, AYT) F1 = randCross(Parents, nCrosses) # Report results meanPar[year] = meanG(Parents) varPar[year] = varG(Parents) } # 3. Recording results output2 = data.frame(Year = c(1:20), scenario=rep("EarlySel", 20), meanPar, varPar, stringsAsFactors=FALSE) # 4. Saving the results as RDS saveRDS(output2,"results/Wheat_Scenario2.rds") ``` ## Plotting the results ```{r, eval = TRUE} # Load the results from the two scenarios rm(list=ls()) Scen1 = readRDS('results/Wheat_Scenario1.rds') Scen2 = readRDS('results/Wheat_Scenario2.rds') df = rbind(data.frame(Scen1), data.frame(Scen2)) # Organizing par(mfrow = c(2, 2), mar = c(4, 4, 1, 1)) # Plot mean of genetic values over time meanRanges = range(df$meanPar) plot(x = 1:20, y = df$meanPar[df$scenario == 'Conv'], type = "l", col = "black", lwd = 3, xlab = "Year", ylab = "Mean of genetic values", ylim = meanRanges) lines(x = 1:20, y = df$meanPar[df$scenario == 'EarlySel'], type = "l", col = "blue", lwd = 3) legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios", lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n") # Plot mean of genetic values over time varRanges = range(df$varPar) plot(x = 1:20, y = df$varPar[df$scenario == 'Conv'], type = "l", col = "black", lwd = 3, xlab = "Year", ylab = "Mean of genetic values", ylim = varRanges) lines(x = 1:20, y = df$varPar[df$scenario == 'EarlySel'], type = "l", col = "blue", lwd = 3) legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios", lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n") # Plot mean of genetic values over time meanRanges = range(c(df$meanPar[df$scenario == 'Conv' ][10:20]), df$meanPar[df$scenario == 'EarlySel' ][10:20]) plot(x = 10:20, y = df$meanPar[df$scenario == 'Conv'][10:20], type = "l", col = "black", lwd = 3, xlab = "Year", ylab = "Mean of genetic values", ylim = meanRanges) lines(x = 10:20, y = df$meanPar[df$scenario == 'EarlySel'][10:20], type = "l", col = "blue", lwd = 3) legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios", lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n") # Plot mean of genetic values over time varRanges = range(c(df$varPar[df$scenario == 'Conv' ][10:20]), df$varPar[df$scenario == 'EarlySel' ][10:20]) plot(x = 10:20, y = df$varPar[df$scenario == 'Conv'][10:20], type = "l", col = "black", lwd = 3, xlab = "Year", ylab = "Mean of genetic values", ylim = varRanges) lines(x = 10:20, y = df$varPar[df$scenario == 'EarlySel'][10:20], type = "l", col = "blue", lwd = 3) legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios", lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n") ``` *Obs.*: As it is a stochastic simulation implementations, we suggest to run more repetitions for both scenarios and work with the mean of the outcomes. ## References ::: {#refs} :::

Marco Antonio Peixoto