Recurrent selection program - Wheat breeding program.

In this vignette, we will present a simulation of a recurrent selection program. It is a implementation of the wheat breeding program described in Gaynor et al. (2017). In addition, the pipeline was made available through the AlphaSimR course available at edx platform (for more details, see the link)

We will start by filling the pipeline objects with information that comes from the individuals created from the base genome. In our recurrent selection program, we will first perform 10 years of truncated phenotype selection as described in see Figure 1. The implementation of these 10 years is so that each design or program could be evaluated with an equivalent starting point. It is generally called burn-in period.

Figure 1: Simulated wheat breeding program with Parents, $F_{1}$ progeny (F1), Headrows (HDRW), Preliminary Yield Trial (PYT), Advanced Yield Trial (AYT), Elite Yield Trial (EYT) and a released Variety. Adapted from Gaynor et al. (2017) and Lara et al. 2022.

Figure 1: Simulated wheat breeding program with Parents, \(F_{1}\) progeny (F1), Headrows (HDRW), Preliminary Yield Trial (PYT), Advanced Yield Trial (AYT), Elite Yield Trial (EYT) and a released Variety. Adapted from Gaynor et al. (2017) and Lara et al. 2022.

After that, we will run another 10 years of the same type of selection (called here conventional breeding program) and a modified breeding program that uses the rapid cycling. The comparision will be in terms of genetic mean and variance.

Global parameters

To begin with the simulations, we need to set the main parameters of the breeding program. For more details of the parameters, please see Gaynor et al. (2017).

# Clean the working environment
rm(list = ls())
library(AlphaSimR)
## Loading required package: R6
# Number of crosses per year
nCrosses = 30

# DH lines per cross
nDH = 50

# The maximum number of DH lines per cross to enter the PYT
famMax = 10

# Genotypes per yield trial
nPYT = 300
nAYT = 25
nEYT = 5

# Effective replication of yield trials in terms of locations
repHDRW = 1/4
repPYT = 1
repAYT = 4
repEYT = 8

# Number of QTLs per chromosome
nQTLs = 50

Founder genome

Setting the founder genome based on the parameters before implemented.

# Simulate founder genomes
founderGenomes = runMacs(nInd = 30, 
                         nChr = 21, 
                         segSites = nQTLs,
                         inbred = TRUE, 
                         species = "WHEAT")

# Set simulation parameters
SP = SimParam$new(founderGenomes)
SP$addTraitAG(nQtlPerChr = nQTLs, mean = 4, var = 0.1, varGxE = 0.2)
VarE = 0.4

# Founding parents
Parents = newPop(founderGenomes)

Populating the breeding program.

Starting with the breeding program with the Fill Pipeline

# Populate breeding program
for (year in 1:7) {
  # F1
  F1 = randCross(Parents, nCrosses)
  if (year < 7) {
    # Doubled Haploids
    DH = makeDH(F1, nDH)
  }
  if (year < 6) {
    # Headrows
    HDRW = setPheno(DH, varE = VarE, reps = repHDRW)
  }
  if (year < 5) {
    # Preliminary Yield Trial
    PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno")
    PYT = selectInd(PYT, nInd = nPYT, use = "pheno")
    PYT = setPheno(PYT, varE = VarE, reps = repPYT)
  }
  if (year < 4) {
    # Advanced Yield Trial
    AYT = selectInd(PYT, nInd = nAYT, use = "pheno")
    AYT = setPheno(AYT, varE = VarE, reps = repAYT)
  }
  if (year < 3) {
    # Elite Yield Trial
    EYT = selectInd(AYT, nEYT, use = "pheno")
    EYT = setPheno(EYT, varE = VarE, reps = repEYT)
  }
  if (year < 2) {
    # Selecting Variety
    Variety = selectInd(EYT, nInd = 1, use = "pheno")
  }
}

Burn-In period

This is the Bunr-in period. We will run it for 10 years and we will save the information of the population at the end. In this way, we can deploy the different scenarios and both will have the same starting point. We will start creating one data frame to store the variables that we want to keep track over the years of breeding.

# Burnin period
BunrInYears = 10
futureYears =10

# Creating empty vectors to store genetic values
meanPar = matrix(NA, BunrInYears+futureYears)
varPar = matrix(NA, BunrInYears+futureYears)

Now, we will run the burn-in in a proper way.

# Burning
for (year in 1:BunrInYears) {
  # Select Variety
  Variety = selectInd(EYT, nInd = 1, use = "pheno")

  # Elite Yield Trial
  EYT = selectInd(AYT, nInd = nEYT, use = "pheno")
  EYT = setPheno(EYT, varE = VarE, reps = repEYT)

  # Advanced Yield Trial
  AYT = selectInd(PYT, nInd = nAYT, use = "pheno")
  AYT = setPheno(AYT, varE = VarE, reps = repAYT)

  # Preliminary Yield Trial
  PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno")
  PYT = selectInd(PYT, nInd = nPYT, use = "pheno")
  PYT = setPheno(PYT, varE = VarE, reps = repPYT)

  # Headrows
  HDRW = setPheno(DH, varE = VarE, reps = repHDRW)

  # Doubled Haploids
  DH = makeDH(F1, nDH)

  # F1 and Parents
  Parents = c(EYT, AYT)
  F1 = randCross(Parents, nCrosses)

  # Report results
  meanPar[year] = meanG(Parents)
  varPar[year] = varG(Parents)
  
  # Save the state of simulation
  if (year == 10) {
    save.image(file = "results/year10.RData")
  }
}

Scenario 1

The first scenario uses phenotypic selection and it use the same pipeline developed in the Burn-in stage.

# 1. Load state of simulation from year 10
load(file = "results/year10.RData")

# 2. Loop
for (year in (BunrInYears+1):(BunrInYears+futureYears)) {
 # Select Variety
  Variety = selectInd(EYT, nInd = 1, use = "pheno")

  # Elite Yield Trial
  EYT = selectInd(AYT, nInd = nEYT, use = "pheno")
  EYT = setPheno(EYT, varE = VarE, reps = repEYT)

  # Advanced Yield Trial
  AYT = selectInd(PYT, nInd = nAYT, use = "pheno")
  AYT = setPheno(AYT, varE = VarE, reps = repAYT)

  # Preliminary Yield Trial
  PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno")
  PYT = selectInd(PYT, nInd = nPYT, use = "pheno")
  PYT = setPheno(PYT, varE = VarE, reps = repPYT)

  # Headrows
  HDRW = setPheno(DH, varE = VarE, reps = repHDRW)

  # Doubled Haploids
  DH = makeDH(F1, nDH)

  # F1 and Parents
  Parents = PYT
  F1 = randCross(Parents, nCrosses)

  # Report results
  meanPar[year] = meanG(Parents)
  varPar[year] = varG(Parents)
}

# 3. Recording results
output1 = data.frame(Year = c(1:20),
                     scenario=rep("Conv", 20),
                     meanPar,
                     varPar,
                     stringsAsFactors=FALSE)


# 4. Saving the results as RDS
saveRDS(output1,"results/Wheat_Scenario1.rds")

Scenario 2

In the scenario two of this set of simulations, we will implement the early parental recycling. So, the only change is, instead of using the AYT and EYT as the parents, we will use the individuals from PYT stage.

# 1. Load state of simulation from year 10
load(file = "results/year10.RData")

# 2. Running the loop
for (year in (BunrInYears+1):(BunrInYears+futureYears)) {
 # Select Variety
  Variety = selectInd(EYT, nInd = 1, use = "pheno")

  # Elite Yield Trial
  EYT = selectInd(AYT, nInd = nEYT, use = "pheno")
  EYT = setPheno(EYT, varE = VarE, reps = repEYT)

  # Advanced Yield Trial
  AYT = selectInd(PYT, nInd = nAYT, use = "pheno")
  AYT = setPheno(AYT, varE = VarE, reps = repAYT)

  # Preliminary Yield Trial
  PYT = selectWithinFam(HDRW, nInd = famMax, use = "pheno")
  PYT = selectInd(PYT, nInd = nPYT, use = "pheno")
  PYT = setPheno(PYT, varE = VarE, reps = repPYT)

  # Headrows
  HDRW = setPheno(DH, varE = VarE, reps = repHDRW)

  # Doubled Haploids
  DH = makeDH(F1, nDH)

  # F1 and Parents
  Parents = c(EYT, AYT)
  F1 = randCross(Parents, nCrosses)

  # Report results
  meanPar[year] = meanG(Parents)
  varPar[year] = varG(Parents)
}

# 3. Recording results
output2 = data.frame(Year = c(1:20),
                     scenario=rep("EarlySel", 20),
                     meanPar,
                     varPar,
                     stringsAsFactors=FALSE)


# 4. Saving the results as RDS
saveRDS(output2,"results/Wheat_Scenario2.rds")

Plotting the results

# Load the results from the two scenarios
rm(list=ls())
Scen1 = readRDS('results/Wheat_Scenario1.rds')
Scen2 = readRDS('results/Wheat_Scenario2.rds')
df = rbind(data.frame(Scen1), data.frame(Scen2))

# Organizing
par(mfrow = c(2, 2),
    mar = c(4, 4, 1, 1))

# Plot mean of genetic values over time
meanRanges = range(df$meanPar)
plot(x = 1:20, y = df$meanPar[df$scenario == 'Conv'], type = "l", col = "black", lwd = 3,
     xlab = "Year", ylab = "Mean of genetic values", ylim = meanRanges)
lines(x = 1:20, y = df$meanPar[df$scenario == 'EarlySel'], type = "l", col = "blue", lwd = 3)
legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios",
       lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n")


# Plot mean of genetic values over time
varRanges = range(df$varPar)
plot(x = 1:20, y = df$varPar[df$scenario == 'Conv'], type = "l", col = "black", lwd = 3,
     xlab = "Year", ylab = "Mean of genetic values", ylim = varRanges)
lines(x = 1:20, y = df$varPar[df$scenario == 'EarlySel'], type = "l", col = "blue", lwd = 3)
legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios",
       lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n")


# Plot mean of genetic values over time
meanRanges = range(c(df$meanPar[df$scenario == 'Conv' ][10:20]), df$meanPar[df$scenario == 'EarlySel' ][10:20])

plot(x = 10:20, y = df$meanPar[df$scenario == 'Conv'][10:20], type = "l", col = "black", lwd = 3,
     xlab = "Year", ylab = "Mean of genetic values", ylim = meanRanges)
lines(x = 10:20, y = df$meanPar[df$scenario == 'EarlySel'][10:20], type = "l", col = "blue", lwd = 3)
legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios",
       lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n")


# Plot mean of genetic values over time
varRanges = range(c(df$varPar[df$scenario == 'Conv' ][10:20]), df$varPar[df$scenario == 'EarlySel' ][10:20])
plot(x = 10:20, y = df$varPar[df$scenario == 'Conv'][10:20], type = "l", col = "black", lwd = 3,
     xlab = "Year", ylab = "Mean of genetic values", ylim = varRanges)
lines(x = 10:20, y = df$varPar[df$scenario == 'EarlySel'][10:20], type = "l", col = "blue", lwd = 3)
legend(x = "topleft", legend = c('Conv', 'EarlySel'), title = "Scenarios",
       lwd = 3, lty = c(1, 2), col = c("black", "blue"), bty = "n")

Obs.: As it is a stochastic simulation implementations, we suggest to run more repetitions for both scenarios and work with the mean of the outcomes.

References

Gaynor, R Chris, Gregor Gorjanc, Alison R Bentley, Eric S Ober, Phil Howell, Robert Jackson, Ian J Mackay, and John M Hickey. 2017. “A Two-Part Strategy for Using Genomic Selection to Develop Inbred Lines.” Crop Science 57 (5): 2372–86.

  1. University of Florida, ↩︎

  2. University of Florida, ↩︎