Importing external Data

The AlphaSimR package allows you to import genomic data and requires a genetic map and phased genotypes.

First, we load the genetic map in the format ‘Marker name, Chromosome, Position (in Morgans)’.

In the example below, we have a diploid species with 20 individuals, 10 chromosomes, each with 10 markers, and these markers are equally spaced along 1 Morgan chromosome.

# Clean the directory
rm(list=ls())
require(AlphaSimR)
## Carregando pacotes exigidos: AlphaSimR
## Warning: package 'AlphaSimR' was built under R version 4.3.3
## Carregando pacotes exigidos: R6
# Load the External Data Set and Change the Directory
load("External_Data.RDATA")

# Genetic map
head(genMap)
##   marker chromosome  position
## 1     M1          1 0.0000000
## 2     M2          1 0.1111111
## 3     M3          1 0.2222222
## 4     M4          1 0.3333333
## 5     M5          1 0.4444444
## 6     M6          1 0.5555556
dim(genMap) # nrows: Number of markers
## [1] 100   3

If we have phased haplotypes, the file needs to be 40 rows (the number of individuals times the ploidy level), with each individual having 100 loci based on the map above.

# Haplotype file
haplo[1:6,1:6] 
##     M1 M2 M3 M4 M5 M6
## ID1  0  0  0  0  1  1
## ID1  1  0  0  0  1  0
## ID2  1  0  0  0  0  0
## ID2  0  0  1  0  1  0
## ID3  1  0  0  1  1  0
## ID3  0  0  0  1  0  1
dim(haplo) # nrows: Number of individuals multiplied by ploidy level, and ncols: number of markers
## [1]  40 100

If we don’t have phased haplotypes, we can simulate the haplotype phases multiple times.

# Loading a function for later
sim_haplo_phase <- function(M, ploidy) {
  haplo <- NULL
  for (i in 1:nrow(M)) {
    b <- NULL
    for (j in 1:ncol(M)) {
      a <- rep(0, ploidy)
      if (M[i, j] > 0) {
        ones_count <- min(M[i, j], ploidy)
        a[1:ones_count] <- 1
      }
      b <- cbind(b, a)
    }
    haplo <- rbind(haplo, b)
    colnames(haplo)=colnames(M)
  }
  return(haplo)
}
# SNP matrix
snp[1:6,1:6] 
##     M1 M2 M3 M4 M5 M6
## ID1  1  2  2  0  1  0
## ID2  0  0  2  1  2  1
## ID3  0  0  2  1  1  1
## ID4  0  2  0  2  2  1
## ID5  0  2  2  0  0  0
## ID6  1  1  2  2  2  2
dim(snp) # nrows: Number of individuals and ncols: number of markers
## [1]  20 100
# Haplotype file simulated
haplo=sim_haplo_phase(M=snp, ploidy=2)
haplo[1:6,1:6]
##      M1 M2 M3 M4 M5 M6
## [1,]  1  1  1  0  1  0
## [2,]  0  1  1  0  0  0
## [3,]  0  0  1  1  1  1
## [4,]  0  0  1  0  1  0
## [5,]  0  0  1  1  1  1
## [6,]  0  0  1  0  0  0

We can also import the pedigree file.

# Pedigree file
head(ped)
##    id mother father
## 1 ID1      0      0
## 2 ID2      0      0
## 3 ID3      0      0
## 4 ID4      0      0
## 5 ID5      0      0
## 6 ID6      0      0

To create the founder genome using the imported data set, we can utilize the importHaplo() function.

# Create the founder population
founderPop = importHaplo(haplo = haplo,
                         genMap = genMap,
                         ploidy = 2,
                         ped = ped)
founderPop
## An object of class "NamedMapPop" 
## Ploidy: 2 
## Individuals: 20 
## Chromosomes: 10 
## Loci: 100
# Set simulation parameters
# Initialize parameters with founder haplotypes
SP = SimParam$new(founderPop)

We can also load our own QTL effects or simulated traits using our imported genomic data set.

# Load our own QTL effects
qtlEffects = data.frame(marker = c("M1", "M11","M80"),
                        aditiveEffect = c(1, -1, 1),
                        domEffect = c(0.3, 0, -0.3))

# Import in SimParam
SP$importTrait(markerNames = qtlEffects$marker,
               addEff = qtlEffects$aditiveEffect,
               intercept = 10,
               domEff = qtlEffects$domEffect,
               name = "Trait1")

# Create a population from the founder haplotypes
pop = newPop(founderPop)

# The population now works like any other AlphaSimR population
gv(pop)[1:6]
## [1] 11.3  7.7  9.0  8.0  9.7 10.0

References


  1. Post doc, University of Florida, ↩︎

  2. Professor - Federal University of Viçosa, ↩︎