Simulate genotype data of family

  • Brief comments in English.
###############
### 1 
###############

# When we have one marker
# No. alleles
Na<-4
# Allele frequency vector
Pa<-c(0.3,0.2,0.1,0.4)
# Sum of allele frequency should be 1.
Pa<-Pa/sum(Pa)

###############
### 2 
###############

# Generate allele frequency values with uniform distribution
Pa<-runif(Na)
# When you use uniform distribution, allele frequency tends to be 1/Na,
# you can use dirichlet distribution to avoid this phenomenon.
install.packages("MCMCpack")
library(MCMCpack)
# Generate 1 set of allele frequency accoring to dirichlet distribution.
Pa<-rdirichlet(1,rep(1,Na))
Pa
# Double-check sum of allele freq being 1.
sum(Pa)

###############
### 3 
###############

# Now we have multiple markers on one chromosome.
Ns<-4
# NaS is a vector of No. alleles of markers
NaS<-c(3,4,5,2)
# Allele frequency
PaS<-list()
for(i in 1:Ns){
  PaS[[i]]<-c(rdirichlet(1,rep(1,NaS[i])))
}
NaS
PaS

hap1<-hap2<-rep(0,Ns)
for(i in 1:Ns){
  hap1[i]<-sample(1:NaS[i],1,prob=PaS[[i]])
  hap2[i]<-sample(1:NaS[i],1,prob=PaS[[i]])
}
hap1
hap2

###############
### 4 
###############

# Function to generate haplotypes according to No. allele vector and allele freq. vector.
MakeHaplotype<-function(NaS,PaS,N){
  # haplotypeはN x length(NaS)の行列に納める
  H<-matrix(0,N,length(NaS))
  for(i in 1:length(NaS)){
    H[,i]<-sample(1:NaS[i],N,prob=PaS[[i]])
  }
  H
}
# Use this function
N<-2 # Generate 2 haplotypes for 1 indivudual.
Haps<-MakeHaplotype(NaS,PaS,N)
Haps

###############
### 5 
###############

# When you simulate recombination, you use two haplotypes.
# Define genetic distance between neighboring markers.
Dg<-runif(Ns-1)
# Let them cross-over at random.
# Decide which inbetweens cross-over.
rands.crossing<-runif(length(Dg))
# Take out inbetween with cross-over.
Cross<-which((Dg-rands.crossing)>0)
Cross
# Make recombinants.
hap1<-Haps[1,]
hap2<-Haps[2,]
rec.hap1<-hap1
rec.hap2<-hap2
if(length(Cross)>0){
  for(i in 1:length(Cross)){
    # Exchange segments downstream from cross-over points
    tmp.tail1<-rec.hap1[(Cross[i]+1):Ns]
    tmp.tail2<-rec.hap2[(Cross[i]+1):Ns]
    rec.hap1[(Cross[i]+1):Ns]<-tmp.tail2
    rec.hap2[(Cross[i]+1):Ns]<-tmp.tail1
  }
}
Cross
hap1
hap2
rec.hap1
rec.hap2

###############
### 6 
###############

# Make function to generate recombinants.
Rec.Haplotype<-function(hap1,hap2,d){
	Ns<-length(hap1)
	rands.crossing<-runif(length(d))
	# Take out inbetweens with crossover.
	Cross<-which((d-rands.crossing)>0)
	rec.hap1<-hap1
	rec.hap2<-hap2
	if(length(Cross)>0){
		for(i in 1:length(Cross)){
			tmp.tail1<-rec.hap1[(Cross[i]+1):Ns]
			tmp.tail2<-rec.hap2[(Cross[i]+1):Ns]
			rec.hap1[(Cross[i]+1):Ns]<-tmp.tail2
			rec.hap2[(Cross[i]+1):Ns]<-tmp.tail1
		}
	}
	matrix(c(rec.hap1,rec.hap2),byrow=TRUE,nrow=2)
}
# Use the function

rec.haps<-Rec.Haplotype(Haps[1,],Haps[2,],Dg)
rec.haps

###############
### 7 
###############


# Generate parents' haplotype pairs from population.
father<-MakeHaplotype(NaS,PaS,2)
mother<-MakeHaplotype(NaS,PaS,2)
father
mother
# Make gametes' haplotype
father.gamates<-Rec.Haplotype(father[1,],father[2,],Dg)
mother.gamates<-Rec.Haplotype(mother[1,],mother[2,],Dg)
# Select gamates and decide offspring's haplotype pair.
from.father<-father.gamates[sample(1:2,1),]
from.mother<-mother.gamates[sample(1:2,1),]
offspring<-matrix(c(from.father,from.mother),byrow=TRUE,nrow=2)
offspring

father
mother
offspring

###############
### 8 
###############
# Make the job into a function
MakeOffspring<-function(father,mother,d){
  father.gamates<-Rec.Haplotype(father[1,],father[2,],d)
  mother.gamates<-Rec.Haplotype(mother[1,],mother[2,],d)
  from.father<-father.gamates[sample(1:2,1),]
  from.mother<-mother.gamates[sample(1:2,1),]
  offspring<-matrix(c(from.father,from.mother),byrow=TRUE,nrow=2)
  offspring  
}
offspring<-MakeOffspring(father,mother,Dg)
father
mother
offspring

###############
### 9 
###############
# We make matrix to handle familial relationship.
# Trio with parents and an offspring
M.Oyako.Trio<-matrix(c(0,0,0,0,1,2),byrow=TRUE,ncol=2)
M.Oyako.Trio
# Somebody with its parents and 4 grandparents
M.3gen<-matrix(c(0,0,0,0,0,0,0,0,1,2,3,4,5,6),byrow=TRUE,ncol=2)
M.3gen
# A pair of cousins.
M.cousins<-matrix(c(0,0,0,0,1,2,0,0,1,2,0,0,3,4,5,6),byrow=TRUE,ncol=2)
M.cousins 

###############
### 10 
###############

# Generate haplotype pairs for all the individuals in the pedigree.
sim.G<-list()

# From the 1-st to the last member in the pedigree.
for(i in 1:n.rows){

  tmp.father<-M.cousins[i,1]
  tmp.mother<-M.cousins[i,2]
  if(tmp.father==0){
    father.genotype<-MakeHaplotype(NaS,PaS,2)
  }else{
    father.genotype<-sim.G[[tmp.father]]
  }
  if(tmp.mother==0){
    mother.genotype<-MakeHaplotype(NaS,PaS,2)
  }else{
    mother.genotype<-sim.G[[tmp.mother]]
  }
  sim.G[[i]]<-MakeOffspring(father.genotype,mother.genotype,Dg)
}
sim.G
M.cousins

###############
### 11 
###############

# Make the job into a function/
Simulate.Genotype<-function(M,NaS,PaS,Dg){
  sim.G<-list()

  for(i in 1:n.rows){
    tmp.father<-M.cousins[i,1]
    tmp.mother<-M.cousins[i,2]
    if(tmp.father==0){
      father.genotype<-MakeHaplotype(NaS,PaS,2)
    }else{
      father.genotype<-sim.G[[tmp.father]]
    }
    if(tmp.mother==0){
      mother.genotype<-MakeHaplotype(NaS,PaS,2)
    }else{
      mother.genotype<-sim.G[[tmp.mother]]
    }
    sim.G[[i]]<-MakeOffspring(father.genotype,mother.genotype,Dg)
  }
  sim.G
}
sim.G<-Simulate.Genotype(M.cousins,NaS,PaS,Dg)
sim.G

###############
### 12 
###############

# We now handle multiple chromosomes
num.Chromosomes<-22
NaS.list<-list()
PaS.list<-list()
Dg.list<-list()
for(i in 1:num.Chromosomes){
  NaS.list[[i]]<-NaS
  PaS.list[[i]]<-PaS
  Dg.list[[i]]<-Dg
}
sim.Gs<-list()
for(i in 1:num.Chromosomes){
  sim.Gs[[i]]<-Simulate.Genotype(M.cousins,NaS.list[[i]],PaS.list[[i]],Dg.list[[i]])
}

###############
### 13 
###############

# Make a function for the job.
Simulate.Genotype.Multi<-function(M,NaS.list,PaS.list,Dg.list){
  num.Chromosomes<-length(NaS.list)
  sim.Gs<-list()
  for(i in 1:num.Chromosomes){
    sim.Gs[[i]]<-Simulate.Genotype(M.cousins,NaS.list[[i]],PaS.list[[i]],Dg.list[[i]])
  }
  sim.Gs
}
sim.Gs<-Simulate.Genotype.Multi(M.cousins,NaS.list,PaS.list,Dg.list)


###############
### 14 
###############

# We make multiple families.
Niter<-10
sim.Gs.list<-list()
for(i in 1:Niter){
  sim.Gs.list[[i]]<-Simulate.Genotype.Multi(M.cousins,NaS.list,PaS.list,Dg.list)
}