-
Notifications
You must be signed in to change notification settings - Fork 0
/
sequoia_pedigree_subset.Rmd
65 lines (63 loc) · 2.42 KB
/
sequoia_pedigree_subset.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
---
title: "Sequoia"
---
Load sequoia library
```{r}
library(sequoia)
```
Create life history data
```{r}
mp_mastersheet <- read.csv("phenotypes.csv")
LH <- data.frame(mp_mastersheet$Label, rep(4, length(mp_mastersheet$Label)), mp_mastersheet$Type)
colnames(LH) <- c("ID","Sex","birthyear")
# Code age catagories as dummy birth years
LH$birthyear[which(LH$birthyear=="Juvenile")] <- as.integer(2005)
LH$birthyear[which(LH$birthyear=="Intermediate1")] <- as.integer(2004)
LH$birthyear[which(LH$birthyear=="Intermediate2")] <- as.integer(2003)
LH$birthyear[which(LH$birthyear=="Adult")] <- as.integer(2001)
LH$birthyear[which(LH$birthyear=="Unknown")] <- as.integer(-1)
LH$birthyear <- as.integer(LH$birthyear)
```
Read in a ped file of filtered SNPS - less than 25% missing, low LD, high MAF
```{r}
Geno0 <- GenoConvert(InFile ="sequoia_filtered_snps.ped",InFormat="ped")
```
```{r}
Geno <- as.matrix(Geno0[c(16, 33, 36, 347, 509:514, 518, 570),])
CheckGeno(Geno)
```
Duplicate check & parentage assignment (takes few minutes)
```{r}
ParOUT<-sequoia(GenoM =Geno, LifeHistData =LH, Module="par", quiet =FALSE, Plot =TRUE, Complex = "full", Herm="A", MaxSibshipSize = 500)
# (NOTE: from version 2.1 onward,'Module'replaces'MaxSibIter')
ParOUT$DupGenotype
# inspect duplicates (intentional or accidental)
```
Polish dataset
```{r}
# remove one indiv. from each duplicate pair
Geno2<-Geno[!rownames(Geno)%in%ParOUT$DupGenotype$ID2,]
# & drop low call rate samples
# & drop SNPs with high error rate and/or low MAF
stats<-SnpStats(Geno, ParOUT$PedigreePar)
MAF<-ifelse(stats[,"AF"]<=0.5, stats[,"AF"],1-stats[,"AF"])
#Geno2<-Geno2[,-which(stats[,"Err.hat"]>0.05|MAF<0.1)]
#Geno2<-Geno2[,-which(stats[,"Err.hat"]>1|MAF<0.1)]
#
Indiv.Mis<-apply(Geno2,1,function(x)sum(x== -9))/ ncol(Geno2)
Geno2<-Geno2[Indiv.Mis<0.2, ]
# check histograms for sensible thresholds, iterate if necessary
# run full pedigree reconstruction (may take up to a few hours)
SeqOUT<-sequoia(GenoM =Geno2, LifeHistData =LH, Module ="ped",Err =0.001, MaxSibshipSize = 500)
#
SummarySeq(SeqOUT)
# inspect assigned parents, prop. dummy parents, etc.
#
# check full sibs, half sibs etc.
Pairwise<-ComparePairs(SeqOUT$Pedigree,patmat =TRUE)
```
Save results: single compressed .RData file, and/or folder with text files
```{r}
save(SeqOUT, Geno, OtherStuff,file="Sequoia_output_date.RData")
writeSeq(SeqList =SeqOUT,GenoM =Geno,folder ="Sequoia-OUT")
```