Skip to content

Commit

Permalink
Reverting last two commits.
Browse files Browse the repository at this point in the history
As I was trying to add mafs as a targetable output, the way I handled polarization of major/minor seemed needlessy complicated. Instead, maf can just be done separately in a single rule or two and it cuts down on complexity a lot.
  • Loading branch information
zjnolen committed Sep 30, 2024
1 parent a6fdf70 commit 00ba4a3
Show file tree
Hide file tree
Showing 13 changed files with 124 additions and 236 deletions.
6 changes: 2 additions & 4 deletions .test/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,12 @@ params:
extra_saf: "" # goes to all -doSaf runs
extra_beagle: "" # goes to all -doGlf 2 runs
domajorminor: 1 # Currently only compatible with values 1, 2, 4, 5
domaf: 1 # Currently only compatible with values 1, 2, 3
popmafmaj: "population" # "all" or "population"
domaf: 1
snp_pval: "1e-6"
min_maf: -1 # Set -1 to disable
mindepthind_heterozygosity: 1
ngsrelate:
ibsrelate-only-extra: ""
freqbased-extra: ""
prune: true
ngsld:
max_kb_dist_est-ld: 200
rnd_sample_est-ld: 0.001
Expand Down
6 changes: 2 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,12 @@ params:
extra_saf: "" # goes to all -doSaf runs
extra_beagle: "" # goes to all -doGlf 2 runs
domajorminor: 1 # Currently only compatible with values 1, 2, 4, 5
domaf: 1 # Currently only compatible with values 1, 2, 3
popmafmaj: "population" # "all" or "population"
domaf: 1
snp_pval: "1e-6"
min_maf: -1 # Set -1 to disable
mindepthind_heterozygosity: 1
ngsrelate:
ibsrelate-only-extra: ""
freqbased-extra: ""
prune: false
ngsld:
max_kb_dist_est-ld: 4000
rnd_sample_est-ld: 0.001
Expand Down
1 change: 0 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ wildcard_constraints:
dp=".{0}|.dp[1-9][0-9]*",
chunk="[0-9]+",
sites="|".join(filters),
maj="all|pop|anc|ref",


# Accumulate desired output files from config file
Expand Down
53 changes: 27 additions & 26 deletions workflow/rules/3.2_beagles.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rule angsd_doGlf2:
population beagle files, even if a population is fixed for a certain allele.
"""
input:
unpack(get_sitesfile),
unpack(filt_depth),
unpack(get_anc_ref),
bam="results/datasets/{dataset}/bamlists/{dataset}.{ref}_{population}{dp}.bamlist",
bams=get_bamlist_bams,
Expand All @@ -18,13 +18,13 @@ rule angsd_doGlf2:
reffai="results/ref/{ref}/{ref}.fa.fai",
regions="results/datasets/{dataset}/filters/chunks/{ref}_chunk{chunk}.rf",
output:
beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.beagle.gz",
maf="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.mafs.gz",
arg="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.arg",
beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.beagle.gz",
maf="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.mafs.gz",
arg="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.arg",
log:
"logs/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.log",
"logs/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.log",
benchmark:
"benchmarks/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.log"
"benchmarks/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.log"
container:
angsd_container
params:
Expand All @@ -33,9 +33,10 @@ rule angsd_doGlf2:
extra_beagle=config["params"]["angsd"]["extra_beagle"],
mapQ=config["mapQ"],
baseQ=config["baseQ"],
snp_pval=config["params"]["angsd"]["snp_pval"],
maf=config["params"]["angsd"]["domaf"],
snp_pval_maf=get_snppval_maf,
majmin=get_majmin,
minmaf=config["params"]["angsd"]["min_maf"],
majmin=config["params"]["angsd"]["domajorminor"],
counts=get_docounts,
trans=get_trans,
nind=get_nind,
Expand All @@ -48,12 +49,12 @@ rule angsd_doGlf2:
shell:
"""
angsd -doGlf 2 -bam {input.bam} -GL {params.gl_model} -ref {input.ref} \
{params.majmin} -doMaf {params.maf} {params.snp_pval_maf} \
-nThreads {threads} {params.extra} -minMapQ {params.mapQ} \
-minQ {params.baseQ} -sites {input.sites} -anc {input.anc} \
{params.extra_beagle} -rf {input.regions} {params.minind} \
-setMinDepthInd {params.mininddp} {params.counts} \
-rmTrans {params.trans} -out {params.out} &> {log}
-doMajorMinor {params.majmin} -doMaf {params.maf} -minMaf {params.minmaf} \
-SNP_pval {params.snp_pval} -nThreads {threads} {params.extra} \
-minMapQ {params.mapQ} -minQ {params.baseQ} -sites {input.sites} \
-anc {input.anc} {params.extra_beagle} -rf {input.regions} {params.minind} \
-setMinDepthInd {params.mininddp} {params.counts} -rmTrans {params.trans} \
-out {params.out} &> {log}
"""


Expand All @@ -63,15 +64,15 @@ rule merge_beagle:
"""
input:
lambda w: expand(
"results/datasets/{{dataset}}/beagles/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.{{maj}}maj.beagle.gz",
"results/datasets/{{dataset}}/beagles/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.beagle.gz",
chunk=chunklist,
),
output:
beagle="results/datasets/{dataset}/beagles/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.beagle.gz",
beagle="results/datasets/{dataset}/beagles/{dataset}.{ref}_{population}{dp}_{sites}-filts.beagle.gz",
log:
"logs/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_merge-beagle.log",
"logs/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge-beagle.log",
benchmark:
"benchmarks/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_merge-beagle.log"
"benchmarks/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge-beagle.log"
container:
shell_container
resources:
Expand All @@ -93,15 +94,15 @@ rule merge_maf:
"""
input:
lambda w: expand(
"results/datasets/{{dataset}}/beagles/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.{{maj}}maj.mafs.gz",
"results/datasets/{{dataset}}/beagles/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.mafs.gz",
chunk=chunklist,
),
output:
maf="results/datasets/{dataset}/beagles/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.mafs.gz",
maf="results/datasets/{dataset}/mafs/{dataset}.{ref}_{population}{dp}_{sites}-filts.mafs.gz",
log:
"logs/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_merge-mafs.log",
"logs/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge-mafs.log",
benchmark:
"benchmarks/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_merge-mafs.log"
"benchmarks/{dataset}/angsd/doGlf2/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge-mafs.log"
container:
shell_container
resources:
Expand All @@ -123,13 +124,13 @@ rule snpset:
dataset.
"""
input:
"results/datasets/{dataset}/beagles/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.mafs.gz",
"results/datasets/{dataset}/mafs/{dataset}.{ref}_{population}{dp}_{sites}-filts.mafs.gz",
output:
"results/datasets/{dataset}/filters/snps/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_snps.sites",
"results/datasets/{dataset}/filters/snps/{dataset}.{ref}_{population}{dp}_{sites}-filts_snps.sites",
log:
"logs/{dataset}/filters/snps/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_snps.log",
"logs/{dataset}/filters/snps/{dataset}.{ref}_{population}{dp}_{sites}-filts_snps.log",
benchmark:
"benchmarks/{dataset}/filters/snps/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_snps.log"
"benchmarks/{dataset}/filters/snps/{dataset}.{ref}_{population}{dp}_{sites}-filts_snps.log"
container:
shell_container
shell:
Expand Down
13 changes: 6 additions & 7 deletions workflow/rules/4.0_estimate_LD.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ rule ngsLD_estLD:
Estimates pairwise linkage disequilibrium between SNPs.
"""
input:
beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.beagle.gz",
beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.beagle.gz",
bamlist="results/datasets/{dataset}/bamlists/{dataset}.{ref}_{population}{dp}.bamlist",
output:
ld=temp(
"results/datasets/{dataset}/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.gz"
"results/datasets/{dataset}/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.gz"
),
pos=temp(
"results/datasets/{dataset}/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.pos",
"results/datasets/{dataset}/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.pos",
),
log:
"logs/{dataset}/ngsLD/estLD/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.log",
"logs/{dataset}/ngsLD/estLD/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.log",
benchmark:
"benchmarks/{dataset}/ngsLD/estLD/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.log"
"benchmarks/{dataset}/ngsLD/estLD/{path}/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.log"
container:
ngsld_container
threads: lambda wildcards, attempt: attempt
Expand Down Expand Up @@ -50,9 +50,8 @@ rule ngsLD_estLD:
rule combine_LD_files:
input:
ldgz=expand(
"results/datasets/{{dataset}}/analyses/ngsLD/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.{maj}maj.ld_maxkbdist-{{maxkb}}_rndsample-{{rndsmp}}.gz",
"results/datasets/{{dataset}}/analyses/ngsLD/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.ld_maxkbdist-{{maxkb}}_rndsample-{{rndsmp}}.gz",
chunk=chunklist,
maj=get_maj,
),
output:
ldgz="results/datasets/{dataset}/analyses/ngsLD/{dataset}.{ref}_{population}{dp}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.gz",
Expand Down
28 changes: 14 additions & 14 deletions workflow/rules/4.1_linkage_pruning.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ rule ngsLD_prune_sites:
Prunes SNPs to produce a list of SNPs in linkage equilibrium.
"""
input:
ld="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.ld_maxkbdist-{maxkb}_rndsample-1.gz",
ld="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-1.gz",
output:
sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.sites",
sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.sites",
log:
"logs/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj_maxkbdist-{maxkb}_minr2-{r2}.log",
"logs/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_maxkbdist-{maxkb}_minr2-{r2}.log",
benchmark:
"benchmarks/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj_maxkbdist-{maxkb}_minr2-{r2}.log"
"benchmarks/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_maxkbdist-{maxkb}_minr2-{r2}.log"
container:
ngsld_container
threads: 4
Expand All @@ -36,21 +36,21 @@ rule prune_chunk_beagle:
Subsets beagle file to pruned SNPs.
"""
input:
beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.beagle.gz",
sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.sites",
beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.beagle.gz",
sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.sites",
output:
prunedgz="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
prunedgz="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
log:
"logs/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
"logs/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
benchmark:
"benchmarks/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
"benchmarks/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
container:
shell_container
shadow:
"minimal"
threads: lambda wildcards, attempt: attempt
params:
pruned="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle",
pruned="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle",
resources:
runtime=lambda wildcards, attempt: attempt * 120,
shell:
Expand Down Expand Up @@ -78,15 +78,15 @@ rule merge_pruned_beagles:
"""
input:
pruned=lambda w: expand(
"results/datasets/{{dataset}}/beagles/pruned/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.{{maj}}maj.pruned_maxkbdist-{{maxkb}}_minr2-{{r2}}.beagle.gz",
"results/datasets/{{dataset}}/beagles/pruned/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.pruned_maxkbdist-{{maxkb}}_minr2-{{r2}}.beagle.gz",
chunk=chunklist,
),
output:
beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
log:
"logs/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_merge.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
"logs/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
benchmark:
"benchmarks/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj_merge.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
"benchmarks/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
wildcard_constraints:
population="|".join(
["all"]
Expand Down
13 changes: 5 additions & 8 deletions workflow/rules/4.2_linkage_decay.smk
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
rule combine_LDdecay_files:
input:
ldgz=expand(
"results/datasets/{{dataset}}/analyses/ngsLD/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.{{maj}}maj.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.gz",
"results/datasets/{{dataset}}/analyses/ngsLD/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.ld_maxkbdist-{maxkb}_rndsample-{rndsmp}.gz",
chunk=chunklist,
maxkb=config["params"]["ngsld"]["max_kb_dist_decay"],
rndsmp=config["params"]["ngsld"]["rnd_sample_decay"],
),
output:
ldgz=temp(
"results/datasets/{dataset}/analyses/ngsLD/decay/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.LDdecay.gz"
"results/datasets/{dataset}/analyses/ngsLD/decay/{dataset}.{ref}_{population}{dp}_{sites}-filts.LDdecay.gz"
),
log:
"logs/{dataset}/ngsLD/combine_LDdecay_files/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.ld_decay.log",
"logs/{dataset}/ngsLD/combine_LDdecay_files/{dataset}.{ref}_{population}{dp}_{sites}-filts.ld_decay.log",
benchmark:
"benchmarks/{dataset}/ngsLD/combine_LDdecay_files/{dataset}.{ref}_{population}{dp}_{sites}-filts.{maj}maj.ld_decay.log"
"benchmarks/{dataset}/ngsLD/combine_LDdecay_files/{dataset}.{ref}_{population}{dp}_{sites}-filts.ld_decay.log"
container:
shell_container
shell:
Expand All @@ -24,10 +24,7 @@ rule combine_LDdecay_files:

rule fit_LD_decay:
input:
lambda w: expand(
"results/datasets/{{dataset}}/analyses/ngsLD/decay/{{dataset}}.{{ref}}_{{population}}{{dp}}_{{sites}}-filts.{maj}maj.LDdecay.gz",
maj=get_maj,
),
"results/datasets/{dataset}/analyses/ngsLD/decay/{dataset}.{ref}_{population}{dp}_{sites}-filts.LDdecay.gz",
output:
plot=report(
"results/datasets/{dataset}/plots/LD_decay/{dataset}.{ref}_{population}{dp}_{sites}-filts.LDdecay.pdf",
Expand Down
Loading

0 comments on commit 00ba4a3

Please sign in to comment.