diff --git a/.test/config/config.yaml b/.test/config/config.yaml
index 00bcbea..e36026d 100644
--- a/.test/config/config.yaml
+++ b/.test/config/config.yaml
@@ -130,20 +130,27 @@ params:
     domajorminor: 1 # Currently only compatible with values 1, 2, 4, 5
     domaf: 1
     snp_pval: "1e-6"
-    min_maf: 0.05
+    min_maf: -1 # Set -1 to disable
     mindepthind_heterozygosity: 3
   ngsld:
     max_kb_dist_est-ld: 200
-    max_kb_dist_decay: 100
-    max_kb_dist_pruning: 50
     rnd_sample_est-ld: 0.001
+    max_kb_dist_decay: 100
     rnd_sample_decay: 0.9
     fit_LDdecay_extra: "--fit_level 3"
     fit_LDdecay_n_correction: true
-    pruning_min-weight: 0.1
+    max_kb_dist_pruning_dataset: 50 # used for PCA, Admix, NGSrelate, not ngsF-HMM
+    pruning_min-weight_dataset: 0.1 # used for PCA, Admix, NGSrelate, not ngsF-HMM
+  ngsf-hmm:
+    estimate_in_pops: false
+    prune: false
+    max_kb_dist_pruning_pop: 50 # used only for ngsf-hmm
+    pruning_min-weight_pop: 0.8 # used only for ngsf-hmm
+    min_roh_length: 50000
+    roh_bins: [ 100000, 250000, 500000, 1000000, 2500000, 5000000 ] # in ascending order, bins are between adjacent values and between min_roh_length and value 1 and the last value and infinity
   realsfs:
     fold: 1 # Should only be set to 1, unless ancestral reference is given above
-    sfsboot: 100
+    sfsboot: 100 # Does nothing for now
   fst:
     whichFst: 1
     win_size: 10000
diff --git a/config/README.md b/config/README.md
index f152fc9..c3824d7 100644
--- a/config/README.md
+++ b/config/README.md
@@ -451,17 +451,16 @@ or a pull request and I'll gladly put it in.
       for other options. I have not tested much beyond `1` and `8`, please open
       an issue if you have problems.
     - `min_maf:` The minimum minor allele frequency required to call a SNP.
-      (float, [docs](http://www.popgen.dk/angsd/index.php/Allele_Frequencies))
+      This is set when generating the beagle file, so will filter SNPs for
+      PCAngsd, NGSadmix, ngsF-HMM, and NGSrelate. If you would like each tool
+      to handle filtering for maf on its own you can set this to `-1`
+      (disabled). (float, [docs](http://www.popgen.dk/angsd/index.php/Allele_Frequencies))
   - `ngsld:` Settings for ngsLD ([docs](https://github.com/fgvieira/ngsLD))
     - `max_kb_dist_est-ld:` For the LD estimates generated when setting
       `estimate_ld: true` above, set the maximum distance between sites in kb
       that LD will be estimated for (`--max_kb_dist` in ngsLD, integer)
     - `max_kb_dist_decay:` The same as `max_kb_dist_est-ld:`, but used when
       estimating LD decay when setting `ld_decay: true` above (integer)
-    - `max_kb_dist_pruning:` The same as `max_kb_dist_est-ld:`, but used when
-      linkage pruning SNPs as inputs for PCA, Admix, and Inbreeding analyses.
-      Any positions above this distance will be assumed to be in linkage
-      equilibrium during the pruning process (integer)
     - `rnd_sample_est-ld:` For the LD estimates generated when setting
       `estimate_ld: true` above, randomly sample this proportion of pairwise
       linkage estimates rather than estimating all (`--rnd_sample` in ngsLD,
@@ -474,8 +473,41 @@ or a pull request and I'll gladly put it in.
       size corrected r^2 model be used? (`true`/`false`, `true` is the
       equivalent of passing a sample size to `fit_LDdecay.R` in ngsLD using
       `--n_ind`)
-    - `pruning_min-weight:` The minimum r^2 to assume two positions are in
-      linkage disequilibrium when pruning (float)
+    - `max_kb_dist_pruning_dataset:` The same as `max_kb_dist_est-ld:`, but
+      used when linkage pruning SNPs as inputs for PCAngsd, NGSadmix, and
+      NGSrelate analyses. Pruning is performed on the whole dataset. Any
+      positions above this distance will be assumed to be in linkage
+      equilibrium during the pruning process. (integer)
+    - `pruning_min-weight_dataset:` The minimum r^2 to assume two positions are
+      in linkage disequilibrium when pruning for PCAngsd, NGSadmix, and
+      NGSrelate analyses. (float)
+  - `ngsf-hmm:` Settings for ngsF-HMM
+    - `estimate_in_pops:` Set to `true` to run ngsF-HMM separately for each
+      population in your dataset. Set to `false` to run for whole dataset at
+      once. ngsF-HMM assumes Hardy-Weinberg Equilibrium (aside from inbreeding)
+      in the input data, so select the option that most reflects this. You can
+      use PCA and Admixture analyses to help determine this. (`true`/`false`)
+    - `prune:` Whether or not to prune SNPs for LD before running the analysis.
+      ngsF-HMM assumes independent sites, so it is preferred to set this to
+      `true` to satisfy that expectation. (`true`/`false`)
+    - `max_kb_dist_pruning_pop:` The maximum distance between sites in kb
+      that will be treated as in LD when pruning for the ngsF-HMM input. (INT)
+    - `pruning_min-weight_pop:` The minimum r^2 to assume two positions are in
+      linkage disequilibrium when pruning for the ngsF-HMM input. Note, that
+      this likely will be substantially higher for individual populations than
+      for the whole dataset, as background LD should be higher when no
+      substructure is present. (float)
+    - `min_roh_length:` Minimum ROH size in base pairs to include in inbreeding
+      coefficient calculation. Set if short ROH might be considered low
+      confidence for your data. (integer)
+    - `roh_bins:` A list of integers that describe the size classes in base
+      pairs you would like to partition the inbreeding coefficient by. This can
+      help visualize how much of the coefficient comes from ROH of certain size
+      classes (and thus, ages). List should be in ascending order and the first
+      entry should be greater than `min_roh_length`. The first bin will group
+      ROH between `min_roh_length` and the first entry, subsequent bins will
+      group ROH with sizes between adjacent entries in the list, and the final
+      bin will group all ROH larger than the final entry in the list. (list)
   - `realSFS:` Settings for realSFS
     - `fold:` Whether or not to fold the produced SFS. Set to 1 if you have not
       provided an ancestral-state reference (0 or 1, [docs](http://www.popgen.dk/angsd/index.php/SFS_Estimation))
@@ -508,6 +540,6 @@ or a pull request and I'll gladly put it in.
       assessment. (integer)
     - `extra:` Additional arguments to pass to NGSadmix (for instance,
       increasing `-maxiter`). (string, [docs](http://www.popgen.dk/software/index.php/NgsAdmix))
-    - `ibs:` Settings for identity by state calculation with ANGSD
-      - `-doIBS:` Whether to use a random (1) or consensus (2) base in IBS
+  - `ibs:` Settings for identity by state calculation with ANGSD
+    - `-doIBS:` Whether to use a random (1) or consensus (2) base in IBS
         distance calculation ([docs](http://www.popgen.dk/angsd/index.php/PCA_MDS))
diff --git a/config/config.yaml b/config/config.yaml
index e6d4f78..297acbf 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -129,20 +129,27 @@ params:
     domajorminor: 1 # Currently only compatible with values 1, 2, 4, 5
     domaf: 1
     snp_pval: "1e-6"
-    min_maf: 0.05
+    min_maf: -1 # Set -1 to disable
     mindepthind_heterozygosity: 3
   ngsld:
     max_kb_dist_est-ld: 4000
-    max_kb_dist_decay: 100
-    max_kb_dist_pruning: 100
     rnd_sample_est-ld: 0.001
+    max_kb_dist_decay: 100
     rnd_sample_decay: 0.001
     fit_LDdecay_extra: "--fit_level 100 --fit_boot 100 --plot_size 3,6 --plot_data --plot_no_legend --plot_y_lim 0.5"
     fit_LDdecay_n_correction: true
-    pruning_min-weight: 0.1
+    max_kb_dist_pruning_dataset: 100 # used for PCA, Admix, NGSrelate, not ngsF-HMM
+    pruning_min-weight_dataset: 0.1 # used for PCA, Admix, NGSrelate, not ngsF-HMM
+  ngsf-hmm:
+    estimate_in_pops: true
+    prune: true
+    max_kb_dist_pruning_pop: 100 # used only for ngsf-hmm
+    pruning_min-weight_pop: 0.8 # used only for ngsf-hmm
+    min_roh_length: 50000
+    roh_bins: [ 100000, 250000, 500000, 1000000, 2500000, 5000000 ] # in ascending order, bins are between adjacent values and between min_roh_length and value 1 and the last value and infinity
   realsfs:
     fold: 1 # Should only be set to 1, unless ancestral reference is given above
-    sfsboot: 100
+    sfsboot: 100 # Does nothing for now
   fst:
     whichFst: 1
     win_size: 50000
diff --git a/workflow/Snakefile b/workflow/Snakefile
index c717d1b..555d556 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -203,7 +203,7 @@ if config["analyses"]["heterozygosity_angsd"]:
 
 if config["analyses"]["inbreeding_ngsf-hmm"]:
     all_outputs.append(
-        "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all_{sites}-filts.froh.pdf"
+        "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all_{sites}-filts.ind_froh.tsv"
     )
 
 if config["analyses"]["admix_ngsadmix"]:
@@ -359,7 +359,7 @@ if config["subsample_analyses"]["heterozygosity_angsd"]:
 if config["subsample_analyses"]["inbreeding_ngsf-hmm"]:
     all_outputs.extend(
         expand(
-            "results/datasets/{{dataset}}/plots/inbreeding/{{dataset}}.{{ref}}_all{dp}_{{sites}}-filts.froh.pdf",
+            "results/datasets/{{dataset}}/plots/inbreeding/{{dataset}}.{{ref}}_all{dp}_{{sites}}-filts.ind_froh.tsv",
             dp=subdp,
         )
     )
diff --git a/workflow/rules/4.1_linkage_pruning.smk b/workflow/rules/4.1_linkage_pruning.smk
index 8965df3..d100749 100644
--- a/workflow/rules/4.1_linkage_pruning.smk
+++ b/workflow/rules/4.1_linkage_pruning.smk
@@ -7,29 +7,23 @@ rule ngsLD_prune_sites:
     Prunes SNPs to produce a list of SNPs in linkage equilibrium.
     """
     input:
-        ld=expand(
-            "results/datasets/{{dataset}}/beagles/pruned/ngsLD/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{{chunk}}_{{sites}}-filts.ld_maxkbdist-{maxkb}_rndsample-1.gz",
-            maxkb=config["params"]["ngsld"]["max_kb_dist_pruning"],
-        ),
+        ld="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.ld_maxkbdist-{maxkb}_rndsample-1.gz",
     output:
-        sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_pruned.sites",
+        sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.sites",
     log:
-        "logs/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.log",
+        "logs/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_maxkbdist-{maxkb}_minr2-{r2}.log",
     benchmark:
-        "benchmarks/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.log"
+        "benchmarks/{dataset}/ngsLD/prune_sites/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_maxkbdist-{maxkb}_minr2-{r2}.log"
     container:
         ngsld_container
     threads: 4
     resources:
         runtime="10d",
-    params:
-        maxdist=lambda w: str(config["params"]["ngsld"]["max_kb_dist_pruning"]) + "000",
-        minweight=config["params"]["ngsld"]["pruning_min-weight"],
     shell:
         """
         if [ -s {input.ld} ]; then
             zcat {input.ld} | prune_graph --weight-field 'column_7' \
-                --weight-filter 'column_3 <= {params.maxdist} && column_7 >= {params.minweight}' \
+                --weight-filter 'column_3 <= {wildcards.maxkb}000 && column_7 >= {wildcards.r2}' \
                 --out {output.sites}
         else
             > {output.sites}
@@ -43,20 +37,20 @@ rule prune_chunk_beagle:
     """
     input:
         beagle="results/datasets/{dataset}/beagles/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.beagle.gz",
-        sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_pruned.sites",
+        sites="results/datasets/{dataset}/beagles/pruned/ngsLD/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.sites",
     output:
-        prunedgz="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_pruned.beagle.gz",
+        prunedgz="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
     log:
-        "logs/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.log",
+        "logs/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
     benchmark:
-        "benchmarks/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.log"
+        "benchmarks/{dataset}/ngsLD/prune_beagle/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
     conda:
         "../envs/shell.yaml"
     shadow:
         "minimal"
     threads: lambda wildcards, attempt: attempt
     params:
-        pruned="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts_pruned.beagle",
+        pruned="results/datasets/{dataset}/beagles/pruned/chunks/{dataset}.{ref}_{population}{dp}_chunk{chunk}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle",
     resources:
         runtime=lambda wildcards, attempt: attempt * 120,
     shell:
@@ -84,15 +78,15 @@ rule merge_pruned_beagles:
     """
     input:
         pruned=lambda w: expand(
-            "results/datasets/{{dataset}}/beagles/pruned/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts_pruned.beagle.gz",
+            "results/datasets/{{dataset}}/beagles/pruned/chunks/{{dataset}}.{{ref}}_{{population}}{{dp}}_chunk{chunk}_{{sites}}-filts.pruned_maxkbdist-{{maxkb}}_minr2-{{r2}}.beagle.gz",
             chunk=chunklist,
         ),
     output:
-        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_pruned.beagle.gz",
+        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
     log:
-        "logs/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge_pruned.log",
+        "logs/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
     benchmark:
-        "benchmarks/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge_pruned.log"
+        "benchmarks/{dataset}/ngsLD/merge_pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_merge.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
     wildcard_constraints:
         population="|".join(
             ["all"]
diff --git a/workflow/rules/5.0_relatedness.smk b/workflow/rules/5.0_relatedness.smk
index 710e5d6..c5d9b89 100644
--- a/workflow/rules/5.0_relatedness.smk
+++ b/workflow/rules/5.0_relatedness.smk
@@ -162,7 +162,11 @@ rule ngsrelate:
     Estimates inbreeding and relatedness measures using NGSrelate.
     """
     input:
-        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_all{dp}_{sites}-filts_pruned.beagle.gz",
+        beagle=expand(
+            "results/datasets/{{dataset}}/beagles/pruned/{{dataset}}.{{ref}}_all{{dp}}_{{sites}}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
+            maxkb=config["params"]["ngsld"]["max_kb_dist_pruning_dataset"],
+            r2=config["params"]["ngsld"]["pruning_min-weight_dataset"],
+        ),
         inds="results/datasets/{dataset}/poplists/{dataset}_all.indiv.list",
     output:
         relate="results/datasets/{dataset}/analyses/kinship/ngsrelate/{dataset}.{ref}_all{dp}_{sites}-filts_relate.tsv",
diff --git a/workflow/rules/6.0_pca.smk b/workflow/rules/6.0_pca.smk
index 61f23b5..efa0584 100644
--- a/workflow/rules/6.0_pca.smk
+++ b/workflow/rules/6.0_pca.smk
@@ -8,13 +8,13 @@ rule remove_excl_pca_admix:
     results), while allowing them in all other analyses.
     """
     input:
-        "results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_all{dp}_{sites}-filts_pruned.beagle.gz",
+        "results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_all{dp}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
     output:
-        "results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_all_excl_pca-admix{dp}_{sites}-filts_pruned.beagle.gz",
+        "results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_all_excl_pca-admix{dp}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
     log:
-        "logs/{dataset}/ngsLD/excl_pca_admix_beagle/{dataset}.{ref}_all_excl_pca-admix{dp}_{sites}-filts.log",
+        "logs/{dataset}/ngsLD/excl_pca_admix_beagle/{dataset}.{ref}_all_excl_pca-admix{dp}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.log",
     benchmark:
-        "benchmarks/{dataset}/ngsLD/excl_pca_admix_beagle/{dataset}.{ref}_all_excl_pca-admix{dp}_{sites}-filts.log"
+        "benchmarks/{dataset}/ngsLD/excl_pca_admix_beagle/{dataset}.{ref}_all_excl_pca-admix{dp}_{sites}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.log"
     conda:
         "../envs/shell.yaml"
     params:
@@ -30,7 +30,11 @@ rule pca_pcangsd:
     Produces covariance matrix from SNP genotype likelihood data with PCAngsd.
     """
     input:
-        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_pruned.beagle.gz",
+        beagle=expand(
+            "results/datasets/{{dataset}}/beagles/pruned/{{dataset}}.{{ref}}_{{population}}{{dp}}_{{sites}}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
+            maxkb=config["params"]["ngsld"]["max_kb_dist_pruning_dataset"],
+            r2=config["params"]["ngsld"]["pruning_min-weight_dataset"],
+        ),
     output:
         cov="results/datasets/{dataset}/analyses/pcangsd/{dataset}.{ref}_{population}{dp}_{sites}-filts.cov",
     log:
diff --git a/workflow/rules/6.1_admixture.smk b/workflow/rules/6.1_admixture.smk
index 27c70de..d5de210 100644
--- a/workflow/rules/6.1_admixture.smk
+++ b/workflow/rules/6.1_admixture.smk
@@ -9,7 +9,11 @@ rule ngsAdmix:
     replicates have been performed.
     """
     input:
-        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_pruned.beagle.gz",
+        beagle=expand(
+            "results/datasets/{{dataset}}/beagles/pruned/{{dataset}}.{{ref}}_{{population}}{{dp}}_{{sites}}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
+            maxkb=config["params"]["ngsld"]["max_kb_dist_pruning_dataset"],
+            r2=config["params"]["ngsld"]["pruning_min-weight_dataset"],
+        ),
     output:
         qopt="results/datasets/{dataset}/analyses/ngsadmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}.qopt",
         fopt="results/datasets/{dataset}/analyses/ngsadmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}.fopt.gz",
@@ -68,7 +72,11 @@ rule evalAdmix:
     Perform evalAdmix analysis to get residuals on best fitting replicate for a given K.
     """
     input:
-        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_pruned.beagle.gz",
+        beagle=expand(
+            "results/datasets/{{dataset}}/beagles/pruned/{{dataset}}.{{ref}}_{{population}}{{dp}}_{{sites}}-filts.pruned_maxkbdist-{maxkb}_minr2-{r2}.beagle.gz",
+            maxkb=config["params"]["ngsld"]["max_kb_dist_pruning_dataset"],
+            r2=config["params"]["ngsld"]["pruning_min-weight_dataset"],
+        ),
         qopt="results/datasets/{dataset}/analyses/ngsadmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}.qopt",
         fopt="results/datasets/{dataset}/analyses/ngsadmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}.fopt.gz",
     output:
diff --git a/workflow/rules/8.0_inbreeding.smk b/workflow/rules/8.0_inbreeding.smk
index d40949f..b075f4e 100644
--- a/workflow/rules/8.0_inbreeding.smk
+++ b/workflow/rules/8.0_inbreeding.smk
@@ -7,7 +7,13 @@ rule ngsf_hmm:
     Estimate IBD tracts within individual genomes.
     """
     input:
-        beagle="results/datasets/{dataset}/beagles/pruned/{dataset}.{ref}_{population}{dp}_{sites}-filts_pruned.beagle.gz",
+        beagle=expand(
+            "results/datasets/{{dataset}}/beagles/{folder}{{dataset}}.{{ref}}_{{population}}{{dp}}_{{sites}}-filts{pruning}.beagle.gz",
+            folder="pruned/" if config["params"]["ngsf-hmm"]["prune"] else "",
+            pruning=f".pruned_maxkbdist-{config['params']['ngsf-hmm']['max_kb_dist_pruning_pop']}_minr2-{config['params']['ngsf-hmm']['pruning_min-weight_pop']}"
+            if config["params"]["ngsf-hmm"]["prune"]
+            else "",
+        ),
     output:
         ibd="results/datasets/{dataset}/analyses/ngsF-HMM/{dataset}.{ref}_{population}{dp}_{sites}-filts.ibd",
         indF="results/datasets/{dataset}/analyses/ngsF-HMM/{dataset}.{ref}_{population}{dp}_{sites}-filts.indF",
@@ -61,17 +67,33 @@ rule plot_froh:
     input:
         roh=expand(
             "results/datasets/{{dataset}}/analyses/ngsF-HMM/{{dataset}}.{{ref}}_{population}{{dp}}_{{sites}}-filts.roh",
-            population=pop_list,
+            population=pop_list
+            if config["params"]["ngsf-hmm"]["estimate_in_pops"]
+            else "all",
         ),
         inds="results/datasets/{dataset}/poplists/{dataset}_all.indiv.list",
         autos=get_auto_sum,
     output:
-        plot=report(
-            "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.froh.pdf",
+        barplot=report(
+            "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.froh_bins.svg",
+            category="Inbreeding",
+            labels=lambda w: {
+                "Filter": "{sites}",
+                **dp_report(w),
+                "Type": "Froh Bins Barplot",
+            },
+        ),
+        scatter=report(
+            "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.cumroh_nroh.svg",
             category="Inbreeding",
-            labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Barplot"},
+            labels=lambda w: {
+                "Filter": "{sites}",
+                **dp_report(w),
+                "Type": "Nroh ~ Lroh Scatterplot",
+            },
         ),
-        tsv="results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.froh.tsv",
+        roh="results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.all_roh.bed",
+        froh="results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.ind_froh.tsv",
     log:
         "logs/{dataset}/ngsF-HMM/{dataset}.{ref}_all{dp}_{sites}-filts_plot.log",
     benchmark:
@@ -79,7 +101,8 @@ rule plot_froh:
     conda:
         "../envs/r.yaml"
     params:
-        popnames=pop_list,
-        outpre=lambda w, output: output["plot"].removesuffix(".froh.pdf"),
+        bins=config["params"]["ngsf-hmm"]["roh_bins"],
+        minroh=config["params"]["ngsf-hmm"]["min_roh_length"],
+        outpre=lambda w, output: output["barplot"].removesuffix(".froh_bins.svg"),
     script:
         "../scripts/plot_Froh.R"
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 2301da6..4daaf09 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -171,15 +171,6 @@ def get_raw_fastq(wildcards):
             }
 
 
-## Get minimum overlap to collapse read pairs per sample
-def get_min_overlap(wildcards):
-    s = wildcards.sample
-    if s in samples.index[samples.time == "historical"]:
-        return config["params"]["fastp"]["min_overlap_hist"]
-    elif s in samples.index[samples.time == "modern"]:
-        return config["params"]["fastp"]["min_overlap_mod"]
-
-
 # Reference
 
 
diff --git a/workflow/scripts/plot_Froh.R b/workflow/scripts/plot_Froh.R
index 12eb213..6cbf36a 100644
--- a/workflow/scripts/plot_Froh.R
+++ b/workflow/scripts/plot_Froh.R
@@ -1,70 +1,160 @@
 sink(file(snakemake@log[[1]], open="wt"), type = "message")
 
-aggregate_roh <- function(rohlist,poplist,samplelist,lenautos) {
-  samples <- as.data.frame(read.table(samplelist, header=TRUE))
-  Froh <- data.frame()
-  
-  for (i in 1:length(poplist)) {
-
-    for (l in c(100000,1000000,2500000)) {
-      if (file.size(rohlist[i]) == 0) {
-        roh <- data.frame(matrix(ncol = 5, nrow = 0))
-      } else {
-        roh <- as.data.frame(read.table(rohlist[i], header = FALSE))
-      }
-      for (s in samples$sample[samples$population==poplist[i]]) {
-        sampleroh <- (subset(roh, roh[,4] == s))
-        nroh <- nrow(sampleroh[sampleroh[,5]>l,])
-        sumroh <- sum(sampleroh[,5][sampleroh[,5] > l])
-        Fsample <- sumroh/lenautos
-        row <- c(s,sumroh,Fsample,nroh,poplist[i],l)
-        Froh <- rbind(Froh, row)
-      }
+library(dplyr)
+library(ggplot2)
 
-    }
+# Combine ngsF-HMM roh beds into single table
+aggregate_roh <- function(rohlist) {
+
+  df <- data.frame()
 
+  for (i in rohlist) {
+    if (file.size(i) == 0) {
+      roh <- data.frame(matrix(ncol = 5, nrow = 0))
+    } else {
+      roh <- as.data.frame(read.table(i, header = FALSE))
+    }
+    df <- rbind(df, roh)
   }
+  
+  colnames(df) <- c("chr","start","stop","sample","length")
 
-  colnames(Froh) <- c("sample","lenroh","Froh","Nroh","population","minroh")
+  return(df)
+
+}
+
+# Calculate Froh for separate user defined size bins. Describes the proportion
+# of total Froh that comes from each size class
+froh_bins <- function(samplelist, roh_df, minroh, bins, lenautos) {
+  samples <- as.data.frame(read.table(samplelist, header=TRUE))
+
+  norun <- c()
+  norun$chr <- rep(0, nrow(samples))
+  norun$start <- rep(0, nrow(samples))
+  norun$end <- rep(0, nrow(samples))
+  norun$sample <- samples$sample
+  norun$length <- rep(0, nrow(samples))
+
+  bins <- c(minroh, bins, Inf)
+  frohs <- c()
+
+  for (i in seq(length(bins))) {
+    runs <- roh_df[roh_df[,5] >= bins[i],]
+    if (i <= length(bins)-1){
+      runs <- runs[runs[,5] < bins[i+1],]
+    }
+    runs <- rbind(runs, norun)
+    runs <- runs %>% group_by(sample) %>%
+      summarize(length = sum(length))
+    runs <- merge(runs, samples, by = "sample")
+    runs$froh <- runs$length / as.numeric(lenautos)
+    runs$range <- paste0("[",bins[i],",",bins[i+1],")")
+    frohs <- rbind(frohs, runs)
+  }
 
-  Froh$sample <- as.factor(Froh$sample)
-  Froh$population <- as.factor(Froh$population)
-  Froh$lenroh <- as.integer(Froh$lenroh)
-  Froh$Froh <- as.numeric(Froh$Froh)
-  Froh$Nroh <- as.integer(Froh$Nroh)
-  Froh$minroh <- as.factor(Froh$minroh)
+  return(frohs)
 
-  return(Froh)
 }
 
-plot_roh <- function(aggroh,plotpre) {
-  require(ggplot2)
+# Calculate the number of rohs and the cumulative length per individual for
+# rohs > the minimum roh size. This can help distinguish patterns of
+# consanguinity from increased background relatedness
+nroh_cumroh <- function(roh_df, minroh, samplelist) {
+  samples <- as.data.frame(read.table(samplelist, header=TRUE))
+  df <- roh_df[roh_df$length >= minroh, ]
+  df <- df %>% group_by(sample) %>%
+    summarize(cumlen = sum(length),
+              nroh = n())
+  df <- merge(df, samples, by = "sample")
+  return(df)
+}
 
-  ggplot(aggroh, aes(x=population,y=Froh)) +
-    geom_boxplot(outlier.shape = NA) +
-    geom_jitter(height = 0, width = 0.2) +
-    scale_y_continuous(limits = c(0, NA)) +
-    facet_wrap(~ minroh) +
-    theme_classic()
-  
-  ggsave(paste0(plotpre,".froh.pdf"))
+# Plot binned Froh per individual
+plot_bins <- function(frohs, minroh, bins, plotpre) {
+  bins <- c(minroh, bins, Inf)
+  ranges <- c()
+  for (i in seq(length(bins))) {
+    ranges <- c(ranges, paste0("[",bins[i],",",bins[i+1],")"))
+  }
+  frohs$range <- as.factor(frohs$range)
+  frohs$range <- factor(frohs$range, levels = ranges, labels = ranges)
+  indfroh <- frohs %>% group_by(sample) %>%
+    summarize(froh = sum(froh))
   
-  # ggplot(aggroh, aes(x=Nroh,y=lenroh)) +
-  #   geom_point(aes(col=population)) +
-  #   geom_smooth(method='lm') +
-  #   theme_classic() +
-  #   facet_grid(cols = vars(minroh))
+  frohs$sample <- factor(frohs$sample, levels = indfroh$sample[order(indfroh$froh)])
 
-  # ggsave(paste0(plotpre,".rohreg.pdf"))
+  ggplot(data = frohs, aes(x = sample, y = froh, fill = range)) +
+    geom_bar(position="stack", stat="identity") +
+    facet_grid(cols = vars(population), scales = "free_x") +
+    theme_bw() +
+    labs(y = expression(F[RoH]), fill = "RoH Length (bp)") +
+    scale_fill_grey() +
+    theme(
+      axis.title.x = element_blank(),
+      panel.grid.major = element_blank(),
+      panel.grid.minor = element_blank()
+    )
+  
+  ggsave(paste0(plotpre,".froh_bins.svg"))
+}
 
-  write.table(aggroh, file = paste0(plotpre,".froh.tsv"), quote = FALSE,
-    sep = "\t", row.names = FALSE, col.names = FALSE)
+# Plot Nroh ~ Cumulative roh length for individuals with rohs
+plot_cumroh_nroh <- function(nroh_cumroh, plotpre) {
+  ggplot(data = nroh_cumroh, aes(x = cumlen, y = nroh, color = population)) +
+    geom_point() +
+    geom_smooth(method = "lm", color = "black", se = FALSE) +
+    ylab("Total number of runs of homozygosity") +
+    xlab("Total length of runs of homozygosity (bp)") +
+    theme_classic() +
+    labs(color = "Population")
 
+  ggsave(paste0(plotpre,".cumroh_nroh.svg"))
 }
 
-aggroh <- aggregate_roh(snakemake@input[["roh"]],
-  snakemake@params[["popnames"]],
+
+# Actually run functions to produce outputs, saving tables for the bed file of
+# all rohs for all individuals and the Froh calculated for all rohs > the
+# minimum roh size for each individual
+
+aggroh <- aggregate_roh(snakemake@input[["roh"]])
+
+write.table(aggroh, file = paste0(snakemake@params[["outpre"]],".all_roh.bed"), quote = FALSE,
+  sep = "\t", row.names = FALSE, col.names = FALSE)
+
+frohs <- froh_bins(
   snakemake@input[["inds"]],
-  read.table(snakemake@input[["autos"]], sep = "\t")[,2])
+  aggroh,
+  snakemake@params[["minroh"]],
+  snakemake@params[["bins"]],
+  read.table(snakemake@input[["autos"]], sep = "\t")[,2]
+)
+
+indfroh <- frohs %>% group_by(sample) %>%
+  summarize(Froh = sum(froh))
+
+indfroh <- merge(
+  as.data.frame(read.table(snakemake@input[["inds"]], header=TRUE)),
+  indfroh,
+  by = "sample"
+)
+
+write.table(indfroh, file = paste0(snakemake@params[["outpre"]],".ind_froh.tsv"), quote = FALSE,
+  sep = "\t", row.names = FALSE, col.names = TRUE)
+
+cumroh <- nroh_cumroh(
+  aggroh,
+  snakemake@params[["minroh"]],
+  snakemake@input[["inds"]]
+)
+
+plot_bins(
+  frohs,
+  snakemake@params[["minroh"]],
+  snakemake@params[["bins"]],
+  snakemake@params[["outpre"]]
+)
 
-plot_roh(aggroh,snakemake@params[["outpre"]])
\ No newline at end of file
+plot_cumroh_nroh(
+  cumroh,
+  snakemake@params[["outpre"]]
+)
\ No newline at end of file