diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..692f1d2 Binary files /dev/null and b/.DS_Store differ diff --git a/.command.sh b/.command.sh new file mode 100644 index 0000000..e258707 --- /dev/null +++ b/.command.sh @@ -0,0 +1,2 @@ +#!/bin/bash -ue +zcat phasedvcf_chr1.vcf.gz | grep -v '^#' | awk '{print $1, $2, $3}' > rsid_map_chr1.txt diff --git a/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/000003.log b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/000003.log new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/CURRENT b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/CURRENT new file mode 100644 index 0000000..1a84852 --- /dev/null +++ b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/CURRENT @@ -0,0 +1 @@ +MANIFEST-000002 diff --git a/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/LOCK b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/LOCK new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/MANIFEST-000002 b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/MANIFEST-000002 new file mode 100644 index 0000000..bbbc585 Binary files /dev/null and b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/db/MANIFEST-000002 differ diff --git a/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/index.evil_legentil b/.nextflow/cache/047bdf78-a8b9-456c-bf7a-fa792e31cc93/index.evil_legentil new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/000003.log b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/000003.log new file mode 100644 index 0000000..748d4e8 Binary files /dev/null and b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/000003.log differ diff --git a/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/CURRENT b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/CURRENT new 
file mode 100644 index 0000000..1a84852 --- /dev/null +++ b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/CURRENT @@ -0,0 +1 @@ +MANIFEST-000002 diff --git a/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/LOCK b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/LOCK new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/MANIFEST-000002 b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/MANIFEST-000002 new file mode 100644 index 0000000..bbbc585 Binary files /dev/null and b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/db/MANIFEST-000002 differ diff --git a/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/index.kickass_euler b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/index.kickass_euler new file mode 100644 index 0000000..fcafd86 Binary files /dev/null and b/.nextflow/cache/4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf/index.kickass_euler differ diff --git a/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/000003.log b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/000003.log new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/CURRENT b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/CURRENT new file mode 100644 index 0000000..1a84852 --- /dev/null +++ b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/CURRENT @@ -0,0 +1 @@ +MANIFEST-000002 diff --git a/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/LOCK b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/LOCK new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/MANIFEST-000002 b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/MANIFEST-000002 new file mode 100644 index 0000000..bbbc585 Binary files /dev/null and b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/db/MANIFEST-000002 differ diff --git 
a/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/index.berserk_faggin b/.nextflow/cache/8a32e597-0fae-4fe6-bde4-332d5f87fdea/index.berserk_faggin new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/000003.log b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/000003.log new file mode 100644 index 0000000..9e0f96a Binary files /dev/null and b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/000003.log differ diff --git a/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/CURRENT b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/CURRENT new file mode 100644 index 0000000..1a84852 --- /dev/null +++ b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/CURRENT @@ -0,0 +1 @@ +MANIFEST-000002 diff --git a/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/LOCK b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/LOCK new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/MANIFEST-000002 b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/MANIFEST-000002 new file mode 100644 index 0000000..594b326 Binary files /dev/null and b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/db/MANIFEST-000002 differ diff --git a/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/index.gigantic_keller b/.nextflow/cache/8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf/index.gigantic_keller new file mode 100644 index 0000000..e69de29 diff --git a/.nextflow/history b/.nextflow/history new file mode 100644 index 0000000..89a5ca6 --- /dev/null +++ b/.nextflow/history @@ -0,0 +1,4 @@ +2024-12-09 16:49:55 - gigantic_keller - 348db7354f08ee080d5ed7021b1e2114 8b7dcc42-cfa0-4267-993c-d92d4e2ec0bf nextflow run main.nf --run_plink false --run_prsice false --run_LDpred2grid false --run_LDpred2auto false --run_Lassosum2 false --run_prscsx false --run_megaprs false +2024-12-09 16:51:20 6.6s berserk_faggin ERR f324416b50445b817c88a9a700016174 
8a32e597-0fae-4fe6-bde4-332d5f87fdea nextflow run main.nf --run_plink false --run_prsice false --run_LDpred2grid false --run_LDpred2auto false --run_Lassosum2 false --run_prscsx false --run_megaprs false +2024-12-09 16:56:12 7.6s evil_legentil ERR db0d5f7f64fe21ad8d61a059f22a9e9e 047bdf78-a8b9-456c-bf7a-fa792e31cc93 nextflow run main.nf --run_plink false --run_prsice false --run_LDpred2grid false --run_LDpred2auto false --run_Lassosum2 false --run_prscsx false --run_megaprs false +2024-12-09 17:07:42 6m 4s kickass_euler ERR b1cc4c25d53196387214f31567700398 4d6a8e6c-25c0-4bad-86b8-715eb5a5fbdf nextflow run main.nf --run_plink false --run_prsice false --run_LDpred2grid false --run_LDpred2auto false --run_Lassosum2 false --run_prscsx false --run_megaprs false diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1e83897 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +FROM tutkuyaras/pgsxplorer_image:latest + +RUN apt-get update && apt-get install -y \ + wget \ + curl \ + bash \ + bzip2 \ + build-essential \ + libbz2-dev \ + zlib1g-dev \ + liblzma-dev \ + libcurl4-openssl-dev \ + libssl-dev \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN wget -O /usr/local/bin/beagle.jar https://faculty.washington.edu/browning/beagle/beagle.28Jun21.220.jar && \ + echo '#!/bin/bash\njava -jar /usr/local/bin/beagle.jar "$@"' > /usr/local/bin/beagle && \ + chmod +x /usr/local/bin/beagle + +RUN wget -O /tmp/eagle.tar.gz https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/Eagle_v2.4.1.tar.gz && \ + tar -xzvf /tmp/eagle.tar.gz -C /usr/local/bin/ && \ + chmod +x /usr/local/bin/Eagle_v2.4.1/eagle && \ + rm /tmp/eagle.tar.gz + +RUN wget https://github.com/samtools/bcftools/releases/download/1.17/bcftools-1.17.tar.bz2 && \ + tar -xjf bcftools-1.17.tar.bz2 && \ + cd bcftools-1.17 && \ + ./configure && \ + make && \ + make install && \ + cd .. 
&& \ + rm -rf bcftools-1.17 bcftools-1.17.tar.bz2 + +ENV PATH="/usr/local/bin/Eagle_v2.4.1:$PATH" + +RUN apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* diff --git a/QC_graphs/.DS_Store b/QC_graphs/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/QC_graphs/.DS_Store differ diff --git a/bin/.DS_Store b/bin/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/bin/.DS_Store differ diff --git a/bin/.Rhistory b/bin/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/bin/LDpred2auto.R b/bin/LDpred2auto.R new file mode 100644 index 0000000..6b88e35 --- /dev/null +++ b/bin/LDpred2auto.R @@ -0,0 +1,233 @@ +# Set CRAN mirror +options(repos = c(CRAN = "https://cran.rstudio.com")) +options(bigstatsr.check.parallel.blas = FALSE) +options(default.nproc.blas = NULL) + +# Install necessary packages if not already installed +if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") +if (!requireNamespace("bigsnpr", quietly = TRUE)) remotes::install_github("privefl/bigsnpr") +if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr") +if (!requireNamespace("R.utils", quietly = TRUE)) install.packages("R.utils") +if (!requireNamespace("data.table", quietly = TRUE)) install.packages("data.table") +if (!requireNamespace("magrittr", quietly = TRUE)) install.packages("magrittr") + +library(bigsnpr) +library(dplyr) +library(R.utils) +library(data.table) +library(magrittr) +library(fmsb) + +# Parse arguments +args <- commandArgs(trailingOnly = TRUE) +phenotype_file <- args[1] +eigenvec_file <- args[2] +gwas_sumstat <- args[3] +bed_file <- args[4] + +# 1. Read in the phenotype and covariate files +phenotype <- fread(phenotype_file) +pcs <- fread(eigenvec_file) +colnames(pcs) <- c('FID', 'IID', paste0('PC', 1:10)) +pheno <- merge(phenotype, pcs) +pheno$Phenotype <- ifelse(pheno$Phenotype == 2, 1, 0) + +# 2. 
Information about the HapMap3+ variants +info <- readRDS(runonce::download_file( + 'https://figshare.com/ndownloader/files/37802721', + dir = 'tmp-data2', fname = 'map_hm3_plus.rds')) +str(info) + +# 3. Read external summary statistics +sumstats <- bigreadr::fread2(gwas_sumstat) +sumstats <- sumstats %>% rename('chr' = 'CHR', 'pos' = 'BP', 'a1' = 'A1', 'a0' = 'A2', 'A1_FREQ' = 'A1_FREQ', 'n_eff' = 'N', 'OR' = 'OR', 'beta_se' = 'SE', 'L95' = 'L95', 'U95' = 'U95', 'Z_STAT' = 'Z_STAT', 'p' = 'P', 'rsid' = 'SNP') +sumstats <- sumstats %>% select(chr, pos, rsid, a1, a0, n_eff, beta_se, p, OR) +sumstats$beta <- log(sumstats$OR) +sumstats <- sumstats[sumstats$rsid %in% info$rsid,] +fwrite(sumstats, 'ldpred2auto_output.txt') + +# Calculate LD Matrix +NCORES <- nb_cores() +tmp <- tempfile(tmpdir = 'tmp-data2') +on.exit(file.remove(paste0(tmp, '.sbk')), add = TRUE) +corr <- NULL +ld <- NULL +fam.order <- NULL + +# Debug messages +cat("Reading bed file...\n") +# Read from bed/bim/fam, it generates .bk and .rds files. 
+snp_readBed(bed_file) +cat("Finished reading bed file.\n") +# Debug message +cat("Attaching bigSNP object...\n") +# Attach the "bigSNP" object in R session +rds_file <- sub(".bed$", ".rds", bed_file) +cat("RDS file path:", rds_file, "\n") +obj.bigSNP <- snp_attach(rds_file) +# extract the SNP information from the genotype +map <- setNames(obj.bigSNP$map[-3], c('chr', 'rsid', 'pos', 'a1', 'a0')) +# perform SNP matching +df_beta <- snp_match(sumstats, map) + +# Get aliases for useful slots +G <- obj.bigSNP$genotypes +CHR <- obj.bigSNP$map$chromosome +POS <- obj.bigSNP$map$physical.pos +# Change affection column values into 0 and 1 +obj.bigSNP$fam$affection[obj.bigSNP$fam$affection == 1] <- 0 +obj.bigSNP$fam$affection[obj.bigSNP$fam$affection == 2] <- 1 +y <- obj.bigSNP$fam$affection + +#correlation (take 3cm) +# To convert physical positions (in bp) to genetic positions (in cM), use +POS2 <- snp_asGeneticPos(CHR, POS, dir = "tmp-data2", ncores = NCORES) +# Before computing the LD matrices, let us filter out variants with small minor allele frequencies (MAFs): +ind.row <- rows_along(G) +maf <- snp_MAF(G, ind.row = ind.row, ind.col = df_beta$`_NUM_ID_`, ncores = NCORES) +maf_thr <- 1 / sqrt(length(ind.row)) # threshold I like to use +df_beta <- df_beta[maf > maf_thr, ] +#df_beta <- snp_match(sumstats, map, join_by_pos = FALSE) # use rsid instead of pos + +## Prepare data as validation and test +set.seed(1) +# Calculate the number of samples for validation data +n_val <- floor(0.7 * nrow(G)) +# Sample indices for validation data +ind.val <- sample(nrow(G), n_val) +# Remaining indices for test data +ind.test <- setdiff(rows_along(G), ind.val) +# Debug message +cat("Split data as validation (70%) and test (30%)\n") + +# Debug message +cat("Calculating LD matrix...\n") +# calculate LD +for (chr in unique(df_beta$chr)) { + + # print(chr) + # indices in 'df_beta' + ind.chr <- which(df_beta$chr == chr) + + # indices in 'G' + ind.chr2 <- df_beta$`_NUM_ID_`[ind.chr] + + # here 
we compute LD matrices ourselves, BUT + # we recall that we provide pre-computed LD matrices that are + # usually much better (larger N, LD blocks for robustness, etc) + corr0 <- snp_cor(G, ind.col = ind.chr2, size = 3 / 1000, + infos.pos = POS2[ind.chr2], ncores = NCORES) + + if (chr == 1) { + ld <- Matrix::colSums(corr0^2) + corr <- as_SFBM(corr0, tmp, compact = TRUE) + } else { + ld <- c(ld, Matrix::colSums(corr0^2)) + corr$add_columns(corr0, nrow(corr)) + } +} + +# We assume the fam order is the same across different chromosomes +fam.order <- as.data.table(obj.bigSNP$fam) +# Rename fam order +setnames(fam.order,c("family.ID", "sample.ID"), c("FID", "IID")) + +# Debug message +cat("Perform LD score regression\n") + +# Perform LD score regression +ldsc <- snp_ldsc( ld, + length(ld), + chi2 = (df_beta$beta / df_beta$beta_se)^2, + sample_size = df_beta$n_eff, + blocks = NULL) +h2_est <- ldsc[["h2"]] + +# Calculate the null R2 (binary trait +# Reformat the phenotype file such that y is of the same order as the +# sample ordering in the genotype file +a <- pheno[fam.order, on = c("FID", "IID")] +# Calculate the null R2 +# use glm for binary trait +# (will also need the fmsb package to calculate the pseudo R2) +null.model <- paste("PC", 1:10, sep = "", collapse = "+") %>% + paste0("Phenotype~", .) 
%>% + as.formula %>% + glm(data = a, family=binomial) +null.r2 <- fmsb::NagelkerkeR2(null.model) + +### LD-Pred:auto +coef_shrink <- 0.95 # reduce this up to 0.4 if you have some (large) mismatch with the LD ref +set.seed(1) # to get the same result every time + + # Get adjusted beta from the auto model + multi_auto <- snp_ldpred2_auto( + corr, + df_beta, + h2_init = h2_est, + vec_p_init = seq_log(1e-4, 0.9, length.out = 30), + ncores = NCORES, allow_jump_sign = FALSE, shrink_corr = coef_shrink) + + str(multi_auto, max.level = 1) + + str(multi_auto[[1]], max.level = 1) +# Debug message +cat("Visualization of auto model \n") + +# Visualization of auto +library(ggplot2) +auto <- multi_auto[[1]] # first chain + +pdf("LDpred2 auto graph.pdf") +plot_grid( +qplot(y = auto$path_p_est) + +theme_bigstatsr() + +geom_hline(yintercept = auto$p_est, col = "blue") + +scale_y_log10() + +labs(y = "p"), +qplot(y = auto$path_h2_est) + +theme_bigstatsr() + +geom_hline(yintercept = auto$h2_est, col = "blue") + +labs(y = "h2"), +ncol = 1, align = "hv") +dev.off() + + +(range <- sapply(multi_auto, function(auto) diff(range(auto$corr_est)))) +(keep <- which(range > (0.95 * quantile(range, 0.95, na.rm = TRUE)))) + +# Debug message +cat("Obtain Model PRS \n") + +# (Obtain Model PRS) +beta_auto <- rowMeans(sapply(multi_auto[keep], function(auto) auto$beta_est)) +pred_auto <- big_prodVec(G, beta_auto, ind.row = ind.test, ind.col = df_beta[["_NUM_ID_"]]) +pcor(pred_auto, y[ind.test], NULL) + +# Get final effects(for all) + +# Get final effects (for all data) +beta_auto_all <- rowMeans(sapply(multi_auto[keep], function(auto) auto$beta_est)) +pred_auto_all <- big_prodVec(G, beta_auto, ind.row = ind.row, ind.col = df_beta[["_NUM_ID_"]]) + +reg.formula <- paste("PC", 1:10, sep = "", collapse = "+") %>% +paste0("Phenotype~", .) 
%>% +as.formula +reg.dat <- a +reg.dat$PRS <- pred_auto_all +auto.model <- lm(reg.formula, dat=reg.dat) %>% +summary +(result <- data.table( + auto = auto.model$r.squared - null.r2$R2, + null = null.r2$R2 +)) + +write.table(reg.dat, "LDpred2_auto.txt", sep = "\t", row.names = FALSE, col.names = TRUE) + +# Debug message +cat("Auto results was written as LDpred2_auto.txt file \n") + +# Debug message +cat("Some cleaning \n") +# Some cleaning +rm(corr); gc(); file.remove(paste0(tmp, ".sbk")) diff --git a/bin/LDpred2grid.R b/bin/LDpred2grid.R new file mode 100644 index 0000000..0d828d1 --- /dev/null +++ b/bin/LDpred2grid.R @@ -0,0 +1,254 @@ +# Set CRAN mirror +options(repos = c(CRAN = "https://cran.rstudio.com")) +options(bigstatsr.check.parallel.blas = FALSE) +options(default.nproc.blas = NULL) + +# Install necessary packages if not already installed +if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") +if (!requireNamespace("bigsnpr", quietly = TRUE)) remotes::install_github("privefl/bigsnpr") +if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr") +if (!requireNamespace("R.utils", quietly = TRUE)) install.packages("R.utils") +if (!requireNamespace("data.table", quietly = TRUE)) install.packages("data.table") +if (!requireNamespace("magrittr", quietly = TRUE)) install.packages("magrittr") +if (!requireNamespace("fmsb", quietly = TRUE)) install.packages("fmsb") + +library(bigsnpr) +library(dplyr) +library(R.utils) +library(data.table) +library(magrittr) +library(fmsb) + +# Parse arguments +args <- commandArgs(trailingOnly = TRUE) +phenotype_file <- args[1] +eigenvec_file <- args[2] +gwas_sumstat <- args[3] +bed_file <- args[4] + +# 1. Read in the phenotype and covariate files +phenotype <- fread(phenotype_file) +pcs <- fread(eigenvec_file) +colnames(pcs) <- c('FID', 'IID', paste0('PC', 1:10)) +pheno <- merge(phenotype, pcs) +pheno$Phenotype <- ifelse(pheno$Phenotype == 2, 1, 0) + +# 2. 
Information about the HapMap3+ variants +info <- readRDS(runonce::download_file( + 'https://figshare.com/ndownloader/files/37802721', + dir = 'tmp-data', fname = 'map_hm3_plus.rds')) +str(info) + +# 3. Read external summary statistics +sumstats <- bigreadr::fread2(gwas_sumstat) +sumstats <- sumstats %>% rename('chr' = 'CHR', 'pos' = 'BP', 'a1' = 'A1', 'a0' = 'A2', 'A1_FREQ' = 'A1_FREQ', 'n_eff' = 'N', 'OR' = 'OR', 'beta_se' = 'SE', 'L95' = 'L95', 'U95' = 'U95', 'Z_STAT' = 'Z_STAT', 'p' = 'P', 'rsid' = 'SNP') +sumstats <- sumstats %>% select(chr, pos, rsid, a1, a0, n_eff, beta_se, p, OR) +sumstats$beta <- log(sumstats$OR) +sumstats <- sumstats[sumstats$rsid %in% info$rsid,] +fwrite(sumstats, 'ldpred2grid_output.txt') + +# Calculate LD Matrix +NCORES <- nb_cores() +tmp <- tempfile(tmpdir = 'tmp-data') +on.exit(file.remove(paste0(tmp, '.sbk')), add = TRUE) +corr <- NULL +ld <- NULL +fam.order <- NULL + +# Debug messages +cat("Reading bed file...\n") +# Read from bed/bim/fam, it generates .bk and .rds files. 
+snp_readBed(bed_file) +cat("Finished reading bed file.\n") +# Debug message +cat("Attaching bigSNP object...\n") +# Attach the "bigSNP" object in R session +rds_file <- sub(".bed$", ".rds", bed_file) +cat("RDS file path:", rds_file, "\n") +obj.bigSNP <- snp_attach(rds_file) +# extract the SNP information from the genotype +map <- setNames(obj.bigSNP$map[-3], c('chr', 'rsid', 'pos', 'a1', 'a0')) +# perform SNP matching +df_beta <- snp_match(sumstats, map) + +# Get aliases for useful slots +G <- obj.bigSNP$genotypes +CHR <- obj.bigSNP$map$chromosome +POS <- obj.bigSNP$map$physical.pos +# Change affection column values into 0 and 1 +obj.bigSNP$fam$affection[obj.bigSNP$fam$affection == 1] <- 0 +obj.bigSNP$fam$affection[obj.bigSNP$fam$affection == 2] <- 1 +y <- obj.bigSNP$fam$affection + +#correlation (take 3cm) +# To convert physical positions (in bp) to genetic positions (in cM), use +POS2 <- snp_asGeneticPos(CHR, POS, dir = "tmp-data", ncores = NCORES) +# Before computing the LD matrices, let us filter out variants with small minor allele frequencies (MAFs): +ind.row <- rows_along(G) +maf <- snp_MAF(G, ind.row = ind.row, ind.col = df_beta$`_NUM_ID_`, ncores = NCORES) +maf_thr <- 1 / sqrt(length(ind.row)) # threshold I like to use +df_beta <- df_beta[maf > maf_thr, ] +#df_beta <- snp_match(sumstats, map, join_by_pos = FALSE) # use rsid instead of pos + +## Prepare data as validation and test +set.seed(1) +# Calculate the number of samples for validation data +n_val <- floor(0.7 * nrow(G)) +# Sample indices for validation data +ind.val <- sample(nrow(G), n_val) +# Remaining indices for test data +ind.test <- setdiff(rows_along(G), ind.val) +# Debug message +cat("Split data as validation (70%) and test (30%)\n") + +# Debug message +cat("Calculating LD matrix...\n") +# calculate LD +for (chr in unique(df_beta$chr)) { + + # print(chr) + + ## indices in 'df_beta' + ind.chr <- which(df_beta$chr == chr) + ## indices in 'G' + ind.chr2 <- df_beta$`_NUM_ID_`[ind.chr] + + # 
here we compute LD matrices ourselves, BUT + # we recall that we provide pre-computed LD matrices that are + # usually much better (larger N, LD blocks for robustness, etc) + corr0 <- snp_cor(G, ind.col = ind.chr2, size = 3 / 1000, + infos.pos = POS2[ind.chr2], ncores = NCORES) + + if (chr == 1) { + ld <- Matrix::colSums(corr0^2) + corr <- as_SFBM(corr0, tmp, compact = TRUE) + } else { + ld <- c(ld, Matrix::colSums(corr0^2)) + corr$add_columns(corr0, nrow(corr)) + } +} + +# We assume the fam order is the same across different chromosomes +fam.order <- as.data.table(obj.bigSNP$fam) +# Rename fam order +setnames(fam.order,c("family.ID", "sample.ID"), c("FID", "IID")) + +# Debug message +cat("Perform LD score regression\n") + +# Perform LD score regression +ldsc <- snp_ldsc( ld, + length(ld), + chi2 = (df_beta$beta / df_beta$beta_se)^2, + sample_size = df_beta$n_eff, + blocks = NULL) +h2_est <- ldsc[["h2"]] + +# Calculate the null R2 (binary trait +# Reformat the phenotype file such that y is of the same order as the +# sample ordering in the genotype file +a <- pheno[fam.order, on = c("FID", "IID")] +# Calculate the null R2 +# use glm for binary trait +# (will also need the fmsb package to calculate the pseudo R2) +null.model <- paste("PC", 1:10, sep = "", collapse = "+") %>% + paste0("Phenotype~", .) %>% + as.formula %>% + glm(data = a, family=binomial) +null.r2 <- fmsb::NagelkerkeR2(null.model) + +# Debug message +cat("Prepare data for grid model... 
\n") + +# Prepare data for grid model +p_seq <- signif(seq_log(1e-5, 1, length.out = 21), 2) +h2_seq <- round(h2_est * c(0.3,0.7, 1, 1.4), 4) +grid.param <- expand.grid(p = p_seq, h2 = h2_seq,sparse = c(FALSE, TRUE)) + +# Debug message +cat("Prepared data for grid model \n") + +# Get adjusted beta from grid model +set.seed(1) # to get the same result every time +beta_grid <- snp_ldpred2_grid(corr, df_beta, grid.param, ncores = NCORES) + +# Debug message +cat("Get adjusted beta from grid model \n") + +## Obtain Model PRS (Validation set) +# calculate PRS for samples +pred_grid <- big_prodMat( G, beta_grid, ind.col = df_beta$`_NUM_ID_`) + +## Obtain scores For validation data +grid.param$score <- apply(pred_grid[ind.val, ], 2, function(x) { +if (all(is.na(x))) return(NA) +# summary(lm(y[ind.val] ~ x))$coef["x", 3] +summary(glm(y[ind.val] ~ x, family = "binomial"))$coef["x", 3] +}) + +# Debug message +cat("Obtain scores For validation data \n") + +## Visualization of grid glm-z (For validation set) +library(ggplot2) + +pdf("LDpred2 grid glm-z.pdf") +ggplot(grid.param, aes(x = p, y = score, color = as.factor(h2))) + +theme_bigstatsr() + +geom_point() + +geom_line() + +scale_x_log10(breaks = 10^(-5:0), minor_breaks = grid.param$p) + +facet_wrap(grid.param$sparse, labeller = label_both) + +labs(y = "GLM Z-Score", color = "h2") + +theme(legend.position = "top", panel.spacing = unit(1, "lines")) +dev.off() + +# Debug message +cat("Choose the best model \n") + +## Choose the best model +library(dplyr) +grid.param %>% +mutate(sparsity = colMeans(beta_grid == 0), id = row_number()) %>% +arrange(desc(score)) %>% +mutate_at(c("score", "sparsity"), round, digits = 3) %>% +slice(1:10) + + +best_beta_grid <- grid.param %>% +mutate(id = row_number()) %>% +filter(sparse) %>% +arrange(desc(score)) %>% +slice(1) %>% +print() %>% +pull(id) %>% +beta_grid[, .] 
+ +pred <- big_prodVec(G, best_beta_grid, ind.row = ind.test, +ind.col = df_beta[["_NUM_ID_"]]) +pcor(pred, y[ind.test], NULL) + +## Get the final performance +reg.formula <- paste("PC", 1:10, sep = "", collapse = "+") %>% + paste0("Phenotype~", .) %>% +as.formula +reg.dat <- a +max.r2 <- 0 +for(i in 1:ncol(pred_grid)){ +reg.dat$PRS <- pred_grid[,i] +grid.model <- lm(reg.formula, dat=reg.dat) %>% +summary() +if(max.r2 < grid.model$r.squared){ + max.r2 <- grid.model$r.squared +} +} +write.table(reg.dat, "LDpred2_grid.txt", sep = "\t", row.names = FALSE, col.names = TRUE) +(result <- data.table(grid = max.r2 - null.r2$R2, null = null.r2$R2)) + +# Debug message +cat("Grid results was written as LDpred2_grid.txt file \n") + +# Debug message +cat("Some cleaning \n") +# Some cleaning +rm(corr); gc(); file.remove(paste0(tmp, ".sbk")) \ No newline at end of file diff --git a/bin/Lassosum2.R b/bin/Lassosum2.R new file mode 100644 index 0000000..e57c5be --- /dev/null +++ b/bin/Lassosum2.R @@ -0,0 +1,252 @@ +# Set CRAN mirror +options(repos = c(CRAN = "https://cran.rstudio.com")) +options(bigstatsr.check.parallel.blas = FALSE) +options(default.nproc.blas = NULL) + +# Install necessary packages if not already installed +if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") +if (!requireNamespace("bigsnpr", quietly = TRUE)) remotes::install_github("privefl/bigsnpr") +if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr") +if (!requireNamespace("R.utils", quietly = TRUE)) install.packages("R.utils") +if (!requireNamespace("data.table", quietly = TRUE)) install.packages("data.table") +if (!requireNamespace("magrittr", quietly = TRUE)) install.packages("magrittr") +if (!requireNamespace("fmsb", quietly = TRUE)) install.packages("fmsb") + +library(bigsnpr) +library(dplyr) +library(R.utils) +library(data.table) +library(magrittr) +library(fmsb) + +# Parse arguments +args <- commandArgs(trailingOnly = TRUE) +phenotype_file <- args[1] 
+eigenvec_file <- args[2] +gwas_sumstat <- args[3] +bed_file <- args[4] + +# 1. Read in the phenotype and covariate files +phenotype <- fread(phenotype_file) +pcs <- fread(eigenvec_file) +colnames(pcs) <- c('FID', 'IID', paste0('PC', 1:10)) +pheno <- merge(phenotype, pcs) +pheno$Phenotype <- ifelse(pheno$Phenotype == 2, 1, 0) + +# 2. Information about the HapMap3+ variants +info <- readRDS(runonce::download_file( + 'https://figshare.com/ndownloader/files/37802721', + dir = 'tmp-data3', fname = 'map_hm3_plus.rds')) +str(info) + +# 3. Read external summary statistics +sumstats <- bigreadr::fread2(gwas_sumstat) +sumstats <- sumstats %>% rename('chr' = 'CHR', 'pos' = 'BP', 'a1' = 'A1', 'a0' = 'A2', 'A1_FREQ' = 'A1_FREQ', 'n_eff' = 'N', 'OR' = 'OR', 'beta_se' = 'SE', 'L95' = 'L95', 'U95' = 'U95', 'Z_STAT' = 'Z_STAT', 'p' = 'P', 'rsid' = 'SNP') +sumstats <- sumstats %>% select(chr, pos, rsid, a1, a0, n_eff, beta_se, p, OR) +sumstats$beta <- log(sumstats$OR) +sumstats <- sumstats[sumstats$rsid %in% info$rsid,] +fwrite(sumstats, 'lassosum2_output.txt') + +# Calculate LD Matrix +NCORES <- nb_cores() +tmp <- tempfile(tmpdir = 'tmp-data3') +on.exit(file.remove(paste0(tmp, '.sbk')), add = TRUE) +corr <- NULL +ld <- NULL +fam.order <- NULL + +# Debug messages +cat("Reading bed file...\n") +# Read from bed/bim/fam, it generates .bk and .rds files. 
+snp_readBed(bed_file) +cat("Finished reading bed file.\n") +# Debug message +cat("Attaching bigSNP object...\n") +# Attach the "bigSNP" object in R session +rds_file <- sub(".bed$", ".rds", bed_file) +cat("RDS file path:", rds_file, "\n") +obj.bigSNP <- snp_attach(rds_file) +# extract the SNP information from the genotype +map <- setNames(obj.bigSNP$map[-3], c('chr', 'rsid', 'pos', 'a1', 'a0')) +# perform SNP matching +df_beta <- snp_match(sumstats, map) + +# Get aliases for useful slots +G <- obj.bigSNP$genotypes +CHR <- obj.bigSNP$map$chromosome +POS <- obj.bigSNP$map$physical.pos +# Change affection column values into 0 and 1 +obj.bigSNP$fam$affection[obj.bigSNP$fam$affection == 1] <- 0 +obj.bigSNP$fam$affection[obj.bigSNP$fam$affection == 2] <- 1 +y <- obj.bigSNP$fam$affection + +#correlation (take 3cm) +# To convert physical positions (in bp) to genetic positions (in cM), use +POS2 <- snp_asGeneticPos(CHR, POS, dir = "tmp-data3", ncores = NCORES) +# Before computing the LD matrices, let us filter out variants with small minor allele frequencies (MAFs): +ind.row <- rows_along(G) +maf <- snp_MAF(G, ind.row = ind.row, ind.col = df_beta$`_NUM_ID_`, ncores = NCORES) +maf_thr <- 1 / sqrt(length(ind.row)) # threshold I like to use +df_beta <- df_beta[maf > maf_thr, ] +#df_beta <- snp_match(sumstats, map, join_by_pos = FALSE) # use rsid instead of pos + +## Prepare data as validation and test +set.seed(1) +# Calculate the number of samples for validation data +n_val <- floor(0.7 * nrow(G)) +# Sample indices for validation data +ind.val <- sample(nrow(G), n_val) +# Remaining indices for test data +ind.test <- setdiff(rows_along(G), ind.val) +# Debug message +cat("Split data as validation (70%) and test (30%)\n") + +# Debug message +cat("Calculating LD matrix...\n") +# calculate LD +for (chr in unique(df_beta$chr)) { + + # print(chr) + + ## indices in 'df_beta' + ind.chr <- which(df_beta$chr == chr) + ## indices in 'G' + ind.chr2 <- df_beta$`_NUM_ID_`[ind.chr] + + # 
here we compute LD matrices ourselves, BUT + # we recall that we provide pre-computed LD matrices that are + # usually much better (larger N, LD blocks for robustness, etc) + corr0 <- snp_cor(G, ind.col = ind.chr2, size = 3 / 1000, + infos.pos = POS2[ind.chr2], ncores = NCORES) + + if (chr == 1) { + ld <- Matrix::colSums(corr0^2) + corr <- as_SFBM(corr0, tmp, compact = TRUE) + } else { + ld <- c(ld, Matrix::colSums(corr0^2)) + corr$add_columns(corr0, nrow(corr)) + } +} + +# We assume the fam order is the same across different chromosomes +fam.order <- as.data.table(obj.bigSNP$fam) +# Rename fam order +setnames(fam.order,c("family.ID", "sample.ID"), c("FID", "IID")) + +# Debug message +cat("Perform LD score regression\n") + +# Perform LD score regression +ldsc <- snp_ldsc( ld, + length(ld), + chi2 = (df_beta$beta / df_beta$beta_se)^2, + sample_size = df_beta$n_eff, + blocks = NULL) +h2_est <- ldsc[["h2"]] + +# Calculate the null R2 (binary trait +# Reformat the phenotype file such that y is of the same order as the +# sample ordering in the genotype file +a <- pheno[fam.order, on = c("FID", "IID")] +# Calculate the null R2 +# use glm for binary trait +# (will also need the fmsb package to calculate the pseudo R2) +null.model <- paste("PC", 1:10, sep = "", collapse = "+") %>% + paste0("Phenotype~", .) %>% + as.formula %>% + glm(data = a, family=binomial) +null.r2 <- fmsb::NagelkerkeR2(null.model) + +# Debug message +cat("Prepare data for grid model... 
\n") + +# Prepare data for grid model +p_seq <- signif(seq_log(1e-5, 1, length.out = 21), 2) +h2_seq <- round(h2_est * c(0.3,0.7, 1, 1.4), 4) +grid.param <- expand.grid(p = p_seq, h2 = h2_seq,sparse = c(FALSE, TRUE)) + + +# Debug message +cat("Start calculations for Lassosum2 \n") +beta_lassosum2 <- snp_lassosum2(corr, df_beta, ncores = NCORES) +(params2 <- attr(beta_lassosum2, "grid_param")) +pred_grid2 <- big_prodMat(G, beta_lassosum2, ind.col = df_beta[["_NUM_ID_"]]) + +params2$score <- apply(pred_grid2[ind.val, ], 2, function(x) { + if (all(is.na(x))) return(NA) + #summary(lm(y[ind.val] ~ x))$coef["x", 3] + summary(glm(y[ind.val] ~ x, family = "binomial"))$coef["x", 3] +}) + +library(ggplot2) +pdf("Lassosum2_graph.pdf") +ggplot(params2, aes(x = lambda, y = score, color = as.factor(delta))) + + theme_bigstatsr() + + geom_point() + + geom_line() + + scale_x_log10(breaks = 10^(-5:0)) + + labs(y = "GLM Z-Score", color = "delta") + + theme(legend.position = "top") + + guides(colour = guide_legend(nrow = 1)) +dev.off() + +best_grid_lassosum2 <- params2 %>% + mutate(id = row_number()) %>% + arrange(desc(score)) %>% + print() %>% + slice(1) %>% + pull(id) %>% + beta_lassosum2[, .] 
+ +# Debug message +cat("Choose the best among all LDpred2-grid and lassosum2 models \n") + +best_grid_overall <- + `if`(max(params2$score, na.rm = TRUE) > max(grid.param$score, na.rm = TRUE), + best_grid_lassosum2, best_beta_grid) + +coef_shrink <- 0.95 + + +multi_auto <- snp_ldpred2_auto( + corr, df_beta, h2_init = h2_est, + vec_p_init = seq_log(1e-4, 0.9, length.out = 50), ncores = NCORES, + burn_in = 500, num_iter = 500, report_step = 20, + # use_MLE = FALSE, # uncomment if you have convergence issues or when power is low (need v1.11.9) + allow_jump_sign = FALSE, shrink_corr = coef_shrink) + +(range <- sapply(multi_auto, function(auto) diff(range(auto$corr_est)))) + +(keep <- which(range > (0.95 * quantile(range, 0.95, na.rm = TRUE)))) +all_h2 <- sapply(multi_auto[keep], function(auto) tail(auto$path_h2_est, 500)) +quantile(all_h2, c(0.5, 0.025, 0.975)) + +all_p <- sapply(multi_auto[keep], function(auto) tail(auto$path_p_est, 500)) +quantile(all_p, c(0.5, 0.025, 0.975)) + +all_alpha <- sapply(multi_auto[keep], function(auto) tail(auto$path_alpha_est, 500)) +quantile(all_alpha, c(0.5, 0.025, 0.975)) + +bsamp <- lapply(multi_auto[keep], function(auto) auto$sample_beta) +all_r2 <- do.call("cbind", lapply(seq_along(bsamp), function(ic) { + b1 <- bsamp[[ic]] + Rb1 <- apply(b1, 2, function(x) + coef_shrink * bigsparser::sp_prodVec(corr, x) + (1 - coef_shrink) * x) + b2 <- do.call("cbind", bsamp[-ic]) + b2Rb1 <- as.matrix(Matrix::crossprod(b2, Rb1)) +})) +quantile(all_r2, c(0.5, 0.025, 0.975)) + +beta_auto <- rowMeans(sapply(multi_auto[keep], function(auto) auto$beta_est)) +pred_auto <- big_prodVec(G, beta_auto, ind.col = df_beta[["_NUM_ID_"]]) +pcor(pred_auto, y, NULL)^2 + +postp <- rowMeans(sapply(multi_auto[keep], function(auto) auto$postp_est)) +pdf("Lassosum2_qplot.pdf") +qplot(y = postp, alpha = I(0.2)) + theme_bigstatsr() +dev.off() + +# Debug message +cat("Some cleaning \n") +# Some cleaning +rm(corr); gc(); file.remove(paste0(tmp, ".sbk")) \ No newline at end 
of file diff --git a/bin/MAFcheck.R b/bin/MAFcheck.R new file mode 100644 index 0000000..f6daf42 --- /dev/null +++ b/bin/MAFcheck.R @@ -0,0 +1,9 @@ + #!/usr/bin/env Rscript + library(ggplot2) + library(data.table) + args <- commandArgs(trailingOnly = TRUE) + maf_freq <- read.table(args[1], header =TRUE, as.is=T) + pdf("Target_MAF distribution.pdf") + MAF <- hist(maf_freq[,5],main = "MAF distribution", xlab = "MAF",col="#d6604d") + dev.off() + \ No newline at end of file diff --git a/bin/PRSice_mac b/bin/PRSice_mac new file mode 100644 index 0000000..5e72f5f Binary files /dev/null and b/bin/PRSice_mac differ diff --git a/bin/format_gwas.R b/bin/format_gwas.R new file mode 100644 index 0000000..7d0f552 --- /dev/null +++ b/bin/format_gwas.R @@ -0,0 +1,27 @@ +#!/usr/bin/env Rscript +args <- commandArgs(trailingOnly = TRUE) +input_file <- args[1] + +library(dplyr) +library(data.table) + +# Read GWAS +gwas_data <- read.table(input_file, header = TRUE, stringsAsFactors = FALSE) + +# Kolonları yeniden adlandırma ve seçme +formatted_gwas <- gwas_data %>% + dplyr::select(SNP, A1, A2, A1_FREQ, OR, SE, P, N) %>% + dplyr::rename( + freq = A1_FREQ, + b = OR, + se = SE, + p = P, + N = N + ) + +# Odds Ratio'yu log Odds Ratio'ya dönüştürme +formatted_gwas <- formatted_gwas %>% + dplyr::mutate(b = log(b)) + +# Yeni formattaki dosyayı dışa aktarma +write.table(formatted_gwas, "formatted_gwas.ma", quote = FALSE, row.names = FALSE, sep = "\t") diff --git a/bin/heterozygosity.R b/bin/heterozygosity.R new file mode 100644 index 0000000..a420cd7 --- /dev/null +++ b/bin/heterozygosity.R @@ -0,0 +1,8 @@ + #!/usr/bin/env Rscript + library(data.table) + args <- commandArgs(trailingOnly = TRUE) + het <- read.table(args[1], header =TRUE, as.is=T) + het_fail = subset(het, (het$HET_RATE < mean(het$HET_RATE)-3*sd(het$HET_RATE)) | (het$HET_RATE > mean(het$HET_RATE)+3*sd(het$HET_RATE))); + het_fail$HET_DST = (het_fail$HET_RATE-mean(het$HET_RATE))/sd(het$HET_RATE); + write.table(het_fail, 
"target_fail-het-qc.txt", row.names=FALSE) + \ No newline at end of file diff --git a/bin/ldak6.mac b/bin/ldak6.mac new file mode 100755 index 0000000..4f1ac80 Binary files /dev/null and b/bin/ldak6.mac differ diff --git a/bin/plot_het.R b/bin/plot_het.R new file mode 100644 index 0000000..9ddbfaf --- /dev/null +++ b/bin/plot_het.R @@ -0,0 +1,15 @@ +args <- commandArgs(trailingOnly = TRUE) +het_file <- args[1] + +library(ggplot2) +library(data.table) +library(dplyr) + +# Read data into R +het <- read.table(het_file, header = TRUE) +het$HET_RATE = (het$"N.NM." - het$"O.HOM.") / het$"N.NM." + +# Generate plot +pdf("het.pdf") +hist(het$HET_RATE, xlab = "Heterozygosity Rate", ylab = "Frequency", main = "Heterozygosity Rate", col = "#92c5de") +dev.off() diff --git a/bin/plot_hwe.R b/bin/plot_hwe.R new file mode 100644 index 0000000..24735cb --- /dev/null +++ b/bin/plot_hwe.R @@ -0,0 +1,14 @@ +args <- commandArgs(trailingOnly = TRUE) +hwe_file <- args[1] + +library(ggplot2) +library(data.table) +library(dplyr) + +# Read data into R +hwe <- read.table(hwe_file, header = TRUE) + +# Generate plot +pdf("hwe.pdf") +hist(hwe[, 9], main = "Histogram HWE", col = "#f4a582") +dev.off() diff --git a/bin/plot_ibd.R b/bin/plot_ibd.R new file mode 100644 index 0000000..cd64625 --- /dev/null +++ b/bin/plot_ibd.R @@ -0,0 +1,14 @@ +args <- commandArgs(trailingOnly = TRUE) +ibd_file <- args[1] + +library(ggplot2) +library(data.table) +library(dplyr) + +# Read data into R +ibd_data <- read.table(ibd_file, header = TRUE) + +# Generate plot +pdf("ibd_plot.pdf") +hist(ibd_data[, "PI_HAT"], main = "Histogram of IBD (PI_HAT)", col = "#66c2a4") +dev.off() diff --git a/bin/plot_maf.R b/bin/plot_maf.R new file mode 100644 index 0000000..423bf7f --- /dev/null +++ b/bin/plot_maf.R @@ -0,0 +1,12 @@ +library(ggplot2) +library(data.table) +library(dplyr) +args <- commandArgs(trailingOnly = TRUE) +maf_freq_file <- args[1] +# Read data into R +maf_freq <- read.table("MAF_check.frq", header =TRUE, 
as.is=T) + + # Generate plot + pdf("MAF_dist.pdf") + MAF <- hist(maf_freq[,5], main = "MAF distribution", xlab = "MAF", col="#d6604d") + dev.off() diff --git a/bin/process_het.R b/bin/process_het.R new file mode 100644 index 0000000..9ddbfaf --- /dev/null +++ b/bin/process_het.R @@ -0,0 +1,15 @@ +args <- commandArgs(trailingOnly = TRUE) +het_file <- args[1] + +library(ggplot2) +library(data.table) +library(dplyr) + +# Read data into R +het <- read.table(het_file, header = TRUE) +het$HET_RATE = (het$"N.NM." - het$"O.HOM.") / het$"N.NM." + +# Generate plot +pdf("het.pdf") +hist(het$HET_RATE, xlab = "Heterozygosity Rate", ylab = "Frequency", main = "Heterozygosity Rate", col = "#92c5de") +dev.off() diff --git a/eagle b/eagle new file mode 100644 index 0000000..e87cf74 Binary files /dev/null and b/eagle differ diff --git a/main.nf b/main.nf new file mode 100644 index 0000000..335e6cd --- /dev/null +++ b/main.nf @@ -0,0 +1,442 @@ +/* + * HELP MESSAGE + */ + +if (params.help) { + log.info """ + How to use : + + Required Arguments: + --target prefix of plink files (.bed, .bim, .fam) + + Optional Arguments: + --mind Threshold value to delete individuals with missingness + --geno Threshold value to delete SNPs with missingness + --maf Threshold value for minimum MAF frequencies of SNPs + --hwe_case Threshold value for Hardy-Weinberg Equilibrium for cases + --hwe_ctrl Threshold value for Hardy-Weinberg Equilibrium for controls + --indep_window_size The window size + --indep_step_size The number of SNPs to shift the window at each step + --indep_threshold The multiple correlation coefficient for a SNP being regressed on all other SNPs simultaneously.
+ --pihat The default threshold 0.1875 represents the half-way point between 2nd and 3rd degree relatives + --relatedness The same threshold with pihat value + --ref_phase Path of Reference BCF folder for Phasing + --genetic_map Path of genetic_map file for Phasing + --ref_imp Path of Reference bref3 folder for Imputation + --g_map Path of Reference genetic map folder for Imputation + --pca Number of principal components to compute, default is 10 + --pheno_file Name of the phenotype file located under the target folder + --run_plink Run the PLINK part of the workflow, default = true + --run_prsice Run the PRSice-2 part of the workflow, default = true + --run_pca Run the PCA part of the workflow if set to true, creates ".eigenvec" file + --run_LDpred2grid Run the LDPred2 Grid Model of the workflow , default = true + --run_LDpred2auto Run the LDPred2 Auto Model of the workflow, default = true + --run_Lassosum2 Run the Lassosum2 model of the workflow, default = true + --run_megaprs Run the MegaPRS model of the workflow, default = true + --ldak_executable Path to the LDAK executable (For MegaPRS) + --mega_model Model type for MegaPRS. Options: lasso, ridge, bayesr, etc.
Default = bayesr + --run_prscsx Run the PRScsx of the workflow, default = true + --run_mussel Run the MUSSEL of the workflow if set to true, default = false + --prsice_script Path to the PRSice R script + --prsice_executable Path to the PRSice executable + --prscsxref_dir Path to PRScsx reference panel, could be UKBB or 1KG + --prscsx_gwas1 1st GWAS sum stat for PRScsx analysis + --prscsx_gwas2 2nd GWAS sum stat for PRScsx analysis + --n_gwas1 Sample size for GWAS1 + --n_gwas2 Sample size for GWAS2 + --pop1 Ancestry of 1st Gwas sum stat, could be AFR,AMR,EAS,EUR,SAS + --pop2 Ancestry of 2nd Gwas sum stat, could be AFR,AMR,EAS,EUR,SAS + --phi Global shrinkage parameter phi, fixing phi to 1e-2(for highly polygenic traits) or 1e-4(for less polygenic traits) + --meta Return combined SNP effect sizes across populations using inverse variance weighted meta-analysis of population-specific posterior effect size estimates. Default is True. + --pack Path to MUSSEL folder + --data Path to MUSSEL data + --LDref Path to LDref folder for MUSSEL module + --sst Path to summary statistic data files for MUSSEL module + --pop Used populations for MUSSEL module, could be EUR, AFR, AMR, EAS or SAS + --mussel_chrom Specify the chromosomes to be analyzed by MUSSEL module , by default all chromosomes are analyzed + --bfile_tuning Path to PLINK binary input file prefix for tuning of MUSSEL module + --pheno_tuning Path to phenotype file (PLINK format) for tuning of MUSSEL module + --covar_tuning Path to quantitative covariates (PLINK format) for tuning + --bfile_testing Path to PLINK binary input file prefix for testing of MUSSEL module + --pheno_testing Path to phenotype file (PLINK format) for testing of MUSSEL module + --covar_testing Path to quantitative covariates (PLINK format) for testing + --trait_type Type of phenotype, continuous or binary for MUSSEL module.
Default: continuous + --NCORES How many cores to use for MUSSEL modules + --plink path to plink2 for MUSSEL module + """ + exit 1 +} + +println """ + + Quality Control Steps of Target Data + + ========================================= + target: ${params.target} + outdir: ${params.outdir} + visualization : ${params.graphs} + + --help parameter could be used to see arguments + +""".stripIndent() +include {ConvertVCFtoPLINK } from './modules/ConvertVCFtoPLINK' +include { GWAS_QC } from './modules/GWAS_QC' +include { QCSNPmissingness } from './modules/QCSNPmissingness' +include { QCindmissingness } from './modules/QCindmissingness' +include { FilterMAF } from './modules/FilterMAF' +include { PlotMAF } from './modules/PlotMAF' +include { RemoveLowMAF } from './modules/RemoveLowMAF' +include { CalculateHWE } from './modules/CalculateHWE' +include { PlotHWE } from './modules/PlotHWE' +include { InitialHWEFilter } from './modules/InitialHWEFilter' +include { SecondHWEFilter } from './modules/SecondHWEFilter' +include { IBDLDPruning } from './modules/IBDLDPruning' +include { GenerateIBD } from './modules/GenerateIBD' +include { PlotIBD } from './modules/PlotIBD' +include { RelCutoff } from './modules/RelCutoff' +include { FilterIndividuals } from './modules/FilterIndividuals' +include { CalculateHet } from './modules/CalculateHet' +include { PlotHet } from './modules/PlotHet' +include { ProcessHet } from './modules/ProcessHet' +include { CleanHetFailInd } from './modules/CleanHetFailInd' +include { RemoveHetOutliers } from './modules/RemoveHetOutliers' +include { FindDuplicateSNPs } from './modules/FindDuplicateSNPs' +include { RemoveDuplicateSNPs } from './modules/RemoveDuplicateSNPs' +include { ConvertPLINKtoVCF } from './modules/ConvertPLINKtoVCF' +include { SortVCF } from './modules/SortVCF.nf' +include { SplitVCF } from './modules/SplitVCF.nf' +include { PhaseVCF } from './modules/PhaseVCF' +include { Takersid } from './modules/Takersid' +include { ImputeVCF } from 
'./modules/ImputeVCF' +include { MergedVCF } from './modules/MergedVCF' +include { fastmixture } from './modules/fastmixture' +include { VCFtoPLINK } from './modules/VCFtoPLINK' +include { FindDuplicates } from './modules/FindDuplicates' +include { RemoveDuplicates } from './modules/RemoveDuplicates' +include { ClumpSNPs } from './modules/ClumpSNPs' +include { CreateValidSNPs } from './modules/CreateValidSNPs' +include { CreateSNPpvalue } from './modules/CreateSNPpvalue' +include { CreateRangeList } from './modules/CreateRangeList' +include { PruneSNPs } from './modules/PruneSNPs' +include { CalculatePCA } from './modules/CalculatePCA' +include { CalculatePGS } from './modules/CalculatePGS' +include { PRSice2 } from './modules/PRSice2.nf' +include { LDpred2grid } from './modules/LDpred2grid.nf' +include { LDpred2auto } from './modules/LDpred2auto.nf' +include { Lassosum2 } from './modules/Lassosum2.nf' +include { PRSCSx } from './modules/PRSCSx.nf' +include { Step1_Run_LDpred2 } from './modules/Step1_Run_LDpred2.nf' +include { Step2_LDpred2_Tuning } from './modules/Step2_LDpred2_Tuning.nf' +include { Step3_Run_MUSS } from './modules/Step3_Run_MUSS.nf' +include { Step4_Combine_PRS_Models } from './modules/Step4_Combine_PRS_Models.nf' +include { MegaPRS } from './modules/MegaPRS.nf' +include { FormatGWAS } from './modules/FormatGWAS.nf' +include { Impute } from './modules/Impute.nf' +include { Sbayesrc } from './modules/Sbayesrc.nf' + +workflow { + // Choose File Type + + if (params.vcf_to_plink) { + println "Files are VCF, Lets convert to PLINK format.." 
+ // Take VCF file + vcf_ch = Channel.fromPath("${params.vcf}").ifEmpty { error "VCF file not found: ${params.vcf}" } + converted_plink_ch = ConvertVCFtoPLINK(vcf_ch) + target_ch = converted_plink_ch + } else { + println "Files are PLINK format" + // Kullanıcı PLINK formatında dosya belirttiğinde bu süreç çalışır + target_ch = Channel.fromPath("${params.target_prefix}").ifEmpty { error "PLINK files not found: ${params.target_prefix}" } + } + + // Define the input channel + fail_het_qc_ch = Channel.fromPath("fail-het-qc.txt").ifEmpty { error "Input file not found: fail-het-qc.txt" } + gwas_sumstat_ch = Channel.fromPath("${params.target}/GWAS_sumstat_t1.txt").ifEmpty { error "Input file not found: ${params.target}/GWAS_sumstat_t1.txt" } + pheno_file_ch = Channel.fromPath("${params.pheno_file}").ifEmpty { error "Input file not found: ${params.pheno_file}"} + mega_summaries_ch = Channel.fromPath("${params.outdir}/quant.summaries").ifEmpty { error "Input file not found: ${params.outdir}/quant.summaries" } + ref_phase_ch = Channel.fromPath("${params.ref_phase}/Ref_chr${params.chr}_hg38.bcf.gz").ifEmpty { error "Input file not found: ${params.ref_phase}/Ref_chr${params.chr}_hg38.bcf.gz" } + genetic_map_ch = Channel.fromPath("$PWD/genetic_map_hg38_withX.txt.gz").ifEmpty { error "Input file not found: $PWD/genetic_map_hg38_withX.txt.gz"} + formatted_gwas_ch = Channel.fromPath("${params.outdir}/formatted_gwas.ma").ifEmpty { error "Input file not found: ${params.outdir}/formatted_gwas.ma" } + impute_out_ch = Channel.fromPath("$PWD/sbayesrc.imputed.ma").ifEmpty { error "Input file not found: $PWD/sbayesrc.imputed.ma"} + + // Execute QC Part + + // Execute GWAS QC + gwas_qc = GWAS_QC(gwas_sumstat_ch) + + // Execute QCSNPmissingness + qcsnp_out = QCSNPmissingness(target_ch) + + // Execute QCindmissingness + qcind_out = QCindmissingness(qcsnp_out) + + // Execute FilterMAF + maf_out = FilterMAF(qcind_out) + + // Separate the outputs + merged_bfile_ch = maf_out.map { it[0] } + 
maf_frq_ch = maf_out.map { it[1] } + + // Execute RemoveLowMAF + remove_low_maf_out = RemoveLowMAF(merged_bfile_ch.first()) + + // Execute PlotMAF + plot_out = PlotMAF(maf_frq_ch) + + // Execute CalculateHWE + hwe_out = CalculateHWE(remove_low_maf_out) + + // Execute PlotHWE + plot_hwe_out = PlotHWE(hwe_out) + + // Execute InitialHWEFilter + initial_hwe_filter_out = InitialHWEFilter(remove_low_maf_out) + + // Execute SecondHWEFilter + second_hwe_filter_out = SecondHWEFilter(initial_hwe_filter_out) + + // Execute IBD-LD-pruning + ibd_ld_pruning_out = IBDLDPruning(second_hwe_filter_out) + + // Extract the prune.in file + prune_in_file_ch = ibd_ld_pruning_out.prune_in + + // Execute GenerateIBD + generate_ibd_out = GenerateIBD(second_hwe_filter_out, prune_in_file_ch) + + // Execute PlotIBD + plot_ibd_out = PlotIBD(generate_ibd_out) + + // Execute RelCutoff + rel_cutoff_out = RelCutoff(second_hwe_filter_out, prune_in_file_ch) + + // Execute FilterIndividuals + filter_individuals_out = FilterIndividuals(second_hwe_filter_out, rel_cutoff_out) + + // Execute CalculateHet + calculate_het_out = CalculateHet(filter_individuals_out, prune_in_file_ch) + + // Execute PlotHet + plot_het_out = PlotHet(calculate_het_out) + + // Execute ProcessHet + process_het_out = ProcessHet(calculate_het_out) + + // Execute CleanHetFailInd + clean_het_fail_ind_out = CleanHetFailInd(fail_het_qc_ch) + + // Execute RemoveHetOutliers + remove_het_outliers_out = RemoveHetOutliers(filter_individuals_out, clean_het_fail_ind_out ) + + // Execute FindDuplicatesSNPs + find_duplicate_snps_out = FindDuplicateSNPs(remove_het_outliers_out) + + // Execute RemoveDuplicateSNPs + remove_duplicate_snps_out = RemoveDuplicateSNPs(remove_het_outliers_out, find_duplicate_snps_out ) + + // LD Pruning + prune_snps_out = PruneSNPs(remove_duplicate_snps_out) + + // Convert PLINK to VCF + vcf_out = ConvertPLINKtoVCF(remove_duplicate_snps_out) + + // Sort VCF Files + sorted_vcf_ch = SortVCF(input_vcf = vcf_out) + + // 
Split VCF into chromosomes + chr_ch = Channel.of(1..22) + + split_ch = SplitVCF(vcf_file = sorted_vcf_ch.first(), + chr = chr_ch.flatten()) + + // Phase VCF file + // Map split VCF files to corresponding reference files + split_ref_ch = split_ch + .map { splitted_vcf -> + // Extract chromosome number from the split VCF filename + def chr = splitted_vcf.name.replaceAll(/.*vcfbychrom_(\d+)\.vcf\.gz/, '$1') + def ref_bcf = file("${params.ref_phase}/Ref_chr${chr}_hg38.bcf.gz") + [splitted_vcf, ref_bcf, chr] + } + .filter { tuple -> tuple[0] && tuple[1].exists() } // Ensure both files exist + + phased_vcf_ch = PhaseVCF( + splitted_vcf = split_ref_ch.map { it[0] }, + ref_bcf = split_ref_ch.map { it[1] }, + genetic_map = file("${params.genetic_map}"), + chr = split_ref_ch.map { it[2] } + ) + phased_vcf_ch.view { "Processing (Phasing): Phased VCF=${it[0]}, Ref=${it[1]}, Chr=${it[2]}" } + + // Takersid + takersid_input_ch = phased_vcf_ch + .map { phased_vcf -> + // Extract chromosome number from phased VCF filename + def chr = phased_vcf.name.replaceAll(/.*phasedvcf_chr(\d+)\.vcf\.gz/, '$1') + [phased_vcf, chr] + } + .filter { tuple -> tuple[0].exists() } + + rsid_map_ch = Takersid ( + phased_vcf = takersid_input_ch.map { it[0] }, + chr = takersid_input_ch.map { it[1] } + ) + + // Imputation + // Map phased VCF files and reference files for imputations and combine rsid + imputation_input_ch = phased_vcf_ch + .map { phased_vcf -> + def chr = phased_vcf.name.replaceAll(/.*phasedvcf_chr(\d+)\.vcf\.gz/, '$1') + def ref_bref = file("${params.ref_imp}/Ref_chr${chr}_hg38.bref3") + def g_map = file("${params.g_map}/plink.chr${chr}.GRCh38.map") + [phased_vcf, ref_bref, g_map, chr] + } + .filter { tuple -> tuple[0] && tuple[1].exists() && tuple[2].exists() } // Ensure all files exist + + // Debug to view matched files + imputation_input_ch.view { + "Processing (Imputation): Phased VCF=${it[0]}, Ref=${it[1]}, Map=${it[2]}, Chr=${it[3]}" + } + + // Run imputation process + 
imputed_vcf_ch = ImputeVCF( + phased_vcf = imputation_input_ch.map { it[0] }, + ref_bref = imputation_input_ch.map { it[1] }, + g_map = imputation_input_ch.map { it[2] }, + chr = imputation_input_ch.map { it[3] }, + rsid_map = rsid_map_ch + ) + + + // Merge VCF Files + merged_vcf_ch = MergedVCF(imputed_vcf_ch.collect()) + + // Convert Imputed VCF to PLINK format + plink_out = VCFtoPLINK(merged_vcf_ch) + + // Remove Duplicates + find_duplicates = FindDuplicates(plink_out) + + last_plink_out = RemoveDuplicates(plink_out, find_duplicates) + + // Target Ancestry Inference + ancestry_inf = fastmixture(last_plink_out) + + // Conditionally execute PLINK processes + if (params.run_plink) { + clump_snps_out = ClumpSNPs(last_plink_out, gwas_qc) + create_valid_snps_out = CreateValidSNPs(clump_snps_out) + create_snp_pvalue_out = CreateSNPpvalue(gwas_qc) + create_range_list_out = CreateRangeList() + calculate_pgs_out = CalculatePGS( + last = last_plink_out, + gwas = gwas_qc, + range_list = create_range_list_out, + valid_snp = create_valid_snps_out, + snp_pvalue = create_snp_pvalue_out + ) + } + + if (params.run_pca) { + // Execute CalculatePCA and extract .eigenvec file + calculate_pca_out = CalculatePCA(last_plink_out, prune_snps_out) + + // Separate the outputs + eigenvec_ch = calculate_pca_out.map { it[0] } + eigenval_ch = calculate_pca_out.map { it[1] } + } + + // Conditionally execute PRSice-2 processes + if (params.run_prsice) { + prsice_out = PRSice2( + last = last_plink_out, + gwas = gwas_qc, + pheno_file = pheno_file_ch, + eigenvec = eigenvec_ch, + ) + } + + // Conditionally execute LDpred grid process + if (params.run_LDpred2grid) { + bed_ch = last_plink_out.map { it[0] } + + LDpred2grid( + pheno_file = pheno_file_ch, + eigenvec = eigenvec_ch, + gwas = gwas_qc, + last = last_plink_out + + ) + } + + // Conditionally execute LDpred auto process + if (params.run_LDpred2auto) { + bed_ch = last_plink_out.map { it[0] } + + LDpred2auto( + pheno_file = pheno_file_ch, + 
eigenvec = eigenvec_ch, + gwas = gwas_qc, + last = last_plink_out, + + ) + } + + // Conditionally execute Lassosum2 process + if (params.run_Lassosum2) { + bed_ch = last_plink_out.map { it[0] } + + Lassosum2( + pheno_file = pheno_file_ch, + eigenvec = eigenvec_ch, + gwas = gwas_qc, + last = last_plink_out, + + ) + } + + // Conditionally execute PRS-CSx process + if (params.run_prscsx) { + PRSCSx( + last = last_plink_out + ) + } + + // Conditionally execute MUSSEL process + if (params.run_mussel) { + ldpred2_outputs = Step1_Run_LDpred2() + ldpred2_tuned_outputs = Step2_LDpred2_Tuning(ldpred2_outputs) + muss_outputs = Step3_Run_MUSS() + Step4_Combine_PRS_Models() + } + + // Conditionally execute MegaPRS process + + if (params.run_megaprs) { + mega_prs_out = MegaPRS( + target_qc_prefix = last_plink_out, + pheno_file = pheno_file_ch, + mega_summaries = mega_summaries_ch) + } + + if (params.run_sbayesrc) { + formatted_gwas = FormatGWAS(gwas_qc) + impute_out = Impute( + ld_folder = file(params.ld_folder), + ma_file = formatted_gwas_ch, + out_prefix = params.out_prefix, + threads = params.threads + ) + sbayesr_out = Sbayesrc( + ld_folder = file(params.ld_folder), + imp_file = impute_out_ch, + annot = file(params.annot_file), + out_prefix = params.out_prefix, + threads = params.threads + ) + } +} + + + + diff --git a/modules/.DS_Store b/modules/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/modules/.DS_Store differ diff --git a/modules/CalculateHWE.nf b/modules/CalculateHWE.nf new file mode 100644 index 0000000..732b2e3 --- /dev/null +++ b/modules/CalculateHWE.nf @@ -0,0 +1,16 @@ +process CalculateHWE { + tag "Calculating Hardy-Weinberg Equilibrium" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_4 + + output: + path "HWE.hwe" + + script: + """ + plink --bfile target_4 --hardy --out HWE + """ +} diff --git a/modules/CalculateHet.nf b/modules/CalculateHet.nf new file mode 100644 index 0000000..3d3e7e0 
--- /dev/null +++ b/modules/CalculateHet.nf @@ -0,0 +1,17 @@ +process CalculateHet { + tag "Calculate heterozygosity rate" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_7 + path prune_in_file + + output: + path "het_R_check.het" + + script: + """ + plink --bfile target_7 --extract ${prune_in_file} --het --out het_R_check + """ +} diff --git a/modules/CalculatePCA.nf b/modules/CalculatePCA.nf new file mode 100644 index 0000000..e91e2a9 --- /dev/null +++ b/modules/CalculatePCA.nf @@ -0,0 +1,20 @@ +process CalculatePCA { + tag "CalculatePCA" + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + path prune_in_file + + output: + tuple path("*.eigenvec"), path("*.eigenval") + + + script: + """ + plink --bfile last \ + --extract ${prune_in_file} \ + --pca ${params.pca} \ + --out last_OR + """ +} diff --git a/modules/CalculatePGS.nf b/modules/CalculatePGS.nf new file mode 100644 index 0000000..3642bed --- /dev/null +++ b/modules/CalculatePGS.nf @@ -0,0 +1,25 @@ +process CalculatePGS { + tag "Calculate PGS by PLINK" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + path gwas + path range_list + path valid_snp + path snp_pvalue + + output: + path "target_9_OR.*" + + script: + """ + plink \ + --bfile last \ + --score ${gwas} 3 5 8 header \ + --q-score-range ${range_list} ${snp_pvalue} \ + --extract ${valid_snp} \ + --out target_9_OR + """ +} diff --git a/modules/CleanHetFailInd.nf b/modules/CleanHetFailInd.nf new file mode 100644 index 0000000..2f8fca2 --- /dev/null +++ b/modules/CleanHetFailInd.nf @@ -0,0 +1,16 @@ +process CleanHetFailInd { + tag "Clean het fail individuals file" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path fail_het_qc_file + + output: + path "het_fail_ind.txt" + + script: + """ + sed 's/\"//g' ${fail_het_qc_file} | awk '{print \$1, \$2}' > het_fail_ind.txt + """ +} diff --git a/modules/ClumpSNPs.nf b/modules/ClumpSNPs.nf new file mode 
100644 index 0000000..9bb7585 --- /dev/null +++ b/modules/ClumpSNPs.nf @@ -0,0 +1,24 @@ +process ClumpSNPs { + tag "ClumpSNPs" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + path gwas + + output: + path "last.clumped" + + script: + """ + plink --bfile last \ + --clump-p1 1 \ + --clump-r2 0.1 \ + --clump-kb 250 \ + --clump ${gwas} \ + --clump-snp-field SNP \ + --clump-field P \ + --out last + """ +} diff --git a/modules/ConvertPLINKtoVCF.nf b/modules/ConvertPLINKtoVCF.nf new file mode 100644 index 0000000..5e56809 --- /dev/null +++ b/modules/ConvertPLINKtoVCF.nf @@ -0,0 +1,15 @@ +process ConvertPLINKtoVCF { + tag "Convert PLINK to VCF and index files" + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_9 + + output: + path "*.vcf" + + script: + """ + plink --bfile target_9 --recode vcf --out target_9 + """ +} diff --git a/modules/ConvertVCFtoPLINK.nf b/modules/ConvertVCFtoPLINK.nf new file mode 100644 index 0000000..3864a9b --- /dev/null +++ b/modules/ConvertVCFtoPLINK.nf @@ -0,0 +1,20 @@ +process ConvertVCFtoPLINK { + tag "Convert VCF to PLINK" + publishDir "${params.target}", mode: 'copy' + + input: + path vcf_file + + output: + path "target.*" + + script: + """ + # Step 1: Index the VCF file + bcftools sort ${vcf_file} -Oz -o target.vcf.gz + tabix -p vcf target.vcf.gz || { echo "Tabix indexing failed"; exit 1; } + + # Step 2: Convert VCF to PLINK format + plink --vcf target.vcf.gz --allow-extra-chr --make-bed --out target || { echo "PLINK conversion failed"; exit 1; } + """ +} diff --git a/modules/CreateRangeList.nf b/modules/CreateRangeList.nf new file mode 100644 index 0000000..8954240 --- /dev/null +++ b/modules/CreateRangeList.nf @@ -0,0 +1,19 @@ +process CreateRangeList { + tag "CreateRangeList for P+T" + debug true + publishDir "${params.outdir}", mode: 'copy' + + output: + path "range_list" + + script: + """ + echo "0.001 0 0.001" > range_list + echo "0.05 0 0.05" >> range_list + echo "0.1 0 0.1" >> 
range_list + echo "0.2 0 0.2" >> range_list + echo "0.3 0 0.3" >> range_list + echo "0.4 0 0.4" >> range_list + echo "0.5 0 0.5" >> range_list + """ +} diff --git a/modules/CreateSNPpvalue.nf b/modules/CreateSNPpvalue.nf new file mode 100644 index 0000000..ae7f5e7 --- /dev/null +++ b/modules/CreateSNPpvalue.nf @@ -0,0 +1,16 @@ +process CreateSNPpvalue { + tag "CreateSNPpvalue" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path gwas_sumstat + + output: + path "SNP.pvalue" + + script: + """ + awk '{print \$3,\$13}' ${gwas_sumstat} > SNP.pvalue + """ +} diff --git a/modules/CreateValidSNPs.nf b/modules/CreateValidSNPs.nf new file mode 100644 index 0000000..4a545a6 --- /dev/null +++ b/modules/CreateValidSNPs.nf @@ -0,0 +1,16 @@ +process CreateValidSNPs { + tag "CreateValidSNPs" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path clumped_file + + output: + path "last.valid.snp" + + script: + """ + awk 'NR!=1{print \$3}' ${clumped_file} > last.valid.snp + """ +} diff --git a/modules/FilterIndividuals.nf b/modules/FilterIndividuals.nf new file mode 100644 index 0000000..cc8f18a --- /dev/null +++ b/modules/FilterIndividuals.nf @@ -0,0 +1,17 @@ +process FilterIndividuals { + tag "Filter individuals based on relatedness" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_6 + path rel_id_file + + output: + path "target_7.*" + + script: + """ + plink --bfile target_6 --keep ${rel_id_file} --make-bed --out target_7 + """ +} diff --git a/modules/FilterMAF.nf b/modules/FilterMAF.nf new file mode 100644 index 0000000..06d9b0a --- /dev/null +++ b/modules/FilterMAF.nf @@ -0,0 +1,18 @@ +process FilterMAF { + tag "Filtering autosomal SNPs and calculating MAF" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_2 + + output: + tuple path("target_3.*"), path("MAF_check.frq") + + script: + """ + awk '{ if (\$1 >= 1 && \$1 <= 22) print \$2 }' target_2.bim > snp_1_22.txt + 
plink --bfile target_2 --extract snp_1_22.txt --make-bed --out target_3 + plink --bfile target_3 --freq --out MAF_check + """ +} diff --git a/modules/FindDuplicateSNPs.nf b/modules/FindDuplicateSNPs.nf new file mode 100644 index 0000000..8c483d0 --- /dev/null +++ b/modules/FindDuplicateSNPs.nf @@ -0,0 +1,16 @@ +process FindDuplicateSNPs { + tag "Find duplicate SNPs" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_8 + + output: + path "target_8_duplicate_snps.list" + + script: + """ + awk '{print \$2}' target_8.bim | sort | uniq -d > target_8_duplicate_snps.list + """ +} diff --git a/modules/FindDuplicates.nf b/modules/FindDuplicates.nf new file mode 100644 index 0000000..5e372c2 --- /dev/null +++ b/modules/FindDuplicates.nf @@ -0,0 +1,16 @@ +process FindDuplicates { + tag "Find duplicate SNPs " + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path merged + + output: + path "last_duplicate_snps.list" + + script: + """ + awk '{print \$2}' merged.bim | sort | uniq -d > last_duplicate_snps.list + """ +} diff --git a/modules/FormatGWAS.nf b/modules/FormatGWAS.nf new file mode 100644 index 0000000..694480a --- /dev/null +++ b/modules/FormatGWAS.nf @@ -0,0 +1,18 @@ +process FormatGWAS { + tag "Format GWAS for SBayesR-C" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path gwas + + output: + path "formatted_gwas.ma" + + script: + """ + Rscript ${params.formatgwas} ${gwas} + """ +} + + diff --git a/modules/GWAS_QC.nf b/modules/GWAS_QC.nf new file mode 100644 index 0000000..eecdc52 --- /dev/null +++ b/modules/GWAS_QC.nf @@ -0,0 +1,24 @@ +process GWAS_QC { + tag "GWAS QC Steps" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path gwas_sumstat + + output: + path 'GWAS_QC.*' + + script: + """ + echo "Input file: ${gwas_sumstat}" + awk 'NR==1 || (\$6 > 0.01) && (\$14 > 0.8) {print}' < ${gwas_sumstat} |\ + awk '{seen[\$3]++; if(seen[\$3]==1){ print}}' > GWAS_QC.txt + """ + 
+} + + + + + diff --git a/modules/GenerateIBD.nf b/modules/GenerateIBD.nf new file mode 100644 index 0000000..38fe5b5 --- /dev/null +++ b/modules/GenerateIBD.nf @@ -0,0 +1,16 @@ +process GenerateIBD { + tag "Generate genome-wide IBD estimates" + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_6 + path prune_in_file + + output: + path "ibd_prune.*" + + script: + """ + plink --bfile target_6 --extract ${prune_in_file} --genome --out ibd_prune + """ +} diff --git a/modules/IBDLDPruning.nf b/modules/IBDLDPruning.nf new file mode 100644 index 0000000..a488916 --- /dev/null +++ b/modules/IBDLDPruning.nf @@ -0,0 +1,16 @@ +process IBDLDPruning { + tag "IBD-LD-pruning" + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_6 + + output: + path "merged_indepSNP.prune.in", emit: prune_in + path "merged_indepSNP.prune.out" + + script: + """ + plink --bfile target_6 --indep-pairwise ${params.indep_window_size} ${params.indep_step_size} ${params.indep_threshold} --out merged_indepSNP + """ +} diff --git a/modules/Impute.nf b/modules/Impute.nf new file mode 100644 index 0000000..83c62ff --- /dev/null +++ b/modules/Impute.nf @@ -0,0 +1,19 @@ +process Impute { + tag "Run SBayesR-C" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path ld_folder + path ma_file + val out_prefix + val threads + + script: + """ + # Main: SBayesRC + docker pull zhiliz/sbayesrc + docker run -v ${PWD}:/data zhiliz/sbayesrc --ldm-eigen /data/${ld_folder} --gwas-summary /data/${ma_file} --impute-summary --out /data/${out_prefix} --threads ${threads} + """ + +} \ No newline at end of file diff --git a/modules/ImputeVCF.nf b/modules/ImputeVCF.nf new file mode 100644 index 0000000..9990687 --- /dev/null +++ b/modules/ImputeVCF.nf @@ -0,0 +1,37 @@ +process ImputeVCF { + tag "Imputation for chromosome ${chr} and QC" + publishDir "${params.outdir}", mode: 'copy' + + input: + path phased_vcf + path ref_bref + path g_map + val chr + path rsid_map + + 
output: + path "imputed_chr${chr}_with_rsid.vcf.gz" + + script: + """ + tabix -p vcf ${phased_vcf} + beagle ref=${ref_bref} gt=${phased_vcf} map=${g_map} out=imputed_chr${chr} + bcftools index imputed_chr${chr}.vcf.gz + bcftools view -O z -o imputed_chr${chr}_QC.vcf.gz -e 'INFO/DR2<0.8' imputed_chr${chr}.vcf.gz + bcftools index imputed_chr${chr}_QC.vcf.gz + + awk 'BEGIN {OFS="\t"} {if (NR > 1) print \$1, \$2, \$3, ".", ".", ".", ".", "."}' ${rsid_map} > rsid${chr}.vcf + bgzip rsid${chr}.vcf + tabix -p vcf rsid${chr}.vcf.gz + bcftools annotate -a rsid${chr}.vcf.gz -c CHROM,POS,ID -o imputed_chr${chr}_with_rsid.vcf -O v imputed_chr${chr}_QC.vcf.gz + bgzip imputed_chr${chr}_with_rsid.vcf + bcftools index imputed_chr${chr}_with_rsid.vcf.gz + + + sleep 200 + echo "Files are ready, proceeding." + """ +} + + + diff --git a/modules/InitialHWEFilter.nf b/modules/InitialHWEFilter.nf new file mode 100644 index 0000000..16420b0 --- /dev/null +++ b/modules/InitialHWEFilter.nf @@ -0,0 +1,16 @@ +process InitialHWEFilter { + tag "Initial HWE filtering" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_4 + + output: + path "target_5*" + + script: + """ + plink --bfile target_4 --hwe ${params.hwe_case} --make-bed --out target_5 + """ +} diff --git a/modules/LDpred2auto.nf b/modules/LDpred2auto.nf new file mode 100644 index 0000000..112a467 --- /dev/null +++ b/modules/LDpred2auto.nf @@ -0,0 +1,20 @@ +process LDpred2auto { + tag "Calculate PGS with LDpred2 Auto Model" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path pheno_file + path eigenvec + path gwas + path last + + output: + path "*" + + script: + """ + Rscript ${params.ldpred_auto_script} ${pheno_file} ${eigenvec} ${gwas} ${last} + """ +} + diff --git a/modules/LDpred2grid.nf b/modules/LDpred2grid.nf new file mode 100644 index 0000000..be34620 --- /dev/null +++ b/modules/LDpred2grid.nf @@ -0,0 +1,20 @@ +process LDpred2grid { + tag "Calculate PGS with LDpred2 Grid 
Model" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path pheno_file + path eigenvec + path gwas + path last + + output: + path "*" + + script: + """ + Rscript ${params.ldpred_grid_script} ${pheno_file} ${eigenvec} ${gwas} ${last} + """ +} + diff --git a/modules/Lassosum2.nf b/modules/Lassosum2.nf new file mode 100644 index 0000000..76919fb --- /dev/null +++ b/modules/Lassosum2.nf @@ -0,0 +1,20 @@ +process Lassosum2 { + tag "Lassosum2" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path pheno_file + path eigenvec + path gwas + path last + + output: + path "*" + + script: + """ + Rscript ${params.lassosum2_script} ${pheno_file} ${eigenvec} ${gwas} ${last} + """ +} + diff --git a/modules/MegaPRS.nf b/modules/MegaPRS.nf new file mode 100644 index 0000000..ba28bf6 --- /dev/null +++ b/modules/MegaPRS.nf @@ -0,0 +1,32 @@ +process MegaPRS { + tag "Run MegaPRS Workflow" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_qc_prefix + path pheno_file + path mega_summaries + + output: + path "*" + + script: + """ + # Run MegaPRS logistic regression + ${params.ldak_executable} --logistic quant --bfile ${params.target_qc_prefix} --pheno ${params.pheno_file} + sleep 60 + ${params.ldak_executable} --calc-cors cors --bfile ${params.target_qc_prefix} + ${params.ldak_executable} --mega-prs mega --model ${params.mega_model} --summary ${params.mega_summaries} --power -0.25 --cors cors --allow-ambiguous YES + + """ +} + + + + + + + + + diff --git a/modules/MergedVCF.nf b/modules/MergedVCF.nf new file mode 100644 index 0000000..8dd976a --- /dev/null +++ b/modules/MergedVCF.nf @@ -0,0 +1,23 @@ +process MergedVCF { + tag "Convert Last QC ed VCF files into PLINK format" + publishDir "${params.outdir}", mode: 'copy' + + input: + path imputed_vcfs + + output: + path "merged_output.vcf.gz" + + script: + """ + for vcf in ${imputed_vcfs}; do + echo "Indexing \$vcf" + bcftools index \$vcf + done + + bcftools 
concat ${imputed_vcfs.join(' ')} -Oz -o merged_output.vcf.gz + """ +} + + + diff --git a/modules/PRSCSx.nf b/modules/PRSCSx.nf new file mode 100644 index 0000000..4a31129 --- /dev/null +++ b/modules/PRSCSx.nf @@ -0,0 +1,28 @@ +process PRSCSx { + tag "PRS-CSx" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + + + output: + path "target_ukbb_meta*" + + script: + """ + echo "Output directory: ${params.outdir}" + python /sw/bip_apps/PRScsx/PRScsx.py \ + --ref_dir=${params.prscsxref_dir} \ + --bim_prefix=last \ + --sst_file=${params.prscsx_gwas1},${params.prscsx_gwas2} \ + --n_gwas=${params.n_gwas1},${params.n_gwas2} \ + --pop=${params.pop1},${params.pop2} \ + --chrom=${params.chrom} \ + --phi=${params.phi} \ + --meta=${params.meta} \ + --out_dir=./ \ + --out_name=${params.out_name} + """ +} diff --git a/modules/PRSice2.nf b/modules/PRSice2.nf new file mode 100644 index 0000000..df17647 --- /dev/null +++ b/modules/PRSice2.nf @@ -0,0 +1,29 @@ +process PRSice2 { + tag "CalculatePGS with PRSice2" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + path gwas + path pheno_file + path eigenvec + + output: + path "last_PRSice2*" + + script: + """ + Rscript ${params.prsice_script} \ + --prsice ${params.prsice_executable} \ + --base ${gwas} \ + --target last \ + --binary-target T \ + --pheno ${pheno_file} \ + --cov ${eigenvec} \ + --stat OR \ + --or \ + --out last_PRSice2 + """ +} + diff --git a/modules/PhaseVCF.nf b/modules/PhaseVCF.nf new file mode 100644 index 0000000..980f648 --- /dev/null +++ b/modules/PhaseVCF.nf @@ -0,0 +1,31 @@ +process PhaseVCF { + tag "Phasing of VCF file" + tag { "chr${chr}" } + publishDir "${params.outdir}", mode: 'copy' + + input: + path splitted_vcf + path ref_bcf + path genetic_map + val chr + + output: + path "phasedvcf_chr${chr}.vcf.gz" + + + script: + """ + bcftools index ${ref_bcf} + tabix -p vcf ${splitted_vcf} + $PWD/eagle \ + --vcfTarget ${splitted_vcf} \ + --vcfRef 
${ref_bcf} \ + --geneticMapFile ${genetic_map} \ + --vcfOutFormat z \ + --numThreads 24 \ + --outPrefix phasedvcf_chr${chr} + + + + """ +} diff --git a/modules/PlotHWE.nf b/modules/PlotHWE.nf new file mode 100644 index 0000000..cc2235f --- /dev/null +++ b/modules/PlotHWE.nf @@ -0,0 +1,16 @@ +process PlotHWE { + tag "Plotting Hardy-Weinberg Equilibrium" + debug true + publishDir "${params.graphs}", mode: 'copy' + + input: + path hwe_file + + output: + path "hwe.pdf" + + script: + """ + Rscript $PWD/bin/plot_hwe.R ${hwe_file} + """ +} diff --git a/modules/PlotHet.nf b/modules/PlotHet.nf new file mode 100644 index 0000000..c90d6b6 --- /dev/null +++ b/modules/PlotHet.nf @@ -0,0 +1,15 @@ +process PlotHet { + tag "Plot heterozygosity rate" + publishDir "${params.graphs}", mode: 'copy' + + input: + path het_file + + output: + path "het.pdf" + + script: + """ + Rscript $PWD/bin/plot_het.R ${het_file} + """ +} diff --git a/modules/PlotIBD.nf b/modules/PlotIBD.nf new file mode 100644 index 0000000..fc44287 --- /dev/null +++ b/modules/PlotIBD.nf @@ -0,0 +1,15 @@ +process PlotIBD { + tag "Plot IBD distribution" + publishDir "${params.graphs}", mode: 'copy' + + input: + path ibd_file + + output: + path "ibd_plot.pdf" + + script: + """ + Rscript $PWD/bin/plot_ibd.R ${ibd_file} + """ +} diff --git a/modules/PlotMAF.nf b/modules/PlotMAF.nf new file mode 100644 index 0000000..8a0b46a --- /dev/null +++ b/modules/PlotMAF.nf @@ -0,0 +1,16 @@ +process PlotMAF { + tag "Generate MAF distribution plot" + debug true + publishDir "${params.graphs}", mode: 'copy' + + input: + path maf_freq_file + + output: + path 'MAF_dist.pdf' + + script: + """ + Rscript $PWD/bin/plot_maf.R ${maf_freq_file} + """ +} diff --git a/modules/ProcessHet.nf b/modules/ProcessHet.nf new file mode 100644 index 0000000..13b472f --- /dev/null +++ b/modules/ProcessHet.nf @@ -0,0 +1,16 @@ +process ProcessHet { + tag "Process heterozygosity data" + debug true + publishDir "${params.graphs}", mode: 'copy' + + input: + path 
het_file + + output: + path "het.pdf" + + script: + """ + Rscript $PWD/bin/process_het.R ${het_file} + """ +} diff --git a/modules/PruneSNPs.nf b/modules/PruneSNPs.nf new file mode 100644 index 0000000..45f63ae --- /dev/null +++ b/modules/PruneSNPs.nf @@ -0,0 +1,18 @@ +process PruneSNPs { + tag "PruneSNPs by PLINK" + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_9 + + output: + path "target_9_OR.prune.in" + + script: + """ + plink \ + --bfile target_9 \ + --indep-pairwise ${params.indep_window_size} ${params.indep_step_size} ${params.indep_threshold} \ + --out target_9_OR + """ +} diff --git a/modules/QCSNPmissingness.nf b/modules/QCSNPmissingness.nf new file mode 100644 index 0000000..ffc912b --- /dev/null +++ b/modules/QCSNPmissingness.nf @@ -0,0 +1,16 @@ +process QCSNPmissingness { + tag "Removal of missing SNPs according to the specified threshold value" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target + + output: + path "target_*" + + script: + """ + plink --bfile ${params.target_prefix} --geno ${params.geno} --make-bed --out target_1 + """ +} diff --git a/modules/QCindmissingness.nf b/modules/QCindmissingness.nf new file mode 100644 index 0000000..ab4a624 --- /dev/null +++ b/modules/QCindmissingness.nf @@ -0,0 +1,16 @@ +process QCindmissingness { + tag "Removal of missing individuals according to the specified threshold value" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_1 + + output: + path 'target_2.*' + + script: + """ + plink --bfile target_1 --mind ${params.mind} --make-bed --out target_2 + """ +} diff --git a/modules/RelCutoff.nf b/modules/RelCutoff.nf new file mode 100644 index 0000000..55c02ca --- /dev/null +++ b/modules/RelCutoff.nf @@ -0,0 +1,16 @@ +process RelCutoff { + tag "Create file with non-relative individuals" + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_6 + path prune_in_file + + output: + path "pihat.rel.id" + + 
script: + """ + plink --bfile target_6 --extract ${prune_in_file} --rel-cutoff ${params.pihat} --out pihat + """ +} diff --git a/modules/RemoveDuplicateSNPs.nf b/modules/RemoveDuplicateSNPs.nf new file mode 100644 index 0000000..ba074cd --- /dev/null +++ b/modules/RemoveDuplicateSNPs.nf @@ -0,0 +1,17 @@ +process RemoveDuplicateSNPs { + tag "Remove duplicate SNPs" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_8 + path duplicate_snps_list + + output: + path "target_9.*" + + script: + """ + plink --bfile target_8 --exclude ${duplicate_snps_list} --make-bed --out target_9 + """ +} diff --git a/modules/RemoveDuplicates.nf b/modules/RemoveDuplicates.nf new file mode 100644 index 0000000..8f881e9 --- /dev/null +++ b/modules/RemoveDuplicates.nf @@ -0,0 +1,17 @@ +process RemoveDuplicates { + tag "Remove duplicates" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + path duplicate_snps_list + + output: + path "last.*" + + script: + """ + plink --bfile merged --exclude ${duplicate_snps_list} --make-bed --out last + """ +} diff --git a/modules/RemoveHetOutliers.nf b/modules/RemoveHetOutliers.nf new file mode 100644 index 0000000..f2640f7 --- /dev/null +++ b/modules/RemoveHetOutliers.nf @@ -0,0 +1,17 @@ +process RemoveHetOutliers { + tag "Remove heterozygosity rate outliers" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_7 + path het_fail_ind_file + + output: + path "target_8.*" + + script: + """ + plink --bfile target_7 --allow-no-sex --remove ${het_fail_ind_file} --make-bed --out target_8 + """ +} diff --git a/modules/RemoveLowMAF.nf b/modules/RemoveLowMAF.nf new file mode 100644 index 0000000..ed8917b --- /dev/null +++ b/modules/RemoveLowMAF.nf @@ -0,0 +1,16 @@ +process RemoveLowMAF { + tag "Removing SNPs with low MAF frequency" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_3 + + output: + path "target_4.*" + + script: + """ + 
plink --bfile target_3 --maf ${params.maf} --make-bed --out target_4 + """ +} diff --git a/modules/Sbayesrc.nf b/modules/Sbayesrc.nf new file mode 100644 index 0000000..77d5805 --- /dev/null +++ b/modules/Sbayesrc.nf @@ -0,0 +1,19 @@ +process Sbayesrc { + tag "Run SBayesR-C" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path ld_folder + path imp_file + path annot + val out_prefix + val threads + + script: + """ + # Main: SBayesRC + docker pull zhiliz/sbayesrc + docker run -v ${PWD}:/data zhiliz/sbayesrc --ldm-eigen /data/${params.ld_folder} --gwas-summary /data/${imp_file} --sbayes RC --annot /data/${params.annot_file} --out /data/${params.out_prefix} --threads ${params.threads} + """ +} diff --git a/modules/SecondHWEFilter.nf b/modules/SecondHWEFilter.nf new file mode 100644 index 0000000..ee716fe --- /dev/null +++ b/modules/SecondHWEFilter.nf @@ -0,0 +1,16 @@ +process SecondHWEFilter { + tag "Second HWE filtering" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path target_5 + + output: + path "target_6.*" + + script: + """ + plink --bfile target_5 --hwe ${params.hwe_ctrl} --hwe-all --make-bed --out target_6 + """ +} diff --git a/modules/SortVCF.nf b/modules/SortVCF.nf new file mode 100644 index 0000000..0932c0e --- /dev/null +++ b/modules/SortVCF.nf @@ -0,0 +1,16 @@ +process SortVCF { + tag "Sort VCF File" + publishDir "${params.outdir}", mode: 'copy' + + input: + path input_vcf + + output: + path "*" + + script: + """ + bcftools sort ${input_vcf} -Oz -o sorted.vcf.gz + tabix -p vcf sorted.vcf.gz + """ +} diff --git a/modules/SplitVCF.nf b/modules/SplitVCF.nf new file mode 100644 index 0000000..8fba167 --- /dev/null +++ b/modules/SplitVCF.nf @@ -0,0 +1,23 @@ +process SplitVCF { + tag "Split VCF files into Chromosomes" + tag { "chr${chr}" } + publishDir "${params.outdir}", mode: 'copy' + + input: + path vcf_file + val chr + + output: + path "vcfbychrom_${chr}.vcf.gz" + + + script: + """ + tabix -p vcf ${vcf_file} 
+ bcftools view ${vcf_file} --regions ${chr} -o vcfbychrom_${chr}.vcf.gz -Oz + """ +} + + + + diff --git a/modules/Step1_Run_LDpred2.nf b/modules/Step1_Run_LDpred2.nf new file mode 100644 index 0000000..00c2a9e --- /dev/null +++ b/modules/Step1_Run_LDpred2.nf @@ -0,0 +1,28 @@ +process Step1_Run_LDpred2 { + tag "Step 1: LDpred2 is running" + publishDir "${params.out}", mode: 'copy' + + output: + path "*", emit: ldpred2_outputs + + script: + """ + Rscript ${params.pack}/R/LDpred2_jobs.R \\ + --PATH_package ${params.pack} \\ + --PATH_data ${params.data} \\ + --PATH_LDref ${params.LDref} \\ + --PATH_out ${params.outdir} \\ + --FILE_sst ${params.sst} \\ + --pop ${params.pop} \\ + --bfile_tuning ${params.bfile_tuning} \\ + --NCORES 20 + + while [ ! -f "${params.outdir}/EUR/tmp/beta_files/beta_in_all_settings/params.txt" ] || [ ! -f "${params.outdir}/AFR/tmp/beta_files/beta_in_all_settings/params.txt" ]; do + echo "Files are writing..." + sleep 60 + done + + echo "Files are ready, proceeding." + + """ +} \ No newline at end of file diff --git a/modules/Step2_LDpred2_Tuning.nf b/modules/Step2_LDpred2_Tuning.nf new file mode 100644 index 0000000..2842f5b --- /dev/null +++ b/modules/Step2_LDpred2_Tuning.nf @@ -0,0 +1,35 @@ +process Step2_LDpred2_Tuning { + tag "Step 2: LDpred2 tuning is running" + publishDir "${params.outdir}", mode: 'copy' + + input: + path ldpred2_outputs + + output: + tuple path ("EUR_optim_params.txt"), + path ("AFR_optim_params.txt"), emit: ldpred2_tuned_outputs + + script: + """ + Rscript ${params.pack}/R/LDpred2_tuning.R \\ + --PATH_package ${params.pack} \\ + --PATH_out ${params.outdir} \\ + --PATH_plink ${params.plink} \\ + --FILE_sst ${params.sst} \\ + --pop ${params.pop} \\ + --chrom ${params.mussel_chrom} \\ + --bfile_tuning ${params.bfile_tuning} \\ + --pheno_tuning ${params.pheno_tuning} \\ + --covar_tuning ${params.covar_tuning} \\ + --bfile_testing ${params.bfile_testing} \\ + --pheno_testing ${params.pheno_testing} \\ + --covar_testing 
${params.covar_testing} \\ + --trait_type ${params.trait_type} \\ + --testing TRUE \\ + --NCORES 4 + + # Dosyaları işlem dizinine kopyalayın + cp ${params.outdir}/LDpred2/EUR_optim_params.txt EUR_optim_params.txt + cp ${params.outdir}/LDpred2/AFR_optim_params.txt AFR_optim_params.txt + """ +} diff --git a/modules/Step3_Run_MUSS.nf b/modules/Step3_Run_MUSS.nf new file mode 100644 index 0000000..d1aafe1 --- /dev/null +++ b/modules/Step3_Run_MUSS.nf @@ -0,0 +1,33 @@ +process Step3_Run_MUSS { + tag "Step 3: Running MUSS" + publishDir "${params.outdir}", mode: 'copy' + + output: + path "MUSS_beta_in_all_settings_bychrom/*", emit: muss_outputs + + script: + """ + Rscript ${params.pack}/R/MUSS_jobs.R \\ + --PATH_package ${params.pack} \\ + --PATH_data ${params.data} \\ + --PATH_LDref ${params.LDref} \\ + --PATH_out ${params.outdir} \\ + --FILE_sst ${params.sst} \\ + --pop ${params.pop} \\ + --LDpred2_params ${params.outdir}/LDpred2/EUR_optim_params.txt,${params.outdir}/LDpred2/AFR_optim_params.txt \\ + --chrom ${params.mussel_chrom} \\ + --bfile_tuning ${params.bfile_tuning} \\ + --NCORES 5 + + while [ ! -f "${params.outdir}/tmp/MUSS_beta_in_all_settings_bychrom/settings_22.txt" ] ; do + echo "Files are writing..." + sleep 60 + done + + echo "Files are ready, proceeding." 
+ + # Copy the output files to the current working directory + mkdir -p MUSS_beta_in_all_settings_bychrom + cp ${params.outdir}/tmp/MUSS_beta_in_all_settings_bychrom/*.txt MUSS_beta_in_all_settings_bychrom/ + """ +} diff --git a/modules/Step4_Combine_PRS_Models.nf b/modules/Step4_Combine_PRS_Models.nf new file mode 100644 index 0000000..58c474c --- /dev/null +++ b/modules/Step4_Combine_PRS_Models.nf @@ -0,0 +1,28 @@ +process Step4_Combine_PRS_Models { + tag "Step 4: Combining PRS models with SuperLearner" + publishDir "${params.outdir}", mode: 'copy' + + output: + path "*" + + + script: + """ + Rscript ${params.pack}/R/MUSSEL.R \\ + --PATH_package ${params.pack} \\ + --PATH_out ${params.outdir} \\ + --PATH_plink ${params.plink} \\ + --pop ${params.pop} \\ + --target_pop ${params.target_pop} \\ + --chrom ${params.mussel_chrom} \\ + --bfile_tuning ${params.bfile_tuning} \\ + --pheno_tuning ${params.pheno_tuning} \\ + --covar_tuning ${params.covar_tuning} \\ + --bfile_testing ${params.bfile_testing} \\ + --pheno_testing ${params.pheno_testing} \\ + --covar_testing ${params.covar_testing} \\ + --trait_type ${params.trait_type} \\ + --testing TRUE \\ + --NCORES 4 + """ +} diff --git a/modules/Takersid.nf b/modules/Takersid.nf new file mode 100644 index 0000000..4c2209a --- /dev/null +++ b/modules/Takersid.nf @@ -0,0 +1,25 @@ +process Takersid { + tag "Takingrsid" + publishDir "${params.outdir}", mode: 'copy' + + input: + path phased_vcf + val chr + + output: + path "rsid_map_chr${chr}.txt" + + script: + """ + zcat ${phased_vcf} | grep -v '^#' | awk '{print \$1, \$2, \$3}' > rsid_map_chr${chr}.txt + """ +} + + + + + + + + + diff --git a/modules/VCFtoPLINK.nf b/modules/VCFtoPLINK.nf new file mode 100644 index 0000000..79261d0 --- /dev/null +++ b/modules/VCFtoPLINK.nf @@ -0,0 +1,20 @@ +process VCFtoPLINK { + tag "Convert Imputed VCF to PLINK" + publishDir "${params.outdir}", mode: 'copy' + + input: + path merged + + output: + path "merged.*" + + script: + """ + # Step 1: 
Index the VCF file + bcftools sort ${merged} -Oz -o merged_sorted.vcf.gz + tabix -p vcf merged_sorted.vcf.gz || { echo "Tabix indexing failed"; exit 1; } + + # Step 2: Convert VCF to PLINK format + plink --vcf merged_sorted.vcf.gz --allow-extra-chr --make-bed --out merged + """ +} diff --git a/modules/fastmixture.nf b/modules/fastmixture.nf new file mode 100644 index 0000000..8bd766f --- /dev/null +++ b/modules/fastmixture.nf @@ -0,0 +1,16 @@ +process fastmixture { + tag "Target Ancestry Inference" + debug true + publishDir "${params.outdir}", mode: 'copy' + + input: + path last + + output: + path "*" + + script: + """ + fastmixture --bfile last --K ${params.num_of_ancestry} --threads 8 --seed 1 --out ancestry + """ +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..62fc891 --- /dev/null +++ b/nextflow.config @@ -0,0 +1,112 @@ +/* + * User Input Parameters + * The specified parameters and input files can be given to the pipeline via command line options + */ +process { + errorStrategy = 'ignore' +} + +params { + // Required parameters + vcf_to_plink = false + target = "$PWD/target/" + target_prefix = "$PWD/target/target" + target_qc_prefix = "$PWD/outputs/last" + pheno_file = "$PWD/target/phenotype_file_t2.txt" // Default phenotype file + gwas_sumstat = "$PWD/target/GWAS_sumstat_t1.txt" // Default GWAS summary statistics file + mega_summaries = "$PWD/outputs/quant.summaries" + + // Optional parameters with default values + geno = 0.02 + mind = 0.02 + maf = 0.05 + hwe_case = 1e-6 + hwe_ctrl = 1e-10 + pihat = 0.1875 + relatedness = 0.1875 + indep_window_size = 100 + indep_step_size = 5 + indep_threshold = 0.2 + pca = 10 + + // Help flag + help = false + + // Workflow control flags + run_plink = true // Set to false if PLINK part should not be run + run_prsice = true // Set to false if PRSice-2 part should not be run + run_pca = true // To create .eigenvec file to be used as covariate file for further analysis + run_LDpred2grid = 
true // Set to false if LDPred2 Grid part should not be run + run_LDpred2auto = true // Set to false if LDPred2 Auto part should not be run + run_Lassosum2 = true // Set to false if Lassosum2 part should not be run + run_prscsx = true // Set to false if PRS-CSx part should not be run + run_mussel = false // Set to true if MUSSEL part should not be run + run_megaprs = true // Set to false if MegaPRS part should not be run + run_sbayesrc = true // Set to false if SBayesR-C part should not be run + + // Phasing - Imputation + ref_phase = "/archive/yarast/1KG_hg38_BCF/" + genetic_map = "$PWD/genetic_map_hg38_withX.txt.gz" + ref_imp = "/archive/yarast/1KG_hg38_bref3/" + g_map = "/archive/yarast/plink.GRCh38.map/" // Genetic map directory + + // Target Ancesty Inference + num_of_ancestry = 3 + + // PRSice-2 parameters + prsice_script = "$PWD/PRSice.R" + prsice_executable = "$PWD/bin/PRSice_linux" + + // LDpred2 parameters + ldpred_grid_script = "$PWD/bin/LDpred2grid.R" + ldpred_auto_script = "$PWD/bin/LDpred2auto.R" + + // Lassosum2 + lassosum2_script = "$PWD/bin/Lassosum2.R" + + // PRScsx parameters + prscsxref_dir = "$PWD/PRScsx/UKBB" // Default reference directory (can be '$PWD/PRScsx/UKBB' or '$PWD/PRScsx/1KG') + prscsx_gwas1 = "$PWD/target/EUR_GWAS_PRScsx.txt" // First GWAS summary statistics file in the target folder + prscsx_gwas2 = "$PWD/target/EAS_GWAS_PRScsx.txt" // Second GWAS summary statistics file in the target folder + n_gwas1 = '10000' // Sample sizes for GWAS1 + n_gwas2 = '10000' // Sample sizes for GWAS1 + pop1 = 'EUR' // Population1 used in PRS-CSx + pop2 = 'EAS' // Population2 used in PRS-CSx + phi = '1e-2' // Tuning parameter for PRS-CSx + chrom = '1' // the chromosome on which model is fitted, can be separated by comma e.g., --chrom=1,3,5. 
+ meta = true // Whether to run meta-analysis + out_name = 'target_ukbb_meta' // Specify output prefix + + // MUSSEL parameters + pack = "$PWD/MUSSEL" + data = "$PWD/MUSSEL/example" + LDref = "$PWD/MUSSEL/LDref" + sst = "$PWD/MUSSEL/example/summdata/EUR.txt,$PWD/MUSSEL/example/summdata/AFR.txt" + pop = "EUR,AFR" + mussel_chrom = "1-22" + bfile_tuning = "$PWD/MUSSEL/example/sample_data/EUR/tuning_geno,$PWD/MUSSEL/example/sample_data/AFR/tuning_geno" + pheno_tuning = "$PWD/example/sample_data/EUR/pheno.txt,$PWD/MUSSEL/example/sample_data/AFR/pheno.txt" + covar_tuning = "$PWD/MUSSEL/example/sample_data/EUR/covar.txt,$PWD/MUSSEL/example/sample_data/AFR/covar.txt" + bfile_testing = "$PWD/MUSSEL/example/sample_data/EUR/testing_geno,$PWD/MUSSEL/example/sample_data/AFR/testing_geno" + pheno_testing = "$PWD/MUSSEL/example/sample_data/EUR/pheno.txt,$PWD/MUSSEL/example/sample_data/AFR/pheno.txt" + covar_testing = "$PWD/MUSSEL/example/sample_data/EUR/covar.txt,$PWD/MUSSEL/example/sample_data/AFR/covar.txt" + trait_type = "continuous" + NCORES = 4 + plink = "/sw/bip_apps/plink2/2.0.0.alpha/bin/plink2" + + // MegaPRS Parameters + ldak_executable = "$PWD/bin/ldak6.linux" //Path to the LDAK executable (e.g., ldak6.mac or ldak6.linux) + mega_model = "bayesr" // Model type for MegaPRS. Options: lasso, ridge, bayesr, etc. Default = bayesr + + //SBayesR-C Parameters + formatgwas = "$PWD/bin/format_gwas.R" + ld_folder = 'ukbEAS_Imputed' + annot_file = 'annot_baseline2.2.txt' + out_prefix = 'sbayesrc' + threads = 16 + + + // Output directories + outdir = "$PWD/outputs" + graphs = "$PWD/QC_graphs" +} diff --git a/outputs/.DS_Store b/outputs/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/outputs/.DS_Store differ diff --git a/target/.DS_Store b/target/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/target/.DS_Store differ