diff --git a/R/freq_table.R b/R/freq_table.R
index 5bf93150..75db07b9 100644
--- a/R/freq_table.R
+++ b/R/freq_table.R
@@ -12,6 +12,7 @@ if (getRversion() >= '2.15.1')
 #' @param byvar character: name of variable to tabulate. Use Standard evaluation.
 #' @param digits numeric: number of decimal places to display. Default is 1.
 #' @param na.rm logical: report NA values in frequencies. Default is FALSE.
+#' @param freq_var_name character: name for frequency variable. Default is "n"
 #'
 #' @return data.table with frequencies.
 #' @export
@@ -26,33 +27,45 @@ if (getRversion() >= '2.15.1')
 freq_table <- function(x,
                        byvar,
                        digits = 1,
-                       na.rm  = FALSE) {
+                       na.rm  = FALSE,
+                       freq_var_name = "n") {
 
   x_name <- as.character(substitute(x))
   if (!is.data.frame(x)) {
     cli::cli_abort("Argument {.arg x} ({.field {x_name}}) must be a data frame")
   }
+  if (isFALSE(is.data.table(x))) {
+    x <- qDT(x)
+  }
+
 
-  fq <- qtab(x[[byvar]], na.exclude = na.rm)
-  ft <- data.frame(joyn = names(fq),
-                   n = as.numeric(fq))
+  fq <- qtab(x[, ..byvar], na.exclude = na.rm, dnn = byvar)
+
+  ft <- fq |>
+    as.data.table() |>
+    setnames("N", "n") |>
+    # filter zeros
+    fsubset(n > 0)
 
   N <- fsum(ft$n)
   ft <- ft |>
     ftransform(percent = paste0(round(n / N * 100, digits), "%"))
 
   # add row with totals
-  ft <- rowbind(ft, data.table(joyn = "total",
-                               n = N,
-                               percent = "100%")) |>
-    # filter zeros
-    fsubset(n > 0)
+  total_row <- rep("total", length(byvar)) |>
+    as.list() |>
+    as.data.table() |>
+    setnames(new  = byvar) |>
+    ftransform(n = N,
+               percent = "100%")
 
-  setrename(ft, joyn = byvar, .nse = FALSE)
+  ft <- rowbind(ft, total_row)
+  setrename(ft,
+            n    = freq_var_name,
+            .nse = FALSE)
 }
 
 
-
 #' Report frequencies from attributes in report var
 #'
 #' @param x dataframe from [joyn_workhorse]
diff --git a/R/is_id.R b/R/is_id.R
index a1f77086..c8e3b80a 100644
--- a/R/is_id.R
+++ b/R/is_id.R
@@ -35,40 +35,42 @@ if (getRversion() >= '2.15.1')
 #' is_id(y1, by = "id")
 is_id <- function(dt,
                   by,
-                  verbose        = getOption("joyn.verbose"),
+                  verbose  = getOption("joyn.verbose", default = FALSE),
                   return_report  = FALSE) {
 
-  # make sure it is data.table
-  if (!(is.data.table(dt))) {
+  # Ensure dt is a data.table
+  if (!is.data.table(dt)) {
     dt <- as.data.table(dt)
-  } else {
-    dt <- data.table::copy(dt)
   }
 
-  # count
-  m     <- dt[, .(copies =.N), by = mget(by)]
-  is_id <- m[, mean(copies)] == 1
+  # Check for duplicates
+  is_id <- !(anyDuplicated(dt, by = by) > 0)
 
   if (verbose) {
-
-    cli::cli_h3("Duplicates in terms of {.code {by}}")
-
-    d <- freq_table(m, "copies")
-    print(d[])
-
-    cli::cli_rule(right = "End of {.field is_id()} report")
-
+    if (is_id) {
+      cli::cli_alert_success("No duplicates found by {.code {by}}")
+    } else {
+      cli::cli_alert_warning("Duplicates found by: {.code {by}}")
+    }
   }
 
-  if (isFALSE(return_report)) {
-
-    return(is_id)
+  if (return_report) {
+    # Return the duplicated rows if requested
+    if (verbose) cli::cli_h3("Duplicates in terms of {.code {by}}")
 
-  } else {
+    d <- freq_table(x = dt,
+                     byvar = by,
+                     freq_var_name = "copies")
 
-    return(m)
+    if (verbose) {
+      d |>
+        fsubset(copies > 1) |>
+        print()
+    }
 
+    if (verbose) cli::cli_rule(right = "End of {.field is_id()} report")
+    return(invisible(d))
+  } else {
+    return(is_id)
   }
-
 }
-
diff --git a/R/possible_ids.R b/R/possible_ids.R
index 86cea94a..faec30b3 100644
--- a/R/possible_ids.R
+++ b/R/possible_ids.R
@@ -1,16 +1,45 @@
 #' Find possible unique identifies of data frame
 #'
-#' Identify possible variables uniquely identifying x
+#' Identify possible combinations of variables that  uniquely identifying dt
 #'
 #' @param dt data frame
-#' @param exclude character: Exclude variables to be selected as identifiers. It
-#'   could be either the name of the variables of one type of the variable
-#'   prefixed by "_". For instance, "_numeric" or "_character".
+#' @param vars character: A vector of variable names to consider for identifying unique combinations.
+#' @param exclude character: Names of variables to exclude from analysis
 #' @param include character: Name of variable to be included, that might belong
 #'   to the group excluded in the `exclude`
+#' @param exclude_classes character: classes to exclude from analysis (e.g.,
+#'   "numeric", "integer", "date")
+#' @param include_classes character: classes to include in the analysis (e.g.,
+#'   "numeric", "integer", "date")
+#' @param min_combination_size numeric: Min number of combinations. Default is
+#'   1, so all combinations.
+#' @param max_combination_size numeric. Max number of combinations. Default is
+#'   5. If there is a combinations of identifiers larger than
+#'   `max_combination_size`, they won't be found
+#' @param max_processing_time numeric: Max time to process in seconds. After
+#'   that, it returns what it found.
+#' @param max_numb_possible_ids numeric: Max number of possible IDs to find. See
+#'   details.
+#' @param get_all logical: get all possible combinations based on the parameters
+#'   above.
 #' @param verbose logical: If FALSE no message will be displayed. Default is
 #'   TRUE
 #'
+#' @section Number of possible IDs:
+#'
+#'   The number of possible IDs in a dataframe could be very large. This is why,
+#'   `possible_ids()` makes use of heuristics to return something useful without
+#'   wasting the time of the user. In addition, we provide multiple parameter so
+#'   that the user can fine tune their search for possible IDs easily and
+#'   quickly.
+#'
+#'   Say for instance that you have a dataframe with 10 variables. Testing every
+#'   possible pair of variables will give you 90 possible unique identifiers for
+#'   this dataframe. If you want to test all the possible IDs, you will have to
+#'   test more 5000 combinations. If the dataframe has many rows, it may take a
+#'   while.
+#'
+#'
 #' @return list with possible identifiers
 #' @export
 #'
@@ -22,177 +51,397 @@
 #'                 x   = c(16, 12, NA, NA, 15))
 #' possible_ids(x4)
 possible_ids <- function(dt,
-                         exclude = NULL,
-                         include = NULL,
-                         verbose = getOption("possible_ids.verbose")) {
-
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  # Check inputs   ---------
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+                         vars                        = NULL,
+                         exclude                     = NULL,
+                         include                     = NULL,
+                         exclude_classes             = NULL,
+                         include_classes             = NULL,
+                         verbose                     = getOption("possible_ids.verbose",
+                                                        default = FALSE),
+                         min_combination_size        = 1,
+                         max_combination_size        = 5,
+                         max_processing_time         = 60, # in seconds
+                         max_numb_possible_ids       = 100,
+                         get_all                     = FALSE) {
+
+  # defenses ---------
+  # Ensure dt is a data.table
   if (!is.data.frame(dt)) {
     stop("data must be a data frame")
   }
+  if (!is.data.table(dt)) {
+    dt <- as.data.table(dt)
+  }
+
+  # Get variable
+  # Vars --------
+
+   if (is.null(vars)) {
+     vars <- names(dt) |>
+       copy()
+     } else {
+
+    # check if all vars are in dt
+    missing_vars <- setdiff(vars, names(dt))
+
+    if (length(missing_vars) > 0) {
+       cli::cli_abort("The following variables are not in the data table: {.strongVar {missing_vars}}")
+    }
 
-  if (is.data.table(dt)) {
-    dt <- as.data.frame(dt)
+    # check at least 2 vars are provided
+
+     if (length(vars) < 2) {
+      cli::cli_abort("Can't make combinations with a single var: {.strongVar {vars}}")
+     }
+
+    # exclude should not be used
+      if (!(is.null(exclude) & is.null(exclude_classes))) {
+        exclude         <- NULL
+        exclude_classes <- NULL
+        cli::cli_alert_danger("Args {.strongArg `exclude`} and {.strongArg `exclude_classes`} not available when using {.strongArg `vars`}")
+      }
+
+   }
+
+  # Exclude and include -------
+
+  ## classes ----------
+  vars <- filter_by_class(dt              = dt,
+                          vars            = vars,
+                          include_classes = include_classes,
+                          exclude_classes = exclude_classes)
+
+  ## var names --------
+  vars <- filter_by_name(vars, include, exclude, verbose)
+
+  ##  no duplicated vars -------------
+  if (anyDuplicated(vars)) {
+    dupvars <- vars[duplicated(vars)] |>
+      unique()
+    cli::cli_abort("vars {.strongVar {dupvars}} are duplicated.")
   }
 
+  if (verbose) {
+    cli::cli_alert_info("Variables to test: {.strongVar {vars}}")
+  }
 
-  if (is.null(exclude) && !is.null(include)) {
+  if (length(vars) == 0) {
     if (verbose) {
-      cli::cli_alert_warning("Since {.code exclude} is NULL, {.code include}
-                             directive does not make sense. Ignored.",
-                             wrap = TRUE)
+      cli::cli_alert_danger("No variables available after applying
+                            include/exclude filters.")
     }
-    warning("inconsistent use of `include`")
+    return(NULL) # should this be an error?
   }
 
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  ## exclude variables from check ------
+  # Unique values ---------
 
-  vars    <- names(dt)
+  # Sort variables by number of unique values (ascending order)
+  unique_counts <- vapply(dt[, ..vars], fnunique, numeric(1))
+  vars          <- vars[order(unique_counts)]
+  unique_counts <- unique_counts[order(unique_counts)]
+  n_rows        <- fnrow(dt)
+  init_index    <- 0
 
-  ### Exclude variable according to their type ---------
-  if (!is.null(include)) {
 
-    # Find position of variable to include
-    ii <- which(names(dt) %in% include)
+  # Initialize list to store possible IDs
+  possible_ids_list <- vector("list", max_numb_possible_ids)
 
-  } else {
+  checked_ids <- vars |>
+    copy()
 
-    ii <- NULL
+  if (min_combination_size == 1) {
+    unique_ids    <- vars[unique_counts == n_rows]
+    # Add individual unique variables
+    init_index <- length(unique_ids)
+    if (init_index > 0) {
+      possible_ids_list[1:init_index] <- as.list(unique_ids)
+      if (verbose) {
+        cli::cli_alert_info("Found unique identifiers: {.code {unique_ids}}")
+      }
+      if (!get_all) {
+        ret_list <- store_checked_ids(checked_ids,
+                                      possible_ids_list)
+        return(ret_list)
+      }
 
+      # Remove unique identifiers from vars to reduce combinations
+      vars <- setdiff(vars, unique_ids)
+      if (length(vars) == 0) {
+        # All variables are unique identifiers
+        ret_list <- store_checked_ids(checked_ids,
+                                      possible_ids_list)
+        return(ret_list)
+      }
+      unique_counts <- unique_counts[vars]
+    }
   }
 
-  ### Exclude variable by name ---------
-  if (!is.null(exclude)) {
+  # combinations -----------
 
-    if (any(grepl("^_", exclude))) {
+  # Start testing combinations
+  start_time   <- Sys.time()
+  min_size     <- max(min_combination_size, 2)
+  max_size     <- min(length(vars), max_combination_size)
+  elapsed_time <- 0
 
-      type_ex <- exclude[grepl("^_", exclude)]
-      vars_ex <- exclude[!grepl("^_", exclude)]
+  # where there is only one variable or not enough vars to combine
+  if (min_size > max_size) {
+    if (verbose) {
+      cli::cli_alert_warning(
+        "Can't make combinations of {.field {vars}} if the min number of
+        combinations is {min_size} and the max is {max_size}")
+    }
 
-      type_ex <- match.arg(type_ex, c("_character", "_numeric"))
+    cli::cli_abort("No unique identifier found.")
+  }
 
-      # find variable that meet criteria and exclude them, making sure to include
-      # the variables of the user.
-      ex <- gsub("^_", "", type_ex)
-      FUN <- paste0("is.", ex)
+  j <- init_index + 1
+  for (comb_size in min_size:max_size) {
 
-      n_cols <- unlist(lapply(dt, FUN))
-      n_cols[ii] <- FALSE
+    # make sure length of vars is >= comb_size
+    if (length(vars) < comb_size) {
+      next
+      # or break
+    }
 
-      # Exclude variables by name
+    combos <- utils::combn(vars, comb_size, simplify = FALSE)
 
-      if (length(vars_ex) > 0) {
-        ex         <-which(names(dt) %in% vars_ex)
-        n_cols[ex] <- TRUE
-      }
+    # Prune combinations where the product of unique counts is less
+    # than n_rows
+    combos_to_keep <- vapply(combos,
+                             \(combo) {
+                               prod(unique_counts[combo]) >= n_rows
+                               },
+                             logical(1))
 
-      vars <- names(dt)[!n_cols]
+    combos <- combos[combos_to_keep]
 
-    } else {
-      vars <- vars[!(vars %in% exclude)]
+    # Estimate processing time and prune combinations
+    est_times <- vapply(combos,
+                        \(combo) {
+                          estimate_combination_time(n_rows,
+                                                    unique_counts[combo])
+                          },
+                        numeric(1))
 
-      if (identical(vars, names(dt))) {
-        if (verbose) {
-          cli::cli_alert_warning("Variable {.field {exclude}} is not available in data frame.
-                                Nothing is excluded.", wrap = TRUE)
+    if (verbose) {
+      cli::cli_progress_bar(
+        format = "combs of {cli::pb_extra$comb_size} vars: {cli::pb_bar} {cli::pb_percent} | ETA: {cli::pb_eta} | {cli::pb_current}/{cli::pb_total}",
+        extra = list(comb_size = comb_size),
+        total = length(combos))
+    }
+
+    for (combo in combos) {
+      # Check if the combination uniquely identifies the data
+      if (is_id(dt, by = combo, verbose = FALSE)) {
+        # This is inefficient... it is copying every time...
+        # I need to think better on how to do it.
+        possible_ids_list[[j]] <- combo
+
+        j <- j + 1
+
+        if (j > max_numb_possible_ids) {
+          if (verbose) {
+            cli::cli_alert_warning(
+            "Max number of possible IDs ({max_numb_possible_ids}) reached.
+            You may modify it in argument {.arg max_numb_possible_ids}")
+          }
+          ret_list <- store_checked_ids(checked_ids = checked_ids,
+                                        possible_ids = possible_ids_list)
+          return(ret_list)
         }
+        if (!get_all) {
 
-        warning("inconsistenty use of `exclude`")
+          ret_list <- store_checked_ids(checked_ids = checked_ids,
+                                        possible_ids = possible_ids_list)
+          return(ret_list)
 
+        }
+        # Remove variables in the current combo from vars to
+        # avoid redundant checks
+        vars <- setdiff(vars, combo)
+        unique_counts <- unique_counts[vars]
+
+        # Break since we found a minimal unique key of size i
+        if (!get_all) break
       }
+      # Check processing time
+      elapsed_time <- as.numeric(difftime(Sys.time(),
+                                          start_time,
+                                          units = "secs"))
+      if (elapsed_time > max_processing_time) {
+        if (verbose) {
+          mxt_msg <- "Maximum processing time exceeded.
+          modify {.arg max_processing_time} argument to increse time.
+          Stopping search."
+          cli::cli_alert_warning(mxt_msg)
+        }
+        break
+      }
+      if (verbose) cli::cli_progress_update()
+    }
 
+    # Break if all variables are used
+    if (length(vars) == 0 || elapsed_time > max_processing_time) {
+      break
     }
   }
 
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  ## check all names are unieuq --------
-  dup_var <- duplicated(vars)
+  if (length(remove_null(possible_ids_list)) == 0) {
+    if (verbose) {
+      cli::cli_alert_warning("No unique identifier found.")
+    }
+  }
 
-  if (any(dup_var)) {
+  # ----------------------------- #
+  # Return ####
+  # ----------------------------- #
 
-    dvars <- vars[dup_var]
+  ret_list <- store_checked_ids(checked_ids = checked_ids,
+                                possible_ids = possible_ids_list)
 
-    msg     <- "column names must be unique"
-    hint    <- "try changing the names using {.fun make.names}"
-    problem <- "{.var {dvars}}  {?is/are}  duplicated"
-    cli::cli_abort(c(
-                  msg,
-                  i = hint,
-                  x = problem
-                  ))
+  return(ret_list)
+}
 
-  }
 
+filter_by_class <- function(dt, vars, include_classes, exclude_classes) {
+  # Compute the primary class of each variable
+  vars_class <- vapply(dt, function(x) class(x)[1], character(1))
+  names(vars_class) <- vars  # Ensure names are preserved
 
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  # Find duplicates   ---------
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Apply 'include_classes' filter
+  if (!is.null(include_classes)) {
+    vars <- vars[vars_class[vars] %in% include_classes]
+  }
 
-  duplicates <- is_id(dt, by = vars, verbose = FALSE)
-  if (duplicates) {
-    if (verbose) {
-      cli::cli_alert_success("There are no duplicates in data frame")
-    }
-  } else {
-    if (verbose) {
-      cli::cli_alert_warning("Data has duplicates. returning NULL")
+  # Apply 'exclude_classes' filter
+  if (!is.null(exclude_classes)) {
+    vars <- vars[!(vars_class[vars] %in% exclude_classes)]
+  }
+  vars
+}
+
+filter_by_name <- function(vars, include, exclude, verbose) {
+  # Apply 'exclude' filter
+  if (!is.null(exclude)) {
+    wno_exc <- which(!exclude %in% vars) # which not excluded
+    if (length(wno_exc) > 0 & verbose) {
+      no_exc <- exclude[wno_exc]
+      cli::cli_alert_warning("var{?s} {.var {no_exc}} not found in dataframe")
     }
-    is_id(dt, by = vars, verbose = TRUE)
-    return(NULL)
+    vars <- setdiff(vars, exclude)
   }
+  # Apply 'include' filter
 
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  # Find ids   ---------
-  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  nvars <- length(vars)
+  c(vars,
+    setdiff(include, vars))
 
-  found <- FALSE
-  i = 0
-  while(i < nvars && found == FALSE) {
-    i = i + 1
-    cm <- utils::combn(vars, m = i)
+}
 
-    lcm <- dim(cm)[2]  # number of combinations of size j
 
-    selected_vars <- vector(length = lcm)
-    for (j in 1:lcm) {
-      tvars <- cm[, j] # testing vars
-      selected_vars[j] <- is_id(dt, by = tvars, verbose = FALSE)
-    }
 
-    sv <- which(selected_vars)
+# Function to estimate processing time based on unique counts
+estimate_combination_time <- function(n_rows, unique_counts) {
+  # Simple estimation function
+  # Time is proportional to (product of unique counts) / n_rows
+  # Adjust the constant factor based on empirical observations
+  est_time <- (prod(unique_counts) / n_rows) * 0.0001
+  return(est_time)
+}
 
-    if (length(sv) > 0) {
 
-      if (length(sv) == 1 && i > 1) {
+remove_null <- \(x) {
+  y <- vapply(x, \(.) !is.null(.), logical(1))
+  x[y]
+}
 
-        lv <- list(V1 = cm[, sv])
 
-      } else if (i == 1) {
+#' store checked variables as possible ids
+#'
+#' This function processes a list of possible IDs by removing any `NULL` entries,
+#' storing a set of checked variables as an attribute and in the specified environment,
+#' and then returning the updated list of possible IDs.
+#'
+#' @param checked_ids A vector of variable names that have been checked as possible IDs.
+#' @param possible_ids A list containing potential identifiers. This list may contain `NULL` values, which will be removed by the function.
+#' @param env An environment where the `checked_ids` will be stored. The default is `.joynenv`.
+#'
+#' @return A list of possible IDs with `NULL` values removed, and the `checked_ids` stored as an attribute.
+#'
+#'
+#' @keywords internal
+store_checked_ids <- function(checked_ids,
+                              possible_ids,
+                              env = .joynenv) {
 
-        ee <- as.data.frame(t(cm[, sv]))
-        lv <- lapply(ee, unique)
+  # Remove null from possible ids
+  possible_ids <- remove_null(possible_ids)
 
-      } else {
+  # Store checked_ids in environment
+  rlang::env_poke(env   = env,
+                  nm    = "checked_ids",
+                  value = checked_ids)
 
-        ee <- as.data.frame(cm[, sv])
-        lv <- lapply(ee, unique)
+  # Store attribute
+  attr(possible_ids,
+       "checked_ids") <- checked_ids
 
-      }
+  # Return
+  return(possible_ids)
 
-      found <- TRUE
+}
+
+#' Create variables that uniquely identify rows in a data table
+#'
+#' This function generates unique identifier columns for a given number of rows, based on the specified number of identifier variables.
+#'
+#' @param n_rows An integer specifying the number of rows in the data table for which unique identifiers need to be generated.
+#' @param n_ids An integer specifying the number of identifiers to be created. If `n_ids` is 1, a simple sequence of unique IDs is created. If greater than 1, a combination of IDs is generated.
+#' @param prefix A character string specifying the prefix for the identifier variable names (default is `"id"`).
+#'
+#' @return A named list where each element is a vector representing a unique identifier column. The number of elements in the list corresponds to the number of identifier variables (`n_ids`). The length of each element is equal to `n_rows`.
+#'
+#'
+#' @keywords internal
+create_ids <- function(n_rows, n_ids, prefix = "id") {
+
+  vars <- vector("list",
+                 n_ids)
+
+  # If n_ids is 1, simply generate a sequence of IDs
+  if (n_ids == 1) {
+    vars[[1]] <- seq_len(n_rows)
+    names(vars)[1] <- paste0(prefix, 1)
+
+    return(vars)
+  } else {
 
+    # Get max unique values each variable can have
+    max_vals <- ceiling(n_rows^(1 / n_ids))
+
+    # Generate a sequence of unique identifiers
+
+    all_ids <- expand.grid(rep(list(seq_len(max_vals)),
+                               n_ids))
+
+    #collapse::fnrow faster?
+
+    if (fnrow(all_ids) > n_rows) {
+      # Randomly sample the unique combinations
+      all_ids <- all_ids[sample(fnrow(all_ids),
+                                    n_rows), ]
     }
-  }
 
-  if (verbose) {
-    cli::cli_alert("we found {length(lv)} possible id{?s}")
-  }
+    # Store each unique identifier in the vars list
+    for (i in seq_len(n_ids)) {
+      vars[[i]] <- all_ids[[i]]
+    }
 
-  return(lv)
+    names(vars) <- paste0(prefix,
+                          seq_len(n_ids))
+
+    return(vars)
+  }
 
 }
+
diff --git a/R/zzz.R b/R/zzz.R
index 49d3b773..f222b498 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -141,6 +141,13 @@ set_joyn_options <- function(...,
 
 }
 
+# ------------------------------------
+# -- Define global variables --
+# ------------------------------------
+
+utils::globalVariables(c("..byvar",
+                         "..vars"))
+
 # ------------------------------------------------------------------------------------------
 # Define custom .strong {cli} classes to emphasize messages subcomponents
 # --- to be used when creating/storing {joyn} messages
diff --git a/man/create_ids.Rd b/man/create_ids.Rd
new file mode 100644
index 00000000..2197685a
--- /dev/null
+++ b/man/create_ids.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/possible_ids.R
+\name{create_ids}
+\alias{create_ids}
+\title{Create variables that uniquely identify rows in a data table}
+\usage{
+create_ids(n_rows, n_ids, prefix = "id")
+}
+\arguments{
+\item{n_rows}{An integer specifying the number of rows in the data table for which unique identifiers need to be generated.}
+
+\item{n_ids}{An integer specifying the number of identifiers to be created. If \code{n_ids} is 1, a simple sequence of unique IDs is created. If greater than 1, a combination of IDs is generated.}
+
+\item{prefix}{A character string specifying the prefix for the identifier variable names (default is \code{"id"}).}
+}
+\value{
+A named list where each element is a vector representing a unique identifier column. The number of elements in the list corresponds to the number of identifier variables (\code{n_ids}). The length of each element is equal to \code{n_rows}.
+}
+\description{
+This function generates unique identifier columns for a given number of rows, based on the specified number of identifier variables.
+}
+\keyword{internal}
diff --git a/man/freq_table.Rd b/man/freq_table.Rd
index 6be47f0c..9ed5cc34 100644
--- a/man/freq_table.Rd
+++ b/man/freq_table.Rd
@@ -4,7 +4,7 @@
 \alias{freq_table}
 \title{Tabulate simple frequencies}
 \usage{
-freq_table(x, byvar, digits = 1, na.rm = FALSE)
+freq_table(x, byvar, digits = 1, na.rm = FALSE, freq_var_name = "n")
 }
 \arguments{
 \item{x}{data frame}
@@ -14,6 +14,8 @@ freq_table(x, byvar, digits = 1, na.rm = FALSE)
 \item{digits}{numeric: number of decimal places to display. Default is 1.}
 
 \item{na.rm}{logical: report NA values in frequencies. Default is FALSE.}
+
+\item{freq_var_name}{character: name for frequency variable. Default is "n"}
 }
 \value{
 data.table with frequencies.
diff --git a/man/is_id.Rd b/man/is_id.Rd
index fce181e9..2e5e2051 100644
--- a/man/is_id.Rd
+++ b/man/is_id.Rd
@@ -4,7 +4,12 @@
 \alias{is_id}
 \title{Check if dt is uniquely identified by \code{by} variable}
 \usage{
-is_id(dt, by, verbose = getOption("joyn.verbose"), return_report = FALSE)
+is_id(
+  dt,
+  by,
+  verbose = getOption("joyn.verbose", default = FALSE),
+  return_report = FALSE
+)
 }
 \arguments{
 \item{dt}{either right of left table}
diff --git a/man/possible_ids.Rd b/man/possible_ids.Rd
index 3d284b31..484e261c 100644
--- a/man/possible_ids.Rd
+++ b/man/possible_ids.Rd
@@ -6,30 +6,76 @@
 \usage{
 possible_ids(
   dt,
+  vars = NULL,
   exclude = NULL,
   include = NULL,
-  verbose = getOption("possible_ids.verbose")
+  exclude_classes = NULL,
+  include_classes = NULL,
+  verbose = getOption("possible_ids.verbose", default = FALSE),
+  min_combination_size = 1,
+  max_combination_size = 5,
+  max_processing_time = 60,
+  max_numb_possible_ids = 100,
+  get_all = FALSE
 )
 }
 \arguments{
 \item{dt}{data frame}
 
-\item{exclude}{character: Exclude variables to be selected as identifiers. It
-could be either the name of the variables of one type of the variable
-prefixed by "_". For instance, "_numeric" or "_character".}
+\item{vars}{character: A vector of variable names to consider for identifying unique combinations.}
+
+\item{exclude}{character: Names of variables to exclude from analysis}
 
 \item{include}{character: Name of variable to be included, that might belong
 to the group excluded in the \code{exclude}}
 
+\item{exclude_classes}{character: classes to exclude from analysis (e.g.,
+"numeric", "integer", "date")}
+
+\item{include_classes}{character: classes to include in the analysis (e.g.,
+"numeric", "integer", "date")}
+
 \item{verbose}{logical: If FALSE no message will be displayed. Default is
 TRUE}
+
+\item{min_combination_size}{numeric: Min number of combinations. Default is
+1, so all combinations.}
+
+\item{max_combination_size}{numeric. Max number of combinations. Default is
+5. If there is a combinations of identifiers larger than
+\code{max_combination_size}, they won't be found}
+
+\item{max_processing_time}{numeric: Max time to process in seconds. After
+that, it returns what it found.}
+
+\item{max_numb_possible_ids}{numeric: Max number of possible IDs to find. See
+details.}
+
+\item{get_all}{logical: get all possible combinations based on the parameters
+above.}
 }
 \value{
 list with possible identifiers
 }
 \description{
-Identify possible variables uniquely identifying x
+Identify possible combinations of variables that  uniquely identifying dt
+}
+\section{Number of possible IDs}{
+
+
+The number of possible IDs in a dataframe could be very large. This is why,
+\code{possible_ids()} makes use of heuristics to return something useful without
+wasting the time of the user. In addition, we provide multiple parameter so
+that the user can fine tune their search for possible IDs easily and
+quickly.
+
+Say for instance that you have a dataframe with 10 variables. Testing every
+possible pair of variables will give you 90 possible unique identifiers for
+this dataframe. If you want to test all the possible IDs, you will have to
+test more 5000 combinations. If the dataframe has many rows, it may take a
+while.
 }
+
 \examples{
 library(data.table)
 x4 = data.table(id1 = c(1, 1, 2, 3, 3),
diff --git a/man/store_checked_ids.Rd b/man/store_checked_ids.Rd
new file mode 100644
index 00000000..b0471724
--- /dev/null
+++ b/man/store_checked_ids.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/possible_ids.R
+\name{store_checked_ids}
+\alias{store_checked_ids}
+\title{store checked variables as possible ids}
+\usage{
+store_checked_ids(checked_ids, possible_ids, env = .joynenv)
+}
+\arguments{
+\item{checked_ids}{A vector of variable names that have been checked as possible IDs.}
+
+\item{possible_ids}{A list containing potential identifiers. This list may contain \code{NULL} values, which will be removed by the function.}
+
+\item{env}{An environment where the \code{checked_ids} will be stored. The default is \code{.joynenv}.}
+}
+\value{
+A list of possible IDs with \code{NULL} values removed, and the \code{checked_ids} stored as an attribute.
+}
+\description{
+This function processes a list of possible IDs by removing any \code{NULL} entries,
+storing a set of checked variables as an attribute and in the specified environment,
+and then returning the updated list of possible IDs.
+}
+\keyword{internal}
diff --git a/tests/testthat/test-freq_table.R b/tests/testthat/test-freq_table.R
index f579699b..d5467b71 100644
--- a/tests/testthat/test-freq_table.R
+++ b/tests/testthat/test-freq_table.R
@@ -67,9 +67,7 @@ test_that("correct totals", {
 
   tr <- nrow(y4)
 
-  j <- freq_table(y4, "id2")
-  j <- freq_table(y4, "id2")
-  j |>
+  freq_table(y4, "id2") |>
     fsubset(id2 == "total") |>
     fselect(n) |>
     reg_elem() |>
diff --git a/tests/testthat/test-is_id.R b/tests/testthat/test-is_id.R
index 3205ccdd..332c5a2a 100644
--- a/tests/testthat/test-is_id.R
+++ b/tests/testthat/test-is_id.R
@@ -75,10 +75,13 @@ test_that("returns correct report table", {
       id = c("c", "b", "c", "a"),
       y = c(11L, 15L, 18L, 20L)
     )
-  j <- is_id(y, by = "id", return_report = TRUE)
+  j <- is_id(y, by = "id", return_report = TRUE) |>
+    roworder(by = "id")
 
-  r <- data.table(id = c("c", "b", "a"),
-                  copies = c(2L, 1L, 1L))
+  r <- data.table(id = c("c", "b", "a", "total"),
+                  copies = c(2L, 1L, 1L, 4L),
+                  percent = c("50%", "25%", "25%", "100%")) |>
+    roworder(by = "id")
 
   expect_equal(j, r)
 
diff --git a/tests/testthat/test-possible_ids.R b/tests/testthat/test-possible_ids.R
index ccf68847..4d9041ec 100644
--- a/tests/testthat/test-possible_ids.R
+++ b/tests/testthat/test-possible_ids.R
@@ -1,3 +1,5 @@
+
+# PREPARATION ####
 withr::local_options(joyn.verbose = FALSE)
 library(data.table)
 # options(possible_ids.verbose = FALSE)
@@ -38,12 +40,234 @@ y4 = data.table(id  = c(1, 2, 5, 6, 3),
                 y   = c(11L, 15L, 20L, 13L, 10L),
                 x   = c(16:20))
 
+# Auxiliary data: Big data table--------------------
+
+# Set seed for reproducibility
+set.seed(123)
+
+# Number of rows and variables
+n_rows <- 1e4        # 10,000 rows
+n_vars <- 50         # Total variables
+
+# Initialize an empty data.table
+dt_large <- data.table(id = 1:n_rows)
+
+## Manually create three variables that uniquely identify the data ####
+dt_large[, unique_id1 := rep(1:10, each = 1000)]  # 1000 unique values repeated 100 times
+dt_large[, unique_id2 := sample(letters, n_rows, replace = TRUE)]  # Random character variable
+dt_large[, unique_id3 := sample(1:1000, n_rows, replace = TRUE)]   # Random integer
+
+# Function to generate random data
+generate_random_data <- function(n, type) {
+  switch(type,
+         "numeric_int" = sample(1:1e6, n, replace = TRUE),
+         "numeric_double" = rnorm(n),
+         "character" = replicate(n, paste0(sample(letters, 5, replace = TRUE), collapse = "")),
+         "factor" = factor(sample(letters[1:10], n, replace = TRUE)),
+         "logical" = sample(c(TRUE, FALSE), n, replace = TRUE),
+         "date" = as.Date("2000-01-01") + sample(0:3650, n, replace = TRUE),
+         "datetime" = as.POSIXct("2000-01-01") + sample(0:(3650*24*60*60), n, replace = TRUE)
+  )
+}
+
+# Variable types and counts
+var_types <- c("numeric_int", "numeric_double", "character", "factor", "logical", "date", "datetime")
+vars_per_type <- c(10, 10, 10, 10, 5, 3, 2)  # Total should sum to 50
+
+# Generate variables and add to the data.table
+var_count <- 0
+for (i in seq_along(var_types)) {
+  type <- var_types[i]
+  n_vars_type <- vars_per_type[i]
+  for (j in 1:n_vars_type) {
+    var_count <- var_count + 1
+    var_name <- paste0(type, "_", j)
+    dt_large[, (var_name) := generate_random_data(n_rows, type)]
+  }
+}
+
+## Introduce duplicates in some columns that are NOT the unique identifiers ####
+# For example, we can duplicate the first 100 rows in the "numeric_int_1" and "character_1" columns
+# dt_large <- rbind(dt_large, dt_large[1:100, .(numeric_int_1, character_1)])
+
+# Shuffle the data to avoid ordered data
+dt_large <- dt_large[sample(.N)]
+
+
+
+# dt_large[, id := .I]
+dt <- copy(dt_large)
+
+
+possible_ids(
+  dt = dt_large,
+  exclude_classes = c("numeric"),
+  verbose = TRUE
+)
+
+possible_ids(
+  dt = dt_large,
+  exclude_classes = c("numeric"),
+  exclude = "id",
+  verbose = TRUE
+)
+
+uniq_vars <- grep("unique_id", names(dt_large), value = TRUE)
+pids <- possible_ids(
+  dt = dt_large,
+  #exclude_classes = c("logical", "date", "datetime", "numeric"),
+  exclude = "id",
+  #vars = uniq_vars,
+  verbose = TRUE,
+  min_combination_size = 3,
+  # max_combination_size = 3,
+  max_processing_time = 240,
+  get_all = TRUE
+)
+
+possible_ids(
+  dt = dt_large,
+  verbose = TRUE
+)
+
+## Remove the 'id' column to simulate data without a clear unique identifier ####
+dt_large[, id := NULL]
+
+possible_ids_list <- possible_ids(
+  dt = dt_large,
+  exclude_classes = c("logical", "date", "datetime"),  # Exclude some types for efficiency
+  verbose = TRUE
+)
+possible_ids_list
+
+possible_ids_list <- possible_ids(
+  dt = dt_large,
+  exclude_classes = c("logical", "date", "datetime", "numeric"),  # Exclude some types for efficiency
+  max_processing_time = 120,
+  verbose = TRUE
+)
+possible_ids_list
+
+
+
+# TESTS ####
+## Test create ids --------------------- ####
+
+test_that("create_ids works as intended", {
+  df_test <- data.table(a = 1:50)
+
+  # with a single id
+  res <- as.data.frame(create_ids(n_rows = 50,
+                                  n_ids = 1))
+
+  nrow(unique(res)) |>
+    expect_equal(50)
+
+  class(res) |>
+    expect_equal("data.frame")
+
+
+  # ret class
+  res <- create_ids(n_rows = 50,
+                    n_ids = 1)
+
+  class(res) |>
+    expect_equal("list")
+
+  length(res) |>
+    expect_equal(1)
+
+  # prefix
+  create_ids(n_rows = 40, n_ids = 4, prefix = "unique_id") |>
+    names() |>
+    expect_equal(paste0("unique_id", 1:4))
+
+  # with more than an id
+  vars <- c("var1", "var2", "var3")
+
+  dt <- df_test |> copy()
+
+  res <- dt[, (vars) := create_ids(.N,
+                                        n_ids = 3)]
+
+  nrow(res[, .N,
+
+     by = vars][N > 1]) |>
+    expect_equal(0)
+
+  res <- df_test[, .(as.data.table(
+    create_ids(.N, n_ids = 3)
+    ))]
+
+  nrow(res[, .N,
+
+           by = c("id1", "id2", "id3")][N > 1]) |>
+    expect_equal(0)
+
+  dt <- dt_large |> copy()
+
+  new_ids <- as.data.table(create_ids(nrow(dt),
+                                      n_ids = 3,
+                                      prefix = "new_id"))
 
+  # Bind the new IDs to the existing data.table
+  dt <- cbind(dt, new_ids)
+
+
+})
+
+
+
+## Test possible_ids ------------------- ####
 test_that("convert to data.table", {
   xx1 <- as.data.frame(x1)
   expect_equal(possible_ids(x1), possible_ids(xx1))
 })
 
+
+test_that("store checked ids", {
+
+  res <- possible_ids(x4,
+                      get_all = TRUE)
+
+  attributes(res)$checked_ids |>
+    expect_setequal(names(x4))
+
+  .joynenv$checked_ids |>
+    expect_setequal(names(x4))
+
+  # with get_all = FALSE
+  res <- possible_ids(x4)
+
+  attributes(res)$checked_ids |>
+    expect_setequal(names(x4))
+
+  .joynenv$checked_ids |>
+    expect_setequal(names(x4))
+
+  # with large dt -get_all FALSE
+  res <- possible_ids(dt,
+                      vars = paste0("numeric_int_", 1:4))
+
+  attributes(res)$checked_ids |>
+    expect_setequal(paste0("numeric_int_", 1:4))
+
+  .joynenv$checked_ids |>
+    expect_setequal(paste0("numeric_int_", 1:4))
+
+  # with large dt -get_all TRUE
+  res <- possible_ids(dt,
+                      vars = paste0("numeric_int_", 1:4),
+                      get_all = TRUE)
+
+  attributes(res)$checked_ids |>
+    expect_setequal(paste0("numeric_int_", 1:4))
+
+  .joynenv$checked_ids |>
+    expect_setequal(paste0("numeric_int_", 1:4))
+
+})
+
 test_that("error if not dataframe", {
 
   m1 <- as.matrix(x1)
@@ -51,36 +275,319 @@ test_that("error if not dataframe", {
 
 })
 
-test_that("inconsistent user of `include`", {
+test_that("vars provided by user", {
+
+  # single var -raise error
+  possible_ids(x4,
+               vars = c("t")) |>
+    expect_error()
+
+  # one or more vars not included in dt
+  possible_ids(x4,
+               vars = c("id3", "id2")) |>
+    expect_error()
+
+  possible_ids(dt,
+               vars = c("id", "id3", "id2")) |>
+    expect_error()
+
+  possible_ids(dt,
+               vars = c("id", "numeric_int_1", "character_1"),
+               verbose = TRUE) |>
+    expect_no_error()
+
+  ids_dt <- possible_ids(dt)
+
+  vars <- c("id", "numeric_double_1", "numeric_double_2")
+
+  use_ids_dt <- possible_ids(dt,
+               vars = vars)
+
+  all(sapply(use_ids_dt,
+             function(x) { x %in% ids_dt })) |>
+    expect_equal(TRUE)
+
+  all(unlist(use_ids_dt) %in% vars) |>
+    expect_equal(TRUE)
+
+
+  # Check if the combination of unique_id1, unique_id2, and unique_id3 uniquely identifies rows
+  vars <- c("id", "unique_id2", "unique_id3")
+
+  res <- dt[, .N,
+           by = vars][N > 1] |>
+    nrow()
+
+  # --if it does, possible_ids should return those vars
 
-  expect_warning(possible_ids(x1,
-                            include = "x"))
+  if (res == 0) {  # if no duplicate rows
+    possible_ids(dt,
+                 vars = vars,
+                 min_combination_size = 3) |>
+      unlist() |>
+      expect_setequal(vars)
+  }
 
 })
 
+test_that("relationship include and vars", {
+
+  possible_ids(x4,
+               vars = c("id1", "id2"),
+               include = c("t")) |>
+    expect_no_error()
+
+  possible_ids(dt,
+               vars = c("id", "unique_id2"),
+               include = c("id", "factor_1", "factor_2")) |>
+    expect_no_error()
+
+  possible_ids(x4,
+               vars = NULL,
+               include = c("t", "x")) |>
+    expect_no_error()
+
+  possible_ids(x4,
+               vars = c("t", "x"),
+               include = NULL) |>
+    expect_no_error()
+
+  # test checked vars are at least `vars` plus those in `include`
+  res <- possible_ids(x4,
+                      vars = c("id1", "id2", "t"),
+                      include = "t")
+
+  checked_ids <- attributes(res)$checked_ids
+
+  checked_ids |>
+    expect_setequal(c("id1", "id2", "t"))
+
+  res <- possible_ids(dt,
+                      vars = c("logical_1", "logical_2", "factor_1", "factor_2"),
+                      include = "unique_id1")
+
+  checked_ids <- attributes(res)$checked_ids
+
+  checked_ids |>
+    expect_setequal(c("logical_1", "logical_2", "factor_1", "factor_2", "unique_id1"))
+
+
+})
+
+test_that("relationship exclude and vars", {
+
+  possible_ids(x4,
+               vars = c("t", "x"),
+               exclude_classes = "character") |>
+    expect_message()
+
+  possible_ids(x4,
+               vars = c("id1", "x"),
+               exclude = "x") |>
+    expect_message()
+
+  possible_ids(dt,
+               vars = paste0("character_", 1:10),
+               exclude = c("character_1", "character_2")) |>
+    expect_message()
+  })
+
+# test_that("inconsistent use of `include`", {
+#
+#   #  expect_warning(possible_ids(x1,
+#   #                              include = "x"))
+#   #
+#   # possible_ids(x1,
+#   #              include = c("id", "x")) |>
+#   #   expect_no_error()
+#
+# })
+
 test_that("exclude and include", {
 
   dd <- possible_ids(x3,
-               exclude = "_numeric",
+               exclude_classes =  c("numeric", "integer"),
                include = "foo")
-  expect_equal(c("V1", "V2"), names(dd))
+
+  expect_equal(unlist(dd),
+               c("id", "foo"))
+
+  ## Test combination between include class and exclude vars ####
+
+  res <- possible_ids(dt,
+                               get_all = TRUE,
+                               include_classes     = c("integer"),
+                               exclude             = paste0("numeric_int_", 1:5))
+
+  checked_vars <- attributes(res)$checked_ids
+
+  any(
+    paste0("numeric_int_", 1:5) %in% checked_vars
+    ) |>
+    expect_equal(FALSE)
+
+  all(
+    paste0("numeric_int_", 6:10) %in% checked_vars
+  ) |>
+    expect_equal(TRUE)
+
+  ## Test combination between include vars and exclude class ####
+  res <- possible_ids(dt,
+                      include     = c("numeric_double_1",
+                                      "numeric_double_2"),
+                      exclude_classes = "numeric")
+
+  checked_vars <- attributes(res)$checked_ids
+
+  all(
+    paste0("numeric_double_", 1:2) %in% checked_vars
+  ) |>
+    expect_equal(TRUE)
+
+  any(
+    paste0("numeric_double_", 3:10) %in% checked_vars
+  ) |>
+    expect_equal(FALSE)
+
+  res_ids <- possible_ids(x2,
+                               include = "x",
+                               exclude_classes = "numeric")
+
+  res_ids |>
+    unlist() |>
+    expect_equal(c("t", "x"))
+
+  res_ids <- possible_ids(x3,
+                          include = "id",
+                          exclude_classes = "character")
+
+  res_ids |>
+    unlist() |>
+    expect_setequal(c("foo", "id", "v"))
+
+  # alert if include and exclude same class ####
+  possible_ids(dt,
+               include_classes = "numeric",
+               exclude_classes = "numeric") |>
+    expect_message()
+
+  # alert if include and exclude same vars ####
+  possible_ids(dt,
+               include = c("id", "unique_id1"),
+               exclude = c("id", "unique_id1")) |>
+    expect_message()
+
+  res <- possible_ids(dt,
+                      exclude_classes = c("integer"),
+                      include = c("numeric_int_1"))
+
+  attributes(res)$checked_ids |>
+    expect_setequal(setdiff(names(dt), c(paste0("numeric_int_", 2:10),
+                                         "id",
+                                         "unique_id1", "unique_id3")))
 
 })
 
 
-test_that("get NULL when duplicates", {
+test_that("get length 0 -error", {
 
-  expect_null(possible_ids(x1,
-                           exclude = "_numeric",
+  expect_error(possible_ids(x1,
+                           exclude_classes = c("numeric", "integer"),
                            include = "t"))
 
 })
 
+test_that("get all works", {
+
+  # no error
+  possible_ids(x4,
+               get_all = TRUE) |>
+    expect_no_error()
+
+  # get all with user selected vars
+  possible_ids(x4,
+               vars = c("id1", "t"),
+               get_all = TRUE) |>
+    expect_no_error()
+
+  # get all with max number of combinations
+  possible_ids(x4,
+               max_combination_size = 3,
+               get_all = TRUE) |>
+    expect_no_error()
+
+  # check get all combs
+  possible_ids(x3,
+               get_all = TRUE) |>
+    unlist() |>
+    expect_setequal(c("id", "v", "foo"))
+
+
+
+})
+
+
+test_that("Max combination size", {
+
+  res <- possible_ids(dt,
+               vars = c( "unique_id1", "unique_id2", "unique_id3",
+                        "character_1", "character_2", "character_3", "character_4"),
+               max_combination_size = 5)
+
+
+  sapply(res, function(sublist) {
+    length(sublist) <= 3}) |>
+    all() |>
+    expect_true()
+
+  res <- possible_ids(x1,
+                      get_all = TRUE,
+                      max_combination_size = 2)
+
+  sapply(res, function(sublist) {
+    length(sublist) <= 2}) |>
+    all() |>
+    expect_true()
+
+})
+
+test_that("Min combination size", {
+
+  res <- possible_ids(x4,
+               min_combination_size = 1,
+               get_all = FALSE) |>
+    unlist()
+
+  expect_true(
+    length(res) >= 1)
+
+  res <- possible_ids(dt,
+               min_combination_size = 3,
+               get_all = FALSE) |>
+    unlist()
+
+  expect_true(length(res) >= 3)
+
+
+  possible_ids(x4,
+               #min_combination_size = 1,
+               max_combination_size = 1) |>
+    expect_error()
+
+
+  possible_ids(x4,
+               min_combination_size = 3,
+               max_combination_size = 2) |>
+    expect_error()
+
+})
+
 
 test_that("Exclude nothing", {
+  p1 <- possible_ids(x1)
+  p2 <- possible_ids(x1, exclude = "rer")
 
-  expect_warning(possible_ids(x1,
-                              exclude = "rer"))
+  expect_equal(p1, p2)
 
 })
 
@@ -91,9 +598,10 @@ test_that("Exclude type and variable", {
 
   xx4[, id2 := as.character(id2)]
   dd <- possible_ids(xx4,
-               exclude = c("_character", "x"))
+               exclude_classes = c("character"),
+               exclude = "x")
 
-  expect_equal(c("id1", "t"), dd$V1)
+  expect_equal(c("id1", "t"), unlist(dd))
 
 })
 
@@ -104,7 +612,7 @@ test_that("Exclude more than one variable", {
   dd <- possible_ids(x4,
              exclude = c("id2", "x"))
 
-  expect_equal(c("id1", "t"), dd$V1)
+  expect_equal(c("id1", "t"), unlist(dd))
 
 })
 
@@ -117,4 +625,139 @@ test_that("duplicated names", {
 
 })
 
+test_that("identifies ids", {
+
+  vars <- c("var1", "var2", "var3")
+  dt[, (vars) := create_ids(.N, n_ids = 3)]
+
+  possible_ids(dt,
+               vars = vars) |>
+    unlist() |>
+    expect_equal(vars)
+
+  df_test <- as.data.frame(
+    create_ids(n_rows = 50,
+               n_ids = 3)
+  )
+
+  possible_ids(df_test,
+               vars = c("id1", "id2"),
+               include = "id3") |>
+    unlist() |>
+    expect_equal(c("id1", "id2", "id3"))
+
+  possible_ids(df_test,
+               exclude_classes = "integer",
+               include = c("id1", "id2", "id3")) |>
+    unlist() |>
+    expect_equal(c("id1", "id2", "id3"))
+
+})
 
+# Auxiliary data: Big data table--------------------
+
+# Set seed for reproducibility
+set.seed(123)
+
+# Number of rows and variables
+n_rows <- 1e4        # 10,000 rows
+n_vars <- 50         # Total variables
+
+# Initialize an empty data.table
+dt_large <- data.table(id = 1:n_rows)
+
+## Manually create three variables that uniquely identify the data ####
+dt_large[, unique_id1 := rep(1:10, each = 1000)]  # 1000 unique values repeated 100 times
+dt_large[, unique_id2 := sample(letters, n_rows, replace = TRUE)]  # Random character variable
+dt_large[, unique_id3 := sample(1:1000, n_rows, replace = TRUE)]   # Random integer
+
+# Function to generate random data
+generate_random_data <- function(n, type) {
+  switch(type,
+         "numeric_int" = sample(1:1e6, n, replace = TRUE),
+         "numeric_double" = rnorm(n),
+         "character" = replicate(n, paste0(sample(letters, 5, replace = TRUE), collapse = "")),
+         "factor" = factor(sample(letters[1:10], n, replace = TRUE)),
+         "logical" = sample(c(TRUE, FALSE), n, replace = TRUE),
+         "date" = as.Date("2000-01-01") + sample(0:3650, n, replace = TRUE),
+         "datetime" = as.POSIXct("2000-01-01") + sample(0:(3650*24*60*60), n, replace = TRUE)
+  )
+}
+
+# Variable types and counts
+var_types <- c("numeric_int", "numeric_double", "character", "factor", "logical", "date", "datetime")
+vars_per_type <- c(10, 10, 10, 10, 5, 3, 2)  # Total should sum to 50
+
+# Generate variables and add to the data.table
+var_count <- 0
+for (i in seq_along(var_types)) {
+  type <- var_types[i]
+  n_vars_type <- vars_per_type[i]
+  for (j in 1:n_vars_type) {
+    var_count <- var_count + 1
+    var_name <- paste0(type, "_", j)
+    dt_large[, (var_name) := generate_random_data(n_rows, type)]
+  }
+}
+
+## Introduce duplicates in some columns that are NOT the unique identifiers ####
+# For example, we can duplicate the first 100 rows in the "numeric_int_1" and "character_1" columns
+# dt_large <- rbind(dt_large, dt_large[1:100, .(numeric_int_1, character_1)])
+
+# Shuffle the data to avoid ordered data
+dt_large <- dt_large[sample(.N)]
+
+
+
+# dt_large[, id := .I]
+dt <- copy(dt_large)
+
+
+possible_ids(
+  dt = dt_large,
+  exclude_classes = c("numeric"),
+  verbose = TRUE
+)
+
+possible_ids(
+  dt = dt_large,
+  exclude_classes = c("numeric"),
+  exclude = "id",
+  verbose = TRUE
+)
+
+uniq_vars <- grep("unique_id", names(dt_large), value = TRUE)
+pids <- possible_ids(
+  dt = dt_large,
+  #exclude_classes = c("logical", "date", "datetime", "numeric"),
+  exclude = "id",
+  #vars = uniq_vars,
+  verbose = TRUE,
+  min_combination_size = 3,
+  # max_combination_size = 3,
+  max_processing_time = 240,
+  get_all = TRUE
+)
+
+possible_ids(
+  dt = dt_large,
+  verbose = TRUE
+)
+
+## Remove the 'id' column to simulate data without a clear unique identifier ####
+dt_large[, id := NULL]
+
+possible_ids_list <- possible_ids(
+  dt = dt_large,
+  exclude_classes = c("logical", "date", "datetime"),  # Exclude some types for efficiency
+  verbose = TRUE
+)
+possible_ids_list
+
+possible_ids_list <- possible_ids(
+  dt = dt_large,
+  exclude_classes = c("logical", "date", "datetime", "numeric"),  # Exclude some types for efficiency
+  max_processing_time = 120,
+  verbose = TRUE
+)
+possible_ids_list
diff --git a/vignettes/aux-functions.Rmd b/vignettes/aux-functions.Rmd
index 2eab35de..a668bb74 100644
--- a/vignettes/aux-functions.Rmd
+++ b/vignettes/aux-functions.Rmd
@@ -68,10 +68,10 @@ possible_ids(dt      = x1,
 possible_ids(dt      = x1, 
              exclude = "_character")
 
-# Identify possible unique identifiers, excluding character variables but considering variable z
+# Identify possible unique identifiers, excluding character variables but considering variable c1
 possible_ids(dt      = x1, 
-             exclude = "_character",
-             include = "z")
+             exclude_classes = "character",
+             include = "c1")
 
 ```