InseeFr · khaledlarbi · Oct 17, 2024 · Oct 17, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,104 @@
+# Workflow derived from https://github.com/r-wasm/actions/tree/v1/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    # Only build on main or master branch
+    branches: [main, master]
+  # Or when triggered manually
+  workflow_dispatch: {}
+
+name: R WASM & {pkgdown} deploy
+
+jobs:
+  rwasmbuild:
+    # Only restrict concurrency for non-PR jobs
+    concurrency:
+      group: r-wasm-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Build the local R package and structure the CRAN repository
+      - name: Build WASM R packages
+        uses: r-wasm/actions/build-rwasm@v1
+        with:
+          packages: "."
+          repo-path: "_site"
+
+      # Upload the CRAN repository for use in the `pkgdown` job (v4 must pair with download-artifact@v4)
+      # Make sure to set a retention day to avoid running into a cap
+      - name: Upload build artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: rwasmrepo
+          path: |
+            _site
+          retention-days: 1
+
+  pkgdown:
+    runs-on: ubuntu-latest
+    # Add a dependency on the prior job completing
+    needs: rwasmbuild
+    # Required for the gh-pages deployment action
+    environment:
+      name: github-pages
+    # Only restrict concurrency for non-PR jobs
+    concurrency:
+      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      # To download GitHub Packages within action
+      repository-projects: read
+      # For publishing to pages environment
+      pages: write
+      id-token: write
+    steps:
+      # Usual steps for generating a pkgdown website
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::pkgdown, local::.
+          needs: website
+      # Change the build directory from `docs` to `_site`
+      # For parity with where the R WASM package repository is setup
+      - name: Build site
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, dest_dir = "_site")
+        shell: Rscript {0}
+
+      # New material ---
+
+      # Download the built R WASM CRAN repository from the `rwasmbuild` job.
+      # Extract it into the `_site` directory
+      - name: Download build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: rwasmrepo
+          path: _site
+
+      # Upload a tar file that will work with GitHub Pages
+      # Make sure to set a retention day to avoid running into a cap
+      # This artifact shouldn't be required after deployment onto pages was a success.
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          retention-days: 1
+
+      # Use an Action deploy to push the artifact onto GitHub Pages
+      # This requires the Pages source in the repository settings to be `GitHub Actions`
+      # instead of using `docs/` or the `gh-pages` branch of the repository
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/webr-pkgdown-build-and-deploy.yml b/.github/workflows/webr-pkgdown-build-and-deploy.yml
@@ -0,0 +1,107 @@
+# Workflow derived from https://github.com/r-wasm/actions/tree/v1/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    # Build on pushes to any branch
+    branches: '**'
+  # Or when triggered manually
+  workflow_dispatch: {}
+
+name: R WASM & {pkgdown} deploy
+
+jobs:
+  rwasmbuild:
+    # Only restrict concurrency for non-PR jobs
+    concurrency:
+      group: r-wasm-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Build the local R package and structure the CRAN repository
+      - name: Build WASM R packages
+        uses: r-wasm/actions/build-rwasm@v1
+        with:
+          packages: "."
+          repo-path: "_site"
+
+      # Upload the CRAN repository for use in the `pkgdown` job (v4 must pair with download-artifact@v4)
+      # Make sure to set a retention day to avoid running into a cap
+      - name: Upload build artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: rwasmrepo
+          path: |
+            _site
+          retention-days: 1
+
+  pkgdown:
+    runs-on: ubuntu-latest
+    # Add a dependency on the prior job completing
+    needs: rwasmbuild
+    # Required for the gh-pages deployment action
+    environment:
+      name: github-pages
+    # Only restrict concurrency for non-PR jobs
+    concurrency:
+      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      # To download GitHub Packages within action
+      repository-projects: read
+      # For publishing to pages environment
+      pages: write
+      id-token: write
+    steps:
+      # Usual steps for generating a pkgdown website
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::pkgdown, local::.
+          needs: website
+      # Change the build directory from `docs` to `_site`
+      # For parity with where the R WASM package repository is setup
+      - name: Build site
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, dest_dir = "_site")
+        shell: Rscript {0}
+
+      # New material ---
+
+      # Download the built R WASM CRAN repository from the `rwasmbuild` job.
+      # Extract it into the `_site` directory
+      - name: Download build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: rwasmrepo
+          path: _site
+
+      # Upload a tar file that will work with GitHub Pages
+      # Make sure to set a retention day to avoid running into a cap
+      # This artifact shouldn't be required after deployment onto pages was a success.
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          retention-days: 1
+
+      # Use an Action deploy to push the artifact onto GitHub Pages
+      # This requires the Pages source in the repository settings to be `GitHub Actions`
+      # instead of using `docs/` or the `gh-pages` branch of the repository
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
+
+
+
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,7 +3,14 @@ Type: Package
 Title: A User-Oriented Statistical Toolkit for Analytical Variance Estimation
 Depends: R(>= 3.2.5)
 Imports: methods, utils, stats, Matrix
-Suggests: testthat, sampling, magrittr, tibble, dplyr, data.table
+Suggests: 
+    testthat,
+    sampling,
+    magrittr,
+    tibble,
+    dplyr,
+    data.table,
+    torch
 Version: 1.0.0
 Authors@R: c(
   person("Martin", "Chevalier", role = "aut", email = "mc@slmc.fr", comment = "Creator"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(add_zero)
+export(auto_statistic_function)
 export(define_statistic_wrapper)
 export(define_variance_wrapper)
 export(qvar)

diff --git a/R/define_statistic_wrapper.R b/R/define_statistic_wrapper.R
@@ -301,3 +301,138 @@ standard_display <- function(point, var, metadata, alpha){
 standard_display <- change_enclosing(standard_display, globalenv())
 
 
+
+
+#' Define a function that computes statistics without the need to provide linearized variables.
+#' @description Creates a function that computes statistics without the need to provide linearized variables.
+#' The estimated linearized variables are computed via autodifferentiation based
+#' on \code{torch}.
+#'
+#' @param fn A function of the estimated totals defining the statistic to be estimated. All arguments from \code{fn} must be in \code{arg_type}.
+#' @param arg_type A list specifying the arguments for the created function,
+#' including \code{data}, \code{weight}, and optionally \code{param}.
+#'
+#' @return A function that computes the estimated totals, applies
+#' the \code{fn} function, and returns a list with three elements:
+#' \describe{
+#'   \item{point}{The point estimate, as a numeric value.}
+#'   \item{lin}{The linearized variable, defined as the dot product between the gradient
+#'   and the data.}
+#'   \item{n}{The smallest number of non-missing values across the \code{data} variables.}
+#' }
+#'
+#' @details \code{fn} describes the function applied to the estimated totals. This function takes weighted totals as input,
+#' so weights do not need to be provided to \code{fn}.
+#' @export
+#'
+#' @examplesIf requireNamespace("torch", quietly = TRUE)
+#' arg_type <- list(data = c("x", "y"), weight = "weight")
+#' ratio_autolin <- define_statistic_wrapper(
+#'   statistic_function = auto_statistic_function(function(x, y) {return(x / y)}, arg_type),
+#'   arg_type = arg_type)
+
+auto_statistic_function <- function(fn, arg_type){
+  # The auto_statistic_function creates a function that meets the criteria of the statistic_function
+  # argument in gustave::define_statistic_wrapper. It is based on the fn function,
+  # which describes the total function the user wishes to estimate.
+  #
+  # From this fn function, we create a fn_tensored function that takes the elements specified
+  # in arg_type as arguments. This fn_tensored function computes the estimated totals, applies
+  # the fn function, and calculates the gradient of the fn function with respect to the
+  # estimated totals of the variables specified in arg_type$data.
+
+  #Check if `torch` is installed.
+  if (!requireNamespace("torch", quietly = TRUE)) {
+    stop("The 'torch' package is required for this function.
+         Please install it using install.packages('torch') and check that 
+         torch files have been download using torch::install_torch().")
+  }
+
+  #Check if `fn` is a function
+  if(!is.function(fn)){
+    stop("`fn` must be a function.")
+  }
+
+  #Check if `arg_type` is a list
+  if(!is.list(arg_type)){
+    stop("`arg_type` must be a list.")
+  }
+
+  #Check that the arguments of `fn` are exactly the names listed in `arg_type` (weights excluded)
+  if(!identical(setdiff(sort(unname(unlist(arg_type))), arg_type$weight),
+                sort((methods::formalArgs(fn))))){
+    stop("Argument names from `fn` must be in `arg_type`.")
+  } else {
+    # Reorder to ensure that the resulting function from `auto_statistic_function`
+    # has arguments in the same order as `fn`, even if the elements in `arg_type` are sorted differently.
+    formalargs <- (methods::formalArgs(fn))
+    arg_type$data <- intersect(formalargs, arg_type$data)
+    arg_type$param <- intersect(formalargs, arg_type$param)
+  }
+
+
+  #Define an empty function
+  fn_tensored <- function() NULL
+  #Change formal arguments: one (NULL-defaulted) formal per name listed in `arg_type`
+  formals(fn_tensored) <- stats::setNames(vector("list", length(unlist(arg_type))), unlist(arg_type))
+  #Change function body (`fn` and `arg_type` are looked up in the enclosing environment)
+  body(fn_tensored) <- quote({
+    data_list <- mget(arg_type$data)
+    #Compute total estimation for each variable in arg_type$data
+    tot <- lapply(X = data_list, FUN = function(x){sum(x*get(arg_type$weight))})
+
+    #Check if some totals are not numerical value (missing, NULL, ...)
+    tot_not_numerical <- vapply(X = tot, FUN = function(x){!is.numeric(x)}, FUN.VALUE = logical(1))
+
+    if(any(tot_not_numerical)){
+      var_not_numerical <- paste(arg_type$data[tot_not_numerical], collapse = " ")
+      stop(paste0("Some variables provide a non-numerical weighted total : ", var_not_numerical))
+    }
+    #Transform it into (torch) tensors with requires_grad at TRUE:
+    #That allows us to get the gradient of the point estimation
+    #with respect to those variables
+    tot_tensored <- lapply(X = tot, FUN = function(x){torch::torch_tensor(data = x,
+                                                                          requires_grad = TRUE)})
+    #Get arguments for point estimation.
+    #In some case, arg_type$param = NULL then mget(arg_type$param) raises an error.
+    #In order to avoid error, we add a condition on is.null(arg_type$param)
+    args_for_point_estimation <- tot_tensored
+
+    if(!is.null(arg_type$param)){
+      args_for_point_estimation <- c(args_for_point_estimation,
+                                     mget(arg_type$param))
+    }
+
+    #Compute point estimation.
+    point <- do.call(what = fn,
+                     args = args_for_point_estimation)
+
+    if(is.vector(point) || is.list(point)){
+      if(!is.numeric(point)){
+        stop("Results from `fn` must be a numerical vector of size 1.")
+      }
+    }
+
+
+    #Compute the gradient of `fn` with respect to `data` inputs
+    torch::autograd_backward(point)
+    #Get gradients as matrix
+    jac <- do.call(what = rbind,
+                   args = lapply(X = tot_tensored, FUN = function(x){as.numeric(x$grad)}))
+    #Get data as matrix
+    data <- do.call(what = rbind,
+                    args = data_list)
+    #(Estimated) Linearization variable for unit i is defined as <grad, data_i>
+    #where <,> denotes for the canonical dot product and data_i is the i-th row
+    #corresponding to data from the i-th unit.
+    lin <- t(data) %*% jac
+    #n is the smallest count of non-missing values across the variables in arg_type$data.
+    n <- min(sapply(X = data_list, FUN = function(variable){sum(!is.na(variable))}))
+    #A list with named elements `point`, `lin` and `n` is returned (see `statistic_function` in `gustave::define_statistic_wrapper`).
+    res <- list(point = as.numeric(point), lin = lin, n = n)
+    return(res)
+  })
+  #Return result
+  return(fn_tensored)
+}
+
diff --git a/doc/colloque sondages/.gitkeep b/doc/colloque sondages/.gitkeep
diff --git a/doc/colloque sondages/Article/.gitkeep b/doc/colloque sondages/Article/.gitkeep
diff --git a/doc/colloque sondages/Article/article.pdf b/doc/colloque sondages/Article/article.pdf
diff --git a/doc/colloque sondages/Article/gustave_colloque_sondages.aux b/doc/colloque sondages/Article/gustave_colloque_sondages.aux
diff --git a/doc/colloque sondages/Article/gustave_colloque_sondages.dvi b/doc/colloque sondages/Article/gustave_colloque_sondages.dvi