Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding conversion utility for .shp to .rds #76

Merged
merged 4 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ Imports:
furrr,
parallel,
logger,
tidyverse
tidyverse,
here
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
55 changes: 55 additions & 0 deletions tests/testthat/format_conversion_util_test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
library(testthat)
library(dplyr)
library(purrr)
library(sf)
library(future)
library(furrr)

# Source the utility file with filter_range_data() definition
# source("utility/format_conversion_util.R")
source("utility/format_conversion_util.R") # Path to the utility script

# Test for filter_range_data()
test_that("filter_range_data() filters correctly based on realm", {

# Mock data
range_data <- data.frame(
presence = c(1, 1, 2, 1, 1),
origin = c(1, 2, 1, 3, 2),
seasonal = c(1, 2, 3, 1, 2),
terrestial = c("true", "false", "true", "true", "true"), #codespell:ignore terrestial
marine = c("false", "true", "false", "false", "true")
)

# Test for terrestrial realm
result <- filter_range_data(range_data, "terrestial") #codespell:ignore terrestial
expect_equal(nrow(result), 2)

# Test for marine realm
result <- filter_range_data(range_data, "marine")
expect_equal(nrow(result), 2)

# Test for invalid realm
result <- filter_range_data(range_data, "unknown")
expect_equal(nrow(result), 3)
})


# Test for clean_results()
test_that("clean_results() cleans and combines correctly", {

# Mock data
res <- list(
species1 = c(1, 2, 3),
species2 = NULL,
species3 = c(4, 5),
species1 = c(6, 7)
)

# Test cleaning and combining
result <- clean_results(res)
expect_type(result, "list")
expect_equal(names(result), c("species1", "species3"))
expect_equal(result$species1, c(1, 2, 3, 6, 7))
expect_equal(result$species3, c(4, 5))
})
104 changes: 104 additions & 0 deletions utility/format_conversion_util.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
library(sf)
library(terra)
library(dplyr)
library(future)
library(furrr)
library(here)
library(stars)
library(logger)


#' Prepare Species Range Data by Intersecting with Grid
#'
#' This function filters species range data based on presence,
#' origin, and seasonal criteria.
#' It intersects the species ranges with a specified grid, and
#' returns the overlapping grid cells. The results are saved as an RDS file.
#'
#' @param range_data A data frame containing species range data with geometry.
#' @param grid A spatial grid object (e.g., sf object).
#' @param realm A character string,
#' indicating whether the species is ("terrestrial" or "marine").
#' @param use_parallel A boolean,
#' indicating whether to use parallel processing (default is TRUE).
#' @param output_file A character string,
#' indicating the file path to save the results as an RDS.
#' @return The function returns the saved result that is also stored in the output RDS file path.
prepare_range_data_from_shp_file <- function(input_file_path, grid, realm, use_parallel = TRUE,
number_of_workers = availableCores() - 1, rds_output_file_path = "gridded_ranges.rds") {

log_info(paste("Reading the Input .shp File at path: ", input_file_path))
range_data <- st_read(here(input_file_path))

# Set Up Parallel Processing
if (use_parallel) {
plan("multisession", workers = number_of_workers)
} else {
plan("sequential")
}

# 1. Filter Range Data
log_info("Filtering Range Data...")
range_filtered <- filter_range_data(range_data, realm)

# 2. Perform Grid Intersections
log_info("Perform Grid Intersections...")
res <- intersect_ranges_with_grid(range_filtered, grid)

# 3. Assign scientific names as names of the list elements
names(res) <- range_filtered$sci_name

# 4. Clean and Combine Results
log_info("Clean Results...")
res_final <- clean_results(res)

# 5. Save the results to an RDS file
saveRDS(res_final, rds_output_file_path)

# 5. Return the final result
return(res_final)
}


# Helper Function: Filter Range Data Based on Realm
filter_range_data <- function(range_data, realm) {
log_info("Filtering Range Data Based on Realm...")
range_filtered <- range_data %>%
dplyr::filter(presence == 1, origin %in% c(1, 2), seasonal %in% c(1, 2)) %>%
dplyr::filter(
if (realm == "terrestial") terrestial == "true" #codespell:ignore terrestial
else if (realm == "marine") marine == "true"
else TRUE
)

return(range_filtered)
}


# Helper Function: Intersect Range Data with Grid
intersect_ranges_with_grid <- function(range_filtered, grid) {

intersected_data <- future_map(st_geometry(range_filtered), possibly(function(x) {
y <- st_intersects(x, grid)
y <- unlist(y)
y <- grid %>%
slice(y) %>%
pull(world_id)
return(y)
}, otherwise = NULL), .progress = TRUE)
return(intersected_data)
}

# Helper Function: Clean and Combine Results
clean_results <- function(res) {

# Remove NULL results
res <- discard(res, is.null)

# Combine elements with the same name
unlisted_res <- unlist(res, use.names = FALSE)
repeated_names <- rep(names(res), lengths(res))
res <- tapply(unlisted_res, repeated_names, FUN = c)

return(res)
}
Loading