Skip to content
This repository has been archived by the owner on Nov 17, 2022. It is now read-only.

Commit

Permalink
Merge pull request #14 from inbo/chunkbased
Browse files Browse the repository at this point in the history
Chunkbased
  • Loading branch information
stijnvanhoey authored Aug 22, 2017
2 parents 41cd96b + f01aa0b commit 0ffdf83
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 29 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
inst/doc
UvaBitsWarehouse.Rproj
*.Rproj
install.R
instal*.R
vignettes/performance-optimisation_cache
draft/
9 changes: 5 additions & 4 deletions R/enrich.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ join_tracks_and_metadata <- function(tracking_data, bird_data) {
stop(msg)
}

# Prepare coorindates information
# Prepare coordinates information
bird_data[, colony_latitude := latitude]
bird_data[, colony_longitude := longitude]
bird_data[, latitude := NULL]
Expand Down Expand Up @@ -158,7 +158,7 @@ add_dist_travelled <- function(dt) {
distances <- distCosine(
cbind(dt$longitude, dt$latitude),
cbind(c(1, data.table::shift(dt$longitude)[-1]),
c(1, data.table::shift(dt$latitude)[-1])
c(1, data.table::shift(dt$latitude)[-1])
)
)
distances[!dt$tmp.select | is.na(dt$tmp.select)] <- NA
Expand Down Expand Up @@ -219,7 +219,7 @@ add_dist_to_colony <- function(dt) {
add_sunlight <- function(dt) {
results <- suncalc.custom(dt$date_time, dt$latitude, dt$longitude)
dt[, calc_sunlight := date_time > results$sunrise & date_time < results$sunset]
print(dt)
#print(dt)
}


Expand Down Expand Up @@ -308,7 +308,7 @@ join_raster_value_with_legend <- function(dt, legend) {
#' }
enrich_data <- function(tracking_data, bird_data, corine_raster_data, corine_legend) {
dt <- join_tracks_and_metadata(tracking_data, bird_data)
dt <- delete_test_records(dt)
dt <- delete_test_records(dt) # actually redundant due to date-based join
setkey(dt, device_info_serial, date_time) # will sort on those columns
add_year_month_hour(dt)
add_time_since_previous_fix(dt)
Expand All @@ -319,6 +319,7 @@ enrich_data <- function(tracking_data, bird_data, corine_raster_data, corine_leg
flag_outliers(dt)
raster_join(dt, corine_raster_data)
dt <- join_raster_value_with_legend(dt, corine_legend)
setkey(dt, device_info_serial, date_time) # will sort on those columns
setnames(dt, "calc_raster_value", "calc_corine_value")
setnames(dt, "calc_raster_legend", "calc_corine_legend")
return(dt)
Expand Down
51 changes: 27 additions & 24 deletions R/extract.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Check numeric values
#' @description Check whether values in given column can be converted to
#' numeric types. If not, this function will call stop().
#'
#'
#' @param colname Name of the column to be tested
#' @param col Column containing values to be tested
#' @return col if all values can be converted to numeric. Otherwise error.
Expand Down Expand Up @@ -38,7 +38,7 @@ load_tracks_file <- function(filename) {
#' Validate tracking data
#' @description Validate the data coming either from a csv file or
#' from the UvA-BiTS virtual lab directly.
#'
#'
#' @param tracks_data The tracking data as a data table
#' @return validated tracking data as a data table if no errors are found.
#' @export
Expand All @@ -50,25 +50,28 @@ validate_tracks_data <- function(tracks_data) {
issues <- c()
# set data types for non-character columns
nas_in_date_time <- sum(is.na(tracks_data$date_time))
tracks_data[, date_time:=lubridate::fast_strptime(date_time, "%Y-%m-%d %H:%M:%OS", lt=FALSE)]
tracks_data[, date_time := lubridate::fast_strptime(date_time,
"%Y-%m-%d %H:%M:%OS",
lt = FALSE)]
if (sum(is.na(tracks_data$date_time)) > nas_in_date_time) {
issues <- c(issues, "unparsable values found in column date_time")
}

# check whether columns can be converted to numeric
numeric_cols <- c("device_info_serial", "latitude", "longitude", "altitude", "pressure",
"temperature", "satellites_used", "gps_fixtime", "positiondop",
"h_accuracy", "v_accuracy", "x_speed", "y_speed", "z_speed",
"speed_accurracy", "speed_3d", "speed_2d", "direction", "altitude_agl")
numeric_cols <- c("device_info_serial", "latitude", "longitude", "altitude",
"pressure", "temperature", "satellites_used",
"gps_fixtime", "positiondop", "h_accuracy", "v_accuracy",
"x_speed", "y_speed", "z_speed", "speed_accurracy",
"speed_3d", "speed_2d", "direction", "altitude_agl")
lapply(numeric_cols, function(x) {
tryCatch({
check_numeric_values(x, tracks_data[[x]])
}, error=function(e) {
}, error = function(e) {
issues <<- append(issues, paste("non numeric values found in column ", x))
}
)
})

# convert to numeric columns
tracks_data[, device_info_serial:=as.numeric(device_info_serial)]
tracks_data[, latitude:=as.numeric(latitude)]
Expand All @@ -89,11 +92,11 @@ validate_tracks_data <- function(tracks_data) {
tracks_data[, speed_2d:=as.numeric(speed_2d)]
tracks_data[, direction:=as.numeric(direction)]
tracks_data[, altitude_agl:=as.numeric(altitude_agl)]

# drop unused columns
# ... location: this is a binary blob from a geometric data type. Cannot be used.
tracks_data[, location:=NULL]

if (length(issues) > 0) {
print(paste(issues, sep="\n"))
stop("Validation failed")
Expand All @@ -105,11 +108,11 @@ validate_tracks_data <- function(tracks_data) {
#' @description Load a file containing bird metadata. This file is managed at the
#' INBO on Google Drive. Create a csv export of that file, and make sure it is
#' "," delimited.
#'
#'
#' @param filename The name of the file containing bird metadata
#' @return A data table (not a data frame!) containing the bird metadata
#' @export
#' @examples
#' @examples
#' \dontrun{
#' load_bird_file(inputFile)
#' }
Expand All @@ -121,7 +124,7 @@ load_bird_file <- function(filename) {

#' Validate bird data
#' @description Validate the bird metadata
#'
#'
#' @param bird_data The bird metadata as a data table
#' @return validated bird metadata as a data table if no errors are found.
#' @export
Expand All @@ -141,16 +144,16 @@ validate_bird_data <- function(bird_data) {
if (sum(is.na(bird_data$tracking_ended_at)) > nas_in_tr_end_time) {
issues <- c(issues, "unparsable values found in column tracking_ended_at")
}

# convert logical columns to Logical
tmp_is_active <- as.factor(bird_data$is_active)
tryCatch({
levels(tmp_is_active) <- c("TRUE", "FALSE")
levels(tmp_is_active) <- c("TRUE", "FALSE")
}, error=function(e) {
issues <<- append(issues, paste("could not parse boolean values from column is_active"))
})
bird_data[, is_active:=as.logical(tmp_is_active)]

# convert enumeration columns to factors
# note that the allowed choices are saved as package data in 'data/'
bird_data[, sex:=as.factor(sex)]
Expand All @@ -167,7 +170,7 @@ validate_bird_data <- function(bird_data) {
)
)
}

# check whether columns can be converted to numeric
numeric_cols <- c("device_info_serial", "catch_weight", "latitude", "longitude")
lapply(numeric_cols, function(x) {
Expand All @@ -178,13 +181,13 @@ validate_bird_data <- function(bird_data) {
}
)
})

# convert to numeric columns
bird_data[, device_info_serial:=as.numeric(device_info_serial)]
bird_data[, catch_weight:=as.numeric(catch_weight)]
bird_data[, latitude:=as.numeric(latitude)]
bird_data[, longitude:=as.numeric(longitude)]

if (length(issues) > 0) {
print(paste(issues, sep="\n"))
stop("Validation failed")
Expand All @@ -196,8 +199,8 @@ validate_bird_data <- function(bird_data) {
#' Read raster data
#' @description Read raster data using the raster package. By default
#' this function will set the CRS of this data to EPSG4326 (WGS 84).
#' Use the data.CRS parameter to override this.
#'
#' Use the data.CRS parameter to override this.
#'
#' @param filename Name of the file containing raster data
#' @param data.CRS Coordinate Reference System of the data
#' @return raster data as RasterLayer class
Expand All @@ -217,7 +220,7 @@ read_raster_data <- function(filename, data.CRS="+init=epsg:4326") {
#' @description Read the raster legend. The legend is expected to contain two columns:
#' `id` containing the actual values used in the raster layer, and `value` which contains
#' the labels.
#'
#'
#' @param filename Name of the csv file containing the raster legend
#' @return a data table containing the raster legend
#' @export
Expand Down

0 comments on commit 0ffdf83

Please sign in to comment.