Skip to content

Commit

Permalink
implement load_function kwarg for collect_results! (#424)
Browse files Browse the repository at this point in the history
The changes in this branch are a follow-up to a previous pull request
based on commit 6e6ff07 in PR #421. In that PR there were issues
with whitespace changes inadvertently coming from the autoformatter
in VS Code. Reverting the whitespace-only changes proved to be more
difficult than anticipated.

So to resolve this, this branch was created and a new PR will be created
from it. The whitespace issues are gone but all the feedback and changes
from the original PR are retained.

The commit makes the following changes.
 - add the `load_function` kwarg to `collect_results`. This allows
customizing how data is loaded from file before being processed into a
dataframe by `collect_results`.
 - add a test to `update_result_tests.jl`
 - update docstring of `collect_results`
 - increase package version to 2.16.0
 - update `CHANGELOG.md`

All tests passed, 589 of 589.
  • Loading branch information
NuclearPowerNerd authored Aug 23, 2024
1 parent 09e1029 commit ac0fe7f
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 2.16.0

- Add `load_function` keyword argument to `collect_results` to customize how data is loaded from file before being converted to a dataframe by `collect_results`

# 2.15.0

- Add `wload_kwargs` to `produce_or_load` to allow passing kwargs to `wload`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DrWatson"
uuid = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
repo = "https://github.com/JuliaDynamics/DrWatson.jl.git"
version = "2.15.0"
version = "2.16.0"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
17 changes: 9 additions & 8 deletions src/result_collection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ See also [`collect_results`](@ref).
* `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
* `special_list = []`: List of additional (derived) key-value pairs
to put in `df` as explained below.
* `load_function = wload`: Load function. Defaults to `wload`. You may want to specify a custom load function for example if you store results as a struct and you want the fields of the struct to form the columns of the dataframe. The struct is saved to file as a one-element dictionary so the dataframe will only have a single column. To work around this you could convert it to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way `collect_results` will receive a `Dict` whose keys are the fields of the struct.
`special_list` is a `Vector` where each entry
is a derived quantity to be included in `df`. There are two types of entries.
Expand Down Expand Up @@ -90,6 +91,7 @@ function collect_results!(filename, folder;
newfile = false, # keyword only for defining collect_results without !
rinclude = [r""],
rexclude = [r"^\b$"],
load_function = wload,
kwargs...)

@assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
Expand All @@ -100,7 +102,7 @@ function collect_results!(filename, folder;
mtimes = Dict{String,Float64}()
else
verbose && @info "Loading existing result collection..."
data = wload(filename)
data = load_function(filename)
df = data["df"]
# Check if we have pre-recorded mtimes (if not this could be because of an old results database).
if "mtime" ∈ keys(data)
Expand Down Expand Up @@ -170,7 +172,7 @@ function collect_results!(filename, folder;
mtimes[file] = mtime_file

fpath = rpath === nothing ? file : joinpath(rpath, file)
df_new = to_data_row(FileIO.query(fpath); kwargs...)
df_new = to_data_row(FileIO.query(fpath); load_function=load_function, kwargs...)
#add filename
df_new[!, :path] .= file
if replace_entry
Expand Down Expand Up @@ -231,18 +233,17 @@ is_valid_file(file, valid_filetypes) =
any(endswith(file, v) for v in valid_filetypes)

# Use wload per default when nothing else is available
function to_data_row(file::File; kwargs...)
function to_data_row(file::File; load_function=wload, kwargs...)
fpath = filename(file)
@debug "Opening $(filename(file)) with fallback wload."
return to_data_row(wload(fpath), fpath; kwargs...)
return to_data_row(load_function(fpath), fpath; kwargs...)
end
# Specialize for JLD2 files, can do much faster mmapped access
function to_data_row(file::File{format"JLD2"}; kwargs...)
function to_data_row(file::File{format"JLD2"}; load_function=(filename) -> JLD2.jldopen(filename, "r"), kwargs...)
fpath = filename(file)
@debug "Opening $(filename(file)) with jldopen."
JLD2.jldopen(filename(file), "r") do data
return to_data_row(data, fpath; kwargs...)
end
data = load_function(fpath)
return to_data_row(data, fpath; kwargs...)
end
function to_data_row(data, file;
white_list = collect(keys(data)),
Expand Down
16 changes: 16 additions & 0 deletions test/update_results_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,22 @@ cres_relpath = collect_results!(relpathname, folder;
rpath = projectdir())
@info all(startswith.(cres[!,"path"], "data"))

# Fixture type for the `load_function` test: a result stored as a struct whose
# fields should end up as separate dataframe columns.
struct dummy
    a::Float64
    b::Int64
    c::Matrix{Float64}
end

# Save a single instance to the data directory under the key "dummy".
_dummy_matrix = rand(3, 3)
_dummy = dummy(1.0, 1, _dummy_matrix)
wsave(datadir("dummy.jld2"), "dummy", _dummy)

# Collect only that file; the custom loader unwraps the stored struct and
# converts it to a dictionary keyed by field name.
actual_dataframe = collect_results(
    datadir();
    rinclude = [r"dummy.jld2"],
    load_function = (filename) -> struct2dict(wload(filename)["dummy"]),
)

# One-element column vector whose element type admits `missing`.
_dataframe_vector = Union{Missing, Matrix{Float64}}[_dummy_matrix]
expected_dataframe = DataFrame(
    a = 1.0,
    b = 1,
    c = _dataframe_vector,
    path = datadir("dummy.jld2"),
)

@test actual_dataframe == expected_dataframe

###############################################################################
# Trailing slash in foldername #
###############################################################################
Expand Down

0 comments on commit ac0fe7f

Please sign in to comment.