Skip to content

Commit

Permalink
revise version for publish
Browse files Browse the repository at this point in the history
  • Loading branch information
marcosdanieldasilva committed Nov 7, 2024
1 parent 59c1e25 commit 653bfc2
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 185 deletions.
21 changes: 21 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ authors = ["Marcos Daniel da Silva <marcosdasilva@5a.tec.br> and contributors"]
version = "1.0.0-DEV"

[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Expand All @@ -13,6 +15,7 @@ HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
Kaleido_jll = "f7e6163d-2fa5-5f23-b69c-1db539e41963"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
PlotlyKaleido = "f2990250-8cf9-495f-b13a-cce12b45703c"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
RecipesPipeline = "01d81517-befc-4cb6-b9ec-a95719d0359c"
Expand All @@ -23,6 +26,24 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
BenchmarkTools = "1.5.0"
CSV = "0.10.15"
CategoricalArrays = "0.10.8"
ColorTypes = "0.11.5"
DataFrames = "1.7.0"
Distributions = "0.25.113"
GLM = "1.9.0"
HypothesisTests = "0.11.3"
Kaleido_jll = "0.1.0"
LinearAlgebra = "1.11.0"
PlotlyJS = "0.18.14"
PlotlyKaleido = "2.2.5"
Plots = "1.40.8"
RecipesPipeline = "0.6.12"
Reexport = "1.2.2"
StatsBase = "0.34.3"
StatsModels = "0.7.4"
Tables = "1.12.0"
julia = "1.6.7"

[extras]
Expand Down
117 changes: 58 additions & 59 deletions src/ForestMensuration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,69 +13,68 @@ Performs complex calculations with simple commands.
Offers a user-friendly and intuitive interface.
"""
module ForestMensuration
using
CategoricalArrays,
ColorTypes,
DataFrames,
Distributions,
GLM,
HypothesisTests,
LinearAlgebra,
PlotlyJS,
Plots,
Plots.PlotMeasures,
RecipesPipeline,
Reexport,
StatsBase,
StatsModels
Tables
using
CategoricalArrays,
ColorTypes,
DataFrames,
Distributions,
GLM,
HypothesisTests,
LinearAlgebra,
PlotlyJS,
Plots,
Plots.PlotMeasures,
RecipesPipeline,
Reexport,
StatsBase,
StatsModels,
Tables

import Plots: cgrad
import StatsBase: fit, Histogram
import Plots: cgrad
import StatsBase: fit, Histogram

import StatsModels: asgn, missing_omit, Schema, TableRegressionModel
import StatsModels: asgn, missing_omit, Schema, TableRegressionModel

@reexport using GLM
@reexport using GLM

include("structs-consts.jl")
include("linear-regression.jl")
include("multiple-linear-regression.jl")
include("prediction.jl")
include("regression-parameters.jl")
include("criteria-functions.jl")
include("plot_regression.jl")
include("frequency-tables.jl")
include("dendrometric-averages.jl")
include("cubage.jl")
include("inventory-report.jl")
include("simple-casual-sampling.jl")
include("site-classification.jl")
include("show.jl")
# include("graph-analysis.jl")
include("structs_consts.jl")
include("linear_regression.jl")
include("prediction.jl")
include("regression_parameters.jl")
include("criteria_functions.jl")
include("plot_regression.jl")
include("frequency_tables.jl")
include("dendrometric_averages.jl")
include("cubage.jl")
include("inventory_report.jl")
include("simple_casual_sampling.jl")
include("site_classification.jl")
include("show.jl")
# include("graph-analysis.jl")

export
# Regression
TableRegressionModel,
regression,
prediction,
prediction!,
criteria_table,
criteria_selection,
dendrometric_averages,
plot_regression,
#Cubage
cubage,
Smalian,
Huber,
Newton,
# Frequency functions
diametric_table,
frequency_table,
# Site classification
hdom_classification,
site_classification,
site_table,
# Forest Inventory
simple_casual_sampling
export
# Regression
TableRegressionModel,
regression,
prediction,
prediction!,
criteria_table,
criteria_selection,
dendrometric_averages,
plot_regression,
#Cubage
cubage,
Smalian,
Huber,
Newton,
# Frequency functions
diametric_table,
frequency_table,
# Site classification
hdom_classification,
site_classification,
site_table,
# Forest Inventory
simple_casual_sampling

end
34 changes: 17 additions & 17 deletions src/criteria-functions.jl → src/criteria_functions.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function StatsBase.nulldeviance(x::Vector{<:Real})
mean_x = mean(x)
mean_x = Distributions.mean(x)
out = Vector{Float64}(undef, length(x))
@inbounds @simd for i in eachindex(x)
out[i] = abs2(x[i] - mean_x)
Expand All @@ -10,7 +10,7 @@ end
# Turn a hypothesis test into a pass/fail flag at the 0.05 level.
# Returns `true` when `pvalue(test)` is strictly greater than 0.05 and `false` otherwise.
# The comparison already produces a `Bool`, so no `? true : false` wrapper is needed.
@inline p_result(test::HypothesisTests.HypothesisTest) = pvalue(test) > 0.05

# Function to calculate various statistical criteria for evaluating regression models
function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
function _criteria_parameters(model::TableRegressionModel)::Matrix{Float64}
# Number of observations in the model
n = nobs(model)
# Degrees of freedom for residuals
Expand All @@ -26,9 +26,9 @@ function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
# Calculate the Root Mean Squared Error (RMSE)
RMSE = sqrt(devi / n)
# Calculate the Mean Absolute Error (MAE)
MAE = mean(abs.(residual))
MAE = Distributions.mean(abs.(residual))
# Standard error of the estimate (Syx) expressed as a percentage of the mean of y
syx_in_percentage = sqrt(devi / dof_resid) / mean(y) * 100
syx_in_percentage = sqrt(devi / dof_resid) / Distributions.mean(y) * 100
# Coefficient of determination (R²)
r_2 = 1 - devi / nulldeviance(y)
# Adjusted R²: adjusted for the number of predictors in the model
Expand All @@ -44,8 +44,8 @@ function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
else
ApproximateOneSampleKSTest(HypothesisTests.ksstats(residual, fit_mle(Normal, residual))...) |> p_result
end
catch
false
catch
false
end
# Test for coefficient significance
cc = coef(model)
Expand All @@ -61,13 +61,13 @@ function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
end

function _calculate_ranks(ct::DataFrame, selected_criteria::Vector{Symbol})

n = size(ct, 1)
ranks = Dict()

# Calculate ranks for each criterion
if :adjr2 in selected_criteria
ranks[:adjr2] = competerank(ct[!, "adjr2"], rev = true)
ranks[:adjr2] = competerank(ct[!, "adjr2"], rev=true)
end
if :syx in selected_criteria
ranks[:syx] = competerank(ct[!, "syx"])
Expand All @@ -83,17 +83,17 @@ function _calculate_ranks(ct::DataFrame, selected_criteria::Vector{Symbol})
end
if :normality in selected_criteria
# Penalize non-normal models with a higher rank
normality_ranks = competerank(ct[!, "normality"], rev = true)
normality_ranks = competerank(ct[!, "normality"], rev=true)
penalized_non_normal_ranks = [normality_ranks[i] == 1 ? 1 : normality_ranks[i] * n for i in 1:n]
ranks[:normality] = penalized_non_normal_ranks
end
if :significance in selected_criteria
# Penalize non-significance models with a higher rank
significance_ranks = competerank(ct[!, "significance"], rev = true)
significance_ranks = competerank(ct[!, "significance"], rev=true)
penalized_non_significance_ranks = [significance_ranks[i] == 1 ? 1 : significance_ranks[i] * n for i in 1:n]
ranks[:significance] = penalized_non_significance_ranks
end

# Combine ranks into a single score
combined_rank = sum([ranks[crit] for crit in selected_criteria])

Expand Down Expand Up @@ -140,7 +140,7 @@ The `criteria_table` function evaluates and ranks multiple regression models bas
`criteria_table([model1, model2], :aic, :mae)`
"""
function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol...; best::Union{Bool, Int}=10) :: DataFrame
function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol...; best::Union{Bool,Int}=10)::DataFrame

allowed_fields = [:adjr2, :syx, :rmse, :mae, :aic, :normality, :significance]

Expand All @@ -167,7 +167,7 @@ function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol.

# Insert the model objects into the DataFrame
insertcols!(ct, 1, "model" => model)

# Combine ranks into a single score
combined_rank = _calculate_ranks(ct, selected_criteria)

Expand All @@ -183,14 +183,14 @@ function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol.
elseif best < length(model)
# If 'best' is less than the number of models, return the top 'best' models
top_models = ct[1:best, 1]
return criteria_table(top_models, criteria...; best = false) # Re-run with selected models
return criteria_table(top_models, criteria...; best=false) # Re-run with selected models
else
# Otherwise, return the full DataFrame
return ct
end
end

@inline criteria_table(model::TableRegressionModel, criteria::Symbol...) :: DataFrame = criteria_table([model], criteria...)
@inline function criteria_table(model::TableRegressionModel, criteria::Symbol...)::DataFrame
    # Single-model convenience method: wrap the model in a one-element vector and
    # forward to the vector method, leaving `best` at that method's default.
    models = [model]
    return criteria_table(models, criteria...)
end

"""
The `criteria_selection` function evaluates and ranks a vector of regression models based on specified criteria, returning the best model according to the combined ranking.
Expand All @@ -216,4 +216,4 @@ The `criteria_selection` function evaluates and ranks a vector of regression mod
- `TableRegressionModel`:
The best model based on the combined ranking of the specified criteria.
"""
@inline criteria_selection(model::Vector{<:TableRegressionModel}, criteria::Symbol...) :: TableRegressionModel = criteria_table(model, criteria..., best = 5)[1, 1]
@inline function criteria_selection(model::Vector{<:TableRegressionModel}, criteria::Symbol...)::TableRegressionModel
    # Build the criteria table with `best=5`, then return the entry in the first
    # row and first column of that table.
    ranked = criteria_table(model, criteria...; best=5)
    return ranked[1, 1]
end
8 changes: 4 additions & 4 deletions src/dendrometric-averages.jl → src/dendrometric_averages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ function dendrometric_averages(d::Vector; area::Real=1.0) :: DataFrame
d̅, s = mean_and_std(d)
d₋, d₊ = d̅ - s, d̅ + s
g = basal_area.(d)
dg = sqrt((40000 * mean(g)) / π)
dw = quantile(d, 0.6)
dz = sqrt((40000 * median(g)) / π)
dg = sqrt((40000 * Distributions.mean(g)) / π)
dw = Distributions.quantile(d, 0.6)
dz = sqrt((40000 * Distributions.median(g)) / π)
n_tree = round(Int, (100 * area))
d₁₀₀ = n_tree < length(d) ? mean(partialsort(d, 1:n_tree, rev = true)) : NaN64
d₁₀₀ = n_tree < length(d) ? Distributions.mean(partialsort(d, 1:n_tree, rev = true)) : NaN64
return DataFrame(d₋ = d₋, d̅ = d̅, dg = dg, dw = dw, dz = dz, d₁₀₀ = d₁₀₀, d₊ = d₊)
end

Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 653bfc2

Please sign in to comment.