revise version for publish

marcosdanieldasilva · Nov 7, 2024 · 653bfc2 · 653bfc2
1 parent 59c1e25
commit 653bfc2
Show file tree

Hide file tree

Showing 13 changed files with 172 additions and 185 deletions.
diff --git a/Project.toml b/Project.toml
@@ -4,6 +4,8 @@ authors = ["Marcos Daniel da Silva <marcosdasilva@5a.tec.br> and contributors"]
 version = "1.0.0-DEV"
 
 [deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -13,6 +15,7 @@ HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
 Kaleido_jll = "f7e6163d-2fa5-5f23-b69c-1db539e41963"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
+PlotlyKaleido = "f2990250-8cf9-495f-b13a-cce12b45703c"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
 RecipesPipeline = "01d81517-befc-4cb6-b9ec-a95719d0359c"
@@ -23,6 +26,24 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
 [compat]
+BenchmarkTools = "1.5.0"
+CSV = "0.10.15"
+CategoricalArrays = "0.10.8"
+ColorTypes = "0.11.5"
+DataFrames = "1.7.0"
+Distributions = "0.25.113"
+GLM = "1.9.0"
+HypothesisTests = "0.11.3"
+Kaleido_jll = "0.1.0"
+LinearAlgebra = "1" # stdlib; a "1.11.0" floor conflicts with the julia = "1.6.7" lower bound below
+PlotlyJS = "0.18.14"
+PlotlyKaleido = "2.2.5"
+Plots = "1.40.8"
+RecipesPipeline = "0.6.12"
+Reexport = "1.2.2"
+StatsBase = "0.34.3"
+StatsModels = "0.7.4"
+Tables = "1.12.0"
 julia = "1.6.7"
 
 [extras]

diff --git a/src/ForestMensuration.jl b/src/ForestMensuration.jl
@@ -13,69 +13,68 @@ Performs complex calculations with simple commands.
 Offers a user-friendly and intuitive interface.
 """
 module ForestMensuration
-  using
-    CategoricalArrays,
-    ColorTypes,
-    DataFrames,
-    Distributions,
-    GLM,
-    HypothesisTests,
-    LinearAlgebra,
-    PlotlyJS,
-    Plots,
-    Plots.PlotMeasures,
-    RecipesPipeline,
-    Reexport,
-    StatsBase,
-    StatsModels
-    Tables
+using
+  CategoricalArrays,
+  ColorTypes,
+  DataFrames,
+  Distributions,
+  GLM,
+  HypothesisTests,
+  LinearAlgebra,
+  PlotlyJS,
+  Plots,
+  Plots.PlotMeasures,
+  RecipesPipeline,
+  Reexport,
+  StatsBase,
+  StatsModels,
+  Tables
 
-  import Plots: cgrad
-  import StatsBase: fit, Histogram
+import Plots: cgrad
+import StatsBase: fit, Histogram
 
-  import StatsModels: asgn, missing_omit, Schema, TableRegressionModel
+import StatsModels: asgn, missing_omit, Schema, TableRegressionModel
 
-  @reexport using GLM
+@reexport using GLM
 
-  include("structs-consts.jl")
-  include("linear-regression.jl")
-  include("multiple-linear-regression.jl")
-  include("prediction.jl")
-  include("regression-parameters.jl")
-  include("criteria-functions.jl")
-  include("plot_regression.jl")
-  include("frequency-tables.jl")
-  include("dendrometric-averages.jl")
-  include("cubage.jl")
-  include("inventory-report.jl")
-  include("simple-casual-sampling.jl")
-  include("site-classification.jl")
-  include("show.jl")
-  # include("graph-analysis.jl")
+include("structs_consts.jl")
+include("linear_regression.jl")
+include("prediction.jl")
+include("regression_parameters.jl")
+include("criteria_functions.jl")
+include("plot_regression.jl")
+include("frequency_tables.jl")
+include("dendrometric_averages.jl")
+include("cubage.jl")
+include("inventory_report.jl")
+include("simple_casual_sampling.jl")
+include("site_classification.jl")
+include("show.jl")
+# include("graph-analysis.jl")
 
-  export
-    # Regression
-    TableRegressionModel,
-    regression,
-    prediction,
-    prediction!,
-    criteria_table,
-    criteria_selection,
-    dendrometric_averages,
-    plot_regression,
-    #Cubage
-    cubage,
-    Smalian,
-    Huber,
-    Newton,
-    # Frequency functions
-    diametric_table,
-    frequency_table,
-    # Site classification
-    hdom_classification,
-    site_classification,
-    site_table,
-    # Forest Inventory
-    simple_casual_sampling
+export
+  # Regression
+  TableRegressionModel,
+  regression,
+  prediction,
+  prediction!,
+  criteria_table,
+  criteria_selection,
+  dendrometric_averages,
+  plot_regression,
+  # Cubage
+  cubage,
+  Smalian,
+  Huber,
+  Newton,
+  # Frequency functions
+  diametric_table,
+  frequency_table,
+  # Site classification
+  hdom_classification,
+  site_classification,
+  site_table,
+  # Forest Inventory
+  simple_casual_sampling
 
 end
diff --git a/src/criteria-functions.jl → src/criteria_functions.jl b/src/criteria-functions.jl → src/criteria_functions.jl
@@ -1,5 +1,5 @@
 function StatsBase.nulldeviance(x::Vector{<:Real})
-  mean_x = mean(x)
+  mean_x = Distributions.mean(x)
   out = Vector{Float64}(undef, length(x))
   @inbounds @simd for i in eachindex(x)
     out[i] = abs2(x[i] - mean_x)
@@ -10,7 +10,7 @@ end
 @inline p_result(test::HypothesisTests.HypothesisTest) = pvalue(test) > 0.05 # `true` when the test's p-value exceeds 0.05; the comparison already yields a Bool
 
 # Function to calculate various statistical criteria for evaluating regression models
-function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
+function _criteria_parameters(model::TableRegressionModel)::Matrix{Float64}
   # Number of observations in the model
   n = nobs(model)
   # Degrees of freedom for residuals
@@ -26,9 +26,9 @@ function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
   # Calculate the Root Mean Squared Error (RMSE)
   RMSE = √(devi / n)
   # Calculate the Mean Absolute Error (MAE)
-  MAE = mean(abs.(residual))
+  MAE = Distributions.mean(abs.(residual))
   # Standard error of the estimate (Syx) expressed as a percentage of the mean of y
-  syx_in_percentage = √(devi / dof_resid) / mean(y) * 100
+  syx_in_percentage = √(devi / dof_resid) / Distributions.mean(y) * 100
   # Coefficient of determination (R²)
   r_2 = 1 - devi / nulldeviance(y)
   # Adjusted R²: adjusted for the number of predictors in the model
@@ -44,8 +44,8 @@ function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
     else
       ApproximateOneSampleKSTest(HypothesisTests.ksstats(residual, fit_mle(Normal, residual))...) |> p_result
     end
-    catch
-      false
+  catch
+    false
   end
   # Test for coefficient significance
   cc = coef(model)
@@ -61,13 +61,13 @@ function _criteria_parameters(model::TableRegressionModel) :: Matrix{Float64}
 end
 
 function _calculate_ranks(ct::DataFrame, selected_criteria::Vector{Symbol})
-  
+
   n = size(ct, 1)
   ranks = Dict()
-  
+
   # Calculate ranks for each criterion
   if :adjr2 in selected_criteria
-    ranks[:adjr2] = competerank(ct[!, "adjr2"], rev = true)
+    ranks[:adjr2] = competerank(ct[!, "adjr2"], rev=true)
   end
   if :syx in selected_criteria
     ranks[:syx] = competerank(ct[!, "syx"])
@@ -83,17 +83,17 @@ function _calculate_ranks(ct::DataFrame, selected_criteria::Vector{Symbol})
   end
   if :normality in selected_criteria
     # Penalize non-normal models with a higher rank
-    normality_ranks = competerank(ct[!, "normality"], rev = true)
+    normality_ranks = competerank(ct[!, "normality"], rev=true)
     penalized_non_normal_ranks = [normality_ranks[i] == 1 ? 1 : normality_ranks[i] * n for i in 1:n]
     ranks[:normality] = penalized_non_normal_ranks
   end
   if :significance in selected_criteria
     # Penalize non-significant models with a higher rank
-    significance_ranks = competerank(ct[!, "significance"], rev = true)
+    significance_ranks = competerank(ct[!, "significance"], rev=true)
     penalized_non_significance_ranks = [significance_ranks[i] == 1 ? 1 : significance_ranks[i] * n for i in 1:n]
     ranks[:significance] = penalized_non_significance_ranks
   end
-  
+
   # Combine ranks into a single score
   combined_rank = sum([ranks[crit] for crit in selected_criteria])
 
@@ -140,7 +140,7 @@ The `criteria_table` function evaluates and ranks multiple regression models bas
   `criteria_table([model1, model2], :aic, :mae)`
 
 """
-function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol...; best::Union{Bool, Int}=10) :: DataFrame
+function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol...; best::Union{Bool,Int}=10)::DataFrame
 
   allowed_fields = [:adjr2, :syx, :rmse, :mae, :aic, :normality, :significance]
 
@@ -167,7 +167,7 @@ function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol.
 
   # Insert the model objects into the DataFrame
   insertcols!(ct, 1, "model" => model)
- 
+
   # Combine ranks into a single score
   combined_rank = _calculate_ranks(ct, selected_criteria)
 
@@ -183,14 +183,14 @@ function criteria_table(model::Vector{<:TableRegressionModel}, criteria::Symbol.
   elseif best < length(model)
     # If 'best' is less than the number of models, return the top 'best' models
     top_models = ct[1:best, 1]
-    return criteria_table(top_models, criteria...; best = false) # Re-run with selected models
+    return criteria_table(top_models, criteria...; best=false) # Re-run with selected models
   else
     # Otherwise, return the full DataFrame
     return ct
   end
 end
 
-@inline criteria_table(model::TableRegressionModel, criteria::Symbol...) :: DataFrame = criteria_table([model], criteria...)
+@inline criteria_table(model::TableRegressionModel, criteria::Symbol...)::DataFrame = criteria_table([model], criteria...) # single-model convenience method: wraps `model` in a one-element vector and forwards to the vector method
 
 """
 The `criteria_selection` function evaluates and ranks a vector of regression models based on specified criteria, returning the best model according to the combined ranking.
@@ -216,4 +216,4 @@ The `criteria_selection` function evaluates and ranks a vector of regression mod
 - `TableRegressionModel`: 
     The best model based on the combined ranking of the specified criteria.
 """
-@inline criteria_selection(model::Vector{<:TableRegressionModel}, criteria::Symbol...) :: TableRegressionModel = criteria_table(model, criteria..., best = 5)[1, 1]
+@inline criteria_selection(model::Vector{<:TableRegressionModel}, criteria::Symbol...)::TableRegressionModel = criteria_table(model, criteria..., best=5)[1, 1] # builds the criteria table with best=5 and returns its row-1, column-1 cell
diff --git a/src/dendrometric-averages.jl → src/dendrometric_averages.jl b/src/dendrometric-averages.jl → src/dendrometric_averages.jl
@@ -4,11 +4,11 @@ function dendrometric_averages(d::Vector; area::Real=1.0) :: DataFrame
   d̅, s = mean_and_std(d)
   d₋, d₊ = d̅ - s, d̅ + s
   g = basal_area.(d)
-  dg = √((40000 * mean(g)) / π)
-  dw = quantile(d, 0.6)
-  dz = √((40000 * median(g)) / π)
+  dg = √((40000 * Distributions.mean(g)) / π)
+  dw = Distributions.quantile(d, 0.6)
+  dz = √((40000 * Distributions.median(g)) / π)
   n_tree = round(Int, (100 * area))
-  d₁₀₀ = n_tree < length(d) ? mean(partialsort(d, 1:n_tree, rev = true)) : NaN64
+  d₁₀₀ = n_tree < length(d) ? Distributions.mean(partialsort(d, 1:n_tree, rev = true)) : NaN64
   return DataFrame(d₋ = d₋, d̅ = d̅, dg = dg, dw = dw, dz = dz, d₁₀₀ = d₁₀₀, d₊ = d₊)
 end
 

diff --git a/src/frequency-tables.jl → src/frequency_tables.jl b/src/frequency-tables.jl → src/frequency_tables.jl
diff --git a/src/inventory-report.jl → src/inventory_report.jl b/src/inventory-report.jl → src/inventory_report.jl