add site classification

marcosdanieldasilva · Aug 4, 2024 · d6e24c6 · d6e24c6
1 parent c35d4ed
commit d6e24c6
Show file tree

Hide file tree

Showing 8 changed files with 471 additions and 20 deletions.
diff --git a/Project.toml b/Project.toml
@@ -4,9 +4,11 @@ authors = ["Marcos Daniel da Silva <marcosdasilva@5a.tec.br> and contributors"]
 version = "1.0.0-DEV"
 
 [deps]
+CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"

diff --git a/src/ForestMensuration.jl b/src/ForestMensuration.jl
@@ -12,9 +12,10 @@ Facilitates the analysis of dendrometric and forest data.
 Performs complex calculations with simple commands.
 Offers a user-friendly and intuitive interface.
 """
-
 module ForestMensuration
-  using DataFrames, Distributions, LinearAlgebra, HypothesisTests, StatsBase, StatsModels, Tables
+  using CategoricalArrays, DataFrames, Distributions, LinearAlgebra, GLM, HypothesisTests,
+        Plots.PlotMeasures, StatsBase, StatsModels, StatsPlots, RecipesBase, RecipesPipeline, Tables
+
   import StatsBase: dof_residual, dof, nobs, aicc, aic, bic, coef
 
   include("structs-consts.jl")
@@ -25,10 +26,14 @@ module ForestMensuration
   include("cubage.jl")
   include("inventory-report.jl")
   include("simple-casual-sampling.jl")
+  include("site-classification.jl")
   include("show.jl")
   include("graph-analysis.jl")
 
   export
+    # StatsModels Terms
+    ContinuousTerm,
+    CategoricalTerm,
     # Regression structures
     FittedLinearModel,
     # Cubage methods
@@ -54,6 +59,7 @@ module ForestMensuration
     fit_regression,
     frequency_table,
     graph,
+    hdom_classification,
     homoscedasticity,
     loglikelihood,
     modelmatrix,
@@ -65,6 +71,8 @@ module ForestMensuration
     r2,
     regression,
     residuals,
+    site_classification,
+    site_table,
     stderror,
     syx,
     syx_in_percentage,

diff --git a/src/graph-analysis.jl b/src/graph-analysis.jl
@@ -8,12 +8,6 @@
 #   return  confidence
 # end
 
-using StatsPlots
-using RecipesBase
-using Distributions
-using RecipesPipeline
-using Plots.PlotMeasures
-
 @recipe function f(model::FittedLinearModel)
   pred = predict(model)
   resid = residuals(model)
@@ -88,7 +82,7 @@ using Plots.PlotMeasures
     labels, idxs = getfield(gb, 1), getfield(gb, 2)
     ngroups = length(labels)
     for i = 1:ngroups
-      color = cgrad(:darktest, categorical = true)[i]
+      color = cgrad(:darktest, ngroups, categorical = true)[i]
       @series begin
         seriestype := :scatter
         subplot := 1
@@ -168,4 +162,255 @@ using Plots.PlotMeasures
   end
 end
 
-@inline graph(model::FittedLinearModel) = plot(model::FittedLinearModel)
+@inline graph(fitted_model::FittedLinearModel; kw...) = plot(fitted_model; kw...)
+
+@recipe function f(fitted_model::FittedLinearModel, split::Bool)
+  if !split
+      pred = predict(fitted_model)
+      resid = residuals(fitted_model)
+      confidence = confidence_interval(fitted_model)
+      y_name, x_name = names(fitted_model.data)[1:2]
+      n = size(fitted_model.data, 1)
+      qp = qqbuild(fit_mle(Normal, resid), resid)
+      x_qqline = [extrema(qp.qx)...]
+      quantx, quanty = quantile(qp.qx, [0.25, 0.75]), quantile(qp.qy, [0.25, 0.75])
+      slp = diff(quanty) ./ diff(quantx)
+      y_qqline = quanty .+ slp .* (x_qqline .- quantx)
+      grid --> :none
+      tick_direction --> :out
+      framestyle --> :box
+      markerstrokewidth --> 0
+      seriesalpha --> 0.6
+      legend --> :outertopright
+      (y, x, q...) = eachcol(fitted_model.data)
+      color_palette --> cgrad(:darktest, categorical = true)
+      # titlefontize --> 10
+      guidefontsize --> 9
+      legendfontsize --> 7
+      fontfamily --> :times
+      size --> (1000, 650)
+      margin --> 3mm
+      layout := (2, 2)
+      if size(fitted_model.data, 2) == 2
+          @series begin
+              seriestype := :scatter
+              subplot := 1
+              label --> "Observed Values"
+              x, y
+          end
+          @series begin
+              ribbon --> confidence
+              xaxis --> x_name
+              yaxis --> y_name
+              title --> "Linear Regression"
+              label --> "Fitted Values"
+              seriestype := :line
+              seriesalpha := 0.7
+              line --> 2
+              subplot := 1
+              x, pred
+          end
+          @series begin
+              seriestype := :scatter
+              subplot := 2
+              label --> "Observed Values"
+              pred, resid
+          end
+          @series begin
+              seriestype := :hline
+              subplot := 2
+              label := :none
+              seriesalpha := 1.0
+              linecolor := :darkgrey
+              line --> 2
+              [0], [0]
+          end
+          @series begin
+              seriestype := :histogram
+              xaxis --> "Residuals"
+              yaxis --> "Frequency"
+              title --> "Histogram of Residuals"
+              label --> "Observed Values"
+              linewidth := 0.7
+              normalize := :pdf
+              ylims := (0, Inf)
+              subplot := 3
+              resid
+          end
+          @series begin
+              seriestype := :scatter
+              label --> "Observed Values"
+              subplot := 4
+              qp.qx, qp.qy
+          end
+          @series begin
+              seriestype := :line
+              subplot := 4
+              label --> :none
+              seriesalpha := 1.0
+              line := 2
+              linecolor := :darkgrey
+              x_qqline, y_qqline
+          end
+      else
+          h = Plots._make_hist((vec(copy(resid)),), :auto; normed = true)
+          nbins = length(h.weights)
+          edges = h.edges[1]
+          bar_width = mean(map(i -> edges[i + 1] - edges[i], 1:nbins))
+          (y, x, q...) = eachcol(fitted_model.data)
+          x_bar = map(i -> (edges[i] + edges[i + 1]) / 2, 1:nbins)
+          gb = RecipesPipeline._extract_group_attributes(tuple(q...))
+          labels, idxs = getfield(gb, 1), getfield(gb, 2)
+          ngroups = length(labels)
+          ntot = count(x_bar -> !isnan(x_bar), resid)
+          y_bar = fill(NaN, nbins, ngroups)
+          for i = 1:ngroups
+              groupinds = idxs[i]
+              v_i = filter(x_bar -> !isnan(x_bar), resid[groupinds])
+              h_i = Plots._make_hist((v_i,), h.edges; normed = false, weights = nothing)
+              y_bar[:, i] .= h_i.weights .* (length(v_i) / ntot / sum(h_i.weights))
+              y_bar, fr = StatsPlots.groupedbar_fillrange(y_bar)
+          end
+          for i = 1:ngroups
+              colors = cgrad(:darktest, categorical = true, ngroups)
+              qp = qqbuild(fit_mle(Normal, resid[idxs[i]]), resid[idxs[i]])
+              @series begin
+                  seriestype := :scatter
+                  subplot := 1
+                  label := labels[i]
+                  color := colors[i]
+                  xvals = x[idxs[i]]
+                  yvals = y[idxs[i]]
+                  xvals, yvals
+              end
+              @series begin
+                  ribbon --> confidence[idxs[i]]
+                  seriestype := :line
+                  seriesalpha := 0.7
+                  line --> 2
+                  title --> "Linear Regression"
+                  subplot := 1
+                  label := :none
+                  color := colors[i]
+                  xvals = x[idxs[i]]
+                  predvals = pred[idxs[i]]
+                  xvals, predvals
+              end
+              @series begin
+                  seriestype := :scatter
+                  subplot := 2
+                  label := labels[i]
+                  color := colors[i]
+                  pred[idxs[i]], resid[idxs[i]]
+              end
+              @series begin
+                  seriestype := :bar
+                  seriesalpha := 1.0
+                  subplot := 3
+                  label := labels[i]
+                  color := colors[i]
+                  yvals = y_bar[:, i]
+                  x_bar, yvals
+              end
+              @series begin
+                  seriestype := :scatter
+                  label := labels[i]
+                  color := colors[i]
+                  subplot := 4
+                  qp.qx, qp.qy
+              end
+          end
+      end
+  else
+      pred = predict(fitted_model)
+      resid = residuals(fitted_model)
+      confidence = confidence_interval(fitted_model)
+      (y, x, q...) = eachcol(fitted_model.data)
+      y_name, x_name = names(fitted_model.data)[1:2]
+      gb = RecipesPipeline._extract_group_attributes(tuple(q...))
+      labels, idxs = getfield(gb, 1), getfield(gb, 2)
+      ngroups = length(labels)
+      colors = cgrad(:darktest, categorical = true, ngroups)
+      layout := (4, ngroups)
+      grid --> :none
+      tick_direction --> :out
+      framestyle --> :box
+      markerstrokewidth --> 0
+      seriesalpha --> 0.6
+      for i = 1:ngroups
+          qp = qqbuild(fit_mle(Normal, resid[idxs[i]]), resid[idxs[i]])
+          x_qqline = [extrema(qp.qx)...]
+          quantx, quanty = quantile(qp.qx, [0.25, 0.75]), quantile(qp.qy, [0.25, 0.75])
+          slp = diff(quanty) ./ diff(quantx)
+          y_qqline = quanty .+ slp .* (x_qqline .- quantx)
+          @series begin
+              seriestype := :scatter
+              subplot := i
+              link := :y
+              label := :none
+              color := colors[i]
+              xvals = x[idxs[i]]
+              yvals = y[idxs[i]]
+              xvals, yvals
+          end
+          @series begin
+              seriestype := :line
+              subplot := i
+              ribbon := confidence[idxs[i]]
+              seriesalpha := 0.7
+              line --> 2
+              title --> labels[i]
+              label := :none
+              color := colors[i]
+              xvals = x[idxs[i]]
+              predvals = pred[idxs[i]]
+              xvals, predvals
+          end
+          @series begin
+              seriestype := :scatter
+              subplot := i + ngroups
+              label := :none
+              # link := :both
+              color := colors[i]
+              pred[idxs[i]], resid[idxs[i]]
+          end
+          @series begin
+              seriestype := :hline
+              subplot := i + ngroups
+              label := :none
+              seriesalpha := 1.0
+              linecolor := :darkgrey
+              line --> 2
+              [0], [0]
+          end
+          @series begin
+              seriestype := :histogram
+              subplot := i + 2ngroups
+              label := :none
+              linewidth := 0.7
+              normalize := :pdf
+              color := colors[i]
+              ylims := (0, Inf)
+              resid[idxs[i]]
+          end
+          @series begin
+              seriestype := :scatter
+              subplot := i + 3ngroups
+              label := :none
+              color := colors[i]
+              qp.qx, qp.qy
+          end
+          @series begin
+              seriestype := :line
+              subplot := i + 3ngroups
+              label --> :none
+              seriesalpha := 1.0
+              line := 2
+              linecolor := :darkgrey
+              x_qqline, y_qqline
+          end
+      end
+  end
+end
+
+@inline graph(fitted_model::FittedLinearModel, split::Bool=false; kw...) = Plots.plot(fitted_model, split; kw...)
diff --git a/src/linear-regression.jl b/src/linear-regression.jl
@@ -173,15 +173,17 @@ Perform regression analysis.
 - `q::S...`: Symbols representing additional terms for the model.
 - `best::Union{Bool, Int}`: Whether to return the best model(s) based on RMSE.
 - `effect::S`: Type of effect to consider (`:additive` or `:interactive`).
+- q_type` : Type of q variable (`CategoricalTerm` or `ContinuousTerm`).
 
 # Returns
 - `Vector{FittedLinearModel}`: Vector of fitted models.
 """
-function regression(y::S, x::S, data::AbstractDataFrame, q::S...; best::Union{Bool, Int}=true, effect::S=:additive) where S <: Symbol
+function regression(y::S, x::S, data::AbstractDataFrame, q::S...; best::Union{Bool, Int}=true, effect::S=:additive, q_type=CategoricalTerm) where S <: Symbol
   if best < 0
-  throw(ArgumentError("best must be a positive integer or false"))
+  throw(ArgumentError("best must be a positive integer or true/false"))
   elseif !isempty(q)
   effect ∈ (:additive, :interactive) || throw(ArgumentError("Invalid effect argument. Expected :additive or :interactive."))
+  q_type ∈ (CategoricalTerm, ContinuousTerm) || throw(ArgumentError("Invalid q_type argument. Expected CategoricalTerm or ContinuousTerm."))
   end
 
   new_data = dropmissing(data[:, [y, x, q...]])
@@ -195,7 +197,7 @@ function regression(y::S, x::S, data::AbstractDataFrame, q::S...; best::Union{Bo
   x_observed = new_data[!, x]
   y_term = concrete_term(term(y), y_observed, ContinuousTerm)
   x_term = concrete_term(term(x), x_observed, ContinuousTerm)
-  q_term = [concrete_term(term(terms), new_data[!, terms], CategoricalTerm) for terms ∈ q]
+  q_term = [concrete_term(term(terms), new_data[!, terms], q_type) for terms ∈ q]
   y_term_list = _dependent_variable(y_term, x_term)
   x_term_list = _indepedent_variable(x_term)
 

diff --git a/src/rascunho.jl b/src/rascunho.jl
@@ -1,7 +1,28 @@
 using ForestMensuration, CSV, DataFrames
 
+
+data = CSV.read("C:\\Users\\marco\\OneDrive\\Documents\\Programação\\dados_CSV\\AlturaDominante_vs_Idade.csv", DataFrame)
+
+reg = regression(:Hd, :idade, data)
+
+qreg = regression(:Hd, :idade, data, :parcelas)
+
+s = site_classification(reg, 60)
+
+site_table(reg, 60, 3)
+
+site_table(reg, 60)
+
 df = CSV.read("C:\\Users\\marco\\OneDrive\\Documents\\Programação\\dados_CSV\\exemplo-hipso.csv", DataFrame)
 
+reg = regression(:ht, :dap, df)
+
+p = graph(reg)
+
+qreg = regression(:ht, :dap, df, :regiao)
+
+qp = graph(qreg)
+
 cub_data = CSV.read(
   "C:\\Users\\marco\\OneDrive\\Documents\\Programação\\dados_CSV\\exemplo-cubagem-2.csv",
   DataFrame