Skip to content

Commit

Permalink
add site classification
Browse files Browse the repository at this point in the history
  • Loading branch information
marcosdanieldasilva committed Aug 4, 2024
1 parent c35d4ed commit d6e24c6
Show file tree
Hide file tree
Showing 8 changed files with 471 additions and 20 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ authors = ["Marcos Daniel da Silva <marcosdasilva@5a.tec.br> and contributors"]
version = "1.0.0-DEV"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Expand Down
12 changes: 10 additions & 2 deletions src/ForestMensuration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ Facilitates the analysis of dendrometric and forest data.
Performs complex calculations with simple commands.
Offers a user-friendly and intuitive interface.
"""

module ForestMensuration
using DataFrames, Distributions, LinearAlgebra, HypothesisTests, StatsBase, StatsModels, Tables
using CategoricalArrays, DataFrames, Distributions, LinearAlgebra, GLM, HypothesisTests,
Plots.PlotMeasures, StatsBase, StatsModels, StatsPlots, RecipesBase, RecipesPipeline, Tables

import StatsBase: dof_residual, dof, nobs, aicc, aic, bic, coef

include("structs-consts.jl")
Expand All @@ -25,10 +26,14 @@ module ForestMensuration
include("cubage.jl")
include("inventory-report.jl")
include("simple-casual-sampling.jl")
include("site-classification.jl")
include("show.jl")
include("graph-analysis.jl")

export
# StatsModels Terms
ContinuousTerm,
CategoricalTerm,
# Regression structures
FittedLinearModel,
# Cubage methods
Expand All @@ -54,6 +59,7 @@ module ForestMensuration
fit_regression,
frequency_table,
graph,
hdom_classification,
homoscedasticity,
loglikelihood,
modelmatrix,
Expand All @@ -65,6 +71,8 @@ module ForestMensuration
r2,
regression,
residuals,
site_classification,
site_table,
stderror,
syx,
syx_in_percentage,
Expand Down
261 changes: 253 additions & 8 deletions src/graph-analysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
# return confidence
# end

using StatsPlots
using RecipesBase
using Distributions
using RecipesPipeline
using Plots.PlotMeasures

@recipe function f(model::FittedLinearModel)
pred = predict(model)
resid = residuals(model)
Expand Down Expand Up @@ -88,7 +82,7 @@ using Plots.PlotMeasures
labels, idxs = getfield(gb, 1), getfield(gb, 2)
ngroups = length(labels)
for i = 1:ngroups
color = cgrad(:darktest, categorical = true)[i]
color = cgrad(:darktest, ngroups, categorical = true)[i]
@series begin
seriestype := :scatter
subplot := 1
Expand Down Expand Up @@ -168,4 +162,255 @@ using Plots.PlotMeasures
end
end

@inline graph(model::FittedLinearModel) = plot(model::FittedLinearModel)
@inline graph(fitted_model::FittedLinearModel; kw...) = plot(fitted_model; kw...)

@recipe function f(fitted_model::FittedLinearModel, split::Bool)
if !split
pred = predict(fitted_model)
resid = residuals(fitted_model)
confidence = confidence_interval(fitted_model)
y_name, x_name = names(fitted_model.data)[1:2]
n = size(fitted_model.data, 1)
qp = qqbuild(fit_mle(Normal, resid), resid)
x_qqline = [extrema(qp.qx)...]
quantx, quanty = quantile(qp.qx, [0.25, 0.75]), quantile(qp.qy, [0.25, 0.75])
slp = diff(quanty) ./ diff(quantx)
y_qqline = quanty .+ slp .* (x_qqline .- quantx)
grid --> :none
tick_direction --> :out
framestyle --> :box
markerstrokewidth --> 0
seriesalpha --> 0.6
legend --> :outertopright
(y, x, q...) = eachcol(fitted_model.data)
color_palette --> cgrad(:darktest, categorical = true)
# titlefontize --> 10
guidefontsize --> 9
legendfontsize --> 7
fontfamily --> :times
size --> (1000, 650)
margin --> 3mm
layout := (2, 2)
if size(fitted_model.data, 2) == 2
@series begin
seriestype := :scatter
subplot := 1
label --> "Observed Values"
x, y
end
@series begin
ribbon --> confidence
xaxis --> x_name
yaxis --> y_name
title --> "Linear Regression"
label --> "Fitted Values"
seriestype := :line
seriesalpha := 0.7
line --> 2
subplot := 1
x, pred
end
@series begin
seriestype := :scatter
subplot := 2
label --> "Observed Values"
pred, resid
end
@series begin
seriestype := :hline
subplot := 2
label := :none
seriesalpha := 1.0
linecolor := :darkgrey
line --> 2
[0], [0]
end
@series begin
seriestype := :histogram
xaxis --> "Residuals"
yaxis --> "Frequency"
title --> "Histogram of Residuals"
label --> "Observed Values"
linewidth := 0.7
normalize := :pdf
ylims := (0, Inf)
subplot := 3
resid
end
@series begin
seriestype := :scatter
label --> "Observed Values"
subplot := 4
qp.qx, qp.qy
end
@series begin
seriestype := :line
subplot := 4
label --> :none
seriesalpha := 1.0
line := 2
linecolor := :darkgrey
x_qqline, y_qqline
end
else
h = Plots._make_hist((vec(copy(resid)),), :auto; normed = true)
nbins = length(h.weights)
edges = h.edges[1]
bar_width = mean(map(i -> edges[i + 1] - edges[i], 1:nbins))
(y, x, q...) = eachcol(fitted_model.data)
x_bar = map(i -> (edges[i] + edges[i + 1]) / 2, 1:nbins)
gb = RecipesPipeline._extract_group_attributes(tuple(q...))
labels, idxs = getfield(gb, 1), getfield(gb, 2)
ngroups = length(labels)
ntot = count(x_bar -> !isnan(x_bar), resid)
y_bar = fill(NaN, nbins, ngroups)
for i = 1:ngroups
groupinds = idxs[i]
v_i = filter(x_bar -> !isnan(x_bar), resid[groupinds])
h_i = Plots._make_hist((v_i,), h.edges; normed = false, weights = nothing)
y_bar[:, i] .= h_i.weights .* (length(v_i) / ntot / sum(h_i.weights))
y_bar, fr = StatsPlots.groupedbar_fillrange(y_bar)
end
for i = 1:ngroups
colors = cgrad(:darktest, categorical = true, ngroups)
qp = qqbuild(fit_mle(Normal, resid[idxs[i]]), resid[idxs[i]])
@series begin
seriestype := :scatter
subplot := 1
label := labels[i]
color := colors[i]
xvals = x[idxs[i]]
yvals = y[idxs[i]]
xvals, yvals
end
@series begin
ribbon --> confidence[idxs[i]]
seriestype := :line
seriesalpha := 0.7
line --> 2
title --> "Linear Regression"
subplot := 1
label := :none
color := colors[i]
xvals = x[idxs[i]]
predvals = pred[idxs[i]]
xvals, predvals
end
@series begin
seriestype := :scatter
subplot := 2
label := labels[i]
color := colors[i]
pred[idxs[i]], resid[idxs[i]]
end
@series begin
seriestype := :bar
seriesalpha := 1.0
subplot := 3
label := labels[i]
color := colors[i]
yvals = y_bar[:, i]
x_bar, yvals
end
@series begin
seriestype := :scatter
label := labels[i]
color := colors[i]
subplot := 4
qp.qx, qp.qy
end
end
end
else
pred = predict(fitted_model)
resid = residuals(fitted_model)
confidence = confidence_interval(fitted_model)
(y, x, q...) = eachcol(fitted_model.data)
y_name, x_name = names(fitted_model.data)[1:2]
gb = RecipesPipeline._extract_group_attributes(tuple(q...))
labels, idxs = getfield(gb, 1), getfield(gb, 2)
ngroups = length(labels)
colors = cgrad(:darktest, categorical = true, ngroups)
layout := (4, ngroups)
grid --> :none
tick_direction --> :out
framestyle --> :box
markerstrokewidth --> 0
seriesalpha --> 0.6
for i = 1:ngroups
qp = qqbuild(fit_mle(Normal, resid[idxs[i]]), resid[idxs[i]])
x_qqline = [extrema(qp.qx)...]
quantx, quanty = quantile(qp.qx, [0.25, 0.75]), quantile(qp.qy, [0.25, 0.75])
slp = diff(quanty) ./ diff(quantx)
y_qqline = quanty .+ slp .* (x_qqline .- quantx)
@series begin
seriestype := :scatter
subplot := i
link := :y
label := :none
color := colors[i]
xvals = x[idxs[i]]
yvals = y[idxs[i]]
xvals, yvals
end
@series begin
seriestype := :line
subplot := i
ribbon := confidence[idxs[i]]
seriesalpha := 0.7
line --> 2
title --> labels[i]
label := :none
color := colors[i]
xvals = x[idxs[i]]
predvals = pred[idxs[i]]
xvals, predvals
end
@series begin
seriestype := :scatter
subplot := i + ngroups
label := :none
# link := :both
color := colors[i]
pred[idxs[i]], resid[idxs[i]]
end
@series begin
seriestype := :hline
subplot := i + ngroups
label := :none
seriesalpha := 1.0
linecolor := :darkgrey
line --> 2
[0], [0]
end
@series begin
seriestype := :histogram
subplot := i + 2ngroups
label := :none
linewidth := 0.7
normalize := :pdf
color := colors[i]
ylims := (0, Inf)
resid[idxs[i]]
end
@series begin
seriestype := :scatter
subplot := i + 3ngroups
label := :none
color := colors[i]
qp.qx, qp.qy
end
@series begin
seriestype := :line
subplot := i + 3ngroups
label --> :none
seriesalpha := 1.0
line := 2
linecolor := :darkgrey
x_qqline, y_qqline
end
end
end
end

@inline graph(fitted_model::FittedLinearModel, split::Bool=false; kw...) = Plots.plot(fitted_model, split; kw...)
8 changes: 5 additions & 3 deletions src/linear-regression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,17 @@ Perform regression analysis.
- `q::S...`: Symbols representing additional terms for the model.
- `best::Union{Bool, Int}`: Whether to return the best model(s) based on RMSE.
- `effect::S`: Type of effect to consider (`:additive` or `:interactive`).
- q_type` : Type of q variable (`CategoricalTerm` or `ContinuousTerm`).
# Returns
- `Vector{FittedLinearModel}`: Vector of fitted models.
"""
function regression(y::S, x::S, data::AbstractDataFrame, q::S...; best::Union{Bool, Int}=true, effect::S=:additive) where S <: Symbol
function regression(y::S, x::S, data::AbstractDataFrame, q::S...; best::Union{Bool, Int}=true, effect::S=:additive, q_type=CategoricalTerm) where S <: Symbol
if best < 0
throw(ArgumentError("best must be a positive integer or false"))
throw(ArgumentError("best must be a positive integer or true/false"))
elseif !isempty(q)
effect (:additive, :interactive) || throw(ArgumentError("Invalid effect argument. Expected :additive or :interactive."))
q_type (CategoricalTerm, ContinuousTerm) || throw(ArgumentError("Invalid q_type argument. Expected CategoricalTerm or ContinuousTerm."))
end

new_data = dropmissing(data[:, [y, x, q...]])
Expand All @@ -195,7 +197,7 @@ function regression(y::S, x::S, data::AbstractDataFrame, q::S...; best::Union{Bo
x_observed = new_data[!, x]
y_term = concrete_term(term(y), y_observed, ContinuousTerm)
x_term = concrete_term(term(x), x_observed, ContinuousTerm)
q_term = [concrete_term(term(terms), new_data[!, terms], CategoricalTerm) for terms q]
q_term = [concrete_term(term(terms), new_data[!, terms], q_type) for terms q]
y_term_list = _dependent_variable(y_term, x_term)
x_term_list = _indepedent_variable(x_term)

Expand Down
21 changes: 21 additions & 0 deletions src/rascunho.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
using ForestMensuration, CSV, DataFrames


data = CSV.read("C:\\Users\\marco\\OneDrive\\Documents\\Programação\\dados_CSV\\AlturaDominante_vs_Idade.csv", DataFrame)

reg = regression(:Hd, :idade, data)

qreg = regression(:Hd, :idade, data, :parcelas)

s = site_classification(reg, 60)

site_table(reg, 60, 3)

site_table(reg, 60)

df = CSV.read("C:\\Users\\marco\\OneDrive\\Documents\\Programação\\dados_CSV\\exemplo-hipso.csv", DataFrame)

reg = regression(:ht, :dap, df)

p = graph(reg)

qreg = regression(:ht, :dap, df, :regiao)

qp = graph(qreg)

cub_data = CSV.read(
"C:\\Users\\marco\\OneDrive\\Documents\\Programação\\dados_CSV\\exemplo-cubagem-2.csv",
DataFrame
Expand Down
Loading

0 comments on commit d6e24c6

Please sign in to comment.