Skip to content

Commit

Permalink
code refactoring, release 0.1.6
Browse files Browse the repository at this point in the history
  • Loading branch information
kortirso committed Jan 8, 2019
1 parent cb20cdf commit 4f79d05
Show file tree
Hide file tree
Showing 17 changed files with 74 additions and 144 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## Unreleased
## [0.1.6] - 2019-01-08
### Added
- Polynomial Regression predictor

Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Available preprocessing methods:
Available algorithms for prediction:

- Linear Regression
- Polynomial Regression

Available algorithms for classification:

Expand All @@ -23,7 +24,7 @@ by adding `learn_kit` to your list of dependencies in `mix.exs`:
```elixir
def deps do
[
{:learn_kit, "~> 0.1.5"}
{:learn_kit, "~> 0.1.6"}
]
end
```
Expand Down
1 change: 0 additions & 1 deletion lib/learn_kit/knn.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ defmodule LearnKit.Knn do
defstruct data_set: []

alias LearnKit.Knn

use Knn.Classify

@type label :: atom
Expand Down
12 changes: 3 additions & 9 deletions lib/learn_kit/knn/classify.ex
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,10 @@ defmodule LearnKit.Knn.Classify do
end

# sort distances
# Sorts feature tuples ascending by distance, i.e. by the first tuple element.
# Fix: the span contained both the old block form and the new one-liner of this
# clause — two clauses with the same pattern, the second unreachable. Keep one.
defp sort_distances(features) do
  Enum.sort(features, &(elem(&1, 0) <= elem(&2, 0)))
end

# take closest features
# Takes the `k` nearest features (the list is already distance-sorted upstream);
# `k` is read from the options via Access (`options[:k]`).
# Fix: duplicate clause — both the old block form and the new one-liner were
# present, making the second definition unreachable. Keep a single clause.
defp select_closest_features(features, options) do
  Enum.take(features, options[:k])
end

# check existence of the current feature in the data set
defp check_zero_distance(closest_features, options) do
Expand Down Expand Up @@ -146,9 +142,7 @@ defmodule LearnKit.Knn.Classify do
end
end

# Base case: all weighted labels consumed — return the accumulator unchanged.
# Fix: duplicate clause — the old block form and the new one-liner were both
# present for the same `([], acc)` pattern. Keep a single clause.
defp accumulate_weight_of_labels([], acc), do: acc

defp accumulate_weight_of_labels([{_, key, weight} | tail], acc) do
previous = if Keyword.has_key?(acc, key), do: acc[key], else: 0
Expand Down
9 changes: 2 additions & 7 deletions lib/learn_kit/naive_bayes/gaussian.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ defmodule LearnKit.NaiveBayes.Gaussian do
defstruct data_set: [], fit_data: []

alias LearnKit.NaiveBayes.Gaussian

use Gaussian.Normalize
use Gaussian.Fit
use Gaussian.Classify
Expand Down Expand Up @@ -34,9 +33,7 @@ defmodule LearnKit.NaiveBayes.Gaussian do
"""
@spec new() :: %Gaussian{data_set: []}

# Creates an empty classifier by delegating to new/1 with an empty data set.
# Fix: duplicate definition — the old block form and the new one-liner of
# new/0 were both present. Keep a single definition.
def new, do: Gaussian.new([])

@doc """
Creates classifier with data_set
Expand All @@ -53,9 +50,7 @@ defmodule LearnKit.NaiveBayes.Gaussian do
"""
@spec new(data_set) :: %Gaussian{data_set: data_set}

# Creates a classifier struct holding the given data set.
# Fix: duplicate definition — the old block form and the new one-liner of
# new/1 were both present. Keep a single definition.
def new(data_set), do: %Gaussian{data_set: data_set}

@doc """
Add train data to classifier
Expand Down
1 change: 0 additions & 1 deletion lib/learn_kit/preprocessing.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ defmodule LearnKit.Preprocessing do
"""

alias LearnKit.{Preprocessing, Math}

use Preprocessing.Normalize

@type row :: [number]
Expand Down
18 changes: 6 additions & 12 deletions lib/learn_kit/regression/linear.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ defmodule LearnKit.Regression.Linear do
defstruct factors: [], results: [], coefficients: []

alias LearnKit.Regression.Linear

use Linear.Calculations
use LearnKit.Regression.Score

Expand Down Expand Up @@ -43,8 +42,9 @@ defmodule LearnKit.Regression.Linear do
"""
@spec new(factors, results) :: %Linear{factors: factors, results: results, coefficients: []}

# Builds a Linear regression struct from factor and result lists;
# coefficients start empty until fit/2 is called.
# Fix: duplicate clause — the old `, do:` form and the new block form were
# both present for the same head. Keep a single definition.
def new(factors, results) when is_list(factors) and is_list(results) do
  %Linear{factors: factors, results: results}
end

@doc """
Fit train data
Expand Down Expand Up @@ -75,19 +75,13 @@ defmodule LearnKit.Regression.Linear do
}
"""
@spec fit(%Linear{factors: factors, results: results}) :: %Linear{
factors: factors,
results: results,
coefficients: coefficients
}

def fit(linear = %Linear{factors: factors, results: results}, options \\ [])
when is_list(options) do
@spec fit(%Linear{factors: factors, results: results}) :: %Linear{factors: factors, results: results, coefficients: coefficients}

# Fits regression coefficients for the train data: merges a default empty
# `:method` option, resolves the fit strategy, runs it, and returns a new
# struct carrying the computed coefficients.
def fit(linear = %Linear{factors: factors, results: results}, options \\ []) when is_list(options) do
  coefficients =
    [method: ""]
    |> Keyword.merge(options)
    |> define_method_for_fit()
    |> do_fit(linear)

  %Linear{factors: factors, results: results, coefficients: coefficients}
end

Expand Down
79 changes: 31 additions & 48 deletions lib/learn_kit/regression/linear/calculations.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@ defmodule LearnKit.Regression.Linear.Calculations do
Module for fit functions
"""

alias LearnKit.Math
alias LearnKit.Regression.Linear
alias LearnKit.{Math, Regression.Linear}

defmacro __using__(_opts) do
quote do
defp do_fit(method, %Linear{factors: factors, results: results})
when method == "gradient descent" do
defp do_fit("gradient descent", %Linear{factors: factors, results: results}) do
gradient_descent_iteration(
[:rand.uniform(), :rand.uniform()],
0.0001,
Expand All @@ -21,10 +19,7 @@ defmodule LearnKit.Regression.Linear.Calculations do
end

# Default fit (any method other than "gradient descent"):
# slope via calc_beta/2, intercept from the means of results and factors.
# Fix: the span contained a dead first assignment of `beta` (the pre-refactor
# correlation/std-dev expression) immediately shadowed by the calc_beta/2
# call. Keep only the live assignment.
defp do_fit(_, %Linear{factors: factors, results: results}) do
  beta = calc_beta(factors, results)
  alpha = Math.mean(results) - beta * Math.mean(factors)
  [alpha, beta]
end
Expand All @@ -36,66 +31,54 @@ defmodule LearnKit.Regression.Linear.Calculations do
end)
end

# Least-squares slope: correlation of factors and results, scaled by the
# ratio of their standard deviations (results over factors).
defp calc_beta(factors, results) do
  correlation = Math.correlation(factors, results)
  correlation * Math.standard_deviation(results) / Math.standard_deviation(factors)
end

# Gradient of the squared prediction error for a single observation {x, y},
# returned as a two-element list (one partial derivative per coefficient).
defp squared_error_gradient(linear, x, y) do
  err = prediction_error(linear, x, y)
  [-2 * err, -2 * err * x]
end

# Termination clause: after 100 consecutive non-improving iterations,
# return the best coefficients found so far.
# Fix: duplicate clause — the old multiline form and the new one-liner were
# both present for the same guarded head. Keep a single clause.
defp gradient_descent_iteration(_, _, min_theta, _, _, no_improve_step)
     when no_improve_step >= 100,
     do: min_theta

# One stochastic-gradient-descent iteration: evaluate the current `theta`,
# refresh the best candidate / stall counter / learning rate via check_value/6,
# compute the next `theta` from a shuffled pass over the data, and recurse.
# The guarded sibling clause stops the recursion after 100 stalled iterations.
# Fix: this span interleaved the old and the refactored version of the clause
# (a second nested `defp` head, duplicated destructure names, two `] =
# check_value(...)` lines) and was not valid code. Keep only the coherent
# refactored version, all of whose lines were present in the span.
defp gradient_descent_iteration(theta, alpha, min_theta, min_value, data, no_improve_step) do
  [
    min_theta,
    min_value,
    no_improve_step,
    alpha
  ] = check_value(data, min_value, theta, min_theta, no_improve_step, alpha)

  calc_new_theta(data, theta, alpha)
  |> gradient_descent_iteration(alpha, min_theta, min_value, data, no_improve_step)
end

defp check_value(data, min_value, theta, min_theta, iterations_with_no_improvement, alpha) do
value =
Enum.reduce(data, 0, fn {xi, yi}, acc ->
acc + squared_prediction_error(%Linear{coefficients: theta}, xi, yi)
end)
# Performs one stochastic pass over the (shuffled) observations, applying a
# scaled gradient step to the accumulator for each {xi, yi} pair.
defp calc_new_theta(data, theta, alpha) do
data
|> Enum.shuffle()
|> Enum.reduce(theta, fn {xi, yi}, acc ->
# NOTE(review): the gradient is evaluated against the pass-initial `theta`,
# not the running accumulator `acc` — plain SGD would use `acc` here.
# Confirm this is intended before changing it.
gradient_i = squared_error_gradient(%Linear{coefficients: theta}, xi, yi)
# Step: acc - alpha * gradient (element-wise, via the Math helpers).
acc |> Math.vector_subtraction(alpha |> Math.scalar_multiply(gradient_i))
end)
end

# Scores `theta` against the data and updates the descent bookkeeping.
# Returns [best_theta, best_value, stall_count, next_alpha]: an improvement
# resets the stall counter and the learning rate to 0.0001; otherwise the
# counter grows and the learning rate decays by 10%.
defp check_value(data, min_value, theta, min_theta, no_improve_step, alpha) do
  candidate = calc_new_value(data, theta)

  if candidate < min_value do
    [theta, candidate, 0, 0.0001]
  else
    [min_theta, min_value, no_improve_step + 1, alpha * 0.9]
  end
end

# Total squared prediction error of `theta` over all {x, y} observations.
defp calc_new_value(data, theta) do
  data
  |> Enum.map(fn {x, y} -> squared_prediction_error(%Linear{coefficients: theta}, x, y) end)
  |> Enum.sum()
end
end
end
end
22 changes: 7 additions & 15 deletions lib/learn_kit/regression/polynomial.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ defmodule LearnKit.Regression.Polynomial do
defstruct factors: [], results: [], coefficients: [], degree: 2

alias LearnKit.Regression.Polynomial

use Polynomial.Calculations
use LearnKit.Regression.Score

Expand All @@ -29,12 +28,8 @@ defmodule LearnKit.Regression.Polynomial do
%LearnKit.Regression.Polynomial{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: [], degree: 2}
"""
@spec new(factors, results) :: %Polynomial{
factors: factors,
results: results,
coefficients: [],
degree: 2
}
@spec new(factors, results) :: %Polynomial{factors: factors, results: results, coefficients: [], degree: 2}

# Builds a Polynomial regression struct from factor and result lists;
# coefficients start empty and degree keeps its struct default of 2.
def new(factors, results) when is_list(factors) and is_list(results),
  do: %Polynomial{factors: factors, results: results}
Expand Down Expand Up @@ -74,12 +69,8 @@ defmodule LearnKit.Regression.Polynomial do
}
"""
@spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{
factors: factors,
results: results,
coefficients: coefficients,
degree: degree
}
@spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}

def fit(%Polynomial{factors: factors, results: results}, options \\ []) do
degree = options[:degree] || 2
matrix = matrix(factors, degree)
Expand All @@ -103,8 +94,8 @@ defmodule LearnKit.Regression.Polynomial do
"""
@spec predict(%Polynomial{coefficients: coefficients, degree: degree}, list) :: {:ok, list}
def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples)
when is_list(samples) do

# Predicts values for a list of samples, delegating the per-sample work to
# do_predict/2 and wrapping the result in an :ok tuple.
def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples) when is_list(samples),
  do: {:ok, do_predict(polynomial, samples)}

Expand All @@ -123,6 +114,7 @@ defmodule LearnKit.Regression.Polynomial do
"""
@spec predict(%Polynomial{coefficients: coefficients, degree: degree}, number) :: {:ok, number}

def predict(%Polynomial{coefficients: coefficients, degree: degree}, sample) do
ordered_coefficients = coefficients |> Enum.reverse()
{:ok, substitute_coefficients(ordered_coefficients, sample, degree, 0.0)}
Expand Down
2 changes: 0 additions & 2 deletions lib/learn_kit/regression/polynomial/calculations.ex
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ defmodule LearnKit.Regression.Polynomial.Calculations do

# First matrix row: the factor count, followed by the sums of x_i raised to
# each power 1..degree (computed by sum_of_x_i_with_k/2).
defp matrix_line(1, factors, degree) do
  exponents = Enum.to_list(1..degree)
  [Enum.count(factors) | sum_of_x_i_with_k(exponents, factors)]
end

Expand All @@ -26,7 +25,6 @@ defmodule LearnKit.Regression.Polynomial.Calculations do

defp matrix(factors, degree) do
lines = Enum.to_list(1..(degree + 1))

Enum.map(lines, fn line ->
matrix_line(line, factors, degree)
end)
Expand Down
13 changes: 4 additions & 9 deletions lib/learn_kit/regression/score.ex
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,7 @@ defmodule LearnKit.Regression.Score do
{:ok, 0.9876543209876543}
"""
@spec score(%LearnKit.Regression.Linear{
factors: factors,
results: results,
coefficients: coefficients
}) :: {:ok, number}
@spec score(%LearnKit.Regression.Linear{factors: factors, results: results, coefficients: coefficients}) :: {:ok, number}

def score(regression = %_{factors: _, results: _, coefficients: _}) do
{
Expand All @@ -44,10 +40,9 @@ defmodule LearnKit.Regression.Score do
y - prediction
end

defp sum_of_squared_errors(
regression = %_{coefficients: _, factors: factors, results: results}
) do
Enum.zip(factors, results)
defp sum_of_squared_errors(regression = %_{coefficients: _, factors: factors, results: results}) do
factors
|> Enum.zip(results)
|> Enum.reduce(0, fn {xi, yi}, acc ->
acc + squared_prediction_error(regression, xi, yi)
end)
Expand Down
2 changes: 1 addition & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ defmodule LearnKit.MixProject do
def project do
[
app: :learn_kit,
version: "0.1.5",
version: "0.1.6",
elixir: "~> 1.7",
name: "LearnKit",
description: @description,
Expand Down
1 change: 0 additions & 1 deletion test/learn_kit/knn_test.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
defmodule LearnKit.KnnTest do
use ExUnit.Case

alias LearnKit.Knn

setup_all do
Expand Down
1 change: 0 additions & 1 deletion test/learn_kit/math_test.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
defmodule LearnKit.MathTest do
use ExUnit.Case

alias LearnKit.Math

test "calculate sum" do
Expand Down
1 change: 0 additions & 1 deletion test/learn_kit/preprocessing_test.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
defmodule LearnKit.PreprocessingTest do
use ExUnit.Case

alias LearnKit.Preprocessing

describe "for invalid data" do
Expand Down
Loading

0 comments on commit 4f79d05

Please sign in to comment.