diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py
new file mode 100644
index 000000000000..9c8a53df3f93
--- /dev/null
+++ b/machine_learning/ridge_regression.py
@@ -0,0 +1,123 @@
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn import datasets
+
+
+# Ridge regression via gradient descent
+# Reference: https://en.wikipedia.org/wiki/Ridge_regression
+def ridge_cost_function(
+    x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
+) -> float:
+    """
+    Compute the Ridge regression cost function with L2 regularization.
+
+    J(θ) = (1/(2m)) * Σ (hθ(x_i) - y_i)^2 + (α/(2m)) * Σ θ_j^2   (for j = 1 to n)
+
+    Where:
+    - J(θ) is the cost function we aim to minimize
+    - m is the number of training examples
+    - hθ(x_i) = x_i · θ is the prediction for example i
+    - y_i is the actual target value for example i
+    - α (alpha) is the regularization parameter
+    - θ_0 (the bias term) is not regularized
+
+    @param x: The feature matrix (m x n)
+    @param y: The target vector (m,)
+    @param theta: The parameters (weights) of the model (n,)
+    @param alpha: The regularization parameter
+
+    @returns: The computed cost value
+    """
+    m = len(y)
+    predictions = np.dot(x, theta)
+    # Squared-error term plus L2 penalty (the bias term theta[0] is excluded)
+    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / (2 * m)) * np.sum(
+        theta[1:] ** 2
+    )
+
+    return cost
+
+
+def ridge_gradient_descent(
+    x: np.ndarray,
+    y: np.ndarray,
+    theta: np.ndarray,
+    alpha: float,
+    learning_rate: float,
+    max_iterations: int,
+) -> np.ndarray:
+    """
+    Perform gradient descent to minimize the cost function
+    and fit the Ridge regression model.
+
+    @param x: The feature matrix (m x n)
+    @param y: The target vector (m,)
+    @param theta: The initial parameters (weights) of the model (n,)
+    @param alpha: The regularization parameter
+    @param learning_rate: The learning rate for gradient descent
+    @param max_iterations: The number of iterations for gradient descent
+
+    @returns: The optimized parameters (weights) of the model (n,)
+    """
+    m = len(y)
+
+    for iteration in range(max_iterations):
+        predictions = np.dot(x, theta)
+        error = predictions - y
+
+        # Gradient of the squared-error term
+        gradient = (1 / m) * np.dot(x.T, error)
+        # Add the L2 penalty gradient; the bias term theta[0] is not regularized
+        gradient[1:] += (alpha / m) * theta[1:]
+        theta -= learning_rate * gradient
+
+        if iteration % 100 == 0:
+            cost = ridge_cost_function(x, y, theta, alpha)
+            print(f"Iteration {iteration}, Cost: {cost}")
+
+    return theta
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
+
+    # Load the California Housing dataset
+    california_housing = datasets.fetch_california_housing()
+    x = california_housing.data[:, :2]  # 2 features for simplicity
+    y = california_housing.target
+    # Standardize the features (zero mean, unit variance)
+    x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
+
+    # Add a bias column (intercept) to x
+    x = np.c_[np.ones(x.shape[0]), x]
+
+    # Initialize parameters (theta)
+    theta_initial = np.zeros(x.shape[1])
+
+    # Set hyperparameters
+    alpha = 0.1
+    learning_rate = 0.01
+    max_iterations = 1000
+
+    optimized_theta = ridge_gradient_descent(
+        x, y, theta_initial, alpha, learning_rate, max_iterations
+    )
+    print(f"Optimized theta: {optimized_theta}")
+
+    # Prediction
+    def predict(x, theta):
+        return np.dot(x, theta)
+
+    y_pred = predict(x, optimized_theta)
+
+    # Plot predicted vs actual values
+    plt.figure(figsize=(10, 6))
+    plt.scatter(y, y_pred, color="b", label="Predictions vs Actual")
+    plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit")
+    plt.xlabel("Actual values")
+    plt.ylabel("Predicted values")
+    plt.title("Ridge Regression: Actual vs Predicted Values")
+    plt.legend()
+    plt.show()