diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index 5b1e663116cc..c1715f554fd1 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -13,10 +13,12 @@
 # dependencies = [
 #     "httpx",
 #     "numpy",
+#     "matplotlib",
 # ]
 # ///
 
 import httpx
+import matplotlib.pyplot as plt
 import numpy as np
 
 
@@ -102,12 +104,17 @@ def run_linear_regression(data_x, data_y):
 
     theta = np.zeros((1, no_features))
 
+    err = []
+
     for i in range(iterations):
         theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
         error = sum_of_square_error(data_x, data_y, len_data, theta)
-        print(f"At Iteration {i + 1} - Error is {error:.5f}")
+        err.append(error)
 
-    return theta
+        if i % 1000 == 0:
+            print(f"At Iteration {i + 1} - Error is {error:.5f}")
+
+    return theta, err
 
 
 def mean_absolute_error(predicted_y, original_y):
@@ -125,6 +132,44 @@ def mean_absolute_error(predicted_y, original_y):
     return total / len(original_y)
 
 
+# visualization
+def plot_regression(data_x, data_y, theta):
+    """
+    Plot regression line with dataset points
+    """
+
+    x = np.array(data_x[:, 1]).flatten()
+    y = np.array(data_y).flatten()
+
+    predictions = theta[0, 0] + theta[0, 1] * x
+
+    plt.scatter(x, y)
+
+    plt.plot(x, predictions)
+
+    plt.xlabel("ADR")
+    plt.ylabel("Rating")
+
+    plt.title("Linear Regression Best Fit")
+
+    plt.show()
+
+
+def plot_loss(err):
+    """
+    Plot training loss curve
+    """
+
+    plt.plot(err)
+
+    plt.xlabel("Iterations")
+    plt.ylabel("Loss")
+
+    plt.title("Training Loss Curve")
+
+    plt.show()
+
+
 def main():
     """Driver function"""
     data = collect_dataset()
@@ -133,7 +178,11 @@ def main():
     data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
     data_y = data[:, -1].astype(float)
 
-    theta = run_linear_regression(data_x, data_y)
+    theta, err = run_linear_regression(data_x, data_y)
+
+    plot_regression(data_x, data_y, theta)
+    plot_loss(err)
+
     len_result = theta.shape[1]
     print("Resultant Feature vector : ")
     for i in range(len_result):