import numpy as np
import matplotlib
import matplotlib.pyplot as plt
First, let us define some data, giving the number of cigarettes smoked daily and the age of death for 30 people:
daily_cig_smoked = np.array([36, 14, 18, 35, 14, 33, 2, 6, 14, 29, 21, 30, 0, 27, 16, 16, 29,
2, 23, 16, 18, 14, 4, 32, 30, 25, 14, 20, 25, 21])
age_of_death = np.array([69.6, 92.4, 75.7, 68. , 77.1, 69.6, 94.1, 91.3, 87.1, 78.4, 75.3,
66.5, 88.1, 77.2, 76.9, 86.7, 70.5, 94.2, 74.5, 71.1, 78.7, 78.6,
89.4, 65.5, 69.2, 78. , 80.3, 77.4, 78.1, 74.9])
Just to get a nicer visualization of the data in table format, we will import the pandas library here. (There is no need to understand the following cell; it just displays our data as a table):
import pandas
pandas.DataFrame(np.vstack((daily_cig_smoked, age_of_death)).T, columns=("daily cigarettes", "age of death"))
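Before fitting any model, we can quantify how strongly the two variables are related. As a quick sanity check (just a sketch, using only NumPy, which is already imported), the Pearson correlation coefficient should come out strongly negative:
# Off-diagonal entry of the correlation matrix: the correlation between
# daily cigarettes and age of death (close to -1 means a strong negative
# linear relationship)
print(np.corrcoef(daily_cig_smoked, age_of_death)[0, 1])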
Now, let us plot our data points:
plt.plot(daily_cig_smoked, age_of_death, "o")
Let us define our model: a linear model with parameters $\theta_0$ and $\theta_1$: $$age = \theta_0 + \theta_1\times cig$$
def model(x, thetas):
    # Unpack the two parameters: intercept and slope
    thetas_0, thetas_1 = thetas
    return thetas_0 + thetas_1 * x
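For example, with hand-picked parameter values (hypothetical numbers chosen just for illustration), the model can be evaluated for a single input or for a whole array at once:
example_thetas = np.array([90.0, -0.5])  # hypothetical: intercept 90 years, slope -0.5 year per daily cigarette
print(model(10, example_thetas))  # 90 - 0.5*10 = 85.0
print(model(np.array([0, 20, 40]), example_thetas))  # vectorized: [90. 80. 70.]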
Let us also define the loss (i.e. the Mean Squared Error): $$loss(\vec{\theta}) = \frac{1}{N}\cdot\sum_i \left(model(x_i, \vec{\theta})-y_i\right)^2$$
def compute_loss(thetas, input_values, output_values):
    # Mean of the squared differences between the data and the model predictions
    loss = np.mean((output_values - model(input_values, thetas))**2)
    return loss
Let us try to compute the loss and visualize this model for some random thetas:
thetas = np.random.randn(2)
print("The current loss is:", compute_loss(thetas, daily_cig_smoked, age_of_death))
x_values = np.linspace(0,40)
plt.plot(daily_cig_smoked, age_of_death, "o")
plt.plot(x_values, model(x_values, thetas))
We can now derive the gradient of the loss with respect to each parameter: $$\frac{\partial\, loss}{\partial \theta_0} = \frac{2}{N}\cdot\sum_i (\theta_0+ \theta_1 \times x_i-y_i)$$ $$\frac{\partial\, loss}{\partial \theta_1} = \frac{2}{N}\cdot\sum_i x_i\times(\theta_0+ \theta_1 \times x_i-y_i)$$
In practice, the code below works with $$diff_i = y_i - (\theta_0+ \theta_1 \times x_i)$$ (note the sign: data minus model prediction), so the two gradient components become: $$-\frac{2}{N}\cdot\sum_i diff_i$$ $$-\frac{2}{N}\cdot\sum_i x_i\times diff_i$$
def gradient_loss(thetas, input_values, output_values):
    # diff_i = y_i - model(x_i): positive where the model underestimates
    diff = output_values - model(input_values, thetas)
    grad_theta_0 = np.mean(-2*diff)
    grad_theta_1 = np.mean(-2*input_values*diff)
    return np.array([grad_theta_0, grad_theta_1])
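Before using this gradient, it is worth a quick sanity check (this cell is only a sketch and is not needed for the rest of the notebook): a central finite-difference approximation of the loss should closely match the analytic gradient:
# Numerical gradient check: perturb each parameter by a small epsilon
eps = 1e-6
thetas_check = np.random.randn(2)  # throwaway parameters, introduced just for this check
analytic = gradient_loss(thetas_check, daily_cig_smoked, age_of_death)
for i in range(2):
    shift = np.zeros(2)
    shift[i] = eps
    numerical = (compute_loss(thetas_check + shift, daily_cig_smoked, age_of_death)
                 - compute_loss(thetas_check - shift, daily_cig_smoked, age_of_death)) / (2*eps)
    print("theta_%d: analytic = %g, numerical = %g" % (i, analytic[i], numerical))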
We start by initializing our two parameters, thetas[0] and thetas[1], at random:
thetas = np.random.randn(2)
We can now observe the first few iterations of gradient descent by repeatedly pressing Ctrl+Enter in the following cell:
axes = plt.gca()
#axes.set_ylim([50,100])
plt.plot(daily_cig_smoked, age_of_death, "o")
plt.plot(x_values, model(x_values, thetas))
print("loss:", compute_loss(thetas, daily_cig_smoked, age_of_death))
# One gradient descent step with learning rate 0.002
thetas = thetas - 0.002*gradient_loss(thetas, daily_cig_smoked, age_of_death)
In this specific case, gradient descent is quite slow, so let us execute many iterations (10,000) at once:
for num_iter in range(10000):
    thetas = thetas - 0.002*gradient_loss(thetas, daily_cig_smoked, age_of_death)
plt.plot(daily_cig_smoked, age_of_death, "o")
plt.plot(x_values, model(x_values, thetas))
print("loss:", compute_loss(thetas, daily_cig_smoked, age_of_death))
print("Final values of thetas:", thetas[0], thetas[1])
print("If I smoke 10 cigarets a day, the model predict that I will die at", model(10, thetas))
We could actually have obtained a similar result by using a linear regression function from a library, e.g. SciPy:
from scipy import stats
stats.linregress(daily_cig_smoked, age_of_death)
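The object returned by linregress exposes the fitted line through its slope and intercept attributes, so we can verify that it agrees with our gradient-descent solution:
result = stats.linregress(daily_cig_smoked, age_of_death)
# Compare the closed-form least-squares fit with the parameters found by gradient descent
print("scipy:            intercept =", result.intercept, " slope =", result.slope)
print("gradient descent: intercept =", thetas[0], " slope =", thetas[1])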