1. 准备数据 (Prepare the data)
import numpy as np
import matplotlib.pyplot as plt

# Prepare the data: 141 evenly spaced sample points on [-1, 6]
# (step 0.05), used as the x-axis for every plot below.
plot_x = np.linspace(-1., 6., 141)
plot_x
array([-1. , -0.95, -0.9 , -0.85, -0.8 , -0.75, -0.7 , -0.65, -0.6 , -0.55, -0.5 , -0.45, -0.4 , -0.35, -0.3 , -0.25, -0.2 , -0.15, -0.1 , -0.05, 0. , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. , 1.05, 1.1 , 1.15, 1.2 , 1.25, 1.3 , 1.35, 1.4 , 1.45, 1.5 , 1.55, 1.6 , 1.65, 1.7 , 1.75, 1.8 , 1.85, 1.9 , 1.95, 2. , 2.05, 2.1 , 2.15, 2.2 , 2.25, 2.3 , 2.35, 2.4 , 2.45, 2.5 , 2.55, 2.6 , 2.65, 2.7 , 2.75, 2.8 , 2.85, 2.9 , 2.95, 3. , 3.05, 3.1 , 3.15, 3.2 , 3.25, 3.3 , 3.35, 3.4 , 3.45, 3.5 , 3.55, 3.6 , 3.65, 3.7 , 3.75, 3.8 , 3.85, 3.9 , 3.95, 4. , 4.05, 4.1 , 4.15, 4.2 , 4.25, 4.3 , 4.35, 4.4 , 4.45, 4.5 , 4.55, 4.6 , 4.65, 4.7 , 4.75, 4.8 , 4.85, 4.9 , 4.95, 5. , 5.05, 5.1 , 5.15, 5.2 , 5.25, 5.3 , 5.35, 5.4 , 5.45, 5.5 , 5.55, 5.6 , 5.65, 5.7 , 5.75, 5.8 , 5.85, 5.9 , 5.95, 6. ])
2. 可视化 (Visualize)
# Visualize the loss curve: a parabola y = (x - 2.5)^2 - 1
# with its minimum of -1 at x = 2.5.
plot_y = (plot_x - 2.5) ** 2 - 1.
plt.plot(plot_x, plot_y)
plt.show()

3. 迭代过程 (The iteration process)
epsilon = 1e-8  # convergence tolerance on the change in loss
eta = 0.1       # learning rate


def J(theta):
    """Loss function: parabola with minimum -1 at theta = 2.5."""
    return (theta - 2.5) ** 2 - 1.


def dJ(theta):
    """Derivative of J with respect to theta."""
    return 2 * (theta - 2.5)


# Plain gradient descent starting from theta = 0: step opposite the
# gradient until the loss stops improving by more than epsilon.
theta = 0.0
while True:
    gradient = dJ(theta)
    last_theta = theta
    theta -= eta * gradient
    if abs(J(theta) - J(last_theta)) < epsilon:
        break

print(theta)     # 2.499891109642585
print(J(theta))  # -0.99999998814289
# Re-run the descent, this time recording every theta visited so the
# path can be drawn on top of the loss curve.
theta = 0.0
theta_history = [theta]
while True:
    gradient = dJ(theta)
    last_theta = theta
    theta -= eta * gradient
    theta_history.append(theta)
    if abs(J(theta) - J(last_theta)) < epsilon:
        break

# Overlay the descent path (red '+' markers) on the loss curve.
plt.plot(plot_x, J(plot_x))
plt.plot(np.array(theta_history), J(np.array(theta_history)),
         color="r", marker='+')
plt.show()

# 46 entries: the initial theta plus 45 update steps before convergence.
len(theta_history) # 46
4. 封装代码 (Encapsulate the code)
theta_history = []


def gradient_descent(initial_theta, eta, epsilon=1e-8):
    """Run gradient descent on J from initial_theta with step size eta.

    Every theta visited (including the start) is appended to the
    module-level list theta_history; iteration stops once the loss
    changes by less than epsilon between consecutive steps.
    """
    theta = initial_theta
    theta_history.append(initial_theta)
    while True:
        last_theta = theta
        theta = theta - eta * dJ(theta)
        theta_history.append(theta)
        if abs(J(theta) - J(last_theta)) < epsilon:
            break


def plot_theta_history():
    """Draw the loss curve with the recorded descent path in red."""
    plt.plot(plot_x, J(plot_x))
    plt.plot(np.array(theta_history), J(np.array(theta_history)),
             color="r", marker='+')
    plt.show()
# A smaller learning rate still converges, just in more steps.
eta = 0.01
theta_history = []
gradient_descent(0, eta)
plot_theta_history()

# With eta = 0.01 the descent needs 424 recorded thetas (423 steps).
len(theta_history) # 424
# An even smaller learning rate: convergence is slower still.
eta = 0.001
theta_history = []
gradient_descent(0, eta)
plot_theta_history()

# With eta = 0.001 the descent needs 3682 recorded thetas (3681 steps).
len(theta_history) # 3682
5. 步伐略大 (Learning rate slightly too large)
# eta = 0.8 overshoots the minimum each step but still converges,
# bouncing from side to side of theta = 2.5.
eta = 0.8
theta_history = []
gradient_descent(0, eta)
plot_theta_history()

6. 步伐太大 (Learning rate far too large)
# eta = 1.1 makes the iterates diverge: (theta - 2.5)**2 grows each
# step until the float range is exceeded.
eta = 1.1
theta_history = []
gradient_descent(0, eta)  # OverflowError: (34, 'Result too large')
def J(theta):
    """Loss function (theta - 2.5)**2 - 1.

    Returns +inf when the square exceeds the float range, so a
    diverging descent sees an infinite loss instead of crashing.
    Note: the original used a bare ``except:``, which would also have
    swallowed unrelated errors (even KeyboardInterrupt); only
    OverflowError can legitimately arise from the squaring here.
    """
    try:
        return (theta - 2.5) ** 2 - 1.
    except OverflowError:
        return float('inf')


def gradient_descent(initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    """Gradient descent on J, capped at n_iters steps.

    The iteration cap guarantees termination even when eta is too
    large for the descent to converge. Every theta visited is
    appended to the module-level list theta_history; iteration also
    stops early once the loss change drops below epsilon.
    """
    theta = initial_theta
    i_iter = 0
    theta_history.append(initial_theta)
    while i_iter < n_iters:
        gradient = dJ(theta)
        last_theta = theta
        theta = theta - eta * gradient
        theta_history.append(theta)
        if abs(J(theta) - J(last_theta)) < epsilon:
            break
        i_iter += 1
    return
# With the iteration cap, the diverging run now stops cleanly after
# exhausting all n_iters steps instead of overflowing.
eta = 1.1
theta_history = []
gradient_descent(0, eta)
len(theta_history) # 10001
# Limit the diverging run to 10 iterations so the outward-spiraling
# path is small enough to visualize.
eta = 1.1
theta_history = []
gradient_descent(0, eta, n_iters=10)
plot_theta_history()
