Notes on the 2023 Edition of Coursera's Machine Learning Course (Part 1)

The topic of these notes:

Linear regression

The data used in this post comes from Problem 1 of CUHK(SZ) MAT2041 22 Fall HW05; the first two arrays (x_train, y_train) are the inputs, and the last one (z_train) is the output.

Part 1: Initial Setup

import numpy as np
import matplotlib.pyplot as plt

# training data: two input features (x, y) and one target (z)
x_train = np.array([7.0, 18.0, 3.0])
y_train = np.array([10.0, 3.0, 10.0])
z_train = np.array([30.0, 40.0, 20.0])

These points are shown on the plot below:

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(x_train, y_train, z_train, marker="x", c='r')
plt.show()

Part 2: The Linear Regression Model

The model used here is $z = w_1 x_1 + w_2 x_2 + b$, where the two features $x_1$ and $x_2$ are the x and y values above.
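As a small illustration, the model can be wrapped in a helper function (the name predict is my own, not from the course):

def predict(x, y, w_1, w_2, b):
    # the model is a plane: two weighted features plus an intercept
    return w_1 * x + w_2 * y + b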

The cost function is
$$J(w_1, w_2, b) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(f_{w,b}(x^{(i)}, y^{(i)}) - z^{(i)}\bigr)^2,$$
where $f_{w,b}(x, y) = w_1 x + w_2 y + b$ and $m$ is the number of training examples.

def compute_cost(w_1, w_2, b):
    # x_train, y_train, z_train are read from module scope
    m = len(x_train)  # number of training examples
    summ = 0
    for i in range(m):
        summ += (w_1*x_train[i] + w_2*y_train[i] + b - z_train[i])**2
    return (1/(2*m)) * summ
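The same cost can also be computed without an explicit loop. A minimal vectorized sketch (the function name is my own), assuming the same module-level arrays:

def compute_cost_vectorized(w_1, w_2, b):
    # residuals for all m examples at once
    err = w_1*x_train + w_2*y_train + b - z_train
    # (1/(2m)) * sum(err^2) is the same as mean(err^2) / 2
    return np.mean(err**2) / 2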

Next, the partial derivatives of the cost:
$$\frac{\partial J}{\partial w_1} = \frac{1}{m}\sum_{i=1}^{m} x^{(i)}\bigl(w_1 x^{(i)} + w_2 y^{(i)} + b - z^{(i)}\bigr)$$
$$\frac{\partial J}{\partial w_2} = \frac{1}{m}\sum_{i=1}^{m} y^{(i)}\bigl(w_1 x^{(i)} + w_2 y^{(i)} + b - z^{(i)}\bigr)$$
$$\frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m} \bigl(w_1 x^{(i)} + w_2 y^{(i)} + b - z^{(i)}\bigr)$$

def partial_w_1(w_1, w_2, b):
    m = len(x_train)  # number of training examples
    summ = 0
    for i in range(m):
        summ += x_train[i] * (w_1*x_train[i] + w_2*y_train[i] + b - z_train[i])
    return (1/m) * summ

def partial_w_2(w_1, w_2, b):  # could be unified with a decorator, but keeping it simple here
    m = len(x_train)
    summ = 0
    for i in range(m):
        summ += y_train[i] * (w_1*x_train[i] + w_2*y_train[i] + b - z_train[i])
    return (1/m) * summ

def partial_b(w_1, w_2, b):  # could be unified with a decorator, but keeping it simple here
    m = len(x_train)
    summ = 0
    for i in range(m):
        summ += w_1*x_train[i] + w_2*y_train[i] + b - z_train[i]
    return (1/m) * summ

def calculate_gradient(w_1, w_2, b):
    # return all three partial derivatives at the current parameters
    return partial_w_1(w_1, w_2, b), partial_w_2(w_1, w_2, b), partial_b(w_1, w_2, b)
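The three loops above can be collapsed into one vectorized computation. A minimal sketch (the function name is my own), assuming the same module-level arrays:

def calculate_gradient_vectorized(w_1, w_2, b):
    # residuals for all m examples at once
    err = w_1*x_train + w_2*y_train + b - z_train
    # each partial is the mean residual, weighted by its feature
    return np.mean(x_train * err), np.mean(y_train * err), np.mean(err)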

Gradient descent

def gradient_descent(w_1, w_2, b):
    # compute all partials first, then update all parameters simultaneously
    dJ_dw1, dJ_dw2, dJ_db = calculate_gradient(w_1, w_2, b)
    w_1 = w_1 - alpha * dJ_dw1
    w_2 = w_2 - alpha * dJ_dw2
    b = b - alpha * dJ_db
    cost = compute_cost(w_1, w_2, b)
    return w_1, w_2, b, cost

## main function ##
w_1, w_2, b = 0.0, 0.0, 0.0  # initial parameters (example values; not given in the original)
alpha = 0.001                # learning rate (example value)
num_iter = 10000             # number of iterations (example value)
cost = []
for i in range(num_iter):
    w_1, w_2, b, now_cost = gradient_descent(w_1, w_2, b)
    cost.append(now_cost)
## results ##
print(f"w1:{w_1}, w2:{w_2}, b:{b}")
print(f"model: z = {w_1}x + {w_2}y + {b}")
cost = np.array(cost)
plt.plot(range(1, num_iter+1), cost, marker='x')
plt.show()
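Instead of a fixed num_iter, a hedged variant stops once the cost stops improving (the function name and the tol parameter are my own; it reuses the alpha set above):

def train(w_1=0.0, w_2=0.0, b=0.0, tol=1e-9, max_iter=100000):
    history = []
    prev = compute_cost(w_1, w_2, b)
    for _ in range(max_iter):
        w_1, w_2, b, c = gradient_descent(w_1, w_2, b)
        history.append(c)
        if abs(prev - c) < tol:  # stop when the cost barely changes
            break
        prev = c
    return w_1, w_2, b, history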

Visualizing the result

fig = plt.figure(figsize=(12, 8), facecolor='lightyellow')
# create a 3D coordinate system
ax = fig.add_subplot(projection='3d')
ax.scatter(x_train, y_train, z_train, marker="x", c='r')
x = np.linspace(0, 50, 50)
y = np.linspace(0, 50, 50)
X, Y = np.meshgrid(x, y)
# draw the fitted plane (note: use the meshgrid Y, not the 1-D y)
ax.plot_surface(X, Y, w_1*X + w_2*Y + b, color='g', alpha=0.6)
ax.set(xlabel='X', ylabel='Y', zlabel='Z', xlim=(0, 50), ylim=(0, 50), zlim=(0, 50))

# adjust the viewing angle
ax.view_init(elev=30, azim=75)  # elevation and azimuth

plt.show()

Sanity check: for x = 10, y = 8, the MAT2041 HW05 solution gives z ≈ 31.9.

# predict z at the test point
x_test = 10
y_test = 8
print(f"z = {w_1*x_test + w_2*y_test + b}")