Implementing Linear Regression with NumPy
Suppose we have a set of points in the 2D plane. We want to find a linear function $y = wx + b$ that minimizes the sum of squared errors $\sum (wx + b - y)^2$ over all points.
We therefore define the per-point loss $loss = (wx + b - y)^2$. The code to compute the average loss over all points is as follows:
```python
import numpy as np

# compute the average loss (mean squared error) over all points
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (w * x + b)) ** 2  # accumulate squared error
    return totalError / float(len(points))   # average
```
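Since `points` is an N×2 NumPy array, the same mean squared error can also be computed without an explicit loop. A minimal vectorized sketch (assuming `numpy` is imported as above and the same column layout):

```python
def compute_error_vectorized(b, w, points):
    x, y = points[:, 0], points[:, 1]        # column 0: x, column 1: y
    return np.mean((y - (w * x + b)) ** 2)   # mean squared error
```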
Next we update $w$ and $b$ with gradient descent:
$w' = w - lr \cdot \frac{\partial loss}{\partial w}$, $b' = b - lr \cdot \frac{\partial loss}{\partial b}$, where $\frac{\partial loss}{\partial w} = 2x(wx + b - y)$ and $\frac{\partial loss}{\partial b} = 2(wx + b - y)$.
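Both partial derivatives follow from the chain rule applied to the squared error of a single point (the code below then averages them over all $N$ points):

$$\frac{\partial loss}{\partial w} = \frac{\partial}{\partial w}(wx + b - y)^2 = 2\,(wx + b - y)\cdot x, \qquad \frac{\partial loss}{\partial b} = \frac{\partial}{\partial b}(wx + b - y)^2 = 2\,(wx + b - y)$$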
```python
# compute gradient and take one gradient descent step
def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += 2 * ((w_current * x) + b_current - y)      # accumulate d(loss)/db
        w_gradient += 2 * x * ((w_current * x) + b_current - y)  # accumulate d(loss)/dw
    b_gradient = b_gradient / N  # average over all points
    w_gradient = w_gradient / N
    new_b = b_current - (learningRate * b_gradient)
    new_w = w_current - (learningRate * w_gradient)
    return [new_b, new_w]
```
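The same step can be written with vectorized NumPy operations, which is shorter and faster on large datasets. A sketch equivalent to the loop above:

```python
def step_gradient_vectorized(b_current, w_current, points, learning_rate):
    x, y = points[:, 0], points[:, 1]
    error = (w_current * x + b_current) - y   # per-point residual
    b_gradient = 2 * np.mean(error)           # averaged d(loss)/db
    w_gradient = 2 * np.mean(x * error)       # averaged d(loss)/dw
    return [b_current - learning_rate * b_gradient,
            w_current - learning_rate * w_gradient]
```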
Finally, fix a number of iterations and repeatedly apply this update to $w$ and $b$:
```python
def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    # num_iterations: number of gradient descent steps
    b = starting_b
    w = starting_w
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]
```
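To check that training actually converges, it can help to record the loss as you iterate. A minimal sketch of such a variant (logging every 100 steps is an arbitrary choice):

```python
def gradient_descent_with_logging(points, b, w, learning_rate, num_iterations):
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
        if i % 100 == 0:  # log every 100 steps (arbitrary interval)
            print("iter {0}: loss = {1}".format(
                i, compute_error_for_line_given_points(b, w, points)))
    return [b, w]
```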
The main function:
```python
from random import random

def run():
    points = np.genfromtxt("data.txt", delimiter=",")
    learning_rate = 0.0001
    initial_b = random()  # random initial parameters
    initial_w = random()
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w,
                  compute_error_for_line_given_points(initial_b, initial_w, points)))
    print("Running...")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations: b = {1}, w = {2}, error = {3}"
          .format(num_iterations, b, w,
                  compute_error_for_line_given_points(b, w, points)))

run()
```
data.txt data file download
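If the data file is unavailable, a synthetic dataset in the same comma-separated x,y format can be generated for testing. A sketch (the true parameters w = 1.5, b = 0.5 and the noise level are arbitrary choices):

```python
import numpy as np

# generate 100 points along y = 1.5x + 0.5 with Gaussian noise (assumed parameters)
x = np.random.uniform(0, 100, 100)
y = 1.5 * x + 0.5 + np.random.normal(0, 10, size=100)
np.savetxt("data.txt", np.column_stack([x, y]), delimiter=",")
```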