线性回归

基本

  • 模型

  • 训练

  • 数据

  • 损失函数

  • 优化


19.3 μs

y=Xw+b

4.5 μs

根据误差,利用梯度对参数 W 和 b 进行更新。

$W = W - \dfrac{lr}{batchsize} \cdot \dfrac{\partial L(W, b)}{\partial W}$

其中 batchsize 是每批训练数据的大小(mini-batch size),lr 是学习率。

39.9 μs

例子

20.8 μs
63 s
371 ms
x1
1×1000 Array{Float64,2}:
 1.1902678809862768  2.04817970778924  1.142650902867199  …  -0.5439527567544324
31.6 ms
x2
1×1000 Array{Float64,2}:
 -1.6203711972374109  0.11647271641359806  …  2.3396203656224523  -0.6175729562392942
11.9 μs
m
Chain(Dense(2, 1))
336 ms
loss (generic function with 1 method)
18.3 μs
opt
2.2 ms
19.8 s
1×1000 Array{Float32,2}:
 1.9709918  9.5093975  6.4021225  4.3541403  …  5.4332256  7.535674  0.97789097
1 s
760 ns
Predict Value -50 -40 -30 -20 -10 0 10 20 30 40 50 60 -40 -39 -38 -37 -36 -35 -34 -33 -32 -31 -30 -29 -28 -27 -26 -25 -24 -23 -22 -21 -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 -50 0 50 -40 -38 -36 -34 -32 -30 -28 -26 -24 -22 -20 -18 -16 -14 -12 -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 h,j,k,l,arrows,drag to pan i,o,+,-,scroll,shift-drag to zoom r,dbl-click to reset c for coordinates ? for help ? -50 -40 -30 -20 -10 0 10 20 30 40 50 60 -40 -39 -38 -37 -36 -35 -34 -33 -32 -31 -30 -29 -28 -27 -26 -25 -24 -23 -22 -21 -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 -50 0 50 -40 -38 -36 -34 -32 -30 -28 -26 -24 -22 -20 -18 -16 -14 -12 -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 reality value Pre vs Real
456 ms

详细步骤:👇

# Number of samples used per gradient update (mini-batch size).
batch_size = 10
#init w, b — random starting point for the weight vector (2 features) and bias
w = rand(2)
b = rand(1)

#learning rate for the SGD update step
lr = 0.3

#epoch — number of full passes over the training data
epoch = 3

# Affine (linear) model: prediction ŷ = X*w .+ b for design matrix X,
# weight vector w, and bias b (broadcast over rows).
function linreg(X, w, b)
	return X * w .+ b
end
#define Loss
# Sum of squared errors between predictions ŷ and targets y.
# `sum(abs2, ŷ .- y)` fuses the subtraction and squares each element
# inside the reduction, avoiding the extra temporary array that the
# two-step `(ŷ - y) .^ 2` form allocates.
function Loss(ŷ, y)
	return sum(abs2, ŷ .- y)
end

# Train linear-regression parameters with mini-batch SGD.
#
# Arguments:
#   batch_size — number of samples per gradient step
#   features   — (n_samples × n_features) design matrix
#   labels     — targets, one row per sample
#   w, b       — initial weight vector and bias
#   lr         — learning rate
#   epoch      — number of passes over the data
#
# Returns the updated `(w, b)`.
function train_model(batch_size, features, labels, w, b, lr, epoch)
	n_samples, _ = size(features)
	# Shuffle once so batches are drawn in random order.
	shuffled = Random.shuffle(1:n_samples)
	for _ in 1:epoch
		for start in range(1, n_samples, step = batch_size)
			# `start + batch_size - 1` gives exactly batch_size rows per
			# batch; the original `start + batch_size` made consecutive
			# batches overlap by one sample (off-by-one).
			idx = shuffled[start:min(start + batch_size - 1, n_samples)]
			X = features[idx, :]
			y = labels[idx, :]
			# Gradient of the summed squared error w.r.t. w and b
			# (implicit-params style; `gradient`/`params` come from Flux).
			gs = gradient(() -> Loss(linreg(X, w, b), y), Flux.params(w, b))
			# Divide by batch_size so the step uses the mean gradient.
			w -= lr / batch_size * gs[w]
			b -= lr / batch_size * gs[b]
		end
	end
	return w, b
end

# Fit the model starting from the random w, b above.
# NOTE(review): `features` and `labels` are defined in an earlier
# notebook cell not shown here — presumably built from x1/x2; verify.
ŵ, b̂ = train_model(batch_size, features, labels, w, b, lr, epoch)

# Predictions of the fitted affine model: ŷ = X*ŵ .+ b̂.
pre_y = features * ŵ .+ b̂

print(pre_y)
8.7 μs