PyTorch学习笔记之Tensors
PyTorch Tensors are just like numpy arrays, but they can run on a GPU. There is no built-in notion of a computational graph, gradients, or deep learning. Here we fit a two-layer net using PyTorch Tensors:
import torch

# Legacy dtype alias used throughout this tutorial; on modern PyTorch,
# torch.randn already returns float32, so .type(dtype) is a no-op here.
dtype = torch.FloatTensor

# Step 1: create random tensors for the data and the weights.
# N is the batch size; D_in is the input dimension; H is the hidden
# dimension; D_out is the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in).type(dtype)    # inputs,  size 64x1000
y = torch.randn(N, D_out).type(dtype)   # targets, size 64x10
w1 = torch.randn(D_in, H).type(dtype)   # layer-1 weights, size 1000x100
w2 = torch.randn(H, D_out).type(dtype)  # layer-2 weights, size 100x10
Step 2: forward pass — compute predictions and the loss (shown here for a single iteration so the intermediate tensors can be inspected):
learning_rate = 1e-6
# One iteration only, so the intermediate tensors can be printed/inspected.
for t in range(1):
    # Step 2: forward pass — compute predictions and loss.
    h = x.mm(w1)              # mm = matrix multiply: (64x1000) @ (1000x100) -> 64x100
                              # (x.mul(w1) would be elementwise and raises a size error)
    h_relu = h.clamp(min=0)   # ReLU: elementwise max(h, 0); clamp bounds values to [min, max]
    y_pred = h_relu.mm(w2)    # (64x100) @ (100x10) -> 64x10
    loss = (y_pred - y).pow(2).sum()  # sum of squared errors; pow(2) squares elementwise
Step 3: backward pass — manually compute the gradients of the loss with respect to the weights (no autograd yet):
for t in range(500):
    # Step 2: forward pass — compute predictions and loss.
    h = x.mm(w1)                      # (64x1000) @ (1000x100) -> 64x100
    h_relu = h.clamp(min=0)           # ReLU: elementwise max(h, 0) -> 64x100
    y_pred = h_relu.mm(w2)            # (64x100) @ (100x10) -> 64x10
    loss = (y_pred - y).pow(2).sum()  # sum of squared errors

    # Step 3: backward pass — manually compute gradients via the chain rule.
    grad_y_pred = 2.0 * (y_pred - y)      # dLoss/dy_pred, size 64x10
    grad_w2 = h_relu.t().mm(grad_y_pred)  # .t() is transpose; (100x64) @ (64x10) -> 100x10
    grad_h_relu = grad_y_pred.mm(w2.t())  # (64x10) @ (10x100) -> 64x100
    grad_h = grad_h_relu.clone()          # copy so the in-place zeroing below
                                          # does not modify grad_h_relu
    grad_h[h < 0] = 0                     # ReLU gradient: zero where pre-activation < 0
    grad_w1 = x.t().mm(grad_h)            # (1000x64) @ (64x100) -> 1000x100
Step 4: gradient descent — update the weights in place using the gradients computed above (this belongs inside the training loop):
# Step 4: gradient-descent step on the weights (in-place updates).
w1 -= learning_rate * grad_w1  # size 1000x100
w2 -= learning_rate * grad_w2  # size 100x10