import torch

# Build a tiny one-layer model: logits z = x @ w + b, scored against y.
x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output
# Gradients are tracked for w and b because they are created with
# requires_grad=True; x and y are plain tensors.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

Gradient function for z = <AddBackward0 object at 0x7f7a86429b10>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x7f7a86429b50>

tensor([[0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046]])
tensor([0.1212, 0.1953, 0.3046])

True
False

False

First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])

Second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])

Call after zeroing gradients
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])


import torch

# Inputs and targets: plain tensors, no gradient tracking.
x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output

# Weight and bias: switch gradient tracking on after creation.
w = torch.randn(5, 3).requires_grad_()
b = torch.randn(3).requires_grad_()

# Forward pass: affine map followed by binary cross-entropy on the logits.
z = torch.add(torch.matmul(x, w), b)
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, target=y)


# Show the backward node that autograd recorded for each computed tensor.
for caption, tensor in (
    ('Gradient function for z =', z),
    ('Gradient function for loss =', loss),
):
    print(caption, tensor.grad_fn)

Gradient function for z = <AddBackward0 object at 0x7f7a86429b10>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x7f7a86429b50>


# Backpropagate from the scalar loss, then show the gradients stored on
# the two tensors that were created with requires_grad=True.
loss.backward()
print(w.grad, b.grad, sep='\n')

tensor([[0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046],
        [0.1212, 0.1953, 0.3046]])
tensor([0.1212, 0.1953, 0.3046])


# Computed normally, the result is tracked by autograd.
z = x @ w + b
print(z.requires_grad)

# The same expression evaluated inside torch.no_grad() is not tracked.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False


# detach() returns a tensor that is cut off from the graph, so it no
# longer requires gradients.
z = x @ w + b
z_det = z.detach()
print(z_det.requires_grad)

False


# out is not a scalar, so every backward() call is given an explicit
# upstream gradient of ones with the same shape as inp.
inp = torch.eye(5, requires_grad=True)
out = (inp + 1) ** 2

# retain_graph=True keeps the graph so backward() can be called again.
out.backward(gradient=torch.ones_like(inp), retain_graph=True)
print("First call\n", inp.grad)

# A second backward pass adds to inp.grad rather than replacing it.
out.backward(gradient=torch.ones_like(inp), retain_graph=True)
print("\nSecond call\n", inp.grad)

# Reset the accumulated gradient in place before the next pass.
inp.grad.zero_()
out.backward(gradient=torch.ones_like(inp), retain_graph=True)
print("\nCall after zeroing gradients\n", inp.grad)

First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])

Second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])

Call after zeroing gradients
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])

「PyTorch入門 5. 自動微分」

Automatic Differentiation with `torch.autograd`

テンソル、関数、計算グラフの関係

勾配の計算

勾配計算をしない方法

計算グラフについて補足

補注：テンソルに対する勾配とヤコビ行列

さらなる詳細

「PyTorch入門 5. 自動微分」

Automatic Differentiation with `torch.autograd`

テンソル、関数、計算グラフの関係

勾配の計算

勾配計算をしない方法

計算グラフについて補足

補注：テンソルに対する勾配とヤコビ行列

さらなる詳細

Automatic Differentiation with `torch.autograd`