from pathlib import Path
import requests


import pickle

# Read the pickled MNIST splits from disk. pickle can execute arbitrary code
# while loading, so this file must come from a trusted source.
with open("data/mnist/mnist.pkl", "rb") as f:
    mnist_splits = pickle.load(f)

# The pickle holds three entries; the third is discarded into `_`.
(x_train, y_train), (x_valid, y_valid), _ = mnist_splits


from matplotlib import pyplot
import numpy as np

# Render the first training sample as a 28x28 grayscale image, write it to
# "m.png", and then print the shape of the training inputs.
first_digit = x_train[0].reshape((28, 28))
pyplot.imshow(first_digit, cmap="gray")
pyplot.savefig("m.png")
pyplot.close()
print(x_train.shape)

torch.Size([60000, 784])


import torch

# Make sure all four splits are torch tensors. torch.as_tensor accepts NumPy
# arrays as well as existing tensors, so it avoids the "To copy construct from
# a tensor ..." UserWarning that torch.tensor() emits when its input is
# already a tensor.
x_train, y_train, x_valid, y_valid = map(
    torch.as_tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape  # n: number of training rows, c: values per row
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) tensor([5, 0, 4,  ..., 5, 6, 8])
torch.Size([60000, 784])
tensor(0) tensor(9)

/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  after removing the cwd from sys.path.


import math

# Random 784x10 weight matrix scaled by 1/sqrt(784). Gradient tracking is
# switched on only after the scaling, so the scaling itself is not recorded
# and `weights` stays a leaf tensor.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
# One bias per output class, starting at zero.
bias = torch.zeros(10, requires_grad=True)


def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    """Apply the linear layer (module-level ``weights``/``bias``) to a batch
    and return the log-softmax of the resulting scores."""
    scores = xb @ weights + bias
    return log_softmax(scores)


bs = 64  # batch size

# Run the model once on the first `bs` training rows.
xb = x_train[:bs]
preds = model(xb)
preds[0], preds.shape  # notebook-style echo; has no effect when run as a script
print(preds[0], preds.shape)

tensor([-1.9488, -2.2116, -2.4457, -2.4929, -2.6134, -2.4335, -2.5554, -2.0149,
        -2.1891, -2.3574], grad_fn=<SelectBackward>) torch.Size([64, 10])


def nll(input, target):
    """Negative log-likelihood loss.

    For every row of ``input`` the entry at that row's ``target`` index is
    selected; the result is the negated mean of those entries.
    """
    row_ids = range(target.shape[0])
    picked = input[row_ids, target]
    return -picked.mean()


loss_func = nll


# Labels for the first `bs` training rows, and the loss of the predictions
# computed above against them.
yb = y_train[:bs]
print(loss_func(preds, yb))

tensor(2.3800, grad_fn=<NegBackward>)


def accuracy(out, yb):
    """Return the fraction of rows of ``out`` whose highest-scoring column
    index equals the corresponding label in ``yb`` (as a float tensor)."""
    predicted_classes = torch.argmax(out, dim=1)
    is_correct = predicted_classes == yb
    return is_correct.float().mean()


# Accuracy of the predictions `preds` against the labels `yb`.
print(accuracy(preds, yb))

tensor(0.0156)


from IPython.core.debugger import set_trace

lr = 0.05  # learning rate
epochs = 10  # how many epochs to train for

# Hand-written mini-batch SGD over the module-level `weights` and `bias`.
n_batches = (n - 1) // bs + 1  # number of batches, last one may be short
for epoch in range(epochs):
    for i in range(n_batches):
        # Uncomment to step through a batch in the debugger: set_trace()
        rows = slice(i * bs, i * bs + bs)
        xb, yb = x_train[rows], y_train[rows]
        loss = loss_func(model(xb), yb)

        loss.backward()
        # The update itself must not be recorded for the next backward pass.
        with torch.no_grad():
            for param in (weights, bias):
                param -= param.grad * lr
                # backward() accumulates into .grad, so reset it every step.
                param.grad.zero_()


# Loss and accuracy on the last mini-batch seen by the loop.
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

tensor(0.1047, grad_fn=<NegBackward>) tensor(0.9688)


import torch.nn.functional as F

# Switch the loss to F.cross_entropy; the model below therefore returns the
# raw scores and no longer calls log_softmax itself.
loss_func = F.cross_entropy


def model(xb):
    """Return the raw class scores ``xb @ weights + bias`` for a batch."""
    scores = xb @ weights + bias
    return scores


print(loss_func(model(xb), yb), accuracy(model(xb), yb))

tensor(0.1047, grad_fn=<NllLossBackward>) tensor(0.9688)


from torch import nn

class Mnist_Logistic(nn.Module):
    """Linear classifier mapping 784 inputs to 10 scores, with the weight
    matrix and bias held as explicit ``nn.Parameter`` attributes."""

    def __init__(self):
        super().__init__()
        # Random weights scaled by 1/sqrt(784); the bias starts at zero.
        initial_weights = torch.randn(784, 10) / math.sqrt(784)
        self.weights = nn.Parameter(initial_weights)
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        """Return the raw scores for the batch ``xb``."""
        scores = xb @ self.weights + self.bias
        return scores


# `model` is rebound to a freshly initialised Mnist_Logistic instance.
model = Mnist_Logistic()


# Loss of the new, not yet trained model on the current mini-batch.
print(loss_func(model(xb), yb))

tensor(2.3403, grad_fn=<NllLossBackward>)


def fit():
    """Train the module-level ``model`` with hand-written mini-batch SGD.

    Reads ``epochs``, ``n``, ``bs``, ``lr``, ``loss_func``, ``x_train`` and
    ``y_train`` from module scope and updates the model's parameters in place.
    """
    n_batches = (n - 1) // bs + 1  # number of batches, last one may be short
    for epoch in range(epochs):
        for batch_idx in range(n_batches):
            rows = slice(batch_idx * bs, batch_idx * bs + bs)
            loss = loss_func(model(x_train[rows]), y_train[rows])

            loss.backward()
            # Keep the parameter update out of the autograd graph.
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                # Clear the accumulated gradients before the next batch.
                model.zero_grad()

# Train the current `model`, then print its loss on the mini-batch (xb, yb).
fit()


print(loss_func(model(xb), yb))

tensor(0.1034, grad_fn=<NllLossBackward>)


class Mnist_Logistic(nn.Module):
    """Linear classifier mapping 784 inputs to 10 scores, built on a single
    ``nn.Linear`` layer stored as ``self.lin``."""

    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(in_features=784, out_features=10)

    def forward(self, xb):
        """Return the raw scores produced by the linear layer."""
        scores = self.lin(xb)
        return scores


# Fresh, untrained instance; print its loss on the current mini-batch.
model = Mnist_Logistic()
print(loss_func(model(xb), yb))

tensor(2.3120, grad_fn=<NllLossBackward>)


# Train the current `model`, then print its loss on the mini-batch (xb, yb).
fit()

print(loss_func(model(xb), yb))

tensor(0.1047, grad_fn=<NllLossBackward>)


from torch import optim


def get_model():
    """Create a new ``Mnist_Logistic`` and an SGD optimizer over its
    parameters (learning rate taken from the module-level ``lr``).

    Returns:
        Tuple ``(model, optimizer)``.
    """
    net = Mnist_Logistic()
    optimizer = optim.SGD(net.parameters(), lr=lr)
    return net, optimizer

model, opt = get_model()
print(loss_func(model(xb), yb))  # loss before training

# Same mini-batch loop as before, with the parameter update and gradient
# reset delegated to the optimizer.
n_batches = (n - 1) // bs + 1
for epoch in range(epochs):
    for i in range(n_batches):
        rows = slice(i * bs, i * bs + bs)
        xb, yb = x_train[rows], y_train[rows]
        loss = loss_func(model(xb), yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))  # loss on the last mini-batch after training

tensor(2.3339, grad_fn=<NllLossBackward>)
tensor(0.1042, grad_fn=<NllLossBackward>)


from torch.utils.data import TensorDataset


# Pair inputs and labels so one slice yields both halves of a mini-batch.
train_ds = TensorDataset(x_train, y_train)

model, opt = get_model()

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start = i * bs
        xb, yb = train_ds[start:start + bs]
        loss = loss_func(model(xb), yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))

tensor(0.1058, grad_fn=<NllLossBackward>)


from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
# The DataLoader produces the (xb, yb) mini-batches, so no index arithmetic
# is needed in the loop below.
train_dl = DataLoader(train_ds, batch_size=bs)

model, opt = get_model()

for epoch in range(epochs):
    for xb, yb in train_dl:
        loss = loss_func(model(xb), yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))

tensor(0.1037, grad_fn=<NllLossBackward>)


# Training batches are shuffled. Validation batches are not, and are twice as
# large.
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

model, opt = get_model()

for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        loss = loss_func(model(xb), yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    # Validation pass: no gradients are needed, only the summed batch losses.
    model.eval()
    valid_loss = 0
    with torch.no_grad():
        for valid_xb, valid_yb in valid_dl:
            valid_loss = valid_loss + loss_func(model(valid_xb), valid_yb)

    # Mean of the per-batch losses (each batch counts equally).
    print(epoch, valid_loss / len(valid_dl))

0 tensor(0.4012)
1 tensor(0.3518)
2 tensor(0.3314)
3 tensor(0.3178)
4 tensor(0.3091)
5 tensor(0.3033)
6 tensor(0.2984)
7 tensor(0.2930)
8 tensor(0.2904)
9 tensor(0.2871)


def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and optionally take an optimizer step.

    Args:
        model: Callable mapping ``xb`` to predictions.
        loss_func: Callable ``(predictions, yb) -> loss``.
        xb: Batch of inputs.
        yb: Batch of targets.
        opt: Optimizer. When given, the loss is backpropagated, one step is
            taken and the gradients are reset. When ``None``, only the loss
            is computed.

    Returns:
        Tuple ``(loss as a Python number, number of items in xb)``.
    """
    predictions = model(xb)
    loss = loss_func(predictions, yb)

    if opt is None:
        return loss.item(), len(xb)

    loss.backward()
    opt.step()
    opt.zero_grad()
    return loss.item(), len(xb)


import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` for ``epochs`` epochs and print the validation loss
    after each one.

    Args:
        epochs: Number of passes over ``train_dl``.
        model: Module to train; switched between train and eval mode.
        loss_func: Callable ``(predictions, targets) -> loss``.
        opt: Optimizer used for the training batches.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches.
    """
    for epoch in range(epochs):
        # Training pass: every batch takes one optimizer step.
        model.train()
        for train_xb, train_yb in train_dl:
            loss_batch(model, loss_func, train_xb, train_yb, opt)

        # Validation pass: collect (loss, batch size) pairs without gradients.
        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *(loss_batch(model, loss_func, valid_xb, valid_yb)
                  for valid_xb, valid_yb in valid_dl)
            )

        # Weight each batch loss by its size so a short final batch does not
        # skew the average.
        weighted_total = np.sum(np.multiply(losses, nums))
        val_loss = weighted_total / np.sum(nums)

        print(epoch, val_loss)


def get_data(train_ds, valid_ds, bs):
    """Build the training and validation ``DataLoader`` pair.

    The training loader shuffles and uses batch size ``bs``. The validation
    loader keeps the dataset order and uses batch size ``bs * 2``.

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader


# Build the loaders, a fresh model with its optimizer, and train it.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.4006433832327525
1 0.35207522134780883
2 0.33008397494951885
3 0.3180934603055318
4 0.30892171427408854
5 0.3026645806789398
6 0.2976059865474701
7 0.29389627084732056
8 0.28993876514434813
9 0.28666494232813516


class Mnist_CNN(nn.Module):
    """Three-layer convolutional classifier for flattened 28x28 images.

    Each convolution uses a 3x3 kernel with stride 2 and padding 1, so the
    spatial size goes 28 -> 14 -> 7 -> 4. A 4x4 average pool then leaves one
    value per output channel, giving 10 scores per image.
    """

    def __init__(self):
        super().__init__()
        conv_opts = {"kernel_size": 3, "stride": 2, "padding": 1}
        self.conv1 = nn.Conv2d(1, 16, **conv_opts)
        self.conv2 = nn.Conv2d(16, 16, **conv_opts)
        self.conv3 = nn.Conv2d(16, 10, **conv_opts)

    def forward(self, xb):
        """Return a ``(batch, 10)`` tensor of scores for the batch ``xb``."""
        # Restore the single-channel image layout expected by Conv2d.
        out = xb.view(-1, 1, 28, 28)
        for conv in (self.conv1, self.conv2, self.conv3):
            out = F.relu(conv(out))
        out = F.avg_pool2d(out, 4)
        # Drop the remaining 1x1 spatial dimensions.
        return out.view(-1, out.size(1))

# Learning rate used for the CNN runs.
lr = 0.1


# Train the CNN with SGD plus momentum, reusing the existing data loaders.
model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.3310419024705887
1 0.23648772403001786
2 0.1919250955402851
3 0.16623398226102193
4 0.15438730241060256
5 0.1386402908841769
6 0.1365909076154232
7 0.1354450833171606
8 0.12664987264076868
9 0.11645612166921297


class Lambda(nn.Module):
    """Wrap an arbitrary callable as an ``nn.Module`` so it can be placed
    inside containers such as ``nn.Sequential``."""

    def __init__(self, func):
        super().__init__()
        # Stored as a plain attribute; calling the module calls this.
        self.func = func

    def forward(self, x):
        """Return ``self.func(x)``."""
        transform = self.func
        return transform(x)


def preprocess(x):
    """View a batch of flattened images as ``(batch, 1, 28, 28)``."""
    image_shape = (-1, 1, 28, 28)
    return x.view(*image_shape)


# The CNN expressed as an nn.Sequential: reshape, three stride-2 convolutions
# each followed by ReLU, a 4x4 average pool, and a final flatten per sample.
conv_opts = {"kernel_size": 3, "stride": 2, "padding": 1}
layers = [
    Lambda(preprocess),
    nn.Conv2d(1, 16, **conv_opts),
    nn.ReLU(),
    nn.Conv2d(16, 16, **conv_opts),
    nn.ReLU(),
    nn.Conv2d(16, 10, **conv_opts),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
]
model = nn.Sequential(*layers)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.3585416913350423
1 0.24378163089354832
2 0.19601463597814242
3 0.1873031108180682
4 0.14833631729682287
5 0.142175222158432
6 0.1318467699488004
7 0.11871440504491329
8 0.11276339552104474
9 0.11457007641196251


# Translator's note: variant that uses the more recent nn.Flatten().
conv_opts = {"kernel_size": 3, "stride": 2, "padding": 1}
layers = [
    # nn.Flatten() cannot express this reshape, so the Lambda stays.
    Lambda(preprocess),
    nn.Conv2d(1, 16, **conv_opts),
    nn.ReLU(),
    nn.Conv2d(16, 16, **conv_opts),
    nn.ReLU(),
    nn.Conv2d(16, 10, **conv_opts),
    nn.ReLU(),
    nn.AvgPool2d(4),
    # Takes the place of Lambda(lambda x: x.view(x.size(0), -1)).
    nn.Flatten(),
]
model = nn.Sequential(*layers)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.31753584813276925
1 0.21110932911237082
2 0.22918362423578897
3 0.17362791807850203
4 0.1558101560652256
5 0.15648633526563643
6 0.14269687856038413
7 0.10920118572115899
8 0.10257465073764324
9 0.11280524482528369


def preprocess(x, y):
    """Return ``x`` viewed as ``(batch, 1, 28, 28)`` together with ``y``
    unchanged."""
    images = x.view(-1, 1, 28, 28)
    return images, y


class WrappedDataLoader:
    """Iterable wrapper that applies ``func`` to every batch of ``dl``.

    Each batch produced by ``dl`` is unpacked into positional arguments for
    ``func``; whatever ``func`` returns is yielded in its place.
    """

    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        """Number of batches, as reported by the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield ``func(*batch)`` for each batch of the wrapped loader."""
        for batch in self.dl:
            yield self.func(*batch)

# Fresh loaders, each wrapped so that preprocess runs on every batch.
train_dl, valid_dl = (
    WrappedDataLoader(loader, preprocess)
    for loader in get_data(train_ds, valid_ds, bs)
)

# The reshape now happens in the loader, so the model starts directly with
# the first convolution. AdaptiveAvgPool2d(1) fixes the pooled output at 1x1.
conv_opts = {"kernel_size": 3, "stride": 2, "padding": 1}
model = nn.Sequential(
    nn.Conv2d(1, 16, **conv_opts),
    nn.ReLU(),
    nn.Conv2d(16, 16, **conv_opts),
    nn.ReLU(),
    nn.Conv2d(16, 10, **conv_opts),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)



import time
# Measure the wall-clock time of one complete training run.
tm = time.time()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
elapsed = time.time() - tm

print("computation time:", elapsed)


# Report whether torch can use a CUDA device.
print(torch.cuda.is_available())

True


# Use the CUDA device when one is available, otherwise fall back to the CPU.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def preprocess(x, y):
    """Reshape ``x`` to ``(batch, 1, 28, 28)`` and move both the images and
    the labels ``y`` to the module-level device ``dev``."""
    images = x.view(-1, 1, 28, 28)
    return images.to(dev), y.to(dev)


# Rebuild the wrapped loaders so they pick up the current preprocess function.
base_train_dl, base_valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(base_train_dl, preprocess)
valid_dl = WrappedDataLoader(base_valid_dl, preprocess)

# Move the existing model's parameters to `dev`, then create the optimizer
# over those parameters.
model.to(dev)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)



# Measure the wall-clock time of one complete training run.
tm = time.time()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
elapsed = time.time() - tm
print("computation time:", elapsed)

「torch.nnを用いたディープラーニングモデルの実装方法」¶

MNISTデータの用意¶

(torch.nnなしで）ニューラルネットワークを構築¶

torch.nn.functionalを利用¶

nn.Moduleを使用したコードのリファクタリング¶

nn.Linearを使用したリファクタリング¶

optimを使用したリファクタリング¶

Datasetを利用したリファクタリング¶

DataLoaderを用いたリファクタリング¶

バリデーションを追加する¶

fit関数とget_data関数の実装¶

CNNモデルへの変更¶

nn.Sequentialの利用¶

DataLoaderのラップ¶

GPUの利用¶

まとめ¶