# Download the Penn-Fudan dataset.
# wget already saves into the current directory; a trailing "." would be
# treated as a second URL and fail with "unable to resolve host address".
wget vrl.sys.wakayama-u.ac.jp/class/pytorch_tutorial/datasets/PennFudanPed.zip
# Extract the zip file into the current folder.
unzip PennFudanPed.zip

--2020-12-10 06:01:30--  https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip
Resolving www.cis.upenn.edu (www.cis.upenn.edu)... 158.130.69.163, 2607:f470:8:64:5ea5::d
Connecting to www.cis.upenn.edu (www.cis.upenn.edu)|158.130.69.163|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 53723336 (51M) [application/zip]
Saving to: ‘PennFudanPed.zip’

PennFudanPed.zip    100%[===================>]  51.23M  31.5MB/s    in 1.6s    

2020-12-10 06:01:31 (31.5 MB/s) - ‘PennFudanPed.zip’ saved [53723336/53723336]

--2020-12-10 06:01:31--  http://./
Resolving . (.)... failed: No address associated with hostname.
wget: unable to resolve host address ‘.’
FINISHED --2020-12-10 06:01:31--
Total wall clock time: 1.9s
Downloaded: 1 files, 51M in 1.6s (31.5 MB/s)
Archive:  PennFudanPed.zip
   creating: PennFudanPed/
  inflating: PennFudanPed/added-object-list.txt  
   creating: PennFudanPed/Annotation/
  inflating: PennFudanPed/Annotation/FudanPed00001.txt  
  inflating: PennFudanPed/Annotation/FudanPed00002.txt  
  inflating: PennFudanPed/Annotation/FudanPed00003.txt  
  ...
  inflating: PennFudanPed/PNGImages/PennPed00095.png  
  inflating: PennFudanPed/PNGImages/PennPed00096.png  
  inflating: PennFudanPed/readme.txt

(<PIL.Image.Image image mode=RGB size=559x536 at 0x7F5F41CB6940>,
 {'area': tensor([35358., 36225.]), 'boxes': tensor([[159., 181., 301., 430.],
          [419., 170., 534., 485.]]), 'image_id': tensor([0]), 'iscrowd': tensor([0, 0]), 'labels': tensor([1, 1]), 'masks': tensor([[[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]],
  
          [[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8)})

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth

HBox(children=(FloatProgress(value=0.0, max=167502836.0), HTML(value='')))

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth

HBox(children=(FloatProgress(value=0.0, max=14212972.0), HTML(value='')))


  
# Download the Penn-Fudan dataset.
# wget already saves into the current directory; a trailing "." would be
# treated as a second URL and fail with "unable to resolve host address".
wget vrl.sys.wakayama-u.ac.jp/class/pytorch_tutorial/datasets/PennFudanPed.zip
# Extract the zip file into the current folder.
unzip PennFudanPed.zip

--2020-12-10 06:01:30--  https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip
Resolving www.cis.upenn.edu (www.cis.upenn.edu)... 158.130.69.163, 2607:f470:8:64:5ea5::d
Connecting to www.cis.upenn.edu (www.cis.upenn.edu)|158.130.69.163|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 53723336 (51M) [application/zip]
Saving to: ‘PennFudanPed.zip’

PennFudanPed.zip    100%[===================>]  51.23M  31.5MB/s    in 1.6s    

2020-12-10 06:01:31 (31.5 MB/s) - ‘PennFudanPed.zip’ saved [53723336/53723336]

--2020-12-10 06:01:31--  http://./
Resolving . (.)... failed: No address associated with hostname.
wget: unable to resolve host address ‘.’
FINISHED --2020-12-10 06:01:31--
Total wall clock time: 1.9s
Downloaded: 1 files, 51M in 1.6s (31.5 MB/s)
Archive:  PennFudanPed.zip
   creating: PennFudanPed/
  inflating: PennFudanPed/added-object-list.txt  
   creating: PennFudanPed/Annotation/
  inflating: PennFudanPed/Annotation/FudanPed00001.txt  
  inflating: PennFudanPed/Annotation/FudanPed00002.txt  
  inflating: PennFudanPed/Annotation/FudanPed00003.txt  
  ...
  inflating: PennFudanPed/PNGImages/PennPed00095.png  
  inflating: PennFudanPed/PNGImages/PennPed00096.png  
  inflating: PennFudanPed/readme.txt


from PIL import Image
# Open the first sample image and save a copy as 'example.png'.
sample_image_path = 'PennFudanPed/PNGImages/FudanPed00001.png'
img = Image.open(sample_image_path)
img.save('example.png')


# Each instance in the mask has its own color index, from 0 up to N, where N
# is the number of instances. Attach a color palette so they are easy to see.
mask = Image.open('PennFudanPed/PedMasks/FudanPed00001_mask.png')
instance_palette = [
    0, 0, 0,  # index 0: black background
    255, 0, 0,  # index 1: red
    255, 255, 0,  # index 2: yellow
    255, 153, 0,  # index 3: orange
]
mask.putpalette(instance_palette)
mask.save('example_mask.png')


import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    """Penn-Fudan pedestrian images paired with per-instance targets.

    Images are listed from ``<root>/PNGImages`` and masks from
    ``<root>/PedMasks``. Both listings are sorted, and the i-th image is
    paired with the i-th mask.

    Args:
        root: Dataset directory containing ``PNGImages`` and ``PedMasks``.
        transforms: Optional callable invoked as ``transforms(img, target)``
            and expected to return the (possibly modified) pair.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # Sort both listings so that images and masks line up by position.
        self.imgs = sorted(os.listdir(os.path.join(root, "PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root, "PedMasks")))

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        ``target`` is a dict with the keys:
            boxes: float32 tensor of ``[xmin, ymin, xmax, ymax]`` rows, one
                per instance (shape ``(num_objs, 4)``, also when empty).
            labels: int64 tensor of ones (single "person" class).
            masks: uint8 tensor holding one binary mask per instance.
            image_id: tensor containing ``idx``.
            area: box area per instance.
            iscrowd: int64 tensor of zeros.
        """
        # Load the image and its mask.
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # The mask is deliberately NOT converted to RGB: each pixel value
        # identifies one instance, and the value 0 is the background.
        mask = np.array(Image.open(mask_path))

        # Every distinct non-zero value is one instance. Filtering on the
        # value (instead of dropping the first sorted id) stays correct even
        # when a mask contains no background pixel.
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        num_objs = len(obj_ids)

        # Turn the id-encoded mask into one boolean mask per instance.
        masks = mask == obj_ids[:, None, None]

        # Compute the bounding box of each instance mask.
        boxes = []
        for instance_mask in masks:
            rows, cols = np.where(instance_mask)
            boxes.append([np.min(cols), np.min(rows),
                          np.max(cols), np.max(rows)])

        # Convert everything to torch.Tensor. The explicit reshape keeps
        # ``boxes`` two-dimensional when the mask has no instances, so the
        # column indexing used for ``area`` below does not fail.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(num_objs, 4)
        # There is only one class here (person).
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # Assume no instance is a crowd (iscrowd=0 for all).
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of image files found under ``PNGImages``."""
        return len(self.imgs)


# Build the dataset without transforms and look at its first sample.
dataset = PennFudanDataset(root='PennFudanPed/')
dataset[0]

(<PIL.Image.Image image mode=RGB size=559x536 at 0x7F5F41CB6940>,
 {'area': tensor([35358., 36225.]), 'boxes': tensor([[159., 181., 301., 430.],
          [419., 170., 534., 485.]]), 'image_id': tensor([0]), 'iscrowd': tensor([0, 0]), 'labels': tensor([1, 1]), 'masks': tensor([[[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]],
  
          [[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8)})


import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load a Faster R-CNN model pre-trained on the COCO dataset.
coco_weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=coco_weights)

# The new classifier predicts a user-defined number of classes:
# person + background = 2.
num_classes = 2
# The replacement head takes the same number of input features as the
# pre-trained classifier it replaces.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth

HBox(children=(FloatProgress(value=0.0, max=167502836.0), HTML(value='')))


import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# Load a model pre-trained for classification and keep only its feature extractor.
backbone = torchvision.models.mobilenet_v2(weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1).features
# FasterRCNN needs to know the number of output channels of the backbone.
# For mobilenet_v2 it is 1280, so set it here.
backbone.out_channels = 1280

# Have the RPN (Region Proposal Network) generate 5 x 3 anchors per spatial
# location: 5 sizes and 3 aspect ratios.
# Each feature map may use different sizes and aspect ratios, hence the
# Tuple[Tuple[int]] form.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# Define which feature maps are used for region-of-interest cropping
# (featmap_names) and the size of the crop after rescaling (output_size).
# More generally the backbone returns an OrderedDict[Tensor], and
# featmap_names selects which feature maps to use.
# Feature-map names are strings: a backbone that returns a single Tensor has
# its output exposed under the key '0', so featmap_names must be ['0'].
# The integer 0 would not match that key.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

# Assemble the parts defined above into a FasterRCNN model.
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth

HBox(children=(FloatProgress(value=0.0, max=14212972.0), HTML(value='')))


import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_instance_segmentation_model(num_classes):
    """Return a COCO-pretrained Mask R-CNN with fresh prediction heads.

    Args:
        num_classes: Number of classes passed to both the new box predictor
            and the new mask predictor.

    Returns:
        The Mask R-CNN model with its box and mask predictors replaced.
    """
    detection = torchvision.models.detection
    # Load an instance segmentation model pre-trained on COCO.
    model = detection.maskrcnn_resnet50_fpn(
        weights=detection.MaskRCNN_ResNet50_FPN_Weights.COCO_V1)
    heads = model.roi_heads

    # Box head: the replacement takes the same number of input features as
    # the pre-trained classifier.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: the replacement takes the same number of input channels as
    # the pre-trained mask classifier, with a 256-channel hidden layer.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes)

    return model


  

# The helper files used above come from references/detection in the
# TorchVision repository. Fetch them as a tarball, unpack it, and copy each
# module into the current directory so it can be imported.

wget vrl.sys.wakayama-u.ac.jp/class/pytorch_tutorial/2_2_func.tar
tar -xvf 2_2_func.tar
cp 2_2_func/utils.py .
cp 2_2_func/transforms.py .
cp 2_2_func/coco_eval.py .
cp 2_2_func/engine.py .
cp 2_2_func/coco_utils.py .

Cloning into 'vision'...
remote: Enumerating objects: 82, done.
remote: Counting objects: 100% (82/82), done.
remote: Compressing objects: 100% (70/70), done.
remote: Total 12010 (delta 40), reused 22 (delta 12), pack-reused 11928
Receiving objects: 100% (12010/12010), 13.43 MiB | 22.18 MiB/s, done.
Resolving deltas: 100% (8379/8379), done.
Note: checking out 'v0.3.0'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at be37608 version check against PyTorch's CUDA version


from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    """Compose the transform pipeline for training or evaluation.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # Always start by converting the PIL image to a PyTorch tensor.
    pipeline = [T.ToTensor()]
    if train:
        # Data augmentation for training: randomly flip the image and its
        # ground-truth boxes horizontally.
        pipeline.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(pipeline)


# Two views of the custom dataset: with training transforms and with
# evaluation transforms.
dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

# Split into a training set and a test set: shuffle the indices with a fixed
# seed and hold out the last 50 for testing.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
train_indices, test_indices = indices[:-50], indices[-50:]
dataset = torch.utils.data.Subset(dataset, train_indices)
dataset_test = torch.utils.data.Subset(dataset_test, test_indices)

# Data loaders for training and evaluation share the worker count and the
# collate function.
loader_options = dict(num_workers=4, collate_fn=utils.collate_fn)

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, **loader_options)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, **loader_options)


# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The custom dataset has only two classes: background and person.
num_classes = 2

# Build the model with the helper function and move it to the selected device.
model = get_instance_segmentation_model(num_classes)
model.to(device)

# Optimize only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Learning-rate scheduler: multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth

HBox(children=(FloatProgress(value=0.0, max=178090079.0), HTML(value='')))


# Train for 10 epochs.
num_epochs = 10

for epoch in range(num_epochs):
    # Train for one epoch, printing every 10 iterations.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # Update the learning rate.
    lr_scheduler.step()
    # Evaluate on the test dataset.
    evaluate(model, data_loader_test, device=device)

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py:3103: UserWarning: The default behavior for interpolate/upsample with float scale_factor changed in 1.6.0 to align with other frameworks/libraries, and now uses scale_factor directly, instead of relying on the computed output size. If you wish to restore the old behavior, please set recompute_scale_factor=True. See the documentation of nn.Upsample for details. 
  warnings.warn("The default behavior for interpolate/upsample with float scale_factor changed "

Epoch: [0]  [ 0/60]  eta: 0:01:23  lr: 0.000090  loss: 4.6415 (4.6415)  loss_classifier: 0.7949 (0.7949)  loss_box_reg: 0.3921 (0.3921)  loss_mask: 3.4276 (3.4276)  loss_objectness: 0.0213 (0.0213)  loss_rpn_box_reg: 0.0056 (0.0056)  time: 1.3895  data: 0.4002  max mem: 2302
Epoch: [0]  [10/60]  eta: 0:00:30  lr: 0.000936  loss: 1.7297 (2.2541)  loss_classifier: 0.4499 (0.4679)  loss_box_reg: 0.2963 (0.2845)  loss_mask: 1.0088 (1.4770)  loss_objectness: 0.0213 (0.0200)  loss_rpn_box_reg: 0.0040 (0.0047)  time: 0.6184  data: 0.0422  max mem: 2854
Epoch: [0]  [20/60]  eta: 0:00:23  lr: 0.001783  loss: 0.9483 (1.5313)  loss_classifier: 0.2060 (0.3266)  loss_box_reg: 0.2250 (0.2574)  loss_mask: 0.3999 (0.9175)  loss_objectness: 0.0196 (0.0222)  loss_rpn_box_reg: 0.0035 (0.0076)  time: 0.5381  data: 0.0067  max mem: 2854
Epoch: [0]  [30/60]  eta: 0:00:16  lr: 0.002629  loss: 0.6293 (1.2229)  loss_classifier: 0.1259 (0.2542)  loss_box_reg: 0.2131 (0.2504)  loss_mask: 0.2257 (0.6914)  loss_objectness: 0.0140 (0.0191)  loss_rpn_box_reg: 0.0072 (0.0078)  time: 0.5274  data: 0.0070  max mem: 3220
Epoch: [0]  [40/60]  eta: 0:00:10  lr: 0.003476  loss: 0.4422 (1.0263)  loss_classifier: 0.0551 (0.2056)  loss_box_reg: 0.1811 (0.2333)  loss_mask: 0.1849 (0.5646)  loss_objectness: 0.0066 (0.0156)  loss_rpn_box_reg: 0.0062 (0.0073)  time: 0.5119  data: 0.0070  max mem: 3220
Epoch: [0]  [50/60]  eta: 0:00:05  lr: 0.004323  loss: 0.3780 (0.9008)  loss_classifier: 0.0511 (0.1753)  loss_box_reg: 0.1534 (0.2212)  loss_mask: 0.1517 (0.4843)  loss_objectness: 0.0019 (0.0129)  loss_rpn_box_reg: 0.0044 (0.0071)  time: 0.5444  data: 0.0073  max mem: 3220
Epoch: [0]  [59/60]  eta: 0:00:00  lr: 0.005000  loss: 0.3556 (0.8196)  loss_classifier: 0.0408 (0.1553)  loss_box_reg: 0.1366 (0.2059)  loss_mask: 0.1637 (0.4400)  loss_objectness: 0.0017 (0.0113)  loss_rpn_box_reg: 0.0036 (0.0072)  time: 0.5601  data: 0.0079  max mem: 3220
Epoch: [0] Total time: 0:00:33 (0.5540 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1704 (0.1704)  evaluator_time: 0.0065 (0.0065)  time: 0.3354  data: 0.1570  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1188 (0.1198)  evaluator_time: 0.0056 (0.0112)  time: 0.1340  data: 0.0038  max mem: 3220
Test: Total time: 0:00:07 (0.1412 s / it)
Averaged stats: model_time: 0.1188 (0.1198)  evaluator_time: 0.0056 (0.0112)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.625
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.976
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.787
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.389
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.631
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.291
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.687
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.689
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.675
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.690
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.664
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.985
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.816
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.325
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.674
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.308
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.713
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.718
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.713
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.719
Epoch: [1]  [ 0/60]  eta: 0:01:01  lr: 0.005000  loss: 0.2611 (0.2611)  loss_classifier: 0.0312 (0.0312)  loss_box_reg: 0.0728 (0.0728)  loss_mask: 0.1538 (0.1538)  loss_objectness: 0.0008 (0.0008)  loss_rpn_box_reg: 0.0024 (0.0024)  time: 1.0184  data: 0.4079  max mem: 3220
Epoch: [1]  [10/60]  eta: 0:00:29  lr: 0.005000  loss: 0.2823 (0.2967)  loss_classifier: 0.0422 (0.0407)  loss_box_reg: 0.0916 (0.0923)  loss_mask: 0.1538 (0.1580)  loss_objectness: 0.0008 (0.0019)  loss_rpn_box_reg: 0.0029 (0.0038)  time: 0.5841  data: 0.0432  max mem: 3220
Epoch: [1]  [20/60]  eta: 0:00:23  lr: 0.005000  loss: 0.2764 (0.2977)  loss_classifier: 0.0434 (0.0435)  loss_box_reg: 0.1000 (0.0975)  loss_mask: 0.1435 (0.1506)  loss_objectness: 0.0008 (0.0017)  loss_rpn_box_reg: 0.0038 (0.0045)  time: 0.5586  data: 0.0071  max mem: 3220
Epoch: [1]  [30/60]  eta: 0:00:17  lr: 0.005000  loss: 0.2658 (0.2856)  loss_classifier: 0.0404 (0.0415)  loss_box_reg: 0.0962 (0.0915)  loss_mask: 0.1274 (0.1465)  loss_objectness: 0.0009 (0.0018)  loss_rpn_box_reg: 0.0040 (0.0043)  time: 0.5698  data: 0.0073  max mem: 3220
Epoch: [1]  [40/60]  eta: 0:00:11  lr: 0.005000  loss: 0.2969 (0.3003)  loss_classifier: 0.0386 (0.0434)  loss_box_reg: 0.0958 (0.0999)  loss_mask: 0.1362 (0.1501)  loss_objectness: 0.0009 (0.0017)  loss_rpn_box_reg: 0.0042 (0.0052)  time: 0.5865  data: 0.0073  max mem: 3220
Epoch: [1]  [50/60]  eta: 0:00:05  lr: 0.005000  loss: 0.3033 (0.2995)  loss_classifier: 0.0368 (0.0423)  loss_box_reg: 0.0958 (0.0989)  loss_mask: 0.1551 (0.1514)  loss_objectness: 0.0006 (0.0016)  loss_rpn_box_reg: 0.0056 (0.0052)  time: 0.5993  data: 0.0074  max mem: 3220
Epoch: [1]  [59/60]  eta: 0:00:00  lr: 0.005000  loss: 0.2720 (0.2897)  loss_classifier: 0.0319 (0.0411)  loss_box_reg: 0.0749 (0.0931)  loss_mask: 0.1462 (0.1490)  loss_objectness: 0.0006 (0.0016)  loss_rpn_box_reg: 0.0043 (0.0050)  time: 0.5846  data: 0.0074  max mem: 3220
Epoch: [1] Total time: 0:00:35 (0.5843 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1760 (0.1760)  evaluator_time: 0.0046 (0.0046)  time: 0.3382  data: 0.1560  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1174 (0.1174)  evaluator_time: 0.0039 (0.0063)  time: 0.1277  data: 0.0039  max mem: 3220
Test: Total time: 0:00:06 (0.1343 s / it)
Averaged stats: model_time: 0.1174 (0.1174)  evaluator_time: 0.0039 (0.0063)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.771
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.987
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.916
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.502
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.781
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.358
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.823
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.823
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.750
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.828
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.744
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.987
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.911
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.368
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.757
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.341
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.787
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.787
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.675
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.795
Epoch: [2]  [ 0/60]  eta: 0:00:54  lr: 0.005000  loss: 0.2391 (0.2391)  loss_classifier: 0.0382 (0.0382)  loss_box_reg: 0.0490 (0.0490)  loss_mask: 0.1394 (0.1394)  loss_objectness: 0.0112 (0.0112)  loss_rpn_box_reg: 0.0013 (0.0013)  time: 0.9149  data: 0.3904  max mem: 3220
Epoch: [2]  [10/60]  eta: 0:00:30  lr: 0.005000  loss: 0.2476 (0.2409)  loss_classifier: 0.0380 (0.0333)  loss_box_reg: 0.0707 (0.0657)  loss_mask: 0.1298 (0.1352)  loss_objectness: 0.0011 (0.0028)  loss_rpn_box_reg: 0.0037 (0.0038)  time: 0.6195  data: 0.0408  max mem: 3220
Epoch: [2]  [20/60]  eta: 0:00:24  lr: 0.005000  loss: 0.2440 (0.2441)  loss_classifier: 0.0336 (0.0338)  loss_box_reg: 0.0707 (0.0690)  loss_mask: 0.1281 (0.1342)  loss_objectness: 0.0011 (0.0024)  loss_rpn_box_reg: 0.0037 (0.0047)  time: 0.6056  data: 0.0065  max mem: 3220
Epoch: [2]  [30/60]  eta: 0:00:17  lr: 0.005000  loss: 0.1964 (0.2274)  loss_classifier: 0.0310 (0.0313)  loss_box_reg: 0.0468 (0.0599)  loss_mask: 0.1145 (0.1300)  loss_objectness: 0.0008 (0.0020)  loss_rpn_box_reg: 0.0032 (0.0042)  time: 0.5769  data: 0.0072  max mem: 3220
Epoch: [2]  [40/60]  eta: 0:00:12  lr: 0.005000  loss: 0.1985 (0.2280)  loss_classifier: 0.0310 (0.0316)  loss_box_reg: 0.0425 (0.0586)  loss_mask: 0.1163 (0.1319)  loss_objectness: 0.0003 (0.0017)  loss_rpn_box_reg: 0.0023 (0.0042)  time: 0.5849  data: 0.0073  max mem: 3220
Epoch: [2]  [50/60]  eta: 0:00:06  lr: 0.005000  loss: 0.2418 (0.2347)  loss_classifier: 0.0317 (0.0324)  loss_box_reg: 0.0573 (0.0619)  loss_mask: 0.1343 (0.1343)  loss_objectness: 0.0005 (0.0018)  loss_rpn_box_reg: 0.0045 (0.0044)  time: 0.6182  data: 0.0073  max mem: 3220
Epoch: [2]  [59/60]  eta: 0:00:00  lr: 0.005000  loss: 0.2167 (0.2358)  loss_classifier: 0.0313 (0.0330)  loss_box_reg: 0.0588 (0.0624)  loss_mask: 0.1227 (0.1342)  loss_objectness: 0.0010 (0.0019)  loss_rpn_box_reg: 0.0037 (0.0043)  time: 0.6001  data: 0.0073  max mem: 3220
Epoch: [2] Total time: 0:00:36 (0.6037 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:20  model_time: 0.1706 (0.1706)  evaluator_time: 0.0040 (0.0040)  time: 0.4052  data: 0.2292  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1206 (0.1217)  evaluator_time: 0.0037 (0.0059)  time: 0.1309  data: 0.0040  max mem: 3220
Test: Total time: 0:00:06 (0.1398 s / it)
Averaged stats: model_time: 0.1206 (0.1217)  evaluator_time: 0.0037 (0.0059)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.785
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.989
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.936
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.599
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.790
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.365
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.833
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.833
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.834
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.729
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.989
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.913
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.560
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.734
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.334
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.780
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.780
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.762
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.781
Epoch: [3]  [ 0/60]  eta: 0:00:55  lr: 0.000500  loss: 0.1726 (0.1726)  loss_classifier: 0.0171 (0.0171)  loss_box_reg: 0.0307 (0.0307)  loss_mask: 0.1164 (0.1164)  loss_objectness: 0.0022 (0.0022)  loss_rpn_box_reg: 0.0062 (0.0062)  time: 0.9247  data: 0.3295  max mem: 3220
Epoch: [3]  [10/60]  eta: 0:00:29  lr: 0.000500  loss: 0.1953 (0.2107)  loss_classifier: 0.0262 (0.0275)  loss_box_reg: 0.0429 (0.0499)  loss_mask: 0.1173 (0.1288)  loss_objectness: 0.0009 (0.0013)  loss_rpn_box_reg: 0.0028 (0.0032)  time: 0.5917  data: 0.0357  max mem: 3220
Epoch: [3]  [20/60]  eta: 0:00:24  lr: 0.000500  loss: 0.2024 (0.2208)  loss_classifier: 0.0279 (0.0314)  loss_box_reg: 0.0519 (0.0545)  loss_mask: 0.1217 (0.1304)  loss_objectness: 0.0005 (0.0010)  loss_rpn_box_reg: 0.0031 (0.0036)  time: 0.6098  data: 0.0070  max mem: 3220
Epoch: [3]  [30/60]  eta: 0:00:18  lr: 0.000500  loss: 0.2159 (0.2202)  loss_classifier: 0.0344 (0.0321)  loss_box_reg: 0.0474 (0.0531)  loss_mask: 0.1267 (0.1305)  loss_objectness: 0.0004 (0.0008)  loss_rpn_box_reg: 0.0034 (0.0037)  time: 0.6512  data: 0.0078  max mem: 3220
Epoch: [3]  [40/60]  eta: 0:00:12  lr: 0.000500  loss: 0.1960 (0.2154)  loss_classifier: 0.0310 (0.0315)  loss_box_reg: 0.0416 (0.0509)  loss_mask: 0.1210 (0.1287)  loss_objectness: 0.0003 (0.0008)  loss_rpn_box_reg: 0.0021 (0.0035)  time: 0.6245  data: 0.0077  max mem: 3220
Epoch: [3]  [50/60]  eta: 0:00:06  lr: 0.000500  loss: 0.1651 (0.2036)  loss_classifier: 0.0200 (0.0289)  loss_box_reg: 0.0267 (0.0463)  loss_mask: 0.1108 (0.1243)  loss_objectness: 0.0003 (0.0008)  loss_rpn_box_reg: 0.0016 (0.0031)  time: 0.5948  data: 0.0077  max mem: 3220
Epoch: [3]  [59/60]  eta: 0:00:00  lr: 0.000500  loss: 0.1559 (0.1999)  loss_classifier: 0.0178 (0.0282)  loss_box_reg: 0.0262 (0.0453)  loss_mask: 0.1090 (0.1225)  loss_objectness: 0.0004 (0.0008)  loss_rpn_box_reg: 0.0020 (0.0031)  time: 0.5982  data: 0.0076  max mem: 3220
Epoch: [3] Total time: 0:00:37 (0.6180 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:17  model_time: 0.1785 (0.1785)  evaluator_time: 0.0038 (0.0038)  time: 0.3547  data: 0.1707  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1206 (0.1192)  evaluator_time: 0.0034 (0.0055)  time: 0.1272  data: 0.0037  max mem: 3220
Test: Total time: 0:00:06 (0.1352 s / it)
Averaged stats: model_time: 0.1206 (0.1192)  evaluator_time: 0.0034 (0.0055)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.821
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.991
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.957
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.621
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.828
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.380
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.866
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.866
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.869
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.757
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.991
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.907
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.557
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.766
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.346
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.805
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.805
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.762
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.808
Epoch: [4]  [ 0/60]  eta: 0:01:08  lr: 0.000500  loss: 0.1688 (0.1688)  loss_classifier: 0.0238 (0.0238)  loss_box_reg: 0.0349 (0.0349)  loss_mask: 0.1040 (0.1040)  loss_objectness: 0.0003 (0.0003)  loss_rpn_box_reg: 0.0058 (0.0058)  time: 1.1420  data: 0.4607  max mem: 3220
Epoch: [4]  [10/60]  eta: 0:00:33  lr: 0.000500  loss: 0.1937 (0.2002)  loss_classifier: 0.0266 (0.0287)  loss_box_reg: 0.0460 (0.0473)  loss_mask: 0.1226 (0.1207)  loss_objectness: 0.0003 (0.0004)  loss_rpn_box_reg: 0.0024 (0.0030)  time: 0.6731  data: 0.0465  max mem: 3220
Epoch: [4]  [20/60]  eta: 0:00:24  lr: 0.000500  loss: 0.1756 (0.1894)  loss_classifier: 0.0262 (0.0258)  loss_box_reg: 0.0336 (0.0405)  loss_mask: 0.1130 (0.1199)  loss_objectness: 0.0002 (0.0004)  loss_rpn_box_reg: 0.0017 (0.0027)  time: 0.5985  data: 0.0061  max mem: 3220
Epoch: [4]  [30/60]  eta: 0:00:18  lr: 0.000500  loss: 0.1712 (0.1953)  loss_classifier: 0.0231 (0.0272)  loss_box_reg: 0.0328 (0.0428)  loss_mask: 0.1121 (0.1217)  loss_objectness: 0.0003 (0.0009)  loss_rpn_box_reg: 0.0017 (0.0029)  time: 0.5945  data: 0.0072  max mem: 3220
Epoch: [4]  [40/60]  eta: 0:00:12  lr: 0.000500  loss: 0.1752 (0.1960)  loss_classifier: 0.0242 (0.0284)  loss_box_reg: 0.0339 (0.0416)  loss_mask: 0.1148 (0.1218)  loss_objectness: 0.0006 (0.0012)  loss_rpn_box_reg: 0.0026 (0.0030)  time: 0.6167  data: 0.0072  max mem: 3220
Epoch: [4]  [50/60]  eta: 0:00:06  lr: 0.000500  loss: 0.1790 (0.1914)  loss_classifier: 0.0219 (0.0272)  loss_box_reg: 0.0328 (0.0406)  loss_mask: 0.1104 (0.1195)  loss_objectness: 0.0004 (0.0012)  loss_rpn_box_reg: 0.0026 (0.0028)  time: 0.6169  data: 0.0073  max mem: 3220
Epoch: [4]  [59/60]  eta: 0:00:00  lr: 0.000500  loss: 0.1790 (0.1925)  loss_classifier: 0.0219 (0.0275)  loss_box_reg: 0.0328 (0.0407)  loss_mask: 0.1179 (0.1202)  loss_objectness: 0.0006 (0.0013)  loss_rpn_box_reg: 0.0019 (0.0028)  time: 0.6136  data: 0.0073  max mem: 3220
Epoch: [4] Total time: 0:00:37 (0.6201 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1778 (0.1778)  evaluator_time: 0.0039 (0.0039)  time: 0.3318  data: 0.1485  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1202 (0.1192)  evaluator_time: 0.0031 (0.0052)  time: 0.1272  data: 0.0037  max mem: 3220
Test: Total time: 0:00:06 (0.1342 s / it)
Averaged stats: model_time: 0.1202 (0.1192)  evaluator_time: 0.0031 (0.0052)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.826
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.991
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.959
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.634
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.832
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.382
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.868
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.868
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.872
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.754
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.991
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.907
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.550
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.762
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.347
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.803
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.803
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.750
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.807
Epoch: [5]  [ 0/60]  eta: 0:00:53  lr: 0.000500  loss: 0.1190 (0.1190)  loss_classifier: 0.0097 (0.0097)  loss_box_reg: 0.0128 (0.0128)  loss_mask: 0.0952 (0.0952)  loss_objectness: 0.0006 (0.0006)  loss_rpn_box_reg: 0.0007 (0.0007)  time: 0.8971  data: 0.2526  max mem: 3220
Epoch: [5]  [10/60]  eta: 0:00:30  lr: 0.000500  loss: 0.1469 (0.1677)  loss_classifier: 0.0171 (0.0192)  loss_box_reg: 0.0249 (0.0327)  loss_mask: 0.1005 (0.1129)  loss_objectness: 0.0004 (0.0005)  loss_rpn_box_reg: 0.0015 (0.0024)  time: 0.6191  data: 0.0293  max mem: 3220
Epoch: [5]  [20/60]  eta: 0:00:24  lr: 0.000500  loss: 0.1808 (0.1807)  loss_classifier: 0.0261 (0.0245)  loss_box_reg: 0.0317 (0.0374)  loss_mask: 0.1047 (0.1148)  loss_objectness: 0.0004 (0.0013)  loss_rpn_box_reg: 0.0016 (0.0027)  time: 0.5935  data: 0.0070  max mem: 3220
Epoch: [5]  [30/60]  eta: 0:00:18  lr: 0.000500  loss: 0.1809 (0.1869)  loss_classifier: 0.0284 (0.0258)  loss_box_reg: 0.0333 (0.0386)  loss_mask: 0.1141 (0.1189)  loss_objectness: 0.0004 (0.0010)  loss_rpn_box_reg: 0.0019 (0.0027)  time: 0.6061  data: 0.0071  max mem: 3220
Epoch: [5]  [40/60]  eta: 0:00:12  lr: 0.000500  loss: 0.1730 (0.1825)  loss_classifier: 0.0248 (0.0251)  loss_box_reg: 0.0333 (0.0376)  loss_mask: 0.1067 (0.1159)  loss_objectness: 0.0004 (0.0012)  loss_rpn_box_reg: 0.0026 (0.0026)  time: 0.6083  data: 0.0071  max mem: 3220
Epoch: [5]  [50/60]  eta: 0:00:06  lr: 0.000500  loss: 0.1692 (0.1877)  loss_classifier: 0.0223 (0.0258)  loss_box_reg: 0.0360 (0.0397)  loss_mask: 0.1146 (0.1181)  loss_objectness: 0.0006 (0.0013)  loss_rpn_box_reg: 0.0027 (0.0028)  time: 0.6161  data: 0.0071  max mem: 3220
Epoch: [5]  [59/60]  eta: 0:00:00  lr: 0.000500  loss: 0.1762 (0.1886)  loss_classifier: 0.0232 (0.0261)  loss_box_reg: 0.0356 (0.0397)  loss_mask: 0.1222 (0.1189)  loss_objectness: 0.0004 (0.0012)  loss_rpn_box_reg: 0.0022 (0.0027)  time: 0.6414  data: 0.0071  max mem: 3220
Epoch: [5] Total time: 0:00:37 (0.6194 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1649 (0.1649)  evaluator_time: 0.0035 (0.0035)  time: 0.3329  data: 0.1629  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1190 (0.1181)  evaluator_time: 0.0033 (0.0054)  time: 0.1276  data: 0.0040  max mem: 3220
Test: Total time: 0:00:06 (0.1338 s / it)
Averaged stats: model_time: 0.1190 (0.1181)  evaluator_time: 0.0033 (0.0054)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.827
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.960
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.632
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.835
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.383
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.869
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.869
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.873
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.763
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.909
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.771
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.349
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.808
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.808
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.762
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.811
Epoch: [6]  [ 0/60]  eta: 0:01:21  lr: 0.000050  loss: 0.1959 (0.1959)  loss_classifier: 0.0429 (0.0429)  loss_box_reg: 0.0312 (0.0312)  loss_mask: 0.1170 (0.1170)  loss_objectness: 0.0005 (0.0005)  loss_rpn_box_reg: 0.0045 (0.0045)  time: 1.3659  data: 0.6759  max mem: 3220
Epoch: [6]  [10/60]  eta: 0:00:33  lr: 0.000050  loss: 0.1959 (0.1922)  loss_classifier: 0.0324 (0.0302)  loss_box_reg: 0.0345 (0.0405)  loss_mask: 0.1150 (0.1172)  loss_objectness: 0.0007 (0.0007)  loss_rpn_box_reg: 0.0042 (0.0036)  time: 0.6678  data: 0.0650  max mem: 3220
Epoch: [6]  [20/60]  eta: 0:00:25  lr: 0.000050  loss: 0.1659 (0.1771)  loss_classifier: 0.0226 (0.0262)  loss_box_reg: 0.0245 (0.0334)  loss_mask: 0.1121 (0.1139)  loss_objectness: 0.0004 (0.0006)  loss_rpn_box_reg: 0.0020 (0.0031)  time: 0.5902  data: 0.0054  max mem: 3220
Epoch: [6]  [30/60]  eta: 0:00:18  lr: 0.000050  loss: 0.1597 (0.1768)  loss_classifier: 0.0216 (0.0259)  loss_box_reg: 0.0243 (0.0336)  loss_mask: 0.1115 (0.1141)  loss_objectness: 0.0002 (0.0006)  loss_rpn_box_reg: 0.0014 (0.0026)  time: 0.5963  data: 0.0069  max mem: 3220
Epoch: [6]  [40/60]  eta: 0:00:12  lr: 0.000050  loss: 0.1634 (0.1754)  loss_classifier: 0.0225 (0.0261)  loss_box_reg: 0.0316 (0.0336)  loss_mask: 0.1051 (0.1126)  loss_objectness: 0.0002 (0.0007)  loss_rpn_box_reg: 0.0014 (0.0025)  time: 0.5939  data: 0.0070  max mem: 3220
Epoch: [6]  [50/60]  eta: 0:00:06  lr: 0.000050  loss: 0.1779 (0.1817)  loss_classifier: 0.0226 (0.0265)  loss_box_reg: 0.0355 (0.0358)  loss_mask: 0.1079 (0.1158)  loss_objectness: 0.0005 (0.0007)  loss_rpn_box_reg: 0.0023 (0.0028)  time: 0.6046  data: 0.0070  max mem: 3220
Epoch: [6]  [59/60]  eta: 0:00:00  lr: 0.000050  loss: 0.1785 (0.1835)  loss_classifier: 0.0226 (0.0269)  loss_box_reg: 0.0391 (0.0370)  loss_mask: 0.1146 (0.1162)  loss_objectness: 0.0005 (0.0007)  loss_rpn_box_reg: 0.0029 (0.0028)  time: 0.6295  data: 0.0071  max mem: 3220
Epoch: [6] Total time: 0:00:37 (0.6201 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1671 (0.1671)  evaluator_time: 0.0036 (0.0036)  time: 0.3271  data: 0.1546  max mem: 3220
Test:  [49/50]  eta: 0:00:00  model_time: 0.1188 (0.1185)  evaluator_time: 0.0032 (0.0054)  time: 0.1285  data: 0.0038  max mem: 3220
Test: Total time: 0:00:06 (0.1348 s / it)
Averaged stats: model_time: 0.1188 (0.1185)  evaluator_time: 0.0032 (0.0054)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.834
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.960
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.636
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.842
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.387
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.875
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.875
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.879
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.761
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.909
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.769
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.348
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.807
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.807
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.750
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.811
Epoch: [7]  [ 0/60]  eta: 0:01:04  lr: 0.000050  loss: 0.1686 (0.1686)  loss_classifier: 0.0246 (0.0246)  loss_box_reg: 0.0334 (0.0334)  loss_mask: 0.1098 (0.1098)  loss_objectness: 0.0001 (0.0001)  loss_rpn_box_reg: 0.0007 (0.0007)  time: 1.0751  data: 0.4796  max mem: 3220
Epoch: [7]  [10/60]  eta: 0:00:31  lr: 0.000050  loss: 0.1710 (0.1774)  loss_classifier: 0.0246 (0.0262)  loss_box_reg: 0.0323 (0.0342)  loss_mask: 0.1111 (0.1136)  loss_objectness: 0.0002 (0.0013)  loss_rpn_box_reg: 0.0017 (0.0021)  time: 0.6377  data: 0.0485  max mem: 3220
Epoch: [7]  [20/60]  eta: 0:00:26  lr: 0.000050  loss: 0.1777 (0.1891)  loss_classifier: 0.0244 (0.0288)  loss_box_reg: 0.0323 (0.0376)  loss_mask: 0.1126 (0.1193)  loss_objectness: 0.0002 (0.0010)  loss_rpn_box_reg: 0.0020 (0.0025)  time: 0.6291  data: 0.0064  max mem: 3594
Epoch: [7]  [30/60]  eta: 0:00:19  lr: 0.000050  loss: 0.1745 (0.1848)  loss_classifier: 0.0244 (0.0275)  loss_box_reg: 0.0333 (0.0361)  loss_mask: 0.1159 (0.1179)  loss_objectness: 0.0003 (0.0009)  loss_rpn_box_reg: 0.0027 (0.0025)  time: 0.6398  data: 0.0077  max mem: 3594
Epoch: [7]  [40/60]  eta: 0:00:12  lr: 0.000050  loss: 0.1710 (0.1829)  loss_classifier: 0.0233 (0.0266)  loss_box_reg: 0.0319 (0.0354)  loss_mask: 0.1098 (0.1177)  loss_objectness: 0.0003 (0.0007)  loss_rpn_box_reg: 0.0022 (0.0026)  time: 0.6198  data: 0.0078  max mem: 3594
Epoch: [7]  [50/60]  eta: 0:00:06  lr: 0.000050  loss: 0.1543 (0.1784)  loss_classifier: 0.0230 (0.0254)  loss_box_reg: 0.0226 (0.0338)  loss_mask: 0.1051 (0.1162)  loss_objectness: 0.0002 (0.0007)  loss_rpn_box_reg: 0.0019 (0.0024)  time: 0.6141  data: 0.0072  max mem: 3594
Epoch: [7]  [59/60]  eta: 0:00:00  lr: 0.000050  loss: 0.1596 (0.1823)  loss_classifier: 0.0232 (0.0262)  loss_box_reg: 0.0304 (0.0356)  loss_mask: 0.1027 (0.1172)  loss_objectness: 0.0004 (0.0007)  loss_rpn_box_reg: 0.0020 (0.0025)  time: 0.6128  data: 0.0069  max mem: 3594
Epoch: [7] Total time: 0:00:37 (0.6300 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1593 (0.1593)  evaluator_time: 0.0036 (0.0036)  time: 0.3274  data: 0.1628  max mem: 3594
Test:  [49/50]  eta: 0:00:00  model_time: 0.1200 (0.1180)  evaluator_time: 0.0032 (0.0051)  time: 0.1273  data: 0.0036  max mem: 3594
Test: Total time: 0:00:06 (0.1336 s / it)
Averaged stats: model_time: 0.1200 (0.1180)  evaluator_time: 0.0032 (0.0051)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.830
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.960
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.636
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.839
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.386
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.871
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.871
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.876
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.758
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.909
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.766
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.346
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.804
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.804
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.750
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.808
Epoch: [8]  [ 0/60]  eta: 0:01:03  lr: 0.000050  loss: 0.1664 (0.1664)  loss_classifier: 0.0166 (0.0166)  loss_box_reg: 0.0234 (0.0234)  loss_mask: 0.1243 (0.1243)  loss_objectness: 0.0003 (0.0003)  loss_rpn_box_reg: 0.0019 (0.0019)  time: 1.0502  data: 0.2962  max mem: 3594
Epoch: [8]  [10/60]  eta: 0:00:32  lr: 0.000050  loss: 0.1711 (0.1639)  loss_classifier: 0.0242 (0.0239)  loss_box_reg: 0.0234 (0.0289)  loss_mask: 0.1054 (0.1086)  loss_objectness: 0.0003 (0.0005)  loss_rpn_box_reg: 0.0019 (0.0021)  time: 0.6525  data: 0.0332  max mem: 3594
Epoch: [8]  [20/60]  eta: 0:00:25  lr: 0.000050  loss: 0.1654 (0.1673)  loss_classifier: 0.0242 (0.0236)  loss_box_reg: 0.0269 (0.0291)  loss_mask: 0.1061 (0.1115)  loss_objectness: 0.0003 (0.0007)  loss_rpn_box_reg: 0.0020 (0.0023)  time: 0.6107  data: 0.0069  max mem: 3594
Epoch: [8]  [30/60]  eta: 0:00:18  lr: 0.000050  loss: 0.1639 (0.1703)  loss_classifier: 0.0229 (0.0246)  loss_box_reg: 0.0269 (0.0298)  loss_mask: 0.1140 (0.1130)  loss_objectness: 0.0004 (0.0007)  loss_rpn_box_reg: 0.0014 (0.0022)  time: 0.6023  data: 0.0069  max mem: 3594
Epoch: [8]  [40/60]  eta: 0:00:12  lr: 0.000050  loss: 0.1797 (0.1787)  loss_classifier: 0.0223 (0.0262)  loss_box_reg: 0.0325 (0.0341)  loss_mask: 0.1139 (0.1152)  loss_objectness: 0.0006 (0.0008)  loss_rpn_box_reg: 0.0021 (0.0025)  time: 0.6054  data: 0.0072  max mem: 3594
Epoch: [8]  [50/60]  eta: 0:00:06  lr: 0.000050  loss: 0.1870 (0.1784)  loss_classifier: 0.0272 (0.0263)  loss_box_reg: 0.0416 (0.0344)  loss_mask: 0.1075 (0.1144)  loss_objectness: 0.0006 (0.0008)  loss_rpn_box_reg: 0.0023 (0.0025)  time: 0.6120  data: 0.0073  max mem: 3594
Epoch: [8]  [59/60]  eta: 0:00:00  lr: 0.000050  loss: 0.1737 (0.1830)  loss_classifier: 0.0273 (0.0260)  loss_box_reg: 0.0329 (0.0358)  loss_mask: 0.1163 (0.1178)  loss_objectness: 0.0004 (0.0008)  loss_rpn_box_reg: 0.0027 (0.0027)  time: 0.6081  data: 0.0071  max mem: 3594
Epoch: [8] Total time: 0:00:36 (0.6159 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1682 (0.1682)  evaluator_time: 0.0035 (0.0035)  time: 0.3348  data: 0.1614  max mem: 3594
Test:  [49/50]  eta: 0:00:00  model_time: 0.1195 (0.1186)  evaluator_time: 0.0033 (0.0051)  time: 0.1274  data: 0.0036  max mem: 3594
Test: Total time: 0:00:06 (0.1343 s / it)
Averaged stats: model_time: 0.1195 (0.1186)  evaluator_time: 0.0033 (0.0051)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.832
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.960
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.636
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.840
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.384
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.873
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.873
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.877
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.757
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.908
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.765
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.346
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.804
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.804
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.750
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.808
Epoch: [9]  [ 0/60]  eta: 0:01:18  lr: 0.000005  loss: 0.2115 (0.2115)  loss_classifier: 0.0517 (0.0517)  loss_box_reg: 0.0533 (0.0533)  loss_mask: 0.1029 (0.1029)  loss_objectness: 0.0011 (0.0011)  loss_rpn_box_reg: 0.0025 (0.0025)  time: 1.3039  data: 0.4819  max mem: 3594
Epoch: [9]  [10/60]  eta: 0:00:33  lr: 0.000005  loss: 0.1589 (0.1738)  loss_classifier: 0.0208 (0.0268)  loss_box_reg: 0.0303 (0.0380)  loss_mask: 0.1029 (0.1063)  loss_objectness: 0.0003 (0.0006)  loss_rpn_box_reg: 0.0024 (0.0021)  time: 0.6726  data: 0.0478  max mem: 3594
Epoch: [9]  [20/60]  eta: 0:00:25  lr: 0.000005  loss: 0.1759 (0.1881)  loss_classifier: 0.0265 (0.0286)  loss_box_reg: 0.0303 (0.0412)  loss_mask: 0.1085 (0.1152)  loss_objectness: 0.0003 (0.0007)  loss_rpn_box_reg: 0.0022 (0.0024)  time: 0.6015  data: 0.0057  max mem: 3594
Epoch: [9]  [30/60]  eta: 0:00:18  lr: 0.000005  loss: 0.1759 (0.1852)  loss_classifier: 0.0271 (0.0281)  loss_box_reg: 0.0348 (0.0395)  loss_mask: 0.1043 (0.1138)  loss_objectness: 0.0003 (0.0010)  loss_rpn_box_reg: 0.0022 (0.0028)  time: 0.5967  data: 0.0070  max mem: 3594
Epoch: [9]  [40/60]  eta: 0:00:12  lr: 0.000005  loss: 0.1667 (0.1864)  loss_classifier: 0.0217 (0.0278)  loss_box_reg: 0.0317 (0.0398)  loss_mask: 0.1043 (0.1152)  loss_objectness: 0.0004 (0.0009)  loss_rpn_box_reg: 0.0020 (0.0027)  time: 0.6036  data: 0.0070  max mem: 3594
Epoch: [9]  [50/60]  eta: 0:00:06  lr: 0.000005  loss: 0.1667 (0.1858)  loss_classifier: 0.0217 (0.0270)  loss_box_reg: 0.0282 (0.0381)  loss_mask: 0.1162 (0.1172)  loss_objectness: 0.0004 (0.0009)  loss_rpn_box_reg: 0.0017 (0.0027)  time: 0.6051  data: 0.0071  max mem: 3594
Epoch: [9]  [59/60]  eta: 0:00:00  lr: 0.000005  loss: 0.1653 (0.1825)  loss_classifier: 0.0202 (0.0265)  loss_box_reg: 0.0257 (0.0360)  loss_mask: 0.1078 (0.1164)  loss_objectness: 0.0003 (0.0009)  loss_rpn_box_reg: 0.0014 (0.0026)  time: 0.5942  data: 0.0069  max mem: 3594
Epoch: [9] Total time: 0:00:36 (0.6147 s / it)
creating index...
index created!
Test:  [ 0/50]  eta: 0:00:16  model_time: 0.1718 (0.1718)  evaluator_time: 0.0037 (0.0037)  time: 0.3360  data: 0.1590  max mem: 3594
Test:  [49/50]  eta: 0:00:00  model_time: 0.1186 (0.1189)  evaluator_time: 0.0034 (0.0051)  time: 0.1271  data: 0.0037  max mem: 3594
Test: Total time: 0:00:06 (0.1339 s / it)
Averaged stats: model_time: 0.1186 (0.1189)  evaluator_time: 0.0034 (0.0051)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.834
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.959
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.636
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.842
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.386
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.875
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.875
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.812
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.879
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.756
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.992
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.907
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.764
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.347
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.804
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.804
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.750
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.808


# Take one image from the test set (the accompanying target is discarded).
img, _ = dataset_test[0]

# Switch the model to evaluation mode before running inference.
model.eval()

# Run the forward pass with gradient tracking disabled. The model is called
# with a one-element list holding the image moved to `device`.
with torch.no_grad():
    batch = [img.to(device)]
    prediction = model(batch)


print(prediction)

[{'boxes': tensor([[ 62.8530,  46.3704, 197.4942, 323.8831],
          [276.5706,  21.8190, 290.4983,  74.1342]], device='cuda:0'),
  'labels': tensor([1, 1], device='cuda:0'),
  'masks': tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            ...,
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.]]],
  
  
          [[[0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            ...,
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.]]]], device='cuda:0'),
  'scores': tensor([0.9988, 0.4812], device='cuda:0')}]


# Scale the image tensor by 255, move its first axis to the end, cast it to
# uint8 and hand the resulting array to PIL so it can be saved as a PNG.
# NOTE(review): presumably `img` is a channels-first float tensor in [0, 1];
# confirm against the dataset transform.
input_array = img.mul(255).permute(1, 2, 0).byte().numpy()
input_image = Image.fromarray(input_array)
input_image.save('input_image.png')


# Save the mask of the first detection: select mask [0, 0], scale it by 255,
# cast to uint8, copy it to the CPU and write it out as a PNG.
first_mask = prediction[0]['masks'][0, 0]
pred1 = Image.fromarray(first_mask.mul(255).byte().cpu().numpy())
pred1.save('pred1.png')


# Translator's note: two objects were detected, with confidence scores of
# 0.99 and 0.48.
detections = prediction[0]
print(len(detections['masks']))
print(detections['scores'])

2
tensor([0.9988, 0.4812], device='cuda:0')


# Render the mask of the person detected with confidence 0.48.
# Looking at the original image, there does indeed seem to be a person in the
# background.
second_mask = prediction[0]['masks'][1, 0]
pred2 = Image.fromarray(second_mask.mul(255).byte().cpu().numpy())
pred2.save('pred2.png')

「Torchvisionを利用した物体検出のファインチューニング手法」

データセットの定義

PennFudanのカスタム・データセットを作成する

モデルの定義

1 - 訓練済みモデルからのファイン・チューニング

2 - モデルを修正して別のメイン部分（backbone）に変更する

PennFudanデータセットに対するインスタンス・セグメンテーションのモデル構築

訓練と評価用の関数の実装

すべてをまとめる

まとめ