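"""Train a DGCNN on a single ShapeNet category for point-cloud part
segmentation, logging metrics, 3D visualizations, and checkpoints to
Weights & Biases via PyTorch Geometric."""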
import os
import wandb
import random
import numpy as np
from tqdm.auto import tqdm
import torch
import torch.nn.functional as F
from torch_scatter import scatter
from torchmetrics.functional import jaccard_index
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet
from torch_geometric.loader import DataLoader
from torch_geometric.nn import MLP, DynamicEdgeConv
wandb_project = "pyg-point-cloud"  #@param {"type": "string"}
wandb_run_name = "train-dgcnn" #@param {"type": "string"}
wandb.init(
entity="maciej-wielgosz-nibio",
project=wandb_project,
name=wandb_run_name,
job_type="train"
)
config = wandb.config
config.seed = 42
config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Seed the Python, NumPy, and PyTorch RNGs for reproducibility.
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)
device = torch.device(config.device)
config.category = 'Car' #@param ["Bag", "Cap", "Car", "Chair", "Earphone", "Guitar", "Knife", "Lamp", "Laptop", "Motorbike", "Mug", "Pistol", "Rocket", "Skateboard", "Table"] {type:"raw"}
config.random_jitter_translation = 1e-2
config.random_rotation_interval_x = 15
config.random_rotation_interval_y = 15
config.random_rotation_interval_z = 15
config.validation_split = 0.2
config.batch_size = 4
config.num_workers = 6
config.num_nearest_neighbours = 30
config.aggregation_operator = "max"
config.dropout = 0.5
config.initial_lr = 1e-3
config.lr_scheduler_step_size = 5
config.gamma = 0.8
config.epochs = 1
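# Augmentation: jitter each point slightly and apply small random rotations
# about all three axes; NormalizeScale (run once as a pre-transform) centers
# each cloud and rescales it to fit inside the unit sphere.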
transform = T.Compose([
T.RandomJitter(config.random_jitter_translation),
T.RandomRotate(config.random_rotation_interval_x, axis=0),
T.RandomRotate(config.random_rotation_interval_y, axis=1),
T.RandomRotate(config.random_rotation_interval_z, axis=2)
])
pre_transform = T.NormalizeScale()
dataset_path = os.path.join('ShapeNet', config.category)
train_val_dataset = ShapeNet(
dataset_path, config.category, split='trainval',
transform=transform, pre_transform=pre_transform
)
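# One pass over the dataset to count how often each part label occurs.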
segmentation_class_frequency = {}
for idx in tqdm(range(len(train_val_dataset))):
    segmentation_label = train_val_dataset[idx].y.numpy().tolist()
    for label in set(segmentation_label):
        # Accumulate counts across samples instead of overwriting them.
        segmentation_class_frequency[label] = (
            segmentation_class_frequency.get(label, 0)
            + segmentation_label.count(label)
        )
class_offset = min(segmentation_class_frequency.keys())
print("Class Offset:", class_offset)
# The labels are deliberately left in the global ShapeNet part-index space:
# ShapeNet.seg_classes and the IoU computation below expect global part
# indices, so no offset is subtracted from data.y.
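# Deterministic split of the 'trainval' samples into training and validation.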
num_train_examples = int((1 - config.validation_split) * len(train_val_dataset))
train_dataset = train_val_dataset[:num_train_examples]
val_dataset = train_val_dataset[num_train_examples:]
train_loader = DataLoader(
train_dataset, batch_size=config.batch_size,
shuffle=True, num_workers=config.num_workers
)
val_loader = DataLoader(
val_dataset, batch_size=config.batch_size,
shuffle=False, num_workers=config.num_workers
)
visualization_loader = DataLoader(
val_dataset[:10], batch_size=1,
shuffle=False, num_workers=config.num_workers
)
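# DGCNN: each DynamicEdgeConv layer rebuilds a k-NN graph in the current
# feature space and aggregates edge features (max aggregation by default).
# The outputs of all three layers are concatenated as dense skip connections
# and fed to a shared per-point MLP head. The input is 6-D per point:
# xyz position plus the per-point normals that ShapeNet stores in data.x.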
class DGCNN(torch.nn.Module):
def __init__(self, out_channels, k=30, aggr='max'):
super().__init__()
self.conv1 = DynamicEdgeConv(
MLP([2 * 6, 64, 64]), k, aggr
)
self.conv2 = DynamicEdgeConv(
MLP([2 * 64, 64, 64]), k, aggr
)
self.conv3 = DynamicEdgeConv(
MLP([2 * 64, 64, 64]), k, aggr
)
self.mlp = MLP(
[3 * 64, 1024, 256, 128, out_channels],
dropout=0.5, norm=None
)
def forward(self, data):
x, pos, batch = data.x, data.pos, data.batch
x0 = torch.cat([x, pos], dim=-1)
x1 = self.conv1(x0, batch)
x2 = self.conv2(x1, batch)
x3 = self.conv3(x2, batch)
out = self.mlp(torch.cat([x1, x2, x3], dim=1))
return F.log_softmax(out, dim=1)
config.num_classes = train_dataset.num_classes
model = DGCNN(
    out_channels=config.num_classes,
    k=config.num_nearest_neighbours,
    aggr=config.aggregation_operator
).to(device)
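# Adam with a StepLR schedule: the learning rate is multiplied by `gamma`
# every `lr_scheduler_step_size` epochs.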
optimizer = torch.optim.Adam(model.parameters(), lr=config.initial_lr)
scheduler = torch.optim.lr_scheduler.StepLR(
optimizer, step_size=config.lr_scheduler_step_size, gamma=config.gamma
)
def train_step(epoch):
model.train()
ious, categories = [], []
total_loss = correct_nodes = total_nodes = 0
    # Scratch buffer used below to re-map global part indices to a local
    # 0..P-1 range for the per-category IoU.
    y_map = torch.empty(
        train_loader.dataset.num_classes, dtype=torch.long, device=device
    )
    num_batches = len(train_loader)
progress_bar = tqdm(
train_loader, desc=f"Training Epoch {epoch}/{config.epochs}"
)
for data in progress_bar:
data = data.to(device)
optimizer.zero_grad()
outs = model(data)
loss = F.nll_loss(outs, data.y)
loss.backward()
optimizer.step()
total_loss += loss.item()
correct_nodes += outs.argmax(dim=1).eq(data.y).sum().item()
total_nodes += data.num_nodes
sizes = (data.ptr[1:] - data.ptr[:-1]).tolist()
for out, y, category in zip(outs.split(sizes), data.y.split(sizes),
data.category.tolist()):
category = list(ShapeNet.seg_classes.keys())[category]
part = ShapeNet.seg_classes[category]
part = torch.tensor(part, device=device)
y_map[part] = torch.arange(part.size(0), device=device)
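            # ShapeNet.seg_classes[category] lists the global part indices of
            # this category (e.g. [8, 9, 10, 11] for 'Car'); y_map sends them
            # to 0..P-1 so the multiclass Jaccard index sees P local classes.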
iou = jaccard_index(
out[:, part].argmax(dim=-1), y_map[y],
task="multiclass", num_classes=part.size(0)
)
ious.append(iou)
categories.append(data.category)
    iou = torch.stack(ious)
    category = torch.cat(categories, dim=0)
    # Mean IoU per category, averaged over the categories actually present;
    # scatter would otherwise contribute zeros for absent category indices.
    mean_iou = float(scatter(iou, category, reduce='mean')[category.unique()].mean())
    return {
        "Train/Loss": total_loss / num_batches,
        "Train/Accuracy": correct_nodes / total_nodes,
        "Train/IoU": mean_iou
    }
@torch.no_grad()
def val_step(epoch):
model.eval()
ious, categories = [], []
total_loss = correct_nodes = total_nodes = 0
    y_map = torch.empty(
        val_loader.dataset.num_classes, dtype=torch.long, device=device
    )
    num_batches = len(val_loader)
progress_bar = tqdm(
val_loader, desc=f"Validating Epoch {epoch}/{config.epochs}"
)
for data in progress_bar:
data = data.to(device)
outs = model(data)
loss = F.nll_loss(outs, data.y)
total_loss += loss.item()
correct_nodes += outs.argmax(dim=1).eq(data.y).sum().item()
total_nodes += data.num_nodes
sizes = (data.ptr[1:] - data.ptr[:-1]).tolist()
for out, y, category in zip(outs.split(sizes), data.y.split(sizes),
data.category.tolist()):
category = list(ShapeNet.seg_classes.keys())[category]
part = ShapeNet.seg_classes[category]
part = torch.tensor(part, device=device)
y_map[part] = torch.arange(part.size(0), device=device)
iou = jaccard_index(
out[:, part].argmax(dim=-1), y_map[y],
task="multiclass", num_classes=part.size(0)
)
ious.append(iou)
categories.append(data.category)
    iou = torch.stack(ious)
    category = torch.cat(categories, dim=0)
    mean_iou = float(scatter(iou, category, reduce='mean')[category.unique()].mean())
    return {
        "Validation/Loss": total_loss / num_batches,
        "Validation/Accuracy": correct_nodes / total_nodes,
        "Validation/IoU": mean_iou
    }
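# Qualitative evaluation: for the first ten validation clouds, log the
# ground-truth and predicted segmentations to a W&B table as 3D objects.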
@torch.no_grad()
def visualization_step(epoch, table):
model.eval()
for data in tqdm(visualization_loader):
data = data.to(device)
outs = model(data)
predicted_labels = outs.argmax(dim=1)
accuracy = predicted_labels.eq(data.y).sum().item() / data.num_nodes
sizes = (data.ptr[1:] - data.ptr[:-1]).tolist()
ious, categories = [], []
        y_map = torch.empty(
            visualization_loader.dataset.num_classes, dtype=torch.long,
            device=device
        )
for out, y, category in zip(
outs.split(sizes), data.y.split(sizes), data.category.tolist()
):
category = list(ShapeNet.seg_classes.keys())[category]
part = ShapeNet.seg_classes[category]
part = torch.tensor(part, device=device)
y_map[part] = torch.arange(part.size(0), device=device)
iou = jaccard_index(
out[:, part].argmax(dim=-1), y_map[y],
task="multiclass", num_classes=part.size(0)
)
ious.append(iou)
categories.append(data.category)
        iou = torch.stack(ious)
        category = torch.cat(categories, dim=0)
        mean_iou = float(scatter(iou, category, reduce='mean')[category.unique()].mean())
        # Ground-truth cloud: append (label + 1) as a per-point color value
        # for wandb.Object3D.
        gt_pc_viz = data.pos.cpu().numpy().tolist()
        segmentation_label = data.y.cpu().numpy().tolist()
        for j in range(len(gt_pc_viz)):
            gt_pc_viz[j] += [segmentation_label[j] + 1]
        # Predicted cloud: identical layout, but colored by the model's
        # predicted labels rather than the ground truth.
        predicted_pc_viz = data.pos.cpu().numpy().tolist()
        predicted_label = predicted_labels.cpu().numpy().tolist()
        for j in range(len(predicted_pc_viz)):
            predicted_pc_viz[j] += [predicted_label[j] + 1]
table.add_data(
epoch, wandb.Object3D(np.array(gt_pc_viz)),
wandb.Object3D(np.array(predicted_pc_viz)),
accuracy, mean_iou
)
return table
def save_checkpoint(epoch):
"""Save model checkpoints as Weights & Biases artifacts"""
torch.save({
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict()
}, "checkpoint.pt")
artifact_name = wandb.util.make_artifact_name_safe(
f"{wandb.run.name}-{wandb.run.id}-checkpoint"
)
checkpoint_artifact = wandb.Artifact(artifact_name, type="checkpoint")
checkpoint_artifact.add_file("checkpoint.pt")
wandb.log_artifact(
checkpoint_artifact, aliases=["latest", f"epoch-{epoch}"]
)
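# Main loop: train, validate, log metrics and the current learning rate,
# update the visualization table, step the LR schedule, and checkpoint.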
table = wandb.Table(columns=["Epoch", "Ground-Truth", "Prediction", "Accuracy", "IoU"])
for epoch in range(1, config.epochs + 1):
train_metrics = train_step(epoch)
val_metrics = val_step(epoch)
metrics = {**train_metrics, **val_metrics}
metrics["learning_rate"] = scheduler.get_last_lr()[-1]
wandb.log(metrics)
table = visualization_step(epoch, table)
scheduler.step()
save_checkpoint(epoch)
wandb.log({"Evaluation": table})
wandb.finish()
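# A minimal sketch of how a logged checkpoint could be restored in a later
# run (the artifact name placeholder is illustrative, not produced above):
#
#   run = wandb.init(project=wandb_project, job_type="restore")
#   artifact = run.use_artifact("<artifact-name>:latest", type="checkpoint")
#   checkpoint_dir = artifact.download()
#   state = torch.load(os.path.join(checkpoint_dir, "checkpoint.pt"))
#   model.load_state_dict(state["model_state_dict"])
#   optimizer.load_state_dict(state["optimizer_state_dict"])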