From fd78121725bc15789cbddb86f4e2b87ae50c52ed Mon Sep 17 00:00:00 2001
From: Maciej Wielgosz <maciej.wielgosz@nibio.no>
Date: Fri, 24 Mar 2023 11:20:17 +0100
Subject: [PATCH] update dgcnn loader to handle a too-small final batch

---
 dgcnn/dataset_run.ipynb    | 16 +++++++++++-----
 dgcnn/dgcnn_train.py       | 20 ++++++++++++--------
 dgcnn/find_missing_data.py | 20 ++++++++++++++++++++
 3 files changed, 43 insertions(+), 13 deletions(-)
 create mode 100644 dgcnn/find_missing_data.py

diff --git a/dgcnn/dataset_run.ipynb b/dgcnn/dataset_run.ipynb
index 5265d6c..23ad7b8 100644
--- a/dgcnn/dataset_run.ipynb
+++ b/dgcnn/dataset_run.ipynb
@@ -39,17 +39,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "tensor([[-0.0148, -1.7624, -2.2409,  0.8483,  0.4169],\n",
-      "        [-0.3802, -0.0168, -1.9958,  0.9351, -1.0900],\n",
-      "        [ 0.3501, -1.3573, -1.8246, -1.2850,  0.2785]], requires_grad=True)\n",
-      "tensor([0, 0, 1])\n"
+      "tensor([[-0.1308, -0.9843,  0.0595, -1.4868, -0.0280],\n",
+      "        [ 1.6206, -0.0315,  0.6667,  2.3133, -1.0334],\n",
+      "        [ 0.3246,  0.6226, -0.9101,  0.1938,  0.6888]], requires_grad=True)\n",
+      "tensor([3, 4, 0])\n",
+      "tensor([[-1.2085,  0.0789, -0.9984,  0.3746, -0.6782],\n",
+      "        [-1.1200, -0.0271, -0.6976, -0.2629,  1.4103],\n",
+      "        [-2.2474,  0.5892,  0.8203, -1.2913, -1.5551]], requires_grad=True)\n",
+      "tensor([[0.2879, 0.0775, 0.0657, 0.4815, 0.0875],\n",
+      "        [0.2927, 0.2297, 0.3383, 0.1158, 0.0235],\n",
+      "        [0.2733, 0.3353, 0.1628, 0.0173, 0.2113]])\n"
      ]
     }
    ],
diff --git a/dgcnn/dgcnn_train.py b/dgcnn/dgcnn_train.py
index 08aeea0..59f502e 100644
--- a/dgcnn/dgcnn_train.py
+++ b/dgcnn/dgcnn_train.py
@@ -21,20 +21,22 @@ def train():
     # get data 
     shapenet_data = ShapenetDataDgcnn(
       root='/home/nibio/mutable-outside-world/code/oracle_gpu_runs/data/shapenet', 
-      npoints=32,
+      npoints=256,
       return_cls_label=True,
-      small_data=True,
+      small_data=False,
       small_data_size=1000,
-      split='train'
+      just_one_class=False,
+      split='train',
+      norm=True
       )
     
     # create a dataloader
     dataloader = torch.utils.data.DataLoader(
         shapenet_data,
-        batch_size=8,
-        shuffle=False,
-        num_workers=4,
-        drop_last=False
+        batch_size=4,
+        shuffle=True,
+        num_workers=8,
+        drop_last=True
         )
     
     # create a optimizer
@@ -43,7 +45,7 @@ def train():
     # create a config wandb
     wandb.config.update({
         "batch_size": 8,
-        "learning_rate": 0.0001,
+        "learning_rate": 0.01,
         "optimizer": "Adam",
         "loss_function": "cross_entropy"
         })
@@ -57,6 +59,8 @@ def train():
         print(f"Epoch: {epoch}")
         wandb.log({"epoch": epoch})
         for i, data in enumerate(dataloader, 0):
+            # print(f"Batch: {i}")
+         
             points, _, class_name = data
             points = points.cuda()
             class_name = class_name.cuda()
diff --git a/dgcnn/find_missing_data.py b/dgcnn/find_missing_data.py
new file mode 100644
index 0000000..d9bcc64
--- /dev/null
+++ b/dgcnn/find_missing_data.py
@@ -0,0 +1,20 @@
+"""Report ShapeNet training samples whose point count differs from the request."""
+from tqdm import tqdm
+from shapenet_data_dgcnn import ShapenetDataDgcnn
+
+NPOINTS = 128  # passed as npoints; each sample's data[0].shape[0] must equal it
+shapenet_data = ShapenetDataDgcnn(
+    root='/home/nibio/mutable-outside-world/code/oracle_gpu_runs/data/shapenet',
+    npoints=NPOINTS,
+    return_cls_label=True,
+    small_data=False,
+    small_data_size=1000,
+    just_one_class=False,
+    split='train',
+    norm=True,
+)
+# Load every sample once; tqdm.write keeps the messages from garbling the progress bar.
+for i in tqdm(range(len(shapenet_data))):
+    n_found = shapenet_data[i][0].shape[0]
+    if n_found != NPOINTS:
+        tqdm.write(f"Sample {i}: expected {NPOINTS} points, got {n_found}")
-- 
GitLab