From 67bf88035516e337f4bcc12c3b587e171827d74d Mon Sep 17 00:00:00 2001
From: Maciej Wielgosz <maciej.wielgosz@nibio.no>
Date: Fri, 12 Jan 2024 11:10:47 +0100
Subject: [PATCH] Update docs and pipeline for the 3-class dataset

---
 README.md               | 19 +++++++++----------
 models/simple_cnn.py    |  2 +-
 pipeline/data_loader.py | 11 +++++++++++
 pipeline/run.py         | 39 ++++-----------------------------------
 4 files changed, 25 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index e00c2b2..b877887 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
-# ml-department-workshop
+# General info
+
+This is the repository used for the NIBIO ML department tutorial.
 
 # Dataset
 
@@ -69,20 +71,17 @@ Fiel, S. & Sablatnig, R. (2010): Leaf classification using local features. In: P
     - Natural conditions (others)
 
 
+## Getting started with the tutorial
 
+Use [Google Colab](https://colab.research.google.com/?utm_source=scs-index) and create a new notebook.
 
+Clone the repository in Google Colab with the following command: `!git clone https://gitlab.nibio.no/maciekwielgosz/ml-department-workshop.git`
 
-## Getting started
-
-To make it easy for you to get started with GitLab, here's a list of recommended next steps.
-
-Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
+Install the package used for downloading the data: `!pip install gdown`.
 
-## Installation
-Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
+Get the data with the following command: `!gdown https://drive.google.com/uc?id=1D6z3UbCoBOhOs8lhasgm-ap58-uPdDY-`
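+
+Taken together, the setup can be run as a single Colab cell, sketched below (the `%cd` target assumes git's default clone directory name):
+
+```
+!git clone https://gitlab.nibio.no/maciekwielgosz/ml-department-workshop.git
+%cd ml-department-workshop
+!pip install gdown
+!gdown https://drive.google.com/uc?id=1D6z3UbCoBOhOs8lhasgm-ap58-uPdDY-
+```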
 
-## Usage
-Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
+The tutorial is based on the contents of the `run.py` script in the cloned repository. You can gradually copy the commands from there and modify them.
 
 ## Support
 Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
diff --git a/models/simple_cnn.py b/models/simple_cnn.py
index 5e26845..14ac342 100644
--- a/models/simple_cnn.py
+++ b/models/simple_cnn.py
@@ -14,7 +14,7 @@ class SimpleCNN(nn.Module):
         self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
         self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
         self.fc1 = nn.Linear(in_features=64 * 32 * 32, out_features=500)
-        self.fc2 = nn.Linear(in_features=500, out_features=2)
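+        # the final layer's out_features must match the number of dataset classes (3 for the 3-class data)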
+        self.fc2 = nn.Linear(in_features=500, out_features=3)
 
     def forward(self, x, return_activations=False):
         activations = {}
diff --git a/pipeline/data_loader.py b/pipeline/data_loader.py
index d469080..1d7061d 100644
--- a/pipeline/data_loader.py
+++ b/pipeline/data_loader.py
@@ -20,4 +20,15 @@ def create_data_loaders(data_path, batch_size=8, num_workers=4):
     val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
     test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
 
+
+    # Get class names and their corresponding indices
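+    # (class_to_idx maps each class folder name to the integer label used by the loaders)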
+    train_class_to_idx = train_dataset.class_to_idx
+    val_class_to_idx = val_dataset.class_to_idx
+    test_class_to_idx = test_dataset.class_to_idx
+
+    # Optionally, print the class names and indices
+    print("Training class indices:", train_class_to_idx)
+    print("Validation class indices:", val_class_to_idx)
+    print("Test class indices:", test_class_to_idx)
+
     return train_loader, val_loader, test_loader
diff --git a/pipeline/run.py b/pipeline/run.py
index e8b3aa7..d69d6ab 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -10,7 +10,9 @@
 
 from prepare_data.clean_file_names import clean_file_names
 
-RAW_DATA_PATH = "/home/nibio/mutable-outside-world/code/ml-department-workshop/ml-department-workshop-dataset/simple-needles-2-class"
+# RAW_DATA_PATH = "/home/nibio/mutable-outside-world/code/ml-department-workshop/ml-department-workshop-dataset/simple-needles-2-class"
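+# Note: the 3-class dataset needs a model with 3 output units (see fc2 in models/simple_cnn.py)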
+RAW_DATA_PATH = "/home/nibio/mutable-outside-world/code/ml-department-workshop/ml-department-workshop-dataset/simple-needles-3-class"
+
 
 # Clean file and directory names
 clean_file_names(RAW_DATA_PATH)
@@ -42,7 +44,7 @@ show_sample_images(DATA_PATH, 'output_image.png')
 
 
 ############################## this section trains the model ##############################
-TRAIN = False
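+# Set to False to skip the training section below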
+TRAIN = True
 
 if TRAIN:
     # Import necessary packages for training
@@ -141,39 +143,6 @@ plt.ylabel('True label')
 # save the confusion matrix
 plt.savefig('confusion_matrix.png')
 
-############################## this section plots the ROC curve ##############################
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn.metrics import roc_curve, auc
-
-# Get the predictions for the test data
-y_pred = []
-y_true = []
-
-with torch.no_grad():
-    for data in test_loader:
-        images, labels = data
-        outputs = model(images)
-        _, predicted = torch.max(outputs.data, dim=1)
-        y_pred += predicted.tolist()
-        y_true += labels.tolist()
-
-# Get the ROC curve
-fpr, tpr, _ = roc_curve(y_true, y_pred)
-roc_auc = auc(fpr, tpr)
-
-# Plot the ROC curve
-plt.figure(figsize=(10, 10))
-plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
-plt.plot([0, 1], [0, 1], 'k--')  # Add a diagonal line for reference
-plt.xlabel('False Positive Rate')
-plt.ylabel('True Positive Rate')
-plt.title('ROC Curve')
-plt.legend(loc="lower right")
-# save the ROC curve
-plt.savefig('roc_curve.png')
-
-
 ############################## this section computes precison, recall and F1-score ##############################
 import matplotlib.pyplot as plt
 import numpy as np
-- 
GitLab