Commit cb065f90 authored by Maciej Wielgosz

first version of the simple model implemented

parent 344ca4f0
/simple-needles-2-class
/data
@@ -4,6 +4,73 @@
The original dataset is from [here](https://zenodo.org/records/4446842).
The local NIBIO copy of the dataset is [here](https://nibio-my.sharepoint.com/:f:/g/personal/maciej_wielgosz_nibio_no/Elq2J4unUjFOtVKnFwDvgecBnTRLdBm_F5H__1V1gs_upQ?e=XFoOl5).
# Austrian Tree Dataset Overview
Collected by: **Austrian Federal Forests AG**
Collection Period: **Autumn 2009 - Spring 2010**
Usage: **Non-commercial research only**
## Publication Reference
Fiel, S. & Sablatnig, R. (2010): Leaf classification using local features. In: Proc. of the 34th Annual Workshop of the Austrian Association for Pattern Recognition (AAPR), 2010, 69-74.
## Dataset Contents
### 1. Leaves of Broad Leaf Trees
- **Total Images:** 134
- **Types:**
- Ash (25 images)
- Beech (30 images)
- Hornbeam (34 images)
- Mountain Oak (22 images)
- Sycamore Maple (23 images)
- **Details:**
  - Image Scale: 800 pixels in height or 600 pixels in width
- Note: Ash leaves are compound, specifically pinnate
### 2. Bark of Trees
- **Total Images:** 1183
- **Types:**
- Ash (34 images)
- Beech (16 images)
- Black Pine (166 images, divided into 3 age-based sub-classes)
- Fir (127 images, divided into 3 age-based sub-classes)
- Hornbeam (42 images)
- Larch (200 images, divided into 3 age-based sub-classes)
- Mountain Oak (77 images)
- Scots Pine (190 images, divided into 3 age-based sub-classes)
- Spruce (213 images, divided into 3 age-based sub-classes)
- Swiss Stone Pine (96 images)
- Sycamore Maple (22 images)
- **Details:**
  - Image Scale: 800 pixels in height or 600 pixels in width
- Age Categories:
- Less than 60 years
- 60 to 80 years
- More than 80 years
### 3. Needles of Conifers
- **Total Images:** 275
- **Types:**
- Black Pine (107 images)
- Fir (10 images)
- Larch (114 images)
- Scots Pine (10 images)
- Spruce (13 images)
- Swiss Stone Pine (21 images)
- **Details:**
- Needle Classes:
- Separate growth (Fir, Spruce)
- Cluster growth (others)
- Lighting:
- Perfect conditions (Fir, Scots Pine, Spruce)
- Natural conditions (others)
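A quick way to cross-check the per-class counts above after downloading is a minimal sketch like the one below; the root path and the one-directory-per-class layout are assumptions about how the archive is unpacked:

``` python
import os

# Hypothetical root of one dataset part, with one sub-directory per class
dataset_root = "data/needles"

for class_name in sorted(os.listdir(dataset_root)):
    class_dir = os.path.join(dataset_root, class_name)
    if os.path.isdir(class_dir):
        n_images = sum(f.lower().endswith((".png", ".jpg"))
                       for f in os.listdir(class_dir))
        print(f"{class_name}: {n_images} images")
```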
## Getting started
%% Cell type:code id: tags:
``` python
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
# Define a transform to apply to each image
transform = transforms.Compose([
transforms.Resize((256, 256)), # Resize each image to 256x256
transforms.ToTensor(), # Convert image to a PyTorch tensor
transforms.Normalize(mean=[0.485, 0.456, 0.406], # Normalize for pre-trained models
std=[0.229, 0.224, 0.225])
])
data_path = "/home/nibio/mutable-outside-world/code/ml-department-workshop/data"
# Create a dataset for each set: train, validation, and test
train_dataset = datasets.ImageFolder(root=data_path + '/train', transform=transform)
val_dataset = datasets.ImageFolder(root=data_path + '/val', transform=transform)
test_dataset = datasets.ImageFolder(root=data_path + '/test', transform=transform)
# Create a DataLoader for each set
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)
```
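%% Cell type:markdown id: tags:
The mean/std values above are the standard ImageNet statistics, which keeps the pipeline compatible with pre-trained backbones if one is swapped in later. A quick sanity check of the three splits, using only attributes that `datasets.ImageFolder` always provides:
%% Cell type:code id: tags:
``` python
# Inspect the class-to-index mapping and the size of each split
print(train_dataset.class_to_idx)
print(f"train: {len(train_dataset)}, val: {len(val_dataset)}, test: {len(test_dataset)}")
```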
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
import numpy as np
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
# Assuming 'train_dataset' is already defined and loaded as in previous examples
# Make sure you have the 'train_dataset.class_to_idx' attribute available
# which is automatically created when using datasets.ImageFolder
# Function to show an image with labels
def imshow(img, labels):
img = img / 2 + 0.5 # unnormalize
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
# Display labels below the image
plt.xticks([]) # Remove x-axis ticks
plt.yticks([]) # Remove y-axis ticks
plt.xlabel(' - '.join('%5s' % train_dataset.classes[label] for label in labels), fontsize=10)
plt.show()
# Define transformations
transform = transforms.Compose([
transforms.Resize((256, 256)),
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
# Re-create the train_dataset and train_loader with this simpler normalization
# (so the unnormalize step in imshow above is exact)
train_dataset = datasets.ImageFolder(root=data_path + '/train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
# Get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)
# Show images with labels
imshow(torchvision.utils.make_grid(images), labels)
```
%% Cell type:code id: tags:
``` python
# create a simple CNN model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class SimpleCNN(nn.Module):
def __init__(self):
super(SimpleCNN, self).__init__()
self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
self.fc1 = nn.Linear(in_features=64 * 32 * 32, out_features=500)
self.fc2 = nn.Linear(in_features=500, out_features=2)
def forward(self, x):
x = self.pool1(F.relu(self.conv1(x))) # 16 x 128 x 128
x = self.pool2(F.relu(self.conv2(x))) # 32 x 64 x 64
x = self.pool3(F.relu(self.conv3(x))) # 64 x 32 x 32
x = x.view(-1, 64 * 32 * 32) # Flatten
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
# train the model
# Create an instance of the model
model = SimpleCNN()
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
# Use Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Train the model
num_epochs = 5
for epoch in range(num_epochs):
running_loss = 0.0
for i, data in enumerate(train_loader):
# Get the inputs
inputs, labels = data
# Zero the parameter gradients
optimizer.zero_grad()
# Forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
        # Accumulate the loss and print a running average for the epoch
        running_loss += loss.item()
        print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / (i + 1)))
# save the model
torch.save(model.state_dict(), 'simple_cnn.pth')
```
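%% Cell type:markdown id: tags:
The `in_features=64 * 32 * 32` of `fc1` follows from the input size: each of the three stride-2 poolings halves the spatial resolution, so a 256x256 input becomes 64 feature maps of 32x32. A shape check with a dummy batch confirms this wiring:
%% Cell type:code id: tags:
``` python
# Forward a dummy batch to confirm the model produces one logit per class
dummy = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    print(model(dummy).shape)  # expected: torch.Size([1, 2])
```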
%% Cell type:code id: tags:
``` python
# load the model and switch to evaluation mode
model = SimpleCNN()
model.load_state_dict(torch.load('simple_cnn.pth'))
model.eval()
# run the model on the test set and print the accuracy
correct = 0
total = 0
with torch.no_grad():
for data in test_loader:
images, labels = data
outputs = model(images)
        _, predicted = torch.max(outputs, dim=1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))
```
%% Output
Accuracy of the network on the test images: 77 %
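%% Cell type:markdown id: tags:
Overall accuracy can hide class imbalance, so a per-class breakdown is worth checking as well. A minimal sketch, reusing the model and `test_loader` from the cells above:
%% Cell type:code id: tags:
``` python
# Count correct predictions separately for each class
class_correct = [0] * len(test_dataset.classes)
class_total = [0] * len(test_dataset.classes)
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, dim=1)
        for label, pred in zip(labels, predicted):
            label = int(label)
            class_total[label] += 1
            class_correct[label] += int(pred == label)
for name, c, t in zip(test_dataset.classes, class_correct, class_total):
    print(f"{name}: {100 * c / max(t, 1):.1f} % ({c}/{t})")
```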
import sys
import os
import re
def clean_file_names(path):
"""
Clean file names in a directory. This function will replace all spaces with underscores,
replace all dashes with underscores, and change all file names to lowercase. If there are
numbers in brackets, they will be replaced with an underscore and the number.
Parameters
----------
path : str
Path to directory containing files to be renamed.
Returns
-------
None.
"""
for filename in os.listdir(path):
if filename.lower().endswith((".png", ".jpg")):
# replace all spaces with underscores
new_filename = re.sub(r"\s+", "_", filename)
# replace all dashes with underscores
new_filename = re.sub(r"-", "_", new_filename)
            # if there are numbers in brackets, e.g. "(3)", change them to an underscore followed by the number
new_filename = re.sub(r"\(\d+\)", lambda x: "_" + x.group()[1:-1], new_filename)
print(new_filename)
# rename file to new filename and change to lowercase
os.rename(
os.path.join(path, filename), os.path.join(path, new_filename.lower())
)
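# Example usage (the script name and data path are hypothetical):
#   python clean_file_names.py data/bark
# A file named "Black Pine (12).JPG" would become "black_pine__12.jpg"
# (the underscore before "(" plus the inserted one yields a double underscore).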
if __name__ == "__main__":
clean_file_names(sys.argv[1])
import os
import shutil
import numpy as np
class PrepareTrainValTest:
def __init__(self,
data_in_path,
data_out_path,
train_size=0.7,
val_size=0.15,
test_size=0.15,
verbose=False
):
self.data_in_path = data_in_path
self.data_out_path = data_out_path
self.train_size = train_size
self.val_size = val_size
self.test_size = test_size
self.verbose = verbose
def prepare_train_val_test(self):
"""
Prepare train, validation, and test data sets from raw data. This function will
create a directory structure that looks like this:
data
├── test
│ ├── class_1
│ ├── class_2
│ ├── class_3
│ └── class_4
├── train
│ ├── class_1
│ ├── class_2
│ ├── class_3
│ └── class_4
└── val
├── class_1
├── class_2
├── class_3
└── class_4
Parameters
----------
None.
Returns
-------
None.
"""
# get list of all classes
classes = os.listdir(self.data_in_path)
# create train, val, and test directories
for directory in ["train", "val", "test"]:
os.makedirs(os.path.join(self.data_out_path, directory), exist_ok=True)
for class_name in classes:
os.makedirs(
os.path.join(self.data_out_path, directory, class_name), exist_ok=True
)
        # loop through classes
        for class_name in classes:
            # get list of all files in the class directory
            files = os.listdir(os.path.join(self.data_in_path, class_name))
            # shuffle files so the split is random
            np.random.shuffle(files)
            if self.verbose:
                print(f"Splitting {len(files)} files for class '{class_name}'")
            # get number of files in the class directory
            num_files = len(files)
            # get number of files for each split (the remainder goes to test)
            num_train = int(num_files * self.train_size)
            num_val = int(num_files * self.val_size)
# loop through files
for i, file in enumerate(files):
# copy file to train directory
if i < num_train:
shutil.copy(
os.path.join(self.data_in_path, class_name, file),
os.path.join(self.data_out_path, "train", class_name, file),
)
# copy file to val directory
elif i < num_train + num_val:
shutil.copy(
os.path.join(self.data_in_path, class_name, file),
os.path.join(self.data_out_path, "val", class_name, file),
)
# copy file to test directory
else:
shutil.copy(
os.path.join(self.data_in_path, class_name, file),
os.path.join(self.data_out_path, "test", class_name, file),
)
if __name__ == "__main__":
# use argparse to get command line arguments
import argparse
parser = argparse.ArgumentParser(
description="Prepare train, validation, and test data sets from raw data."
)
parser.add_argument(
'-i',
"--data_in_path",
type=str,
required=True,
help="Path to directory containing raw data."
)
parser.add_argument(
'-o',
"--data_out_path",
type=str,
required=True,
help="Path to directory to save train, validation, and test data."
)
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose mode."
    )
args = parser.parse_args()
# create instance of PrepareTrainValTest class
prepare_train_val_test = PrepareTrainValTest(
data_in_path=args.data_in_path,
data_out_path=args.data_out_path,
verbose=args.verbose
)
# prepare train, validation, and test data sets
prepare_train_val_test.prepare_train_val_test()
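    # Example usage (the script name and paths are hypothetical):
    #   python prepare_train_val_test.py -i data/raw -o data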