Example of the aitlas toolbox for benchmarking a dataset

Contents

Example of the `aitlas` toolbox for benchmarking a dataset

This notebook shows a sample implementation of multi-class image classification with the `aitlas` toolbox on the UC Merced dataset, benchmarked over several train/test split ratios.

[18]:

import pandas as pd
import matplotlib.pyplot as plt

from aitlas.datasets import UcMercedDataset
from aitlas.models import VGG16
from aitlas.tasks import StratifiedSplitTask

Define the splits and dataset paths

[19]:

# Train/test percentages to benchmark: the train share runs from 10 to 90 in
# steps of 10 and the test share is the remainder. One tuple is (train, test).
splits = [(train_pct, 100 - train_pct) for train_pct in range(10, 100, 10)]

# where is the dataset on disk
data_dir = "/media/hdd/multi-class/UCMerced"

Loop through the splits, train and evaluate

[ ]:

# Benchmark loop: for every (train, test) ratio in `splits`, create the split
# files, build the two datasets, train a fresh VGG16 and record its F1 scores.
results = []  # one metrics entry per split, in the same order as `splits`

# Settings that do not depend on the split, so they are defined once up front.
epochs = 50
model_directory = "./experiments/uc_merced/"

# iterate through the splits
for train, test in splits:
    # Where to store the file names of the train / test images. The paths are
    # derived from data_dir so that changing the dataset location in one place
    # keeps the split files next to the data.
    train_csv_file = f"{data_dir}/train_{train}_{test}.csv"
    test_csv_file = f"{data_dir}/test_{train}_{test}.csv"

    # configure and run the split task
    split_config = {
        "split": {
            "train": {
                "ratio": train,
                "file": train_csv_file
            },
            "test": {
                "ratio": test,
                "file": test_csv_file
            }
        },
        "data_dir": data_dir
    }
    split_task = StratifiedSplitTask(None, split_config)
    split_task.run()

    # setup train set (shuffled, with the flip-augmenting transform)
    train_dataset_config = {
        "batch_size": 4,
        "shuffle": True,
        "num_workers": 4,
        "csv_file": train_csv_file,
        "data_dir": data_dir,
        "transforms": ["aitlas.transforms.ResizeCenterCropFlipHVToTensor"]
    }
    train_dataset = UcMercedDataset(train_dataset_config)

    # setup test set (fixed order, no flipping)
    test_dataset_config = {
        "batch_size": 4,
        "shuffle": False,
        "num_workers": 4,
        "csv_file": test_csv_file,
        "data_dir": data_dir,
        "transforms": ["aitlas.transforms.ResizeCenterCropToTensor"]
    }
    test_dataset = UcMercedDataset(test_dataset_config)
    print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")

    # setup model: a new VGG16 instance per split so no weights carry over
    # from the previous iteration
    model_config = {
        "num_classes": 21,
        "learning_rate": 0.0001,
        "pretrained": True
    }
    model = VGG16(model_config)
    model.prepare()

    # training and evaluation; the test set is passed as the validation set.
    # Each split gets its own run_id so the nine runs are not all recorded
    # under the same identifier in model_directory.
    # NOTE(review): assumes aitlas only uses run_id as a label / sub-folder
    # name under model_directory -- confirm against the aitlas version in use.
    model.train_and_evaluate_model(
        train_dataset=train_dataset,
        epochs=epochs,
        model_directory=model_directory,
        val_dataset=test_dataset,
        run_id=f"{train}_{test}",
    )

    # collect results
    results.append(model.running_metrics.f1_score())

See the results

[21]:

# One row per split: the (train, test) tuple next to its micro-averaged F1
# score, converted to a plain float and rounded to four decimals.
df = pd.DataFrame(
    [
        (split, round(float(metrics["F1_score Micro"]), 4))
        for split, metrics in zip(splits, results)
    ],
    columns=["Train/Test", "Micro F1 score"],
)
df

[21]:

	Train/Test	Micro F1 score
0	(10, 90)	0.7450
1	(20, 80)	0.7107
2	(30, 70)	0.8741
3	(40, 60)	0.8619
4	(50, 50)	0.9181
5	(60, 40)	0.8857
6	(70, 30)	0.9032
7	(80, 20)	0.8405
8	(90, 10)	0.9143

[22]:

# Line chart of the micro F1 score for each train/test split.
df.plot.line(x="Train/Test", y="Micro F1 score")
plt.show()

../_images/examples_land_use_classification_benchmark_8_0.png

[ ]: