Base#

aitlas.base.classification module#

class BaseMulticlassClassifier(config)[source]#

Bases: BaseModel

Base class for a multiclass classifier.

schema#

alias of BaseClassifierSchema

get_predicted(outputs, threshold=None)[source]#

Get predicted classes from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – The threshold for classification, defaults to None.

Returns:

tuple containing the probabilities and predicted classes

Return type:

tuple
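
As a rough, standalone illustration of what "probabilities and predicted classes" means for a multiclass model (this sketch is not the aitlas implementation itself, just the usual softmax/argmax logic applied to raw logits):

    import torch

    # Hypothetical logits for a batch of 2 samples and 3 classes.
    outputs = torch.tensor([[2.0, 0.5, -1.0],
                            [0.1, 0.2, 3.0]])

    # Convert logits to per-class probabilities, then take the most
    # probable class per sample.
    probs = torch.softmax(outputs, dim=1)   # shape (batch_size, num_classes)
    predicted = torch.argmax(probs, dim=1)  # tensor([0, 2])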

report(labels, dataset_name, running_metrics, **kwargs)[source]#

Generate a report for multiclass classification.

Parameters:
  • labels (list) – List of class labels

  • dataset_name (str) – Name of the dataset.

  • running_metrics (aitlas.base.metrics.RunningScore) – Type of metrics to be reported.

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#

Load the learning rate scheduler

training: bool#
class BaseMultilabelClassifier(config)[source]#

Bases: BaseModel

Base class for a multilabel classifier.

schema#

alias of BaseClassifierSchema

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#
get_predicted(outputs, threshold=None)[source]#

Get predicted classes from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – Threshold for classification, defaults to None

Returns:

Tuple containing the probabilities and predicted classes.

Return type:

tuple

report(labels, dataset_name, running_metrics, **kwargs)[source]#

Generate a report for multilabel classification.

Parameters:
  • labels (list) – List of class labels

  • dataset_name (str) – Name of the dataset.

  • running_metrics (aitlas.base.metrics.RunningScore) – Type of metrics to be reported. Currently only the confusion matrix is supported.

training: bool#

aitlas.base.config module#

class Config(config)[source]#

Bases: Munch

Config object used for automatic object creation from a dictionary.
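
Because Config is a Munch, keys of the input dictionary become attributes on the resulting object. A minimal illustration using Munch directly (the keys shown are arbitrary):

    from munch import Munch

    # Config subclasses Munch, so a plain Munch shows the core behaviour:
    # dictionary keys are reachable as attributes.
    cfg = Munch({"batch_size": 16, "shuffle": True})

    print(cfg.batch_size)  # 16
    print(cfg["shuffle"])  # True, dict-style access still works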

class ObjectConfig(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
class RunConfig(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Top level configuration schema

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
class Configurable(config)[source]#

Bases: ABC

Base class for all configurable objects.

schema = None#

aitlas.base.datasets module#

Dataset base class.

This is the base class for all datasets. All datasets should subclass it.

class BaseDataset(config)[source]#

Bases: Dataset, Configurable

This class represents a basic dataset for machine learning tasks. It is a subclass of both Dataset and Configurable. You can use it as a base class to define your own custom datasets.

BaseDataset constructor

Parameters:

config (Config) – Configuration object which specifies the details of the dataset (batch size, number of workers, list of labels, list of transformations).

schema#

alias of BaseDatasetSchema

name = None#
labels = None#
get_name()[source]#
prepare()[source]#

Implement if something needs to happen to the dataset after object creation

dataloader()[source]#

Create and return a dataloader for the dataset

get_labels()[source]#

Implement this if you want to return the complete set of labels of the dataset

show_batch(size)[source]#

Implement this if you want to return a random batch of images from the dataset

show_samples()[source]#

Implement this if you want to return random samples from the dataset

show_image(index)[source]#

Implement this if you want to return an image with a given index from the dataset

data_distribution_table()[source]#

Implement this if you want to return the label distribution of the dataset

data_distribution_barchart()[source]#

Implement this if you want to return the label distribution of the dataset as a barchart

load_transforms(class_names)[source]#

Loads transformation classes and makes a composition of them
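
To define a custom dataset, subclass BaseDataset and implement at least the standard torch Dataset protocol (__getitem__ and __len__), plus whichever of the hooks above you need. A minimal sketch, assuming BaseDataset is importable from aitlas.base and that the config keys below validate against BaseDatasetSchema:

    import torch
    from aitlas.base import BaseDataset  # assumed import path


    class InMemoryDataset(BaseDataset):
        """Toy dataset holding random tensors in memory (illustrative only)."""

        def __init__(self, config):
            super().__init__(config)
            # Hypothetical data: 10 single-channel 8x8 images, binary labels.
            self.images = torch.rand(10, 1, 8, 8)
            self.targets = torch.randint(0, 2, (10,))

        def __getitem__(self, index):
            return self.images[index], self.targets[index]

        def __len__(self):
            return len(self.images)

    # Hypothetical usage; dataloader() is inherited from BaseDataset.
    dataset = InMemoryDataset({"batch_size": 4, "shuffle": False})
    loader = dataset.dataloader()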

aitlas.base.metrics module#

class BaseMetric(device='cpu', **kwargs)[source]#

Bases: object

Base class for metrics

calculate(y_true, y_pred)[source]#
class RunningScore(num_classes, device)[source]#

Bases: object

update(y_true, y_pred, y_prob=None)[source]#

Updates stats on each batch

reset()[source]#

Reset the confusion matrix

get_computed()[source]#
precision()[source]#
accuracy()[source]#
weights()[source]#
recall()[source]#
f1_score()[source]#
iou()[source]#
get_scores(metrics)[source]#

Returns the specified metrics
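
A sketch of how these running-score objects are typically driven during evaluation: call update once per batch, then request the metrics of interest. The import path, tensor shapes and metric keys below are assumptions based on the signatures above:

    import torch
    from aitlas.base.metrics import MultiClassRunningScore  # assumed import path

    scores = MultiClassRunningScore(num_classes=3, device="cpu")

    # Pretend these pairs come from two evaluation batches.
    for y_true, y_pred in [
        (torch.tensor([0, 1, 2]), torch.tensor([0, 2, 2])),
        (torch.tensor([1, 1, 0]), torch.tensor([1, 0, 0])),
    ]:
        scores.update(y_true, y_pred)

    print(scores.get_scores(["accuracy", "f1_score"]))
    scores.reset()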

class MultiClassRunningScore(num_classes, device)[source]#

Bases: RunningScore

Calculates confusion matrix for multi-class data. This class contains metrics that are averaged over batches.

accuracy()[source]#
weights()[source]#
recall()[source]#
precision()[source]#
iou()[source]#
kappa()[source]#
class MultiLabelRunningScore(num_classes, device)[source]#

Bases: RunningScore

Calculates a confusion matrix for multi-labelled, multi-class data, in addition to the list of prediction probabilities.

reset()[source]#

Reset the confusion matrix and list of probabilities

update(y_true, y_pred, y_prob=None)[source]#

Updates stats on each batch

map()[source]#
roc_auc_score()[source]#
accuracy()[source]#
precision()[source]#
weights()[source]#
recall()[source]#
get_outcomes(total=False)[source]#

Return true/false positives/negatives from the confusion matrix

Parameters:

total (bool, optional) – Whether to return the totals or the per-class counts

count()[source]#
get_samples()[source]#
iou()[source]#
class SegmentationRunningScore(num_classes, device)[source]#

Bases: MultiLabelRunningScore

Calculates metrics for semantic segmentation

update(y_true, y_pred, y_prob=None)[source]#

Updates stats on each batch

class ObjectDetectionRunningScore(num_classes, device)[source]#

Bases: object

Calculates metrics for object detection

update(preds, target)[source]#

Updates stats on each batch

reset()[source]#

Reset the confusion matrix

compute()[source]#
map()[source]#

Returns the mean average precision (mAP)

map_50()[source]#

Returns the mean average precision at an IoU threshold of 0.5 (mAP@50)

get_scores(metrics)[source]#

Returns the specified metrics

aitlas.base.models module#

Models base class. This is the base class for all models. All models should subclass it.

class EarlyStopping(patience=10, min_delta=0)[source]#

Bases: object

Early stopping to stop the training when the loss does not improve after certain epochs.

EarlyStopping constructor

Parameters:
  • patience – how many epochs to wait before stopping when loss is not improving

  • min_delta – minimum difference between new loss and old loss for new loss to be considered as an improvement
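
The exact attribute and call conventions of EarlyStopping are not documented here, so the following sketch only reproduces the patience/min_delta logic in plain Python rather than exercising the class itself:

    def should_stop(losses, patience=10, min_delta=0.0):
        """Return True once the loss has not improved by more than
        min_delta for patience consecutive epochs."""
        best = float("inf")
        epochs_without_improvement = 0
        for loss in losses:
            if best - loss > min_delta:
                best = loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    return True
        return False

    print(should_stop([1.0, 0.9, 0.9, 0.9, 0.9], patience=3))  # True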

class BaseModel(config=None)[source]#

Bases: Module, Configurable

Basic class abstracting a model. Contains methods for training, evaluation and also utility methods for loading, saving a model to storage.

BaseModel constructor

Parameters:

config (Config, optional) – Configuration object which specifies the details of the model, defaults to None.

schema#

alias of BaseModelSchema

name = None#
log_loss = True#
prepare()[source]#

Prepare the model before using it. Loads the loss criterion, optimizer, lr scheduler and early stopping.

fit(dataset, epochs=100, model_directory=None, save_epochs=10, iterations_log=100, resume_model=None, val_dataset=None, run_id=None, **kwargs)[source]#

Main method to train the model. It trains the model for the specified number of epochs and saves the model after every save_epochs epochs. It also logs the loss every iterations_log iterations.

Parameters:
  • dataset (aitlas.base.BaseDataset) – Dataset object which contains the training data.

  • epochs (int, optional) – Number of epochs to train the model, defaults to 100

  • model_directory (str, optional) – Location where the model checkpoints will be stored or should be loaded from, defaults to None

  • save_epochs (int, optional) – Number of epochs after which a checkpoint is saved, defaults to 10

  • iterations_log (int, optional) – Number of iterations after which the training status will be logged, defaults to 100

  • resume_model (str, optional) – Path to a saved model from which to resume training, defaults to None

  • val_dataset (aitlas.base.BaseDataset, optional) – Dataset object which contains the validation data, defaults to None

  • run_id (str, optional) – Optional id to identify the experiment, defaults to None

Returns:

Returns the loss at the end of training.

Return type:

float
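
A sketch of a typical training call, assuming concrete dataset and model subclasses are already available; MyDataset and MyClassifier are placeholders, not aitlas classes, and the config keys are illustrative only:

    # Hypothetical concrete subclasses; substitute real aitlas datasets/models.
    train_dataset = MyDataset({"batch_size": 32, "shuffle": True})
    val_dataset = MyDataset({"batch_size": 32, "shuffle": False})
    model = MyClassifier({"num_classes": 10, "learning_rate": 0.001})

    model.prepare()  # loads criterion, optimizer, lr scheduler, early stopping
    loss = model.fit(
        dataset=train_dataset,
        epochs=50,
        model_directory="./experiments/run1",
        save_epochs=5,
        val_dataset=val_dataset,
        run_id="baseline",
    )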

train_epoch(epoch, dataloader, optimizer, criterion, iterations_log)[source]#
evaluate(dataset=None, model_path=None)[source]#

Evaluate a model stored in a specified path against a given dataset

Parameters:
  • dataset (BaseDataset | None) – the dataset to evaluate against

  • model_path (str | None) – the path to the model on disk

Returns:

evaluate_model(dataloader, criterion=None, description='testing on validation set')[source]#

Evaluates the current model against the specified dataloader for the configured metrics.

Parameters:
  • dataloader – The dataloader to evaluate against

  • criterion – Criterion used to calculate the loss

  • description – Text to show in the progress bar

Returns:

tuple of (metrics, y_true, y_pred)

predict(dataset=None, description='running prediction')[source]#

Runs prediction with the model on the specified dataset

Returns:

tuple of (y_true, y_pred, y_pred_probs)

Return type:

tuple

Parameters:

dataset (BaseDataset | None) –
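
Continuing the hypothetical setup from the training sketch above, the three returned sequences are aligned per sample, so simple post-processing works directly:

    # model and val_dataset are the placeholders from the fit() sketch.
    y_true, y_pred, y_prob = model.predict(dataset=val_dataset)

    accuracy = sum(int(t == p) for t, p in zip(y_true, y_pred)) / len(y_true)
    print(f"accuracy: {accuracy:.3f}")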

predict_image(image=None, labels=None, data_transforms=None, description='running prediction for single image')[source]#

Runs prediction with the model on the specified image

Returns:

Plot containing the image and the predictions.

Return type:

matplotlib.figure.Figure

predict_masks(image=None, labels=None, data_transforms=None, description='running prediction for single image')[source]#

Runs prediction with the model on the specified image

Returns:

Plot of the predicted masks

Return type:

matplotlib.figure.Figure

detect_objects(image=None, labels=None, data_transforms=None, description='running object detection for single image')[source]#

Runs object detection with the model on the specified image

Returns:

Plots the image with the object boundaries.

Return type:

matplotlib.figure.Figure

predict_output_per_batch(dataloader, description)[source]#

Run predictions on a dataloader and return inputs, outputs, labels per batch

forward(*input, **kwargs)[source]#

Abstract method implementing the model. Extending classes should override this method.

Returns:

Instance extending nn.Module

Return type:

nn.Module
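
A minimal sketch of such an override, assuming BaseModel is importable from aitlas.base and that constructing it with config=None is acceptable; the layer sizes are arbitrary:

    import torch.nn as nn
    from aitlas.base import BaseModel  # assumed import path


    class TinyClassifier(BaseModel):
        """Illustrative subclass: one linear layer over flattened input."""

        def __init__(self, config=None):
            super().__init__(config)
            self.flatten = nn.Flatten()
            self.linear = nn.Linear(64, 2)

        def forward(self, x):
            # The override simply runs the input through the wrapped layers.
            return self.linear(self.flatten(x))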

get_predicted(outputs, threshold=None)[source]#

Gets the output from the model and returns the predictions

Returns:

Tuple in the format (probabilities, predicted classes/labels)

Return type:

tuple

report(labels, dataset_name, running_metrics, **kwargs)[source]#

The report we want to generate for the model

log_metrics(output, labels, tag='train', writer=None, epoch=0)[source]#

Log the calculated metrics

allocate_device(opts=None)[source]#

Put the model on CPU or GPU

Returns:

Return the model on CPU or GPU.

Return type:

nn.Module

save_model(model_directory, epoch, optimizer, loss, start, run_id)[source]#

Saves the model on disk

Parameters:
  • model_directory – Directory to save the model

  • epoch – Epoch number of the checkpoint

  • optimizer – Optimizer used

  • loss – Criterion used

  • start – Start time of training

  • run_id – Run id of the model

extract_features(*input, **kwargs)[source]#

Abstract method for trimming the model to extract features. Extending classes should override this method.

Returns:

Instance of the model architecture

Return type:

nn.Module

load_model(file_path, optimizer=None)[source]#

Loads a model from a checkpoint

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#
train_model(train_dataset, epochs=100, model_directory=None, save_epochs=10, iterations_log=100, resume_model=None, val_dataset=None, run_id=None, **kwargs)[source]#

Main method that trains the model.

Parameters:
  • train_dataset (BaseDataset) – Dataset to train the model

  • epochs (int, optional) – Number of epochs for training, defaults to 100

  • model_directory (str, optional) – Directory where the model checkpoints will be saved, defaults to None

  • save_epochs (int, optional) – Number of epochs to save a checkpoint of the model, defaults to 10

  • iterations_log (int, optional) – The number of iterations to pass before logging the system state, defaults to 100

  • resume_model (str, optional) – Path to an already trained model from which to resume training, defaults to None

  • val_dataset (BaseDataset, optional) – Dataset used for validation, defaults to None

  • run_id (str, optional) – Optional run id to identify the experiment, defaults to None

Returns:

Return the loss of the model

train_and_evaluate_model(train_dataset, epochs=100, model_directory=None, save_epochs=10, iterations_log=100, resume_model=None, val_dataset=None, run_id=None, **kwargs)[source]#

Method that trains and evaluates the model.

Parameters:
  • train_dataset (BaseDataset) – Dataset to train the model

  • epochs (int, optional) – Number of epochs for training, defaults to 100

  • model_directory (str, optional) – Model directory where the model checkpoints will be saved, defaults to None

  • save_epochs (int, optional) – Number of epochs to save a checkpoint of the model, defaults to 10

  • iterations_log (int, optional) – Number of iterations to pass before logging the system state, defaults to 100

  • resume_model (str, optional) – Path to an already trained model from which to resume training, defaults to None

  • val_dataset (BaseDataset, optional) – Dataset used for validation, defaults to None

  • run_id (str, optional) – Run id to identify the experiment, defaults to None

Returns:

Loss of the model

training: bool#

aitlas.base.object_detection module#

class BaseObjectDetection(config)[source]#

Bases: BaseModel

This class extends the functionality of the BaseModel class by adding object detection specific functionality.

schema#

alias of BaseObjectDetectionSchema

log_loss = True#
get_predicted(outputs, threshold=0.3)[source]#

Get predicted objects from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – The confidence threshold for detections, defaults to 0.3.

Returns:

List of dictionaries containing the predicted bounding boxes, scores and labels.

Return type:

list

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#

Load the learning rate scheduler

train_epoch(epoch, dataloader, optimizer, criterion, iterations_log)[source]#

Train the model for a single epoch.

Parameters:
  • epoch – The current epoch number.

  • dataloader – The data loader for the training set.

  • optimizer – The optimizer.

  • criterion – The loss function.

  • iterations_log – The number of iterations after which to log the loss.

Returns:

The average loss over the entire epoch.

Return type:

float

predict_output_per_batch(dataloader, description)[source]#

Run predictions on a dataloader and return inputs, outputs, targets per batch

Parameters:
  • dataloader (aitlas.base.BaseDataLoader) – Data loader for the prediction set.

  • description (str) – Description of the task for logging purposes.

Yield:

Yields a tuple of (inputs, outputs, targets)

Return type:

tuple

evaluate_model(dataloader, criterion=None, description='testing on validation set')[source]#

Method used to evaluate the model on a validation set.

Parameters:
  • dataloader (aitlas.base.BaseDataLoader) – Data loader for the validation set.

  • criterion (_type_, optional) – The loss function, defaults to None.

  • description (str, optional) – Description of the task for logging purposes, defaults to “testing on validation set”

Returns:

Returns the mAP score of the model on the validation set.

Return type:

float

training: bool#

aitlas.base.schemas module#

class BaseDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Schema for configuring a base dataset.

Parameters:
  • batch_size (int, optional) – Batch size for the dataset. Default is 64.

  • shuffle (bool, optional) – Flag indicating whether to shuffle the dataset. Default is True.

  • num_workers (int, optional) – Number of workers to use for data loading. Default is 4.

  • pin_memory (bool, optional) – Flag indicating whether to use page-locked memory. Default is False.

  • transforms (List[str], optional) – Classes to run transformations over the input data.

  • target_transforms (List[str], optional) – Classes to run transformations over the target data.

  • joint_transforms (List[str], optional) – Classes to run transformations over the input and target data.

  • labels (List[str], optional) – Labels for the dataset.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
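
These schemas are ordinary marshmallow Schema subclasses, so a configuration dictionary can be validated by loading it. A sketch, assuming the import path below and that no additional fields are required:

    from aitlas.base.schemas import BaseDatasetSchema  # assumed import path

    config = {
        "batch_size": 16,
        "shuffle": True,
        "num_workers": 2,
        "labels": ["forest", "water", "urban"],
    }

    # load() validates the dictionary against the schema; unknown or
    # malformed fields raise a marshmallow ValidationError.
    validated = BaseDatasetSchema().load(config)
    print(validated)
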
class BaseModelSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Schema for configuring a base model.

Parameters:
  • num_classes (int, optional) – Number of classes for the model. Default is 2.

  • use_cuda (bool, optional) – Flag indicating whether to use CUDA if available. Default is True.

  • metrics (List[str], optional) – Metrics to calculate during training and evaluation. Default is [‘f1_score’].

  • weights (List[float], optional) – Class weights to apply for the loss function. Default is None.

  • rank (int, optional) – Rank value for distributed data processing. Default is 0.

  • use_ddp (bool, optional) – Flag indicating whether to turn on distributed data processing. Default is False.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
class BaseClassifierSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseModelSchema

Schema for configuring a base classifier.

Parameters:
  • learning_rate (float, optional) – Learning rate used in training. Default is 0.01.

  • weight_decay (float, optional) – Weight decay used in training. Default is 0.0.

  • pretrained (bool, optional) – Flag indicating whether to use a pretrained model. Default is True.

  • local_model_path (str, optional) – Local path of the pretrained model. Default is None.

  • threshold (float, optional) – Prediction threshold if needed. Default is 0.5.

  • freeze (bool, optional) – Flag indicating whether to freeze all layers except for the classifier layer(s). Default is False.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
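
In the same way, a classifier configuration can be checked against BaseClassifierSchema; the field names below come from BaseModelSchema and the classifier parameters listed above, and the import path is assumed:

    from aitlas.base.schemas import BaseClassifierSchema  # assumed import path

    classifier_config = {
        "num_classes": 10,
        "learning_rate": 0.001,
        "pretrained": True,
        "metrics": ["f1_score"],
    }

    print(BaseClassifierSchema().load(classifier_config))
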
class BaseSegmentationClassifierSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseClassifierSchema

Schema for configuring a base segmentation classifier.

Parameters:
  • metrics (List[str], optional) – Classes of metrics you want to calculate during training and evaluation. Default is [‘iou’, ‘f1_score’, ‘accuracy’].

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class BaseObjectDetectionSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseClassifierSchema

Schema for configuring a base object detection model.

Parameters:
  • metrics (List[str], optional) – Classes of metrics you want to calculate during training and evaluation. Default is [‘map’].

  • step_size (int, optional) – Step size for the learning rate scheduler. Default is 15.

  • gamma (float, optional) – Gamma (multiplier) for the learning rate scheduler. Default is 0.1.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class BaseTransformsSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#

aitlas.base.segmentation module#

class BaseSegmentationClassifier(config)[source]#

Bases: BaseModel

Base class for a segmentation classifier.

schema#

alias of BaseSegmentationClassifierSchema

get_predicted(outputs, threshold=None)[source]#

Get predicted classes from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – The threshold for classification, defaults to None.

Returns:

tuple containing the probabilities and predicted classes

Return type:

tuple

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#

Load the learning rate scheduler

training: bool#

aitlas.base.tasks module#

class BaseTask(model, config)[source]#

Bases: Configurable

static create_dataset(dataset_config)[source]#

Builds the input dataset using the provided configuration.

generate_task_id()[source]#

Generates a task ID

run()[source]#

Runs the task.
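
A sketch of how a task is typically wired together and executed; SomeTrainTask stands in for a concrete BaseTask subclass and the config keys are placeholders, not a documented aitlas configuration:

    # model is a configured BaseModel instance; SomeTrainTask is hypothetical.
    task = SomeTrainTask(model, {"epochs": 10, "model_directory": "./experiments"})
    task.run()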

aitlas.base.transforms module#

Base class for implementing configurable transformations

load_transforms(class_names, config)[source]#

Loads transformation classes and makes a composition of them

class BaseTransforms(*args, **kwargs)[source]#

Bases: object

Base class for implementing configurable transformations

schema#

alias of BaseTransformsSchema

configurables = None#

aitlas.base.visualizations module#

Base class for implementing visualizations.

class BaseVisualization(cm, labels, file, **kwargs)[source]#

Bases: object

Base class for visualizations

plot()[source]#
class BaseDetailedVisualization(y_true, y_pred, y_prob, labels, file, **kwargs)[source]#

Bases: BaseVisualization

Base class for detailed visualizations

plot()[source]#