Base#

aitlas.base.classification module#

class BaseMulticlassClassifier(config)[source]#

Bases: BaseModel

Base class for a multiclass classifier.

schema#

alias of BaseClassifierSchema

get_predicted(outputs, threshold=None)[source]#

Get predicted classes from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – The threshold for classification, defaults to None.

Returns:

tuple containing the probabilities and predicted classes

Return type:

tuple
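
As a rough, standalone illustration of what "probabilities and predicted classes" means for a multiclass model (this sketch is not the aitlas implementation itself, just the usual softmax/argmax logic applied to raw logits):

    import torch

    # Hypothetical logits for a batch of 2 samples and 3 classes.
    outputs = torch.tensor([[2.0, 0.5, -1.0],
                            [0.1, 0.2, 3.0]])

    # Convert logits to per-class probabilities, then take the most
    # probable class per sample.
    probs = torch.softmax(outputs, dim=1)   # shape (batch_size, num_classes)
    predicted = torch.argmax(probs, dim=1)  # tensor([0, 2])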

report(labels, dataset_name, running_metrics, **kwargs)[source]#

Generate a report for multiclass classification.

Parameters:
  • labels (list) – List of class labels

  • dataset_name (str) – Name of the dataset.

  • running_metrics (aitlas.base.metrics.RunningScore) – Type of metrics to be reported.

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#

Load the learning rate scheduler

training: bool#
class BaseMultilabelClassifier(config)[source]#

Bases: BaseModel

Base class for a multilabel classifier.

schema#

alias of BaseClassifierSchema

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#
get_predicted(outputs, threshold=None)[source]#

Get predicted classes from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – Threshold for classification, defaults to None

Returns:

Tuple containing the probabilities and predicted classes.

Return type:

tuple

report(labels, dataset_name, running_metrics, **kwargs)[source]#

Generate a report for multilabel classification.

Parameters:
  • labels (list) – List of class labels

  • dataset_name (str) – Name of the dataset.

  • running_metrics (aitlas.base.metrics.RunningScore) – Type of metrics to be reported. Currently only the confusion matrix is supported.

training: bool#

aitlas.base.config module#

class Config(config)[source]#

Bases: Munch

Config object used for automatic object creation from a dictionary.
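
Because Config is a Munch, keys of the input dictionary become attributes on the resulting object. A minimal illustration using Munch directly (the keys shown are arbitrary):

    from munch import Munch

    # Config subclasses Munch, so a plain Munch shows the core behaviour:
    # dictionary keys are reachable as attributes.
    cfg = Munch({"batch_size": 16, "shuffle": True})

    print(cfg.batch_size)  # 16
    print(cfg["shuffle"])  # True, dict-style access still works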

class ObjectConfig(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
class RunConfig(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Top level configuration schema

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
class Configurable(config)[source]#

Bases: ABC

Base class for all configurable objects.

schema = None#

aitlas.base.datasets module#

Dataset base class.

This is the base class for all datasets. All datasets should subclass it.

class BaseDataset(config)[source]#

Bases: Dataset, Configurable

This class represents a basic dataset for machine learning tasks. It is a subclass of both Dataset and Configurable. You can use it as a base class to define your own custom datasets.

BaseDataset constructor

Parameters:

config (Config) – Configuration object which specifies the details of the dataset (batch size, number of workers, list of labels, list of transformations).

schema#

alias of BaseDatasetSchema

name = None#
labels = None#
get_name()[source]#
prepare()[source]#

Implement if something needs to happen to the dataset after object creation

dataloader()[source]#

Create and return a dataloader for the dataset

get_labels()[source]#

Implement this if you want to return the complete set of labels of the dataset

show_batch(size)[source]#

Implement this if you want to return a random batch of images from the dataset

show_samples()[source]#

Implement this if you want to return random samples from the dataset

show_image(index)[source]#

Implement this if you want to return an image with a given index from the dataset

data_distribution_table()[source]#

Implement this if you want to return the label distribution of the dataset

data_distribution_barchart()[source]#

Implement this if you want to return the label distribution of the dataset as a barchart

load_transforms(class_names)[source]#

Loads transformation classes and makes a composition of them
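
To define a custom dataset, subclass BaseDataset and implement at least the standard torch Dataset protocol (__getitem__ and __len__), plus whichever of the hooks above you need. A minimal sketch, assuming BaseDataset is importable from aitlas.base and that the config keys below validate against BaseDatasetSchema:

    import torch
    from aitlas.base import BaseDataset  # assumed import path


    class InMemoryDataset(BaseDataset):
        """Toy dataset holding random tensors in memory (illustrative only)."""

        def __init__(self, config):
            super().__init__(config)
            # Hypothetical data: 10 single-channel 8x8 images, binary labels.
            self.images = torch.rand(10, 1, 8, 8)
            self.targets = torch.randint(0, 2, (10,))

        def __getitem__(self, index):
            return self.images[index], self.targets[index]

        def __len__(self):
            return len(self.images)

    # Hypothetical usage; dataloader() is inherited from BaseDataset.
    dataset = InMemoryDataset({"batch_size": 4, "shuffle": False})
    loader = dataset.dataloader()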

aitlas.base.metrics module#

class BaseMetric(device='cpu', **kwargs)[source]#

Bases: object

Base class for metrics

calculate(y_true, y_pred)[source]#
class RunningScore(num_classes, device)[source]#

Bases: object

update(y_true, y_pred, y_prob=None)[source]#

Updates stats on each batch

reset()[source]#

Reset the confusion matrix

get_computed()[source]#
precision()[source]#
accuracy()[source]#
weights()[source]#
recall()[source]#
f1_score()[source]#
iou()[source]#
get_scores(metrics)[source]#

Returns the specified metrics
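
A sketch of how these running-score objects are typically driven during evaluation: call update once per batch, then request the metrics of interest. The import path, tensor shapes and metric keys below are assumptions based on the signatures above:

    import torch
    from aitlas.base.metrics import MultiClassRunningScore  # assumed import path

    scores = MultiClassRunningScore(num_classes=3, device="cpu")

    # Pretend these pairs come from two evaluation batches.
    for y_true, y_pred in [
        (torch.tensor([0, 1, 2]), torch.tensor([0, 2, 2])),
        (torch.tensor([1, 1, 0]), torch.tensor([1, 0, 0])),
    ]:
        scores.update(y_true, y_pred)

    print(scores.get_scores(["accuracy", "f1_score"]))
    scores.reset()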

class MultiClassRunningScore(num_classes, device)[source]#

Bases: RunningScore

Calculates confusion matrix for multi-class data. This class contains metrics that are averaged over batches.

accuracy()[source]#
weights()[source]#
recall()[source]#
precision()[source]#
iou()[source]#
kappa()[source]#
class MultiLabelRunningScore(num_classes, device)[source]#

Bases: RunningScore

Calculates a confusion matrix for multi-labelled, multi-class data, in addition to the list of prediction probabilities.

reset()[source]#

Reset the confusion matrix and list of probabilities

update(y_true, y_pred, y_prob=None)[source]#

Updates stats on each batch

map()[source]#
roc_auc_score()[source]#
accuracy()[source]#
precision()[source]#
weights()[source]#
recall()[source]#
get_outcomes(total=False)[source]#

Return true/false positives/negatives from the confusion matrix

Parameters:

total (bool, optional) – Whether to return the totals or the per-class counts

count()[source]#
get_samples()[source]#
iou()[source]#
class SegmentationRunningScore(num_classes, device)[source]#

Bases: MultiLabelRunningScore

Calculates metrics for semantic segmentation

update(y_true, y_pred, y_prob=None)[source]#

Updates stats on each batch

class ObjectDetectionRunningScore(num_classes, device)[source]#

Bases: object

Calculates metrics for object detection

update(preds, target)[source]#

Updates stats on each batch

reset()[source]#

Reset the confusion matrix

compute()[source]#
map()[source]#

Returns the mean average precision (mAP)

map_50()[source]#

Returns the mean average precision at an IoU threshold of 0.5 (mAP@50)

get_scores(metrics)[source]#

Returns the specified metrics

aitlas.base.models module#

Models base class. This is the base class for all models. All models should subclass it.

class EarlyStopping(patience=10, min_delta=0)[source]#

Bases: object

Early stopping to stop the training when the loss does not improve after certain epochs.

EarlyStopping constructor

Parameters:
  • patience – how many epochs to wait before stopping when loss is not improving

  • min_delta – minimum difference between new loss and old loss for new loss to be considered as an improvement
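
The exact attribute and call conventions of EarlyStopping are not documented here, so the following sketch only reproduces the patience/min_delta logic in plain Python rather than exercising the class itself:

    def should_stop(losses, patience=10, min_delta=0.0):
        """Return True once the loss has not improved by more than
        min_delta for patience consecutive epochs."""
        best = float("inf")
        epochs_without_improvement = 0
        for loss in losses:
            if best - loss > min_delta:
                best = loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    return True
        return False

    print(should_stop([1.0, 0.9, 0.9, 0.9, 0.9], patience=3))  # True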

class BaseModel(config=None)[source]#

Bases: Module, Configurable

Basic class abstracting a model. Contains methods for training, evaluation and also utility methods for loading, saving a model to storage.

BaseModel constructor

Parameters:

config (Config, optional) – Configuration object which specifies the details of the model, defaults to None.

schema#

alias of BaseModelSchema

name = None#
log_loss = True#
prepare()[source]#

Prepare the model before using it. Loads the loss criterion, optimizer, lr scheduler and early stopping.

fit(dataset, epochs=100, model_directory=None, save_epochs=10, iterations_log=100, resume_model=None, val_dataset=None, run_id=None, **kwargs)[source]#

Main method to train the model. It trains the model for the specified number of epochs and saves the model after every save_epochs epochs. It also logs the loss every iterations_log iterations.

Parameters:
  • dataset (aitlas.base.BaseDataset) – Dataset object which contains the training data.

  • epochs (int, optional) – Number of epochs to train the model, defaults to 100

  • model_directory (str, optional) – Location where the model checkpoints will be stored or should be loaded from, defaults to None

  • save_epochs (int, optional) – Number of epochs after which a checkpoint is saved, defaults to 10

  • iterations_log (int, optional) – Number of iterations after which the training status will be logged, defaults to 100

  • resume_model (str, optional) – Path to a saved model from which to resume training, defaults to None

  • val_dataset (aitlas.base.BaseDataset, optional) – Dataset object which contains the validation data, defaults to None

  • run_id (str, optional) – Optional id to identify the experiment, defaults to None

Returns:

Returns the loss at the end of training.

Return type:

float
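
A sketch of a typical training call, assuming concrete dataset and model subclasses are already available; MyDataset and MyClassifier are placeholders, not aitlas classes, and the config keys are illustrative only:

    # Hypothetical concrete subclasses; substitute real aitlas datasets/models.
    train_dataset = MyDataset({"batch_size": 32, "shuffle": True})
    val_dataset = MyDataset({"batch_size": 32, "shuffle": False})
    model = MyClassifier({"num_classes": 10, "learning_rate": 0.001})

    model.prepare()  # loads criterion, optimizer, lr scheduler, early stopping
    loss = model.fit(
        dataset=train_dataset,
        epochs=50,
        model_directory="./experiments/run1",
        save_epochs=5,
        val_dataset=val_dataset,
        run_id="baseline",
    )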

train_epoch(epoch, dataloader, optimizer, criterion, iterations_log)[source]#
evaluate(dataset=None, model_path=None)[source]#

Evaluate a model stored in a specified path against a given dataset

Parameters:
  • dataset (BaseDataset | None) – the dataset to evaluate against

  • model_path (str | None) – the path to the model on disk

Returns:

evaluate_model(dataloader, criterion=None, description='testing on validation set')[source]#

Evaluates the current model against the specified dataloader for the configured metrics.

Parameters:
  • dataloader – The dataloader to evaluate against

  • criterion – Criterion used to calculate the loss

  • description – Text to show in the progress bar

Returns:

tuple of (metrics, y_true, y_pred)

predict(dataset=None, description='running prediction')[source]#

Runs prediction with the model on the specified dataset

Returns:

tuple of (y_true, y_pred, y_pred_probs)

Return type:

tuple

Parameters:

dataset (BaseDataset | None) –
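
Continuing the hypothetical setup from the training sketch above, the three returned sequences are aligned per sample, so simple post-processing works directly:

    # model and val_dataset are the placeholders from the fit() sketch.
    y_true, y_pred, y_prob = model.predict(dataset=val_dataset)

    accuracy = sum(int(t == p) for t, p in zip(y_true, y_pred)) / len(y_true)
    print(f"accuracy: {accuracy:.3f}")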

predict_image(image=None, labels=None, data_transforms=None, description='running prediction for single image')[source]#

Runs prediction with the model on the specified image

Returns:

Plot containing the image and the predictions.

Return type:

matplotlib.figure.Figure

predict_masks(image=None, labels=None, data_transforms=None, description='running prediction for single image')[source]#

Runs prediction with the model on the specified image

Returns:

Plot of the predicted masks

Return type:

matplotlib.figure.Figure

detect_objects(image=None, labels=None, data_transforms=None, description='running object detection for single image')[source]#

Runs object detection with the model on the specified image

Returns:

Plots the image with the object boundaries.

Return type:

matplotlib.figure.Figure

predict_output_per_batch(dataloader, description)[source]#

Run predictions on a dataloader and return inputs, outputs, labels per batch

forward(*input, **kwargs)[source]#

Abstract method implementing the model. Extending classes should override this method.

Returns:

Instance extending nn.Module

Return type:

nn.Module
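
A minimal sketch of such an override, assuming BaseModel is importable from aitlas.base and that constructing it with config=None is acceptable; the layer sizes are arbitrary:

    import torch.nn as nn
    from aitlas.base import BaseModel  # assumed import path


    class TinyClassifier(BaseModel):
        """Illustrative subclass: one linear layer over flattened input."""

        def __init__(self, config=None):
            super().__init__(config)
            self.flatten = nn.Flatten()
            self.linear = nn.Linear(64, 2)

        def forward(self, x):
            # The override simply runs the input through the wrapped layers.
            return self.linear(self.flatten(x))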

get_predicted(outputs, threshold=None)[source]#

Gets the output from the model and returns the predictions

Returns:

Tuple in the format (probabilities, predicted classes/labels)

Return type:

tuple

report(labels, dataset_name, running_metrics, **kwargs)[source]#

The report we want to generate for the model

log_metrics(output, labels, tag='train', writer=None, epoch=0)[source]#

Log the calculated metrics

allocate_device(opts=None)[source]#

Put the model on CPU or GPU

Returns:

Return the model on CPU or GPU.

Return type:

nn.Module

save_model(model_directory, epoch, optimizer, loss, start, run_id)[source]#

Saves the model on disk

Parameters:
  • model_directory – Directory to save the model

  • epoch – Epoch number of the checkpoint

  • optimizer – Optimizer used

  • loss – Criterion used

  • start – Start time of training

  • run_id – Run id of the model

extract_features(*input, **kwargs)[source]#

Abstract method for trimming the model to extract features. Extending classes should override this method.

Returns:

Instance of the model architecture

Return type:

nn.Module

load_model(file_path, optimizer=None)[source]#

Loads a model from a checkpoint

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#
train_model(train_dataset, epochs=100, model_directory=None, save_epochs=10, iterations_log=100, resume_model=None, val_dataset=None, run_id=None, **kwargs)[source]#

Main method that trains the model.

Parameters:
  • train_dataset (BaseDataset) – Dataset to train the model

  • epochs (int, optional) – Number of epochs for training, defaults to 100

  • model_directory (str, optional) – Directory where the model checkpoints will be saved, defaults to None

  • save_epochs (int, optional) – Number of epochs to save a checkpoint of the model, defaults to 10

  • iterations_log (int, optional) – The number of iterations to pass before logging the system state, defaults to 100

  • resume_model (str, optional) – Path to an already trained model from which to resume training, defaults to None

  • val_dataset (BaseDataset, optional) – Dataset used for validation, defaults to None

  • run_id (str, optional) – Optional run id to identify the experiment, defaults to None

Returns:

Return the loss of the model

train_and_evaluate_model(train_dataset, epochs=100, model_directory=None, save_epochs=10, iterations_log=100, resume_model=None, val_dataset=None, run_id=None, **kwargs)[source]#

Method that trains and evaluates the model.

Parameters:
  • train_dataset (BaseDataset) – Dataset to train the model

  • epochs (int, optional) – Number of epochs for training, defaults to 100

  • model_directory (str, optional) – Model directory where the model checkpoints will be saved, defaults to None

  • save_epochs (int, optional) – Number of epochs to save a checkpoint of the model, defaults to 10

  • iterations_log (int, optional) – Number of iterations to pass before logging the system state, defaults to 100

  • resume_model (str, optional) – Path to an already trained model from which to resume training, defaults to None

  • val_dataset (BaseDataset, optional) – Dataset used for validation, defaults to None

  • run_id (str, optional) – Run id to identify the experiment, defaults to None

Returns:

Loss of the model

training: bool#

aitlas.base.object_detection module#

class BaseObjectDetection(config)[source]#

Bases: BaseModel

This class extends the functionality of the BaseModel class by adding object detection specific functionality.

schema#

alias of BaseObjectDetectionSchema

log_loss = True#
get_predicted(outputs, threshold=0.3)[source]#

Get predicted objects from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – The confidence threshold for detections, defaults to 0.3.

Returns:

List of dictionaries containing the predicted bounding boxes, scores and labels.

Return type:

list

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#

Load the learning rate scheduler

train_epoch(epoch, dataloader, optimizer, criterion, iterations_log)[source]#

Train the model for a single epoch.

Parameters:
  • epoch – The current epoch number.

  • dataloader – The data loader for the training set.

  • optimizer – The optimizer.

  • criterion – The loss function.

  • iterations_log – The number of iterations after which to log the loss.

Returns:

The average loss over the entire epoch.

Return type:

float

predict_output_per_batch(dataloader, description)[source]#

Run predictions on a dataloader and return inputs, outputs, targets per batch

Parameters:
  • dataloader (aitlas.base.BaseDataLoader) – Data loader for the prediction set.

  • description (str) – Description of the task for logging purposes.

Yield:

Yields a tuple of (inputs, outputs, targets)

Return type:

tuple

evaluate_model(dataloader, criterion=None, description='testing on validation set')[source]#

Method used to evaluate the model on a validation set.

Parameters:
  • dataloader (aitlas.base.BaseDataLoader) – Data loader for the validation set.

  • criterion (_type_, optional) – The loss function, defaults to None.

  • description (str, optional) – Description of the task for logging purposes, defaults to “testing on validation set”

Returns:

Returns the mAP score of the model on the validation set.

Return type:

float

training: bool#

aitlas.base.schemas module#

class BaseDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Schema for configuring a base dataset.

Parameters:
  • batch_size (int, optional) – Batch size for the dataset. Default is 64.

  • shuffle (bool, optional) – Flag indicating whether to shuffle the dataset. Default is True.

  • num_workers (int, optional) – Number of workers to use for data loading. Default is 4.

  • pin_memory (bool, optional) – Flag indicating whether to use page-locked memory. Default is False.

  • transforms (List[str], optional) – Classes to run transformations over the input data.

  • target_transforms (List[str], optional) – Classes to run transformations over the target data.

  • joint_transforms (List[str], optional) – Classes to run transformations over the input and target data.

  • labels (List[str], optional) – Labels for the dataset.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
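
These schemas are ordinary marshmallow Schema subclasses, so a configuration dictionary can be validated by loading it. A sketch, assuming the import path below and that no additional fields are required:

    from aitlas.base.schemas import BaseDatasetSchema  # assumed import path

    config = {
        "batch_size": 16,
        "shuffle": True,
        "num_workers": 2,
        "labels": ["forest", "water", "urban"],
    }

    # load() validates the dictionary against the schema; unknown or
    # malformed fields raise a marshmallow ValidationError.
    validated = BaseDatasetSchema().load(config)
    print(validated)
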
class BaseModelSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Schema for configuring a base model.

Parameters:
  • num_classes (int, optional) – Number of classes for the model. Default is 2.

  • use_cuda (bool, optional) – Flag indicating whether to use CUDA if available. Default is True.

  • metrics (List[str], optional) – Metrics to calculate during training and evaluation. Default is [‘f1_score’].

  • weights (List[float], optional) – Class weights to apply for the loss function. Default is None.

  • rank (int, optional) – Rank value for distributed data processing. Default is 0.

  • use_ddp (bool, optional) – Flag indicating whether to turn on distributed data processing. Default is False.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
class BaseClassifierSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseModelSchema

Schema for configuring a base classifier.

Parameters:
  • learning_rate (float, optional) – Learning rate used in training. Default is 0.01.

  • weight_decay (float, optional) – Weight decay used in training. Default is 0.0.

  • pretrained (bool, optional) – Flag indicating whether to use a pretrained model. Default is True.

  • local_model_path (str, optional) – Local path of the pretrained model. Default is None.

  • threshold (float, optional) – Prediction threshold if needed. Default is 0.5.

  • freeze (bool, optional) – Flag indicating whether to freeze all layers except for the classifier layer(s). Default is False.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
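
In the same way, a classifier configuration can be checked against BaseClassifierSchema; the field names below come from BaseModelSchema and the classifier parameters listed above, and the import path is assumed:

    from aitlas.base.schemas import BaseClassifierSchema  # assumed import path

    classifier_config = {
        "num_classes": 10,
        "learning_rate": 0.001,
        "pretrained": True,
        "metrics": ["f1_score"],
    }

    print(BaseClassifierSchema().load(classifier_config))
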
class BaseSegmentationClassifierSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseClassifierSchema

Schema for configuring a base segmentation classifier.

Parameters:
  • metrics (List[str], optional) – Classes of metrics you want to calculate during training and evaluation. Default is [‘iou’, ‘f1_score’, ‘accuracy’].

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class BaseObjectDetectionSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseClassifierSchema

Schema for configuring a base object detection model.

Parameters:
  • metrics (List[str], optional) – Classes of metrics you want to calculate during training and evaluation. Default is [‘map’].

  • step_size (int, optional) – Step size for the learning rate scheduler. Default is 15.

  • gamma (float, optional) – Gamma (multiplier) for the learning rate scheduler. Default is 0.1.

  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class BaseTransformsSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: Schema

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#

aitlas.base.segmentation module#

class BaseSegmentationClassifier(config)[source]#

Bases: BaseModel

Base class for a segmentation classifier.

schema#

alias of BaseSegmentationClassifierSchema

get_predicted(outputs, threshold=None)[source]#

Get predicted classes from the model outputs.

Parameters:
  • outputs (torch.Tensor) – Model outputs with shape (batch_size, num_classes).

  • threshold (float, optional) – The threshold for classification, defaults to None.

Returns:

tuple containing the probabilities and predicted classes

Return type:

tuple

load_optimizer()[source]#

Load the optimizer

load_criterion()[source]#

Load the loss function

load_lr_scheduler(optimizer)[source]#

Load the learning rate scheduler

training: bool#

aitlas.base.tasks module#

class BaseTask(model, config)[source]#

Bases: Configurable

static create_dataset(dataset_config)[source]#

Builds the input dataset using the provided configuration.

generate_task_id()[source]#

Generates a task ID

run()[source]#

Runs the task.
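
A sketch of how a task is typically wired together and executed; SomeTrainTask stands in for a concrete BaseTask subclass and the config keys are placeholders, not a documented aitlas configuration:

    # model is a configured BaseModel instance; SomeTrainTask is hypothetical.
    task = SomeTrainTask(model, {"epochs": 10, "model_directory": "./experiments"})
    task.run()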

aitlas.base.transforms module#

Base class for implementing configurable transformations

load_transforms(class_names, config)[source]#

Loads transformation classes and makes a composition of them

class BaseTransforms(*args, **kwargs)[source]#

Bases: object

Base class for implementing configurable transformations

schema#

alias of BaseTransformsSchema

configurables = None#

aitlas.base.visualizations module#

Base class for implementing visualizations.

class BaseVisualization(cm, labels, file, **kwargs)[source]#

Bases: object

Base class for visualizations

plot()[source]#
class BaseDetailedVisualization(y_true, y_pred, y_prob, labels, file, **kwargs)[source]#

Bases: BaseVisualization

Base class for detailed visualizations

plot()[source]#