Datasets#

aitlas.datasets.aid module#

class AIDDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://1drv.ms/u/s!AthY3vMZmuxChNR0Co7QHpJ56M-SvQ'#
labels = ['Airport', 'BareLand', 'BaseballField', 'Beach', 'Bridge', 'Center', 'Church', 'Commercial', 'DenseResidential', 'Desert', 'Farmland', 'Forest', 'Industrial', 'Meadow', 'MediumResidential', 'Mountain', 'Park', 'Parking', 'Playground', 'Pond', 'Port', 'RailwayStation', 'Resort', 'River', 'School', 'SparseResidential', 'Square', 'Stadium', 'StorageTanks', 'Viaduct']#
name = 'AID dataset'#
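A minimal usage sketch for a multiclass dataset such as AIDDataset. The method names and signatures are the documented ones and the import path follows the module above, but the config keys (data_dir, batch_size, shuffle) are assumptions about ClassificationDatasetSchema and may need adjusting:

    from aitlas.datasets.aid import AIDDataset

    config = {
        "data_dir": "/path/to/AID",  # assumed schema field
        "batch_size": 16,            # assumed schema field
        "shuffle": True,             # assumed schema field
    }
    dataset = AIDDataset(config)
    print(dataset.get_labels())        # documented method: class labels
    dataset.show_image(0)              # documented method: display one sample
    dataset.data_distribution_table()  # documented method: class distribution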

aitlas.datasets.aid_multilabel module#

class AIDMultiLabelDataset(config)[source]#

Bases: MultiLabelClassificationDataset

url = 'https://github.com/Hua-YS/AID-Multilabel-Dataset'#
labels = ['airplane', 'bare-soil', 'buildings', 'cars', 'chaparral', 'court', 'dock', 'field', 'grass', 'mobile-home', 'pavement', 'sand', 'sea', 'ship', 'tanks', 'trees', 'water']#
name = 'AID multilabel dataset'#

aitlas.datasets.airs module#

class AIRSDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://www.airs-dataset.com/'#
labels = ['Background', 'Roof']#
color_mapping = [[0, 0, 0], [200, 200, 200]]#
name = 'AIRS'#

aitlas.datasets.amazon_rainforest module#

class AmazonRainforestDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://zenodo.org/record/3233081#.YTYm_44zaUk'#
labels = ['Background', 'Forest']#
color_mapping = [[0, 0, 0], [0, 255, 0]]#
name = 'Amazon Rainforest'#
load_dataset(data_dir, csv_file=None)[source]#

aitlas.datasets.big_earth_net module#

interp_band(bands, img10_shape=[120, 120])[source]#

Adapted from: lanha/DSen2
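A standalone sketch of what such an interpolation step typically does, upsampling lower-resolution bands to the 10 m grid; the use of spline resampling here is an assumption, not the aitlas/DSen2 code:

    import numpy as np
    from scipy.ndimage import zoom

    def interp_band_sketch(bands, img10_shape=(120, 120)):
        """Resample each band array to the target 10 m grid."""
        resampled = [
            zoom(band, (img10_shape[0] / band.shape[0],
                        img10_shape[1] / band.shape[1]), order=2)
            for band in bands
        ]
        return np.stack(resampled).astype(np.float32)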

parse_json_labels(f_j_path)[source]#

Parse the BigEarthNet metadata JSON file to get the image labels.

Parameters:

f_j_path (str) – json file path

Returns:

list of labels

Return type:

list
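Each BigEarthNet patch is shipped with a *_labels_metadata.json file; a standalone sketch of the parsing step, assuming the class names are stored under a "labels" key (an assumption, not the aitlas source):

    import json

    def parse_json_labels_sketch(f_j_path):
        """Read a BigEarthNet patch metadata JSON and return its label list."""
        with open(f_j_path) as f:
            metadata = json.load(f)
        return metadata["labels"]  # assumed key holding the CORINE class names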

update_json_labels(f_j_path, BigEarthNet_19_labels)[source]#
loads_pickle(buf)[source]#
Parameters:

buf (bytes-like object) – the output of dumps

Returns:

object

dumps_pickle(obj)[source]#

Serialize an object.

Parameters:

obj – the object to be serialized

Returns:

an implementation-dependent bytes-like object

cls2multihot(cls_vec, label_indices)[source]#
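cls2multihot converts a list of class names into a multi-hot target vector given a label-to-index mapping. A minimal illustrative sketch; the exact argument and return formats are assumptions:

    import numpy as np

    def cls2multihot_sketch(cls_vec, label_indices):
        """cls_vec: label names; label_indices: dict mapping label -> position."""
        multihot = np.zeros(len(label_indices), dtype=np.float32)
        for label in cls_vec:
            if label in label_indices:
                multihot[label_indices[label]] = 1.0
        return multihot

    # cls2multihot_sketch(["Pastures"], {"Pastures": 0, "Coniferous forest": 1})
    # -> array([1., 0.], dtype=float32)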
class BigEarthNetDataset(config)[source]#

Bases: BaseDataset

BigEarthNet dataset adaptation

schema#

alias of BigEarthNetSchema

name = 'Big Earth Net'#
get_labels()[source]#
load_patches()[source]#
get_item_name(index)[source]#
show_image(index)[source]#
save_image(index)[source]#
show_batch(size, show_title=True)[source]#
data_distribution_table()[source]#
data_distribution_barchart()[source]#
labels_stats()[source]#
prepare()[source]#
process_to_lmdb()[source]#
class PrepBigEarthNetDataset(data_dir=None, patch_names_list=None, label_indices=None)[source]#

Bases: Dataset

aitlas.datasets.brazilian_coffee_scenes module#

class BrazilianCoffeeScenesDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'http://www.patreo.dcc.ufmg.br/wp-content/uploads/2017/11/brazilian_coffee_dataset.zip'#
labels = ['coffee', 'noncoffee']#
name = 'Brazilian Coffee Scenes dataset'#
prepare(root)[source]#

aitlas.datasets.breizhcrops module#

BreizhCrops - a crop type classification dataset

Note

Adapted from the original implementation of the BreizhCrops dataset: dl4sits/BreizhCrops

class DownloadProgressBar(*_, **__)[source]#

Bases: tqdm

Parameters:
  • iterable (iterable, optional) – Iterable to decorate with a progressbar. Leave blank to manually manage the updates.

  • desc (str, optional) – Prefix for the progressbar.

  • total (int or float, optional) – The number of expected iterations. If unspecified, len(iterable) is used if possible. If float(“inf”) or as a last resort, only basic progress statistics are displayed (no ETA, no progressbar). If gui is True and this parameter needs subsequent updating, specify an initial arbitrary large positive number, e.g. 9e9.

  • leave (bool, optional) – If [default: True], keeps all traces of the progressbar upon termination of iteration. If None, will leave only if position is 0.

  • file (io.TextIOWrapper or io.StringIO, optional) – Specifies where to output the progress messages (default: sys.stderr). Uses file.write(str) and file.flush() methods. For encoding, see write_bytes.

  • ncols (int, optional) – The width of the entire output message. If specified, dynamically resizes the progressbar to stay within this bound. If unspecified, attempts to use environment width. The fallback is a meter width of 10 and no limit for the counter and statistics. If 0, will not print any meter (only stats).

  • mininterval (float, optional) – Minimum progress display update interval [default: 0.1] seconds.

  • maxinterval (float, optional) – Maximum progress display update interval [default: 10] seconds. Automatically adjusts miniters to correspond to mininterval after long display update lag. Only works if dynamic_miniters or monitor thread is enabled.

  • miniters (int or float, optional) – Minimum progress display update interval, in iterations. If 0 and dynamic_miniters, will automatically adjust to equal mininterval (more CPU efficient, good for tight loops). If > 0, will skip display of specified number of iterations. Tweak this and mininterval to get very efficient loops. If your progress is erratic with both fast and slow iterations (network, skipping items, etc) you should set miniters=1.

  • ascii (bool or str, optional) – If unspecified or False, use unicode (smooth blocks) to fill the meter. The fallback is to use ASCII characters “ 123456789#”.

  • disable (bool, optional) – Whether to disable the entire progressbar wrapper [default: False]. If set to None, disable on non-TTY.

  • unit (str, optional) – String that will be used to define the unit of each iteration [default: it].

  • unit_scale (bool or int or float, optional) – If 1 or True, the number of iterations will be reduced/scaled automatically and a metric prefix following the International System of Units standard will be added (kilo, mega, etc.) [default: False]. If any other non-zero number, will scale total and n.

  • dynamic_ncols (bool, optional) – If set, constantly alters ncols and nrows to the environment (allowing for window resizes) [default: False].

  • smoothing (float, optional) – Exponential moving average smoothing factor for speed estimates (ignored in GUI mode). Ranges from 0 (average speed) to 1 (current/instantaneous speed) [default: 0.3].

  • bar_format (str, optional) –

    Specify a custom bar string formatting. May impact performance. [default: ‘{l_bar}{bar}{r_bar}’], where l_bar=’{desc}: {percentage:3.0f}%|’ and r_bar=’| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, ‘

    ’{rate_fmt}{postfix}]’

    Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt,

    percentage, elapsed, elapsed_s, ncols, nrows, desc, unit, rate, rate_fmt, rate_noinv, rate_noinv_fmt, rate_inv, rate_inv_fmt, postfix, unit_divisor, remaining, remaining_s, eta.

    Note that a trailing “: ” is automatically removed after {desc} if the latter is empty.

  • initial (int or float, optional) – The initial counter value. Useful when restarting a progress bar [default: 0]. If using float, consider specifying {n:.3f} or similar in bar_format, or specifying unit_scale.

  • position (int, optional) – Specify the line offset to print this bar (starting from 0) Automatic if unspecified. Useful to manage multiple bars at once (eg, from threads).

  • postfix (dict or *, optional) – Specify additional stats to display at the end of the bar. Calls set_postfix(**postfix) if possible (dict).

  • unit_divisor (float, optional) – [default: 1000], ignored unless unit_scale is True.

  • write_bytes (bool, optional) – Whether to write bytes. If (default: False) will write unicode.

  • lock_args (tuple, optional) – Passed to refresh for intermediate output (initialisation, iterating, and updating).

  • nrows (int, optional) – The screen height. If specified, hides nested bars outside this bound. If unspecified, attempts to use environment height. The fallback is 20.

  • colour (str, optional) – Bar colour (e.g. ‘green’, ‘#00ff00’).

  • delay (float, optional) – Don’t display until [default: 0] seconds have elapsed.

  • gui (bool, optional) – WARNING: internal parameter - do not use. Use tqdm.gui.tqdm(…) instead. If set, will attempt to use matplotlib animations for a graphical output [default: False].

Returns:

out

Return type:

decorated iterator.

update_to(b=1, bsize=1, tsize=None)[source]#
download_file(url, output_path, overwrite=False)[source]#
unzip(zipfile_path, target_dir)[source]#
untar(filepath)[source]#
class BreizhCropsDataset(config)[source]#

Bases: CropsDataset

schema#

alias of BreizhCropsSchema

preprocess()[source]#
get_labels()[source]#
data_distribution_table()[source]#
parcel_distribution_table()[source]#
data_distribution_barchart()[source]#
show_samples()[source]#
show_timeseries(index)[source]#
download_csv_files(region)[source]#
build_folder_structure(root, year, level, region)[source]#

Folder structure:

<root>
   codes.csv
   classmapping.csv
   <year>
      <region>.shp
      <level>
         <region>.csv
         <region>.h5
         <region>
             <csv>
                 123123.csv
                 123125.csv
                 ...
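A sketch of how a layout like the one above could be created with pathlib; the argument names mirror the documented signature, but the body is illustrative only:

    from pathlib import Path

    def build_folder_structure_sketch(root, year, level, region):
        """Create <root>/<year>/<level>/<region>/csv and return the csv directory."""
        csv_dir = Path(root) / str(year) / level / region / "csv"
        csv_dir.mkdir(parents=True, exist_ok=True)
        return csv_dir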
get_fid(idx)[source]#
download_h5_database(region)[source]#
write_h5_database_from_csv(index, region)[source]#
get_codes()[source]#
load_classmapping(classmapping)[source]#
get_classes_to_ind(classmapping)[source]#

Kept for now; may be needed for compatibility with GenericMulticlass.

load_raw(csv_file)[source]#
Loads the raw observations from csv_file. Expected columns: ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'QA10', 'QA20', 'QA60', 'doa', 'label', 'id']
load(csv_file)[source]#
load_culturecode_and_id(csv_file)[source]#
write_index(region)[source]#

aitlas.datasets.camvid module#

class CamVidDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://github.com/alexgkendall/SegNet-Tutorial'#
labels = ['sky', 'building', 'column_pole', 'road', 'sidewalk', 'tree', 'sign', 'fence', 'car', 'pedestrian', 'byciclist', 'void']#
color_mapping = [[255, 127, 127], [255, 191, 127], [255, 255, 127], [191, 255, 127], [127, 255, 127], [127, 255, 191], [127, 255, 255], [127, 191, 255], [127, 127, 255], [191, 127, 255], [255, 127, 255], [255, 127, 191]]#
name = 'CamVid'#
load_dataset(data_dir, csv_file=None)[source]#

aitlas.datasets.chactun module#

class ChactunDataset(config)[source]#

Bases: SemanticSegmentationDataset

labels = ['Aguada', 'Building', 'Platform']#
color_mapping = [[255, 255, 0], [100, 100, 100], [0, 255, 0]]#
name = 'Chactun'#
load_dataset(data_dir, csv_file=None)[source]#
show_image(index, show_title=True)[source]#

aitlas.datasets.clrs module#

class CLRSDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://github.com/lehaifeng/CLRS'#
labels = ['airport', 'bare-land', 'beach', 'bridge', 'commercial', 'desert', 'farmland', 'forest', 'golf-course', 'highway', 'industrial', 'meadow', 'mountain', 'overpass', 'park', 'parking', 'playground', 'port', 'railway', 'railway-station', 'residential', 'river', 'runway', 'stadium', 'storage-tank']#
name = 'CLRS dataset'#

aitlas.datasets.crops_classification module#

class CropsDataset(config)[source]#

Bases: BaseDataset

CropsDataset - a crop type classification dataset

schema#

alias of CropsDatasetSchema

preprocess()[source]#
get_labels()[source]#
data_distribution_table()[source]#
parcel_distribution_table()[source]#
data_distribution_barchart()[source]#
show_samples()[source]#
show_image(index)[source]#
show_timeseries(index)[source]#
get_codes()[source]#
load_classmapping(classmapping)[source]#

aitlas.datasets.dfc15_multilabel module#

class DFC15MultiLabelDataset(config)[source]#

Bases: MultiLabelClassificationDataset

url = 'https://github.com/Hua-YS/DFC15-Multilabel-Dataset'#
labels = ['impervious', 'water', 'clutter', 'vegetation', 'building', 'tree', 'boat', 'car']#
name = 'DFC15 dataset'#

aitlas.datasets.eopatch_crops module#

class DownloadProgressBar(*_, **__)[source]#

Bases: tqdm

(Constructor parameters are inherited from tqdm and are identical to those listed for DownloadProgressBar in the aitlas.datasets.breizhcrops module above.)

update_to(b=1, bsize=1, tsize=None)[source]#
download_file(url, output_path, overwrite=False)[source]#
class EOPatchCrops(config)[source]#

Bases: CropsDataset

EOPatchCrops - a crop type classification dataset

preprocess()[source]#
split()[source]#
write_index()[source]#

aitlas.datasets.eurosat module#

class EurosatDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://github.com/phelber/EuroSAT'#
labels = ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']#
name = 'EuroSAT dataset'#

aitlas.datasets.inria module#

class InriaDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://project.inria.fr/aerialimagelabeling/'#
labels = ['Background', 'Buildings']#
color_mapping = [[0, 0, 0], [255, 255, 255]]#
name = 'Inria'#

aitlas.datasets.landcover_ai module#

class LandCoverAiDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://landcover.ai.linuxpolska.com/'#
labels = ['Background', 'Buildings', 'Woodlands', 'Water', 'Road']#
color_mapping = [[255, 255, 0], [0, 0, 0], [0, 255, 0], [0, 0, 255], [200, 200, 200]]#
name = 'Landcover AI'#
split_images(imgs_dir, masks_dir, output_dir)[source]#
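split_images tiles the large orthophotos and their masks into smaller patches. A standalone sketch of the tiling idea; the 512 px tile size, file naming, and OpenCV I/O are assumptions, not the aitlas implementation:

    import os
    import cv2

    def split_image_sketch(img_path, mask_path, output_dir, tile=512):
        """Cut an image/mask pair into non-overlapping tile x tile patches."""
        image = cv2.imread(img_path)
        mask = cv2.imread(mask_path)
        base = os.path.splitext(os.path.basename(img_path))[0]
        os.makedirs(output_dir, exist_ok=True)
        for y in range(0, image.shape[0] - tile + 1, tile):
            for x in range(0, image.shape[1] - tile + 1, tile):
                cv2.imwrite(os.path.join(output_dir, f"{base}_{y}_{x}.jpg"),
                            image[y:y + tile, x:x + tile])
                cv2.imwrite(os.path.join(output_dir, f"{base}_{y}_{x}_m.png"),
                            mask[y:y + tile, x:x + tile])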

aitlas.datasets.massachusetts_buildings module#

class MassachusettsBuildingsDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://www.cs.toronto.edu/~vmnih/data/'#
labels = ['Background', 'Buildings']#
color_mapping = [[0, 0, 0], [255, 0, 0]]#
name = 'Massachusetts Buildings'#

aitlas.datasets.massachusetts_roads module#

class MassachusettsRoadsDataset(config)[source]#

Bases: SemanticSegmentationDataset

url = 'https://www.cs.toronto.edu/~vmnih/data/'#
labels = ['Background', 'Roads']#
color_mapping = [[0, 0, 0], [200, 200, 200]]#
name = 'Massachusetts Roads'#

aitlas.datasets.mlrs_net module#

class MLRSNetMultiLabelDataset(config)[source]#

Bases: MultiLabelClassificationDataset

url = 'https://data.mendeley.com/datasets/7j9bv9vwsx/2'#
labels = ['airplane', 'airport', 'bare soil', 'baseball diamond', 'basketball court', 'beach', 'bridge', 'buildings', 'cars', 'cloud', 'containers', 'crosswalk', 'dense residential area', 'desert', 'dock', 'factory', 'field', 'football field', 'forest', 'freeway', 'golf course', 'grass', 'greenhouse', 'gully', 'habor', 'intersection', 'island', 'lake', 'mobile home', 'mountain', 'overpass', 'park', 'parking lot', 'parkway', 'pavement', 'railway', 'railway station', 'river', 'road', 'roundabout', 'runway', 'sand', 'sea', 'ships', 'snow', 'snowberg', 'sparse residential area', 'stadium', 'swimming pool', 'tanks', 'tennis court', 'terrace', 'track', 'trail', 'transmission tower', 'trees', 'water', 'chaparral', 'wetland', 'wind turbine']#
name = 'MLRSNet dataset'#
prepare(root_folder)[source]#

aitlas.datasets.multiclass_classification module#

class MultiClassClassificationDataset(config)[source]#

Bases: BaseDataset

schema#

alias of ClassificationDatasetSchema

get_labels()[source]#
data_distribution_table()[source]#
data_distribution_barchart()[source]#
show_samples()[source]#
show_image(index)[source]#
show_batch(size, show_title=True)[source]#
load_dataset()[source]#
re_map_labels(labels_remapping)[source]#

aitlas.datasets.multilabel_classification module#

class MultiLabelClassificationDataset(config)[source]#

Bases: BaseDataset

schema#

alias of ClassificationDatasetSchema

get_labels()[source]#
data_distribution_table()[source]#
data_distribution_barchart()[source]#
show_samples()[source]#
show_image(index)[source]#
show_batch(size, show_title=True)[source]#
load_dataset(data_dir, csv_file)[source]#
labels_stats()[source]#
re_map_labels(labels_remapping, map_size)[source]#
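A hedged usage sketch for a multilabel dataset; load_dataset(data_dir, csv_file), labels_stats() and show_batch(size, show_title) are the documented signatures, while the config keys are assumptions about ClassificationDatasetSchema:

    from aitlas.datasets.aid_multilabel import AIDMultiLabelDataset

    config = {
        "data_dir": "/path/to/images",           # assumed schema field
        "csv_file": "/path/to/multilabels.csv",  # assumed schema field
        "batch_size": 16,                        # assumed schema field
    }
    dataset = AIDMultiLabelDataset(config)
    dataset.labels_stats()                   # documented: per-label statistics
    dataset.show_batch(15, show_title=True)  # documented: grid of samples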

aitlas.datasets.npz module#

class NpzDataset(config)[source]#

Bases: BaseDataset

schema#

alias of NPZDatasetSchema

labels = None#
get_labels()[source]#
data_distribution_table()[source]#
data_distribution_barchart()[source]#
show_samples()[source]#
show_image(index)[source]#
show_batch(size, show_title=True)[source]#
load_dataset()[source]#

aitlas.datasets.object_detection module#

class BaseObjectDetectionDataset(config)[source]#

Bases: BaseDataset

Base object detection dataset class

BaseDataset constructor

Parameters:

config (Config) – Configuration object which specifies the details of the dataset (batch size, number of workers, list of labels, list of transformations).

name = 'Object Detection Dataset'#
dataloader()[source]#
apply_transformations(image, target)[source]#
get_labels()[source]#
show_image(index, show_title=False)[source]#
show_batch(size, show_labels=False)[source]#
class ObjectDetectionPascalDataset(config)[source]#

Bases: BaseObjectDetectionDataset

schema#

alias of ObjectDetectionPascalDatasetSchema

labels = [None]#
load_dataset(imageset_file, data_dir)[source]#
data_distribution_table()[source]#
data_distribution_barchart(show_title=True)[source]#
class ObjectDetectionCocoDataset(config)[source]#

Bases: BaseObjectDetectionDataset

This is a skeleton object detection dataset following the COCO format

schema#

alias of ObjectDetectionCocoDatasetSchema

data_distribution_table()[source]#
data_distribution_barchart()[source]#
show_samples()[source]#
load_dataset(data_dir=None, json_file=None)[source]#

aitlas.datasets.optimal_31 module#

class Optimal31Dataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://drive.google.com/file/d/1Fk9a0DW8UyyQsR8dP2Qdakmr69NVBhq9/view'#
labels = ['airplane', 'airport', 'baseball_diamond', 'basketball_court', 'beach', 'bridge', 'chaparral', 'church', 'circular_farmland', 'commercial_area', 'dense_residential', 'desert', 'forest', 'freeway', 'golf_course', 'ground_track_field', 'harbor', 'industrial_area', 'intersection', 'island', 'lake', 'meadow', 'medium_residential', 'mobile_home_park', 'mountain', 'overpass', 'parking_lot', 'railway', 'rectangular_farmland', 'roundabout', 'runway']#
name = 'Optimal31 dataset'#

aitlas.datasets.pattern_net module#

class PatternNetDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://arxiv.org/abs/1706.03424'#
labels = ['airplane', 'baseball_field', 'basketball_court', 'beach', 'bridge', 'cemetery', 'chaparral', 'christmas_tree_farm', 'closed_road', 'coastal_mansion', 'crosswalk', 'dense_residential', 'ferry_terminal', 'football_field', 'forest', 'freeway', 'golf_course', 'harbor', 'intersection', 'mobile_home_park', 'nursing_home', 'oil_gas_field', 'oil_well', 'overpass', 'parking_lot', 'parking_space', 'railway', 'river', 'runway', 'runway_marking', 'shipping_yard', 'solar_panel', 'sparse_residential', 'storage_tank', 'swimming_pool', 'tennis_court', 'transformer_station', 'wastewater_treatment_plant']#
name = 'PatternNet dataset'#

aitlas.datasets.planet_uas module#

class PlanetUASMultiLabelDataset(config)[source]#

Bases: MultiLabelClassificationDataset

url = 'https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/overview'#
labels = ['haze', 'primary', 'agriculture', 'clear', 'water', 'habitation', 'road', 'cultivation', 'slash_burn', 'cloudy', 'partly_cloudy', 'conventional_mine', 'bare_ground', 'artisinal_mine', 'blooming', 'selective_logging', 'blow_down']#
name = 'Planet UAS multilabel dataset'#
prepare(csv_train_file)[source]#
kaggle_format(csv_file_path, output_file, threshold)[source]#
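kaggle_format turns per-image label scores into the Kaggle submission layout (an image name plus a space-separated list of tags whose score exceeds the threshold). A standalone sketch of that conversion; the column names and input structure are assumptions:

    import csv

    def kaggle_format_sketch(probabilities, labels, output_file, threshold=0.5):
        """probabilities: dict mapping image name -> list of per-label scores."""
        with open(output_file, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["image_name", "tags"])
            for image_name, scores in probabilities.items():
                tags = [label for label, p in zip(labels, scores) if p >= threshold]
                writer.writerow([image_name, " ".join(tags)])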

aitlas.datasets.resisc45 module#

class Resisc45Dataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://www.tensorflow.org/datasets/catalog/resisc45'#
labels = ['airplane', 'airport', 'baseball_diamond', 'basketball_court', 'beach', 'bridge', 'chaparral', 'church', 'circular_farmland', 'cloud', 'commercial_area', 'dense_residential', 'desert', 'forest', 'freeway', 'golf_course', 'ground_track_field', 'harbor', 'industrial_area', 'intersection', 'island', 'lake', 'meadow', 'medium_residential', 'mobile_home_park', 'mountain', 'overpass', 'palace', 'parking_lot', 'railway', 'railway_station', 'rectangular_farmland', 'river', 'roundabout', 'runway', 'sea_ice', 'ship', 'snowberg', 'sparse_residential', 'stadium', 'storage_tank', 'tennis_court', 'terrace', 'thermal_power_station', 'wetland']#
name = 'RESISC45 dataset'#

aitlas.datasets.rsd46_whu module#

class RSD46WHUDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://github.com/RSIA-LIESMARS-WHU/RSD46-WHU'#
labels = ['Airplane', 'Airport', 'Artificial dense forest land', 'Artificial sparse forest land', 'Bare land', 'Basketball court', 'Blue structured factory building', 'Building', 'Construction site', 'Cross river bridge', 'Crossroads', 'Dense tall building', 'Dock', 'Fish pond', 'Footbridge', 'Graff', 'Grassland', 'Low scattered building', 'Lrregular farmland', 'Medium density scattered building', 'Medium density structured building', 'Natural dense forest land', 'Natural sparse forest land', 'Oiltank', 'Overpass', 'Parking lot', 'Plasticgreenhouse', 'Playground', 'Railway', 'Red structured factory building', 'Refinery', 'Regular farmland', 'Scattered blue roof factory building', 'Scattered red roof factory building', 'Sewage plant-type-one', 'Sewage plant-type-two', 'Ship', 'Solar power station', 'Sparse residential area', 'Square', 'Steelsmelter', 'Storage land', 'Tennis court', 'Thermal power plant', 'Vegetable plot', 'Water']#
name = 'RSD46-WHU dataset'#

aitlas.datasets.rsi_cb256 module#

class RSICB256Dataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://github.com/lehaifeng/RSI-CB'#
labels = ['airplane', 'airport_runway', 'artificial_grassland', 'avenue', 'bare_land', 'bridge', 'city_building', 'coastline', 'container', 'crossroads', 'dam', 'desert', 'dry_farm', 'forest', 'green_farmland', 'highway', 'hirst', 'lakeshore', 'mangrove', 'marina', 'mountain', 'parkinglot', 'pipeline', 'residents', 'river', 'river_protection_forest', 'sandbeach', 'sapling', 'sea', 'shrubwood', 'snow_mountain', 'sparse_forest', 'storage_room', 'stream', 'town']#
name = 'RSI-CB256 dataset'#

aitlas.datasets.rsscn7 module#

class RSSCN7Dataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://docs.google.com/viewer?a=v&pid=sites&srcid=ZGVmYXVsdGRvbWFpbnxxaW56b3VjbnxneDo1MDYzYWMxOWIwMjRiMWFi'#
labels = ['farm_land', 'forest', 'grass_land', 'industrial_region', 'parking_lot', 'residential_region', 'river_lake']#
name = 'RSSCN7 dataset'#

aitlas.datasets.sat6 module#

class SAT6Dataset(config)[source]#

Bases: BaseDataset

schema#

alias of MatDatasetSchema

url = 'http://csc.lsu.edu/~saikat/deepsat/'#
labels = ['buildings', 'barren land', 'trees', 'grassland', 'roads', 'water bodies']#
name = 'SAT-6 dataset'#
get_labels()[source]#
data_distribution_table()[source]#
data_distribution_barchart()[source]#
show_image(index)[source]#
show_batch(size, show_title=True)[source]#
load_dataset(mat_file)[source]#
re_map_labels(labels_remapping)[source]#
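The SAT-6 data ships as a single .mat file; a sketch of reading it directly with SciPy. The key names (train_x, train_y) and the 28 x 28 x 4 x N layout are assumptions about the distributed file, not the aitlas loader:

    import numpy as np
    from scipy.io import loadmat

    mat = loadmat("sat-6-full.mat")               # hypothetical file name
    train_x = mat["train_x"]                      # assumed shape: 28 x 28 x 4 x N
    train_y = mat["train_y"]                      # assumed shape: 6 x N (one-hot)
    images = np.transpose(train_x, (3, 0, 1, 2))  # -> N x 28 x 28 x 4
    labels = np.argmax(train_y, axis=0)           # one-hot -> class index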

aitlas.datasets.schemas module#

class MatDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring a classification dataset given as mat file.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class NPZDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring a classification dataset given as npz file.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class ClassificationDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring a classification dataset.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class SegmentationDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring a segmentation dataset.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class ObjectDetectionPascalDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring an object detection dataset given in PASCAL VOC format.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class ObjectDetectionCocoDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring an object detection dataset given in COCO format.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class BigEarthNetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring the BigEarthNet dataset.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class SpaceNet6DatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring the SpaceNet6 dataset.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class BreizhCropsSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring the BreizhCrops dataset for crop type prediction.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class CropsDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring dataset for crop type prediction.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
class So2SatDatasetSchema(*, only=None, exclude=(), many=False, context=None, load_only=(), dump_only=(), partial=False, unknown=None)[source]#

Bases: BaseDatasetSchema

Schema for configuring the So2Sat dataset.

Parameters:
  • only (types.StrSequenceOrSet | None) –

  • exclude (types.StrSequenceOrSet) –

  • many (bool) –

  • context (dict | None) –

  • load_only (types.StrSequenceOrSet) –

  • dump_only (types.StrSequenceOrSet) –

  • partial (bool | types.StrSequenceOrSet) –

  • unknown (str | None) –

opts: SchemaOpts = <marshmallow.schema.SchemaOpts object>#
fields: Dict[str, ma_fields.Field]#

Dictionary mapping field_names -> Field objects

load_fields: Dict[str, ma_fields.Field]#
dump_fields: Dict[str, ma_fields.Field]#
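These are marshmallow schemas, so a raw config dict can be validated by instantiating a schema and calling load(); the field names used below are assumptions about ClassificationDatasetSchema, and additional keys may be required depending on the schema:

    from aitlas.datasets.schemas import ClassificationDatasetSchema

    raw_config = {
        "data_dir": "/path/to/images",  # assumed field
        "batch_size": 16,               # assumed field
    }
    # load() returns the validated config or raises marshmallow.ValidationError
    config = ClassificationDatasetSchema().load(raw_config)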

aitlas.datasets.semantic_segmentation module#

class SemanticSegmentationDataset(config)[source]#

Bases: BaseDataset

schema#

alias of SegmentationDatasetSchema

labels = None#
color_mapping = None#
name = None#
apply_transformations(image, mask)[source]#
load_dataset(data_dir, csv_file=None)[source]#
get_labels()[source]#
data_distribution_table()[source]#
data_distribution_barchart(show_title=True)[source]#
show_image(index, show_title=False)[source]#
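The labels and color_mapping attributes pair each class index with an RGB colour, which makes it straightforward to render a class-index mask for display; a minimal standalone sketch (not an aitlas helper):

    import numpy as np

    def colorize_mask_sketch(mask, color_mapping):
        """mask: H x W array of class indices; returns an H x W x 3 uint8 RGB image."""
        rgb = np.zeros(mask.shape + (3,), dtype=np.uint8)
        for class_index, color in enumerate(color_mapping):
            rgb[mask == class_index] = color
        return rgb

    # e.g. colorize_mask_sketch(predicted_mask, CamVidDataset.color_mapping)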

aitlas.datasets.siri_whu module#

class SiriWhuDataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'http://www.lmars.whu.edu.cn/prof_web/zhongyanfei/e-code.html'#
labels = ['agriculture', 'commercial', 'harbor', 'idle_land', 'industrial', 'meadow', 'overpass', 'park', 'pond', 'residential', 'river', 'water']#
name = 'SIRI-WHU dataset'#

aitlas.datasets.so2sat module#

class So2SatDataset(config)[source]#

Bases: BaseDataset

So2Sat dataset version 2 (contains train, validation and test splits)

So2Sat LCZ42 is a dataset consisting of corresponding synthetic aperture radar and multispectral optical image data acquired by the Sentinel-1 and Sentinel-2 remote sensing satellites, each with a corresponding local climate zone (LCZ) label. The dataset is distributed over 42 cities across different continents and cultural regions of the world, and comes with a split into fully independent, non-overlapping training, validation, and test sets.

url = 'https://dataserv.ub.tum.de/s/m1483140/download?path=%2F&files=testing.h5'#
name = 'So2Sat dataset'#
schema#

alias of So2SatDatasetSchema

labels = ['Compact high_rise', 'Compact middle_rise', 'Compact low_rise', 'Open high_rise', 'Open middle_rise', 'Open low_rise', 'Lightweight low_rise', 'Large low_rise', 'Sparsely built', 'Heavy industry', 'Dense trees', 'Scattered trees', 'Bush or scrub', 'Low plants', 'Bare rock or paved', 'Bare soil or sand', 'Water']#
get_labels()[source]#
show_image(index)[source]#
show_samples()[source]#
show_batch(size, show_title=True)[source]#
data_distribution_table()[source]#
data_distribution_barchart()[source]#

aitlas.datasets.spacenet6 module#

Note

Based on the implementation at: SpaceNetChallenge/SpaceNet_SAR_Buildings_Solutions

polygon_to_mask(poly, image_size)[source]#
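polygon_to_mask rasterizes a building footprint polygon into a binary mask of the given image size. A standalone sketch of the idea using Pillow; the polygon format (a list of (x, y) vertices) is an assumption:

    import numpy as np
    from PIL import Image, ImageDraw

    def polygon_to_mask_sketch(poly, image_size):
        """poly: list of (x, y) vertex tuples; image_size: (height, width)."""
        mask = Image.new("L", (image_size[1], image_size[0]), 0)
        ImageDraw.Draw(mask).polygon([tuple(p) for p in poly], outline=1, fill=1)
        return np.array(mask, dtype=np.uint8)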
process_image(image_path, segmentation_directory, edge_width, contact_width, gt_buildings_csv)[source]#

Creates and saves the target (ground-truth) segmentation mask for the input image.

Parameters:
  • image_path (str) – path to the source image

  • segmentation_directory (str) – path to the destination directory for the segmentation masks

  • edge_width (int) – the width of the edge

  • contact_width (int) – the width of the contact

  • gt_buildings_csv (str) – path to the source ground-truth-buildings csv

class SpaceNet6Dataset(config)[source]#

Bases: BaseDataset

SpaceNet6 dataset.

schema#

alias of SpaceNet6DatasetSchema

load_directory()[source]#

Loads the *.tif images from the specified directory.

load_other_folds(fold)[source]#

Loads all images (and masks) except the ones from this fold.

load_fold(fold)[source]#

Loads the images from this fold.

labels()[source]#
prepare()[source]#

Prepares the SpaceNet6 data set for model training and validation by:

  1. Creating training segmentation masks from the geojson files

2. Splitting the data set by location, which was shown to be very important for model learning; see SpaceNetChallenge/SpaceNet_SAR_Buildings_Solutions. This creates 10 splits of the data set, where each split consists of 10 folds (i.e. further splits), of which 9 are used for training and one for validation/testing (in essence, a cross-validation procedure); a simple sketch of the location-based grouping follows below.
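A simple illustration of the location-based grouping principle: hash the location/strip identifier into a fold number so that all tiles from the same strip land in the same fold (an illustration only, not the competition or aitlas code):

    import hashlib

    def location_fold_sketch(location_id, n_folds=10):
        """Deterministically map a location identifier to one of n_folds folds."""
        digest = hashlib.md5(location_id.encode("utf-8")).hexdigest()
        return int(digest, 16) % n_folds

    # tiles with location_fold_sketch(loc) == k form the validation set of fold k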

aitlas.datasets.uc_merced module#

class UcMercedDataset(config)[source]#

Bases: MultiClassClassificationDataset

labels = ['agricultural', 'airplane', 'baseballdiamond', 'beach', 'buildings', 'chaparral', 'denseresidential', 'forest', 'freeway', 'golfcourse', 'harbor', 'intersection', 'mediumresidential', 'mobilehomepark', 'overpass', 'parkinglot', 'river', 'runway', 'sparseresidential', 'storagetanks', 'tenniscourt']#
name = 'UC Merced dataset'#

aitlas.datasets.uc_merced_multilabel module#

class UcMercedMultiLabelDataset(config)[source]#

Bases: MultiLabelClassificationDataset

url = 'https://drive.google.com/file/d/1DtKiauowCB0ykjFe8v0OVvT76rEfOk0v/view'#
labels = ['airplane', 'bare-soil', 'buildings', 'cars', 'chaparral', 'court', 'dock', 'field', 'grass', 'mobile-home', 'pavement', 'sand', 'sea', 'ship', 'tanks', 'trees', 'water']#
name = 'UC Merced multilabel dataset'#

aitlas.datasets.urls module#

Contains the raw URLs for downloading the data for crop type prediction tasks. TODO: Refactor the raw CSV URLs to be more general.

aitlas.datasets.whu_rs19 module#

class WHURS19Dataset(config)[source]#

Bases: MultiClassClassificationDataset

url = 'https://github.com/CAPTAIN-WHU/BED4RS'#
labels = ['Airport', 'Beach', 'Bridge', 'Commercial', 'Desert', 'Farmland', 'footballField', 'Forest', 'Industrial', 'Meadow', 'Mountain', 'Park', 'Parking', 'Pond', 'Port', 'railwayStation', 'Residential', 'River', 'Viaduct']#
name = 'WHU-RS19 dataset'#