Multi Label datasets statistics#
[1]:
from aitlas.datasets import (
AIDMultiLabelDataset,
DFC15MultiLabelDataset,
PlanetUASMultiLabelDataset,
UcMercedMultiLabelDataset,
MLRSNetMultiLabelDataset,
BigEarthNetDataset
)
AID dataset train split#
[7]:
# Load the AID multi-label training split from its image folder and
# annotation file.
train_dataset_config = dict(
    data_dir="/media/hdd/multi-label/AID_multilabel/train/images",
    csv_file="/media/hdd/multi-label/AID_multilabel/train/multilabels.txt",
)
train_dataset = AIDMultiLabelDataset(train_dataset_config)

# Report the split size, then the min/max/average labels-per-image summary.
print(f"Total number of images: {len(train_dataset)}")
label_summary = train_dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = train_dataset.show_batch(20, True)
fig = train_dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
train_dataset.data_distribution_table()
Total number of images: 2400
Minimum number of labels: 1.0, Maximum number of labels: 11.0, Average number of labels: 5.14875
[7]:
| | Label | Count |
---|---|---|
1 | airplane | 79 |
2 | bare-soil | 1171 |
3 | buildings | 1744 |
4 | cars | 1617 |
5 | chaparral | 75 |
6 | court | 269 |
7 | dock | 221 |
8 | field | 175 |
9 | grass | 1829 |
10 | mobile-home | 1 |
11 | pavement | 1870 |
12 | sand | 207 |
13 | sea | 177 |
14 | ship | 237 |
15 | tanks | 87 |
16 | trees | 1924 |
17 | water | 674 |


AID dataset test split#
[8]:
# Load the AID multi-label test split from its image folder and
# annotation file.
test_dataset_config = dict(
    data_dir="/media/hdd/multi-label/AID_multilabel/test/images",
    csv_file="/media/hdd/multi-label/AID_multilabel/test/multilabels.txt",
)
test_dataset = AIDMultiLabelDataset(test_dataset_config)

# Report the split size, then the min/max/average labels-per-image summary.
print(f"Total number of images: {len(test_dataset)}")
label_summary = test_dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = test_dataset.show_batch(20, True)
fig = test_dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
test_dataset.data_distribution_table()
Total number of images: 600
Minimum number of labels: 1.0, Maximum number of labels: 10.0, Average number of labels: 5.168333333333333
[8]:
| | Label | Count |
---|---|---|
1 | airplane | 20 |
2 | bare-soil | 304 |
3 | buildings | 417 |
4 | cars | 409 |
5 | chaparral | 37 |
6 | court | 75 |
7 | dock | 50 |
8 | field | 39 |
9 | grass | 466 |
10 | mobile-home | 1 |
11 | pavement | 458 |
12 | sand | 52 |
13 | sea | 44 |
14 | ship | 47 |
15 | tanks | 21 |
16 | trees | 483 |
17 | water | 178 |


DFC15 dataset train split#
[9]:
# Load the DFC15 multi-label training split from its image folder and
# annotation file.
train_dataset_config = dict(
    data_dir="/media/hdd/multi-label/DFC15_multilabel/train/images",
    csv_file="/media/hdd/multi-label/DFC15_multilabel/train/multilabels.txt",
)
train_dataset = DFC15MultiLabelDataset(train_dataset_config)

# Report the split size, then the min/max/average labels-per-image summary.
print(f"Total number of images: {len(train_dataset)}")
label_summary = train_dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = train_dataset.show_batch(20, True)
fig = train_dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
train_dataset.data_distribution_table()
Total number of images: 2673
Minimum number of labels: 2.0, Maximum number of labels: 6.0, Average number of labels: 2.7871305649083427
[9]:
| | Label | Count |
---|---|---|
1 | impervious | 2516 |
2 | water | 791 |
3 | clutter | 1508 |
4 | vegetation | 867 |
5 | building | 797 |
6 | tree | 201 |
7 | boat | 216 |
8 | car | 554 |


DFC15 dataset test split#
[10]:
# Load the DFC15 multi-label test split from its image folder and
# annotation file.
test_dataset_config = dict(
    data_dir="/media/hdd/multi-label/DFC15_multilabel/test/images",
    csv_file="/media/hdd/multi-label/DFC15_multilabel/test/multilabels.txt",
)
test_dataset = DFC15MultiLabelDataset(test_dataset_config)

# Report the split size, then the min/max/average labels-per-image summary.
print(f"Total number of images: {len(test_dataset)}")
label_summary = test_dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = test_dataset.show_batch(20, True)
fig = test_dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
test_dataset.data_distribution_table()
Total number of images: 669
Minimum number of labels: 2.0, Maximum number of labels: 6.0, Average number of labels: 2.828101644245142
[10]:
| | Label | Count |
---|---|---|
1 | impervious | 617 |
2 | water | 207 |
3 | clutter | 383 |
4 | vegetation | 219 |
5 | building | 204 |
6 | tree | 57 |
7 | boat | 54 |
8 | car | 151 |


UC Merced dataset#
[11]:
# Load the full UC Merced multi-label dataset (no train/test split is used
# in this cell) from its image folder and annotation file.
dataset_config = dict(
    data_dir="/media/hdd/multi-label/UCMerced_multilabel/images",
    csv_file="/media/hdd/multi-label/UCMerced_multilabel/multilabels.txt",
)
dataset = UcMercedMultiLabelDataset(dataset_config)

# Report the dataset size, then the min/max/average labels-per-image summary.
print(f"Total number of images: {len(dataset)}")
label_summary = dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = dataset.show_batch(20, True)
fig = dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
dataset.data_distribution_table()
Total number of images: 2100
Minimum number of labels: 1.0, Maximum number of labels: 7.0, Average number of labels: 3.334761904761905
[11]:
| | Label | Count |
---|---|---|
1 | airplane | 100 |
2 | bare-soil | 718 |
3 | buildings | 691 |
4 | cars | 886 |
5 | chaparral | 115 |
6 | court | 105 |
7 | dock | 100 |
8 | field | 103 |
9 | grass | 975 |
10 | mobile-home | 102 |
11 | pavement | 1300 |
12 | sand | 294 |
13 | sea | 100 |
14 | ship | 102 |
15 | tanks | 100 |
16 | trees | 1009 |
17 | water | 203 |


MLRSNet dataset#
[12]:
# Load the full MLRSNet multi-label dataset from its image folder and
# annotation file.
dataset_config = dict(
    data_dir="/media/hdd/multi-label/MLRSNet_multilabel/images",
    csv_file="/media/hdd/multi-label/MLRSNet_multilabel/multilabels.txt",
)
dataset = MLRSNetMultiLabelDataset(dataset_config)

# Report the dataset size, then the min/max/average labels-per-image summary.
# The recorded run reports a minimum of 0.0 labels for this dataset.
print(f"Total number of images: {len(dataset)}")
label_summary = dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = dataset.show_batch(20, True)
fig = dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
dataset.data_distribution_table()
Total number of images: 109161
Minimum number of labels: 0.0, Maximum number of labels: 13.0, Average number of labels: 5.019109388884308
[12]:
| | Label | Count |
---|---|---|
1 | airplane | 2306 |
2 | airport | 2480 |
3 | bare soil | 39345 |
4 | baseball diamond | 1996 |
5 | basketball court | 3726 |
6 | beach | 2485 |
7 | bridge | 2772 |
8 | buildings | 51305 |
9 | cars | 34013 |
10 | cloud | 1798 |
11 | containers | 2500 |
12 | crosswalk | 2673 |
13 | dense residential area | 2774 |
14 | desert | 2537 |
15 | dock | 2492 |
16 | factory | 2667 |
17 | field | 15142 |
18 | football field | 1057 |
19 | forest | 3562 |
20 | freeway | 2500 |
21 | golf course | 2515 |
22 | grass | 49390 |
23 | greenhouse | 2601 |
24 | gully | 2413 |
25 | habor | 2492 |
26 | intersection | 2497 |
27 | island | 2493 |
28 | lake | 2499 |
29 | mobile home | 2499 |
30 | mountain | 5468 |
31 | overpass | 2652 |
32 | park | 1682 |
33 | parking lot | 7061 |
34 | parkway | 2537 |
35 | pavement | 56383 |
36 | railway | 4399 |
37 | railway station | 2187 |
38 | river | 2493 |
39 | road | 37783 |
40 | roundabout | 2039 |
41 | runway | 2259 |
42 | sand | 11014 |
43 | sea | 4980 |
44 | ships | 4092 |
45 | snow | 3565 |
46 | snowberg | 2555 |
47 | sparse residential area | 1829 |
48 | stadium | 2462 |
49 | swimming pool | 5078 |
50 | tanks | 2500 |
51 | tennis court | 2499 |
52 | terrace | 2345 |
53 | track | 3693 |
54 | trail | 12376 |
55 | transmission tower | 2500 |
56 | trees | 70728 |
57 | water | 27834 |
58 | chaparral | 5903 |
59 | wetland | 3417 |
60 | wind turbine | 2049 |


Planet UAS dataset#
[13]:
# Load the full Planet UAS multi-label dataset from its image folder and
# annotation file.
dataset_config = dict(
    data_dir="/media/hdd/multi-label/PlanetUAS/images",
    csv_file="/media/hdd/multi-label/PlanetUAS/multilabels.txt",
)
dataset = PlanetUASMultiLabelDataset(dataset_config)

# Report the dataset size, then the min/max/average labels-per-image summary.
print(f"Total number of images: {len(dataset)}")
label_summary = dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 20 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = dataset.show_batch(20, True)
fig = dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
dataset.data_distribution_table()
Total number of images: 40479
Minimum number of labels: 1.0, Maximum number of labels: 9.0, Average number of labels: 2.8707477951530422
[13]:
| | Label | Count |
---|---|---|
1 | haze | 2697 |
2 | primary | 37513 |
3 | agriculture | 12315 |
4 | clear | 28431 |
5 | water | 7411 |
6 | habitation | 3660 |
7 | road | 8071 |
8 | cultivation | 4477 |
9 | slash_burn | 209 |
10 | cloudy | 2089 |
11 | partly_cloudy | 7261 |
12 | conventional_mine | 100 |
13 | bare_ground | 862 |
14 | artisinal_mine | 339 |
15 | blooming | 332 |
16 | selective_logging | 340 |
17 | blow_down | 98 |


Big Earth Net Dataset with 19 labels#
[5]:
# Load BigEarthNet from its LMDB store using the "all" split listing, with the
# "rgb" selection and the 19-label version of the annotations.
dataset_config = dict(
    lmdb_path="/media/ssd/BigEarthNet/lmdb",
    csv_file="/media/ssd/BigEarthNet/splits/all.csv",
    selection="rgb",
    version="19 labels",
)
dataset = BigEarthNetDataset(dataset_config)

# Report the dataset size, then the min/max/average labels-per-image summary.
# NOTE(review): the recorded output shows the "Processed ... of 519284"
# progress sequence three times, so it looks like labels_stats(), the bar
# chart and the table each make a full pass over the data - confirm before
# rerunning on slow storage.
print(f"Total number of images: {len(dataset)}")
label_summary = dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 15 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = dataset.show_batch(15, True)
fig = dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
dataset.data_distribution_table()
Total number of images: 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Minimum number of labels: 1.0, Maximum number of labels: 11.0, Average number of labels: 2.8923999198896944
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
[5]:
| | Label | Count |
---|---|---|
0 | Urban fabric | 74891 |
1 | Industrial or commercial units | 11865 |
2 | Arable land | 194148 |
3 | Permanent crops | 29350 |
4 | Pastures | 98997 |
5 | Complex cultivation patterns | 104203 |
6 | Land principally occupied by agriculture, with... | 130637 |
7 | Agro-forestry areas | 30649 |
8 | Broad-leaved forest | 141300 |
9 | Coniferous forest | 164775 |
10 | Mixed forest | 176567 |
11 | Natural grassland and sparsely vegetated areas | 12022 |
12 | Moors, heathland and sclerophyllous vegetation | 16267 |
13 | Transitional woodland, shrub | 148950 |
14 | Beaches, dunes, sands | 1536 |
15 | Inland wetlands | 22100 |
16 | Coastal wetlands | 1566 |
17 | Inland waters | 67277 |
18 | Marine waters | 74877 |


Big Earth Net Dataset with 43 labels#
[6]:
# Load BigEarthNet from its LMDB store using the "all" split listing, with the
# "rgb" selection and the 43-label version of the annotations.
dataset_config = dict(
    lmdb_path="/media/ssd/BigEarthNet/lmdb",
    csv_file="/media/ssd/BigEarthNet/splits/all.csv",
    selection="rgb",
    version="43 labels",
)
dataset = BigEarthNetDataset(dataset_config)

# Report the dataset size, then the min/max/average labels-per-image summary.
# NOTE(review): the recorded output shows the "Processed ... of 519284"
# progress sequence three times, so it looks like labels_stats(), the bar
# chart and the table each make a full pass over the data - confirm before
# rerunning on slow storage.
print(f"Total number of images: {len(dataset)}")
label_summary = dataset.labels_stats()
print(label_summary)

# Sample preview followed by the per-label distribution chart.
# NOTE(review): 15 is presumably the number of images to show; the meaning of
# the positional True is not visible here - confirm against show_batch().
fig = dataset.show_batch(15, True)
fig = dataset.data_distribution_barchart()

# Keep this as the last expression so the notebook renders the table.
dataset.data_distribution_table()
Total number of images: 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Minimum number of labels: 1.0, Maximum number of labels: 12.0, Average number of labels: 2.9619533819643973
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
[6]:
| | Label | Count |
---|---|---|
0 | Continuous urban fabric | 10766 |
1 | Discontinuous urban fabric | 65894 |
2 | Industrial or commercial units | 11865 |
3 | Road and rail networks and associated land | 3269 |
4 | Port areas | 453 |
5 | Airports | 815 |
6 | Mineral extraction sites | 4213 |
7 | Dump sites | 816 |
8 | Construction sites | 1077 |
9 | Green urban areas | 1648 |
10 | Sport and leisure facilities | 4975 |
11 | Non-irrigated arable land | 183987 |
12 | Permanently irrigated land | 13571 |
13 | Rice fields | 3793 |
14 | Vineyards | 9524 |
15 | Fruit trees and berry plantations | 4672 |
16 | Olive groves | 12503 |
17 | Pastures | 98997 |
18 | Annual crops associated with permanent crops | 7019 |
19 | Complex cultivation patterns | 104203 |
20 | Land principally occupied by agriculture, with... | 130637 |
21 | Agro-forestry areas | 30649 |
22 | Broad-leaved forest | 141300 |
23 | Coniferous forest | 164775 |
24 | Mixed forest | 176567 |
25 | Natural grassland | 11141 |
26 | Moors and heathland | 5073 |
27 | Sclerophyllous vegetation | 11241 |
28 | Transitional woodland/shrub | 148950 |
29 | Beaches, dunes, sands | 1536 |
30 | Bare rock | 2894 |
31 | Sparsely vegetated areas | 1202 |
32 | Burnt areas | 304 |
33 | Inland marshes | 5516 |
34 | Peatbogs | 16667 |
35 | Salt marshes | 1339 |
36 | Salines | 424 |
37 | Intertidal flats | 938 |
38 | Water courses | 9792 |
39 | Water bodies | 58009 |
40 | Coastal lagoons | 1495 |
41 | Estuaries | 1064 |
42 | Sea and ocean | 72522 |

