Multi Label datasets statistics#

[1]:
from aitlas.datasets import (
    AIDMultiLabelDataset,
    DFC15MultiLabelDataset,
    PlanetUASMultiLabelDataset,
    UcMercedMultiLabelDataset,
    MLRSNetMultiLabelDataset,
    BigEarthNetDataset
)

AID dataset train split#

[7]:
# AID train split: name each input location first, then assemble the config from them.
train_images_dir = "/media/hdd/multi-label/AID_multilabel/train/images"
train_labels_file = "/media/hdd/multi-label/AID_multilabel/train/multilabels.txt"
train_dataset_config = {"data_dir": train_images_dir, "csv_file": train_labels_file}
train_dataset = AIDMultiLabelDataset(train_dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_train_images = len(train_dataset)
print(f"Total number of images: {num_train_images}")
train_label_summary = train_dataset.labels_stats()
print(train_label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = train_dataset.show_batch(20, True)
fig = train_dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
train_dataset.data_distribution_table()
Total number of images: 2400
Minimum number of labels: 1.0, Maximum number of labels: 11.0, Average number of labels: 5.14875
[7]:
Label Count
1 airplane 79
2 bare-soil 1171
3 buildings 1744
4 cars 1617
5 chaparral 75
6 court 269
7 dock 221
8 field 175
9 grass 1829
10 mobile-home 1
11 pavement 1870
12 sand 207
13 sea 177
14 ship 237
15 tanks 87
16 trees 1924
17 water 674
../_images/examples_multilabel_datasets_statistics_3_2.png
../_images/examples_multilabel_datasets_statistics_3_3.png

AID dataset test split#

[8]:
# AID test split: name each input location first, then assemble the config from them.
test_images_dir = "/media/hdd/multi-label/AID_multilabel/test/images"
test_labels_file = "/media/hdd/multi-label/AID_multilabel/test/multilabels.txt"
test_dataset_config = {"data_dir": test_images_dir, "csv_file": test_labels_file}
test_dataset = AIDMultiLabelDataset(test_dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_test_images = len(test_dataset)
print(f"Total number of images: {num_test_images}")
test_label_summary = test_dataset.labels_stats()
print(test_label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = test_dataset.show_batch(20, True)
fig = test_dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
test_dataset.data_distribution_table()
Total number of images: 600
Minimum number of labels: 1.0, Maximum number of labels: 10.0, Average number of labels: 5.168333333333333
[8]:
Label Count
1 airplane 20
2 bare-soil 304
3 buildings 417
4 cars 409
5 chaparral 37
6 court 75
7 dock 50
8 field 39
9 grass 466
10 mobile-home 1
11 pavement 458
12 sand 52
13 sea 44
14 ship 47
15 tanks 21
16 trees 483
17 water 178
../_images/examples_multilabel_datasets_statistics_5_2.png
../_images/examples_multilabel_datasets_statistics_5_3.png

DFC15 dataset train split#

[9]:
# DFC15 train split: name each input location first, then assemble the config from them.
train_images_dir = "/media/hdd/multi-label/DFC15_multilabel/train/images"
train_labels_file = "/media/hdd/multi-label/DFC15_multilabel/train/multilabels.txt"
train_dataset_config = {"data_dir": train_images_dir, "csv_file": train_labels_file}
train_dataset = DFC15MultiLabelDataset(train_dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_train_images = len(train_dataset)
print(f"Total number of images: {num_train_images}")
train_label_summary = train_dataset.labels_stats()
print(train_label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = train_dataset.show_batch(20, True)
fig = train_dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
train_dataset.data_distribution_table()
Total number of images: 2673
Minimum number of labels: 2.0, Maximum number of labels: 6.0, Average number of labels: 2.7871305649083427
[9]:
Label Count
1 impervious 2516
2 water 791
3 clutter 1508
4 vegetation 867
5 building 797
6 tree 201
7 boat 216
8 car 554
../_images/examples_multilabel_datasets_statistics_7_2.png
../_images/examples_multilabel_datasets_statistics_7_3.png

DFC15 dataset test split#

[10]:
# DFC15 test split: name each input location first, then assemble the config from them.
test_images_dir = "/media/hdd/multi-label/DFC15_multilabel/test/images"
test_labels_file = "/media/hdd/multi-label/DFC15_multilabel/test/multilabels.txt"
test_dataset_config = {"data_dir": test_images_dir, "csv_file": test_labels_file}
test_dataset = DFC15MultiLabelDataset(test_dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_test_images = len(test_dataset)
print(f"Total number of images: {num_test_images}")
test_label_summary = test_dataset.labels_stats()
print(test_label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = test_dataset.show_batch(20, True)
fig = test_dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
test_dataset.data_distribution_table()
Total number of images: 669
Minimum number of labels: 2.0, Maximum number of labels: 6.0, Average number of labels: 2.828101644245142
[10]:
Label Count
1 impervious 617
2 water 207
3 clutter 383
4 vegetation 219
5 building 204
6 tree 57
7 boat 54
8 car 151
../_images/examples_multilabel_datasets_statistics_9_2.png
../_images/examples_multilabel_datasets_statistics_9_3.png

UC Merced dataset#

[11]:
# UC Merced (single, unsplit set): name each input location, then assemble the config.
images_dir = "/media/hdd/multi-label/UCMerced_multilabel/images"
labels_file = "/media/hdd/multi-label/UCMerced_multilabel/multilabels.txt"
dataset_config = {"data_dir": images_dir, "csv_file": labels_file}
dataset = UcMercedMultiLabelDataset(dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_images = len(dataset)
print(f"Total number of images: {num_images}")
label_summary = dataset.labels_stats()
print(label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = dataset.show_batch(20, True)
fig = dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
dataset.data_distribution_table()
Total number of images: 2100
Minimum number of labels: 1.0, Maximum number of labels: 7.0, Average number of labels: 3.334761904761905
[11]:
Label Count
1 airplane 100
2 bare-soil 718
3 buildings 691
4 cars 886
5 chaparral 115
6 court 105
7 dock 100
8 field 103
9 grass 975
10 mobile-home 102
11 pavement 1300
12 sand 294
13 sea 100
14 ship 102
15 tanks 100
16 trees 1009
17 water 203
../_images/examples_multilabel_datasets_statistics_11_2.png
../_images/examples_multilabel_datasets_statistics_11_3.png

MLRSNet dataset#

[12]:
# MLRSNet (single, unsplit set): name each input location, then assemble the config.
images_dir = "/media/hdd/multi-label/MLRSNet_multilabel/images"
labels_file = "/media/hdd/multi-label/MLRSNet_multilabel/multilabels.txt"
dataset_config = {"data_dir": images_dir, "csv_file": labels_file}
dataset = MLRSNetMultiLabelDataset(dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_images = len(dataset)
print(f"Total number of images: {num_images}")
label_summary = dataset.labels_stats()
print(label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = dataset.show_batch(20, True)
fig = dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
dataset.data_distribution_table()
Total number of images: 109161
Minimum number of labels: 0.0, Maximum number of labels: 13.0, Average number of labels: 5.019109388884308
[12]:
Label Count
1 airplane 2306
2 airport 2480
3 bare soil 39345
4 baseball diamond 1996
5 basketball court 3726
6 beach 2485
7 bridge 2772
8 buildings 51305
9 cars 34013
10 cloud 1798
11 containers 2500
12 crosswalk 2673
13 dense residential area 2774
14 desert 2537
15 dock 2492
16 factory 2667
17 field 15142
18 football field 1057
19 forest 3562
20 freeway 2500
21 golf course 2515
22 grass 49390
23 greenhouse 2601
24 gully 2413
25 habor 2492
26 intersection 2497
27 island 2493
28 lake 2499
29 mobile home 2499
30 mountain 5468
31 overpass 2652
32 park 1682
33 parking lot 7061
34 parkway 2537
35 pavement 56383
36 railway 4399
37 railway station 2187
38 river 2493
39 road 37783
40 roundabout 2039
41 runway 2259
42 sand 11014
43 sea 4980
44 ships 4092
45 snow 3565
46 snowberg 2555
47 sparse residential area 1829
48 stadium 2462
49 swimming pool 5078
50 tanks 2500
51 tennis court 2499
52 terrace 2345
53 track 3693
54 trail 12376
55 transmission tower 2500
56 trees 70728
57 water 27834
58 chaparral 5903
59 wetland 3417
60 wind turbine 2049
../_images/examples_multilabel_datasets_statistics_13_2.png
../_images/examples_multilabel_datasets_statistics_13_3.png

Planet UAS dataset#

[13]:
# Planet UAS (single, unsplit set): name each input location, then assemble the config.
images_dir = "/media/hdd/multi-label/PlanetUAS/images"
labels_file = "/media/hdd/multi-label/PlanetUAS/multilabels.txt"
dataset_config = {"data_dir": images_dir, "csv_file": labels_file}
dataset = PlanetUASMultiLabelDataset(dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_images = len(dataset)
print(f"Total number of images: {num_images}")
label_summary = dataset.labels_stats()
print(label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(20, True) presumably previews 20 images with their labels shown - confirm against aitlas.
fig = dataset.show_batch(20, True)
fig = dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
dataset.data_distribution_table()
Total number of images: 40479
Minimum number of labels: 1.0, Maximum number of labels: 9.0, Average number of labels: 2.8707477951530422
[13]:
Label Count
1 haze 2697
2 primary 37513
3 agriculture 12315
4 clear 28431
5 water 7411
6 habitation 3660
7 road 8071
8 cultivation 4477
9 slash_burn 209
10 cloudy 2089
11 partly_cloudy 7261
12 conventional_mine 100
13 bare_ground 862
14 artisinal_mine 339
15 blooming 332
16 selective_logging 340
17 blow_down 98
../_images/examples_multilabel_datasets_statistics_15_2.png
../_images/examples_multilabel_datasets_statistics_15_3.png

BigEarthNet dataset with 19 labels#

[5]:
# BigEarthNet, 19-label variant: name each setting, then assemble the config from them.
lmdb_location = "/media/ssd/BigEarthNet/lmdb"
splits_file = "/media/ssd/BigEarthNet/splits/all.csv"
band_selection = "rgb"
label_version = "19 labels"
dataset_config = {
    "lmdb_path": lmdb_location,
    "csv_file": splits_file,
    "selection": band_selection,
    "version": label_version,
}
dataset = BigEarthNetDataset(dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_images = len(dataset)
print(f"Total number of images: {num_images}")
label_summary = dataset.labels_stats()
print(label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(15, True) presumably previews 15 images with their labels shown - confirm against aitlas.
fig = dataset.show_batch(15, True)
fig = dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
dataset.data_distribution_table()
Total number of images: 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Minimum number of labels: 1.0, Maximum number of labels: 11.0, Average number of labels: 2.8923999198896944
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
[5]:
Label Count
0 Urban fabric 74891
1 Industrial or commercial units 11865
2 Arable land 194148
3 Permanent crops 29350
4 Pastures 98997
5 Complex cultivation patterns 104203
6 Land principally occupied by agriculture, with... 130637
7 Agro-forestry areas 30649
8 Broad-leaved forest 141300
9 Coniferous forest 164775
10 Mixed forest 176567
11 Natural grassland and sparsely vegetated areas 12022
12 Moors, heathland and sclerophyllous vegetation 16267
13 Transitional woodland, shrub 148950
14 Beaches, dunes, sands 1536
15 Inland wetlands 22100
16 Coastal wetlands 1566
17 Inland waters 67277
18 Marine waters 74877
../_images/examples_multilabel_datasets_statistics_17_2.png
../_images/examples_multilabel_datasets_statistics_17_3.png

BigEarthNet dataset with 43 labels#

[6]:
# BigEarthNet, 43-label variant: name each setting, then assemble the config from them.
lmdb_location = "/media/ssd/BigEarthNet/lmdb"
splits_file = "/media/ssd/BigEarthNet/splits/all.csv"
band_selection = "rgb"
label_version = "43 labels"
dataset_config = {
    "lmdb_path": lmdb_location,
    "csv_file": splits_file,
    "selection": band_selection,
    "version": label_version,
}
dataset = BigEarthNetDataset(dataset_config)

# Report the dataset size, then print whatever summary labels_stats() returns.
num_images = len(dataset)
print(f"Total number of images: {num_images}")
label_summary = dataset.labels_stats()
print(label_summary)

# Two figures: a batch preview followed by the label-distribution bar chart.
# NOTE(review): show_batch(15, True) presumably previews 15 images with their labels shown - confirm against aitlas.
fig = dataset.show_batch(15, True)
fig = dataset.data_distribution_barchart()

# Kept as the final expression so the table is the cell's displayed result.
dataset.data_distribution_table()
Total number of images: 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Minimum number of labels: 1.0, Maximum number of labels: 12.0, Average number of labels: 2.9619533819643973
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
Processed 100000 of 519284
Processed 200000 of 519284
Processed 300000 of 519284
Processed 400000 of 519284
Processed 500000 of 519284
[6]:
Label Count
0 Continuous urban fabric 10766
1 Discontinuous urban fabric 65894
2 Industrial or commercial units 11865
3 Road and rail networks and associated land 3269
4 Port areas 453
5 Airports 815
6 Mineral extraction sites 4213
7 Dump sites 816
8 Construction sites 1077
9 Green urban areas 1648
10 Sport and leisure facilities 4975
11 Non-irrigated arable land 183987
12 Permanently irrigated land 13571
13 Rice fields 3793
14 Vineyards 9524
15 Fruit trees and berry plantations 4672
16 Olive groves 12503
17 Pastures 98997
18 Annual crops associated with permanent crops 7019
19 Complex cultivation patterns 104203
20 Land principally occupied by agriculture, with... 130637
21 Agro-forestry areas 30649
22 Broad-leaved forest 141300
23 Coniferous forest 164775
24 Mixed forest 176567
25 Natural grassland 11141
26 Moors and heathland 5073
27 Sclerophyllous vegetation 11241
28 Transitional woodland/shrub 148950
29 Beaches, dunes, sands 1536
30 Bare rock 2894
31 Sparsely vegetated areas 1202
32 Burnt areas 304
33 Inland marshes 5516
34 Peatbogs 16667
35 Salt marshes 1339
36 Salines 424
37 Intertidal flats 938
38 Water courses 9792
39 Water bodies 58009
40 Coastal lagoons 1495
41 Estuaries 1064
42 Sea and ocean 72522
../_images/examples_multilabel_datasets_statistics_19_2.png
../_images/examples_multilabel_datasets_statistics_19_3.png