From a9ef9cb317148a34d37ff265eb13b1fd0a5dbba1 Mon Sep 17 00:00:00 2001 From: loafei <18996341802@163.com> Date: Sat, 4 Jan 2025 15:54:19 +0800 Subject: [PATCH 1/3] add new datasets --- config/dataset/image_classification/README.md | 6 ++++ .../image_classification/test/TALL14.yaml | 20 +++++++++++++ .../image_classification/test/TALL20.yaml | 28 +++++++++++++++++++ .../image_classification/test/cifar10.yaml | 2 +- .../test/cub-200-2011.yaml | 4 +++ .../test/emnist_letters.yaml | 5 ++++ .../test/emnist_mnist.yaml | 4 +++ .../test/fashion_mnist.yaml | 4 +++ .../image_classification/test/fer2013.yaml | 3 ++ .../image_classification/test/food101.yaml | 4 +++ .../image_classification/test/kmnist.yaml | 4 +++ .../test/mango-leaf-disease.yaml | 4 +++ .../test/oxford-iiit-pet.yaml | 4 +++ .../test/oxford_flowers102.yaml | 4 +++ .../image_classification/test/pcam.yaml | 4 +++ .../test/rendered-sst2.yaml | 4 +++ .../image_classification/test/stl10.yaml | 4 +++ .../image_classification/train/TALL14.yaml | 20 +++++++++++++ .../image_classification/train/TALL20.yaml | 28 +++++++++++++++++++ .../image_classification/train/cifar10.yaml | 2 +- .../train/cub-200-2011.yaml | 4 +++ .../train/emnist_letters.yaml | 4 +++ .../train/emnist_mnist.yaml | 4 +++ .../train/fashion_mnist.yaml | 4 +++ .../image_classification/train/fer2013.yaml | 3 ++ .../image_classification/train/food101.yaml | 4 +++ .../image_classification/train/kmnist.yaml | 4 +++ .../train/mango-leaf-disease.yaml | 4 +++ .../train/oxford-iiit-pet.yaml | 4 +++ .../train/oxford_flowers102.yaml | 4 +++ .../image_classification/train/pcam.yaml | 4 +++ .../train/rendered-sst2.yaml | 4 +++ .../image_classification/train/stl10.yaml | 4 +++ 33 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 config/dataset/image_classification/README.md create mode 100644 config/dataset/image_classification/test/TALL14.yaml create mode 100644 config/dataset/image_classification/test/TALL20.yaml create mode 100644 config/dataset/image_classification/test/cub-200-2011.yaml create mode 100644 config/dataset/image_classification/test/emnist_letters.yaml create mode 100644 config/dataset/image_classification/test/emnist_mnist.yaml create mode 100644 config/dataset/image_classification/test/fashion_mnist.yaml create mode 100644 config/dataset/image_classification/test/fer2013.yaml create mode 100644 config/dataset/image_classification/test/food101.yaml create mode 100644 config/dataset/image_classification/test/kmnist.yaml create mode 100644 config/dataset/image_classification/test/mango-leaf-disease.yaml create mode 100644 config/dataset/image_classification/test/oxford-iiit-pet.yaml create mode 100644 config/dataset/image_classification/test/oxford_flowers102.yaml create mode 100644 config/dataset/image_classification/test/pcam.yaml create mode 100644 config/dataset/image_classification/test/rendered-sst2.yaml create mode 100644 config/dataset/image_classification/test/stl10.yaml create mode 100644 config/dataset/image_classification/train/TALL14.yaml create mode 100644 config/dataset/image_classification/train/TALL20.yaml create mode 100644 config/dataset/image_classification/train/cub-200-2011.yaml create mode 100644 config/dataset/image_classification/train/emnist_letters.yaml create mode 100644 config/dataset/image_classification/train/emnist_mnist.yaml create mode 100644 config/dataset/image_classification/train/fashion_mnist.yaml create mode 100644 config/dataset/image_classification/train/fer2013.yaml create mode 100644 
config/dataset/image_classification/train/food101.yaml create mode 100644 config/dataset/image_classification/train/kmnist.yaml create mode 100644 config/dataset/image_classification/train/mango-leaf-disease.yaml create mode 100644 config/dataset/image_classification/train/oxford-iiit-pet.yaml create mode 100644 config/dataset/image_classification/train/oxford_flowers102.yaml create mode 100644 config/dataset/image_classification/train/pcam.yaml create mode 100644 config/dataset/image_classification/train/rendered-sst2.yaml create mode 100644 config/dataset/image_classification/train/stl10.yaml diff --git a/config/dataset/image_classification/README.md b/config/dataset/image_classification/README.md new file mode 100644 index 00000000..ee66c96f --- /dev/null +++ b/config/dataset/image_classification/README.md @@ -0,0 +1,6 @@ +# Image Classification Dataset Configurations + +This folder contains the dataset configurations for image classification tasks. + +- Each dataset should have 'image' and 'label' columns. +- If a dataset has no test split, we will use the validation split as the test split and create the validation set from the training set. diff --git a/config/dataset/image_classification/test/TALL14.yaml b/config/dataset/image_classification/test/TALL14.yaml new file mode 100644 index 00000000..82bf635d --- /dev/null +++ b/config/dataset/image_classification/test/TALL14.yaml @@ -0,0 +1,20 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # eight tasks in the task arithmetic paper + - sun397 + - stanford-cars + - resisc45 + - eurosat + - svhn + - gtsrb + - mnist + - dtd + # additional 6 tasks in the TALL mask paper + - oxford_flowers102 + - pcam + - fer2013 + - oxford-iiit-pet + - stl10 + - cifar100 diff --git a/config/dataset/image_classification/test/TALL20.yaml b/config/dataset/image_classification/test/TALL20.yaml new file mode 100644 index 00000000..9eaf6811 --- /dev/null +++ b/config/dataset/image_classification/test/TALL20.yaml @@ -0,0 +1,28 @@ +# The 20 tasks used in the paper: +# Wang et al.
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # eight tasks in the task arithmetic paper + - sun397 + - stanford-cars + - resisc45 + - eurosat + - svhn + - gtsrb + - mnist + - dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - oxford_flowers102 + - pcam + - fer2013 + - oxford-iiit-pet + - stl10 + - cifar100 + # additional 6 tasks in the TALL mask paper (TALL 20) + - cifar10 + - food101 + - fashion_mnist + - emnist_letters + - kmnist + - rendered-sst2 + diff --git a/config/dataset/image_classification/test/cifar10.yaml b/config/dataset/image_classification/test/cifar10.yaml index bbbcea1a..05b89082 100644 --- a/config/dataset/image_classification/test/cifar10.yaml +++ b/config/dataset/image_classification/test/cifar10.yaml @@ -1,4 +1,4 @@ -dtd: +cifar10: _target_: datasets.load_dataset path: tanganke/cifar10 split: test diff --git a/config/dataset/image_classification/test/cub-200-2011.yaml b/config/dataset/image_classification/test/cub-200-2011.yaml new file mode 100644 index 00000000..33bfdfd9 --- /dev/null +++ b/config/dataset/image_classification/test/cub-200-2011.yaml @@ -0,0 +1,4 @@ +cub-200-2011: + _target_: datasets.load_dataset + path: Donghyun99/CUB-200-2011 + split: test diff --git a/config/dataset/image_classification/test/emnist_letters.yaml b/config/dataset/image_classification/test/emnist_letters.yaml new file mode 100644 index 00000000..98aeb6d0 --- /dev/null +++ b/config/dataset/image_classification/test/emnist_letters.yaml @@ -0,0 +1,5 @@ +emnist_letters: + _target_: datasets.load_dataset + path: tanganke/emnist_letters + split: test + diff --git a/config/dataset/image_classification/test/emnist_mnist.yaml b/config/dataset/image_classification/test/emnist_mnist.yaml new file mode 100644 index 00000000..82102895 --- /dev/null +++ b/config/dataset/image_classification/test/emnist_mnist.yaml @@ -0,0 +1,4 @@ +emnist_mnist: + _target_: datasets.load_dataset + path: tanganke/emnist_mnist + split: test diff --git a/config/dataset/image_classification/test/fashion_mnist.yaml b/config/dataset/image_classification/test/fashion_mnist.yaml new file mode 100644 index 00000000..e98ceac7 --- /dev/null +++ b/config/dataset/image_classification/test/fashion_mnist.yaml @@ -0,0 +1,4 @@ +fashion_mnist: + _target_: datasets.load_dataset + path: zalando-datasets/fashion_mnist + split: test \ No newline at end of file diff --git a/config/dataset/image_classification/test/fer2013.yaml b/config/dataset/image_classification/test/fer2013.yaml new file mode 100644 index 00000000..6e91e088 --- /dev/null +++ b/config/dataset/image_classification/test/fer2013.yaml @@ -0,0 +1,3 @@ +fer2013: + _target_: fusion_bench.dataset.fer2013.load_fer2013 + split: test diff --git a/config/dataset/image_classification/test/food101.yaml b/config/dataset/image_classification/test/food101.yaml new file mode 100644 index 00000000..75d1c556 --- /dev/null +++ b/config/dataset/image_classification/test/food101.yaml @@ -0,0 +1,4 @@ +food101: + _target_: datasets.load_dataset + path: ethz/food101 + split: validation diff --git a/config/dataset/image_classification/test/kmnist.yaml b/config/dataset/image_classification/test/kmnist.yaml new file mode 100644 index 00000000..d546b8bd --- /dev/null +++ b/config/dataset/image_classification/test/kmnist.yaml @@ -0,0 +1,4 @@ +kmnist: + _target_: datasets.load_dataset + path: tanganke/kmnist + split: test diff --git a/config/dataset/image_classification/test/mango-leaf-disease.yaml 
b/config/dataset/image_classification/test/mango-leaf-disease.yaml new file mode 100644 index 00000000..ca213dd2 --- /dev/null +++ b/config/dataset/image_classification/test/mango-leaf-disease.yaml @@ -0,0 +1,4 @@ +mango-leaf-disease: + _target_: datasets.load_dataset + path: AfiqN/mango-leaf-disease + split: test diff --git a/config/dataset/image_classification/test/oxford-iiit-pet.yaml b/config/dataset/image_classification/test/oxford-iiit-pet.yaml new file mode 100644 index 00000000..989a7148 --- /dev/null +++ b/config/dataset/image_classification/test/oxford-iiit-pet.yaml @@ -0,0 +1,4 @@ +oxford-iiit-pet: + _target_: datasets.load_dataset + path: timm/oxford-iiit-pet + split: test diff --git a/config/dataset/image_classification/test/oxford_flowers102.yaml b/config/dataset/image_classification/test/oxford_flowers102.yaml new file mode 100644 index 00000000..ad331fc2 --- /dev/null +++ b/config/dataset/image_classification/test/oxford_flowers102.yaml @@ -0,0 +1,4 @@ +oxford_flowers102: + _target_: datasets.load_dataset + path: dpdl-benchmark/oxford_flowers102 + split: test diff --git a/config/dataset/image_classification/test/pcam.yaml b/config/dataset/image_classification/test/pcam.yaml new file mode 100644 index 00000000..f6bb8f1e --- /dev/null +++ b/config/dataset/image_classification/test/pcam.yaml @@ -0,0 +1,4 @@ +pcam: + _target_: datasets.load_dataset + path: 1aurent/PatchCamelyon + split: test diff --git a/config/dataset/image_classification/test/rendered-sst2.yaml b/config/dataset/image_classification/test/rendered-sst2.yaml new file mode 100644 index 00000000..3faa7955 --- /dev/null +++ b/config/dataset/image_classification/test/rendered-sst2.yaml @@ -0,0 +1,4 @@ +rendered-sst2: + _target_: datasets.load_dataset + path: nateraw/rendered-sst2 + split: test diff --git a/config/dataset/image_classification/test/stl10.yaml b/config/dataset/image_classification/test/stl10.yaml new file mode 100644 index 00000000..5bbd824f --- /dev/null +++ b/config/dataset/image_classification/test/stl10.yaml @@ -0,0 +1,4 @@ +stl10: + _target_: datasets.load_dataset + path: tanganke/stl10 + split: test diff --git a/config/dataset/image_classification/train/TALL14.yaml b/config/dataset/image_classification/train/TALL14.yaml new file mode 100644 index 00000000..82bf635d --- /dev/null +++ b/config/dataset/image_classification/train/TALL14.yaml @@ -0,0 +1,20 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # eight tasks in the task arithmetic paper + - sun397 + - stanford-cars + - resisc45 + - eurosat + - svhn + - gtsrb + - mnist + - dtd + # additional 6 tasks in the TALL mask paper + - oxford_flowers102 + - pcam + - fer2013 + - oxford-iiit-pet + - stl10 + - cifar100 diff --git a/config/dataset/image_classification/train/TALL20.yaml b/config/dataset/image_classification/train/TALL20.yaml new file mode 100644 index 00000000..9eaf6811 --- /dev/null +++ b/config/dataset/image_classification/train/TALL20.yaml @@ -0,0 +1,28 @@ +# The 20 tasks used in the paper: +# Wang et al.
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # eight tasks in the task arithmetic paper + - sun397 + - stanford-cars + - resisc45 + - eurosat + - svhn + - gtsrb + - mnist + - dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - oxford_flowers102 + - pcam + - fer2013 + - oxford-iiit-pet + - stl10 + - cifar100 + # additional 6 tasks in the TALL mask paper (TALL 20) + - cifar10 + - food101 + - fashion_mnist + - emnist_letters + - kmnist + - rendered-sst2 + diff --git a/config/dataset/image_classification/train/cifar10.yaml b/config/dataset/image_classification/train/cifar10.yaml index 2d5da591..59514dd2 100644 --- a/config/dataset/image_classification/train/cifar10.yaml +++ b/config/dataset/image_classification/train/cifar10.yaml @@ -1,4 +1,4 @@ -dtd: +cifar10: _target_: datasets.load_dataset path: tanganke/cifar10 split: train diff --git a/config/dataset/image_classification/train/cub-200-2011.yaml b/config/dataset/image_classification/train/cub-200-2011.yaml new file mode 100644 index 00000000..37c0b5f8 --- /dev/null +++ b/config/dataset/image_classification/train/cub-200-2011.yaml @@ -0,0 +1,4 @@ +cub-200-2011: + _target_: datasets.load_dataset + path: Donghyun99/CUB-200-2011 + split: train diff --git a/config/dataset/image_classification/train/emnist_letters.yaml b/config/dataset/image_classification/train/emnist_letters.yaml new file mode 100644 index 00000000..49bbae2b --- /dev/null +++ b/config/dataset/image_classification/train/emnist_letters.yaml @@ -0,0 +1,4 @@ +emnist_letters: + _target_: datasets.load_dataset + path: tanganke/emnist_letters + split: train diff --git a/config/dataset/image_classification/train/emnist_mnist.yaml b/config/dataset/image_classification/train/emnist_mnist.yaml new file mode 100644 index 00000000..c473abba --- /dev/null +++ b/config/dataset/image_classification/train/emnist_mnist.yaml @@ -0,0 +1,4 @@ +emnist_mnist: + _target_: datasets.load_dataset + path: tanganke/emnist_mnist + split: train diff --git a/config/dataset/image_classification/train/fashion_mnist.yaml b/config/dataset/image_classification/train/fashion_mnist.yaml new file mode 100644 index 00000000..0638fd56 --- /dev/null +++ b/config/dataset/image_classification/train/fashion_mnist.yaml @@ -0,0 +1,4 @@ +fashion_mnist: + _target_: datasets.load_dataset + path: zalando-datasets/fashion_mnist + split: train \ No newline at end of file diff --git a/config/dataset/image_classification/train/fer2013.yaml b/config/dataset/image_classification/train/fer2013.yaml new file mode 100644 index 00000000..1a71eee8 --- /dev/null +++ b/config/dataset/image_classification/train/fer2013.yaml @@ -0,0 +1,3 @@ +fer2013: + _target_: fusion_bench.dataset.fer2013.load_fer2013 + split: train diff --git a/config/dataset/image_classification/train/food101.yaml b/config/dataset/image_classification/train/food101.yaml new file mode 100644 index 00000000..16cf043b --- /dev/null +++ b/config/dataset/image_classification/train/food101.yaml @@ -0,0 +1,4 @@ +food101: + _target_: datasets.load_dataset + path: ethz/food101 + split: train diff --git a/config/dataset/image_classification/train/kmnist.yaml b/config/dataset/image_classification/train/kmnist.yaml new file mode 100644 index 00000000..7d9720b5 --- /dev/null +++ b/config/dataset/image_classification/train/kmnist.yaml @@ -0,0 +1,4 @@ +kmnist: + _target_: datasets.load_dataset + path: tanganke/kmnist + split: train diff --git 
a/config/dataset/image_classification/train/mango-leaf-disease.yaml b/config/dataset/image_classification/train/mango-leaf-disease.yaml new file mode 100644 index 00000000..7e2ca86e --- /dev/null +++ b/config/dataset/image_classification/train/mango-leaf-disease.yaml @@ -0,0 +1,4 @@ +mango-leaf-disease: + _target_: datasets.load_dataset + path: AfiqN/mango-leaf-disease + split: train diff --git a/config/dataset/image_classification/train/oxford-iiit-pet.yaml b/config/dataset/image_classification/train/oxford-iiit-pet.yaml new file mode 100644 index 00000000..a76ef467 --- /dev/null +++ b/config/dataset/image_classification/train/oxford-iiit-pet.yaml @@ -0,0 +1,4 @@ +oxford-iiit-pet: + _target_: datasets.load_dataset + path: timm/oxford-iiit-pet + split: train diff --git a/config/dataset/image_classification/train/oxford_flowers102.yaml b/config/dataset/image_classification/train/oxford_flowers102.yaml new file mode 100644 index 00000000..4b7dfa43 --- /dev/null +++ b/config/dataset/image_classification/train/oxford_flowers102.yaml @@ -0,0 +1,4 @@ +oxford_flowers102: + _target_: datasets.load_dataset + path: dpdl-benchmark/oxford_flowers102 + split: train diff --git a/config/dataset/image_classification/train/pcam.yaml b/config/dataset/image_classification/train/pcam.yaml new file mode 100644 index 00000000..8971a585 --- /dev/null +++ b/config/dataset/image_classification/train/pcam.yaml @@ -0,0 +1,4 @@ +pcam: + _target_: datasets.load_dataset + path: 1aurent/PatchCamelyon + split: train diff --git a/config/dataset/image_classification/train/rendered-sst2.yaml b/config/dataset/image_classification/train/rendered-sst2.yaml new file mode 100644 index 00000000..9eb4c871 --- /dev/null +++ b/config/dataset/image_classification/train/rendered-sst2.yaml @@ -0,0 +1,4 @@ +rendered-sst2: + _target_: datasets.load_dataset + path: nateraw/rendered-sst2 + split: train diff --git a/config/dataset/image_classification/train/stl10.yaml b/config/dataset/image_classification/train/stl10.yaml new file mode 100644 index 00000000..d379819a --- /dev/null +++ b/config/dataset/image_classification/train/stl10.yaml @@ -0,0 +1,4 @@ +stl10: + _target_: datasets.load_dataset + path: tanganke/stl10 + split: train From 4974f4e896b9ab86dfeadde02bf335e7a547b5b5 Mon Sep 17 00:00:00 2001 From: loafei <18996341802@163.com> Date: Sat, 4 Jan 2025 16:18:35 +0800 Subject: [PATCH 2/3] add new models and tasks --- config/model/clip-vit/README.md | 38 ++++ .../model/clip-vit/clip-vit-base-patch16.yaml | 4 +- .../clip-vit-base-patch16_TALL14.yaml | 22 ++ .../clip-vit-base-patch16_TALL20.yaml | 29 +++ .../clip-vit-base-patch16_cifar10.yaml | 1 + .../clip-vit-base-patch16_cifar100.yaml | 1 + .../clip-vit/clip-vit-base-patch16_dtd.yaml | 4 +- .../clip-vit-base-patch16_emnist_letters.yaml | 1 + .../clip-vit-base-patch16_eurosat.yaml | 4 +- .../clip-vit-base-patch16_fashion_mnist.yaml | 1 + .../clip-vit-base-patch16_fer2013.yaml | 1 + .../clip-vit-base-patch16_food101.yaml | 1 + .../clip-vit/clip-vit-base-patch16_gtsrb.yaml | 4 +- .../clip-vit-base-patch16_kmnist.yaml | 1 + .../clip-vit/clip-vit-base-patch16_mnist.yaml | 4 +- ...clip-vit-base-patch16_oxford-iiit-pet.yaml | 1 + ...ip-vit-base-patch16_oxford_flowers102.yaml | 1 + .../clip-vit/clip-vit-base-patch16_pcam.yaml | 1 + .../clip-vit-base-patch16_rendered-sst2.yaml | 1 + .../clip-vit-base-patch16_resisc45.yaml | 4 +- .../clip-vit-base-patch16_stanford-cars.yaml | 4 +- .../clip-vit/clip-vit-base-patch16_stl10.yaml | 1 + .../clip-vit-base-patch16_sun397.yaml | 4 +- 
.../clip-vit/clip-vit-base-patch16_svhn.yaml | 4 +- .../model/clip-vit/clip-vit-base-patch32.yaml | 4 +- .../clip-vit-base-patch32_TALL14.yaml | 22 ++ .../clip-vit-base-patch32_TALL20.yaml | 29 +++ .../clip-vit-base-patch32_cifar10.yaml | 1 + .../clip-vit-base-patch32_cifar100.yaml | 1 + .../clip-vit/clip-vit-base-patch32_dtd.yaml | 4 +- .../clip-vit-base-patch32_eight_tasks.yaml | 1 + .../clip-vit-base-patch32_emnist_letters.yaml | 1 + .../clip-vit-base-patch32_eurosat.yaml | 4 +- .../clip-vit-base-patch32_fashion_mnist.yaml | 1 + .../clip-vit-base-patch32_fer2013.yaml | 1 + .../clip-vit-base-patch32_food101.yaml | 1 + .../clip-vit/clip-vit-base-patch32_gtsrb.yaml | 4 +- .../clip-vit-base-patch32_kmnist.yaml | 1 + .../clip-vit/clip-vit-base-patch32_mnist.yaml | 4 +- ...clip-vit-base-patch32_oxford-iiit-pet.yaml | 1 + ...ip-vit-base-patch32_oxford_flowers102.yaml | 1 + .../clip-vit/clip-vit-base-patch32_pcam.yaml | 1 + .../clip-vit-base-patch32_rendered-sst2.yaml | 1 + .../clip-vit-base-patch32_resisc45.yaml | 4 +- .../clip-vit-base-patch32_stanford-cars.yaml | 4 +- .../clip-vit/clip-vit-base-patch32_stl10.yaml | 1 + .../clip-vit-base-patch32_sun397.yaml | 4 +- .../clip-vit/clip-vit-base-patch32_svhn.yaml | 4 +- .../clip-vit/clip-vit-large-patch14.yaml | 4 +- .../clip-vit-large-patch14_TALL14.yaml | 22 ++ .../clip-vit-large-patch14_TALL20.yaml | 29 +++ .../clip-vit-large-patch14_cifar10.yaml | 1 + .../clip-vit-large-patch14_cifar100.yaml | 1 + .../clip-vit/clip-vit-large-patch14_dtd.yaml | 4 +- ...clip-vit-large-patch14_emnist_letters.yaml | 1 + .../clip-vit-large-patch14_eurosat.yaml | 4 +- .../clip-vit-large-patch14_fashion_mnist.yaml | 1 + .../clip-vit-large-patch14_fer2013.yaml | 1 + .../clip-vit-large-patch14_food101.yaml | 1 + .../clip-vit-large-patch14_gtsrb.yaml | 4 +- .../clip-vit-large-patch14_kmnist.yaml | 1 + .../clip-vit-large-patch14_mnist.yaml | 4 +- ...lip-vit-large-patch14_oxford-iiit-pet.yaml | 1 + ...p-vit-large-patch14_oxford_flowers102.yaml | 1 + .../clip-vit/clip-vit-large-patch14_pcam.yaml | 1 + .../clip-vit-large-patch14_rendered-sst2.yaml | 1 + .../clip-vit-large-patch14_resisc45.yaml | 4 +- .../clip-vit-large-patch14_stanford-cars.yaml | 4 +- .../clip-vit-large-patch14_stl10.yaml | 1 + .../clip-vit-large-patch14_sun397.yaml | 4 +- .../clip-vit/clip-vit-large-patch14_svhn.yaml | 4 +- .../clip-vit-base-patch16_TA8_model_only.yaml | 6 + .../clip-vit-base-patch16_TALL14.yaml | 11 + ...ip-vit-base-patch16_TALL14_model_only.yaml | 9 + .../clip-vit-base-patch16_TALL20.yaml | 11 + ...ip-vit-base-patch16_TALL20_model_only.yaml | 9 + .../clip-vit-base-patch16_individual.yaml | 18 +- .../clip-vit-base-patch32_TALL14.yaml | 8 + ...ip-vit-base-patch32_TALL14_model_only.yaml | 6 + .../clip-vit-base-patch32_TALL20.yaml | 8 + ...ip-vit-base-patch32_TALL20_model_only.yaml | 6 + .../clip-vit-base-patch32_individual.yaml | 12 +- ...t-base-patch32_single_task_projection.yaml | 15 ++ .../clip-vit-large-patch14_TALL14.yaml | 11 + ...p-vit-large-patch14_TALL14_model_only.yaml | 9 + .../clip-vit-large-patch14_TALL20.yaml | 11 + ...p-vit-large-patch14_TALL20_model_only.yaml | 9 + .../clip-vit-large-patch14_individual.yaml | 18 +- ...vit-base-patch32_robustness_corrupted.yaml | 27 +++ .../clip-vit-classification_TALL14.yaml | 19 ++ .../clip-vit-classification_TALL20.yaml | 26 +++ .../clip-vit-single-task_cifar10.yaml | 3 + .../clip-vit-single-task_cifar100.yaml | 3 + .../clip-vit-single-task_dtd.yaml | 3 + .../clip-vit-single-task_emnist_letters.yaml | 3 + 
.../clip-vit-single-task_eurosat.yaml | 3 + .../clip-vit-single-task_fashion_mnist.yaml | 3 + .../clip-vit-single-task_fer2013.yaml | 3 + .../clip-vit-single-task_food101.yaml | 3 + .../clip-vit-single-task_gtsrb.yaml | 3 + .../clip-vit-single-task_kmnist.yaml | 3 + .../clip-vit-single-task_mnist.yaml | 3 + .../clip-vit-single-task_oxford-iiit-pet.yaml | 3 + ...lip-vit-single-task_oxford_flowers102.yaml | 3 + ...vit-single-task_oxford_flowers102_val.yaml | 3 + .../clip-vit-single-task_pcam.yaml | 3 + .../clip-vit-single-task_rendered-sst2.yaml | 3 + .../clip-vit-single-task_resisc45.yaml | 3 + .../clip-vit-single-task_stanford-cars.yaml | 3 + .../clip-vit-single-task_stl10.yaml | 3 + .../clip-vit-single-task_sun397.yaml | 3 + .../clip-vit-single-task_svhn.yaml | 3 + fusion_bench/mixins/clip_classification.py | 12 +- fusion_bench/modelpool/base_pool.py | 1 - .../modelpool/clip_vision/modelpool.py | 100 ++++++++- .../tasks/clip_classification/__init__.py | 13 ++ .../tasks/clip_classification/clip_dataset.py | 17 +- .../tasks/clip_classification/cub_200_2011.py | 208 ++++++++++++++++++ .../clip_classification/emnist_letters.py | 31 +++ .../tasks/clip_classification/emnist_mnist.py | 5 + .../clip_classification/fashion_mnist.py | 18 ++ .../tasks/clip_classification/fer2013.py | 18 ++ .../tasks/clip_classification/food101.py | 105 +++++++++ .../tasks/clip_classification/kmnist.py | 17 ++ .../clip_classification/mongo_leaf_disease.py | 19 ++ .../tasks/clip_classification/pcam.py | 5 + 126 files changed, 1096 insertions(+), 121 deletions(-) create mode 100644 config/model/clip-vit/README.md create mode 100644 config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_TALL20.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_food101.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_pcam.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch16_stl10.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_TALL14.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_TALL20.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_food101.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml create mode 100644 
config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_pcam.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml create mode 100644 config/model/clip-vit/clip-vit-base-patch32_stl10.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_TALL14.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_TALL20.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_food101.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_pcam.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml create mode 100644 config/model/clip-vit/clip-vit-large-patch14_stl10.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml create mode 100644 config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL14.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL20.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml create mode 100644 
config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml create mode 100644 config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml create mode 100644 fusion_bench/tasks/clip_classification/cub_200_2011.py create mode 100644 fusion_bench/tasks/clip_classification/emnist_letters.py create mode 100644 fusion_bench/tasks/clip_classification/emnist_mnist.py create mode 100644 fusion_bench/tasks/clip_classification/fashion_mnist.py create mode 100644 fusion_bench/tasks/clip_classification/fer2013.py create mode 100644 fusion_bench/tasks/clip_classification/food101.py create mode 100644 fusion_bench/tasks/clip_classification/kmnist.py create mode 100644 fusion_bench/tasks/clip_classification/mongo_leaf_disease.py create mode 100644 fusion_bench/tasks/clip_classification/pcam.py diff --git a/config/model/clip-vit/README.md b/config/model/clip-vit/README.md new file mode 100644 index 00000000..11024a2a --- /dev/null +++ b/config/model/clip-vit/README.md @@ -0,0 +1,38 @@ +This folder contains the configurations for the CLIP-ViT models (managed by `fusion_bench.modelpool.CLIPVisionModelPool`). + +## Expected Configuration + +### Detailed Configuration + + +```yaml +${name_of_model}: + _target_: ${function_to_load_model} + ... # arguments to pass to the function +``` + +For example, to load the pre-trained CLIP-ViT-B/16 model, you can use the following configuration: + +```yaml +_pretrained_: # `_pretrained_` is a special key in FusionBench that indicates the model is pre-trained + _target_: transformers.CLIPVisionModel.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch16 +``` + +In this case, calling `modelpool.load_model("_pretrained_")` will return a `transformers.CLIPVisionModel` instance, which is equivalent to calling `transformers.CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch16")`. + +The detailed configuration is more flexible and can be used when you need to pass additional arguments to the `from_pretrained` function or call custom functions to load and preprocess the model.
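As a minimal sketch of how such a detailed entry resolves (illustrative code, not part of this patch; it assumes `hydra-core`, `omegaconf`, and `transformers` are installed), Hydra's `instantiate` imports the `_target_` callable and invokes it with the remaining keys as keyword arguments:

```python
# Minimal sketch: resolving a detailed model entry with Hydra.
# Illustrative only -- not fusion_bench code; assumes hydra-core,
# omegaconf, and transformers are installed.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
_pretrained_:
  _target_: transformers.CLIPVisionModel.from_pretrained
  pretrained_model_name_or_path: openai/clip-vit-base-patch16
"""
)

# instantiate() imports `_target_` and calls it with the remaining keys,
# i.e. CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch16").
model = instantiate(cfg["_pretrained_"])
```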
+ +### Simplified Configuration + +```yaml +${name_of_model}: ${pretrained_model_name_or_path} +``` + +This simplified form is equivalent to the detailed configuration above. + +For example, to load the pre-trained CLIP-ViT-B/16 model, you can use the following configuration: + +```yaml +_pretrained_: openai/clip-vit-base-patch16 +``` diff --git a/config/model/clip-vit/clip-vit-base-patch16.yaml b/config/model/clip-vit/clip-vit-base-patch16.yaml index 12ec715f..cafbc34c 100644 --- a/config/model/clip-vit/clip-vit-base-patch16.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16.yaml @@ -1,3 +1 @@ -_pretrained_: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: openai/clip-vit-base-patch16 +_pretrained_: openai/clip-vit-base-patch16 diff --git a/config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml b/config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml new file mode 100644 index 00000000..5393b822 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml @@ -0,0 +1,22 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # pre-trained model + - clip-vit-base-patch16 + # eight tasks in the task arithmetic paper + - clip-vit-base-patch16_sun397 + - clip-vit-base-patch16_stanford-cars + - clip-vit-base-patch16_resisc45 + - clip-vit-base-patch16_eurosat + - clip-vit-base-patch16_svhn + - clip-vit-base-patch16_gtsrb + - clip-vit-base-patch16_mnist + - clip-vit-base-patch16_dtd + # additional 6 tasks in the TALL mask paper + - clip-vit-base-patch16_oxford_flowers102 + - clip-vit-base-patch16_pcam + - clip-vit-base-patch16_fer2013 + - clip-vit-base-patch16_oxford-iiit-pet + - clip-vit-base-patch16_stl10 + - clip-vit-base-patch16_cifar100 diff --git a/config/model/clip-vit/clip-vit-base-patch16_TALL20.yaml b/config/model/clip-vit/clip-vit-base-patch16_TALL20.yaml new file mode 100644 index 00000000..52f64269 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_TALL20.yaml @@ -0,0 +1,29 @@ +# The 20 tasks used in the paper: +# Wang et al.
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # pre-trained model + - clip-vit-base-patch16 + # eight tasks in the task arithmetic paper + - clip-vit-base-patch16_sun397 + - clip-vit-base-patch16_stanford-cars + - clip-vit-base-patch16_resisc45 + - clip-vit-base-patch16_eurosat + - clip-vit-base-patch16_svhn + - clip-vit-base-patch16_gtsrb + - clip-vit-base-patch16_mnist + - clip-vit-base-patch16_dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - clip-vit-base-patch16_oxford_flowers102 + - clip-vit-base-patch16_pcam + - clip-vit-base-patch16_fer2013 + - clip-vit-base-patch16_oxford-iiit-pet + - clip-vit-base-patch16_stl10 + - clip-vit-base-patch16_cifar100 + # additional 6 tasks in the TALL mask paper (TALL 20) + - clip-vit-base-patch16_cifar10 + - clip-vit-base-patch16_food101 + - clip-vit-base-patch16_fashion_mnist + - clip-vit-base-patch16_emnist_letters + - clip-vit-base-patch16_kmnist + - clip-vit-base-patch16_rendered-sst2 diff --git a/config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml b/config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml new file mode 100644 index 00000000..78775832 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml @@ -0,0 +1 @@ +cifar10: tanganke/clip-vit-base-patch16_cifar10 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml b/config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml new file mode 100644 index 00000000..d97adfbd --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml @@ -0,0 +1 @@ +cifar100: tanganke/clip-vit-base-patch16_cifar100 diff --git a/config/model/clip-vit/clip-vit-base-patch16_dtd.yaml b/config/model/clip-vit/clip-vit-base-patch16_dtd.yaml index a21a444a..f8d75e07 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_dtd.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_dtd.yaml @@ -1,3 +1 @@ -dtd: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_dtd +dtd: tanganke/clip-vit-base-patch16_dtd diff --git a/config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml b/config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml new file mode 100644 index 00000000..7f1309db --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml @@ -0,0 +1 @@ +emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters diff --git a/config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml b/config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml index 1f551608..a8c1d5ad 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml @@ -1,3 +1 @@ -eurosat: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_eurosat +eurosat: tanganke/clip-vit-base-patch16_eurosat diff --git a/config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml b/config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml new file mode 100644 index 00000000..90a98f7d --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml @@ -0,0 +1 @@ +fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist diff --git a/config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml b/config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml new file mode 100644 index 00000000..0f147621 --- /dev/null +++ 
b/config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml @@ -0,0 +1 @@ +fer2013: tanganke/clip-vit-base-patch16_fer2013 diff --git a/config/model/clip-vit/clip-vit-base-patch16_food101.yaml b/config/model/clip-vit/clip-vit-base-patch16_food101.yaml new file mode 100644 index 00000000..b4b51ffd --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_food101.yaml @@ -0,0 +1 @@ +food101: tanganke/clip-vit-base-patch16_food101 diff --git a/config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml b/config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml index ee87b11d..6d06432e 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml @@ -1,3 +1 @@ -gtsrb: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_gtsrb +gtsrb: tanganke/clip-vit-base-patch16_gtsrb diff --git a/config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml b/config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml new file mode 100644 index 00000000..58b3ade0 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml @@ -0,0 +1 @@ +kmnist: tanganke/clip-vit-base-patch16_kmnist diff --git a/config/model/clip-vit/clip-vit-base-patch16_mnist.yaml b/config/model/clip-vit/clip-vit-base-patch16_mnist.yaml index 12c722d0..b2e65d16 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_mnist.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_mnist.yaml @@ -1,3 +1 @@ -mnist: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_mnist +mnist: tanganke/clip-vit-base-patch16_mnist diff --git a/config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml b/config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml new file mode 100644 index 00000000..9806eec1 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml @@ -0,0 +1 @@ +oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet diff --git a/config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml b/config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml new file mode 100644 index 00000000..10759717 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml @@ -0,0 +1 @@ +oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102 diff --git a/config/model/clip-vit/clip-vit-base-patch16_pcam.yaml b/config/model/clip-vit/clip-vit-base-patch16_pcam.yaml new file mode 100644 index 00000000..e1862972 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_pcam.yaml @@ -0,0 +1 @@ +pcam: tanganke/clip-vit-base-patch16_pcam diff --git a/config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml b/config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml new file mode 100644 index 00000000..26ce3621 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml @@ -0,0 +1 @@ +rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2 diff --git a/config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml b/config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml index 48201c90..6da47696 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml @@ -1,3 +1 @@ -resisc45: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_resisc45 +resisc45: tanganke/clip-vit-base-patch16_resisc45 
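The diffs above and below replace detailed entries with the simplified string shorthand described in the README. A hedged sketch of how a loader might accept both forms, and of how the dataset entries from the first patch instantiate under the same `_target_` convention (the `load_entry` helper is hypothetical, not the actual `CLIPVisionModelPool` implementation):

```python
# Hypothetical resolver for both configuration forms; the real
# fusion_bench.modelpool.CLIPVisionModelPool logic may differ.
from hydra.utils import instantiate
from omegaconf import OmegaConf
from transformers import CLIPVisionModel

def load_entry(entry):
    """A plain string is shorthand for CLIPVisionModel.from_pretrained(entry);
    a mapping with `_target_` is instantiated as-is."""
    if isinstance(entry, str):
        return CLIPVisionModel.from_pretrained(entry)
    return instantiate(entry)

# Simplified form, e.g. the `sun397: tanganke/clip-vit-base-patch16_sun397`
# entry introduced by this patch:
model = load_entry("tanganke/clip-vit-base-patch16_sun397")

# Dataset entries use the same `_target_` convention and yield a
# datasets.Dataset with 'image' and 'label' columns:
ds_cfg = OmegaConf.create(
    """
cifar10:
  _target_: datasets.load_dataset
  path: tanganke/cifar10
  split: test
"""
)
test_set = instantiate(ds_cfg["cifar10"])
```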
diff --git a/config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml b/config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml index 4aa3ba93..227248a6 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml @@ -1,3 +1 @@ -stanford-cars: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_stanford-cars +stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars diff --git a/config/model/clip-vit/clip-vit-base-patch16_stl10.yaml b/config/model/clip-vit/clip-vit-base-patch16_stl10.yaml new file mode 100644 index 00000000..b5ba3143 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch16_stl10.yaml @@ -0,0 +1 @@ +stl10: tanganke/clip-vit-base-patch16_stl10 diff --git a/config/model/clip-vit/clip-vit-base-patch16_sun397.yaml b/config/model/clip-vit/clip-vit-base-patch16_sun397.yaml index ac8608a5..a039697a 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_sun397.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_sun397.yaml @@ -1,3 +1 @@ -sun397: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_sun397 +sun397: tanganke/clip-vit-base-patch16_sun397 diff --git a/config/model/clip-vit/clip-vit-base-patch16_svhn.yaml b/config/model/clip-vit/clip-vit-base-patch16_svhn.yaml index 9dda8229..bbc87da0 100644 --- a/config/model/clip-vit/clip-vit-base-patch16_svhn.yaml +++ b/config/model/clip-vit/clip-vit-base-patch16_svhn.yaml @@ -1,3 +1 @@ -svhn: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch16_svhn +svhn: tanganke/clip-vit-base-patch16_svhn diff --git a/config/model/clip-vit/clip-vit-base-patch32.yaml b/config/model/clip-vit/clip-vit-base-patch32.yaml index 032f7309..b7a4736d 100644 --- a/config/model/clip-vit/clip-vit-base-patch32.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32.yaml @@ -1,3 +1 @@ -_pretrained_: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: openai/clip-vit-base-patch32 +_pretrained_: openai/clip-vit-base-patch32 diff --git a/config/model/clip-vit/clip-vit-base-patch32_TALL14.yaml b/config/model/clip-vit/clip-vit-base-patch32_TALL14.yaml new file mode 100644 index 00000000..40f7a4f7 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_TALL14.yaml @@ -0,0 +1,22 @@ +# The 14 tasks used in the paper: +# Wang et al.
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # pre-trained model + - clip-vit-base-patch32 + # eight tasks in the task arithmetic paper + - clip-vit-base-patch32_sun397 + - clip-vit-base-patch32_stanford-cars + - clip-vit-base-patch32_resisc45 + - clip-vit-base-patch32_eurosat + - clip-vit-base-patch32_svhn + - clip-vit-base-patch32_gtsrb + - clip-vit-base-patch32_mnist + - clip-vit-base-patch32_dtd + # additional 6 tasks in the TALL mask paper + - clip-vit-base-patch32_oxford_flowers102 + - clip-vit-base-patch32_pcam + - clip-vit-base-patch32_fer2013 + - clip-vit-base-patch32_oxford-iiit-pet + - clip-vit-base-patch32_stl10 + - clip-vit-base-patch32_cifar100 diff --git a/config/model/clip-vit/clip-vit-base-patch32_TALL20.yaml b/config/model/clip-vit/clip-vit-base-patch32_TALL20.yaml new file mode 100644 index 00000000..7fec2528 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_TALL20.yaml @@ -0,0 +1,29 @@ +# The 20 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # pre-trained model + - clip-vit-base-patch32 + # eight tasks in the task arithmetic paper + - clip-vit-base-patch32_sun397 + - clip-vit-base-patch32_stanford-cars + - clip-vit-base-patch32_resisc45 + - clip-vit-base-patch32_eurosat + - clip-vit-base-patch32_svhn + - clip-vit-base-patch32_gtsrb + - clip-vit-base-patch32_mnist + - clip-vit-base-patch32_dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - clip-vit-base-patch32_oxford_flowers102 + - clip-vit-base-patch32_pcam + - clip-vit-base-patch32_fer2013 + - clip-vit-base-patch32_oxford-iiit-pet + - clip-vit-base-patch32_stl10 + - clip-vit-base-patch32_cifar100 + # additional 6 tasks in the TALL mask paper (TALL 20) + - clip-vit-base-patch32_cifar10 + - clip-vit-base-patch32_food101 + - clip-vit-base-patch32_fashion_mnist + - clip-vit-base-patch32_emnist_letters + - clip-vit-base-patch32_kmnist + - clip-vit-base-patch32_rendered-sst2 diff --git a/config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml b/config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml new file mode 100644 index 00000000..505e7042 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml @@ -0,0 +1 @@ +cifar10: tanganke/clip-vit-base-patch32_cifar10 diff --git a/config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml b/config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml new file mode 100644 index 00000000..c310cd7e --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml @@ -0,0 +1 @@ +cifar100: tanganke/clip-vit-base-patch32_cifar100 diff --git a/config/model/clip-vit/clip-vit-base-patch32_dtd.yaml b/config/model/clip-vit/clip-vit-base-patch32_dtd.yaml index 884ef76e..7074604c 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_dtd.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_dtd.yaml @@ -1,3 +1 @@ -dtd: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_dtd +dtd: tanganke/clip-vit-base-patch32_dtd diff --git a/config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml b/config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml index 65007efc..75c79f4f 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml @@ -1,3 +1,4 @@ +# The 8 tasks used in the Task Arithmetic paper defaults:
- clip-vit-base-patch32 - clip-vit-base-patch32_sun397 diff --git a/config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml b/config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml new file mode 100644 index 00000000..e8dab15e --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml @@ -0,0 +1 @@ +emnist_letters: tanganke/clip-vit-base-patch32_emnist_letters diff --git a/config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml b/config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml index 3f946c8d..e6efc82e 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml @@ -1,3 +1 @@ -eurosat: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_eurosat +eurosat: tanganke/clip-vit-base-patch32_eurosat diff --git a/config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml b/config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml new file mode 100644 index 00000000..7f9cc7ee --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml @@ -0,0 +1 @@ +fashion_mnist: tanganke/clip-vit-base-patch32_fashion_mnist diff --git a/config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml b/config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml new file mode 100644 index 00000000..a7acc59a --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml @@ -0,0 +1 @@ +fer2013: tanganke/clip-vit-base-patch32_fer2013 diff --git a/config/model/clip-vit/clip-vit-base-patch32_food101.yaml b/config/model/clip-vit/clip-vit-base-patch32_food101.yaml new file mode 100644 index 00000000..06795942 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_food101.yaml @@ -0,0 +1 @@ +food101: tanganke/clip-vit-base-patch32_food101 diff --git a/config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml b/config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml index b82b9f5a..2e7fade6 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml @@ -1,3 +1 @@ -gtsrb: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_gtsrb +gtsrb: tanganke/clip-vit-base-patch32_gtsrb diff --git a/config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml b/config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml new file mode 100644 index 00000000..18157cd5 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml @@ -0,0 +1 @@ +kmnist: tanganke/clip-vit-base-patch32_kmnist diff --git a/config/model/clip-vit/clip-vit-base-patch32_mnist.yaml b/config/model/clip-vit/clip-vit-base-patch32_mnist.yaml index b802c8ea..de91ffbb 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_mnist.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_mnist.yaml @@ -1,3 +1 @@ -mnist: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_mnist +mnist: tanganke/clip-vit-base-patch32_mnist diff --git a/config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml b/config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml new file mode 100644 index 00000000..3530fe5c --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml @@ -0,0 +1 @@ +oxford-iiit-pet: tanganke/clip-vit-base-patch32_oxford-iiit-pet diff --git 
a/config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml b/config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml new file mode 100644 index 00000000..67f05e01 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml @@ -0,0 +1 @@ +oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102 diff --git a/config/model/clip-vit/clip-vit-base-patch32_pcam.yaml b/config/model/clip-vit/clip-vit-base-patch32_pcam.yaml new file mode 100644 index 00000000..79ab7f63 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_pcam.yaml @@ -0,0 +1 @@ +pcam: tanganke/clip-vit-base-patch32_pcam diff --git a/config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml b/config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml new file mode 100644 index 00000000..e25d9b79 --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml @@ -0,0 +1 @@ +rendered-sst2: tanganke/clip-vit-base-patch32_rendered-sst2 diff --git a/config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml b/config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml index e76cdbbf..3c512d8d 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml @@ -1,3 +1 @@ -resisc45: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_resisc45 +resisc45: tanganke/clip-vit-base-patch32_resisc45 diff --git a/config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml b/config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml index 3752cc05..9f73d1e6 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml @@ -1,3 +1 @@ -stanford-cars: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_stanford-cars +stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars diff --git a/config/model/clip-vit/clip-vit-base-patch32_stl10.yaml b/config/model/clip-vit/clip-vit-base-patch32_stl10.yaml new file mode 100644 index 00000000..699735ac --- /dev/null +++ b/config/model/clip-vit/clip-vit-base-patch32_stl10.yaml @@ -0,0 +1 @@ +stl10: tanganke/clip-vit-base-patch32_stl10 diff --git a/config/model/clip-vit/clip-vit-base-patch32_sun397.yaml b/config/model/clip-vit/clip-vit-base-patch32_sun397.yaml index b523033b..f2e936da 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_sun397.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_sun397.yaml @@ -1,3 +1 @@ -sun397: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_sun397 +sun397: tanganke/clip-vit-base-patch32_sun397 diff --git a/config/model/clip-vit/clip-vit-base-patch32_svhn.yaml b/config/model/clip-vit/clip-vit-base-patch32_svhn.yaml index 37aa5d19..c83a2e81 100644 --- a/config/model/clip-vit/clip-vit-base-patch32_svhn.yaml +++ b/config/model/clip-vit/clip-vit-base-patch32_svhn.yaml @@ -1,3 +1 @@ -svhn: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-base-patch32_svhn +svhn: tanganke/clip-vit-base-patch32_svhn diff --git a/config/model/clip-vit/clip-vit-large-patch14.yaml b/config/model/clip-vit/clip-vit-large-patch14.yaml index ba44d38e..c51bf2d8 100644 --- a/config/model/clip-vit/clip-vit-large-patch14.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14.yaml @@ -1,3 +1 
@@ -_pretrained_: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: openai/clip-vit-large-patch14 +_pretrained_: openai/clip-vit-large-patch14 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_TALL14.yaml b/config/model/clip-vit/clip-vit-large-patch14_TALL14.yaml new file mode 100644 index 00000000..48594b9b --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_TALL14.yaml @@ -0,0 +1,22 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # pre-trained model + - clip-vit-large-patch14 + # eight tasks in the task arithmetic paper + - clip-vit-large-patch14_sun397 + - clip-vit-large-patch14_stanford-cars + - clip-vit-large-patch14_resisc45 + - clip-vit-large-patch14_eurosat + - clip-vit-large-patch14_svhn + - clip-vit-large-patch14_gtsrb + - clip-vit-large-patch14_mnist + - clip-vit-large-patch14_dtd + # additional 6 tasks in the TALL mask paper + - clip-vit-large-patch14_oxford_flowers102 + - clip-vit-large-patch14_pcam + - clip-vit-large-patch14_fer2013 + - clip-vit-large-patch14_oxford-iiit-pet + - clip-vit-large-patch14_stl10 + - clip-vit-large-patch14_cifar100 diff --git a/config/model/clip-vit/clip-vit-large-patch14_TALL20.yaml b/config/model/clip-vit/clip-vit-large-patch14_TALL20.yaml new file mode 100644 index 00000000..2520f574 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_TALL20.yaml @@ -0,0 +1,29 @@ +# The 20 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + # pre-trained model + - clip-vit-large-patch14 + # eight tasks in the task arithmetic paper + - clip-vit-large-patch14_sun397 + - clip-vit-large-patch14_stanford-cars + - clip-vit-large-patch14_resisc45 + - clip-vit-large-patch14_eurosat + - clip-vit-large-patch14_svhn + - clip-vit-large-patch14_gtsrb + - clip-vit-large-patch14_mnist + - clip-vit-large-patch14_dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - clip-vit-large-patch14_oxford_flowers102 + - clip-vit-large-patch14_pcam + - clip-vit-large-patch14_fer2013 + - clip-vit-large-patch14_oxford-iiit-pet + - clip-vit-large-patch14_stl10 + - clip-vit-large-patch14_cifar100 + # additional 6 tasks in the TALL mask paper (TALL 20) + - clip-vit-large-patch14_cifar10 + - clip-vit-large-patch14_food101 + - clip-vit-large-patch14_fashion_mnist + - clip-vit-large-patch14_emnist_letters + - clip-vit-large-patch14_kmnist + - clip-vit-large-patch14_rendered-sst2 diff --git a/config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml b/config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml new file mode 100644 index 00000000..d949b1e2 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml @@ -0,0 +1 @@ +cifar10: tanganke/clip-vit-large-patch14_cifar10 diff --git a/config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml b/config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml new file mode 100644 index 00000000..1c0b6ba6 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml @@ -0,0 +1 @@ +cifar100: tanganke/clip-vit-large-patch14_cifar100 diff --git a/config/model/clip-vit/clip-vit-large-patch14_dtd.yaml b/config/model/clip-vit/clip-vit-large-patch14_dtd.yaml index 54cff74a..0f233255 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_dtd.yaml +++
b/config/model/clip-vit/clip-vit-large-patch14_dtd.yaml @@ -1,3 +1 @@ -dtd: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_dtd +dtd: tanganke/clip-vit-large-patch14_dtd diff --git a/config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml b/config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml new file mode 100644 index 00000000..1d5609d0 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml @@ -0,0 +1 @@ +emnist_letters: tanganke/clip-vit-large-patch14_emnist_letters diff --git a/config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml b/config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml index 07a4e319..ba28a7b8 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml @@ -1,3 +1 @@ -eurosat: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_eurosat +eurosat: tanganke/clip-vit-large-patch14_eurosat diff --git a/config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml b/config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml new file mode 100644 index 00000000..5f7462e6 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml @@ -0,0 +1 @@ +fashion_mnist: tanganke/clip-vit-large-patch14_fashion_mnist diff --git a/config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml b/config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml new file mode 100644 index 00000000..e1206ac7 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml @@ -0,0 +1 @@ +fer2013: tanganke/clip-vit-large-patch14_fer2013 diff --git a/config/model/clip-vit/clip-vit-large-patch14_food101.yaml b/config/model/clip-vit/clip-vit-large-patch14_food101.yaml new file mode 100644 index 00000000..f7831147 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_food101.yaml @@ -0,0 +1 @@ +food101: tanganke/clip-vit-large-patch14_food101 diff --git a/config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml b/config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml index deed597b..044e9b10 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml @@ -1,3 +1 @@ -gtsrb: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_gtsrb +gtsrb: tanganke/clip-vit-large-patch14_gtsrb diff --git a/config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml b/config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml new file mode 100644 index 00000000..e7e3687e --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml @@ -0,0 +1 @@ +kmnist: tanganke/clip-vit-large-patch14_kmnist diff --git a/config/model/clip-vit/clip-vit-large-patch14_mnist.yaml b/config/model/clip-vit/clip-vit-large-patch14_mnist.yaml index 427fff3e..42f519e6 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_mnist.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_mnist.yaml @@ -1,3 +1 @@ -mnist: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_mnist +mnist: tanganke/clip-vit-large-patch14_mnist diff --git a/config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml b/config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml new file mode 100644 index 00000000..35cdf582 --- 
/dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml @@ -0,0 +1 @@ +oxford-iiit-pet: tanganke/clip-vit-large-patch14_oxford-iiit-pet \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml b/config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml new file mode 100644 index 00000000..b2d35e0d --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml @@ -0,0 +1 @@ +oxford_flowers102: tanganke/clip-vit-large-patch14_oxford_flowers102 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_pcam.yaml b/config/model/clip-vit/clip-vit-large-patch14_pcam.yaml new file mode 100644 index 00000000..2101b7b9 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_pcam.yaml @@ -0,0 +1 @@ +pcam: tanganke/clip-vit-large-patch14_pcam \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml b/config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml new file mode 100644 index 00000000..549ed392 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml @@ -0,0 +1 @@ +rendered-sst2: tanganke/clip-vit-large-patch14_rendered-sst2 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml b/config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml index 06e4189d..ed4dc34d 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml @@ -1,3 +1 @@ -resisc45: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_resisc45 +resisc45: tanganke/clip-vit-large-patch14_resisc45 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml b/config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml index 4368e6b6..ab6fb550 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml @@ -1,3 +1 @@ -stanford-cars: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_stanford-cars +stanford-cars: tanganke/clip-vit-large-patch14_stanford-cars \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_stl10.yaml b/config/model/clip-vit/clip-vit-large-patch14_stl10.yaml new file mode 100644 index 00000000..b98a0b92 --- /dev/null +++ b/config/model/clip-vit/clip-vit-large-patch14_stl10.yaml @@ -0,0 +1 @@ +stl10: tanganke/clip-vit-large-patch14_stl10 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_sun397.yaml b/config/model/clip-vit/clip-vit-large-patch14_sun397.yaml index 4374b511..c9989ed7 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_sun397.yaml @@ -1,3 +1 @@ -sun397: - _target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_sun397 +sun397: tanganke/clip-vit-large-patch14_sun397 \ No newline at end of file diff --git a/config/model/clip-vit/clip-vit-large-patch14_svhn.yaml b/config/model/clip-vit/clip-vit-large-patch14_svhn.yaml index 1051f013..41dec111 100644 --- a/config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +++ b/config/model/clip-vit/clip-vit-large-patch14_svhn.yaml @@ -1,3 +1 @@ -svhn: - 
_target_: transformers.CLIPVisionModel.from_pretrained - pretrained_model_name_or_path: tanganke/clip-vit-large-patch14_svhn +svhn: tanganke/clip-vit-large-patch14_svhn \ No newline at end of file diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml new file mode 100644 index 00000000..afa6a6d9 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml @@ -0,0 +1,6 @@ +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch16_eight_tasks +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch16 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14.yaml new file mode 100644 index 00000000..f17914b6 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14.yaml @@ -0,0 +1,11 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch16_TALL14 + - /dataset/image_classification/train@train_datasets: TALL14 + - /dataset/image_classification/test@test_datasets: TALL14 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch16 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml new file mode 100644 index 00000000..b70e971f --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml @@ -0,0 +1,9 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch16_TALL14 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch16 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml new file mode 100644 index 00000000..c0fcf5b4 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml @@ -0,0 +1,11 @@ +# The 20 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch16_TALL20 + - /dataset/image_classification/train@train_datasets: TALL20 + - /dataset/image_classification/test@test_datasets: TALL20 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch16 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml new file mode 100644 index 00000000..b1ac3768 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml @@ -0,0 +1,9 @@ +# The 20 tasks used in the paper: +# Wang et al. 
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch16_TALL20 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch16 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml index 7f0d73ff..aeeddf4f 100644 --- a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml @@ -1,7 +1,19 @@ +# This is useful for evaluating the performance of a single CLIP vision model +# +# fusion_bench \ +# modelpool=CLIPVisionModelPool/clip-vit-base-patch16_individual \ +# modelpool.base_model=${MODEL_PATH} +# ... defaults: - CLIPVisionModelPool@: _template - - /model/clip-vit@models: - - clip-vit-base-patch16 + +models: + _pretrained_: + _target_: transformers.CLIPVisionModel.from_pretrained + pretrained_model_name_or_path: ${...base_model} + processor: _target_: transformers.CLIPProcessor.from_pretrained - pretrained_model_name_or_path: openai/clip-vit-base-patch16 + pretrained_model_name_or_path: ${..base_model} + +base_model: openai/clip-vit-base-patch16 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml new file mode 100644 index 00000000..908678b5 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml @@ -0,0 +1,8 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch32_TALL14 + - /dataset/image_classification/train@train_datasets: TALL14 + - /dataset/image_classification/test@test_datasets: TALL14 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml new file mode 100644 index 00000000..b6ed15fe --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml @@ -0,0 +1,6 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch32_TALL14 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml new file mode 100644 index 00000000..fa85bff8 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml @@ -0,0 +1,8 @@ +# The 20 tasks used in the paper: +# Wang et al. 
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch32_TALL20 + - /dataset/image_classification/train@train_datasets: TALL20 + - /dataset/image_classification/test@test_datasets: TALL20 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml new file mode 100644 index 00000000..0e4d3780 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml @@ -0,0 +1,6 @@ +# The 20 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-base-patch32_TALL20 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml index 712e547d..2be5bc8d 100644 --- a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml @@ -1,7 +1,13 @@ defaults: - CLIPVisionModelPool@: _template - - /model/clip-vit@models: - - clip-vit-base-patch32 + +models: + _pretrained_: + _target_: transformers.CLIPVisionModel.from_pretrained + pretrained_model_name_or_path: ${...base_model} + processor: _target_: transformers.CLIPProcessor.from_pretrained - pretrained_model_name_or_path: openai/clip-vit-base-patch32 + pretrained_model_name_or_path: ${..base_model} + +base_model: openai/clip-vit-base-patch32 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml new file mode 100644 index 00000000..694faad1 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml @@ -0,0 +1,15 @@ +defaults: + - /model/clip-vit@models: + - clip-vit-base-patch32 + - clip-vit-base-patch32_sun397 + - clip-vit-base-patch32_stanford-cars + +_target_: fusion_bench.modelpool.CLIPVisionModelPool +_recursive_: false + +train_datasets: null +test_datasets: null + +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-base-patch32 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14.yaml new file mode 100644 index 00000000..8223bb21 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14.yaml @@ -0,0 +1,11 @@ +# The 14 tasks used in the paper: +# Wang et al. 
Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-large-patch14_TALL14 + - /dataset/image_classification/train@train_datasets: TALL14 + - /dataset/image_classification/test@test_datasets: TALL14 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-large-patch14 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml new file mode 100644 index 00000000..b2a63810 --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml @@ -0,0 +1,9 @@ +# The 14 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-large-patch14_TALL14 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-large-patch14 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml new file mode 100644 index 00000000..08027edf --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml @@ -0,0 +1,11 @@ +# The 20 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-large-patch14_TALL20 + - /dataset/image_classification/train@train_datasets: TALL20 + - /dataset/image_classification/test@test_datasets: TALL20 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-large-patch14 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml new file mode 100644 index 00000000..841345fd --- /dev/null +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml @@ -0,0 +1,9 @@ +# The 20 tasks used in the paper: +# Wang et al. Localizing Task Information for Improved Model Merging and Compression +# http://arxiv.org/abs/2405.07813 +defaults: + - CLIPVisionModelPool@: _template + - /model/clip-vit@models: clip-vit-large-patch14_TALL20 +processor: + _target_: transformers.CLIPProcessor.from_pretrained + pretrained_model_name_or_path: openai/clip-vit-large-patch14 diff --git a/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml index c1ed44b3..58367ac6 100644 --- a/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +++ b/config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml @@ -1,7 +1,19 @@ +# This is useful for evaluating the performance of a single CLIP vision model +# +# fusion_bench \ +# modelpool=CLIPVisionModelPool/clip-vit-large-patch14_individual \ +# modelpool.base_model=${MODEL_PATH} +# ... 
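+# +# A concrete invocation might look like the following (a hypothetical sketch: it assumes the repository's `dummy` method, which loads the model without fusing anything, and the single-task taskpool added elsewhere in this patch): +# +# fusion_bench \ +# method=dummy \ +# modelpool=CLIPVisionModelPool/clip-vit-large-patch14_individual \ +# modelpool.base_model=tanganke/clip-vit-large-patch14_sun397 \ +# taskpool=CLIPVisionModelTaskPool/clip-vit-single-task_sun397 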
defaults: - CLIPVisionModelPool@: _template - - /model/clip-vit@models: - - clip-vit-large-patch14 + +models: + _pretrained_: + _target_: transformers.CLIPVisionModel.from_pretrained + pretrained_model_name_or_path: ${...base_model} + processor: _target_: transformers.CLIPProcessor.from_pretrained - pretrained_model_name_or_path: openai/clip-vit-large-patch14 + pretrained_model_name_or_path: ${..base_model} + +base_model: openai/clip-vit-large-patch14 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted.yaml new file mode 100644 index 00000000..dd7b285a --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted.yaml @@ -0,0 +1,27 @@ +type: clip_vit_classification +name: clip-vit-robustness_corrupted +# corruption can be one of: +# contrast, gaussian_noise, impulse_noise, jpeg_compression, motion_blur, pixelate, spatter +corruption: ${corruption} +dataset_type: huggingface_image_classification +tasks: + - name: stanford_cars + dataset: + name: tanganke/stanford_cars + split: ${taskpool.corruption} + - name: eurosat + dataset: + name: tanganke/eurosat + split: ${taskpool.corruption} + - name: resisc45 + dataset: + name: tanganke/resisc45 + split: ${taskpool.corruption} + - name: gtsrb + dataset: + name: tanganke/gtsrb + split: ${taskpool.corruption} +clip_model: openai/clip-vit-base-patch32 +batch_size: 128 +num_workers: 16 +fast_dev_run: ${fast_dev_run} diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL14.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL14.yaml new file mode 100644 index 00000000..84e0a0ec --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL14.yaml @@ -0,0 +1,19 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: + # eight tasks in the task arithmetic paper + - sun397 + - stanford-cars + - resisc45 + - eurosat + - svhn + - gtsrb + - mnist + - dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - oxford_flowers102 + - pcam + - fer2013 + - oxford-iiit-pet + - stl10 + - cifar100 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL20.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL20.yaml new file mode 100644 index 00000000..99a9db9a --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL20.yaml @@ -0,0 +1,26 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: + # eight tasks in the task arithmetic paper + - sun397 + - stanford-cars + - resisc45 + - eurosat + - svhn + - gtsrb + - mnist + - dtd + # additional 6 tasks in the TALL mask paper (TALL 14) + - oxford_flowers102 + - pcam + - fer2013 + - oxford-iiit-pet + - stl10 + - cifar100 + # additional 6 tasks in the TALL mask paper (TALL 20) + - cifar10 + - food101 + - fashion_mnist + - emnist_letters + - kmnist + - rendered-sst2 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml new file mode 100644 index 00000000..696acd68 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: cifar10 diff --git 
a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml new file mode 100644 index 00000000..89b8f6ec --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: cifar100 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml new file mode 100644 index 00000000..feab0918 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: dtd diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml new file mode 100644 index 00000000..45d3ac3e --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: emnist_letters diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml new file mode 100644 index 00000000..6356ea43 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: eurosat diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml new file mode 100644 index 00000000..f8c2f5c1 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: fashion_mnist diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml new file mode 100644 index 00000000..5c0bae52 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: fer2013 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml new file mode 100644 index 00000000..5845b58c --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: food101 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml new file mode 100644 index 00000000..dfbced87 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: gtsrb diff --git 
a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml new file mode 100644 index 00000000..2bd634ba --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: kmnist diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml new file mode 100644 index 00000000..e12e72cb --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: mnist diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml new file mode 100644 index 00000000..db89e073 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: oxford-iiit-pet diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml new file mode 100644 index 00000000..92033927 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: oxford_flowers102 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml new file mode 100644 index 00000000..e2a63817 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/val@test_datasets: oxford_flowers102 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml new file mode 100644 index 00000000..9b85cbf0 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: pcam diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml new file mode 100644 index 00000000..5be18240 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: rendered-sst2 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml new file mode 100644 index 00000000..90a5dfe4 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - 
/dataset/image_classification/test@test_datasets: resisc45 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml new file mode 100644 index 00000000..37439c5a --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: stanford-cars diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml new file mode 100644 index 00000000..012ce382 --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: stl10 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml new file mode 100644 index 00000000..34e00c7f --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: sun397 diff --git a/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml new file mode 100644 index 00000000..557fb63e --- /dev/null +++ b/config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml @@ -0,0 +1,3 @@ +defaults: + - CLIPVisionModelTaskPool@: _template + - /dataset/image_classification/test@test_datasets: svhn diff --git a/fusion_bench/mixins/clip_classification.py b/fusion_bench/mixins/clip_classification.py index ff2d8649..c000d7ff 100644 --- a/fusion_bench/mixins/clip_classification.py +++ b/fusion_bench/mixins/clip_classification.py @@ -132,13 +132,13 @@ def setup_zero_shot_classification_head( # get cache directory if self.modelpool.has_pretrained: - model_name = self.modelpool.get_model_config( - "_pretrained_" - ).pretrained_model_name_or_path + model_name = self.modelpool.get_model_config("_pretrained_") + if not isinstance(model_name, str): + model_name = model_name.pretrained_model_name_or_path else: - model_name = self.modelpool.get_model_config( - self.modelpool.model_names[0] - ).pretrained_model_name_or_path + model_name = self.modelpool.get_model_config(self.modelpool.model_names[0]) + if not isinstance(model_name, str): + model_name = model_name.pretrained_model_name_or_path cache_dir = os.path.join( self.zeroshot_weights_cache_dir, os.path.normpath(model_name.split("/")[-1]), diff --git a/fusion_bench/modelpool/base_pool.py b/fusion_bench/modelpool/base_pool.py index 18a1a567..948622f8 100644 --- a/fusion_bench/modelpool/base_pool.py +++ b/fusion_bench/modelpool/base_pool.py @@ -147,7 +147,6 @@ def get_model_config(self, model_name: str, return_copy: bool = True) -> DictCon DictConfig: The configuration for the specified model. 
""" model_config = self._models[model_name] - assert isinstance(model_config, DictConfig), "Model config must be a DictConfig" if return_copy: model_config = deepcopy(model_config) return model_config diff --git a/fusion_bench/modelpool/clip_vision/modelpool.py b/fusion_bench/modelpool/clip_vision/modelpool.py index 1bea814e..34826fa8 100644 --- a/fusion_bench/modelpool/clip_vision/modelpool.py +++ b/fusion_bench/modelpool/clip_vision/modelpool.py @@ -1,8 +1,11 @@ import logging from copy import deepcopy -from typing import Optional +from typing import Optional, Union +from datasets import load_dataset from omegaconf import DictConfig, open_dict +from torch import nn +from torch.utils.data import Dataset from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel from typing_extensions import override @@ -36,17 +39,29 @@ def __init__( def load_processor(self, *args, **kwargs) -> CLIPProcessor: assert self._processor is not None, "Processor is not defined in the config" - processor = instantiate(self._processor, *args, **kwargs) + if isinstance(self._processor, str): + log.info(f"Loading `transformers.CLIPProcessor`: {self._processor}") + processor = CLIPProcessor.from_pretrained(self._processor) + else: + processor = instantiate(self._processor, *args, **kwargs) return processor def load_clip_model(self, model_name: str, *args, **kwargs) -> CLIPModel: model_config = self._models[model_name] - assert isinstance(model_config, DictConfig), "Model config must be a DictConfig" - model_config = deepcopy(model_config) - with open_dict(model_config): - model_config._target_ = "transformers.CLIPModel.from_pretrained" - clip_model = instantiate(model_config, *args, **kwargs) - return clip_model + + if isinstance(model_config, str): + log.info(f"Loading `transformers.CLIPModel`: {model_config}") + clip_model = CLIPModel.from_pretrained(model_config, *args, **kwargs) + return clip_model + else: + assert isinstance( + model_config, DictConfig + ), "Model config must be a DictConfig" + model_config = deepcopy(model_config) + with open_dict(model_config): + model_config._target_ = "transformers.CLIPModel.from_pretrained" + clip_model = instantiate(model_config, *args, **kwargs) + return clip_model @override def save_model(self, model: CLIPVisionModel, path: str): @@ -59,3 +74,72 @@ def save_model(self, model: CLIPVisionModel, path: str): """ with timeit_context(f'Saving clip vision model to "{path}"'): model.save_pretrained(path) + + def load_model( + self, model_name_or_config: Union[str, DictConfig], *args, **kwargs + ) -> CLIPVisionModel: + """ + This method is used to load a CLIPVisionModel from the model pool. + + Example configuration could be: + + ```yaml + models: + cifar10: tanganke/clip-vit-base-patch32_cifar10 + sun397: tanganke/clip-vit-base-patch32_sun397 + stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars + ``` + + Args: + model_name_or_config (Union[str, DictConfig]): The name of the model or the model configuration. + + Returns: + CLIPVisionModel: The loaded CLIPVisionModel. 
+ """ + if ( + isinstance(model_name_or_config, str) + and model_name_or_config in self._models + ): + model = self._models[model_name_or_config] + if isinstance(model, str): + log.info(f"Loading `transformers.CLIPVisionModel`: {model}") + return CLIPVisionModel.from_pretrained(model, *args, **kwargs) + if isinstance(model, nn.Module): + log.info(f"Returning existing model: {model}") + return model + + # If the model is not a string, we use the default load_model method + return super().load_model(model_name_or_config, *args, **kwargs) + + def load_train_dataset(self, dataset_name: str, *args, **kwargs): + dataset_config = self._train_datasets[dataset_name] + if isinstance(dataset_config, str): + log.info( + f"Loading train dataset using `datasets.load_dataset`: {dataset_config}" + ) + dataset = load_dataset(dataset_config, split="train") + else: + dataset = super().load_train_dataset(dataset_name, *args, **kwargs) + return dataset + + def load_val_dataset(self, dataset_name: str, *args, **kwargs): + dataset_config = self._val_datasets[dataset_name] + if isinstance(dataset_config, str): + log.info( + f"Loading validation dataset using `datasets.load_dataset`: {dataset_config}" + ) + dataset = load_dataset(dataset_config, split="validation") + else: + dataset = super().load_val_dataset(dataset_name, *args, **kwargs) + return dataset + + def load_test_dataset(self, dataset_name: str, *args, **kwargs): + dataset_config = self._test_datasets[dataset_name] + if isinstance(dataset_config, str): + log.info( + f"Loading test dataset using `datasets.load_dataset`: {dataset_config}" + ) + dataset = load_dataset(dataset_config, split="test") + else: + dataset = super().load_test_dataset(dataset_name, *args, **kwargs) + return dataset diff --git a/fusion_bench/tasks/clip_classification/__init__.py b/fusion_bench/tasks/clip_classification/__init__.py index 82f91661..2b982e51 100644 --- a/fusion_bench/tasks/clip_classification/__init__.py +++ b/fusion_bench/tasks/clip_classification/__init__.py @@ -58,11 +58,24 @@ class CLIPTemplateFactory: "templates": "templates", }, "nateraw/rendered-sst2": ".rendered_sst2", + "rendered-sst2": ".rendered_sst2", "tanganke/stl10": ".stl10", + "stl10": ".stl10", "dpdl-benchmark/oxford_flowers102": ".flower102", + "oxford_flowers102": ".flower102", "timm/oxford-iiit-pet": ".oxford_iiit_pet", + "oxford-iiit-pet": ".oxford_iiit_pet", "imagenet": ".imagenet", "tiny-imagenet": ".tiny_imagenet", + "pcam": ".pcam", + "fer2013": ".fer2013", + "emnist_mnist": ".emnist_mnist", + "emnist_letters": ".emnist_letters", + "kmnist": ".kmnist", + "food101": ".food101", + "fashion_mnist": ".fashion_mnist", + "cub-200-2011": ".cub_200_2011", + "mango-leaf-disease": ".mango_leaf_disease", } @staticmethod diff --git a/fusion_bench/tasks/clip_classification/clip_dataset.py b/fusion_bench/tasks/clip_classification/clip_dataset.py index f6a8dd85..508cf359 100644 --- a/fusion_bench/tasks/clip_classification/clip_dataset.py +++ b/fusion_bench/tasks/clip_classification/clip_dataset.py @@ -1,16 +1 @@ -import torch - - -class CLIPDataset(torch.utils.data.Dataset): - def __init__(self, dataset, processor): - self.dataset = dataset - self.processor = processor - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, idx): - item = self.dataset[idx] - image = item["image"] - inputs = self.processor(images=[image], return_tensors="pt")["pixel_values"][0] - return inputs, item["label"] +from fusion_bench.dataset.clip_dataset import CLIPDataset diff --git 
a/fusion_bench/tasks/clip_classification/cub_200_2011.py b/fusion_bench/tasks/clip_classification/cub_200_2011.py new file mode 100644 index 00000000..bda57044 --- /dev/null +++ b/fusion_bench/tasks/clip_classification/cub_200_2011.py @@ -0,0 +1,208 @@ +classname_mapping = { + "0": "Black_footed_Albatross", + "1": "Laysan_Albatross", + "2": "Sooty_Albatross", + "3": "Groove_billed_Ani", + "4": "Crested_Auklet", + "5": "Least_Auklet", + "6": "Parakeet_Auklet", + "7": "Rhinoceros_Auklet", + "8": "Brewer_Blackbird", + "9": "Red_winged_Blackbird", + "10": "Rusty_Blackbird", + "11": "Yellow_headed_Blackbird", + "12": "Bobolink", + "13": "Indigo_Bunting", + "14": "Lazuli_Bunting", + "15": "Painted_Bunting", + "16": "Cardinal", + "17": "Spotted_Catbird", + "18": "Gray_Catbird", + "19": "Yellow_breasted_Chat", + "20": "Eastern_Towhee", + "21": "Chuck_will_Widow", + "22": "Brandt_Cormorant", + "23": "Red_faced_Cormorant", + "24": "Pelagic_Cormorant", + "25": "Bronzed_Cowbird", + "26": "Shiny_Cowbird", + "27": "Brown_Creeper", + "28": "American_Crow", + "29": "Fish_Crow", + "30": "Black_billed_Cuckoo", + "31": "Mangrove_Cuckoo", + "32": "Yellow_billed_Cuckoo", + "33": "Gray_crowned_Rosy_Finch", + "34": "Purple_Finch", + "35": "Northern_Flicker", + "36": "Acadian_Flycatcher", + "37": "Great_Crested_Flycatcher", + "38": "Least_Flycatcher", + "39": "Olive_sided_Flycatcher", + "40": "Scissor_tailed_Flycatcher", + "41": "Vermilion_Flycatcher", + "42": "Yellow_bellied_Flycatcher", + "43": "Frigatebird", + "44": "Northern_Fulmar", + "45": "Gadwall", + "46": "American_Goldfinch", + "47": "European_Goldfinch", + "48": "Boat_tailed_Grackle", + "49": "Eared_Grebe", + "50": "Horned_Grebe", + "51": "Pied_billed_Grebe", + "52": "Western_Grebe", + "53": "Blue_Grosbeak", + "54": "Evening_Grosbeak", + "55": "Pine_Grosbeak", + "56": "Rose_breasted_Grosbeak", + "57": "Pigeon_Guillemot", + "58": "California_Gull", + "59": "Glaucous_winged_Gull", + "60": "Heermann_Gull", + "61": "Herring_Gull", + "62": "Ivory_Gull", + "63": "Ring_billed_Gull", + "64": "Slaty_backed_Gull", + "65": "Western_Gull", + "66": "Anna_Hummingbird", + "67": "Ruby_throated_Hummingbird", + "68": "Rufous_Hummingbird", + "69": "Green_Violetear", + "70": "Long_tailed_Jaeger", + "71": "Pomarine_Jaeger", + "72": "Blue_Jay", + "73": "Florida_Jay", + "74": "Green_Jay", + "75": "Dark_eyed_Junco", + "76": "Tropical_Kingbird", + "77": "Gray_Kingbird", + "78": "Belted_Kingfisher", + "79": "Green_Kingfisher", + "80": "Pied_Kingfisher", + "81": "Ringed_Kingfisher", + "82": "White_breasted_Kingfisher", + "83": "Red_legged_Kittiwake", + "84": "Horned_Lark", + "85": "Pacific_Loon", + "86": "Mallard", + "87": "Western_Meadowlark", + "88": "Hooded_Merganser", + "89": "Red_breasted_Merganser", + "90": "Mockingbird", + "91": "Nighthawk", + "92": "Clark_Nutcracker", + "93": "White_breasted_Nuthatch", + "94": "Baltimore_Oriole", + "95": "Hooded_Oriole", + "96": "Orchard_Oriole", + "97": "Scott_Oriole", + "98": "Ovenbird", + "99": "Brown_Pelican", + "100": "White_Pelican", + "101": "Western_Wood_Pewee", + "102": "Sayornis", + "103": "American_Pipit", + "104": "Whip_poor_Will", + "105": "Horned_Puffin", + "106": "Common_Raven", + "107": "White_necked_Raven", + "108": "American_Redstart", + "109": "Geococcyx", + "110": "Loggerhead_Shrike", + "111": "Great_Grey_Shrike", + "112": "Baird_Sparrow", + "113": "Black_throated_Sparrow", + "114": "Brewer_Sparrow", + "115": "Chipping_Sparrow", + "116": "Clay_colored_Sparrow", + "117": "House_Sparrow", + "118": "Field_Sparrow", + 
"119": "Fox_Sparrow", + "120": "Grasshopper_Sparrow", + "121": "Harris_Sparrow", + "122": "Henslow_Sparrow", + "123": "Le_Conte_Sparrow", + "124": "Lincoln_Sparrow", + "125": "Nelson_Sharp_tailed_Sparrow", + "126": "Savannah_Sparrow", + "127": "Seaside_Sparrow", + "128": "Song_Sparrow", + "129": "Tree_Sparrow", + "130": "Vesper_Sparrow", + "131": "White_crowned_Sparrow", + "132": "White_throated_Sparrow", + "133": "Cape_Glossy_Starling", + "134": "Bank_Swallow", + "135": "Barn_Swallow", + "136": "Cliff_Swallow", + "137": "Tree_Swallow", + "138": "Scarlet_Tanager", + "139": "Summer_Tanager", + "140": "Artic_Tern", + "141": "Black_Tern", + "142": "Caspian_Tern", + "143": "Common_Tern", + "144": "Elegant_Tern", + "145": "Forsters_Tern", + "146": "Least_Tern", + "147": "Green_tailed_Towhee", + "148": "Brown_Thrasher", + "149": "Sage_Thrasher", + "150": "Black_capped_Vireo", + "151": "Blue_headed_Vireo", + "152": "Philadelphia_Vireo", + "153": "Red_eyed_Vireo", + "154": "Warbling_Vireo", + "155": "White_eyed_Vireo", + "156": "Yellow_throated_Vireo", + "157": "Bay_breasted_Warbler", + "158": "Black_and_white_Warbler", + "159": "Black_throated_Blue_Warbler", + "160": "Blue_winged_Warbler", + "161": "Canada_Warbler", + "162": "Cape_May_Warbler", + "163": "Cerulean_Warbler", + "164": "Chestnut_sided_Warbler", + "165": "Golden_winged_Warbler", + "166": "Hooded_Warbler", + "167": "Kentucky_Warbler", + "168": "Magnolia_Warbler", + "169": "Mourning_Warbler", + "170": "Myrtle_Warbler", + "171": "Nashville_Warbler", + "172": "Orange_crowned_Warbler", + "173": "Palm_Warbler", + "174": "Pine_Warbler", + "175": "Prairie_Warbler", + "176": "Prothonotary_Warbler", + "177": "Swainson_Warbler", + "178": "Tennessee_Warbler", + "179": "Wilson_Warbler", + "180": "Worm_eating_Warbler", + "181": "Yellow_Warbler", + "182": "Northern_Waterthrush", + "183": "Louisiana_Waterthrush", + "184": "Bohemian_Waxwing", + "185": "Cedar_Waxwing", + "186": "American_Three_toed_Woodpecker", + "187": "Pileated_Woodpecker", + "188": "Red_bellied_Woodpecker", + "189": "Red_cockaded_Woodpecker", + "190": "Red_headed_Woodpecker", + "191": "Downy_Woodpecker", + "192": "Bewick_Wren", + "193": "Cactus_Wren", + "194": "Carolina_Wren", + "195": "House_Wren", + "196": "Marsh_Wren", + "197": "Rock_Wren", + "198": "Winter_Wren", + "199": "Common_Yellowthroat", +} + +classnames = [classname_mapping[str(i)] for i in range(200)] +templates = [ + lambda c: f"a photo of a {c}.", + lambda c: f"a photo of the {c}.", +] diff --git a/fusion_bench/tasks/clip_classification/emnist_letters.py b/fusion_bench/tasks/clip_classification/emnist_letters.py new file mode 100644 index 00000000..de3d6ac5 --- /dev/null +++ b/fusion_bench/tasks/clip_classification/emnist_letters.py @@ -0,0 +1,31 @@ +classnames_mapping = { + "0": "A", + "1": "B", + "2": "C", + "3": "D", + "4": "E", + "5": "F", + "6": "G", + "7": "H", + "8": "I", + "9": "J", + "10": "K", + "11": "L", + "12": "M", + "13": "N", + "14": "O", + "15": "P", + "16": "Q", + "17": "R", + "18": "S", + "19": "T", + "20": "U", + "21": "V", + "22": "W", + "23": "X", + "24": "Y", + "25": "Z", +} + +classnames = [classnames_mapping[str(i)] for i in range(26)] +templates = [lambda c: f'a photo of the digit character: "{c}".'] diff --git a/fusion_bench/tasks/clip_classification/emnist_mnist.py b/fusion_bench/tasks/clip_classification/emnist_mnist.py new file mode 100644 index 00000000..dec1c0fe --- /dev/null +++ b/fusion_bench/tasks/clip_classification/emnist_mnist.py @@ -0,0 +1,5 @@ +# 
https://huggingface.co/datasets/tanganke/emnist_mnist +classnames = [str(i) for i in range(10)] +templates = [ + lambda c: f'a photo of the number: "{c}".', +] diff --git a/fusion_bench/tasks/clip_classification/fashion_mnist.py b/fusion_bench/tasks/clip_classification/fashion_mnist.py new file mode 100644 index 00000000..12266ef3 --- /dev/null +++ b/fusion_bench/tasks/clip_classification/fashion_mnist.py @@ -0,0 +1,18 @@ +classname_mapping = { + "0": "T-shirt/top", + "1": "Trouser", + "2": "Pullover", + "3": "Dress", + "4": "Coat", + "5": "Sandal", + "6": "Shirt", + "7": "Sneaker", + "8": "Bag", + "9": "Ankle boot", +} +classnames = [classname_mapping[str(i)] for i in range(10)] + +templates = [ + lambda c: f"a photo of a {c}.", + lambda c: f"a photo of the {c}.", +] diff --git a/fusion_bench/tasks/clip_classification/fer2013.py b/fusion_bench/tasks/clip_classification/fer2013.py new file mode 100644 index 00000000..88d31eda --- /dev/null +++ b/fusion_bench/tasks/clip_classification/fer2013.py @@ -0,0 +1,18 @@ +classnames = [ + "angry", + "disgusted", + "fearful", + "happy", + "neutral", + "sad", + "surprised", +] + +templates = [ + lambda c: f"a photo of a {c} looking face.", + lambda c: f"a photo of a face showing the emotion: {c}.", + lambda c: f"a photo of a face looking {c}.", + lambda c: f"a face that looks {c}.", + lambda c: f"they look {c}.", + lambda c: f"look at how {c} they are.", +] diff --git a/fusion_bench/tasks/clip_classification/food101.py b/fusion_bench/tasks/clip_classification/food101.py new file mode 100644 index 00000000..74015beb --- /dev/null +++ b/fusion_bench/tasks/clip_classification/food101.py @@ -0,0 +1,105 @@ +classnames = [ + "apple pie", + "baby back ribs", + "baklava", + "beef carpaccio", + "beef tartare", + "beet salad", + "beignets", + "bibimbap", + "bread pudding", + "breakfast burrito", + "bruschetta", + "caesar salad", + "cannoli", + "caprese salad", + "carrot cake", + "ceviche", + "cheese plate", + "cheesecake", + "chicken curry", + "chicken quesadilla", + "chicken wings", + "chocolate cake", + "chocolate mousse", + "churros", + "clam chowder", + "club sandwich", + "crab cakes", + "creme brulee", + "croque madame", + "cup cakes", + "deviled eggs", + "donuts", + "dumplings", + "edamame", + "eggs benedict", + "escargots", + "falafel", + "filet mignon", + "fish and chips", + "foie gras", + "french fries", + "french onion soup", + "french toast", + "fried calamari", + "fried rice", + "frozen yogurt", + "garlic bread", + "gnocchi", + "greek salad", + "grilled cheese sandwich", + "grilled salmon", + "guacamole", + "gyoza", + "hamburger", + "hot and sour soup", + "hot dog", + "huevos rancheros", + "hummus", + "ice cream", + "lasagna", + "lobster bisque", + "lobster roll sandwich", + "macaroni and cheese", + "macarons", + "miso soup", + "mussels", + "nachos", + "omelette", + "onion rings", + "oysters", + "pad thai", + "paella", + "pancakes", + "panna cotta", + "peking duck", + "pho", + "pizza", + "pork chop", + "poutine", + "prime rib", + "pulled pork sandwich", + "ramen", + "ravioli", + "red velvet cake", + "risotto", + "samosa", + "sashimi", + "scallops", + "seaweed salad", + "shrimp and grits", + "spaghetti bolognese", + "spaghetti carbonara", + "spring rolls", + "steak", + "strawberry shortcake", + "sushi", + "tacos", + "takoyaki", + "tiramisu", + "tuna tartare", + "waffles", +] + +templates = [lambda c: f"a photo of {c}, a type of food."] diff --git a/fusion_bench/tasks/clip_classification/kmnist.py b/fusion_bench/tasks/clip_classification/kmnist.py 
new file mode 100644 index 00000000..c2407c90 --- /dev/null +++ b/fusion_bench/tasks/clip_classification/kmnist.py @@ -0,0 +1,17 @@ +classnames_mapping = { + "0": "お", + "1": "き", + "2": "す", + "3": "つ", + "4": "な", + "5": "は", + "6": "ま", + "7": "や", + "8": "れ", + "9": "を", +} +classnames = [classnames_mapping[str(c)] for c in range(10)] + +templates = [ + lambda c: f"a photo of the character {c}.", +] diff --git a/fusion_bench/tasks/clip_classification/mango_leaf_disease.py b/fusion_bench/tasks/clip_classification/mango_leaf_disease.py new file mode 100644 index 00000000..4dac7c21 --- /dev/null +++ b/fusion_bench/tasks/clip_classification/mango_leaf_disease.py @@ -0,0 +1,19 @@ +classnames = [ + "Anthracnose", + "Bacterial Canker", + "Cutting Weevil", + "Die Back", + "Gall Midge", + "Healthy", + "Powdery Mildew", + "Sooty Mould", +] + +templates = [ + lambda c: f"a photo of a mango leaf with {c}.", + lambda c: f"a mango leaf showing symptoms of {c}.", + lambda c: f"a close-up photo of {c} on a mango leaf.", + lambda c: f"this mango leaf is affected by {c}.", + lambda c: f"a mango leaf disease identified as {c}.", + lambda c: f"a {c} infection on a mango leaf.", +] diff --git a/fusion_bench/tasks/clip_classification/pcam.py b/fusion_bench/tasks/clip_classification/pcam.py new file mode 100644 index 00000000..aa9eced5 --- /dev/null +++ b/fusion_bench/tasks/clip_classification/pcam.py @@ -0,0 +1,5 @@ +classnames = ["lymph node", "lymph node containing metastatic tumor tissue"] + +templates = [ + lambda c: f"this is a photo of {c}", +] From 7f3c1682c51bb3f198549fc873c2b6864ddf4f15 Mon Sep 17 00:00:00 2001 From: loafei <18996341802@163.com> Date: Sat, 4 Jan 2025 16:35:17 +0800 Subject: [PATCH 3/3] new file: fusion_bench/dataset/fer2013.py --- fusion_bench/dataset/fer2013.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 fusion_bench/dataset/fer2013.py diff --git a/fusion_bench/dataset/fer2013.py b/fusion_bench/dataset/fer2013.py new file mode 100644 index 00000000..3dc719a2 --- /dev/null +++ b/fusion_bench/dataset/fer2013.py @@ -0,0 +1,12 @@ +from datasets import load_dataset + + +def load_fer2013(path: str = "clip-benchmark/wds_fer2013", split: str = "train"): + dataset = load_dataset(path, split=split) + dataset = dataset.remove_columns(["__key__", "__url__"]) + dataset = dataset.rename_columns({"jpg": "image", "cls": "label"}) + return dataset + +if __name__ == "__main__": + dataset = load_fer2013(split="test") + print(dataset)
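+ # an illustrative sanity check (assumes the renames above succeed): the loader should expose the 'image' and 'label' columns that the classification configs expect + assert {"image", "label"}.issubset(dataset.column_names)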