From e770f0316d2a9b787c9d1440f204fcb65e176682 Mon Sep 17 00:00:00 2001
From: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Date: Tue, 20 Feb 2024 20:22:08 +0000
Subject: [PATCH] [`pipeline`] Add pool option to image feature extraction
 pipeline (#28985)

* Add pool option

* PR comments - error message and exact outputs check
---
 .../pipelines/image_feature_extraction.py     | 32 +++++++++++++++----
 ...test_pipelines_image_feature_extraction.py | 32 ++++++++++++++++++-
 2 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/src/transformers/pipelines/image_feature_extraction.py b/src/transformers/pipelines/image_feature_extraction.py
index ccfe7c40d7e7..3a361deabd79 100644
--- a/src/transformers/pipelines/image_feature_extraction.py
+++ b/src/transformers/pipelines/image_feature_extraction.py
@@ -14,6 +14,8 @@
         image_processor_kwargs (`dict`, *optional*):
                 Additional dictionary of keyword arguments passed along to the image processor e.g.
                 {"size": {"height": 100, "width": 100}}
+        pool (`bool`, *optional*, defaults to `False`):
+            Whether or not to return the pooled output. If `False`, the model will return the raw hidden states.
     """,
 )
 class ImageFeatureExtractionPipeline(Pipeline):
@@ -41,9 +43,14 @@ class ImageFeatureExtractionPipeline(Pipeline):
     [huggingface.co/models](https://huggingface.co/models).
     """
 
-    def _sanitize_parameters(self, image_processor_kwargs=None, return_tensors=None, **kwargs):
+    def _sanitize_parameters(self, image_processor_kwargs=None, return_tensors=None, pool=None, **kwargs):
         preprocess_params = {} if image_processor_kwargs is None else image_processor_kwargs
-        postprocess_params = {"return_tensors": return_tensors} if return_tensors is not None else {}
+
+        postprocess_params = {}
+        if pool is not None:
+            postprocess_params["pool"] = pool
+        if return_tensors is not None:
+            postprocess_params["return_tensors"] = return_tensors
 
         if "timeout" in kwargs:
             preprocess_params["timeout"] = kwargs["timeout"]
@@ -59,14 +66,25 @@ def _forward(self, model_inputs):
         model_outputs = self.model(**model_inputs)
         return model_outputs
 
-    def postprocess(self, model_outputs, return_tensors=False):
-        # [0] is the first available tensor, logits or last_hidden_state.
+    def postprocess(self, model_outputs, pool=None, return_tensors=False):
+        pool = pool if pool is not None else False
+
+        if pool:
+            if "pooler_output" not in model_outputs:
+                raise ValueError(
+                    "No pooled output was returned. Make sure the model has a `pooler` layer when using the `pool` option."
+                )
+            outputs = model_outputs["pooler_output"]
+        else:
+            # [0] is the first available tensor, logits or last_hidden_state.
+            outputs = model_outputs[0]
+
         if return_tensors:
-            return model_outputs[0]
+            return outputs
         if self.framework == "pt":
-            return model_outputs[0].tolist()
+            return outputs.tolist()
         elif self.framework == "tf":
-            return model_outputs[0].numpy().tolist()
+            return outputs.numpy().tolist()
 
     def __call__(self, *args, **kwargs):
         """
diff --git a/tests/pipelines/test_pipelines_image_feature_extraction.py b/tests/pipelines/test_pipelines_image_feature_extraction.py
index a9c99ad50bc6..1519c7a97803 100644
--- a/tests/pipelines/test_pipelines_image_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_image_feature_extraction.py
@@ -62,10 +62,21 @@ def test_small_model_pt(self):
             nested_simplify(outputs[0][0]),
             [-1.417, -0.392, -1.264, -1.196, 1.648, 0.885, 0.56, -0.606, -1.175, 0.823, 1.912, 0.081, -0.053, 1.119, -0.062, -1.757, -0.571, 0.075, 0.959, 0.118, 1.201, -0.672, -0.498, 0.364, 0.937, -1.623, 0.228, 0.19, 1.697, -1.115, 0.583, -0.981])  # fmt: skip
 
+    @require_torch
+    def test_small_model_w_pooler_pt(self):
+        feature_extractor = pipeline(
+            task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit-w-pooler", framework="pt"
+        )
+        img = prepare_img()
+        outputs = feature_extractor(img, pool=True)
+        self.assertEqual(
+            nested_simplify(outputs[0]),
+            [-0.056,  0.083,  0.021,  0.038,  0.242, -0.279, -0.033, -0.003, 0.200, -0.192,  0.045, -0.095, -0.077,  0.017, -0.058, -0.063, -0.029, -0.204,  0.014,  0.042,  0.305, -0.205, -0.099,  0.146, -0.287,  0.020,  0.168, -0.052,  0.046,  0.048, -0.156,  0.093])  # fmt: skip
+
     @require_tf
     def test_small_model_tf(self):
         feature_extractor = pipeline(
-            task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit", framework="tf"
+            task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit-w-pooler", framework="tf"
         )
         img = prepare_img()
         outputs = feature_extractor(img)
@@ -73,6 +84,17 @@ def test_small_model_tf(self):
             nested_simplify(outputs[0][0]),
             [-1.417, -0.392, -1.264, -1.196, 1.648, 0.885, 0.56, -0.606, -1.175, 0.823, 1.912, 0.081, -0.053, 1.119, -0.062, -1.757, -0.571, 0.075, 0.959, 0.118, 1.201, -0.672, -0.498, 0.364, 0.937, -1.623, 0.228, 0.19, 1.697, -1.115, 0.583, -0.981])  # fmt: skip
 
+    @require_tf
+    def test_small_model_w_pooler_tf(self):
+        feature_extractor = pipeline(
+            task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit-w-pooler", framework="tf"
+        )
+        img = prepare_img()
+        outputs = feature_extractor(img, pool=True)
+        self.assertEqual(
+            nested_simplify(outputs[0]),
+            [-0.056,  0.083,  0.021,  0.038,  0.242, -0.279, -0.033, -0.003, 0.200, -0.192,  0.045, -0.095, -0.077,  0.017, -0.058, -0.063, -0.029, -0.204,  0.014,  0.042,  0.305, -0.205, -0.099,  0.146, -0.287,  0.020,  0.168, -0.052,  0.046,  0.048, -0.156,  0.093])  # fmt: skip
+
     @require_torch
     def test_image_processing_small_model_pt(self):
         feature_extractor = pipeline(
@@ -91,6 +113,10 @@ def test_image_processing_small_model_pt(self):
         outputs = feature_extractor(img, image_processor_kwargs=image_processor_kwargs)
         self.assertEqual(np.squeeze(outputs).shape, (226, 32))
 
+        # Test pooling option
+        outputs = feature_extractor(img, pool=True)
+        self.assertEqual(np.squeeze(outputs).shape, (32,))
+
     @require_tf
     def test_image_processing_small_model_tf(self):
         feature_extractor = pipeline(
@@ -109,6 +135,10 @@ def test_image_processing_small_model_tf(self):
         outputs = feature_extractor(img, image_processor_kwargs=image_processor_kwargs)
         self.assertEqual(np.squeeze(outputs).shape, (226, 32))
 
+        # Test pooling option
+        outputs = feature_extractor(img, pool=True)
+        self.assertEqual(np.squeeze(outputs).shape, (32,))
+
     @require_torch
     def test_return_tensors_pt(self):
         feature_extractor = pipeline(