Merge pull request #805 from roboflow/feature/pass-measured-fps-when-…

…handling-live-stream Use measured fps when fetching frames from live stream
roboflow · Nov 14, 2024 · 4b6c7b6 · 4b6c7b6
2 parents 1f235f3 + 580f0a2
commit 4b6c7b6
Show file tree

Hide file tree

Showing 8 changed files with 40 additions and 7 deletions.
diff --git a/docs/workflows/internal_data_types.md b/docs/workflows/internal_data_types.md
@@ -284,6 +284,9 @@ def inspect_vide_metadata(video_metadata: VideoMetadata) -> None:
     # Field represents FPS value (if possible to be retrieved) (optional)
     print(video_metadata.fps)
 
+    # Field represents measured FPS of live stream (optional)
+    print(video_metadata.measured_fps)
+
     # Field is a flag telling if frame comes from video file or stream.
     # If not possible to be determined - None
     print(video_metadata.comes_from_video_file)

diff --git a/docs/workflows/video_processing/overview.md b/docs/workflows/video_processing/overview.md
@@ -5,7 +5,7 @@ video-specific blocks (e.g., the ByteTracker block) and continue to dedicate eff
 their performance and robustness. The current state of this work is as follows:
 
 * We've introduced the `WorkflowVideoMetadata` input to store metadata related to video frames, 
-including FPS, timestamp, video source identifier, and file/stream flags. While this may not be the final approach 
+including declared FPS, measured FPS, timestamp, video source identifier, and file/stream flags. While this may not be the final approach 
 for handling video metadata, it allows us to build stateful video-processing blocks at this stage. 
 If your Workflow includes any blocks requiring input of kind `video_metadata`, you must define this input in 
 your Workflow. The metadata functions as a batch-oriented parameter, treated by the Execution Engine in the same

diff --git a/inference/core/interfaces/camera/entities.py b/inference/core/interfaces/camera/entities.py
@@ -55,14 +55,17 @@ class VideoFrame:
         frame_timestamp (FrameTimestamp): The timestamp when the frame was captured.
         source_id (int): The index of the video_reference element which was passed to InferencePipeline for this frame
             (useful when multiple streams are passed to InferencePipeline).
-        fps (Optional[float]): FPS of source (if possible to be acquired)
+        fps (Optional[float]): declared FPS of source (if possible to be acquired)
+        measured_fps (Optional[float]): measured FPS of live stream
         comes_from_video_file (Optional[bool]): flag to determine if frame comes from video file
     """
 
     image: np.ndarray
     frame_id: FrameID
     frame_timestamp: FrameTimestamp
+    # TODO: in next major version of inference replace `fps` with `declared_fps`
     fps: Optional[float] = None
+    measured_fps: Optional[float] = None
     source_id: Optional[int] = None
     comes_from_video_file: Optional[bool] = None
 

diff --git a/inference/core/interfaces/camera/video_source.py b/inference/core/interfaces/camera/video_source.py
@@ -862,11 +862,19 @@ def consume_frame(
             },
             status_update_handlers=self._status_update_handlers,
         )
+        measured_source_fps = declared_source_fps
+        if not is_source_video_file:
+            if hasattr(self._stream_consumption_pace_monitor, "fps"):
+                measured_source_fps = self._stream_consumption_pace_monitor.fps
+            else:
+                measured_source_fps = self._stream_consumption_pace_monitor()
+
         if self._video_fps_should_be_sub_sampled():
             return True
         return self._consume_stream_frame(
             video=video,
             declared_source_fps=declared_source_fps,
+            measured_source_fps=measured_source_fps,
             is_source_video_file=is_source_video_file,
             frame_timestamp=frame_timestamp,
             buffer=buffer,
@@ -912,6 +920,7 @@ def _consume_stream_frame(
         self,
         video: VideoFrameProducer,
         declared_source_fps: Optional[float],
+        measured_source_fps: Optional[float],
         is_source_video_file: Optional[bool],
         frame_timestamp: datetime,
         buffer: Queue,
@@ -954,7 +963,8 @@ def _consume_stream_frame(
                 buffer=buffer,
                 decoding_pace_monitor=self._decoding_pace_monitor,
                 source_id=source_id,
-                fps=declared_source_fps,
+                declared_source_fps=declared_source_fps,
+                measured_source_fps=measured_source_fps,
                 comes_from_video_file=is_source_video_file,
             )
         if self._buffer_filling_strategy in DROP_OLDEST_STRATEGIES:
@@ -1153,7 +1163,8 @@ def decode_video_frame_to_buffer(
     buffer: Queue,
     decoding_pace_monitor: sv.FPSMonitor,
     source_id: Optional[int],
-    fps: Optional[float] = None,
+    declared_source_fps: Optional[float] = None,
+    measured_source_fps: Optional[float] = None,
     comes_from_video_file: Optional[bool] = None,
 ) -> bool:
     success, image = video.retrieve()
@@ -1164,7 +1175,8 @@ def decode_video_frame_to_buffer(
         image=image,
         frame_id=frame_id,
         frame_timestamp=frame_timestamp,
-        fps=fps,
+        fps=declared_source_fps,
+        measured_fps=measured_source_fps,
         source_id=source_id,
         comes_from_video_file=comes_from_video_file,
     )

diff --git a/inference/core/interfaces/stream/model_handlers/roboflow_models.py b/inference/core/interfaces/stream/model_handlers/roboflow_models.py
@@ -1,4 +1,4 @@
-from typing import Any, List
+from typing import List
 
 from inference.core.interfaces.camera.entities import VideoFrame
 from inference.core.interfaces.stream.entities import ModelConfig
@@ -12,10 +12,16 @@ def default_process_frame(
     inference_config: ModelConfig,
 ) -> List[dict]:
     postprocessing_args = inference_config.to_postprocessing_params()
+    # TODO: handle batch input in usage
+    fps = video_frame[0].fps
+    if video_frame[0].measured_fps:
+        fps = video_frame[0].measured_fps
+    if not fps:
+        fps = 0
     predictions = wrap_in_list(
         model.infer(
             [f.image for f in video_frame],
-            usage_fps=video_frame[0].fps,
+            usage_fps=fps,
             usage_api_key=model.api_key,
             **postprocessing_args,
         )

diff --git a/inference/core/interfaces/stream/model_handlers/workflows.py b/inference/core/interfaces/stream/model_handlers/workflows.py
@@ -19,6 +19,8 @@ def run_workflow(
             workflows_parameters = {}
         # TODO: pass fps reflecting each stream to workflows_parameters
         fps = video_frames[0].fps
+        if video_frames[0].measured_fps:
+            fps = video_frames[0].measured_fps
         if fps is None:
             # for FPS reporting we expect 0 when FPS cannot be determined
             fps = 0
@@ -32,6 +34,7 @@ def run_workflow(
                 frame_number=video_frame.frame_id,
                 frame_timestamp=video_frame.frame_timestamp,
                 fps=video_frame.fps,
+                measured_fps=video_frame.measured_fps,
                 comes_from_video_file=video_frame.comes_from_video_file,
             )
             for video_frame in video_frames

diff --git a/inference/core/workflows/execution_engine/entities/base.py b/inference/core/workflows/execution_engine/entities/base.py
@@ -200,6 +200,10 @@ class VideoMetadata(BaseModel):
         description="Field represents FPS value (if possible to be retrieved)",
         default=None,
     )
+    measured_fps: Optional[float] = Field(
+        description="Field represents measured FPS of live stream",
+        default=None,
+    )
     comes_from_video_file: Optional[bool] = Field(
         description="Field is a flag telling if frame comes from video file or stream - "
         "if not possible to be determined - pass None",

diff --git a/inference/core/workflows/execution_engine/entities/types.py b/inference/core/workflows/execution_engine/entities/types.py
@@ -78,6 +78,7 @@ def __hash__(self) -> int:
         "video_identifier": "rtsp://some.com/stream1",
         "comes_from_video_file": False,
         "fps": 23.99,
+        "measured_fps": 20.05,
         "frame_number": 24,
         "frame_timestamp": "2024-08-21T11:13:44.313999", 
     }  
@@ -116,6 +117,7 @@ def __hash__(self) -> int:
     "video_identifier": "rtsp://some.com/stream1",
     "comes_from_video_file": False,
     "fps": 23.99,
+    "measured_fps": 20.05,
     "frame_number": 24,
     "frame_timestamp": "2024-08-21T11:13:44.313999", 
 }