From e33c22196ff7889670897db78d9ee08ce3823653 Mon Sep 17 00:00:00 2001 From: Alex Clarke Date: Thu, 17 Oct 2024 15:40:46 +0100 Subject: [PATCH] Add support for multiple monitors (#31) * Add support for multiple monitors This is activated with two main talon settings, controlling if clamping occurs and the behavior when no eye tracker data * Changed eyetracker fallback to use enum Also ran formatter * Moved functions out of import try to if after * Use literal for setting, move clamp to functions * Add quick suggested changes Co-authored-by: James Stout * Move raise UnImplemented to beginning of function --------- Co-authored-by: James Stout --- .subtrees/gaze-ocr/gaze_ocr/_gaze_ocr.py | 34 +++++++++-- .../screen-ocr/screen_ocr/_screen_ocr.py | 60 ++++++++++++++----- gaze_ocr_talon.py | 39 ++++++++---- 3 files changed, 100 insertions(+), 33 deletions(-) diff --git a/.subtrees/gaze-ocr/gaze_ocr/_gaze_ocr.py b/.subtrees/gaze-ocr/gaze_ocr/_gaze_ocr.py index 1980389..b6ee817 100644 --- a/.subtrees/gaze-ocr/gaze_ocr/_gaze_ocr.py +++ b/.subtrees/gaze-ocr/gaze_ocr/_gaze_ocr.py @@ -69,11 +69,21 @@ def move_text_cursor(self): self.keyboard.right(1) +class EyeTrackerFallback(Enum): + MAIN_SCREEN = auto() + ACTIVE_WINDOW = auto() + + class OcrCache: - def __init__(self, ocr_reader: Reader): + def __init__( + self, + ocr_reader: Reader, + fallback_when_no_eye_tracker: EyeTrackerFallback = EyeTrackerFallback.MAIN_SCREEN, + ): self.ocr_reader = ocr_reader self._last_time_range = None self._last_screen_contents = None + self.fallback_when_no_eye_tracker = fallback_when_no_eye_tracker def read( self, @@ -96,7 +106,13 @@ def read( if bounding_box: self._last_screen_contents = self.ocr_reader.read_screen(bounding_box) else: - self._last_screen_contents = self.ocr_reader.read_screen() + if ( + self.fallback_when_no_eye_tracker + == EyeTrackerFallback.ACTIVE_WINDOW + ): + self._last_screen_contents = self.ocr_reader.read_current_window() + else: + self._last_screen_contents = self.ocr_reader.read_screen() return self._last_screen_contents @@ -122,6 +138,7 @@ def __init__( app_actions=None, save_data_directory: Optional[str] = None, gaze_box_padding: int = 100, + fallback_when_no_eye_tracker: EyeTrackerFallback = EyeTrackerFallback.MAIN_SCREEN, ): self.ocr_reader = ocr_reader self.eye_tracker = eye_tracker @@ -133,7 +150,10 @@ def __init__( self._change_radius = 10 self._executor = futures.ThreadPoolExecutor(max_workers=1) self._future = None - self._ocr_cache = OcrCache(ocr_reader) + self._ocr_cache = OcrCache( + ocr_reader, fallback_when_no_eye_tracker=fallback_when_no_eye_tracker + ) + self.fallback_when_no_eye_tracker = fallback_when_no_eye_tracker def shutdown(self, wait=True): self._executor.shutdown(wait) @@ -204,7 +224,13 @@ def read_nearby( if gaze_point: self._future.set_result(self.ocr_reader.read_nearby(gaze_point)) else: - self._future.set_result(self.ocr_reader.read_screen()) + if ( + self.fallback_when_no_eye_tracker + == EyeTrackerFallback.ACTIVE_WINDOW + ): + self._future.set_result(self.ocr_reader.read_current_window()) + else: + self._future.set_result(self.ocr_reader.read_screen()) def latest_screen_contents(self) -> ScreenContents: """Return the ScreenContents of the latest call to start_reading_nearby(). diff --git a/.subtrees/screen-ocr/screen_ocr/_screen_ocr.py b/.subtrees/screen-ocr/screen_ocr/_screen_ocr.py index e79dcf2..745e521 100644 --- a/.subtrees/screen-ocr/screen_ocr/_screen_ocr.py +++ b/.subtrees/screen-ocr/screen_ocr/_screen_ocr.py @@ -45,20 +45,39 @@ except (ImportError, SyntaxError): _winrt = None + # Optional packages needed for certain backends. try: from PIL import Image, ImageGrab, ImageOps except ImportError: Image = ImageGrab = ImageOps = None try: - from talon import actions, screen - from talon.types import rect + from talon import actions, screen, ui + from talon.types.rect import Rect except ImportError: - screen = rect = actions = None + ui = screen = Rect = actions = None # Represented as [left, top, right, bottom] pixel coordinates BoundingBox = Tuple[int, int, int, int] +if Rect: + + def to_rect(bounding_box: BoundingBox) -> Rect: + return Rect( + x=bounding_box[0], + y=bounding_box[1], + width=bounding_box[2] - bounding_box[0], + height=bounding_box[3] - bounding_box[1], + ) + + def to_bounding_box(rect_talon: Rect) -> BoundingBox: + return ( + rect_talon.x, + rect_talon.y, + rect_talon.x + rect_talon.width, + rect_talon.y + rect_talon.height, + ) + class Reader: """Reads on-screen text using OCR.""" @@ -227,6 +246,20 @@ def read_screen(self, bounding_box: Optional[BoundingBox] = None): search_radius=None, ) + def read_current_window(self): + if not self._is_talon_backend(): + raise NotImplementedError + assert ui + win = ui.active_window() + bounding_box = to_bounding_box(win.rect) + screenshot, bounding_box = self._clean_screenshot( + bounding_box, clamp_to_main_screen=False + ) + return self.read_image( + screenshot, + bounding_box=bounding_box, + ) + def read_image( self, image, @@ -255,17 +288,17 @@ def _is_talon_backend(self): return _talon and isinstance(self._backend, _talon.TalonBackend) def _clean_screenshot( - self, bounding_box: Optional[BoundingBox] + self, bounding_box: Optional[BoundingBox], clamp_to_main_screen: bool = True ) -> Tuple[Any, BoundingBox]: if not actions: - return self._screenshot(bounding_box) + return self._screenshot(bounding_box, clamp_to_main_screen) # Attempt to turn off HUD if talon_hud is installed. try: actions.user.hud_set_visibility(False, pause_seconds=0.02) except: pass try: - return self._screenshot(bounding_box) + return self._screenshot(bounding_box, clamp_to_main_screen) finally: # Attempt to turn on HUD if talon_hud is installed. try: @@ -274,28 +307,23 @@ def _clean_screenshot( pass def _screenshot( - self, bounding_box: Optional[BoundingBox] + self, bounding_box: Optional[BoundingBox], clamp_to_main_screen: bool = True ) -> Tuple[Any, BoundingBox]: if self._is_talon_backend(): assert screen - assert rect + assert to_rect screen_box = screen.main().rect - if bounding_box: + if bounding_box and clamp_to_main_screen: bounding_box = ( max(0, bounding_box[0]), max(0, bounding_box[1]), min(screen_box.width, bounding_box[2]), min(screen_box.height, bounding_box[3]), ) - else: + if not bounding_box: bounding_box = (0, 0, screen_box.width, screen_box.height) screenshot = screen.capture_rect( - rect.Rect( - bounding_box[0], - bounding_box[1], - bounding_box[2] - bounding_box[0], - bounding_box[3] - bounding_box[1], - ), + to_rect(bounding_box), retina=False, ) else: diff --git a/gaze_ocr_talon.py b/gaze_ocr_talon.py index 9bf71f9..d8e7dee 100644 --- a/gaze_ocr_talon.py +++ b/gaze_ocr_talon.py @@ -3,7 +3,7 @@ import sys from math import floor from pathlib import Path -from typing import Dict, Iterable, Optional, Sequence +from typing import Dict, Iterable, Literal, Optional, Sequence import numpy as np from talon import Context, Module, actions, app, cron, fs, screen, settings @@ -107,6 +107,12 @@ default="FFFFFF", desc="Debug color to use on a dark background", ) +mod.setting( + "ocr_behavior_when_no_eye_tracker", + type=Literal["MAIN_SCREEN", "ACTIVE_WINDOW"], + default="MAIN_SCREEN", + desc="Region to OCR when no data from the eye tracker", +) mod.mode("gaze_ocr_disambiguation") mod.list("ocr_actions", desc="Actions to perform on selected text.") @@ -256,7 +262,8 @@ def reload_backend(name, flags): if setting_ocr_use_talon_backend and not ocr: logging.info("Talon OCR not available, will rely on external support.") ocr_reader = screen_ocr.Reader.create_fast_reader( - radius=settings.get("user.ocr_gaze_point_padding"), homophones=homophones + radius=settings.get("user.ocr_gaze_point_padding"), + homophones=homophones, ) gaze_ocr_controller = gaze_ocr.Controller( ocr_reader, @@ -266,6 +273,9 @@ def reload_backend(name, flags): app_actions=gaze_ocr.talon.AppActions(), save_data_directory=settings.get("user.ocr_logging_dir"), gaze_box_padding=settings.get("user.ocr_gaze_box_padding"), + fallback_when_no_eye_tracker=gaze_ocr.EyeTrackerFallback[ + settings.get("user.ocr_behavior_when_no_eye_tracker").upper() + ], ) @@ -318,9 +328,10 @@ def reset_disambiguation(): def show_disambiguation(): global ambiguous_matches, disambiguation_canvas + contents = gaze_ocr_controller.latest_screen_contents() + def on_draw(c): assert ambiguous_matches - contents = gaze_ocr_controller.latest_screen_contents() debug_color = get_debug_color(has_light_background(contents.screenshot)) nearest = gaze_ocr_controller.find_nearest_cursor_location(ambiguous_matches) used_locations = set() @@ -353,7 +364,7 @@ def on_draw(c): actions.mode.enable("user.gaze_ocr_disambiguation") if disambiguation_canvas: disambiguation_canvas.close() - disambiguation_canvas = Canvas.from_screen(screen.main()) + disambiguation_canvas = Canvas.from_rect(screen_ocr.to_rect(contents.bounding_box)) disambiguation_canvas.register("draw", on_draw) disambiguation_canvas.freeze() @@ -740,19 +751,14 @@ def show_ocr_overlay_for_query(type: str, query: str = ""): debug_canvas.close() contents = gaze_ocr_controller.latest_screen_contents() + contents_rect = screen_ocr.to_rect(contents.bounding_box) + def on_draw(c): debug_color = get_debug_color(has_light_background(contents.screenshot)) # Show bounding box. c.paint.style = c.paint.Style.STROKE c.paint.color = debug_color - c.draw_rect( - rect.Rect( - x=contents.bounding_box[0], - y=contents.bounding_box[1], - width=contents.bounding_box[2] - contents.bounding_box[0], - height=contents.bounding_box[3] - contents.bounding_box[1], - ) - ) + c.draw_rect(contents_rect) if contents.screen_coordinates: c.paint.style = c.paint.Style.STROKE c.paint.color = debug_color @@ -795,7 +801,14 @@ def on_draw(c): f"{settings.get('user.ocr_debug_display_seconds')}s", debug_canvas.close ) - debug_canvas = Canvas.from_screen(screen.main()) + # Increased size slightly for canvas to ensure everything will be inside canvas + canvas_rect = contents_rect.copy() + center = canvas_rect.center + canvas_rect.height += 100 + canvas_rect.width += 100 + canvas_rect.center = center + + debug_canvas = Canvas.from_rect(canvas_rect) debug_canvas.register("draw", on_draw) debug_canvas.freeze()