Skip to content

Commit

Permalink
Add support for multiple monitors (#31)
Browse files Browse the repository at this point in the history
* Add support for multiple monitors
This is activated with two main talon settings,
controlling whether clamping occurs and the behavior when no eye tracker data is available.

* Changed eyetracker fallback to use enum
Also ran formatter

* Moved functions out of import try to if after

* Use literal for setting, move clamp to functions

* Add quick suggested changes

Co-authored-by: James Stout <james.wolf.stout@gmail.com>

* Move raise NotImplementedError to beginning of function

---------

Co-authored-by: James Stout <james.wolf.stout@gmail.com>
  • Loading branch information
alexander-clarke and wolfmanstout authored Oct 17, 2024
1 parent 627c31d commit e33c221
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 33 deletions.
34 changes: 30 additions & 4 deletions .subtrees/gaze-ocr/gaze_ocr/_gaze_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,21 @@ def move_text_cursor(self):
self.keyboard.right(1)


class EyeTrackerFallback(Enum):
    """Region to OCR when no gaze data is available from the eye tracker."""

    # Read the screen with no bounding box (``Reader.read_screen()``).
    MAIN_SCREEN = auto()
    # Read only the active window (``Reader.read_current_window()``).
    ACTIVE_WINDOW = auto()


class OcrCache:
def __init__(self, ocr_reader: Reader):
def __init__(
self,
ocr_reader: Reader,
fallback_when_no_eye_tracker: EyeTrackerFallback = EyeTrackerFallback.MAIN_SCREEN,
):
self.ocr_reader = ocr_reader
self._last_time_range = None
self._last_screen_contents = None
self.fallback_when_no_eye_tracker = fallback_when_no_eye_tracker

def read(
self,
Expand All @@ -96,7 +106,13 @@ def read(
if bounding_box:
self._last_screen_contents = self.ocr_reader.read_screen(bounding_box)
else:
self._last_screen_contents = self.ocr_reader.read_screen()
if (
self.fallback_when_no_eye_tracker
== EyeTrackerFallback.ACTIVE_WINDOW
):
self._last_screen_contents = self.ocr_reader.read_current_window()
else:
self._last_screen_contents = self.ocr_reader.read_screen()
return self._last_screen_contents


Expand All @@ -122,6 +138,7 @@ def __init__(
app_actions=None,
save_data_directory: Optional[str] = None,
gaze_box_padding: int = 100,
fallback_when_no_eye_tracker: EyeTrackerFallback = EyeTrackerFallback.MAIN_SCREEN,
):
self.ocr_reader = ocr_reader
self.eye_tracker = eye_tracker
Expand All @@ -133,7 +150,10 @@ def __init__(
self._change_radius = 10
self._executor = futures.ThreadPoolExecutor(max_workers=1)
self._future = None
self._ocr_cache = OcrCache(ocr_reader)
self._ocr_cache = OcrCache(
ocr_reader, fallback_when_no_eye_tracker=fallback_when_no_eye_tracker
)
self.fallback_when_no_eye_tracker = fallback_when_no_eye_tracker

def shutdown(self, wait=True):
self._executor.shutdown(wait)
Expand Down Expand Up @@ -204,7 +224,13 @@ def read_nearby(
if gaze_point:
self._future.set_result(self.ocr_reader.read_nearby(gaze_point))
else:
self._future.set_result(self.ocr_reader.read_screen())
if (
self.fallback_when_no_eye_tracker
== EyeTrackerFallback.ACTIVE_WINDOW
):
self._future.set_result(self.ocr_reader.read_current_window())
else:
self._future.set_result(self.ocr_reader.read_screen())

def latest_screen_contents(self) -> ScreenContents:
"""Return the ScreenContents of the latest call to start_reading_nearby().
Expand Down
60 changes: 44 additions & 16 deletions .subtrees/screen-ocr/screen_ocr/_screen_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,39 @@
except (ImportError, SyntaxError):
_winrt = None


# Optional packages needed for certain backends.
try:
from PIL import Image, ImageGrab, ImageOps
except ImportError:
Image = ImageGrab = ImageOps = None
try:
from talon import actions, screen
from talon.types import rect
from talon import actions, screen, ui
from talon.types.rect import Rect
except ImportError:
screen = rect = actions = None
ui = screen = Rect = actions = None

# Represented as [left, top, right, bottom] pixel coordinates
BoundingBox = Tuple[int, int, int, int]

# These converters need Talon's ``Rect``; it is set to None above when the
# Talon imports fail, in which case neither helper is defined.
if Rect:

    def to_rect(bounding_box: BoundingBox) -> Rect:
        """Convert a [left, top, right, bottom] box into a Talon ``Rect``."""
        left, top = bounding_box[0], bounding_box[1]
        right, bottom = bounding_box[2], bounding_box[3]
        return Rect(x=left, y=top, width=right - left, height=bottom - top)

    def to_bounding_box(rect_talon: Rect) -> BoundingBox:
        """Convert a Talon ``Rect`` into a [left, top, right, bottom] box."""
        left = rect_talon.x
        top = rect_talon.y
        return (left, top, left + rect_talon.width, top + rect_talon.height)


class Reader:
"""Reads on-screen text using OCR."""
Expand Down Expand Up @@ -227,6 +246,20 @@ def read_screen(self, bounding_box: Optional[BoundingBox] = None):
search_radius=None,
)

def read_current_window(self):
    """Run OCR on the currently active window.

    The active window's rectangle is looked up through Talon's ``ui``
    module, captured as a screenshot and passed to ``read_image``.

    Returns:
        The result of ``read_image`` for the captured window.

    Raises:
        NotImplementedError: If the reader is not using the Talon backend.
    """
    if not self._is_talon_backend():
        raise NotImplementedError
    assert ui
    window_box = to_bounding_box(ui.active_window().rect)
    # Skip main-screen clamping: with multiple monitors the active window
    # is not necessarily on the main screen.
    image, captured_box = self._clean_screenshot(
        window_box, clamp_to_main_screen=False
    )
    return self.read_image(image, bounding_box=captured_box)

def read_image(
self,
image,
Expand Down Expand Up @@ -255,17 +288,17 @@ def _is_talon_backend(self):
return _talon and isinstance(self._backend, _talon.TalonBackend)

def _clean_screenshot(
self, bounding_box: Optional[BoundingBox]
self, bounding_box: Optional[BoundingBox], clamp_to_main_screen: bool = True
) -> Tuple[Any, BoundingBox]:
if not actions:
return self._screenshot(bounding_box)
return self._screenshot(bounding_box, clamp_to_main_screen)
# Attempt to turn off HUD if talon_hud is installed.
try:
actions.user.hud_set_visibility(False, pause_seconds=0.02)
except:
pass
try:
return self._screenshot(bounding_box)
return self._screenshot(bounding_box, clamp_to_main_screen)
finally:
# Attempt to turn on HUD if talon_hud is installed.
try:
Expand All @@ -274,28 +307,23 @@ def _clean_screenshot(
pass

def _screenshot(
self, bounding_box: Optional[BoundingBox]
self, bounding_box: Optional[BoundingBox], clamp_to_main_screen: bool = True
) -> Tuple[Any, BoundingBox]:
if self._is_talon_backend():
assert screen
assert rect
assert to_rect
screen_box = screen.main().rect
if bounding_box:
if bounding_box and clamp_to_main_screen:
bounding_box = (
max(0, bounding_box[0]),
max(0, bounding_box[1]),
min(screen_box.width, bounding_box[2]),
min(screen_box.height, bounding_box[3]),
)
else:
if not bounding_box:
bounding_box = (0, 0, screen_box.width, screen_box.height)
screenshot = screen.capture_rect(
rect.Rect(
bounding_box[0],
bounding_box[1],
bounding_box[2] - bounding_box[0],
bounding_box[3] - bounding_box[1],
),
to_rect(bounding_box),
retina=False,
)
else:
Expand Down
39 changes: 26 additions & 13 deletions gaze_ocr_talon.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
from math import floor
from pathlib import Path
from typing import Dict, Iterable, Optional, Sequence
from typing import Dict, Iterable, Literal, Optional, Sequence

import numpy as np
from talon import Context, Module, actions, app, cron, fs, screen, settings
Expand Down Expand Up @@ -107,6 +107,12 @@
default="FFFFFF",
desc="Debug color to use on a dark background",
)
# Chooses which region is OCR'd when the eye tracker yields no data.
# The string value is upper-cased and used as the member name of
# gaze_ocr.EyeTrackerFallback when the controller is built in reload_backend.
mod.setting(
    "ocr_behavior_when_no_eye_tracker",
    type=Literal["MAIN_SCREEN", "ACTIVE_WINDOW"],
    default="MAIN_SCREEN",
    desc="Region to OCR when no data from the eye tracker",
)

mod.mode("gaze_ocr_disambiguation")
mod.list("ocr_actions", desc="Actions to perform on selected text.")
Expand Down Expand Up @@ -256,7 +262,8 @@ def reload_backend(name, flags):
if setting_ocr_use_talon_backend and not ocr:
logging.info("Talon OCR not available, will rely on external support.")
ocr_reader = screen_ocr.Reader.create_fast_reader(
radius=settings.get("user.ocr_gaze_point_padding"), homophones=homophones
radius=settings.get("user.ocr_gaze_point_padding"),
homophones=homophones,
)
gaze_ocr_controller = gaze_ocr.Controller(
ocr_reader,
Expand All @@ -266,6 +273,9 @@ def reload_backend(name, flags):
app_actions=gaze_ocr.talon.AppActions(),
save_data_directory=settings.get("user.ocr_logging_dir"),
gaze_box_padding=settings.get("user.ocr_gaze_box_padding"),
fallback_when_no_eye_tracker=gaze_ocr.EyeTrackerFallback[
settings.get("user.ocr_behavior_when_no_eye_tracker").upper()
],
)


Expand Down Expand Up @@ -318,9 +328,10 @@ def reset_disambiguation():
def show_disambiguation():
global ambiguous_matches, disambiguation_canvas

contents = gaze_ocr_controller.latest_screen_contents()

def on_draw(c):
assert ambiguous_matches
contents = gaze_ocr_controller.latest_screen_contents()
debug_color = get_debug_color(has_light_background(contents.screenshot))
nearest = gaze_ocr_controller.find_nearest_cursor_location(ambiguous_matches)
used_locations = set()
Expand Down Expand Up @@ -353,7 +364,7 @@ def on_draw(c):
actions.mode.enable("user.gaze_ocr_disambiguation")
if disambiguation_canvas:
disambiguation_canvas.close()
disambiguation_canvas = Canvas.from_screen(screen.main())
disambiguation_canvas = Canvas.from_rect(screen_ocr.to_rect(contents.bounding_box))
disambiguation_canvas.register("draw", on_draw)
disambiguation_canvas.freeze()

Expand Down Expand Up @@ -740,19 +751,14 @@ def show_ocr_overlay_for_query(type: str, query: str = ""):
debug_canvas.close()
contents = gaze_ocr_controller.latest_screen_contents()

contents_rect = screen_ocr.to_rect(contents.bounding_box)

def on_draw(c):
debug_color = get_debug_color(has_light_background(contents.screenshot))
# Show bounding box.
c.paint.style = c.paint.Style.STROKE
c.paint.color = debug_color
c.draw_rect(
rect.Rect(
x=contents.bounding_box[0],
y=contents.bounding_box[1],
width=contents.bounding_box[2] - contents.bounding_box[0],
height=contents.bounding_box[3] - contents.bounding_box[1],
)
)
c.draw_rect(contents_rect)
if contents.screen_coordinates:
c.paint.style = c.paint.Style.STROKE
c.paint.color = debug_color
Expand Down Expand Up @@ -795,7 +801,14 @@ def on_draw(c):
f"{settings.get('user.ocr_debug_display_seconds')}s", debug_canvas.close
)

debug_canvas = Canvas.from_screen(screen.main())
# Increased size slightly for canvas to ensure everything will be inside canvas
canvas_rect = contents_rect.copy()
center = canvas_rect.center
canvas_rect.height += 100
canvas_rect.width += 100
canvas_rect.center = center

debug_canvas = Canvas.from_rect(canvas_rect)
debug_canvas.register("draw", on_draw)
debug_canvas.freeze()

Expand Down

0 comments on commit e33c221

Please sign in to comment.