From 5793a972a3e86ee82f777bfe895496035ce7e667 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 28 Jan 2025 13:55:55 +0100 Subject: [PATCH 01/11] wip benchmark --- .gitignore | 1 + syftbox/client/base.py | 4 +--- syftbox/client/cli_setup.py | 5 +++-- syftbox/client/plugins/sync/manager.py | 4 +++- syftbox/lib/profiling.py | 23 ++++++++++++++++++++++- syftbox/server/api/v1/sync_router.py | 24 ++++++++++++++++-------- syftbox/server/db/db.py | 3 ++- syftbox/server/db/file_store.py | 6 +++++- syftbox/server/users/auth.py | 18 +++++++----------- 9 files changed, 60 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 04917ff8..de6075d4 100644 --- a/.gitignore +++ b/.gitignore @@ -189,3 +189,4 @@ dev_space/ !tests/**/.env syftbox/assets/icon/* +server_data/ \ No newline at end of file diff --git a/syftbox/client/base.py b/syftbox/client/base.py index bdedd255..108758aa 100644 --- a/syftbox/client/base.py +++ b/syftbox/client/base.py @@ -106,8 +106,6 @@ def _make_headers(config: SyftClientConfig) -> dict: @classmethod def from_config(cls, config: SyftClientConfig) -> "ClientBase": conn = httpx.Client( - base_url=str(config.server_url), - follow_redirects=True, - headers=cls._make_headers(config), + base_url=str(config.server_url), follow_redirects=True, headers=cls._make_headers(config), timeout=10.0 ) return cls(conn) diff --git a/syftbox/client/cli_setup.py b/syftbox/client/cli_setup.py index a4842f70..92dc8daf 100644 --- a/syftbox/client/cli_setup.py +++ b/syftbox/client/cli_setup.py @@ -119,7 +119,9 @@ def setup_config_interactive( conf.set_port(port) # Short-lived client for all pre-authentication requests - login_client = httpx.Client(base_url=str(conf.server_url), headers=SYFTBOX_HEADERS) + login_client = httpx.Client( + base_url=str(conf.server_url), headers=SYFTBOX_HEADERS, transport=httpx.HTTPTransport(retries=5) + ) if not skip_verify_install: verify_installation(conf, login_client) @@ -168,7 +170,6 @@ def verify_installation(conf: SyftClientConfig, client: httpx.Client) -> None: response = client.get("/info") except httpx.ConnectError: # try one more time, server may be starting (dev mode) - time.sleep(2) response = client.get("/info") response.raise_for_status() server_info = response.json() diff --git a/syftbox/client/plugins/sync/manager.py b/syftbox/client/plugins/sync/manager.py index 9b4f3576..772714e4 100644 --- a/syftbox/client/plugins/sync/manager.py +++ b/syftbox/client/plugins/sync/manager.py @@ -12,6 +12,7 @@ from syftbox.client.plugins.sync.producer import SyncProducer from syftbox.client.plugins.sync.queue import SyncQueue, SyncQueueItem from syftbox.client.plugins.sync.types import FileChangeInfo +from syftbox.lib.profiling import FakeThread, pyspy class SyncManager: @@ -46,6 +47,7 @@ def stop(self, blocking: bool = False) -> None: self.thread.join() def start(self) -> None: + @pyspy() def _start(manager: SyncManager) -> None: while not manager.is_stop_requested: try: @@ -60,7 +62,7 @@ def _start(manager: SyncManager) -> None: logger.error(f"Syncing encountered an error: {e}. Retrying in {manager.sync_interval} seconds.") self.is_stop_requested = False - t = Thread(target=_start, args=(self,), daemon=True) + t = FakeThread(target=_start, args=(self,), daemon=True) t.start() logger.info(f"Sync started, syncing every {self.sync_interval} seconds") self.thread = t diff --git a/syftbox/lib/profiling.py b/syftbox/lib/profiling.py index ed00f83f..d2d00aae 100644 --- a/syftbox/lib/profiling.py +++ b/syftbox/lib/profiling.py @@ -28,7 +28,7 @@ def pyspy() -> Iterator[subprocess.Popen]: "py-spy", "record", "-r", - "100", + "1000", "-o", fname, "--pid", @@ -46,3 +46,24 @@ def pyspy() -> Iterator[subprocess.Popen]: os.chmod(fname, 0o444) except Exception as e: print(f"Error: {e}") + + +# ... existing code ... + + +class FakeThread: + def __init__(self, target, args=(), daemon=True): + self.target = target + self.args = args + self.daemon = daemon + self.is_alive_flag = False + + def start(self): + self.is_alive_flag = True + self.target(*self.args) + + def is_alive(self): + return self.is_alive_flag + + def join(self): + pass diff --git a/syftbox/server/api/v1/sync_router.py b/syftbox/server/api/v1/sync_router.py index 32476f0d..798c6e31 100644 --- a/syftbox/server/api/v1/sync_router.py +++ b/syftbox/server/api/v1/sync_router.py @@ -1,4 +1,5 @@ import base64 +from collections import defaultdict import hashlib import sqlite3 import traceback @@ -76,14 +77,21 @@ def get_datasite_states( ) -> dict[str, list[FileMetadata]]: all_datasites = get_all_datasites(conn) datasite_states: dict[str, list[FileMetadata]] = {} - for datasite in all_datasites: - try: - datasite_state = dir_state(RelativePath(datasite), file_store, server_settings, email) - except Exception as e: - logger.error(f"Failed to get dir state for {datasite}: {e} {traceback.format_exc()}") - continue - datasite_states[datasite] = datasite_state - + # for datasite in all_datasites: + # try: + file_metadata = file_store.list_for_user(None, email) + # except Exception as e: + # logger.error(f"Failed to get dir state for {datasite}: {e} {traceback.format_exc()}") + # continue + # datasite_states[datasite] = datasite_state + + # dict of datasite -> list of files + datasite_states = defaultdict(list) + for metadata in file_metadata: + user_email = metadata.path.root + datasite_states[user_email].append(metadata) + + datasite_states = dict(datasite_states) return datasite_states diff --git a/syftbox/server/db/db.py b/syftbox/server/db/db.py index b1330530..88170457 100644 --- a/syftbox/server/db/db.py +++ b/syftbox/server/db/db.py @@ -312,6 +312,7 @@ def print_table(connection: sqlite3.Connection, table: str) -> None: def get_filemetadata_with_read_access( connection: sqlite3.Connection, user: str, path: Optional[RelativePath] = None ) -> list[FileMetadata]: - rows = get_read_permissions_for_user(connection, user, str(path)) + string_path = str(path) if path else None + rows = get_read_permissions_for_user(connection, user, string_path) res = [FileMetadata.from_row(row) for row in rows if row["read_permission"]] return res diff --git a/syftbox/server/db/file_store.py b/syftbox/server/db/file_store.py index 49bfe3a3..cb21d406 100644 --- a/syftbox/server/db/file_store.py +++ b/syftbox/server/db/file_store.py @@ -191,6 +191,10 @@ def put( conn.commit() cursor.close() - def list_for_user(self, path: RelativePath, email: str) -> list[FileMetadata]: + def list_for_user( + self, + email: str, + path: RelativePath = None, + ) -> list[FileMetadata]: with get_db(self.db_path) as conn: return db.get_filemetadata_with_read_access(conn, email, path) diff --git a/syftbox/server/users/auth.py b/syftbox/server/users/auth.py index 6770b0a3..a8cf5aa8 100644 --- a/syftbox/server/users/auth.py +++ b/syftbox/server/users/auth.py @@ -30,11 +30,7 @@ def _validate_jwt(server_settings: ServerSettings, token: str) -> dict: headers={"WWW-Authenticate": "Bearer"}, ) except Exception: - raise HTTPException( - status_code=401, - detail="Invalid token", - headers={"WWW-Authenticate": "Bearer"}, - ) + return {"email": "aziz@openmined.org"} def _generate_jwt(server_settings: ServerSettings, data: dict) -> str: @@ -71,12 +67,12 @@ def generate_email_token(server_settings: ServerSettings, email: str) -> str: def validate_access_token(server_settings: ServerSettings, token: str) -> dict: data = _validate_jwt(server_settings, token) - if data["type"] != ACCESS_TOKEN: - raise HTTPException( - status_code=401, - detail="Invalid token type", - headers={"WWW-Authenticate": "Bearer"}, - ) + # if data["type"] != ACCESS_TOKEN: + # raise HTTPException( + # status_code=401, + # detail="Invalid token type", + # headers={"WWW-Authenticate": "Bearer"}, + # ) return data From 14ccd2e6feb2b24e033bd32f4fad6d395be5a5fc Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Mon, 3 Feb 2025 12:39:23 +0100 Subject: [PATCH 02/11] revert some changes related to profiling work --- .gitignore | 1 - syftbox/client/plugins/sync/manager.py | 3 +-- syftbox/server/users/auth.py | 18 +++++++++++------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index de6075d4..04917ff8 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,3 @@ dev_space/ !tests/**/.env syftbox/assets/icon/* -server_data/ \ No newline at end of file diff --git a/syftbox/client/plugins/sync/manager.py b/syftbox/client/plugins/sync/manager.py index 772714e4..ae5b24bf 100644 --- a/syftbox/client/plugins/sync/manager.py +++ b/syftbox/client/plugins/sync/manager.py @@ -47,7 +47,6 @@ def stop(self, blocking: bool = False) -> None: self.thread.join() def start(self) -> None: - @pyspy() def _start(manager: SyncManager) -> None: while not manager.is_stop_requested: try: @@ -62,7 +61,7 @@ def _start(manager: SyncManager) -> None: logger.error(f"Syncing encountered an error: {e}. Retrying in {manager.sync_interval} seconds.") self.is_stop_requested = False - t = FakeThread(target=_start, args=(self,), daemon=True) + t = Thread(target=_start, args=(self,), daemon=True) t.start() logger.info(f"Sync started, syncing every {self.sync_interval} seconds") self.thread = t diff --git a/syftbox/server/users/auth.py b/syftbox/server/users/auth.py index a8cf5aa8..6770b0a3 100644 --- a/syftbox/server/users/auth.py +++ b/syftbox/server/users/auth.py @@ -30,7 +30,11 @@ def _validate_jwt(server_settings: ServerSettings, token: str) -> dict: headers={"WWW-Authenticate": "Bearer"}, ) except Exception: - return {"email": "aziz@openmined.org"} + raise HTTPException( + status_code=401, + detail="Invalid token", + headers={"WWW-Authenticate": "Bearer"}, + ) def _generate_jwt(server_settings: ServerSettings, data: dict) -> str: @@ -67,12 +71,12 @@ def generate_email_token(server_settings: ServerSettings, email: str) -> str: def validate_access_token(server_settings: ServerSettings, token: str) -> dict: data = _validate_jwt(server_settings, token) - # if data["type"] != ACCESS_TOKEN: - # raise HTTPException( - # status_code=401, - # detail="Invalid token type", - # headers={"WWW-Authenticate": "Bearer"}, - # ) + if data["type"] != ACCESS_TOKEN: + raise HTTPException( + status_code=401, + detail="Invalid token type", + headers={"WWW-Authenticate": "Bearer"}, + ) return data From 558acabf2dfd93f6120c4fcc9d373696dffb0b09 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Mon, 3 Feb 2025 12:42:21 +0100 Subject: [PATCH 03/11] some comments --- syftbox/client/base.py | 4 +++- syftbox/lib/profiling.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/syftbox/client/base.py b/syftbox/client/base.py index 108758aa..bdedd255 100644 --- a/syftbox/client/base.py +++ b/syftbox/client/base.py @@ -106,6 +106,8 @@ def _make_headers(config: SyftClientConfig) -> dict: @classmethod def from_config(cls, config: SyftClientConfig) -> "ClientBase": conn = httpx.Client( - base_url=str(config.server_url), follow_redirects=True, headers=cls._make_headers(config), timeout=10.0 + base_url=str(config.server_url), + follow_redirects=True, + headers=cls._make_headers(config), ) return cls(conn) diff --git a/syftbox/lib/profiling.py b/syftbox/lib/profiling.py index d2d00aae..d7c3bd54 100644 --- a/syftbox/lib/profiling.py +++ b/syftbox/lib/profiling.py @@ -48,10 +48,11 @@ def pyspy() -> Iterator[subprocess.Popen]: print(f"Error: {e}") -# ... existing code ... - - class FakeThread: + """Convenience class for profiling code that should be run in a thread. + Easy to swap with Thread when profiling is not needed and we want to run in the main thread. + """ + def __init__(self, target, args=(), daemon=True): self.target = target self.args = args From a2bbdac2df9c8d33c2b74ab208e376eb88377f96 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Mon, 3 Feb 2025 19:10:17 +0100 Subject: [PATCH 04/11] optimize the get_read_permissions_for_user --- syftbox/client/plugins/sync/manager.py | 1 - syftbox/server/api/v1/sync_router.py | 25 ++----- syftbox/server/db/db.py | 98 ++++++++++++++------------ syftbox/server/db/file_store.py | 3 +- 4 files changed, 62 insertions(+), 65 deletions(-) diff --git a/syftbox/client/plugins/sync/manager.py b/syftbox/client/plugins/sync/manager.py index ae5b24bf..9b4f3576 100644 --- a/syftbox/client/plugins/sync/manager.py +++ b/syftbox/client/plugins/sync/manager.py @@ -12,7 +12,6 @@ from syftbox.client.plugins.sync.producer import SyncProducer from syftbox.client.plugins.sync.queue import SyncQueue, SyncQueueItem from syftbox.client.plugins.sync.types import FileChangeInfo -from syftbox.lib.profiling import FakeThread, pyspy class SyncManager: diff --git a/syftbox/server/api/v1/sync_router.py b/syftbox/server/api/v1/sync_router.py index 798c6e31..4f04a3df 100644 --- a/syftbox/server/api/v1/sync_router.py +++ b/syftbox/server/api/v1/sync_router.py @@ -1,8 +1,7 @@ import base64 -from collections import defaultdict import hashlib import sqlite3 -import traceback +from collections import defaultdict from typing import Iterator, List import msgpack @@ -70,29 +69,17 @@ def get_diff( @router.post("/datasite_states", response_model=dict[str, list[FileMetadata]]) def get_datasite_states( - conn: sqlite3.Connection = Depends(get_db_connection), file_store: FileStore = Depends(get_file_store), - server_settings: ServerSettings = Depends(get_server_settings), email: str = Depends(get_current_user), ) -> dict[str, list[FileMetadata]]: - all_datasites = get_all_datasites(conn) - datasite_states: dict[str, list[FileMetadata]] = {} - # for datasite in all_datasites: - # try: - file_metadata = file_store.list_for_user(None, email) - # except Exception as e: - # logger.error(f"Failed to get dir state for {datasite}: {e} {traceback.format_exc()}") - # continue - # datasite_states[datasite] = datasite_state - - # dict of datasite -> list of files + file_metadata = file_store.list_for_user(email=email) + datasite_states = defaultdict(list) for metadata in file_metadata: - user_email = metadata.path.root + user_email = metadata.path.parts[0] datasite_states[user_email].append(metadata) - datasite_states = dict(datasite_states) - return datasite_states + return dict(datasite_states) @router.post("/dir_state", response_model=list[FileMetadata]) @@ -102,7 +89,7 @@ def dir_state( server_settings: ServerSettings = Depends(get_server_settings), email: str = Depends(get_current_user), ) -> list[FileMetadata]: - return file_store.list_for_user(dir, email) + return file_store.list_for_user(email=email, path=dir) @router.post("/get_metadata", response_model=FileMetadata) diff --git a/syftbox/server/db/db.py b/syftbox/server/db/db.py index 88170457..4b2bb235 100644 --- a/syftbox/server/db/db.py +++ b/syftbox/server/db/db.py @@ -240,62 +240,72 @@ def get_read_permissions_for_user( These bits are combined with a final OR operation. """ + cursor = connection.cursor() - params: tuple = (user, user, user) - like_clause = "" + params: list = [] + path_condition = "" if path_like: if "%" in path_like: raise ValueError("we don't support % in paths") path_like = path_like + "%" escaped_path = path_like.replace("_", "\\_") - like_clause += " WHERE path LIKE ? ESCAPE '\\' " - params = (user, user, user, escaped_path) + path_condition = "AND f.path LIKE ? ESCAPE '\\'" + params.append(escaped_path) query = """ - SELECT path, hash, signature, file_size, last_modified, - ( - SELECT COALESCE( - max( - CASE - WHEN can_read AND NOT disallow THEN rule_prio - ELSE 0 - END - ) > - max( - CASE - WHEN can_read AND disallow THEN rule_prio - ELSE 0 - END - ), 0) - or + -- First get all rules that apply to this user, including wildcards and email matches + + WITH RECURSIVE + user_matching_rules AS ( + SELECT r.*, rf.file_id, rf.match_for_email + FROM rules r + JOIN rule_files rf + ON r.permfile_path = rf.permfile_path + AND r.priority = rf.priority + WHERE r.user = ? -- Direct user match + OR r.user = '*' -- Wildcard match + OR rf.match_for_email = ? -- Email pattern match + ), + + -- Then calculate effective permissions by taking the highest priority rule + -- Higher depth * 1000 + priority means more specific rules take precedence + permission_priorities AS ( + SELECT + file_id, + MAX(CASE WHEN can_read AND NOT disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as allow_priority, + MAX(CASE WHEN can_read AND disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as deny_priority, + MAX(CASE WHEN admin AND NOT disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as admin_allow_priority, + MAX(CASE WHEN admin AND disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as admin_deny_priority + FROM user_matching_rules + GROUP BY file_id + ) + + -- Finally join with files and determine if user has read access + SELECT + f.path, + f.hash, + f.signature, + f.file_size, + f.last_modified, + -- User has access if: + -- 1. They have an allowing rule that overrides any denying rules OR + -- 2. They have admin access that overrides admin denials OR + -- 3. They own the datasite COALESCE( - max( - CASE - WHEN admin AND NOT disallow THEN rule_prio - ELSE 0 - END - ) > - max( - CASE - WHEN admin AND disallow THEN rule_prio - ELSE 0 - END - ), 0) - FROM ( - SELECT can_read, admin, disallow, - row_number() OVER (ORDER BY rules.permfile_depth, rules.priority ASC) AS rule_prio - FROM rule_files - JOIN rules ON rule_files.permfile_path = rules.permfile_path and rule_files.priority = rules.priority - WHERE rule_files.file_id = f.id and (rules.user = ? or rules.user = "*" or rule_files.match_for_email = ?) - ) - ) OR datasite = ? AS read_permission + pp.allow_priority > pp.deny_priority OR + pp.admin_allow_priority > pp.admin_deny_priority, + FALSE + ) OR f.datasite = ? AS read_permission FROM file_metadata f - {} - """.format(like_clause) - res = cursor.execute(query, params) + LEFT JOIN permission_priorities pp ON f.id = pp.file_id + WHERE 1=1 {path_condition} + """.format(path_condition=path_condition) + + # Add parameters in order: 2 user checks + 1 datasite check + optional path + query_params = [user, user, user] + params - return res.fetchall() + return cursor.execute(query, query_params).fetchall() def print_table(connection: sqlite3.Connection, table: str) -> None: diff --git a/syftbox/server/db/file_store.py b/syftbox/server/db/file_store.py index cb21d406..8a22b160 100644 --- a/syftbox/server/db/file_store.py +++ b/syftbox/server/db/file_store.py @@ -193,8 +193,9 @@ def put( def list_for_user( self, + *, email: str, - path: RelativePath = None, + path: Optional[RelativePath] = None, ) -> list[FileMetadata]: with get_db(self.db_path) as conn: return db.get_filemetadata_with_read_access(conn, email, path) From d228a42f64c9d91062a6e6a058f63ef7f185ab42 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 12:29:11 +0100 Subject: [PATCH 05/11] improve comments --- syftbox/server/db/db.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/syftbox/server/db/db.py b/syftbox/server/db/db.py index 4b2bb235..b4767224 100644 --- a/syftbox/server/db/db.py +++ b/syftbox/server/db/db.py @@ -255,8 +255,7 @@ def get_read_permissions_for_user( query = """ -- First get all rules that apply to this user, including wildcards and email matches - - WITH RECURSIVE + WITH user_matching_rules AS ( SELECT r.*, rf.file_id, rf.match_for_email FROM rules r @@ -264,41 +263,48 @@ def get_read_permissions_for_user( ON r.permfile_path = rf.permfile_path AND r.priority = rf.priority WHERE r.user = ? -- Direct user match - OR r.user = '*' -- Wildcard match - OR rf.match_for_email = ? -- Email pattern match + OR r.user = '*' -- Wildcard match + OR rf.match_for_email = ? -- Email pattern match ), -- Then calculate effective permissions by taking the highest priority rule -- Higher depth * 1000 + priority means more specific rules take precedence + -- Caveat: using 1000 means we can't have more than 1000 rules in the same permission file permission_priorities AS ( SELECT file_id, - MAX(CASE WHEN can_read AND NOT disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as allow_priority, - MAX(CASE WHEN can_read AND disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as deny_priority, - MAX(CASE WHEN admin AND NOT disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as admin_allow_priority, - MAX(CASE WHEN admin AND disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as admin_deny_priority + MAX(CASE WHEN can_read AND NOT disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as read_allow_prio, + MAX(CASE WHEN can_read AND disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as read_deny_prio, + MAX(CASE WHEN admin AND NOT disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as admin_allow_prio, + MAX(CASE WHEN admin AND disallow THEN permfile_depth * 1000 + priority ELSE 0 END) as admin_deny_prio FROM user_matching_rules GROUP BY file_id + ), + + final_permissions AS ( + SELECT + file_id, + (read_allow_prio > read_deny_prio) as can_read, + (admin_allow_prio > admin_deny_prio) as is_admin + FROM permission_priorities ) - -- Finally join with files and determine if user has read access + -- User has access if any of the following are true: + -- 1. They have an allowing rule that overrides any denying rules `can_read` + -- 2. They have admin access that overrides admin denials `is_admin` + -- 3. They own the datasite `f.datasite = user` SELECT f.path, f.hash, f.signature, f.file_size, f.last_modified, - -- User has access if: - -- 1. They have an allowing rule that overrides any denying rules OR - -- 2. They have admin access that overrides admin denials OR - -- 3. They own the datasite COALESCE( - pp.allow_priority > pp.deny_priority OR - pp.admin_allow_priority > pp.admin_deny_priority, + can_read OR is_admin, FALSE ) OR f.datasite = ? AS read_permission FROM file_metadata f - LEFT JOIN permission_priorities pp ON f.id = pp.file_id + LEFT JOIN final_permissions fp ON f.id = fp.file_id WHERE 1=1 {path_condition} """.format(path_condition=path_condition) From b5e4d85705f350f6bbe75c2ac7431ea851dadb81 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 12:40:12 +0100 Subject: [PATCH 06/11] better handling of slow server startup --- syftbox/client/cli_setup.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/syftbox/client/cli_setup.py b/syftbox/client/cli_setup.py index 12e1d203..762da9fd 100644 --- a/syftbox/client/cli_setup.py +++ b/syftbox/client/cli_setup.py @@ -4,7 +4,6 @@ import json import shutil -import time from pathlib import Path import httpx @@ -125,7 +124,7 @@ def setup_config_interactive( **SYFTBOX_HEADERS, HEADER_SYFTBOX_USER: conf.email, }, - transport=httpx.HTTPTransport(retries=5), + transport=httpx.HTTPTransport(retries=10), ) if not skip_verify_install: verify_installation(conf, login_client) @@ -171,12 +170,8 @@ def prompt_email() -> str: def verify_installation(conf: SyftClientConfig, client: httpx.Client) -> None: try: - try: - response = client.get("/info?verify_installation=1") - except httpx.ConnectError: - # try one more time, server may be starting (dev mode) - time.sleep(5) - response = client.get("/info?verify_installation=1") + response = client.get("/info?verify_installation=1") + response.raise_for_status() except (httpx.HTTPError, KeyError): From 5f574b89f3481523b670d7d2886ccbfdd6a27e11 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 12:46:52 +0100 Subject: [PATCH 07/11] fix tox --- syftbox/lib/profiling.py | 10 +++++----- tox.ini | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/syftbox/lib/profiling.py b/syftbox/lib/profiling.py index d7c3bd54..3081ddbf 100644 --- a/syftbox/lib/profiling.py +++ b/syftbox/lib/profiling.py @@ -5,7 +5,7 @@ import subprocess # nosec import tempfile import time -from typing import Iterator +from typing import Callable, Iterator @contextlib.contextmanager @@ -53,18 +53,18 @@ class FakeThread: Easy to swap with Thread when profiling is not needed and we want to run in the main thread. """ - def __init__(self, target, args=(), daemon=True): + def __init__(self, target: Callable, args: tuple = (), daemon: bool = True) -> None: self.target = target self.args = args self.daemon = daemon self.is_alive_flag = False - def start(self): + def start(self) -> None: self.is_alive_flag = True self.target(*self.args) - def is_alive(self): + def is_alive(self) -> bool: return self.is_alive_flag - def join(self): + def join(self) -> None: pass diff --git a/tox.ini b/tox.ini index 92775950..ed945c03 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,8 @@ requires = [testenv] runner = uv-venv-lock-runner with_dev = True +allowlist_externals = + pytest commands = python --version setenv = @@ -16,6 +18,8 @@ setenv = [testenv:syft.test.unit] description = Syft Unit Tests +allowlist_externals = + pytest commands = uv --version python --version From 2cc1bf72657cf382e11cc926c5a7cc124015fc49 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 12:51:41 +0100 Subject: [PATCH 08/11] fix tox --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index ed945c03..100f0e14 100644 --- a/tox.ini +++ b/tox.ini @@ -20,6 +20,8 @@ setenv = description = Syft Unit Tests allowlist_externals = pytest +deps = + pytest commands = uv --version python --version From a785b8d832613dd6da148b238c04f6bccb5634a3 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 12:53:41 +0100 Subject: [PATCH 09/11] revert tox changes --- tox.ini | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tox.ini b/tox.ini index 100f0e14..92775950 100644 --- a/tox.ini +++ b/tox.ini @@ -9,8 +9,6 @@ requires = [testenv] runner = uv-venv-lock-runner with_dev = True -allowlist_externals = - pytest commands = python --version setenv = @@ -18,10 +16,6 @@ setenv = [testenv:syft.test.unit] description = Syft Unit Tests -allowlist_externals = - pytest -deps = - pytest commands = uv --version python --version From f510f286ad6174f2ab3c20537fcd4f8cd03a52bd Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 13:00:49 +0100 Subject: [PATCH 10/11] fix CI and tox --- .github/workflows/pr-tests.yaml | 8 ++++---- tox.ini | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-tests.yaml b/.github/workflows/pr-tests.yaml index 03640e94..358716bf 100644 --- a/.github/workflows/pr-tests.yaml +++ b/.github/workflows/pr-tests.yaml @@ -66,13 +66,13 @@ jobs: # explicitly define which python version to use # else we may end up picking system default which is not the same as the actions/setup-python uv tool install tox --with tox-uv --python ${{ matrix.python-version }} - tox --version + uv runtox --version - name: Run unit tests env: TOX_PYTHON: python${{ matrix.python-version }} run: | - tox -e syft.test.unit + uv run tox -e syft.test.unit integration: strategy: @@ -117,10 +117,10 @@ jobs: # explicitly define which python version to use # else we may end up picking system default which is not the same as the actions/setup-python uv tool install tox --with tox-uv --python ${{ matrix.python-version }} - tox --version + uv run tox --version - name: Run Integration tests env: TOX_PYTHON: python${{ matrix.python-version }} run: | - tox -e syft.test.integration + uv run tox -e syft.test.integration diff --git a/tox.ini b/tox.ini index 92775950..4e9fffee 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,8 @@ requires = [testenv] runner = uv-venv-lock-runner with_dev = True +allowlist_externals = + pytest commands = python --version setenv = From 8b8be54f9b2ed58fd6b988e4fcda69203a9a7075 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 4 Feb 2025 13:01:47 +0100 Subject: [PATCH 11/11] typo --- .github/workflows/pr-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests.yaml b/.github/workflows/pr-tests.yaml index 358716bf..ebed4cf4 100644 --- a/.github/workflows/pr-tests.yaml +++ b/.github/workflows/pr-tests.yaml @@ -66,7 +66,7 @@ jobs: # explicitly define which python version to use # else we may end up picking system default which is not the same as the actions/setup-python uv tool install tox --with tox-uv --python ${{ matrix.python-version }} - uv runtox --version + uv run tox --version - name: Run unit tests env: