From 3cd1d910022704c97c0f8c66e77a185191c424a2 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:05:58 +0100 Subject: [PATCH 1/6] Add test for user-provided storage in disk usage calculation --- test/unit/data/test_quota.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unit/data/test_quota.py b/test/unit/data/test_quota.py index 5c34ea7cb056..c662f04c9238 100644 --- a/test/unit/data/test_quota.py +++ b/test/unit/data/test_quota.py @@ -111,6 +111,21 @@ def test_calculate_usage(self): assert u.calculate_disk_usage_default_source(object_store) == 10 + def test_calculate_usage_with_user_provided_storage(self): + u = self.u + + self._add_dataset(10) + # This dataset should not be counted towards the user's disk usage + self._add_dataset(30, object_store_id="user_objects://private/user/storage") + + object_store = MockObjectStore() + assert u.calculate_disk_usage_default_source(object_store) == 10 + assert u.disk_usage is None + u.calculate_and_set_disk_usage(object_store) + assert u.calculate_disk_usage_default_source(object_store) == 10 + + self._refresh_user_and_assert_disk_usage_is(10) + def test_calculate_usage_readjusts_incorrect_quota(self): u = self.u From 49bd07668024afdf71f075a622c8a35d94172274 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:59:36 +0100 Subject: [PATCH 2/6] Exclude user-provided object stores from quota calculation --- lib/galaxy/model/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 38aac43223e2..cdcbe5e3d941 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -644,6 +644,7 @@ def stderr(self, stderr): LEFT OUTER JOIN library_dataset_dataset_association ON dataset.id = library_dataset_dataset_association.dataset_id WHERE dataset.id IN (SELECT dataset_id FROM per_hist_hdas) AND library_dataset_dataset_association.id IS NULL + AND dataset.object_store_id NOT LIKE 'user_objects://%' {and_dataset_condition} """ From c4a6b302a201f44fda5789ab435083024e321f0b Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:34:51 +0100 Subject: [PATCH 3/6] Do not track private object sources in quota source mapping Uploading a dataset to a private object store will not be counted against the default quota source now. --- lib/galaxy/objectstore/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 4a4d639bf927..063bf8447d0e 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -1756,12 +1756,16 @@ def __init__(self, source=DEFAULT_QUOTA_SOURCE, enabled=DEFAULT_QUOTA_ENABLED): self.default_quota_source = source self.default_quota_enabled = enabled self.info = QuotaSourceInfo(self.default_quota_source, self.default_quota_enabled) + # Private sources are provided by the user and the quota is not tracked + self.private_source_info = QuotaSourceInfo(label=None, use=False) self.backends = {} self._labels = None - def get_quota_source_info(self, object_store_id): + def get_quota_source_info(self, object_store_id: Optional[str]) -> QuotaSourceInfo: if object_store_id in self.backends: return self.backends[object_store_id].get_quota_source_info(object_store_id) + elif self._is_private_source(object_store_id): + return self.private_source_info else: return self.info @@ -1809,6 +1813,9 @@ def ids_per_quota_source(self, include_default_quota_source=False): quota_sources[quota_source_label].append(object_id) return quota_sources + def _is_private_source(self, object_store_id: Optional[str]) -> bool: + return object_store_id is not None and object_store_id.startswith("user_objects://") + class ObjectCreationProblem(Exception): pass From 50ebdcc54cffb7c06ed3fcc1528b7880929090cf Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:01:25 +0100 Subject: [PATCH 4/6] Rename "private" to "user-defined" sources To avoid confusion with "private" as in "not shareable" object stores. And to make it clear that these are user-defined object stores. --- lib/galaxy/objectstore/__init__.py | 10 +++++----- test/unit/data/test_quota.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 063bf8447d0e..32788e0e9aa3 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -1756,16 +1756,16 @@ def __init__(self, source=DEFAULT_QUOTA_SOURCE, enabled=DEFAULT_QUOTA_ENABLED): self.default_quota_source = source self.default_quota_enabled = enabled self.info = QuotaSourceInfo(self.default_quota_source, self.default_quota_enabled) - # Private sources are provided by the user and the quota is not tracked - self.private_source_info = QuotaSourceInfo(label=None, use=False) + # User defined sources are provided by the user and the quota is not tracked + self.user_defined_source_info = QuotaSourceInfo(label=None, use=False) self.backends = {} self._labels = None def get_quota_source_info(self, object_store_id: Optional[str]) -> QuotaSourceInfo: if object_store_id in self.backends: return self.backends[object_store_id].get_quota_source_info(object_store_id) - elif self._is_private_source(object_store_id): - return self.private_source_info + elif self._is_user_defined_source(object_store_id): + return self.user_defined_source_info else: return self.info @@ -1813,7 +1813,7 @@ def ids_per_quota_source(self, include_default_quota_source=False): quota_sources[quota_source_label].append(object_id) return quota_sources - def _is_private_source(self, object_store_id: Optional[str]) -> bool: + def _is_user_defined_source(self, object_store_id: Optional[str]) -> bool: return object_store_id is not None and object_store_id.startswith("user_objects://") diff --git a/test/unit/data/test_quota.py b/test/unit/data/test_quota.py index c662f04c9238..1da9d611c77d 100644 --- a/test/unit/data/test_quota.py +++ b/test/unit/data/test_quota.py @@ -116,7 +116,7 @@ def test_calculate_usage_with_user_provided_storage(self): self._add_dataset(10) # This dataset should not be counted towards the user's disk usage - self._add_dataset(30, object_store_id="user_objects://private/user/storage") + self._add_dataset(30, object_store_id="user_objects://user/provided/storage") object_store = MockObjectStore() assert u.calculate_disk_usage_default_source(object_store) == 10 From c42245c36f153dce41fe9605faa0dc6c4aa55589 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:17:52 +0100 Subject: [PATCH 5/6] Fix condition to include null object store IDs This fixes the unit tests because when the dataset.object_store_id was null, the condition was also evaluated to null. --- lib/galaxy/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index cdcbe5e3d941..b6f7918c2271 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -644,7 +644,7 @@ def stderr(self, stderr): LEFT OUTER JOIN library_dataset_dataset_association ON dataset.id = library_dataset_dataset_association.dataset_id WHERE dataset.id IN (SELECT dataset_id FROM per_hist_hdas) AND library_dataset_dataset_association.id IS NULL - AND dataset.object_store_id NOT LIKE 'user_objects://%' + AND (dataset.object_store_id NOT LIKE 'user_objects://%' OR dataset.object_store_id IS NULL) {and_dataset_condition} """ From f761a43c14977579a441fe04e63bd5a91a7aa742 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:52:22 +0100 Subject: [PATCH 6/6] Refactor user object store checks to use single source of truth --- lib/galaxy/jobs/__init__.py | 3 ++- lib/galaxy/managers/object_store_instances.py | 3 ++- lib/galaxy/model/__init__.py | 18 +++++++++++++----- lib/galaxy/objectstore/__init__.py | 17 ++++++++++------- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 4ad932893b76..db860ba6c618 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -70,6 +70,7 @@ from galaxy.model.store import copy_dataset_instance_metadata_attributes from galaxy.model.store.discover import MaxDiscoveredFilesExceededError from galaxy.objectstore import ( + is_user_object_store, ObjectStorePopulator, serialize_static_object_store_config, ) @@ -2328,7 +2329,7 @@ def setup_external_metadata( required_user_object_store_uris = set() for out_dataset_instance in out_data.values(): object_store_id = out_dataset_instance.dataset.object_store_id - if object_store_id and object_store_id.startswith("user_objects://"): + if is_user_object_store(object_store_id): required_user_object_store_uris.add(object_store_id) job_metadata = os.path.join(self.tool_working_directory, self.tool.provided_metadata_file) diff --git a/lib/galaxy/managers/object_store_instances.py b/lib/galaxy/managers/object_store_instances.py index b0163c7be249..c7c3a1e9a0b3 100644 --- a/lib/galaxy/managers/object_store_instances.py +++ b/lib/galaxy/managers/object_store_instances.py @@ -31,6 +31,7 @@ build_test_object_store_from_user_config, ConcreteObjectStoreModel, QuotaModel, + USER_OBJECTS_SCHEME, UserObjectStoresAppConfig, ) from galaxy.objectstore.badges import serialize_badges @@ -300,7 +301,7 @@ def _to_model(self, trans, persisted_object_store: UserObjectStore) -> UserConcr ) secrets = persisted_object_store.template_secrets or [] uuid = str(persisted_object_store.uuid) - object_store_id = f"user_objects://{uuid}" + object_store_id = f"{USER_OBJECTS_SCHEME}{uuid}" return UserConcreteObjectStoreModel( uuid=uuid, diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index b6f7918c2271..255cb1fe6b9f 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -162,7 +162,10 @@ ) from galaxy.model.orm.now import now from galaxy.model.orm.util import add_object_to_object_session -from galaxy.objectstore import ObjectStorePopulator +from galaxy.objectstore import ( + ObjectStorePopulator, + USER_OBJECTS_SCHEME, +) from galaxy.objectstore.templates import ( ObjectStoreConfiguration, ObjectStoreTemplate, @@ -644,7 +647,7 @@ def stderr(self, stderr): LEFT OUTER JOIN library_dataset_dataset_association ON dataset.id = library_dataset_dataset_association.dataset_id WHERE dataset.id IN (SELECT dataset_id FROM per_hist_hdas) AND library_dataset_dataset_association.id IS NULL - AND (dataset.object_store_id NOT LIKE 'user_objects://%' OR dataset.object_store_id IS NULL) + AND (dataset.object_store_id NOT LIKE '{user_objects_scheme}%' OR dataset.object_store_id IS NULL) {and_dataset_condition} """ @@ -660,7 +663,9 @@ def calculate_user_disk_usage_statements(user_id, quota_source_map, for_sqlite=F default_usage_dataset_condition = f"{default_cond} {use_or} {exclude_cond}" if default_usage_dataset_condition.strip(): default_usage_dataset_condition = f"AND ( {default_usage_dataset_condition} )" - default_usage = UNIQUE_DATASET_USER_USAGE.format(and_dataset_condition=default_usage_dataset_condition) + default_usage = UNIQUE_DATASET_USER_USAGE.format( + and_dataset_condition=default_usage_dataset_condition, user_objects_scheme=USER_OBJECTS_SCHEME + ) default_usage = f""" UPDATE galaxy_user SET disk_usage = ({default_usage}) WHERE id = :id @@ -674,7 +679,8 @@ def calculate_user_disk_usage_statements(user_id, quota_source_map, for_sqlite=F # the object_store_id to quota_source_label into a temp table of values for quota_source_label, object_store_ids in source.items(): label_usage = UNIQUE_DATASET_USER_USAGE.format( - and_dataset_condition="AND ( dataset.object_store_id IN :include_object_store_ids )" + and_dataset_condition="AND ( dataset.object_store_id IN :include_object_store_ids )", + user_objects_scheme=USER_OBJECTS_SCHEME, ) if for_sqlite: # hacky alternative for older sqlite @@ -1127,7 +1133,9 @@ def calculate_disk_usage_default_source(self, object_store): if exclude_objectstore_ids else "" ) - default_usage = UNIQUE_DATASET_USER_USAGE.format(and_dataset_condition=default_usage_dataset_condition) + default_usage = UNIQUE_DATASET_USER_USAGE.format( + and_dataset_condition=default_usage_dataset_condition, user_objects_scheme=USER_OBJECTS_SCHEME + ) sql_calc = text(default_usage) params = {"id": self.id} bindparams = [bindparam("id")] diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 32788e0e9aa3..d07bb0643209 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -75,9 +75,15 @@ DEFAULT_QUOTA_SOURCE = None # Just track quota right on user object in Galaxy. DEFAULT_QUOTA_ENABLED = True # enable quota tracking in object stores by default DEFAULT_DEVICE_ID = None +USER_OBJECTS_SCHEME = "user_objects://" + log = logging.getLogger(__name__) +def is_user_object_store(object_store_id: Optional[str]) -> bool: + return object_store_id is not None and object_store_id.startswith(USER_OBJECTS_SCHEME) + + class UserObjectStoreResolver(Protocol): def resolve_object_store_uri_config(self, uri: str) -> ObjectStoreConfiguration: pass @@ -1288,7 +1294,7 @@ def _resolve_backend(self, object_store_id: str): try: return self.backends[object_store_id] except KeyError: - if object_store_id.startswith("user_objects://") and self.user_object_store_resolver: + if is_user_object_store(object_store_id) and self.user_object_store_resolver: return self.user_object_store_resolver.resolve_object_store_uri(object_store_id) raise @@ -1326,7 +1332,7 @@ def _merge_device_source_map(clz, device_source_map: "DeviceSourceMap", object_s def __get_store_id_for(self, obj, **kwargs): if obj.object_store_id is not None: - if obj.object_store_id in self.backends or obj.object_store_id.startswith("user_objects://"): + if obj.object_store_id in self.backends or is_user_object_store(obj.object_store_id): return obj.object_store_id else: log.warning( @@ -1365,7 +1371,7 @@ def validate_selected_object_store_id(self, user, object_store_id: Optional[str] if parent_check or object_store_id is None: return parent_check # user selection allowed and object_store_id is not None - if object_store_id.startswith("user_objects://"): + if is_user_object_store(object_store_id): if not user: return "Supplied object store id is not accessible" rest_of_uri = object_store_id.split("://", 1)[1] @@ -1764,7 +1770,7 @@ def __init__(self, source=DEFAULT_QUOTA_SOURCE, enabled=DEFAULT_QUOTA_ENABLED): def get_quota_source_info(self, object_store_id: Optional[str]) -> QuotaSourceInfo: if object_store_id in self.backends: return self.backends[object_store_id].get_quota_source_info(object_store_id) - elif self._is_user_defined_source(object_store_id): + elif is_user_object_store(object_store_id): return self.user_defined_source_info else: return self.info @@ -1813,9 +1819,6 @@ def ids_per_quota_source(self, include_default_quota_source=False): quota_sources[quota_source_label].append(object_id) return quota_sources - def _is_user_defined_source(self, object_store_id: Optional[str]) -> bool: - return object_store_id is not None and object_store_id.startswith("user_objects://") - class ObjectCreationProblem(Exception): pass