From 66148bebe4530d360650ae6ec82a56ec0bb142f7 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Wed, 10 Apr 2024 18:59:04 +1000 Subject: [PATCH] No more non-unique column names, or non-queryable columns in list_columns (#70) * No more non-unique column names, or non-queryable columns in list_columns * Fix order of columns returned by default --- src/genomic_features/ensembl/ensembldb.py | 13 ++++++++++--- tests/test_columns.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/genomic_features/ensembl/ensembldb.py b/src/genomic_features/ensembl/ensembldb.py index df04047..82f420d 100644 --- a/src/genomic_features/ensembl/ensembldb.py +++ b/src/genomic_features/ensembl/ensembldb.py @@ -438,12 +438,19 @@ def _get_required_tables(self, tab) -> list: return self._tables_by_degree(tab) def list_columns(self, tables: str | list[str] | None = None) -> list[str]: - """List all columns available in the genomic features table.""" + """List queryable columns available in these tables.""" if tables is None: - tables = self.db.list_tables() # list of table names + tables = self._tables_by_degree() # list of table names + if "metadata" in tables: + tables.remove("metadata") elif isinstance(tables, str): tables = [tables] # list of tables names (only one) - columns = [c for t in tables for c in self.db.table(t).columns] + + columns = [] + for t in tables: + for c in self.db.table(t).columns: + if c not in columns: + columns.append(c) return columns def _clean_columns(self, columns: list[str]) -> list[str]: diff --git a/tests/test_columns.py b/tests/test_columns.py index 29bb2e4..49ea628 100644 --- a/tests/test_columns.py +++ b/tests/test_columns.py @@ -125,3 +125,20 @@ def test_chromosome_columns(hsapiens108): .reset_index(drop=True) ) pd.testing.assert_series_equal(result["seq_length"], expected_lengths) + + +def test_list_columns_uniqueness(hsapiens108): + # https://github.com/scverse/genomic-features/issues/42 + cols = hsapiens108.list_columns() + assert len(cols) == len(set(cols)) + + cols = hsapiens108.list_columns(["gene", "tx"]) + assert len(cols) == len(set(cols)) + + +def test_list_columns_include_unqueryable_cols(hsapiens108): + # https://github.com/scverse/genomic-features/issues/42 + cols = hsapiens108.list_columns() + # From metadata + assert "value" not in cols + assert "name" not in cols