scverse · ivirshup · Apr 10, 2024 · Apr 8, 2024 · Apr 9, 2024 · Apr 10, 2024
diff --git a/src/genomic_features/ensembl/ensembldb.py b/src/genomic_features/ensembl/ensembldb.py
@@ -19,9 +19,7 @@
 
 PKG_CACHE_DIR = "genomic-features"
 
-BIOC_ANNOTATION_HUB_URL = (
-    "https://bioconductorhubs.blob.core.windows.net/annotationhub"
-)
+BIOC_ANNOTATION_HUB_URL = "https://bioconductorhubs.blob.core.windows.net/annotationhub"
 ANNOTATION_HUB_URL = (
     "https://annotationhub.bioconductor.org/metadata/annotationhub.sqlite3"
 )
@@ -53,7 +51,7 @@ def annotation(
     """
     try:
         sqlite_file_path = retrieve_annotation(
-            f'{BIOC_ANNOTATION_HUB_URL}/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite'
+            f"{BIOC_ANNOTATION_HUB_URL}/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite"
         )
 
         if backend == "sqlite":
@@ -440,12 +438,24 @@ def _get_required_tables(self, tab) -> list:
         return self._tables_by_degree(tab)
 
     def list_columns(self, tables: str | list[str] | None = None) -> list[str]:
-        """List all columns available in the genomic features table."""
+        """List queryable columns available in these tables.
+
+        ``tables`` may be a single table name, a list of table names, or
+        ``None`` to use every table in the database except ``metadata``.
+        Each column name is returned once, in order of first appearance.
+        """
         if tables is None:
             tables = self.db.list_tables()  # list of table names
+            if "metadata" in tables:
+                tables.remove("metadata")
         elif isinstance(tables, str):
             tables = [tables]  # list of tables names (only one)
-        columns = [c for t in tables for c in self.db.table(t).columns]
+
+        # A column name can occur in more than one table; dict.fromkeys drops
+        # the repeats in a single pass while keeping first-seen order.
+        columns = list(
+            dict.fromkeys(c for t in tables for c in self.db.table(t).columns)
+        )
         return columns
 
     def _clean_columns(self, columns: list[str]) -> list[str]:

diff --git a/tests/test_columns.py b/tests/test_columns.py
@@ -125,3 +125,20 @@ def test_chromosome_columns(hsapiens108):
         .reset_index(drop=True)
     )
     pd.testing.assert_series_equal(result["seq_length"], expected_lengths)
+
+
+def test_list_columns_uniqueness(hsapiens108):
+    # No duplicate names: https://github.com/scverse/genomic-features/issues/42
+    cols = hsapiens108.list_columns()  # default: every table except metadata
+    assert len(cols) == len(set(cols))
+
+    cols = hsapiens108.list_columns(["gene", "tx"])  # explicit list of tables
+    assert len(cols) == len(set(cols))
+
+
+def test_list_columns_include_unqueryable_cols(hsapiens108):
+    # https://github.com/scverse/genomic-features/issues/42
+    cols = hsapiens108.list_columns()
+    # Despite the test name, columns from the metadata table must be absent
+    assert "value" not in cols
+    assert "name" not in cols