From eb79ed70a8fbac41d7f90aaba09b2112d2ad30d9 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Wed, 11 Oct 2023 16:20:55 +0200 Subject: [PATCH 01/23] FEAT: Add a query_tap method to SimbadClass SimbadClass.query_tap wraps the pyvo.dal.TAPService.run_async method. This commit also add the SimbadClass.simbad_mirrors and SimbadClass.tap attributes. --- astroquery/simbad/core.py | 120 +++++++++++++++++- astroquery/simbad/tests/test_simbad.py | 22 ++++ astroquery/simbad/tests/test_simbad_remote.py | 19 +++ 3 files changed, 159 insertions(+), 2 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index f027b7d611..c3b9e3f3d8 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -17,9 +17,11 @@ from astropy.table import Table import astropy.io.votable as votable -from astroquery.query import BaseQuery +from astroquery.query import BaseQuery, BaseVOQuery from astroquery.utils import commons, async_to_sync from astroquery.exceptions import TableParseError, LargeQueryWarning, BlankResponseWarning + +from pyvo.dal import TAPService from . import conf @@ -260,7 +262,7 @@ def _request(self, *args, **kwargs): @async_to_sync -class SimbadClass(SimbadBaseQuery): +class SimbadClass(SimbadBaseQuery, BaseVOQuery): """ The class for querying the Simbad web service. @@ -300,6 +302,41 @@ class SimbadClass(SimbadBaseQuery): def __init__(self): super().__init__() self._VOTABLE_FIELDS = self._VOTABLE_FIELDS.copy() + self._mirror = conf.server + self._tap = None + + @property + def simbad_mirrors(self): + """Set of the two Simbad mirrors domains.""" + return {'simbad.cds.unistra.fr', 'simbad.harvard.edu'} + + @property + def mirror(self): + """The Simbad mirror to use.""" + return self._mirror + + @mirror.setter + def mirror(self, server: str): + f"""Allows to switch server between Simbad mirrors. + + Parameters + ---------- + server : str + It should be one of {self.simbad_mirrors}. 
+ """ + if server in self.simbad_mirrors: + self._mirror = server + else: + raise ValueError(f"'{server}' does not correspond to a Simbad mirror, " + f"the two existing ones are {self.simbad_mirrors}.") + + @property + def tap(self): + """A ``~pyvo.dal.tap.TAPService`` service for Simbad.""" + tap_url = "https://" + self.mirror + "/simbad/sim-tap" + if (not self._tap) or (self._tap.baseurl != tap_url): + self._tap = TAPService(baseurl=tap_url, session=self._session) + return self._tap def list_wildcards(self): """ @@ -936,6 +973,85 @@ def query_objectids_async(self, object_name, *, cache=True, return response + def query_tap(self, query: str, maxrec=10000, uploads=None): + """Query Simbad TAP service. + + Parameters + ---------- + query : str + A string containing the query written in the + Astronomical Data Query Language (ADQL). + maxrec : int, optional + The number of records to be returned. Its maximum value is 2000000. + uploads : dict, optional + A dictionary of local tables to be used in the query. It should be + constructed as ``{"table_name": table}``.``table`` can be an + ``~astropy.table.table.Table``, an ``~astropy.io.votable.tree.VOTableFile`` + or a ``~pyvo.dal.DALResults`` object. In the ``query``, these tables are referred + as ``TAP_UPLOAD.table_name`` where ``TAP_UPLOAD`` is imposed and ``table_name`` + is the key of the ``uploads`` dictionary. The maximum number on lines for the + uploaded tables is 200000. + + Returns + ------- + `~pyvo.dal.TAPResults` + The response returned by Simbad. + It can be converted to astropy objects. To get an + `~astropy.io.votable.tree.Table` use `~pyvo.dal.TAPResults.votable``, + and to get an `~astropy.table.table.Table` use `~pyvo.dal.TAPResults.to_table()`. + + Notes + ----- + A TAP (Table Access Protocol) service allows to query data tables with + queries written in ADQL (Astronomical Data Query Language), a flavor + of the more general SQL (Structured Query Language). 
+ For more documentation about writing ADQL queries, you can read its official + documentation (`ADQL documentation `) + or the `Simbad ADQL cheat sheet `_. + See also: a `graphic representation of Simbad's tables and their relations + `_. + + Examples + -------- + To see the five oldest papers referenced in Simbad + >>> from astroquery.simbad import Simbad + >>> Simbad.query_tap("SELECT top 5 bibcode, title, nbobject" + ... "FROM ref ORDER BY bibcode") # doctest: +REMOTE_DATA + + bibcode title nbobject + object object int32 + ------------------- ----------------------------------------------------------------------- -------- + 1850CDT..1784....0A ??? 2 + 1850CDT..1784..227M Catalogue des nebuleuses et des amas d'etoiles. 111 + 1857AN.....45...89S Ueber veranderliche Sterne. 1 + 1861MNRAS..21...68B On the three new variable stars, T Bootis, T Serpentis, and S Delphini. 3 + 1874MNRAS..34...75S Nebulae discovered and observed at the observatory of Marseille. 1 + + Get the type of an object + >>> from astroquery.simbad import Simbad + >>> Simbad.query_tap("SELECT main_id, otype FROM basic WHERE main_id = 'm10'") # doctest: +REMOTE_DATA +
+ main_id otype + object object + ------- ------ + M 10 GlC + + Upload a table to use in a query + >>> from astroquery.simbad import Simbad + >>> from astropy.table import Table + >>> objects_table = Table([["M101", "NGC1343", "Abell1656"]], names=["objectname"]) + >>> Simbad.query_tap("SELECT * from TAP_UPLOAD.objects_table WHERE objectname = 'M101'", + ... uploads={"objects_table": objects_table}) # doctest: +REMOTE_DATA +
+ objectname + object + ---------- + M101 + """ + if maxrec > self.tap.hardlimit: + raise ValueError(f"The maximum number of records cannot exceed {self.tap.hardlimit}.") + return self.tap.run_async(query, maxrec=maxrec, uploads=uploads) + def _get_query_header(self, get_raw=False): # if get_raw is set then don't fetch as votable if get_raw: diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index d188119703..e3c04ad101 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -74,6 +74,28 @@ class last_query: return response +def test_simbad_mirror(): + simbad_instance = simbad.SimbadClass() + # default value should be set at instantiation + assert simbad_instance.mirror == "simbad.cds.unistra.fr" + # it can be switched to harvard mirror + simbad_instance.mirror = "simbad.harvard.edu" + assert simbad_instance.mirror == "simbad.harvard.edu" + with pytest.raises(ValueError, + match="'test' does not correspond to a Simbad mirror, *"): + # but not to an invalid mirror + simbad_instance.mirror = "test" + + +def test_simbad_create_tap_service(): + simbad_instance = simbad.Simbad() + # newly created should have no tap service + assert simbad_instance._tap is None + # then we create it + simbadtap = simbad_instance.tap + assert 'simbad/sim-tap' in simbadtap.baseurl + + @pytest.mark.parametrize(('radius', 'expected_radius'), [('5d0m0s', '5.0d'), ('5d', '5.0d'), diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py index d4801612f7..3c18fd16dc 100644 --- a/astroquery/simbad/tests/test_simbad_remote.py +++ b/astroquery/simbad/tests/test_simbad_remote.py @@ -11,6 +11,7 @@ # Maybe we need to expose SimbadVOTableResult to be in the public API? 
from astroquery.simbad.core import SimbadVOTableResult from astroquery.exceptions import BlankResponseWarning +from pyvo.dal import DALOverflowWarning # M42 coordinates @@ -252,3 +253,21 @@ def test_multi_vo_fields(self): assert ("ID_1" in response.keys()) assert ("ID_2mass" in response.keys()) assert ("ID_s" in response.keys()) + + def test_query_tap(self): + # a robust query about something that should not change in Simbad + filtername = Simbad.query_tap("select filtername from filter where filtername='B'") + assert 'B' == filtername.getvalue("filtername", 0) + # test uploads by joining two local tables + table_letters = Table([["a", "b", "c"]], names=["letters"]) + table_numbers = Table([[1, 2, 3], ["a", "b", "c"]], names=["numbers", "letters"]) + result = Simbad.query_tap("SELECT * FROM TAP_UPLOAD.numbers " + "JOIN TAP_UPLOAD.letters USING(letters)", + uploads={"numbers": table_numbers, "letters": table_letters}) + expect = ("
\nletters numbers\n object int64 \n------- -------" + "\n a 1\n b 2\n c 3") + assert expect in str(result) + # test of maxrec + with pytest.raises(DALOverflowWarning, match="Partial result set *"): + truncated_result = Simbad.query_tap("SELECT * from basic", maxrec=2) + assert len(truncated_result) == 2 From e6580a8fe2eb342a4ab41201d51744d9871630a8 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 12 Oct 2023 11:51:43 +0200 Subject: [PATCH 02/23] docs: reorganize sections in simbad docs queries are now in three sections: - objects-related queries - bibliography-related queries - mixed type queries --- docs/simbad/simbad.rst | 155 +++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 77 deletions(-) diff --git a/docs/simbad/simbad.rst b/docs/simbad/simbad.rst index 50d74de9c7..ec287aac65 100644 --- a/docs/simbad/simbad.rst +++ b/docs/simbad/simbad.rst @@ -16,15 +16,17 @@ formulated. If successful all the queries will return the results in a A warning about big queries --------------------------- -The SIMBAD database has limited querying capacity. If you spam the server with -queries, you may be temporary blacklisted. The rate limit may vary, but you -should not submit more than ~5-10 queries per second. -If you want to perform large queries, we suggest using vectorized queries -when possible. You can pass `~astroquery.simbad.SimbadClass.query_region` +The SIMBAD database is widely used and has to limit the rate of incoming queries. +If you spam the server with more than ~5-10 queries per second you will be +blacklisted for a week. If it happens to you, you can use the section about +:ref:`vectorized queries ` below. You can pass +`~astroquery.simbad.SimbadClass.query_region` a vector of coordinates or `~astroquery.simbad.SimbadClass.query_objects` a list of object names, and SIMBAD will treat this submission as a single -query. See :ref:`vectorized queries ` below. +query. 
+To get out of the blacklist, send an `email to the Simbad +database `__. Different ways to access Simbad ------------------------------- @@ -35,16 +37,19 @@ internally creates a `script query is also how the `Simbad web interface `__ operates. -Simbad provides another way to access its database via `TAP -Service `__. This may be better -suited for some usage. If you need a python client for TAP Services, -check out `pyvo `__. +A more versatile option is to query SIMBAD directly via Table Access Protocol +(TAP) with the `~astroquery.simbad.SimbadClass.query_tap` method. -Query an Identifier -------------------- +Query modes +=========== +Objects queries +--------------- + +Query by an Identifier +^^^^^^^^^^^^^^^^^^^^^^ -This is useful if you want to query a known identifier. For instance to query +This is useful if you want to query a known identifier (name). For instance to query the messier object M1: .. code-block:: python @@ -96,14 +101,62 @@ their functions: [abc] : Exactly one character taken in the list. Can also be defined by a range of characters: [A-Z] -Query a region --------------- +Query to get all names (identifiers) for an object +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +These queries can be used to retrieve all of the names (identifiers) +associated with an object. + +.. code-block:: python + + >>> from astroquery.simbad import Simbad + >>> result_table = Simbad.query_objectids("Polaris") + >>> print(result_table) + ID + ----------------------- + NAME Polaris + NAME North Star + NAME Lodestar + PLX 299 + SBC9 76 + * 1 UMi + * alf UMi + AAVSO 0122+88 + ADS 1477 A + AG+89 4 + BD+88 8 + CCDM J02319+8915A + CSI+88 8 1 + FK5 907 + GC 2243 + GCRV 1037 + ... 
+ PPM 431 + ROT 3491 + SAO 308 + SBC7 51 + SKY# 3738 + TD1 835 + TYC 4628-237-1 + UBV 21589 + UBV M 8201 + V* alf UMi + PLX 299.00 + WDS J02318+8916Aa,Ab + ADS 1477 AP + ** WRH 39 + WDS J02318+8916A + ** STF 93A + 2MASS J02314822+8915503 + + +Query a region +^^^^^^^^^^^^^^ Queries that support a cone search with a specified radius - around an identifier or given coordinates are also supported. If an identifier is used -then it will be resolved to coordinates using online name resolving services -available in Astropy. +then it will be resolved to coordinates using the `Sesame name resolver +`__. .. code-block:: python @@ -238,7 +291,7 @@ to 2000.0. So here is a query with all the options utilized: Query a catalogue ------------------ +^^^^^^^^^^^^^^^^^ Queries can also be formulated to return all the objects from a catalogue. For instance to query the ESO catalog: @@ -260,10 +313,11 @@ instance to query the ESO catalog: ESO 1-4 07 49 28.813 ... I 2006AJ....131.1163S ESO 1-5 08 53 05.006 ... I 2006AJ....131.1163S +Bibliographic queries +--------------------- Query a bibcode ---------------- - +^^^^^^^^^^^^^^^^ This retrieves the reference corresponding to a bibcode. @@ -329,59 +383,8 @@ from a given journal in a given year: Aluminium oxide in the optical spectrum of VY Canis Majoris. Files: (abstract) -Query object identifiers ------------------------- - - -These queries can be used to retrieve all of the names (identifiers) -associated with an object. - -.. code-block:: python - - >>> from astroquery.simbad import Simbad - >>> result_table = Simbad.query_objectids("Polaris") - >>> print(result_table) - ID - ----------------------- - NAME Polaris - NAME North Star - NAME Lodestar - PLX 299 - SBC9 76 - * 1 UMi - * alf UMi - AAVSO 0122+88 - ADS 1477 A - AG+89 4 - BD+88 8 - CCDM J02319+8915A - CSI+88 8 1 - FK5 907 - GC 2243 - GCRV 1037 - ... 
- PPM 431 - ROT 3491 - SAO 308 - SBC7 51 - SKY# 3738 - TD1 835 - TYC 4628-237-1 - UBV 21589 - UBV M 8201 - V* alf UMi - PLX 299.00 - WDS J02318+8916Aa,Ab - ADS 1477 AP - ** WRH 39 - WDS J02318+8916A - ** STF 93A - 2MASS J02314822+8915503 - - Query a bibobj --------------- - +^^^^^^^^^^^^^^ These queries can be used to retrieve all the objects that are contained in the article specified by the bibcode: @@ -408,7 +411,6 @@ article specified by the bibcode: Query based on any criteria ---------------------------- - Anything done in SIMBAD's `criteria interface`_ can be done via astroquery. See that link for details of how these queries are formed. @@ -440,7 +442,7 @@ See that link for details of how these queries are formed. Object type criteria --------------------- +^^^^^^^^^^^^^^^^^^^^ SIMBAD sets a ``maintype`` for each astronomical object that is related to the real type classification. Other object types (``otypes``) are given, which are related to some types coming from some surveys/observations. Depending on your needs, ``maintype`` or ``otype`` fields can be used. To use all subcategories of an object type, ``maintypes`` or ``otypes`` fields can also be used. @@ -475,7 +477,7 @@ See the dedicated SIMBAD `documentation on object types ` above, but we emphasize here that it works for all queries. 
From ab7c7d279e46d3b96791dec456657c11f55eabf5 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 12 Oct 2023 14:27:49 +0200 Subject: [PATCH 03/23] docs: add query TAP section --- astroquery/simbad/core.py | 35 ++++++++++++++++++++--------------- docs/simbad/query_tap.rst | 35 +++++++++++++++++++++++++++++++++++ docs/simbad/simbad.rst | 10 +++++++++- 3 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 docs/simbad/query_tap.rst diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index c3b9e3f3d8..ede58c1efb 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -974,31 +974,32 @@ def query_objectids_async(self, object_name, *, cache=True, return response def query_tap(self, query: str, maxrec=10000, uploads=None): - """Query Simbad TAP service. + """ + Query Simbad TAP service. Parameters ---------- query : str A string containing the query written in the Astronomical Data Query Language (ADQL). - maxrec : int, optional + maxrec : int, default: 10000 The number of records to be returned. Its maximum value is 2000000. - uploads : dict, optional - A dictionary of local tables to be used in the query. It should be - constructed as ``{"table_name": table}``.``table`` can be an - ``~astropy.table.table.Table``, an ``~astropy.io.votable.tree.VOTableFile`` - or a ``~pyvo.dal.DALResults`` object. In the ``query``, these tables are referred - as ``TAP_UPLOAD.table_name`` where ``TAP_UPLOAD`` is imposed and ``table_name`` - is the key of the ``uploads`` dictionary. The maximum number on lines for the + uploads : dict, default: None + A dictionary of local tables to be used in the *query*. It should be + constructed as *{"table_name": table}*.*table* can be an + `~astropy.table.table.Table`, an `~astropy.io.votable.tree.VOTableFile` + or a `~pyvo.dal.DALResults` object. 
In the *query*, these tables are referred + as *TAP_UPLOAD.table_name* where *TAP_UPLOAD* is imposed and *table_name* + is the key of the *uploads* dictionary. The maximum number on lines for the uploaded tables is 200000. Returns ------- `~pyvo.dal.TAPResults` The response returned by Simbad. - It can be converted to astropy objects. To get an - `~astropy.io.votable.tree.Table` use `~pyvo.dal.TAPResults.votable``, - and to get an `~astropy.table.table.Table` use `~pyvo.dal.TAPResults.to_table()`. + It can be converted to astropy objects. To get a + `~astropy.io.votable.tree.Table` use *.votable*, + and to get a `~astropy.table.table.Table` use *.to_table()*. Notes ----- @@ -1006,14 +1007,16 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): queries written in ADQL (Astronomical Data Query Language), a flavor of the more general SQL (Structured Query Language). For more documentation about writing ADQL queries, you can read its official - documentation (`ADQL documentation `) - or the `Simbad ADQL cheat sheet `_. + documentation (`ADQL documentation `__) + or the `Simbad ADQL cheat sheet `__. See also: a `graphic representation of Simbad's tables and their relations - `_. + `__. Examples -------- + To see the five oldest papers referenced in Simbad + >>> from astroquery.simbad import Simbad >>> Simbad.query_tap("SELECT top 5 bibcode, title, nbobject" ... "FROM ref ORDER BY bibcode") # doctest: +REMOTE_DATA @@ -1028,6 +1031,7 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): 1874MNRAS..34...75S Nebulae discovered and observed at the observatory of Marseille. 1 Get the type of an object + >>> from astroquery.simbad import Simbad >>> Simbad.query_tap("SELECT main_id, otype FROM basic WHERE main_id = 'm10'") # doctest: +REMOTE_DATA
@@ -1037,6 +1041,7 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): M 10 GlC Upload a table to use in a query + >>> from astroquery.simbad import Simbad >>> from astropy.table import Table >>> objects_table = Table([["M101", "NGC1343", "Abell1656"]], names=["objectname"]) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst new file mode 100644 index 0000000000..22a2c6c146 --- /dev/null +++ b/docs/simbad/query_tap.rst @@ -0,0 +1,35 @@ +`~astroquery.simbad.SimbadClass.query_tap` is the one query to rule them +all. It allows to interrogate SIMBAD with the Astronomical Data Query Language +(ADQL) and allows a direct access to all the tables that SIMBAD contains. + +.. glossary:: + + ADQL + Astronomical Data Query Language. This is a flavor of the Structured + Query Language (SQL) adapted to astronomy. To learn more about this + language, see the `ADQL documentation `__ + or the `Simbad ADQL cheat sheet `__. + + relational database + a database made of a collection of tables with connections between them. + To link tables, use the JOIN directives of the ADQL. See + `graphic representation of Simbad's tables and their relations + `__. + + TAP + Table Access Protocol. This describes the way tables can be accessed with + the ADQL. + +Available tables +^^^^^^^^^^^^^^^^ + +Available columns +^^^^^^^^^^^^^^^^^ + +Query TAP +^^^^^^^^^ + + + + + diff --git a/docs/simbad/simbad.rst b/docs/simbad/simbad.rst index ec287aac65..17d011c196 100644 --- a/docs/simbad/simbad.rst +++ b/docs/simbad/simbad.rst @@ -409,7 +409,15 @@ article specified by the bibcode: Query based on any criteria ----------------------------- +--------------------------- + +Query TAP +^^^^^^^^^ + +.. include:: query_tap.rst + +Query criteria +^^^^^^^^^^^^^^ Anything done in SIMBAD's `criteria interface`_ can be done via astroquery. See that link for details of how these queries are formed. 
From f003c5da91cbfc9113de8eddd28168fb07484182 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 19 Oct 2023 14:26:52 +0200 Subject: [PATCH 04/23] feat: add helper functions to explore SIMBAD tables this commit also adds two private methods useful when building queries from users input: _adql_parameter and _adql_name. --- astroquery/simbad/core.py | 238 ++++++++++++++++-- astroquery/simbad/tests/test_simbad.py | 34 +++ astroquery/simbad/tests/test_simbad_remote.py | 33 ++- 3 files changed, 276 insertions(+), 29 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index ede58c1efb..f621253d34 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -92,6 +92,39 @@ def strip_field(field, keep_filters=False): return field +def _adql_parameter(entry: str): + """Replace single quotes by two single quotes. + + This should be applied to parameters used in ADQL queries. + It is not a SQL injection protection: it just allows to search, for example, + for authors with quotes in their names or titles/descriptions with apostrophes. + + Parameters + ---------- + entry : str + + Returns + ------- + str + """ + return entry.replace("'", "''") + + +def _adql_name(name: str): + """Prepare a string to be used as a column or table name. + + It prepends and appends a double quote to the elements of the name. + This allows to escape ADQL reserved vocabulary. It then applies the + SIMBAD-specific (not in ADQL) `lowercase` function. + + Parameters + ---------- + name : str + The column name. 
+ """ + return f'''lowercase("{'.'.join([f'"{element}"' for element in name.split(".")])}")''' + + error_regex = re.compile(r'(?ms)\[(?P\d+)\]\s?(?P.+?)(\[|\Z)') SimbadError = namedtuple('SimbadError', ('line', 'msg')) VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'micro', 'patch')) @@ -973,6 +1006,147 @@ def query_objectids_async(self, object_name, *, cache=True, return response + def tables(self, get_adql=False): + """The names and descriptions of the tables in SIMBAD. + + Parameters + ---------- + get_adql : bool, optional + Returns the ADQL string instead of querying SIMBAD. + + Returns + ------- + `~astropy.table.table.Table` + """ + query = ("SELECT table_name, description" + " FROM TAP_SCHEMA.tables" + " WHERE schema_name = 'public'") + if get_adql: + return query + return self.query_tap(query) + + def columns(self, *tables: str, get_adql=False): + """ + Get the list of SIMBAD columns. + + Add tables names to restrict to some tables. Call the function without + any parameter to get all columns names. + + Parameters + ---------- + tables : str, optional + Add tables names as strings to restrict to these tables columns. + get_adql : bool, optional + Returns the ADQL string instead of querying SIMBAD. + + Examples + -------- + >>> from astroquery.simbad import Simbad + >>> Simbad.columns("ids", "ident") # doctest: +REMOTE_DATA +
+ table_name column_name datatype ... unit ucd + object object object ... object object + ---------- ----------- -------- ... ------ ------- + ident id VARCHAR ... meta.id + ident oidref BIGINT ... + ids ids CLOB ... meta.id + ids oidref BIGINT ... + """ + query = ("SELECT table_name, column_name, datatype, description, unit, ucd" + " FROM TAP_SCHEMA.columns" + " WHERE table_name NOT LIKE 'TAP_SCHEMA.%'") + if len(tables) == 1: + query += f" AND table_name = '{tables[0]}'" + elif len(tables) > 1: + query += f" AND table_name IN ({str(tables)[1:-1]})" + query += " ORDER BY table_name, principal DESC, column_name" + if get_adql: + return query + return self.query_tap(query) + + def find_columns_by_keyword(self, keyword: str, get_adql=False): + """ + Find columns by keyword. + + This looks for columns in all Simbad tables that contain the + given keyword. The search is not case-sensitive. + + Parameters + ---------- + keyword : str + A keyword to look for in column names, table names, or descriptions. + get_adql : bool, optional + Returns the ADQL string instead of querying SIMBAD. + + Returns + ------- + `~astropy.table.table.Table` + + Examples + -------- + >>> from astroquery.simbad import Simbad + >>> Simbad.find_columns_by_keyword("filter") # doctest: +REMOTE_DATA +
+ table_name column_name datatype ... unit ucd + object object object ... object object + ----------- ----------- ----------- ... ------ ---------------------- + filter description UNICODECHAR ... meta.note;instr.filter + filter filtername VARCHAR ... instr.filter + filter unit VARCHAR ... meta.unit + flux filter VARCHAR ... instr.filter + mesDiameter filter CHAR ... instr.filter + """ + condition = f"LIKE LOWERCASE('%{_adql_parameter(keyword)}%')" + query = ("SELECT table_name, column_name, datatype, description, unit, ucd" + " FROM TAP_SCHEMA.columns" + f" WHERE (LOWERCASE(column_name) {condition})" + f" OR (LOWERCASE(description) {condition})" + f" OR (LOWERCASE(table_name) {condition})" + " ORDER BY table_name, principal DESC, column_name") + if get_adql: + return query + return self.query_tap(query) + + def find_linked_tables(self, table: str, get_adql=False): + """ + Expose the tables that can be non-obviously linked with the given table. + + This is not exhaustive, this list contains only the links where the column names + are not the same in the two tables. For example every ``oidref`` column of any + table can be joined with any other ``oidref``. The same goes for every ``otype`` + column even if this is not returned by this method. + + Parameters + ---------- + table : str + The name of one of SIMBAD's tables. + get_adql : bool, optional + Returns the ADQL string instead of querying SIMBAD. + + Returns + ------- + `~astropy.table.table.Table` + The information necessary to join the given table to another. + + Examples + -------- + >>> from astroquery.simbad import Simbad + >>> Simbad.find_linked_tables("otypes") # doctest: +REMOTE_DATA +
+ from_table from_column target_table target_column + object object object object + ---------- ----------- ------------ ------------- + otypedef otype otypes otype + otypes oidref basic oid + """ + query = ("SELECT from_table, from_column, target_table, target_column" + " FROM TAP_SCHEMA.key_columns JOIN TAP_SCHEMA.keys USING (key_id)" + f" WHERE (from_table = '{_adql_parameter(table)}')" + f" OR (target_table = '{_adql_parameter(table)}')") + if get_adql: + return query + return self.query_tap(query) + def query_tap(self, query: str, maxrec=10000, uploads=None): """ Query Simbad TAP service. @@ -986,10 +1160,10 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): The number of records to be returned. Its maximum value is 2000000. uploads : dict, default: None A dictionary of local tables to be used in the *query*. It should be - constructed as *{"table_name": table}*.*table* can be an + constructed as *{"table_alias": table}*.*table* can be an `~astropy.table.table.Table`, an `~astropy.io.votable.tree.VOTableFile` or a `~pyvo.dal.DALResults` object. In the *query*, these tables are referred - as *TAP_UPLOAD.table_name* where *TAP_UPLOAD* is imposed and *table_name* + as *TAP_UPLOAD.table_alias* where *TAP_UPLOAD* is imposed and *table_alias* is the key of the *uploads* dictionary. The maximum number on lines for the uploaded tables is 200000. @@ -1012,50 +1186,66 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): See also: a `graphic representation of Simbad's tables and their relations `__. + See also + -------- + Helper functions to build queries. + tables : The list of SIMBAD's tables. + columns : SIMBAD's columns, can be restricted to some tables. + find_columns_by_keyword : Find columns matching a keyword. + find_linked_tables : Given a table, expose non-obvious possible joins with other tables. 
+ Examples -------- To see the five oldest papers referenced in Simbad - + >>> from astroquery.simbad import Simbad - >>> Simbad.query_tap("SELECT top 5 bibcode, title, nbobject" + >>> Simbad.query_tap("SELECT top 5 bibcode, title " ... "FROM ref ORDER BY bibcode") # doctest: +REMOTE_DATA
- bibcode title nbobject - object object int32 - ------------------- ----------------------------------------------------------------------- -------- - 1850CDT..1784....0A ??? 2 - 1850CDT..1784..227M Catalogue des nebuleuses et des amas d'etoiles. 111 - 1857AN.....45...89S Ueber veranderliche Sterne. 1 - 1861MNRAS..21...68B On the three new variable stars, T Bootis, T Serpentis, and S Delphini. 3 - 1874MNRAS..34...75S Nebulae discovered and observed at the observatory of Marseille. 1 + bibcode ... + object ... + ------------------- ... + 1850CDT..1784..227M ... + 1857AN.....45...89S ... + 1861MNRAS..21...68B ... + 1874MNRAS..34...75S ... + 1877AN.....89...13W ... - Get the type of an object + Get the type for a list of objects >>> from astroquery.simbad import Simbad - >>> Simbad.query_tap("SELECT main_id, otype FROM basic WHERE main_id = 'm10'") # doctest: +REMOTE_DATA -
+ >>> Simbad.query_tap("SELECT main_id, otype" + ... " FROM basic WHERE main_id IN ('m10', 'm13')") # doctest: +REMOTE_DATA +
main_id otype object object ------- ------ M 10 GlC + M 13 GlC Upload a table to use in a query >>> from astroquery.simbad import Simbad >>> from astropy.table import Table - >>> objects_table = Table([["M101", "NGC1343", "Abell1656"]], names=["objectname"]) - >>> Simbad.query_tap("SELECT * from TAP_UPLOAD.objects_table WHERE objectname = 'M101'", - ... uploads={"objects_table": objects_table}) # doctest: +REMOTE_DATA -
- objectname - object - ---------- - M101 + >>> objects_table = Table([["a", "b", "c"]], names=["column_name"]) + >>> Simbad.query_tap("SELECT TAP_UPLOAD.my_table_name.* from TAP_UPLOAD.my_table_name", + ... uploads={"my_table_name": objects_table}) # doctest: +REMOTE_DATA +
+ column_name + object + ----------- + a + b + c """ if maxrec > self.tap.hardlimit: raise ValueError(f"The maximum number of records cannot exceed {self.tap.hardlimit}.") - return self.tap.run_async(query, maxrec=maxrec, uploads=uploads) + if query.count("'") % 2: + raise ValueError("Query string contains an odd number of single quotes." + " Escape the unpaired single quote by doubling it.\n" + "ex: 'Barnard's galaxy' -> 'Barnard''s galaxy'.") + return self.tap.run_async(query, maxrec=maxrec, uploads=uploads).to_table() def _get_query_header(self, get_raw=False): # if get_raw is set then don't fetch as votable diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index e3c04ad101..e3a9bc1ffb 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -461,3 +461,37 @@ def test_regression_issue388(): truth = 'M 1' assert parsed_table['MAIN_ID'][0] == truth assert len(parsed_table) == 1 + +# --------------------------------------------------- +# Test the adql string for query_tap helper functions +# --------------------------------------------------- + + +def test_simbad_tables(): + tables_adql = "SELECT table_name, description FROM TAP_SCHEMA.tables WHERE schema_name = 'public'" + assert simbad.Simbad.tables(get_adql=True) == tables_adql + + +def test_simbad_columns(): + columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd" + " FROM TAP_SCHEMA.columns " + "WHERE table_name NOT LIKE 'TAP_SCHEMA.%'" + " AND table_name IN ('mesPM', 'otypedef', 'journals')" + " ORDER BY table_name, principal DESC, column_name") + assert simbad.Simbad.columns("mesPM", "otypedef", "journals", get_adql=True) == columns_adql + + +def test_find_columns_by_keyword(): + find_columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd " + "FROM TAP_SCHEMA.columns WHERE (LOWERCASE(column_name) " + "LIKE LOWERCASE('%stellar%')) OR (LOWERCASE(description) " + "LIKE 
LOWERCASE('%stellar%')) OR (LOWERCASE(table_name) " + "LIKE LOWERCASE('%stellar%')) ORDER BY table_name, principal DESC, column_name") + assert simbad.Simbad.find_columns_by_keyword("stellar", get_adql=True) == find_columns_adql + + +def test_find_linked_tables(): + find_linked_tables_adql = ("SELECT from_table, from_column, target_table, target_column " + "FROM TAP_SCHEMA.key_columns JOIN TAP_SCHEMA.keys USING (key_id) " + "WHERE (from_table = 'basic') OR (target_table = 'basic')") + assert simbad.Simbad.find_linked_tables("basic", get_adql=True) == find_linked_tables_adql diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py index 3c18fd16dc..8670d477d9 100644 --- a/astroquery/simbad/tests/test_simbad_remote.py +++ b/astroquery/simbad/tests/test_simbad_remote.py @@ -257,17 +257,40 @@ def test_multi_vo_fields(self): def test_query_tap(self): # a robust query about something that should not change in Simbad filtername = Simbad.query_tap("select filtername from filter where filtername='B'") - assert 'B' == filtername.getvalue("filtername", 0) + assert 'B' == filtername["filtername"][0] # test uploads by joining two local tables table_letters = Table([["a", "b", "c"]], names=["letters"]) table_numbers = Table([[1, 2, 3], ["a", "b", "c"]], names=["numbers", "letters"]) result = Simbad.query_tap("SELECT * FROM TAP_UPLOAD.numbers " "JOIN TAP_UPLOAD.letters USING(letters)", uploads={"numbers": table_numbers, "letters": table_letters}) - expect = ("
\nletters numbers\n object int64 \n------- -------" - "\n a 1\n b 2\n c 3") - assert expect in str(result) - # test of maxrec + expect = "letters numbers\n------- -------\n a 1\n b 2\n c 3" + assert expect == str(result) + # Test query_tap raised errors with pytest.raises(DALOverflowWarning, match="Partial result set *"): truncated_result = Simbad.query_tap("SELECT * from basic", maxrec=2) assert len(truncated_result) == 2 + with pytest.raises(ValueError, match="The maximum number of records cannot exceed 2000000."): + Simbad.query_tap("select top 5 * from basic", maxrec=10e10) + with pytest.raises(ValueError, match="Query string contains an odd number of single quotes.*"): + Simbad.query_tap("'''") + + def test_simbad_tables(self): + tables = Simbad.tables() + # check the content + assert "basic" in str(tables) + # there might be new tables, we have 30 now. + assert len(tables) >= 30 + + def test_simbad_columns(self): + columns = Simbad.columns("ident", "biblio") + assert len(columns) == 4 + assert "oidref" in str(columns) + + def test_find_columns_by_keyword(self): + columns = Simbad.find_columns_by_keyword("herschel") + assert {"mesHerschel"} == set(columns["table_name"]) + + def test_find_linked_tables(self): + links = Simbad.find_linked_tables("h_link") + assert {"basic"} == set(links["target_table"]) From fca6d152644dc80228dc6d6081deb4d5c15a97c6 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 19 Oct 2023 15:03:26 +0200 Subject: [PATCH 05/23] docs: add documentation for query_tap and its helper methods we also add a graphviz file that represents a quick view of simbad's tables --- astroquery/simbad/core.py | 6 +- docs/simbad/query_tap.rst | 285 +++++++++++++++++++++++++++++++++++--- docs/simbad/simbad-er.gv | 41 ++++++ docs/simbad/simbad.rst | 14 +- 4 files changed, 308 insertions(+), 38 deletions(-) create mode 100644 docs/simbad/simbad-er.gv diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index f621253d34..46012dad59 100644 --- 
a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -1169,11 +1169,8 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): Returns ------- - `~pyvo.dal.TAPResults` + `~astropy.table.table.Table` The response returned by Simbad. - It can be converted to astropy objects. To get a - `~astropy.io.votable.tree.Table` use *.votable*, - and to get a `~astropy.table.table.Table` use *.to_table()*. Notes ----- @@ -1188,7 +1185,6 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): See also -------- - Helper functions to build queries. tables : The list of SIMBAD's tables. columns : SIMBAD's columns, can be restricted to some tables. find_columns_by_keyword : Find columns matching a keyword. diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 22a2c6c146..6d7093a012 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -1,35 +1,274 @@ -`~astroquery.simbad.SimbadClass.query_tap` is the one query to rule them -all. It allows to interrogate SIMBAD with the Astronomical Data Query Language -(ADQL) and allows a direct access to all the tables that SIMBAD contains. - -.. glossary:: - - ADQL - Astronomical Data Query Language. This is a flavor of the Structured - Query Language (SQL) adapted to astronomy. To learn more about this - language, see the `ADQL documentation `__ - or the `Simbad ADQL cheat sheet `__. - - relational database - a database made of a collection of tables with connections between them. - To link tables, use the JOIN directives of the ADQL. See - `graphic representation of Simbad's tables and their relations - `__. - - TAP - Table Access Protocol. This describes the way tables can be accessed with - the ADQL. +`~astroquery.simbad.SimbadClass.query_tap` (for Table Access Protocol) is the one +query to rule them all. It allows to interrogate all the information in SIMBAD with the +Astronomical Data Query Language (ADQL). 
ADQL is a flavor of the Structured +Query Language (SQL) adapted to astronomy. To learn more about this language, +see the `ADQL documentation `__ +or the `Simbad ADQL cheat sheet `__. + +Structure of an ADQL query +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The method `~astroquery.simbad.SimbadClass.query_tap` is called with a string containing the +ADQL query. + +.. code-block:: SQL + + /*ADQL queries start with selecting the columns that will be in the output. Usually, + the column name is sufficient. If there is a need to lift ambiguity, add the table + name first (table_name.column_name). This is also where the number of rows is fixed + (here 5).*/ + SELECT TOP 5 basic.ra, basic.dec, main_id, nbref + /*Then comes the declaration of the tables to be included in the query. Here *basic* and + *ident*. Their common column is *oid* in *basic* and *oidref* in *ident*.*/ + FROM basic JOIN ident ON basic.oid = ident.oidref + /*The conditions come after. This query filters otypes that are not in any star + category, specific redshifts, and the results should have an NGC name in their + list of names.*/ + WHERE (otype != 'star..') AND (rvz_redshift < 1) AND (id LIKE 'NGC%') + /*The result is then sorted so that the top 5 selected correspond to + the objects cited by the largest number of papers.*/ + ORDER BY nbref DESC + +This ADQL query can be called with `~astroquery.simbad.SimbadClass.query_tap`: + +.. a bit long due to the ordering +.. doctest-skip:: + + >>> from astroquery.simbad import Simbad + >>> Simbad.query_tap("""SELECT TOP 5 basic.ra, basic.dec, main_id, nbref + FROM basic JOIN ident ON basic.oid = ident.oidref + WHERE (otype != 'star..') AND (rvz_redshift < 1) + AND (id LIKE 'NGC%') + ORDER BY nbref DESC""") +
+ ra dec main_id nbref + deg deg + float64 float64 object int32 + ------------------ ------------------ -------- ----- + 10.684708333333333 41.268750000000004 M 31 12412 + 13.158333333333333 -72.80027777777778 NAME SMC 10875 + 187.70593076725 12.391123246083334 M 87 7040 + 148.96845833333333 69.67970277777778 M 82 5769 + 23.46206906218 30.660175111980003 M 33 5737 + +The following sections cover a range of methods that help build ADQL queries. +A showcase of more complex queries comes after. Available tables ^^^^^^^^^^^^^^^^ +SIMBAD is a relational database. This means that it is a collection of tables with +links between them. You can access a `graphic representation of Simbad's tables and +their relations `__ or print +the names and descriptions of each table with the ``tables`` method: + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> Simbad.tables() # doctest: +IGNORE_OUTPUT +
+ table_name description + object object + ------------- ---------------------------------------------------------------------------- + basic General data about an astronomical object + ids all names concatenated with pipe + alltypes all object types concatenated with pipe + ident Identifiers of an astronomical object + cat Catalogues name + flux Magnitude/Flux information about an astronomical object + allfluxes all flux/magnitudes U,B,V,I,J,H,K,u_,g_,r_,i_,z_ + filter Description of a flux filter + has_ref Associations between astronomical objects and their bibliographic references + ref Bibliographic reference + author Author of a bibliographic reference + h_link hierarchy of membership measure + mesHerschel The Herschel observing Log + biblio Bibliography + keywords List of keywords in a paper + mesXmm XMM observing log. + mesVelocities Collection of HRV, Vlsr, cz and redshifts. + mesVar Collection of stellar variability types and periods. + mesRot Stellar Rotational Velocities. + mesPM Collection of proper motions. + mesPLX Collection of trigonometric parallaxes. + otypedef all names and definitions for the object types + mesIUE International Ultraviolet Explorer observing log. + mesISO Infrared Space Observatory (ISO) observing log. + mesFe_h Collection of metallicity, as well as Teff, logg for stars. + mesDiameter Collection of stellar diameters. + mesDistance Collection of distances (pc, kpc or Mpc) by several means. + otypes List of all object types associated with an object + mesSpT Collection of spectral types. + journals Description of all used journals in the database + +To join tables, any columns sharing the same name are possible links between tables. +To find the other possible joins, the `~astroquery.simbad.SimbadClass.find_linked_tables` method +can be useful. Here we look for possible links with the ``mesDiameter`` table + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> Simbad.find_linked_tables("mesDiameter") +
+ from_table from_column target_table target_column + object object object object + ----------- ----------- ------------ ------------- + mesDiameter oidref basic oid + +The output indicates that the ``mesDiameter`` table can be linked to ``basic`` with the following +join statement: ``[...] mesDiameter JOIN basic ON mesDiameter.oidref = basic.oid [...]``. + + +.. graphviz:: simbad-er.gv + :layout: neato + :caption: A quick view of SIMBAD's tables. Hover the links to see the linked columns. + :alt: This interactive graph summarizes the information that can be obtained with `~astroquery.simbad.SimbadClass.tables` and `~astroquery.simbad.SimbadClass.find_linked_tables`. + Available columns ^^^^^^^^^^^^^^^^^ -Query TAP -^^^^^^^^^ +`~astroquery.simbad.SimbadClass.columns` lists the columns in all or a subset of SIMBAD tables. +Calling it with no argument returns the 289 columns of SIMBAD. To restrict the output to +some tables, add their name. To get the columns of the table ``ref``: + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> Simbad.columns("ref") +
+ table_name column_name datatype ... unit ucd + object object object ... object object + ---------- ----------- ----------- ... ------ -------------------- + ref abstract UNICODECHAR ... meta.record + ref bibcode CHAR ... meta.bib.bibcode + ref doi VARCHAR ... meta.code;meta.bib + ref journal VARCHAR ... meta.bib.journal + ref last_page INTEGER ... meta.bib.page + ref nbobject INTEGER ... meta.number + ref oidbib BIGINT ... meta.record;meta.bib + ref page INTEGER ... meta.bib.page + ref title CLOB ... meta.title + ref volume INTEGER ... meta.bib.volume + ref year SMALLINT ... meta.note;meta.bib + +`~astroquery.simbad.SimbadClass.find_columns_by_keyword` returns columns for which the +given keyword is either in the table name, in the column name or in its description. +This is not case-sensitive. + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> Simbad.find_columns_by_keyword("Radial velocity") +
+ table_name column_name ... unit ucd + object object ... object object + ------------- --------------- ... ------ ----------------------------------- + basic rvz_bibcode ... meta.bib.bibcode;spect.dopplerVeloc + basic rvz_err ... km.s-1 stat.error;spect.dopplerVeloc + basic rvz_err_prec ... + basic rvz_qual ... meta.code.qual;spect.dopplerVeloc + basic rvz_radvel ... km.s-1 spect.dopplerVeloc.opt + basic rvz_radvel_prec ... + basic rvz_type ... + mesVelocities origin ... meta.note + +Example TAP queries +^^^^^^^^^^^^^^^^^^^ + +This section lists more complex queries by looking at use cases from former astroquery issues. + +`Getting all bibcodes containing a certain type of measurement for a given object. `__ +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The measurement tables -- the ones with names starting with ``mes``-- have a bibcode column +that corresponds to the paper in which the information was found. + +This query joins the tables ``ident`` to get all possible names of the object and ``mesrot`` +that is the measurement table for rotations. Their common column is ``oidref``. + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> query = """SELECT bibcode AS "Rotation Measurements Bibcodes" + ... FROM ident JOIN mesrot USING(oidref) + ... WHERE id = 'Sirius'; + ... """ + >>> Simbad.query_tap(query) +
+ Rotation Measurements Bibcodes + object + ------------------------------ + 2016A&A...589A..83G + 2002A&A...393..897R + 1995ApJS...99..135A + 1970CoKwa.189....0U + 1970CoAsi.239....1B + 2011A&A...531A.143D + +This returns six papers in which the SIMBAD team found rotation data for Sirius. + +`Criteria on region, measurements and object types `__ +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Here we search for objects that are not stars and have a redshift<0.4 in a cone search. All this information +is in the ``basic`` table. The ``star..`` syntax refers to any type of star. + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> query = """SELECT ra, dec, main_id, rvz_redshift, otype + ... FROM basic + ... WHERE otype != 'star..' + ... AND CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE('ICRS', 331.92, +12.44 , 0.25)) = 1 + ... AND rvz_redshift <= 0.4""" + >>> Simbad.query_tap(query) +
+ ra dec main_id rvz_redshift otype + deg deg + float64 float64 object float64 object + --------------- ------------------ ------------------------ ------------ ------ + 331.86493815752 12.61105991847 SDSS J220727.58+123639.8 0.11816 EmG + 331.80665742545 12.5032406228 SDSS J220713.60+123011.7 0.1477 EmG + 332.022027 12.29211 SDSS J220805.28+121731.5 0.12186 G + 331.984091 12.573282 SDSS J220756.18+123423.8 0.13824 G + 331.87489584192 12.5830568196 SDSS J220729.97+123458.8 0.03129 G + 331.77233978222 12.314639195540002 2MASX J22070538+1218523 0.079 G + 331.796426 12.426641 SDSS J220711.14+122535.9 0.07886 G + 331.68420630414 12.3609942055 2MASX J22064423+1221397 0.1219 G + 331.951995 12.431051 SDSS J220748.47+122551.7 0.16484 G + 332.171805 12.430204 SDSS J220841.23+122548.7 0.14762 G + 332.084711 12.486509 SDSS J220820.33+122911.4 0.12246 G + +This returns a few galaxies 'G' and emission-line galaxies 'EmG'. +`Get the members of a galaxy cluster `__ +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +All membership information is in the ``h_link`` table. We first need to retrieve the ``oidref`` +corresponding to the parent cluster SDSSCGB 350. This is done in the sub-query between parentheses. +Then, the ``basic`` table is joined with ``h_link`` and the sub-query result. +.. doctest-remote-data:: + >>> from astroquery.simbad import Simbad + >>> query = """SELECT main_id AS "child id", + ... otype, ra, dec, membership, + ... cluster_table.id AS "parent cluster" + ... FROM (SELECT oidref, id FROM ident WHERE id = 'SDSSCGB 350') AS cluster_table, + ... basic JOIN h_link ON basic.oid = h_link.child + ... WHERE h_link.parent = cluster_table.oidref; + ... """ + >>> Simbad.query_tap(query) +
+ child id otype ra ... membership parent cluster + deg ... percent + object object float64 ... int16 object + ------------------------ ------ ------------------ ... ---------- -------------- + SDSSCGB 350.4 G 243.18303333333336 ... 75 SDSSCGB 350 + SDSS J161245.36+281652.4 G 243.18900464937997 ... 75 SDSSCGB 350 + SDSSCGB 350.1 G 243.18618110644002 ... 75 SDSSCGB 350 + LEDA 1831614 G 243.189153 ... 75 SDSSCGB 350 + LEDA 1832284 G 243.187819 ... 100 SDSSCGB 350 + SDSSCGB 350.1 G 243.18618110644002 ... 100 SDSSCGB 350 + LEDA 1831614 G 243.189153 ... 100 SDSSCGB 350 diff --git a/docs/simbad/simbad-er.gv b/docs/simbad/simbad-er.gv new file mode 100644 index 0000000000..fa0bb01892 --- /dev/null +++ b/docs/simbad/simbad-er.gv @@ -0,0 +1,41 @@ +graph "Simbad Relational Database" { + node [color=lightgray penwidth=6 shape=box style=filled] + graph [esep="+0" mode=major overlap=false sep=0 splines=polyline] + edge [color=lightgray labelOverlay="100%" penwidth=7] + "Measurement tables" [label="{mesHerschel | mesXmm | mesVelocities | mesVar | mesRot | mesPM | mesPLX | mesIUE | mesISO | mesFe_h | mesDiameter | mesDistance | mesSpT }" shape=record] + basic -- "Measurement tables" [color="#9EADC8" tooltip="oid:oidref"] + allfluxes [tooltip="all flux/magnitudes U,B,V,I,J,H,K,u_,g_,r_,i_,z_"] + alltypes [tooltip="all object types concatenated with pipe"] + author [tooltip="Author of a bibliographic reference"] + basic [tooltip="General data about an astronomical object"] + biblio [tooltip=Bibliography] + cat [tooltip="Catalogues name"] + filter [tooltip="Description of a flux filter"] + flux [tooltip="Magnitude/Flux information about an astronomical object"] + h_link [tooltip="hierarchy of membership measure"] + has_ref [tooltip="Associations between astronomical objects and their bibliographic references"] + ident [tooltip="Identifiers of an astronomical object"] + ids [tooltip="all names concatenated with pipe"] + journals [tooltip="Description of all used journals in the 
database"] + keywords [tooltip="List of keywords in a paper"] + otypedef [tooltip="all names and definitions for the object types"] + otypes [tooltip="List of all object types associated with an object"] + ref [tooltip="Bibliographic reference"] + ids -- basic [color="#C7FFDA" tooltip="oidref:oid"] + otypedef -- otypes [color="#FEC1A7" tooltip="otype:otype"] + otypedef -- basic [color="#4CE0B3" tooltip="otype:otype"] + ident -- basic [color="#F0A8ED" tooltip="oidref:oid"] + flux -- basic [color="#FDD791" tooltip="oidref:oid"] + flux -- filter [color="#F4B6C2" tooltip="filter:filtername"] + allfluxes -- basic [color="#FE9599" tooltip="oidref:oid"] + has_ref -- ref [color="#FFF196" tooltip="oidbibref:oidbib"] + has_ref -- basic [color="#9EADC8" tooltip="oidref:oid"] + author -- ref [color="#C7FFDA" tooltip="oidbibref:oidbib"] + h_link -- basic [color="#FEC1A7" tooltip="parent:oid"] + h_link -- basic [color="#4CE0B3" tooltip="child:oid"] + biblio -- basic [color="#F0A8ED" tooltip="oidref:oid"] + keywords -- ref [color="#FDD791" tooltip="oidbibref:oidbib"] + ref -- journals [color="#F4B6C2" tooltip="journal:journal"] + alltypes -- basic [color="#FE9599" tooltip="oidref:oid"] + otypes -- basic [color="#FFF196" tooltip="oidref:oid"] +} diff --git a/docs/simbad/simbad.rst b/docs/simbad/simbad.rst index 17d011c196..bbbd847d4c 100644 --- a/docs/simbad/simbad.rst +++ b/docs/simbad/simbad.rst @@ -19,14 +19,12 @@ A warning about big queries The SIMBAD database is largely used and had to limit the rate of incoming queries. If you spam the server with more that ~5-10 queries per second you will be -blacklisted for a week. If it happens to you, you can use the section about +blacklisted for an hour. If it happens to you, you can use the section about :ref:`vectorized queries ` below. 
You can pass `~astroquery.simbad.SimbadClass.query_region` a vector of coordinates or `~astroquery.simbad.SimbadClass.query_objects` a list of object names, and SIMBAD will treat this submission as a single query. -To get out of the blacklist, send an `email to the Simbad -database `__. Different ways to access Simbad ------------------------------- @@ -407,17 +405,13 @@ article specified by the bibcode: NAME Lockman Hole 10 45 00.0 +58 00 00 5 5 ... -- 0 E 2011ApJ...734...99H NAME Gal Center 17 45 40.04 -29 00 28.1 6 6 ... -- 0 E - -Query based on any criteria ---------------------------- - Query TAP -^^^^^^^^^ +--------- .. include:: query_tap.rst -Query criteria -^^^^^^^^^^^^^^ +Query based on any criteria +--------------------------- Anything done in SIMBAD's `criteria interface`_ can be done via astroquery. See that link for details of how these queries are formed. From 14fe47d8e4e181a17def908affd676ee444251de Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 19 Oct 2023 16:04:17 +0200 Subject: [PATCH 06/23] test: add tests for _adql_parameter and _adql_name --- astroquery/simbad/core.py | 2 +- astroquery/simbad/tests/test_simbad.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 46012dad59..b1e714209d 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -122,7 +122,7 @@ def _adql_name(name: str): name : str The column name. 
""" - return f'''lowercase("{'.'.join([f'"{element}"' for element in name.split(".")])}")''' + return f'''lowercase({'.'.join([f'"{element}"' for element in name.split(".")])})''' error_regex = re.compile(r'(?ms)\[(?P\d+)\]\s?(?P.+?)(\[|\Z)') diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index e3a9bc1ffb..2bd030dc53 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -96,6 +96,15 @@ def test_simbad_create_tap_service(): assert 'simbad/sim-tap' in simbadtap.baseurl +def test_adql_parameter(): + # escape single quotes + assert simbad.core._adql_parameter("Barnard's galaxy") == "Barnard''s galaxy" + + +def test_adql_name(): + assert simbad.core._adql_name("biblio.year") == 'lowercase("biblio"."year")' + + @pytest.mark.parametrize(('radius', 'expected_radius'), [('5d0m0s', '5.0d'), ('5d', '5.0d'), @@ -473,12 +482,18 @@ def test_simbad_tables(): def test_simbad_columns(): + # with three table names columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd" " FROM TAP_SCHEMA.columns " "WHERE table_name NOT LIKE 'TAP_SCHEMA.%'" " AND table_name IN ('mesPM', 'otypedef', 'journals')" " ORDER BY table_name, principal DESC, column_name") assert simbad.Simbad.columns("mesPM", "otypedef", "journals", get_adql=True) == columns_adql + # with only one + columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd " + "FROM TAP_SCHEMA.columns WHERE table_name NOT LIKE 'TAP_SCHEMA.%' " + "AND table_name = 'basic' ORDER BY table_name, principal DESC, column_name") + assert simbad.Simbad.columns("basic", get_adql=True) == columns_adql def test_find_columns_by_keyword(): From 9076760667aae33ffbec95b61ccf6e18d01d22cd Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 19 Oct 2023 16:09:29 +0200 Subject: [PATCH 07/23] docs: add changelog entry --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst 
index f76217eb28..9acc6d7ebf 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -47,6 +47,13 @@ esa.hubble - New methods to download single files ``download_file`` and download FITS associated to an observation ``download_fits_files``. [#2797] - New function to retrieve all the files associated to an observation. [#2797] +simbad +^^^^^^ + +- new ``query_tap`` method to access SIMBAD. This comes with additional methods to explore SIMBAD's tables and + their links: ``Simbad.tables``, ``Simbad.columns``, ``Simbad.find_columns_by_keyword`` + and ``Simbad.find_linked_tables``. [#2856] + solarsystem.neodys ^^^^^^^^^^^^^^^^^^ From 6d31b1f2c079f413ab5994f3f62c070807e4da68 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 20 Oct 2023 14:48:54 +0200 Subject: [PATCH 08/23] test: add try/except on pyvo version for DALOverflowWarning --- astroquery/simbad/tests/test_simbad_remote.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py index 8670d477d9..1d2e70937f 100644 --- a/astroquery/simbad/tests/test_simbad_remote.py +++ b/astroquery/simbad/tests/test_simbad_remote.py @@ -11,7 +11,13 @@ # Maybe we need to expose SimbadVOTableResult to be in the public API? 
from astroquery.simbad.core import SimbadVOTableResult from astroquery.exceptions import BlankResponseWarning -from pyvo.dal import DALOverflowWarning +from packaging import version +from pyvo import __version__ as pyvo_version +try: + # This requires pyvo 1.4 + from pyvo.dal.exceptions import DALOverflowWarning +except ImportError: + pass # M42 coordinates @@ -267,9 +273,11 @@ def test_query_tap(self): expect = "letters numbers\n------- -------\n a 1\n b 2\n c 3" assert expect == str(result) # Test query_tap raised errors - with pytest.raises(DALOverflowWarning, match="Partial result set *"): - truncated_result = Simbad.query_tap("SELECT * from basic", maxrec=2) - assert len(truncated_result) == 2 + # DALOverflowWarning exists since pyvo 1.4 + if version.parse(pyvo_version) > version.parse('1.4'): + with pytest.raises(DALOverflowWarning, match="Partial result set *"): + truncated_result = Simbad.query_tap("SELECT * from basic", maxrec=2) + assert len(truncated_result) == 2 with pytest.raises(ValueError, match="The maximum number of records cannot exceed 2000000."): Simbad.query_tap("select top 5 * from basic", maxrec=10e10) with pytest.raises(ValueError, match="Query string contains an odd number of single quotes.*"): From 564517a109444081084076e5e2bd6853c1bbc75c Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Wed, 8 Nov 2023 15:50:08 +0100 Subject: [PATCH 09/23] fix: exchange order of BaseVOQuery and SimbadBaseQuery --- astroquery/simbad/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index b1e714209d..6572a07507 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -295,7 +295,7 @@ def _request(self, *args, **kwargs): @async_to_sync -class SimbadClass(SimbadBaseQuery, BaseVOQuery): +class SimbadClass(BaseVOQuery, SimbadBaseQuery): """ The class for querying the Simbad web service. 
From 96777c1f19c0141ce9fde2338b1494e9a9e50378 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Wed, 8 Nov 2023 18:15:32 +0100 Subject: [PATCH 10/23] refactor: rename tables and columns into list_tables and list_columns list_columns now accept a 'keyword' keyword argument to filter the output columns find_linked_tables is also renamed to list_linked_tables for homogenity --- CHANGES.rst | 3 +- astroquery/simbad/core.py | 96 +++++++++---------- astroquery/simbad/tests/test_simbad.py | 29 +++--- astroquery/simbad/tests/test_simbad_remote.py | 16 ++-- docs/simbad/query_tap.rst | 37 +++---- 5 files changed, 85 insertions(+), 96 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9acc6d7ebf..1c1faec141 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -51,8 +51,7 @@ simbad ^^^^^^ - new ``query_tap`` method to access SIMBAD. This comes with additional methods to explore SIMBAD's tables and - their links: ``Simbad.tables``, ``Simbad.columns``, ``Simbad.find_columns_by_keyword`` - and ``Simbad.find_linked_tables``. [#2856] + their links: ``Simbad.list_tables``, ``Simbad.list_columns``, and ``Simbad.list_linked_tables``. [#2856] solarsystem.neodys ^^^^^^^^^^^^^^^^^^ diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 6572a07507..49cc847415 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -1006,7 +1006,7 @@ def query_objectids_async(self, object_name, *, cache=True, return response - def tables(self, get_adql=False): + def list_tables(self, get_adql=False): """The names and descriptions of the tables in SIMBAD. Parameters @@ -1025,24 +1025,28 @@ def tables(self, get_adql=False): return query return self.query_tap(query) - def columns(self, *tables: str, get_adql=False): + def list_columns(self, *tables: str, keyword=None, get_adql=False): """ Get the list of SIMBAD columns. Add tables names to restrict to some tables. Call the function without - any parameter to get all columns names. 
+ any parameter to get all columns names from all tables. The keyword argument + looks for columns in the selected Simbad tables that contain the + given keyword. The keyword search is not case-sensitive. Parameters ---------- - tables : str, optional + *tables : str, optional Add tables names as strings to restrict to these tables columns. + keyword : str, optional + A keyword to look for in column names, table names, or descriptions. get_adql : bool, optional Returns the ADQL string instead of querying SIMBAD. Examples -------- >>> from astroquery.simbad import Simbad - >>> Simbad.columns("ids", "ident") # doctest: +REMOTE_DATA + >>> Simbad.list_columns("ids", "ident") # doctest: +REMOTE_DATA
table_name column_name datatype ... unit ucd object object object ... object object @@ -1051,41 +1055,9 @@ def columns(self, *tables: str, get_adql=False): ident oidref BIGINT ... ids ids CLOB ... meta.id ids oidref BIGINT ... - """ - query = ("SELECT table_name, column_name, datatype, description, unit, ucd" - " FROM TAP_SCHEMA.columns" - " WHERE table_name NOT LIKE 'TAP_SCHEMA.%'") - if len(tables) == 1: - query += f" AND table_name = '{tables[0]}'" - elif len(tables) > 1: - query += f" AND table_name IN ({str(tables)[1:-1]})" - query += " ORDER BY table_name, principal DESC, column_name" - if get_adql: - return query - return self.query_tap(query) - - def find_columns_by_keyword(self, keyword: str, get_adql=False): - """ - Find columns by keyword. - This looks for columns in all Simbad tables that contain the - given keyword. The search is not case-sensitive. - - Parameters - ---------- - keyword : str - A keyword to look for in column names, table names, or descriptions. - get_adql : bool, optional - Returns the ADQL string instead of querying SIMBAD. - - Returns - ------- - `~astropy.table.table.Table` - - Examples - -------- >>> from astroquery.simbad import Simbad - >>> Simbad.find_columns_by_keyword("filter") # doctest: +REMOTE_DATA + >>> Simbad.list_columns(keyword="filter") # doctest: +REMOTE_DATA
table_name column_name datatype ... unit ucd object object object ... object object @@ -1095,26 +1067,45 @@ def find_columns_by_keyword(self, keyword: str, get_adql=False): filter unit VARCHAR ... meta.unit flux filter VARCHAR ... instr.filter mesDiameter filter CHAR ... instr.filter + + >>> from astroquery.simbad import Simbad + >>> Simbad.list_columns("basic", keyword="object") # doctest: +REMOTE_DATA +
+ table_name column_name datatype ... unit ucd + object object object ... object object + ---------- ----------- -------- ... ------ ------------------- + basic main_id VARCHAR ... meta.id;meta.main + basic otype_txt VARCHAR ... src.class + basic oid BIGINT ... meta.record;meta.id + basic otype VARCHAR ... src.class """ - condition = f"LIKE LOWERCASE('%{_adql_parameter(keyword)}%')" query = ("SELECT table_name, column_name, datatype, description, unit, ucd" " FROM TAP_SCHEMA.columns" - f" WHERE (LOWERCASE(column_name) {condition})" - f" OR (LOWERCASE(description) {condition})" - f" OR (LOWERCASE(table_name) {condition})" - " ORDER BY table_name, principal DESC, column_name") + " WHERE table_name NOT LIKE 'TAP_SCHEMA.%'") + # select the tables + if len(tables) == 1: + query += f" AND table_name = '{tables[0]}'" + elif len(tables) > 1: + query += f" AND table_name IN ({str(tables)[1:-1]})" + # add the keyword condition + if keyword is not None: + condition = f"LIKE LOWERCASE('%{_adql_parameter(keyword)}%')" + query += (f" AND ( (LOWERCASE(column_name) {condition})" + f" OR (LOWERCASE(description) {condition})" + f" OR (LOWERCASE(table_name) {condition}))") + query += " ORDER BY table_name, principal DESC, column_name" if get_adql: return query return self.query_tap(query) - def find_linked_tables(self, table: str, get_adql=False): + def list_linked_tables(self, table: str, get_adql=False): """ Expose the tables that can be non-obviously linked with the given table. - This is not exhaustive, this list contains only the links where the column names - are not the same in the two tables. For example every ``oidref`` column of any - table can be joined with any other ``oidref``. The same goes for every ``otype`` - column even if this is not returned by this method. + This list contains only the links where the column names are not the same in the + two tables. For example every ``oidref`` column of any table can be joined with + any other ``oidref``. 
The same goes for every ``otype`` column even if this is not + returned by this method. Parameters ---------- @@ -1131,7 +1122,7 @@ def find_linked_tables(self, table: str, get_adql=False): Examples -------- >>> from astroquery.simbad import Simbad - >>> Simbad.find_linked_tables("otypes") # doctest: +REMOTE_DATA + >>> Simbad.list_linked_tables("otypes") # doctest: +REMOTE_DATA
from_table from_column target_table target_column object object object object @@ -1185,10 +1176,9 @@ def query_tap(self, query: str, maxrec=10000, uploads=None): See also -------- - tables : The list of SIMBAD's tables. - columns : SIMBAD's columns, can be restricted to some tables. - find_columns_by_keyword : Find columns matching a keyword. - find_linked_tables : Given a table, expose non-obvious possible joins with other tables. + list_tables : The list of SIMBAD's tables. + list_columns : SIMBAD's columns list, can be restricted to some tables and some keyword. + list_linked_tables : Given a table, expose non-obvious possible joins with other tables. Examples -------- diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index 2bd030dc53..6d2729d883 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -476,37 +476,36 @@ def test_regression_issue388(): # --------------------------------------------------- -def test_simbad_tables(): +def test_simbad_list_tables(): tables_adql = "SELECT table_name, description FROM TAP_SCHEMA.tables WHERE schema_name = 'public'" - assert simbad.Simbad.tables(get_adql=True) == tables_adql + assert simbad.Simbad.list_tables(get_adql=True) == tables_adql -def test_simbad_columns(): +def test_simbad_list_columns(): # with three table names columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd" " FROM TAP_SCHEMA.columns " "WHERE table_name NOT LIKE 'TAP_SCHEMA.%'" " AND table_name IN ('mesPM', 'otypedef', 'journals')" " ORDER BY table_name, principal DESC, column_name") - assert simbad.Simbad.columns("mesPM", "otypedef", "journals", get_adql=True) == columns_adql + assert simbad.Simbad.list_columns("mesPM", "otypedef", "journals", get_adql=True) == columns_adql # with only one columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd " "FROM TAP_SCHEMA.columns WHERE table_name NOT LIKE 'TAP_SCHEMA.%' 
" "AND table_name = 'basic' ORDER BY table_name, principal DESC, column_name") - assert simbad.Simbad.columns("basic", get_adql=True) == columns_adql - - -def test_find_columns_by_keyword(): - find_columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd " - "FROM TAP_SCHEMA.columns WHERE (LOWERCASE(column_name) " + assert simbad.Simbad.list_columns("basic", get_adql=True) == columns_adql + # with only a keyword + list_columns_adql = ("SELECT table_name, column_name, datatype, description, unit, ucd " + "FROM TAP_SCHEMA.columns WHERE table_name NOT LIKE 'TAP_SCHEMA.%' " + "AND ( (LOWERCASE(column_name) " "LIKE LOWERCASE('%stellar%')) OR (LOWERCASE(description) " "LIKE LOWERCASE('%stellar%')) OR (LOWERCASE(table_name) " - "LIKE LOWERCASE('%stellar%')) ORDER BY table_name, principal DESC, column_name") - assert simbad.Simbad.find_columns_by_keyword("stellar", get_adql=True) == find_columns_adql + "LIKE LOWERCASE('%stellar%'))) ORDER BY table_name, principal DESC, column_name") + assert simbad.Simbad.list_columns(keyword="stellar", get_adql=True) == list_columns_adql -def test_find_linked_tables(): - find_linked_tables_adql = ("SELECT from_table, from_column, target_table, target_column " +def test_list_linked_tables(): + list_linked_tables_adql = ("SELECT from_table, from_column, target_table, target_column " "FROM TAP_SCHEMA.key_columns JOIN TAP_SCHEMA.keys USING (key_id) " "WHERE (from_table = 'basic') OR (target_table = 'basic')") - assert simbad.Simbad.find_linked_tables("basic", get_adql=True) == find_linked_tables_adql + assert simbad.Simbad.list_linked_tables("basic", get_adql=True) == list_linked_tables_adql diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py index 1d2e70937f..ac2a9f2090 100644 --- a/astroquery/simbad/tests/test_simbad_remote.py +++ b/astroquery/simbad/tests/test_simbad_remote.py @@ -283,22 +283,20 @@ def test_query_tap(self): with pytest.raises(ValueError, 
match="Query string contains an odd number of single quotes.*"): Simbad.query_tap("'''") - def test_simbad_tables(self): - tables = Simbad.tables() + def test_simbad_list_tables(self): + tables = Simbad.list_tables() # check the content assert "basic" in str(tables) # there might be new tables, we have 30 now. assert len(tables) >= 30 - def test_simbad_columns(self): - columns = Simbad.columns("ident", "biblio") + def test_simbad_list_columns(self): + columns = Simbad.list_columns("ident", "biblio") assert len(columns) == 4 assert "oidref" in str(columns) - - def test_find_columns_by_keyword(self): - columns = Simbad.find_columns_by_keyword("herschel") + columns = Simbad.list_columns(keyword="herschel") assert {"mesHerschel"} == set(columns["table_name"]) - def test_find_linked_tables(self): - links = Simbad.find_linked_tables("h_link") + def test_list_linked_tables(self): + links = Simbad.list_linked_tables("h_link") assert {"basic"} == set(links["target_table"]) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 6d7093a012..23a041779f 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -60,12 +60,13 @@ Available tables SIMBAD is a relational database. This means that it is a collection of tables with links between them. You can access a `graphic representation of Simbad's tables and their relations `__ or print -the names and descriptions of each table with the ``tables`` attribute: +the names and descriptions of each table with the +`~astroquery.simbad.SimbadClass.list_tables` method: .. doctest-remote-data:: >>> from astroquery.simbad import Simbad - >>> Simbad.tables() # doctest: +IGNORE_OUTPUT + >>> Simbad.list_tables() # doctest: +IGNORE_OUTPUT
table_name description object object @@ -102,13 +103,13 @@ the names and descriptions of each table with the ``tables`` attribute: journals Description of all used journals in the database To join tables, any columns sharing the same name are possible links between tables. -To find the other possible joins, the `~astroquery.simbad.SimbadClass.find_linked_tables` method +To find the other possible joins, the `~astroquery.simbad.SimbadClass.list_linked_tables` method can be useful. Here we look for possible links with the ``mesDiameter`` table .. doctest-remote-data:: >>> from astroquery.simbad import Simbad - >>> Simbad.find_linked_tables("mesDiameter") + >>> Simbad.list_linked_tables("mesDiameter")
from_table from_column target_table target_column object object object object @@ -122,23 +123,25 @@ join statement: ``[...] mesDiameter JOIN basic ON mesDiameter.oidref = basic.oid .. graphviz:: simbad-er.gv :layout: neato :caption: A quick view of SIMBAD's tables. Hover the links to see the linked columns. - :alt: This interactive graph summarizes the information that can be obtained with `~astroquery.simbad.SimbadClass.tables` and `~astroquery.simbad.SimbadClass.find_linked_tables`. + :alt: This interactive graph summarizes the information that can be obtained with `~astroquery.simbad.SimbadClass.list_tables` and `~astroquery.simbad.SimbadClass.list_linked_tables`. Available columns ^^^^^^^^^^^^^^^^^ -`~astroquery.simbad.SimbadClass.columns` lists the columns in all or a subset of SIMBAD tables. +`~astroquery.simbad.SimbadClass.list_columns` lists the columns in all or a subset of SIMBAD tables. Calling it with no argument returns the 289 columns of SIMBAD. To restrict the output to -some tables, add their name. To get the columns of the table ``ref``: +some tables, add their name. To get the columns of the tables ``ref`` and ``biblio``: .. doctest-remote-data:: >>> from astroquery.simbad import Simbad - >>> Simbad.columns("ref") -
+ >>> Simbad.list_columns("ref", "biblio") +
table_name column_name datatype ... unit ucd object object object ... object object ---------- ----------- ----------- ... ------ -------------------- + biblio biblio TEXT ... meta.record;meta.bib + biblio oidref BIGINT ... meta.record;meta.id ref abstract UNICODECHAR ... meta.record ref bibcode CHAR ... meta.bib.bibcode ref doi VARCHAR ... meta.code;meta.bib @@ -151,25 +154,25 @@ some tables, add their name. To get the columns of the table ``ref``: ref volume INTEGER ... meta.bib.volume ref year SMALLINT ... meta.note;meta.bib -`~astroquery.simbad.SimbadClass.find_columns_by_keyword` returns columns for witch the -given keyword is either in the table name, in the column name or in its description. -This is not case-sensitive. +`~astroquery.simbad.SimbadClass.list_columns` can also be called with a keyword argument. +This returns columns from any table for witch the given keyword is either in the table name, +in the column name or in its description. This is not case-sensitive. .. doctest-remote-data:: >>> from astroquery.simbad import Simbad - >>> Simbad.find_columns_by_keyword("Radial velocity") + >>> Simbad.list_columns(keyword="Radial velocity")
- table_name column_name ... unit ucd - object object ... object object + table_name column_name ... unit ucd + object object ... object object ------------- --------------- ... ------ ----------------------------------- basic rvz_bibcode ... meta.bib.bibcode;spect.dopplerVeloc basic rvz_err ... km.s-1 stat.error;spect.dopplerVeloc basic rvz_err_prec ... basic rvz_qual ... meta.code.qual;spect.dopplerVeloc basic rvz_radvel ... km.s-1 spect.dopplerVeloc.opt - basic rvz_radvel_prec ... - basic rvz_type ... + basic rvz_radvel_prec ... + basic rvz_type ... mesVelocities origin ... meta.note Example TAP queries From 67aab6613e7775c8c54b5790f620e59ad0c92a2f Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 9 Nov 2023 08:59:25 +0100 Subject: [PATCH 11/23] style: make optional kwargs kwarg only --- astroquery/simbad/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 49cc847415..d18fd2d940 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -1006,7 +1006,7 @@ def query_objectids_async(self, object_name, *, cache=True, return response - def list_tables(self, get_adql=False): + def list_tables(self, *, get_adql=False): """The names and descriptions of the tables in SIMBAD. Parameters @@ -1098,7 +1098,7 @@ def list_columns(self, *tables: str, keyword=None, get_adql=False): return query return self.query_tap(query) - def list_linked_tables(self, table: str, get_adql=False): + def list_linked_tables(self, table: str, *, get_adql=False): """ Expose the tables that can be non-obviously linked with the given table. @@ -1138,7 +1138,7 @@ def list_linked_tables(self, table: str, get_adql=False): return query return self.query_tap(query) - def query_tap(self, query: str, maxrec=10000, uploads=None): + def query_tap(self, query: str, *, maxrec=10000, uploads=None): """ Query Simbad TAP service. 
From cc3aeabe5dd00b07a2d1eeb53211e288d1693e43 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 9 Nov 2023 11:28:32 +0100 Subject: [PATCH 12/23] docs: remove links to issues, rephrase adql example --- docs/simbad/query_tap.rst | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 23a041779f..c46177fdfa 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -19,12 +19,12 @@ ADQL query. (here 5).*/ SELECT TOP 5 basic.ra, basic.dec, main_id, nbref /*Then comes the declaration of the tables to be included in the query. Here *basic* and - *ids*. Their common column is *oid* in *basic* and *oidref* in *ident*.*/ + *ident*. Their common column is named *oid* in *basic* and *oidref* in *ident*.*/ FROM basic JOIN ident ON basic.oid = ident.oidref - /*The conditions come after. This query filters otypes that are not in any star - category, specific redshifts, and the results should have an NGC name in their - list of names.*/ - WHERE (otype != 'star..') AND (rvz_redshift < 1) AND (id LIKE 'NGC%') + /*The conditions come after. This query filters otypes that are not in any + cluster of star (Cl*) sub-category (..), specific redshifts, and the results should + have an NGC name in their list of names.*/ + WHERE (otype != 'Cl*..') AND (rvz_redshift < 1) AND (id LIKE 'NGC%') /*The result is then sorted so that the top 5 selected corresponds to the objects cited by the largest number of papers.*/ ORDER BY nbref DESC @@ -37,7 +37,7 @@ This ADQL query can be called with `~astroquery.simbad.SimbadClass.query_tap`: >>> from astroquery.simbad import Simbad >>> Simbad.query_tap("""SELECT TOP 5 basic.ra, basic.dec, main_id, nbref FROM basic JOIN ident ON basic.oid = ident.oidref - WHERE (otype != 'star..') AND (rvz_redshift < 1) + WHERE (otype != 'Cl*..') AND (rvz_redshift < 1) AND (id LIKE 'NGC%') ORDER BY nbref DESC""")
@@ -51,8 +51,9 @@ This ADQL query can be called with `~astroquery.simbad.SimbadClass.query_tap`: 148.96845833333333 69.67970277777778 M 82 5769 23.46206906218 30.660175111980003 M 33 5737 -The following sections cover a range of methods that help build ADQL queries. -A showcase of more complex queries comes after. +And voilà, we get the 5 NGC objects that are the most cited in literature, are not cluster +of stars, and have a redshift < 1. The following sections cover methods that help build ADQL +queries. A showcase of more complex queries comes after. Available tables ^^^^^^^^^^^^^^^^ @@ -180,8 +181,8 @@ Example TAP queries This section lists more complex queries by looking at use cases from former astroquery issues. -`Getting all bibcodes containing a certain type of measurement for a given object. `__ -"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +Getting all bibcodes containing a certain type of measurement for a given object +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" The measurement tables -- the ones with names starting with ``mes``-- have a bibcode column that corresponds to the paper in which the information was found. @@ -210,8 +211,8 @@ that is the measurement table for rotations. Their common column is ``oidref``. This returns six papers in which the SIMBAD team found rotation data for Sirius. -`Criteria on region, measurements and object types `__ -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +Criteria on region, measurements and object types +""""""""""""""""""""""""""""""""""""""""""""""""" Here we search for objects that are not stars and have a redshift<0.4 in a cone search. All this information is in the ``basic`` column. The ``star..`` syntax refers to any type of star. @@ -244,8 +245,8 @@ is in the ``basic`` column. 
The ``star..`` syntax refers to any type of star. This returns a few galaxies 'G' and emission-line galaxies 'EmG'. -`Get the members of a galaxy cluster `__ -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +Get the members of a galaxy cluster +""""""""""""""""""""""""""""""""""" All membership information is in the ``h_link`` table. We first need to retrieve the ``oidref`` corresponding to the parent cluster SDSSCGB 350. This is done is the sub-query between parenthesis. From 21ccd2b38c4e4b96042c59566676233755392cc1 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Thu, 9 Nov 2023 14:33:56 +0100 Subject: [PATCH 13/23] docs: remove doctest ignore on first TAP example --- docs/simbad/query_tap.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index c46177fdfa..4c4ff2ecff 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -31,15 +31,15 @@ ADQL query. This ADQL query can be called with `~astroquery.simbad.SimbadClass.query_tap`: -.. a bit long due to the ordering -.. doctest-skip:: +.. nbref changes often so we ignore the output here +.. doctest-remote-data:: >>> from astroquery.simbad import Simbad >>> Simbad.query_tap("""SELECT TOP 5 basic.ra, basic.dec, main_id, nbref - FROM basic JOIN ident ON basic.oid = ident.oidref - WHERE (otype != 'Cl*..') AND (rvz_redshift < 1) - AND (id LIKE 'NGC%') - ORDER BY nbref DESC""") + ... FROM basic JOIN ident ON basic.oid = ident.oidref + ... WHERE (otype != 'Cl*..') AND (rvz_redshift < 1) + ... AND (id LIKE 'NGC%') + ... ORDER BY nbref DESC""") # doctest: +IGNORE_OUTPUT
ra dec main_id nbref deg deg From 744262fe76a1c7fb467b4ee0ffe4bcf6b4b462d6 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 10 Nov 2023 11:57:29 +0100 Subject: [PATCH 14/23] feat: add caching to query_tap and hardlimit The caching decorator could not be applied for the uploaded tables because the astropy table objects are non-hashable. This implementation only caches calls without uploads as I could not find a workaround. --- astroquery/simbad/core.py | 73 ++++++++++++++----- astroquery/simbad/tests/test_simbad_remote.py | 2 +- 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index d18fd2d940..4c7f55609c 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -9,6 +9,7 @@ import os from collections import namedtuple from io import BytesIO +from functools import lru_cache, cached_property import warnings import astropy.units as u from astropy.utils import isiterable @@ -371,6 +372,15 @@ def tap(self): self._tap = TAPService(baseurl=tap_url, session=self._session) return self._tap + @cached_property + def hardlimit(self): + """The maximum number of lines for Simbad's output. + + This property is cached to avoid calls to simbad's capability + page each time `query_tap` is called. + """ + return self.tap.hardlimit + def list_wildcards(self): """ Displays the available wildcards that may be used in Simbad queries and @@ -1138,7 +1148,31 @@ def list_linked_tables(self, table: str, *, get_adql=False): return query return self.query_tap(query) - def query_tap(self, query: str, *, maxrec=10000, uploads=None): + @lru_cache(256) + def _cached_query_tap(self, query: str, *, maxrec=10000): + """Cache version of query TAP + + This private method is called when query_tap is executed without an + ``uploads`` extra keyword argument. 
This is a work around because + `~astropy.table.table.Table` objects are not hashable and thus cannot + be used as arguments for a function decorated with lru_cache. + + Parameters + ---------- + query : str + A string containing the query written in the + Astronomical Data Query Language (ADQL). + maxrec : int, optional + The number of records to be returned. Its maximum value is 2000000. + + Returns + ------- + `~astropy.table.table.Table` + The response returned by Simbad. + """ + return self.tap.run_async(query, maxrec=maxrec).to_table() + + def query_tap(self, query: str, *, maxrec=10000, **uploads): """ Query Simbad TAP service. @@ -1148,15 +1182,12 @@ def query_tap(self, query: str, *, maxrec=10000, uploads=None): A string containing the query written in the Astronomical Data Query Language (ADQL). maxrec : int, default: 10000 - The number of records to be returned. Its maximum value is 2000000. - uploads : dict, default: None - A dictionary of local tables to be used in the *query*. It should be - constructed as *{"table_alias": table}*.*table* can be an - `~astropy.table.table.Table`, an `~astropy.io.votable.tree.VOTableFile` - or a `~pyvo.dal.DALResults` object. In the *query*, these tables are referred - as *TAP_UPLOAD.table_alias* where *TAP_UPLOAD* is imposed and *table_alias* - is the key of the *uploads* dictionary. The maximum number on lines for the - uploaded tables is 200000. + The number of records to be returned. Its maximum value is given by + `~astroquery.simbad.SimbadClass.hardlimit`. + uploads : `~astropy.table.table.Table` | `~astropy.io.votable.tree.VOTableFile` | `~pyvo.dal.DALResults` + Any number of local tables to be used in the *query*. In the *query*, these tables + are referred as *TAP_UPLOAD.table_alias* where *TAP_UPLOAD* is imposed and *table_alias* + is the keyword name you chose. The maximum number of lines for the uploaded tables is 200000. 
Returns ------- @@ -1214,23 +1245,25 @@ def query_tap(self, query: str, *, maxrec=10000, uploads=None): >>> from astroquery.simbad import Simbad >>> from astropy.table import Table - >>> objects_table = Table([["a", "b", "c"]], names=["column_name"]) + >>> letters_table = Table([["a", "b", "c"]], names=["alphabet"]) >>> Simbad.query_tap("SELECT TAP_UPLOAD.my_table_name.* from TAP_UPLOAD.my_table_name", - ... uploads={"my_table_name": objects_table}) # doctest: +REMOTE_DATA + ... my_table_name=letters_table) # doctest: +REMOTE_DATA
- column_name - object - ----------- - a - b - c + alphabet + object + -------- + a + b + c """ - if maxrec > self.tap.hardlimit: - raise ValueError(f"The maximum number of records cannot exceed {self.tap.hardlimit}.") + if maxrec > Simbad.hardlimit: + raise ValueError(f"The maximum number of records cannot exceed {Simbad.hardlimit}.") if query.count("'") % 2: raise ValueError("Query string contains an odd number of single quotes." " Escape the unpaired single quote by doubling it.\n" "ex: 'Barnard's galaxy' -> 'Barnard''s galaxy'.") + if uploads == {}: + return self._cached_query_tap(query, maxrec=maxrec) return self.tap.run_async(query, maxrec=maxrec, uploads=uploads).to_table() def _get_query_header(self, get_raw=False): diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py index ac2a9f2090..b1a5c4844c 100644 --- a/astroquery/simbad/tests/test_simbad_remote.py +++ b/astroquery/simbad/tests/test_simbad_remote.py @@ -269,7 +269,7 @@ def test_query_tap(self): table_numbers = Table([[1, 2, 3], ["a", "b", "c"]], names=["numbers", "letters"]) result = Simbad.query_tap("SELECT * FROM TAP_UPLOAD.numbers " "JOIN TAP_UPLOAD.letters USING(letters)", - uploads={"numbers": table_numbers, "letters": table_letters}) + numbers=table_numbers, letters=table_letters) expect = "letters numbers\n------- -------\n a 1\n b 2\n c 3" assert expect == str(result) # Test query_tap raised errors From 821aec74d2febac0deac9f43181db4910d975224 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 10 Nov 2023 14:56:16 +0100 Subject: [PATCH 15/23] docs: add an example of Simbad.query_tap with an uploaded table --- docs/simbad/query_tap.rst | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 4c4ff2ecff..fe8d625ad5 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -276,3 +276,47 @@ Then, the ``basic`` table is joined 
with ``h_link`` and the sub-query result. SDSSCGB 350.1 G 243.18618110644002 ... 100 SDSSCGB 350 LEDA 1831614 G 243.189153 ... 100 SDSSCGB 350 +Query a long list of objects +"""""""""""""""""""""""""""" + +To query a list of objects (or coordinates, or bibliographic references), we can use the +ADQL criteria ``IN`` like so: + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> Simbad.query_tap("SELECT main_id, otype FROM basic WHERE main_id IN ('M1', 'M2', 'M3')") +
+ main_id otype + object object + ------- ------ + M 1 SNR + M 2 GlC + M 3 GlC + + +And that would work perfectly... until we reach the character limit for the ADQL query. This +is one of the use cases where the upload table capability is very useful. You can create/use +an `~astropy.table.table.Table` containing the desired list and use it in a ``JOIN`` to replace an ``IN``: + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> from astropy.table import Table + >>> list_of_objects = Table([["M1", "M2", "M3"]], names=["Messier_objects"]) + >>> query = """SELECT main_id, otype FROM basic + ... JOIN TAP_UPLOAD.messiers + ... ON basic.main_id = TAP_UPLOAD.messiers.Messier_objects + ... """ + >>> Simbad.query_tap(query, messiers=list_of_objects) +
+ main_id otype + object object + ------- ------ + M 1 SNR + M 2 GlC + M 3 GlC + +.. note:: + The uploaded tables are limited to 200000 lines. You might need to break your query into smaller + chunks if you have longer tables. \ No newline at end of file From 8ff6d7368adcd01286f52978a47e135cfad355af Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 10 Nov 2023 15:37:55 +0100 Subject: [PATCH 16/23] fix: remove use of functools.cache_property for python 3.7 support --- astroquery/simbad/core.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 4c7f55609c..74deec60c0 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -9,7 +9,7 @@ import os from collections import namedtuple from io import BytesIO -from functools import lru_cache, cached_property +from functools import lru_cache import warnings import astropy.units as u from astropy.utils import isiterable @@ -372,13 +372,16 @@ def tap(self): self._tap = TAPService(baseurl=tap_url, session=self._session) return self._tap - @cached_property + @property + @lru_cache(1) def hardlimit(self): """The maximum number of lines for Simbad's output. This property is cached to avoid calls to simbad's capability - page each time `query_tap` is called. + webpage each time the getter is called. 
""" + # replace stack of property and lru_cache by functools.cache_property when + # astroquery drops python 3.7 support return self.tap.hardlimit def list_wildcards(self): From 4983ab6b3e18e92665eae2db21ddf230713dd51b Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Mon, 13 Nov 2023 13:47:33 +0100 Subject: [PATCH 17/23] fix: switch simbad.mirror list into simbad.conf.server_list --- astroquery/simbad/__init__.py | 5 ++++- astroquery/simbad/core.py | 31 ++++++++++++-------------- astroquery/simbad/tests/test_simbad.py | 12 +++++----- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/astroquery/simbad/__init__.py b/astroquery/simbad/__init__.py index d664994c99..12618e3a9f 100644 --- a/astroquery/simbad/__init__.py +++ b/astroquery/simbad/__init__.py @@ -15,8 +15,11 @@ class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.simbad`. """ + # the first item is the default configuration + servers_list = ['simbad.cds.unistra.fr', 'simbad.harvard.edu'] + server = _config.ConfigItem( - ['simbad.cds.unistra.fr', 'simbad.harvard.edu'], + servers_list, 'Name of the SIMBAD mirror to use.') timeout = _config.ConfigItem( diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 74deec60c0..f52e48b7d5 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -336,38 +336,35 @@ class SimbadClass(BaseVOQuery, SimbadBaseQuery): def __init__(self): super().__init__() self._VOTABLE_FIELDS = self._VOTABLE_FIELDS.copy() - self._mirror = conf.server + self._server = conf.server self._tap = None @property - def simbad_mirrors(self): - """Set of the two Simbad mirrors domains.""" - return {'simbad.cds.unistra.fr', 'simbad.harvard.edu'} - - @property - def mirror(self): + def server(self): """The Simbad mirror to use.""" - return self._mirror + return self._server - @mirror.setter - def mirror(self, server: str): - f"""Allows to switch server between Simbad mirrors. 
+ @server.setter + def server(self, server: str): + """Allows to switch server between Simbad mirrors. Parameters ---------- server : str - It should be one of {self.simbad_mirrors}. + It should be one of `~astroquery.simbad.conf.servers_list`. """ - if server in self.simbad_mirrors: - self._mirror = server + if server in conf.servers_list: + self._server = server else: - raise ValueError(f"'{server}' does not correspond to a Simbad mirror, " - f"the two existing ones are {self.simbad_mirrors}.") + raise ValueError(f"'{server}' does not correspond to a Simbad server, " + f"the two existing ones are {conf.servers_list}.") @property def tap(self): """A ``~pyvo.dal.tap.TAPService`` service for Simbad.""" - tap_url = "https://" + self.mirror + "/simbad/sim-tap" + tap_url = f"https://{self.server}/simbad/sim-tap" + # only creates a new tap instance if there are no existing one + # or if the server property changed since the last getter call. if (not self._tap) or (self._tap.baseurl != tap_url): self._tap = TAPService(baseurl=tap_url, session=self._session) return self._tap diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index 6d2729d883..6ee28ebf11 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -77,14 +77,14 @@ class last_query: def test_simbad_mirror(): simbad_instance = simbad.SimbadClass() # default value should be set at instantiation - assert simbad_instance.mirror == "simbad.cds.unistra.fr" + assert simbad_instance.server == "simbad.cds.unistra.fr" # it can be switched to harvard mirror - simbad_instance.mirror = "simbad.harvard.edu" - assert simbad_instance.mirror == "simbad.harvard.edu" + simbad_instance.server = "simbad.harvard.edu" + assert simbad_instance.server == "simbad.harvard.edu" + # but not to an invalid mirror with pytest.raises(ValueError, - match="'test' does not correspond to a Simbad mirror, *"): - # but not to an invalid mirror - 
simbad_instance.mirror = "test" + match="'test' does not correspond to a Simbad server, *"): + simbad_instance.server = "test" def test_simbad_create_tap_service(): From 79d4010d2a5bad142fbabab60e286c851e7fd0c4 Mon Sep 17 00:00:00 2001 From: Manon Marchand Date: Wed, 22 Nov 2023 09:48:55 +0100 Subject: [PATCH 18/23] docs: fix english issues Co-authored-by: Adam Ginsburg --- docs/simbad/query_tap.rst | 6 +++--- docs/simbad/simbad.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index fe8d625ad5..22d087cab2 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -1,5 +1,5 @@ `~astroquery.simbad.SimbadClass.query_tap` (for Table Access Protocol) is the one -query to rule them all. It allows to interrogate all the information in SIMBAD with the +query to rule them all. It allows one to access all the information in SIMBAD with the Astronomical Data Query Language (ADQL). ADQL is a flavor of the Structured Query Language (SQL) adapted to astronomy. To learn more about this language, see the `ADQL documentation `__ @@ -51,7 +51,7 @@ This ADQL query can be called with `~astroquery.simbad.SimbadClass.query_tap`: 148.96845833333333 69.67970277777778 M 82 5769 23.46206906218 30.660175111980003 M 33 5737 -And voilà, we get the 5 NGC objects that are the most cited in literature, are not cluster +And voilà, we get the 5 NGC objects that are the most cited in literature, are not clusters of stars, and have a redshift < 1. The following sections cover methods that help build ADQL queries. A showcase of more complex queries comes after. @@ -156,7 +156,7 @@ some tables, add their name. To get the columns of the tables ``ref`` and ``bibl ref year SMALLINT ... meta.note;meta.bib `~astroquery.simbad.SimbadClass.list_columns` can also be called with a keyword argument. 
-This returns columns from any table for witch the given keyword is either in the table name, +This returns columns from any table for which the given keyword is either in the table name, in the column name or in its description. This is not case-sensitive. .. doctest-remote-data:: diff --git a/docs/simbad/simbad.rst b/docs/simbad/simbad.rst index bbbd847d4c..6283f2e245 100644 --- a/docs/simbad/simbad.rst +++ b/docs/simbad/simbad.rst @@ -17,7 +17,7 @@ formulated. If successful all the queries will return the results in a A warning about big queries --------------------------- -The SIMBAD database is largely used and had to limit the rate of incoming queries. +The SIMBAD database is widely used and has to limit the rate of incoming queries. If you spam the server with more that ~5-10 queries per second you will be blacklisted for an hour. If it happens to you, you can use the section about :ref:`vectorized queries ` below. You can pass From 56c8e1def736d946d00dbeccf4fcf387a285888d Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Wed, 22 Nov 2023 16:43:59 +0100 Subject: [PATCH 19/23] docs: increase font size in simbad graph a script to regenerate the DOT file is here : https://github.com/ManonMarchand/simbad-ER-diagram/blob/main/simbad-and-graphviz.py --- docs/simbad/query_tap.rst | 22 +++++++++++----------- docs/simbad/simbad-er.gv | 36 ++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 22d087cab2..6bd9ca236c 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -120,7 +120,6 @@ can be useful. Here we look for possible links with the ``mesDiameter`` table The output indicates that the ``mesDiameter`` table can be linked to ``basic`` with the following join statement: ``[...] mesDiameter JOIN basic ON mesDiameter.oidref = basic.oid [...]``. - .. graphviz:: simbad-er.gv :layout: neato :caption: A quick view of SIMBAD's tables.
Hover the links to see the linked columns. @@ -163,18 +162,19 @@ in the column name or in its description. This is not case-sensitive. >>> from astroquery.simbad import Simbad >>> Simbad.list_columns(keyword="Radial velocity") -
- table_name column_name ... unit ucd - object object ... object object - ------------- --------------- ... ------ ----------------------------------- - basic rvz_bibcode ... meta.bib.bibcode;spect.dopplerVeloc - basic rvz_err ... km.s-1 stat.error;spect.dopplerVeloc - basic rvz_err_prec ... - basic rvz_qual ... meta.code.qual;spect.dopplerVeloc - basic rvz_radvel ... km.s-1 spect.dopplerVeloc.opt +
+ table_name column_name ... unit ucd + object object ... object object + ------------- --------------- ... ------ ------------------------------------- + basic rvz_bibcode ... meta.bib.bibcode;spect.dopplerVeloc + basic rvz_err ... km.s-1 stat.error;spect.dopplerVeloc + basic rvz_err_prec ... + basic rvz_qual ... meta.code.qual;spect.dopplerVeloc + basic rvz_radvel ... km.s-1 spect.dopplerVeloc.opt basic rvz_radvel_prec ... basic rvz_type ... - mesVelocities origin ... meta.note + basic rvz_wavelength ... instr.bandpass;spect.dopplerVeloc.opt + mesVelocities origin ... meta.note Example TAP queries ^^^^^^^^^^^^^^^^^^^ diff --git a/docs/simbad/simbad-er.gv b/docs/simbad/simbad-er.gv index fa0bb01892..bd4781ef57 100644 --- a/docs/simbad/simbad-er.gv +++ b/docs/simbad/simbad-er.gv @@ -2,25 +2,25 @@ graph "Simbad Relational Database" { node [color=lightgray penwidth=6 shape=box style=filled] graph [esep="+0" mode=major overlap=false sep=0 splines=polyline] edge [color=lightgray labelOverlay="100%" penwidth=7] - "Measurement tables" [label="{mesHerschel | mesXmm | mesVelocities | mesVar | mesRot | mesPM | mesPLX | mesIUE | mesISO | mesFe_h | mesDiameter | mesDistance | mesSpT }" shape=record] + "Measurement tables" [label="{mesHerschel | mesXmm | mesVelocities | mesVar | mesRot | mesPM | mesPLX | mesIUE | mesISO | mesFe_h | mesDiameter | mesDistance | mesSpT }" fontsize=16 shape=record] basic -- "Measurement tables" [color="#9EADC8" tooltip="oid:oidref"] - allfluxes [tooltip="all flux/magnitudes U,B,V,I,J,H,K,u_,g_,r_,i_,z_"] - alltypes [tooltip="all object types concatenated with pipe"] - author [tooltip="Author of a bibliographic reference"] - basic [tooltip="General data about an astronomical object"] - biblio [tooltip=Bibliography] - cat [tooltip="Catalogues name"] - filter [tooltip="Description of a flux filter"] - flux [tooltip="Magnitude/Flux information about an astronomical object"] - h_link [tooltip="hierarchy of membership measure"] - has_ref 
[tooltip="Associations between astronomical objects and their bibliographic references"] - ident [tooltip="Identifiers of an astronomical object"] - ids [tooltip="all names concatenated with pipe"] - journals [tooltip="Description of all used journals in the database"] - keywords [tooltip="List of keywords in a paper"] - otypedef [tooltip="all names and definitions for the object types"] - otypes [tooltip="List of all object types associated with an object"] - ref [tooltip="Bibliographic reference"] + allfluxes [fontsize=16 tooltip="all flux/magnitudes U,B,V,I,J,H,K,u_,g_,r_,i_,z_"] + alltypes [fontsize=16 tooltip="all object types concatenated with pipe"] + author [fontsize=16 tooltip="Author of a bibliographic reference"] + basic [fontsize=16 tooltip="General data about an astronomical object"] + biblio [fontsize=16 tooltip=Bibliography] + cat [fontsize=16 tooltip="Catalogues name"] + filter [fontsize=16 tooltip="Description of a flux filter"] + flux [fontsize=16 tooltip="Magnitude/Flux information about an astronomical object"] + h_link [fontsize=16 tooltip="hierarchy of membership measure"] + has_ref [fontsize=16 tooltip="Associations between astronomical objects and their bibliographic references"] + ident [fontsize=16 tooltip="Identifiers of an astronomical object"] + ids [fontsize=16 tooltip="all names concatenated with pipe"] + journals [fontsize=16 tooltip="Description of all used journals in the database"] + keywords [fontsize=16 tooltip="List of keywords in a paper"] + otypedef [fontsize=16 tooltip="all names and definitions for the object types"] + otypes [fontsize=16 tooltip="List of all object types associated with an object"] + ref [fontsize=16 tooltip="Bibliographic reference"] ids -- basic [color="#C7FFDA" tooltip="oidref:oid"] otypedef -- otypes [color="#FEC1A7" tooltip="otype:otype"] otypedef -- basic [color="#4CE0B3" tooltip="otype:otype"] From b99ad4cf12b29d4b1cbad0492b7d6d58ed1ab07a Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 26 
Jan 2024 10:58:34 +0100 Subject: [PATCH 20/23] fix: updated datatype in doctest outputs some fields have changed their datatype upstream following #2916 --- astroquery/simbad/core.py | 3 ++- docs/simbad/query_tap.rst | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index f52e48b7d5..dd85eac2d3 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -1063,9 +1063,10 @@ def list_columns(self, *tables: str, keyword=None, get_adql=False): ---------- ----------- -------- ... ------ ------- ident id VARCHAR ... meta.id ident oidref BIGINT ... - ids ids CLOB ... meta.id + ids ids VARCHAR ... meta.id ids oidref BIGINT ... + >>> from astroquery.simbad import Simbad >>> Simbad.list_columns(keyword="filter") # doctest: +REMOTE_DATA
diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 6bd9ca236c..5bf643bd01 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -150,7 +150,7 @@ some tables, add their name. To get the columns of the tables ``ref`` and ``bibl ref nbobject INTEGER ... meta.number ref oidbib BIGINT ... meta.record;meta.bib ref page INTEGER ... meta.bib.page - ref title CLOB ... meta.title + ref title UNICODECHAR ... meta.title ref volume INTEGER ... meta.bib.volume ref year SMALLINT ... meta.note;meta.bib From d2b2bc4b8325e713a40d3150735d30d42e22760f Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 26 Jan 2024 11:13:04 +0100 Subject: [PATCH 21/23] edit intersphinx links --- astroquery/simbad/core.py | 14 +++++++------- astroquery/vizier/core.py | 2 +- docs/simbad/query_tap.rst | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index dd85eac2d3..042712d459 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -361,7 +361,7 @@ def server(self, server: str): @property def tap(self): - """A ``~pyvo.dal.tap.TAPService`` service for Simbad.""" + """A `~pyvo.dal.TAPService` service for Simbad.""" tap_url = f"https://{self.server}/simbad/sim-tap" # only creates a new tap instance if there are no existing one # or if the server property changed since the last getter call. @@ -1026,7 +1026,7 @@ def list_tables(self, *, get_adql=False): Returns ------- - `~astropy.table.table.Table` + `~astropy.table.Table` """ query = ("SELECT table_name, description" " FROM TAP_SCHEMA.tables" @@ -1127,7 +1127,7 @@ def list_linked_tables(self, table: str, *, get_adql=False): Returns ------- - `~astropy.table.table.Table` + `~astropy.table.Table` The information necessary to join the given table to an other. 
Examples @@ -1155,7 +1155,7 @@ def _cached_query_tap(self, query: str, *, maxrec=10000): This private method is called when query_tap is executed without an ``uploads`` extra keyword argument. This is a work around because - `~astropy.table.table.Table` objects are not hashable and thus cannot + `~astropy.table.Table` objects are not hashable and thus cannot be used as arguments for a function decorated with lru_cache. Parameters @@ -1168,7 +1168,7 @@ def _cached_query_tap(self, query: str, *, maxrec=10000): Returns ------- - `~astropy.table.table.Table` + `~astropy.table.Table` The response returned by Simbad. """ return self.tap.run_async(query, maxrec=maxrec).to_table() @@ -1185,14 +1185,14 @@ def query_tap(self, query: str, *, maxrec=10000, **uploads): maxrec : int, default: 10000 The number of records to be returned. Its maximum value is given by `~astroquery.simbad.SimbadClass.hardlimit`. - uploads : `~astropy.table.table.Table` | `~astropy.io.votable.tree.VOTableFile` | `~pyvo.dal.DALResults` + uploads : `~astropy.table.Table` | `~astropy.io.votable.tree.VOTableFile` | `~pyvo.dal.DALResults` Any number of local tables to be used in the *query*. In the *query*, these tables are referred as *TAP_UPLOAD.table_alias* where *TAP_UPLOAD* is imposed and *table_alias* is the keyword name you chose. The maximum number of lines for the uploaded tables is 200000. Returns ------- - `~astropy.table.table.Table` + `~astropy.table.Table` The response returned by Simbad. 
Notes diff --git a/astroquery/vizier/core.py b/astroquery/vizier/core.py index 3a9596ad84..fc1b0ee072 100644 --- a/astroquery/vizier/core.py +++ b/astroquery/vizier/core.py @@ -290,7 +290,7 @@ def get_catalog_metadata(self, *, catalog=None, get_query_payload=False): Returns ------- - `~astropy.table.table.Table` + `~astropy.table.Table` A table with the following columns: - title - authors diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 5bf643bd01..37185bcf0e 100644 --- a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -297,7 +297,7 @@ ADQL criteria ``IN`` like so: And that would work perfectly... until we reach the character limit for the ADQL query. This is one of the example use case where the upload table capability is very useful. You can create/use -an `~astropy.table.table.Table` containing the desired list and use it in a ``JOIN`` to replace an ``IN``: +an `~astropy.table.Table` containing the desired list and use it in a ``JOIN`` to replace an ``IN``: .. 
doctest-remote-data:: From 130482440b65e921df73a98f31e90cdb33e06da2 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 26 Jan 2024 14:16:20 +0100 Subject: [PATCH 22/23] fix: simplify IN statement --- astroquery/simbad/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 042712d459..6d310187c8 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -1097,7 +1097,7 @@ def list_columns(self, *tables: str, keyword=None, get_adql=False): if len(tables) == 1: query += f" AND table_name = '{tables[0]}'" elif len(tables) > 1: - query += f" AND table_name IN ({str(tables)[1:-1]})" + query += f" AND table_name IN {tables}" # add the keyword condition if keyword is not None: condition = f"LIKE LOWERCASE('%{_adql_parameter(keyword)}%')" From ccf93cbcf83b63f4890f6a16102400ab3718edff Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Fri, 26 Jan 2024 15:14:01 +0100 Subject: [PATCH 23/23] fix: remove unused adql_name function --- astroquery/simbad/core.py | 15 --------------- astroquery/simbad/tests/test_simbad.py | 4 ---- 2 files changed, 19 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 6d310187c8..3d7ba33d57 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -111,21 +111,6 @@ def _adql_parameter(entry: str): return entry.replace("'", "''") -def _adql_name(name: str): - """Prepare a string to be used as a column or table name. - - It prepends and appends a double quote to the elements of the name. - This allows to escape ADQL reserved vocabulary. It then applies the - SIMBAD-specific (not in ADQL) `lowercase` function. - - Parameters - ---------- - name : str - The column name. 
- """ - return f'''lowercase({'.'.join([f'"{element}"' for element in name.split(".")])})''' - - error_regex = re.compile(r'(?ms)\[(?P\d+)\]\s?(?P.+?)(\[|\Z)') SimbadError = namedtuple('SimbadError', ('line', 'msg')) VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'micro', 'patch')) diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index 6ee28ebf11..45047ba725 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -101,10 +101,6 @@ def test_adql_parameter(): assert simbad.core._adql_parameter("Barnard's galaxy") == "Barnard''s galaxy" -def test_adql_name(): - assert simbad.core._adql_name("biblio.year") == 'lowercase("biblio"."year")' - - @pytest.mark.parametrize(('radius', 'expected_radius'), [('5d0m0s', '5.0d'), ('5d', '5.0d'),