Skip to content

Commit

Permalink
Allow candidates_pth=bonus in RetrievalDataset
Browse files Browse the repository at this point in the history
  • Loading branch information
roman-bushuiev committed Oct 28, 2024
1 parent df2ff56 commit 3d1f083
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions massspecgym/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,15 @@ def __init__(
candidates_pth: T.Optional[T.Union[Path, str]] = None,
**kwargs,
):
"""
Args:
mol_label_transform (MolTransform, optional): Transformation to apply to the candidate molecules.
Defaults to `MolToInChIKey()`.
candidates_pth (Optional[Union[Path, str]], optional): Path to the .json file containing the candidates for
retrieval. Defaults to None, in which case the candidates for the standard `molecular retrieval` challenge
are downloaded from the HuggingFace Hub. If set to the string `"bonus"`, the candidates based on molecular
formulas for the `bonus chemical formulae challenge` are downloaded instead.
"""
super().__init__(**kwargs)

self.candidates_pth = candidates_pth
Expand All @@ -151,6 +160,10 @@ def __init__(
self.candidates_pth = utils.hugging_face_download(
"molecules/MassSpecGym_retrieval_candidates_mass.json"
)
elif self.candidates_pth == 'bonus':
self.candidates_pth = utils.hugging_face_download(
"molecules/MassSpecGym_retrieval_candidates_formula.json"
)
elif isinstance(self.candidates_pth, str):
if Path(self.candidates_pth).is_file():
self.candidates_pth = Path(self.candidates_pth)
Expand Down

0 comments on commit 3d1f083

Please sign in to comment.