Review revision of the submitted patch.

Changes relative to the patch as submitted:
  * .github/workflows/release.yml: add the mandatory `on:` trigger, grant
    `contents: read` for the checkout step, and build the sdist/wheel into
    dist/ so the publish step has something to upload.
  * Makefile: separate .PHONY prerequisites with whitespace (make does not
    treat commas as separators, so `install,` was being declared instead of
    `install`), and mount the repository with "$(CURDIR)" instead of
    "$(pwd)". (make expands `$(pwd)` itself, leaving only ".").

The `index` lines of those two files were dropped because their post-image
content changed and the recorded blob hashes no longer describe it.

NOTE(review): line breaks, recipe tabs and blank context lines were
reconstructed from a whitespace-collapsed copy of the patch; run
`git apply --check` against the target tree before applying.

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,34 @@
+name: Release
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  pypi-publish:
+    name: upload release to PyPI
+    runs-on: ubuntu-latest
+    # Specifying a GitHub environment is optional, but strongly encouraged
+    environment: pypi
+    permissions:
+      # IMPORTANT: this permission is mandatory for Trusted Publishing
+      id-token: write
+      # Needed by actions/checkout once job permissions are restricted
+      contents: read
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      # The publish action uploads whatever it finds in dist/
+      - name: Build sdist and wheel
+        run: |
+          python -m pip install --upgrade build
+          python -m build
+
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
index f2d4264..21dbd2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@ notes/
 .cursorignore
 .coveragerc
 
+*.nemo
 *.pyc
 *.pkl
 *.ini
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,12 @@
-.PHONY: docker, rapids, benchmark_db, optuna_db, clear_benchmark_db, clear_optuna_db
+.PHONY: install install-pep docker rapids bionemo molmim \
+    benchmark_db optuna_db clear_benchmark_db clear_optuna_db \
+    process_all_smiles_datasets run_bionemo
 
 install:
 	pip install -e .[dev,benchmark,data]
 
 install-pep:
-	pip install .[dev] --use-pep517
+	pip install .[dev,benchmark,data] --use-pep517
 
 docker:
 	docker compose up -d --build benchmark-postgres optuna-postgres
@@ -15,8 +17,10 @@ rapids:
 bionemo:
 	docker compose up -d --build bionemo
 
+# https://docs.nvidia.com/launchpad/ai/base-command-coe/latest/bc-coe-docker-basics-step-02.html
+# Requires an NGC account and API key to download
 molmim:
-	docker compose up -d --build molmim
+	sudo docker compose up -d --build molmim
 
 benchmark_db:
 	docker compose up -d --build benchmark-postgres
@@ -41,6 +45,22 @@ process_all_smiles_datasets:
 		--ulimit stack=67108864 \
 		--user $(id -u):$(id -g) \
 		-e CUDA_VISIBLE_DEVICES="0,1" \
-		-v "$(pwd)".:/home/rapids/notebooks/chem-mrl \
-		nvcr.io/nvidia/rapidsai/notebooks:24.08-cuda12.2-py3.11 \
-		bash -c "pip install -r /home/rapids/notebooks/chem-mrl/dataset/requirements.txt && python /home/rapids/notebooks/chem-mrl/dataset/process_all_smiles_datasets.py"
+		-v "$(CURDIR)":/chem-mrl \
+		nvcr.io/nvidia/rapidsai/notebooks:24.12-cuda12.5-py3.12 \
+		bash -c "pip install -r /chem-mrl/dataset/rapids-requirements.txt && python /chem-mrl/dataset/process_all_smiles_datasets.py"
+
+# Used to run scripts that depend on the bionemo framework.
+# NOTE(review): make expands `$(id -u)` / `$(id -g)` as (undefined) make
+# variables before the shell runs, here and in process_all_smiles_datasets;
+# write `$$(id -u):$$(id -g)` if the host UID/GID is meant to reach `--user`.
+run_bionemo:
+	docker run --rm -it \
+		--runtime=nvidia \
+		--gpus 1 \
+		--shm-size=20g \
+		--ulimit memlock=-1 \
+		--ulimit stack=67108864 \
+		--user $(id -u):$(id -g) \
+		-e CUDA_VISIBLE_DEVICES="0" \
+		-v "$(CURDIR)":/workspace/bionemo/chem-mrl \
+		nvcr.io/nvidia/clara/bionemo-framework:1.10.1
diff --git a/chem_mrl/trainers/BaseTrainer.py b/chem_mrl/trainers/BaseTrainer.py
index e5c4566..575aeac 100644
--- a/chem_mrl/trainers/BaseTrainer.py
+++ b/chem_mrl/trainers/BaseTrainer.py
@@ -159,11 +159,6 @@ def train(self, eval_callback: Callable | None):
             "lr": learning_rate,
             "weight_decay": weight_decay,
         }
-        if self.config.use_fused_adamw and not isinstance(
-            self.__optimizer, torch.optim.AdamW
-        ):
-            # FusedAdam requires adam_w_mode flag
-            optimizer_params["adam_w_mode"] = True
 
         self.model.old_fit(
             train_objectives=[(self.train_dataloader, self.loss_fct)],
diff --git a/dataset/bionemo-requirements.txt b/dataset/bionemo-requirements.txt
new file mode 100644
index 0000000..6a761cf
--- /dev/null
+++ b/dataset/bionemo-requirements.txt
@@ -0,0 +1 @@
+fastparquet==2024.11.0
diff --git a/dataset/process_all_smiles_datasets.py b/dataset/process_all_smiles_datasets.py
index 38d4096..c7e551b 100755
--- a/dataset/process_all_smiles_datasets.py
+++ b/dataset/process_all_smiles_datasets.py
@@ -10,9 +10,9 @@ from rdkit import RDLogger
 
 
 logging.basicConfig(
-    level=logging.INFO,  # Ensure INFO messages are printed
+    level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(message)s",
-    handlers=[logging.StreamHandler(sys.stdout)],  # Ensure logs go to stdout
+    handlers=[logging.StreamHandler(sys.stdout)],
 )
 
 RDLogger.DisableLog("rdApp.*")  # type: ignore - DisableLog is an exported function
diff --git a/dataset/requirements.txt b/dataset/rapids-requirements.txt
similarity index 80%
rename from dataset/requirements.txt
rename to dataset/rapids-requirements.txt
index ea659e4..c707a69 100755
--- a/dataset/requirements.txt
+++ b/dataset/rapids-requirements.txt
@@ -1,4 +1,3 @@
 fastparquet==2024.11.0
 rdkit==2024.9.4
-pandas==2.2.2
 pandarallel==1.6.5