diff --git a/.gitignore b/.gitignore
index fd2b2e9..8e3cca0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,5 @@ trial*
!trial.py
inputs
docs/book/
+MSM/*_test.ipynb
+test/
\ No newline at end of file
diff --git a/README.md b/README.md
index 47d9ef7..3a4d08d 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,27 @@
-# PaCS-ToolKit
+# PaCS-Toolkit
-PaCS-ToolKit enables the execution of PaCS-MD (Parallel Cascade Selection Molecular Dynamic Simulation), a non-bias-enhanced sampling method, across various environments. Additionally, it offers tools for result analysis and visualization.
+PaCS-Toolkit enables the execution of PaCS-MD (Parallel Cascade Selection Molecular Dynamic Simulation), a non-bias-enhanced sampling method, across various environments. Additionally, it offers tools for result analysis and visualization.
While PaCS-MD offers a wide range of applications with existing evaluation types, our toolkit also allows for the integration of additional types as needed.
We believe our package will benefit your research.
-- [PaCS-ToolKit](#pacs-toolkit)
+- [PaCS-Toolkit](#pacs-toolkit)
- [Document](#document)
- [Quick install](#quick-install)
+ - [Example command](#example-command)
- [Citation](#citation)
- [LICENSE](#license)
## Document
-- The documentation of PaCS-ToolKit is [here](https://kitaolab.github.io/PaCS-Toolkit/).
+- The documentation of PaCS-Toolkit is [here](https://kitaolab.github.io/PaCS-Toolkit/).
## Quick install
1. Install by pip
~~~shell
-# Install all feautres of PaCS-ToolKit
+# Install all feautres of PaCS-Toolkit
pip install "pacs[all] @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
~~~
@@ -32,10 +33,10 @@ see [document](https://kitaolab.github.io/PaCS-Toolkit/) for more information.
2. Install by conda and pip
~~~shell
-conda create -n pacs "python>=3.7" -y
+conda create -n pacs "python>=3.8" -y
conda activate pacs
-# Install all features of PaCS-ToolKit
+# Install all features of PaCS-Toolkit
pip install "pacs[all] @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
~~~
@@ -44,32 +45,32 @@ see [document](https://kitaolab.github.io/PaCS-Toolkit/) for more information.
+## Example command
+```sh
+pacs mdrun -t 1 -f input.toml
+```
+see help messages(`pacs --help`) and [document](https://kitaolab.github.io/PaCS-Toolkit/) for more information.
+
## Citation
-~~~txt
-- PaCS-Toolkit
-[1] Ikizawa, S.*, Hori, T.*, Wijana, T.N.*, Kono, H., Bai, Z., Kimizono, T., Lu, W., Tran, D.P., & Kitao, A. PaCS-Toolkit: Optimized software utilities for parallel cascade selection molecular dynamics (PaCS-MD) simulations and subsequent analyses. J. Phys. Chem. B. 128, 15, 3631-3642 (2024). https://doi.org/10.1021/acs.jpcb.4c01271
+- [1] PaCS-Toolkit: Ikizawa, S.*, Hori, T.*, Wijana, T.N.*, Kono, H., Bai, Z., Kimizono, T., Lu, W., Tran, D.P., & Kitao, A. PaCS-Toolkit: Optimized software utilities for parallel cascade selection molecular dynamics (PaCS-MD) simulations and subsequent analyses. *J. Phys. Chem. B.*, **128**, 15, 3631-3642 (2024). https://doi.org/10.1021/acs.jpcb.4c01271
-- Original PaCS-MD or targeted-PaCS-MD (t-PaCS-MD)
-[2] Harada, R., & Kitao, A. Parallel cascade selection molecular dynamics (PaCS-MD) to generate conformational transition pathway. J. Chem. Phys. 139, 035103 (2013). https://doi.org/10.1063/1.4813023
+- [2] Original PaCS-MD or targeted-PaCS-MD (t-PaCS-MD): Harada, R., & Kitao, A. Parallel cascade selection molecular dynamics (PaCS-MD) to generate conformational transition pathway. *J. Chem. Phys.* **139**, 035103 (2013). https://doi.org/10.1063/1.4813023
-- Dissociation PaCS-MD (dPaCS-MD)
-[3] Tran, D. P., Takemura, K., Kuwata, K., & Kitao, A. Protein–Ligand Dissociation Simulated by Parallel Cascade Selection Molecular Dynamics. J. Chem. Theory Comput. 14, 404–417 (2018). https://doi.org/10.1021/acs.jctc.7b00504
-[4] Tran, D. P., & Kitao, A. Dissociation Process of a MDM2/p53 Complex Investigated by Parallel Cascade Selection Molecular Dynamics and the Markov State Model. J. Phys. Chem. B , 123, 11, 2469–2478 (2019). https://doi.org/10.1021/acs.jpcb.8b10309
-[5] Hata, H., Phuoc Tran, D., Marzouk Sobeh, M., & Kitao, A. Binding free energy of protein/ligand complexes calculated using dissociation Parallel Cascade Selection Molecular Dynamics and Markov state model. Biophysics and Physicobiology, 18, 305–31 (2021). https://doi.org/10.2142/biophysico.bppb-v18.037
+- [3] Dissociation PaCS-MD (dPaCS-MD): Tran, D. P., Takemura, K., Kuwata, K., & Kitao, A. Protein–Ligand Dissociation Simulated by Parallel Cascade Selection Molecular Dynamics. *J. Chem. Theory Comput*. **14**, 404–417 (2018). https://doi.org/10.1021/acs.jctc.7b00504
-- Application to protein domain motion
-[6] Inoue, Y., Ogawa, Y., Kinoshita, M., Terahara, N., Shimada, M., Kodera, N., Ando, T., Namba, K., Kitao, A., Imada, K., & Minamino, T. Structural Insights into the Substrate Specificity Switch Mechanism of the Type III Protein Export Apparatus. Structure, 27 , 965-976 (2019). https://doi.org/10.1016/j.str.2019.03.017
+- [4] Dissociation PaCS-MD (dPaCS-MD): Tran, D. P., & Kitao, A. Dissociation Process of a MDM2/p53 Complex Investigated by Parallel Cascade Selection Molecular Dynamics and the Markov State Model. *J. Phys. Chem. B*, **123**, 11, 2469–2478 (2019). https://doi.org/10.1021/acs.jpcb.8b10309
-- Association and dissociation PaCS-MD (a/dPaCS-MD)
-[7] Tran, D. P., & Kitao, A. Kinetic Selection and Relaxation of the Intrinsically Disordered Region of a Protein upon Binding. J. Chem. Theory Comput. 16, 2835–2845 (2020). https://doi.org/10.1021/acs.jctc.9b01203
+- [5] Dissociation PaCS-MD (dPaCS-MD): Hata, H., Phuoc Tran, D., Marzouk Sobeh, M., & Kitao, A. Binding free energy of protein/ligand complexes calculated using dissociation Parallel Cascade Selection Molecular Dynamics and Markov state model. *Biophysics and Physicobiology*, **18**, 305–31 (2021). https://doi.org/10.2142/biophysico.bppb-v18.037
-- Edge expansion PaCS-MD (eePaCS-MD)
-[8] Takaba, K., Tran, D. P., & Kitao, A. Edge expansion parallel cascade selection molecular dynamics simulation for investigating large-amplitude collective motions of proteins. J. Chem. Phys. 152, 225101 (2020). https://doi.org/10.1063/5.0004654
-[9] Takaba, K., Tran, D. P., & Kitao, A. Erratum: "Edge expansion parallel cascade selection molecular dynamics simulation for investigating large-amplitude collective motions of proteins" [J. Chem. Phys. 152, 225101 (2020)]. . J. Chem. Phys. 153, 179902 (2020). https://doi.org/10.1063/5.0032465
+- [6] Application to protein domain motion: Inoue, Y., Ogawa, Y., Kinoshita, M., Terahara, N., Shimada, M., Kodera, N., Ando, T., Namba, K., Kitao, A., Imada, K., & Minamino, T. Structural Insights into the Substrate Specificity Switch Mechanism of the Type III Protein Export Apparatus. *Structure*, **27** , 965-976 (2019). https://doi.org/10.1016/j.str.2019.03.017
-- rmsdPaCS-MD
-[10] Tran, D. P., Taira, Y., Ogawa, T., Misu, R., Miyazawa, Y., & Kitao, A. Inhibition of the hexamerization of SARS-CoV-2 endoribonuclease and modeling of RNA structures bound to the hexamer. Sci Rep 12, 3860 (2022). https://doi.org/10.1038/s41598-022-07792-2
-~~~
+- [7] Association and dissociation PaCS-MD (a/dPaCS-MD): Tran, D. P., & Kitao, A. Kinetic Selection and Relaxation of the Intrinsically Disordered Region of a Protein upon Binding. *J. Chem. Theory Comput.*, **16**, 2835–2845 (2020). https://doi.org/10.1021/acs.jctc.9b01203
+
+- [8] Edge expansion PaCS-MD (eePaCS-MD): Takaba, K., Tran, D. P., & Kitao, A. Edge expansion parallel cascade selection molecular dynamics simulation for investigating large-amplitude collective motions of proteins. *J. Chem. Phys.* **152**, 225101 (2020). https://doi.org/10.1063/5.0004654
+
+- [9] Edge expansion PaCS-MD (eePaCS-MD): Takaba, K., Tran, D. P., & Kitao, A. Erratum: "Edge expansion parallel cascade selection molecular dynamics simulation for investigating large-amplitude collective motions of proteins" [J. Chem. Phys. 152, 225101 (2020)]. *J. Chem. Phys.* **153**, 179902 (2020). https://doi.org/10.1063/5.0032465
+
+- [10] rmsdPaCS-MD: Tran, D. P., Taira, Y., Ogawa, T., Misu, R., Miyazawa, Y., & Kitao, A. Inhibition of the hexamerization of SARS-CoV-2 endoribonuclease and modeling of RNA structures bound to the hexamer. *Sci Rep* **12**, 3860 (2022). https://doi.org/10.1038/s41598-022-07792-2
## LICENSE
diff --git a/docs/src/fit.md b/docs/src/fit.md
index a51173b..151dac9 100644
--- a/docs/src/fit.md
+++ b/docs/src/fit.md
@@ -17,19 +17,19 @@ pacs fit traj mdtraj -tf ./trial001/cycle001/replica001/prd.xtc -top ./inputs/in
#### for single trial
```shell
-pacs fit trial mdtraj -t 1 -s ./trial001/cycle001/replica001/prd.pdb -r ./trial001/cycle001/replica001/prd.pdb -ts "protein" -rs "protein" -tf prd.xtc -p 10
+pacs fit trial mdtraj -t 1 -top ./trial001/cycle001/replica001/prd.pdb -r ./trial001/cycle001/replica001/prd.pdb -ts "protein" -rs "protein" -tf prd.xtc -p 10
```
### Arguments
#### for single trajectory
```plaintext
-usage: pacs fit mdtraj [-h] [-tf] [-top] [-r] [-ts] [-rs] [-p] [-o]
+usage: pacs fit traj mdtraj [-h] [-tf] [-top] [-r] [-ts] [-rs] [-p] [-o]
```
- `-tf, --trj_file` (str):
- file name of the trajectory to be fitted (e.g. `-tf prd.xtc`)
- `-top, --topology` (str):
- - topology file path for loading trajectory (e.g. `-s trial001/cycle000/replica001/prd.pdb`)
+ - topology file path for loading trajectory (e.g. `-top trial001/cycle000/replica001/prd.pdb`)
- `-r, --ref_structure` (str):
- reference structure file path for fitting reference (e.g. `-r trial001/cycle000/replica001/prd.pdb`)
- `-ts, --trj_selection` (str):
@@ -53,7 +53,7 @@ usage: pacs fit trial mdtraj [-h] [-t] [-tf] [-top] [-r] [-ts] [-rs] [-p] [-o]
- `-tf, --trj_file` (str):
- file name of the trajectory to be fitted (e.g. `-tf prd.xtc`)
- `-top, --topology` (str):
- - topology file path for loading trajectory (e.g. `-s trial001/cycle000/replica001/prd.pdb`)
+ - topology file path for loading trajectory (e.g. `-top trial001/cycle000/replica001/prd.pdb`)
- `-r, --ref_structure` (str):
- reference structure file path for fitting reference (e.g. `-r trial001/cycle000/replica001/prd.pdb`)
- `-ts, --trj_selection` (str):
diff --git a/docs/src/genfeature.md b/docs/src/genfeature.md
index 2c13706..8b44b45 100644
--- a/docs/src/genfeature.md
+++ b/docs/src/genfeature.md
@@ -1,14 +1,17 @@
# genfeature
-- This command is used after executing `pacs mdrun`.
-- This command generates data that will be used for MSM analysis.
-- This command supports parallel process.
-
-
-| feature | mdtraj | gmx | cpptraj |
-| ------- | ------ | --- | ------- |
-| comdist | o | x | x |
-| comvec | o | x | x |
-| pca | o | x | x |
-| tica | o | x | x |
-| rmsd | o | x | x |
-| xyz | o | x | x |
+- This command should be executed after running `pacs mdrun`.
+- It generates feature data in `.npy` format, which is cconvenient for MSM analysis in Python.
+ - Feature data files (e.g., `t001c002r010.npy`) are stored in the directory specified with the `-od` option.
+ - Each `.npy` file has the `np.arry` in the shape as described in the table below.
+- This command supports parallel processing.
+
+
+Currently implemented analysis tools and the shape of the output data in `.npy` files.
+
+
+| feature | mdtraj | gmx | cpptraj | shape of `.npy` |
+| ------- | ------ | --- | ------- | ---------------------- |
+| comdist | o | x | x | (n_frames,) |
+| comvec | o | x | x | (n_frames, 3) |
+| rmsd | o | x | x | (n_frames,) |
+| xyz | o | x | x | (n_frames, n_atoms, 3) |
diff --git a/docs/src/genfeature/comvec.md b/docs/src/genfeature/comvec.md
index 9463f5e..50e2dca 100644
--- a/docs/src/genfeature/comvec.md
+++ b/docs/src/genfeature/comvec.md
@@ -1,5 +1,7 @@
# comvec
- Center of mass vector
+- Calculate the vector between the centers of mass of `s1` and `s2`
+ - The vector is calculated as `s1` - `s2`
### Example
- The following example generates features about COM vector for MSM analysis
diff --git a/docs/src/genfeature/rmsd.md b/docs/src/genfeature/rmsd.md
index eabd367..eadaa85 100644
--- a/docs/src/genfeature/rmsd.md
+++ b/docs/src/genfeature/rmsd.md
@@ -1,5 +1,6 @@
# RMSD
- Root Mean Square Deviation
+- Calculate the RMSD relative to the structure specified in `ref`
### Example
- The following example generates features about RMSD for MSM analysis
@@ -14,7 +15,7 @@ pacs genfeature rmsd mdtraj -t 1 -tf prd.xtc -top ./inputs/input.gro -ref ./inpu
#### mdtraj
```plaintext
-usage: pacs genfeature pca mdtraj [-h] [-tf] [-top] [-od] [-p] [-ref] [-ft] [-fr] [-ct] [-cr]
+usage: pacs genfeature rmsd mdtraj [-h] [-tf] [-top] [-od] [-p] [-ref] [-ft] [-fr] [-ct] [-cr]
```
- `-t, --trial` (int):
diff --git a/docs/src/install.md b/docs/src/install.md
index 5cd841a..14451ec 100644
--- a/docs/src/install.md
+++ b/docs/src/install.md
@@ -11,23 +11,22 @@
- [2.2. Install by pip locally](#22-install-by-pip-locally)
## Requirements
-- [Python](https://www.python.org/) >= 3.7
-- PaCS-ToolKit currently supports 3 simulator
- - [Gromacs](https://www.gromacs.org/)
- - [Amber](https://ambermd.org/index.php)
- - [Namd](https://www.ks.uiuc.edu/Research/namd/)
+- [Python](https://www.python.org/) >= 3.7 (but python >= 3.8 is recommended because of deeptime)
+- PaCS-Toolkit currently supports 3 simulator
+ - [Gromacs](https://www.gromacs.org/) >= 2022.2 tested
+ - [Amber](https://ambermd.org/index.php) >= 2023 tested
+ - [Namd](https://www.ks.uiuc.edu/Research/namd/) >= 2021-02-20 tested
## 1. Install by pip
### 1.1 Install by conda and pip
~~~shell
-conda create -n pacsmd "python>=3.7" -y
+conda create -n pacsmd "python>=3.8" -y
conda activate pacsmd
~~~
- if using whole pacstk function
~~~shell
pip install "pacs[all] @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
-pip install pyemma
~~~
- elif using "pacs mdrun" and analyzer == "mdtraj"
@@ -41,9 +40,9 @@ pip install "pacs @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
~~~
- elif performing MSM
+ - python >= 3.8 is recommended because of deeptime
~~~shell
pip install "pacs[msm] @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
-pip install pyemma
~~~
### 1.2. Install by pip
@@ -63,9 +62,9 @@ pip install "pacs @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
~~~
- elif performing MSM
+ - python >= 3.8 is recommended because of deeptime
~~~shell
pip install "pacs[msm] @ git+https://github.com/Kitaolab/PaCS-Toolkit.git"
-pip install pyemma
~~~
@@ -87,15 +86,14 @@ cd pacsmd-${version}
### 2.1. Install by conda and pip locally
~~~shell
-conda create -n pacsmd "python>=3.7" -y
+conda create -n pacsmd "python>=3.8" -y
conda activate pacsmd
~~~
- if using whole pacstk function
- - pyemma does not recommend pip-install
+ - python >= 3.8 is recommended because of deeptime
~~~shell
pip install -e ".[all]"
-conda install -c conda-forge pyemma
~~~
- elif using "pacs mdrun" and analyzer == "mdtraj"
@@ -114,18 +112,15 @@ pip install -e "."
~~~
- elif performing MSM
- - pyemma does not recommend pip-install
~~~
pip install -e ".[msm]"
-conda install -c conda-forge pyemma
~~~
### 2.2. Install by pip locally
- if using whole pacstk function
- - pyemma does not work, conda is recommend
+ - python >= 3.8 is recommended because of deeptime
~~~shell
pip install -e ".[all]"
-pip install pyemma
~~~
- elif using "pacs mdrun" and analyzer == "mdtraj"
@@ -144,8 +139,7 @@ pip install -e "."
~~~
- elif performing MSM
- - sometimes pyemma does not work, conda is recommend
+ - python >= 3.8 is recommended because of deeptime
~~~shell
pip install -e ".[msm]"
-pip install pyemma
~~~
diff --git a/docs/src/mdrun/inputfile.md b/docs/src/mdrun/inputfile.md
index 3b4eb46..d5261a7 100644
--- a/docs/src/mdrun/inputfile.md
+++ b/docs/src/mdrun/inputfile.md
@@ -4,21 +4,22 @@
- input file must be in [toml format](https://toml.io/en/).
*Contents*
-- [sample input file](#sample-input-file)
-- [basic option](#basic-option)
-- [simulator option](#simulator-option)
- - [Gromacs](#gromacs)
- - [Amber](#amber)
- - [NAMD](#namd)
-- [analyzer option](#analyzer-option)
- - [Target](#target)
- - [RMSD](#rmsd)
- - [Association](#association)
- - [Dissociation](#dissociation)
- - [EdgeExpansion](#edgeexpansion)
- - [A\_D](#a_d)
- - [Template](#template)
-- [hidden option (No need to specify)](#hidden-option-no-need-to-specify)
+- [Input file](#input-file)
+ - [sample input file](#sample-input-file)
+ - [basic option](#basic-option)
+ - [simulator option](#simulator-option)
+ - [Gromacs](#gromacs)
+ - [Amber](#amber)
+ - [NAMD](#namd)
+ - [analyzer option](#analyzer-option)
+ - [Target](#target)
+ - [RMSD](#rmsd)
+ - [Association](#association)
+ - [Dissociation](#dissociation)
+ - [EdgeExpansion](#edgeexpansion)
+ - [A\_D](#a_d)
+ - [Template](#template)
+ - [hidden option (No need to specify)](#hidden-option-no-need-to-specify)
## sample input file
- please check [here](https://github.com/Kitaolab/PaCS-Toolkit/tree/main/jobscripts)
@@ -94,6 +95,16 @@ rmfile = true # Whether rmfile is executed after trial
- Gromacs index file
- **trajectory_extension: str, required**
- Trajectory file extension. ("." is necessary)
+- **nojump: bool, default=false**
+ - whether to execute `-pbc nojump` treatment for the selection feature calculation in `analayzer`, snapshot extraction in `exporter` and performing rmmol
+ - **valid only when `analyzer` is also gromacs**
+ - If `true`, molecules are allowed to get out of the simulation box in order to avoid the error in MSM due to the jumping of break of the molecule over pbc box.
+ - If `false`, molecules are just made whole by `-pbc mol` and can warp across the pbc box.
+ - Be noted that the output `prd.xtc` files are not processed with these `-pbc` options. (only `prd_rmmol.xtc` files are processed)
+ - This option is recommended to use when a/dissociation and a_d pacsmd is performed using gromacs as simulator and analyzer
+ - `nojump=true` can lead too large coordinate value to cause overflow or loss-of-significane problem. It will not happpen in most cases, but be carefull if your ligand is very small and simulation box is very large.
+ - When this options is applied, analyzer can consider the distance even if ligand exceeds simulation box
+ - This option is not present in example input in the [sample input repository](https://github.com/Kitaolab/PaCS-Toolkit-example/tree/main) since this option was added in version 1.1.0
@@ -107,6 +118,7 @@ topology = "/work/topol.top" # Topology file such as top, parm7, psf,
mdconf = "/work/parameter.mdp" # Parameter file such as mdp, mdin, namd, etc.
index_file = "/work/index.ndx" # Gromacs index file
trajectory_extension = ".xtc" # Trajectory file extension. ("." is necessary)
+nojump = true # whether to execute nojump treatment only for gmx
```
@@ -527,6 +539,7 @@ user-defined-variable2 = "hoge"
## hidden option (No need to specify)
click here
+
- **cmd_gmx: str**
- Gromacs command (ex. gmx, gmx_mpi)
- will be created from `cmd_serial`
@@ -536,4 +549,5 @@ user-defined-variable2 = "hoge"
- **structure_extension: str**
- Structure file extension
- will be created from `structure`
+
diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md
index f1f2ec7..59c61a7 100644
--- a/docs/src/quickstart.md
+++ b/docs/src/quickstart.md
@@ -26,7 +26,7 @@ git clone https://github.com/Kitaolab/PaCS-Toolkit.git
pip install -e ".[mdtraj]"
# Or install by conda and pip
-conda create -n pacsmd "python>=3.7" -y
+conda create -n pacsmd "python>=3.8" -y
conda activate pacsmd
pip install -e ".[mdtraj]"
```
@@ -140,13 +140,13 @@ $ pacs fit trial mdtraj -t 1 -tf prd_rmmol.xtc -top rmmol_top.pdb -r ref.gro -ts
- So if you want to use other specific CVs, you need to write a code by yourself.
~~~shell
-$ pacs genfeature comdist mdtraj -t 1 -tf prd.xtc -top inputs/example_gromacs/input.gro -s1 "residue 1" -s2 "residue 9"
+$ pacs genfeature comdist mdtraj -t 1 -tf prd.xtc -top inputs/example_gromacs/input.gro -s1 "residue 1" -s2 "residue 9"
$ ls
comdist-CV/
~~~
## Step7: Building MSM and predicting free energy
-- After extracting CVs, various analyses can be performed on them.
+- After extracting CVs, various analyses can be performed on them.
- PaCS-MD is especially compatible with analyses using MSM.
diff --git a/docs/src/rmmol.md b/docs/src/rmmol.md
index 57fee54..88f3938 100644
--- a/docs/src/rmmol.md
+++ b/docs/src/rmmol.md
@@ -18,7 +18,7 @@ pacs rmmol mdtraj -t 1 -k "not water" -e .xtc -m trial001/cycle000/replica001/pr
#### gromacs
```shell
-pacs rmmol gmx -t 1 -k "not_water" -e .xtc -n index.ndx -g gmx
+pacs rmmol gmx -t 1 -k "not_water" -e .xtc -n index.ndx -g gmx --nojump
```
#### cpptraj
@@ -56,6 +56,10 @@ usage: pacs rmmol gmx [-h] [-t] [-k] [-n] [-g] [-e]
- index file for gromacs (e.g. `-n index.ndx`)
- `-g, --cmd_gmx` (str):
- gromacs command prefix (e.g. `-g gmx`)
+- `--nojump` (bool):
+ - whether to execute `-pbc nojump` treatment for the trajectory (e.g. `--nojump`)
+ - If your PaCS-MD trajectory was generated with `nojump=true`, it is strongly strongly recommended to use this options as well
+ - Also refer to the `mdrun/inputfile` page for more details.
#### cpptraj
diff --git a/jobscripts/README.md b/jobscripts/README.md
new file mode 100644
index 0000000..e77c356
--- /dev/null
+++ b/jobscripts/README.md
@@ -0,0 +1,5 @@
+# Jobscripts
+
+- Jobscript samples for supercomputers are provided here.
+- The main purpose here is to provide sample combination of options in input.toml and supercomputer resources.
+- If you want more samples of different PaCS-MD types, also refer to [example input repository](https://github.com/Kitaolab/PaCS-Toolkit-example)
\ No newline at end of file
diff --git a/jobscripts/fugaku/input.toml b/jobscripts/fugaku/input.toml
index 3d40867..ce38dbe 100644
--- a/jobscripts/fugaku/input.toml
+++ b/jobscripts/fugaku/input.toml
@@ -35,8 +35,8 @@ analyzer = "gromacs" # Trajectory tool used to ca
reference = "./inputs/ref.pdb" # Structure for comparison
selection1 = "Backbone" # Selection string for specified group in trajectroies (least squares fit)
selection2 = "Backbone" # Selection string for specified group in trajectories (RMSD calculation)
-selection3 = "Backbone" # Selection string for specified group in reference (least squares fit)
-selection4 = "Backbone" # Selection string for specified group in reference (RMSD calculation)
+#selection3 = "Backbone" # Selection string for specified group in reference (least squares fit)
+#selection4 = "Backbone" # Selection string for specified group in reference (RMSD calculation)
## postprocess
rmmol = true # Whether rmmol is executed after each cycle
@@ -44,3 +44,4 @@ keep_selection = "not_Water" # Molecular name or index gr
rmfile = true # Whether rmfile is executed after trial
cmd_gmx = "gmx" # cmd_gmx for analyze (This command is necessary to specify on fugaku)
+nojump = true # whether to execute nojump treatment only for gmx
\ No newline at end of file
diff --git a/jobscripts/fugaku/pjsub.sh b/jobscripts/fugaku/pjsub.sh
old mode 100644
new mode 100755
diff --git a/jobscripts/ims/input.toml b/jobscripts/ims/input.toml
index 566fe3a..38d2ec3 100644
--- a/jobscripts/ims/input.toml
+++ b/jobscripts/ims/input.toml
@@ -42,3 +42,6 @@ selection4 = "backbone" # Selection string for speci
rmmol = true # Whether rmmol is executed after each cycle
keep_selection = "not resname OPC" # Molecular name or index group to be left in the trajectory when rmmol
rmfile = true # Whether rmfile is executed after trial
+
+# not valid for analyzer=mdtraj
+# nojump = true # whether to execute nojump treatment only for gmx
\ No newline at end of file
diff --git a/jobscripts/ims/jsub.sh b/jobscripts/ims/jsub.sh
old mode 100644
new mode 100755
diff --git a/jobscripts/issp_b/input.toml b/jobscripts/issp_b/input.toml
index cfb3707..cc76645 100644
--- a/jobscripts/issp_b/input.toml
+++ b/jobscripts/issp_b/input.toml
@@ -42,3 +42,6 @@ selection4 = "backbone" # Selection string for speci
rmmol = true # Whether rmmol is executed after each cycle
keep_selection = "not resname OPC" # Molecular name or index group to be left in the trajectory when rmmol
rmfile = true # Whether rmfile is executed after trial
+
+# not valid for analyzer=mdtraj
+# nojump = true # whether to execute nojump treatment only for gmx
\ No newline at end of file
diff --git a/jobscripts/issp_b/sbatch.sh b/jobscripts/issp_b/sbatch.sh
old mode 100644
new mode 100755
diff --git a/jobscripts/local/input.toml b/jobscripts/local/input.toml
index 64c4b2a..d72ca27 100644
--- a/jobscripts/local/input.toml
+++ b/jobscripts/local/input.toml
@@ -40,3 +40,6 @@ selection4 = "backbone" # Selection string for speci
rmmol = true # Whether rmmol is executed after each cycle
keep_selection = "not resname OPC" # Molecular name or index group to be left in the trajectory when rmmol
rmfile = true # Whether rmfile is executed after trial
+
+# not valid for analyzer=mdtraj
+# nojump = true # whether to execute nojump treatment only for gmx
\ No newline at end of file
diff --git a/jobscripts/local/run.sh b/jobscripts/local/run.sh
old mode 100644
new mode 100755
diff --git a/jobscripts/tsubame3/input.toml b/jobscripts/tsubame3/input.toml
index 1611518..fb41845 100644
--- a/jobscripts/tsubame3/input.toml
+++ b/jobscripts/tsubame3/input.toml
@@ -43,3 +43,6 @@ selection4 = "backbone" # Selection string for speci
rmmol = true # Whether rmmol is executed after each cycle
keep_selection = "not resname OPC" # Molecular name or index group to be left in the trajectory when rmmol
rmfile = true # Whether rmfile is executed after trial
+
+# not valid for analyzer=mdtraj
+# nojump = true # whether to execute nojump treatment only for gmx
\ No newline at end of file
diff --git a/jobscripts/tsubame3/qsub.sh b/jobscripts/tsubame3/qsub.sh
old mode 100644
new mode 100755
diff --git a/jobscripts/tsubame4/README.md b/jobscripts/tsubame4/README.md
new file mode 100644
index 0000000..d607979
--- /dev/null
+++ b/jobscripts/tsubame4/README.md
@@ -0,0 +1,14 @@
+# tsubame4
+- [URL](https://www.t4.gsic.titech.ac.jp/)
+
+| simulator | analyzer | available |
+| --------- | ------------ | --------- |
+| gromacs | gromacs | o |
+| gromacs | mdtraj | o |
+| amber | mdtraj | o |
+| amber | cpptraj | o |
+| namd | mdtraj | o |
+
+- Note
+ - Running gromacs over multiple node fails to run for some reason as of Sep. 2024.
+ - So, we are sharing the script for the case `n_parallel=1`
\ No newline at end of file
diff --git a/jobscripts/tsubame4/input.toml b/jobscripts/tsubame4/input.toml
new file mode 100644
index 0000000..fa5024f
--- /dev/null
+++ b/jobscripts/tsubame4/input.toml
@@ -0,0 +1,51 @@
+# PaCS-MD sample jobscript for tsubame4.0
+
+### This script is just for reference
+### You need to modify accordingly
+
+# total_core = 48
+# Parallel run of gmx_mpi over multipe node failed for some reasons as of Sep. 2024.
+# So, this example is to run on a single node and n_parallel=1
+# OMP = 48
+# MPI = None
+# core_per_replica = 48 / 1 = 48
+
+## basic
+max_cycle = 10 # Maximum number of cycles to run. (ex. 1, ..., 123, ..., 999)
+n_replica = 30 # Number of replica. (ex. 1, ..., 123, ..., 999)
+n_parallel = 1 # Number of replica which are calculated at a time
+centering = true # Whether to move the molecule to the center
+centering_selection = "protein" # Name of molecule to move in the center
+working_dir = "./." # Directory where pacsmd will run
+
+## simulator
+simulator = "gromacs" # Simulator used inside PaCS-MD
+cmd_mpi = "" # Commands for MPI such as mpirun, blank is OK
+cmd_serial = "gmx_mpi mdrun" # Commands to run the simulator serially
+cmd_parallel = "gmx_mpi mdrun"
+ # Commands to run the simulator parallelly
+structure = "./inputs/input.gro" # Structural file such as gro, pdb, rst7, etc.
+topology = "./inputs/topol.top" # Topology file such as top, parm7, psf, etc.
+mdconf = "./inputs/parameter.mdp" # Parameter file such as mdp, mdin, namd, etc.
+index_file = "./inputs/index.ndx" # Gromacs index file
+trajectory_extension = ".xtc" # Trajectory file extension. ("." is necessary)
+
+## analyzer
+type = "dissociation" # Evaluation type
+threshold = 10 # CV threshold used to decide whether to terminate the calculation (in units of nm)
+skip_frame = 1 # How many frames to skip when ranking CVs
+analyzer = "mdtraj" # Trajectory tool used to calculate the evaluation type
+selection1 = "protein and name CA" # Selection string for specified group in trajectories
+selection2 = "resname LIG" # Selection string for specified group in trajectories
+
+## postprocess
+rmmol = true # Whether rmmol is executed after each cycle
+keep_selection = "not resname OPC" # Molecular name or index group to be left in the trajectory when rmmol
+rmfile = true # Whether rmfile is executed after trial
+
+# not valid for analyzer=mdtraj
+# nojump = true # whether to execute nojump treatment only for gmx
+
+# Simulation speed measured by this script and the corresponding .toml
+# System of 300,000 atoms
+# - 100 ns/day
\ No newline at end of file
diff --git a/jobscripts/tsubame4/qsub.sh b/jobscripts/tsubame4/qsub.sh
new file mode 100755
index 0000000..82b8e5a
--- /dev/null
+++ b/jobscripts/tsubame4/qsub.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#$ -cwd
+#$ -l node_q=1
+#$ -l h_rt=24:00:00
+#$ -N pacsmd_test
+
+set -e
+. /etc/profile.d/modules.sh
+
+module purge
+module load openmpi/5.0.2-gcc
+module load gromacs/2024
+
+# Please activate the python environment in which you can use pacsmd library
+# Change the path to your environment
+# source /home/x/xxxxxxx/anaconda3/etc/profile.d/conda.sh
+# conda activate pacs
+
+for trial in {1..1};
+do
+ pacs mdrun -f input.toml -t $trial
+done
+
+echo "pacsmd done" >&1
+
+# Simulation speed measured by this script and the corresponding .toml
+# System of 300,000 atoms
+# - 100 ns/day
diff --git a/pacs/__main__.py b/pacs/__main__.py
index da5396d..5a8001a 100644
--- a/pacs/__main__.py
+++ b/pacs/__main__.py
@@ -27,7 +27,7 @@
LOGGER = generate_logger(__name__)
-LOGGER.info(f"pacsmd version {__version__}")
+LOGGER.info(f"PaCS-Toolkit Version: {__version__}")
def prepare_md(
diff --git a/pacs/_version.py b/pacs/_version.py
index 5becc17..6849410 100644
--- a/pacs/_version.py
+++ b/pacs/_version.py
@@ -1 +1 @@
-__version__ = "1.0.0"
+__version__ = "1.1.0"
diff --git a/pacs/mdrun/Cycle.py b/pacs/mdrun/Cycle.py
index af0b53d..bf49066 100644
--- a/pacs/mdrun/Cycle.py
+++ b/pacs/mdrun/Cycle.py
@@ -4,6 +4,7 @@
# import pacs.utils.genrepresent as genrepresent
import pacs.utils.rmfile as rmfile
import pacs.utils.rmmol as rmmol
+from pacs._version import __version__
from pacs.mdrun.analyzer.superAnalyzer import SuperAnalyzer
from pacs.mdrun.exporter.superExporter import SuperExporter
from pacs.mdrun.simulator.superSimulator import SuperSimulator
@@ -79,6 +80,21 @@ def prepare_trial(self) -> None:
exit(1)
tmp = self.settings.n_replica
self.settings.n_replica = 1
+ # create version file of pacstk
+ version_file = f"{self.settings.each_trial()}/pacstk.version"
+ if Path(version_file).exists():
+ with open(version_file) as f:
+ line = f.readline().strip()
+ if __version__ != line:
+ LOGGER.error("PaCS-Toolkit version error")
+ LOGGER.error(
+ f"Version used in {self.settings.each_trial()} is {line},"
+ )
+ LOGGER.error(f"But you're using PaCS-Toolkit {__version__}")
+ exit(1)
+ else:
+ with open(version_file, "w") as f:
+ f.write(f"{__version__}")
self.run_md()
self.calculate_cv()
self.settings.n_replica = tmp
diff --git a/pacs/mdrun/analyzer/a_d.py b/pacs/mdrun/analyzer/a_d.py
index 7175ebf..9b300d8 100644
--- a/pacs/mdrun/analyzer/a_d.py
+++ b/pacs/mdrun/analyzer/a_d.py
@@ -83,18 +83,24 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> List[flo
dir = settings.each_replica(_cycle=cycle, _replica=replica)
+ # nojump treatment
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
+ else:
+ pbc_option = "-pbc mol -ur compact"
+
cmd_image = f"echo 'System' \
| {settings.cmd_gmx} trjconv \
-f {dir}/prd{extension} \
-s {dir}/prd.tpr \
-o {dir}/prd_image{extension} \
- -pbc mol \
- -ur compact \
- 1> {dir}/center.log 2>&1" # NOQA: E221
+ {pbc_option} \
+ 1> {dir}/image.log 2>&1" # NOQA: E221
+
res_image = subprocess.run(cmd_image, shell=True)
if res_image.returncode != 0:
LOGGER.error("error occured at image command")
- LOGGER.error(f"see {dir}/center.log")
+ LOGGER.error(f"see {dir}/image.log")
exit(1)
cmd_dist = f"{settings.cmd_gmx} distance \
@@ -103,8 +109,10 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> List[flo
-n {settings.index_file} \
-oxyz {dir}/interCOM_xyz.xvg \
-xvg none \
- -select 'com of group {grp1} plus com of group {grp2}' \
+ -pbc no \
+ -select 'com of group {grp2} plus com of group {grp1}' \
1> {dir}/distance.log 2>&1" # NOQA: E221
+
res_dist = subprocess.run(cmd_dist, shell=True)
if res_dist.returncode != 0:
LOGGER.error("error occurred at distance command")
diff --git a/pacs/mdrun/analyzer/association.py b/pacs/mdrun/analyzer/association.py
index d2f14b9..b1a5731 100644
--- a/pacs/mdrun/analyzer/association.py
+++ b/pacs/mdrun/analyzer/association.py
@@ -62,18 +62,24 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> List[flo
dir = settings.each_replica(_cycle=cycle, _replica=replica)
+ # nojump treatment
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
+ else:
+ pbc_option = "-pbc mol -ur compact"
+
cmd_image = f"echo 'System' \
| {settings.cmd_gmx} trjconv \
-f {dir}/prd{extension} \
-s {dir}/prd.tpr \
-o {dir}/prd_image{extension} \
- -pbc mol \
- -ur compact \
- 1> {dir}/center.log 2>&1" # NOQA: E221
+ {pbc_option} \
+ 1> {dir}/image.log 2>&1" # NOQA: E221
+
res_image = subprocess.run(cmd_image, shell=True)
if res_image.returncode != 0:
LOGGER.error("error occured at image command")
- LOGGER.error(f"see {dir}/center.log")
+ LOGGER.error(f"see {dir}/image.log")
exit(1)
cmd_dist = f"{settings.cmd_gmx} distance \
@@ -82,8 +88,10 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> List[flo
-n {settings.index_file} \
-oxyz {dir}/interCOM_xyz.xvg \
-xvg none \
- -select 'com of group {grp1} plus com of group {grp2}' \
+ -pbc no \
+ -select 'com of group {grp2} plus com of group {grp1}' \
1> {dir}/distance.log 2>&1" # NOQA: E221
+
res_dist = subprocess.run(cmd_dist, shell=True)
if res_dist.returncode != 0:
LOGGER.error("error occurred at distance command")
diff --git a/pacs/mdrun/analyzer/dissociation.py b/pacs/mdrun/analyzer/dissociation.py
index 23e4855..ec445ba 100644
--- a/pacs/mdrun/analyzer/dissociation.py
+++ b/pacs/mdrun/analyzer/dissociation.py
@@ -66,18 +66,24 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> List[flo
dir = settings.each_replica(_cycle=cycle, _replica=replica)
+ # nojump treatment
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
+ else:
+ pbc_option = "-pbc mol -ur compact"
+
cmd_image = f"echo 'System' \
| {settings.cmd_gmx} trjconv \
-f {dir}/prd{extension} \
-s {dir}/prd.tpr \
-o {dir}/prd_image{extension} \
- -pbc mol \
- -ur compact \
- 1> {dir}/center.log 2>&1" # NOQA: E221
+ {pbc_option} \
+ 1> {dir}/image.log 2>&1" # NOQA: E221
+
res_image = subprocess.run(cmd_image, shell=True)
if res_image.returncode != 0:
LOGGER.error("error occured at image command")
- LOGGER.error(f"see {dir}/center.log")
+ LOGGER.error(f"see {dir}/image.log")
exit(1)
cmd_dist = f"{settings.cmd_gmx} distance \
@@ -86,8 +92,10 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> List[flo
-n {settings.index_file} \
-oxyz {dir}/interCOM_xyz.xvg \
-xvg none \
- -select 'com of group {grp1} plus com of group {grp2}' \
+ -pbc no \
+ -select 'com of group {grp2} plus com of group {grp1}' \
1> {dir}/distance.log 2>&1" # NOQA: E221
+
res_dist = subprocess.run(cmd_dist, shell=True)
if res_dist.returncode != 0:
LOGGER.error("error occurred at distance command")
diff --git a/pacs/mdrun/analyzer/rmsd.py b/pacs/mdrun/analyzer/rmsd.py
index afe18ca..ce37c65 100644
--- a/pacs/mdrun/analyzer/rmsd.py
+++ b/pacs/mdrun/analyzer/rmsd.py
@@ -76,18 +76,24 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> None:
ref = settings.reference
dir = settings.each_replica(_cycle=cycle, _replica=replica)
+ # nojump treatment
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
+ else:
+ pbc_option = "-pbc mol -ur compact"
+
cmd_image = f"echo 'System' \
| {settings.cmd_gmx} trjconv \
-f {dir}/prd{extension} \
-s {dir}/prd.tpr \
-o {dir}/prd_image{extension} \
- -pbc mol \
- -ur compact \
- 1> {dir}/center.log 2>&1" # NOQA: E221
+ {pbc_option} \
+ 1> {dir}/image.log 2>&1" # NOQA: E221
+
res_image = subprocess.run(cmd_image, shell=True)
if res_image.returncode != 0:
LOGGER.error("error occured at image command")
- LOGGER.error(f"see {dir}/center.log")
+ LOGGER.error(f"see {dir}/image.log")
exit(1)
cmd_rms = f"echo {selection1} {selection2} \
@@ -96,6 +102,8 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> None:
-s {ref} \
-o {dir}/rms.xvg \
-n {ndx} \
+ -pbc no \
+ -nomw \
-xvg none 1> {dir}/rms.log 2>&1" # NOQA: E221
res_rms = subprocess.run(cmd_rms, shell=True)
if res_rms.returncode != 0:
diff --git a/pacs/mdrun/analyzer/target.py b/pacs/mdrun/analyzer/target.py
index a2e619c..f2785e5 100644
--- a/pacs/mdrun/analyzer/target.py
+++ b/pacs/mdrun/analyzer/target.py
@@ -73,18 +73,24 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> None:
ref = settings.reference
dir = settings.each_replica(_cycle=cycle, _replica=replica)
+ # nojump treatment
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
+ else:
+ pbc_option = "-pbc mol -ur compact"
+
cmd_image = f"echo 'System' \
| {settings.cmd_gmx} trjconv \
-f {dir}/prd{extension} \
-s {dir}/prd.tpr \
-o {dir}/prd_image{extension} \
- -pbc mol \
- -ur compact \
- 1> {dir}/center.log 2>&1" # NOQA: E221
+ {pbc_option} \
+ 1> {dir}/image.log 2>&1" # NOQA: E221
+
res_image = subprocess.run(cmd_image, shell=True)
if res_image.returncode != 0:
LOGGER.error("error occurred at image command")
- LOGGER.error(f"see {dir}/center.log")
+ LOGGER.error(f"see {dir}/image.log")
exit(1)
cmd_rms = f"echo {selection1} {selection2} \
@@ -93,6 +99,8 @@ def cal_by_gmx(self, settings: MDsettings, cycle: int, replica: int) -> None:
-s {ref} \
-o {dir}/rms.xvg \
-n {ndx} \
+ -pbc no \
+ -nomw \
-xvg none 1> {dir}/rms.log 2>&1" # NOQA: E221
res_rms = subprocess.run(cmd_rms, shell=True)
diff --git a/pacs/mdrun/exporter/gromacs.py b/pacs/mdrun/exporter/gromacs.py
index 7b4f7a4..1832850 100644
--- a/pacs/mdrun/exporter/gromacs.py
+++ b/pacs/mdrun/exporter/gromacs.py
@@ -96,33 +96,63 @@ def export_by_gmx(
_cycle=cycle, _replica=results[replica_rank].replica
)
out_dir = settings.each_replica(_cycle=cycle + 1, _replica=replica_rank + 1)
+
+ # treatment for centering option
if settings.centering is True:
- cmd_trjconv = f"echo {settings.centering_selection} System \
- | {settings.cmd_gmx} trjconv \
- -f {from_dir}/prd{extension} \
- -o {out_dir}/input{settings.structure_extension} \
- -s {settings.structure} \
- -b {results[replica_rank].frame} \
- -n {settings.index_file} \
- -pbc whole \
- -center \
- -e {results[replica_rank].frame} \
- 1> {from_dir}/trjconv.log 2>&1" # NOQA: E221
+ centering_option = "-center"
+ args_to_trjconv = f"{settings.centering_selection} System"
+ else:
+ centering_option = ""
+ args_to_trjconv = "System"
+
+ # treatment for nojump option
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
else:
- cmd_trjconv = f"echo System \
- | {settings.cmd_gmx} trjconv \
- -f {from_dir}/prd{extension} \
- -o {out_dir}/input{settings.structure_extension} \
- -s {settings.structure} \
- -b {results[replica_rank].frame} \
- -e {results[replica_rank].frame} \
- 1> {from_dir}/trjconv.log 2>&1" # NOQA: E221
+ pbc_option = "-pbc whole"
+
+ # First convert trj with -pbc option, and then extract with -b, -e options
+ # If we do the both at the same time, the -pbc nojump does not work properly
+ cmd_trjconv = f"echo {args_to_trjconv} \
+ | {settings.cmd_gmx} trjconv \
+ -f {from_dir}/prd{extension} \
+ -o {from_dir}/prd_image{extension} \
+ -s {from_dir}/prd.tpr \
+ -n {settings.index_file} \
+ {pbc_option} \
+ {centering_option} \
+ 1> {from_dir}/trjconv.log 2>&1" # NOQA: E221
+
res_trjconv = subprocess.run(cmd_trjconv, shell=True)
+
if res_trjconv.returncode != 0:
LOGGER.error("error occurred at trjconv command")
LOGGER.error(f"see {from_dir}/trjconv.log")
exit(1)
+ cmd_extract = f"echo System \
+ | {settings.cmd_gmx} trjconv \
+ -f {from_dir}/prd_image{extension} \
+ -o {out_dir}/input{settings.structure_extension} \
+ -s {from_dir}/prd.tpr \
+ -b {results[replica_rank].frame} \
+ -e {results[replica_rank].frame} \
+ -novel \
+ 1> {from_dir}/extract.log 2>&1" # NOQA: E221
+
+ res_extract = subprocess.run(cmd_extract, shell=True)
+
+ if res_extract.returncode != 0:
+ LOGGER.error("error occurred at extract command")
+ LOGGER.error(f"see {from_dir}/extract.log")
+ exit(1)
+
+ # remove the intermediate trajectory
+ res_rm = subprocess.run(f"rm {from_dir}/prd_image{extension}", shell=True)
+ if res_rm.returncode != 0:
+ LOGGER.error("error occurred at rm command")
+ exit(1)
+
def frame_to_time(self, settings: MDsettings) -> None:
# Output correspondence between frame and time to file
dir_0_1 = settings.each_replica(_cycle=0, _replica=1)
diff --git a/pacs/mdrun/simulator/gromacs.py b/pacs/mdrun/simulator/gromacs.py
index a19b1cf..a1ecebf 100644
--- a/pacs/mdrun/simulator/gromacs.py
+++ b/pacs/mdrun/simulator/gromacs.py
@@ -15,7 +15,7 @@ class GROMACS(SuperSimulator):
def run_md(self, settings: MDsettings, cycle: int, replica: int) -> None:
dir = settings.each_replica(_cycle=cycle, _replica=replica)
self.run_grompp(settings, dir)
- cmd_mdrun = f"{settings.cmd_mpi} {settings.cmd_serial} \
+ cmd_mdrun = f"exec {settings.cmd_mpi} {settings.cmd_serial} \
-deffnm {dir}/prd 1> {dir}/mdrun.log 2>&1" # NOQA: E221
# Depending on the supercomputer environment, MPI-related hangs may occur in rare cases.
@@ -44,7 +44,7 @@ def run_md(self, settings: MDsettings, cycle: int, replica: int) -> None:
exit(1)
def run_grompp(self, settings: MDsettings, dir: str) -> None:
- cmd_grompp = f"{settings.cmd_gmx} grompp \
+ cmd_grompp = f"exec {settings.cmd_gmx} grompp \
-f {settings.mdconf} \
-o {dir}/prd.tpr \
-p {settings.topology} \
@@ -81,7 +81,7 @@ def run_MPI(
p.close()
groupdirtxt = " ".join(groupdir)
- cmd_mdrun = f"{settings.cmd_mpi} {settings.cmd_parallel} \
+ cmd_mdrun = f"exec {settings.cmd_mpi} {settings.cmd_parallel} \
-multidir {groupdirtxt} \
-deffnm prd \
1> {dir}/mdrun.log 2>&1" # NOQA: E221
diff --git a/pacs/models/settings.py b/pacs/models/settings.py
index fb06ea0..df53a41 100644
--- a/pacs/models/settings.py
+++ b/pacs/models/settings.py
@@ -39,6 +39,7 @@ class MDsettings:
selection2 (str): selection for evaluation type
selection3 (str): selection for evaluation type
selection4 (str): selection for evaluation type
+ nojump (bool): whether to execute nojump treatment (valid only for gmx)
"""
# basic
@@ -47,7 +48,7 @@ class MDsettings:
n_replica: int = 1
n_parallel: int = 1
centering: bool = True
- centering_selection: str = "protein"
+ centering_selection: str = None
working_dir: Path = Path("./.")
# simulator
@@ -87,6 +88,9 @@ class MDsettings:
# rmfile option
rmfile: bool = False
+ # nojump option
+ nojump: bool = False
+
def each_replica(
self, _trial: int = None, _cycle: int = None, _replica: int = None
) -> str:
@@ -158,7 +162,7 @@ def check(self) -> None:
LOGGER.error(f"trial number {self.trial} is out of range 1..999")
exit(1)
if self.max_cycle < 0 or self.max_cycle > 999:
- LOGGER.error(f"cycle number {self.cycle} is out of range 1..999")
+ LOGGER.error(f"cycle number {self.max_cycle} is out of range 1..999")
exit(1)
if self.n_replica < 0 or self.n_replica > 999:
LOGGER.error(f"n_replica number {self.replica} is out of range 1..999")
@@ -206,12 +210,19 @@ def check(self) -> None:
LOGGER.error(f"{self.type} is not supported")
exit(1)
+ # nojump
+ self.nojump = self.check_bool(self.nojump)
+ if self.analyzer != "gromacs" or self.simulator != "gromacs":
+ LOGGER.warn(
+ '"nojump = True" is valid when simulator and analyzer are both "gromacs"'
+ )
+
# change centering_selection to match analyzer
- if self.analyzer == "mdtraj":
+ if self.analyzer == "mdtraj" and self.centering_selection is None:
self.centering_selection = "protein"
- if self.analyzer == "cpptraj":
+ if self.analyzer == "cpptraj" and self.centering_selection is None:
self.centering_selection = "@CA,C,O,N,H"
- if self.analyzer == "gromacs":
+ if self.analyzer == "gromacs" and self.centering_selection is None:
self.centering_selection = "Protein"
# threshold check
diff --git a/pacs/utils/genrepresent.py b/pacs/utils/genrepresent.py
index 015490d..d07c747 100644
--- a/pacs/utils/genrepresent.py
+++ b/pacs/utils/genrepresent.py
@@ -57,7 +57,7 @@ def genrepresent_mdtraj(settings: MDsettings) -> None:
top=settings.topology,
)
- extracted_traj = traj[: last_frame + 1]
+ extracted_traj = traj[1 : last_frame + 1]
trajs.append(extracted_traj)
# concatenate the repr trajectory from all cycles
@@ -98,13 +98,18 @@ def genrepresent_gmx(settings: MDsettings) -> None:
rep_dir = Path(settings.each_replica(_cycle=cycle, _replica=selected_replica))
trj = f"{rep_dir}/{settings.trajectory}"
top = settings.topology
+ # extract trajectory from frame 0 to last_frame,
+ # but the first frame is truncated when performing trjcat
+ # but only for the first cycle, the first frame is not truncated,
+ # so not include the first frame
+ first_frame = 1 if cycle == 0 else 0
cmd_trjconv = f"echo System \
| {settings.cmd_gmx} trjconv \
-f {trj} \
-s {top} \
-o {repr_dir}/repr_cycle{cycle:03}{ext} \
- -b 0 -e {frame_time_dict[last_frame]} \
- -pbc mol \
+ -b {frame_time_dict[first_frame]} \
+ -e {frame_time_dict[last_frame]} \
1> {repr_dir}/{cycle}_trjconv.log 2>&1" # NOQA: E221
res_trjconv = subprocess.run(cmd_trjconv, shell=True)
if res_trjconv.returncode != 0:
@@ -211,8 +216,11 @@ def genrepresent_cpptraj(settings: MDsettings) -> None:
def load_frame_to_time(settings: MDsettings) -> Dict[int, float]:
# Read correspondence between frame and time from file
dir_0_1 = settings.each_replica(_cycle=0, _replica=1)
+ # load frame to time
+ # (no need to do "skiprows=1"
+ # because it is recognized as a commment line by default)
frame_time_array = np.loadtxt(
- f"{dir_0_1}/frame_time.tsv", delimiter="\t", skiprows=1
+ f"{dir_0_1}/frame_time.tsv", comments="#", delimiter="\t", skiprows=0
)
frame_time_dict: Dict[int, float] = {}
for frame, time in frame_time_array:
diff --git a/pacs/utils/parser.py b/pacs/utils/parser.py
index 035435b..9ee7c83 100644
--- a/pacs/utils/parser.py
+++ b/pacs/utils/parser.py
@@ -4,6 +4,7 @@
from collections import defaultdict
from pathlib import Path
+import tomli
from pacs._version import __version__
from pacs.models.settings import MDsettings
from pacs.utils.fit import fit, fit_trial
@@ -13,7 +14,7 @@
from pacs.utils.genrepresent import genrepresent
from pacs.utils.logger import generate_logger
from pacs.utils.rmfile import rmfile_all
-from pacs.utils.rmmol import make_top, rmmol_all
+from pacs.utils.rmmol import make_top, rmmol_all, rmmol_log_add_info
# toml lib is not used
@@ -211,6 +212,11 @@ def parse(self) -> MDsettings:
choices=[".xtc", ".trr"],
help="gromacs trajectory extension. (e.g. -e .xtc, -e .trr)",
)
+ parser_rmmol_gmx.add_argument(
+ "--nojump",
+ action="store_true",
+ help="execute gromacs pbc nojump treatment",
+ )
# rmmol cpptraj
parser_rmmol_cpptraj = parser_rmmol_sub.add_parser(
@@ -873,6 +879,7 @@ def find_user_defined_variable(toml) -> str:
dic["analyzer"] = "cpptraj"
conf = MDsettings.__new__(MDsettings)
conf.__dict__.update(dic)
+ self.check_version(f"./trial{args.trial:03}")
genrepresent(conf)
exit(0)
@@ -892,12 +899,15 @@ def find_user_defined_variable(toml) -> str:
dic["analyzer"] = "gromacs"
dic["cmd_gmx"] = args.cmd_gmx
dic["index_file"] = args.index_file
+ dic["nojump"] = args.nojump
elif sys.argv[2] == "cpptraj":
dic["analyzer"] = "cpptraj"
dic["topology"] = args.topology
conf = MDsettings(**dic)
+ self.check_version(f"./trial{args.trial:03}")
make_top(conf)
rmmol_all(conf)
+ rmmol_log_add_info(conf)
exit(0)
if sys.argv[1] == "rmfile":
@@ -905,6 +915,7 @@ def find_user_defined_variable(toml) -> str:
dic["trial"] = int(args.trial)
dic["simulator"] = args.simulator
conf = MDsettings(**dic)
+ self.check_version(f"./trial{args.trial:03}")
rmfile_all(conf)
exit(0)
@@ -941,6 +952,7 @@ def find_user_defined_variable(toml) -> str:
dic["selection2"] = args.ref_selection
dic["n_parallel"] = int(args.n_parallel)
conf = MDsettings(**dic)
+ self.check_version(f"./trial{args.trial:03}")
fit_trial(conf, args.trj_file, args.out)
exit(0)
@@ -970,6 +982,7 @@ def find_user_defined_variable(toml) -> str:
dic["selection1"] = args.selection
dic["analyzer"] = "mdtraj"
conf = MDsettings(**dic)
+ self.check_version(f"./trial{args.trial:03}")
gencom_trial(
conf,
args.trial,
@@ -984,6 +997,7 @@ def find_user_defined_variable(toml) -> str:
if len(sys.argv) == 2:
LOGGER.error("Use -h")
exit(1)
+ self.check_version(f"./trial{args.trial:03}")
if sys.argv[2] == "comvec":
comvec.cal_feature_trial(
args.trial,
@@ -1041,23 +1055,42 @@ def parse_line(self, line: str):
return None
def read_input(self, file: str) -> dict:
- dic = defaultdict(str)
- with open(file, "r") as f:
- for line in f.readlines():
- line = line.strip()
- if line.startswith("#"):
- continue
- if "=" not in line:
- continue
- result = self.parse_line(line)
- if result is None:
- continue
- if len(result) != 2:
- continue
- key, value = result[0].strip(), result[1].strip()
- key = key.replace('"', "").replace("'", "")
- value = value.replace('"', "").replace("'", "")
- if "#" in value:
- value = value.split("#")[0].strip()
- dic[key] = value
- return dic
+ with open(file, "rb") as f:
+ toml_dict = tomli.load(f)
+ return toml_dict
+ # dic = defaultdict(str)
+ # with open(file, "r") as f:
+ # for line in f.readlines():
+ # line = line.strip()
+ # if line.startswith("#"):
+ # continue
+ # if "=" not in line:
+ # continue
+ # result = self.parse_line(line)
+ # if result is None:
+ # continue
+ # if len(result) != 2:
+ # continue
+ # key, value = result[0].strip(), result[1].strip()
+ # key = key.replace('"', "").replace("'", "")
+ # value = value.replace('"', "").replace("'", "")
+ # if "#" in value:
+ # value = value.split("#")[0].strip()
+ # dic[key] = value
+ # return dic
+
+ def check_version(self, trial_dir: str):
+ version_file = f"{trial_dir}/pacstk.version"
+ if Path(version_file).exists():
+ with open(version_file) as f:
+ line = f.readline().strip()
+ if __version__ != line:
+ LOGGER.error("PaCS-Toolkit version error")
+ LOGGER.error(
+ f"Version used in {self.settings.each_trial()} is {line},"
+ )
+ LOGGER.error(f"But you're using PaCS-Toolkit {__version__}")
+ exit(1)
+ else:
+ LOGGER.error(f"Version file: {version_file} is not found")
+ exit(1)
diff --git a/pacs/utils/rmfile.py b/pacs/utils/rmfile.py
index 012990c..4daa4cb 100644
--- a/pacs/utils/rmfile.py
+++ b/pacs/utils/rmfile.py
@@ -49,10 +49,12 @@ def rmfile(settings: MDsettings, cycle: int) -> None:
run_rm(f"{dir}/mdout.mdp")
# .cpt
- run_rm(f"{dir}/prd.cpt")
+ run_rm(f"-f {dir}/*.cpt")
# .tpr
- run_rm(f"{dir}/prd.tpr")
+ # keep .tpr files if rmmol=false in mdrun, in case you want to do rmmol afterward
+ if cycle != 0 and settings.rmmol:
+ run_rm(f"{dir}/prd.tpr")
# '#backup#': use -f
run_rm(f"-f {dir}/\#*") # NOQA: W605
diff --git a/pacs/utils/rmmol.py b/pacs/utils/rmmol.py
index ea91d2c..2b4253d 100644
--- a/pacs/utils/rmmol.py
+++ b/pacs/utils/rmmol.py
@@ -53,8 +53,8 @@ def make_top_mdtraj(settings: MDsettings) -> None:
if Path(f"{dir}/rmmol_top.pdb").exists():
LOGGER.warn(f"{dir}/rmmol_top.pdb already exists. continue")
if not Path(f"{dir}/prd{ext}").exists():
- LOGGER.warn(f"{dir}/prd{ext} does not exist")
- return
+ LOGGER.error(f"{dir}/prd{ext} does not exist")
+ exit(1)
traj = md.load_frame(
f"{dir}/prd{ext}",
index=0,
@@ -71,23 +71,35 @@ def make_top_gmx(settings: MDsettings) -> None:
ext = settings.trajectory_extension
if Path(f"{dir}/rmmol_top.pdb").exists():
LOGGER.warn(f"{dir}/rmmol_top.pdb already exists. continue")
- if Path(f"{dir}/prd.gro").exists():
- pass
- else:
+ if not Path(f"{dir}/prd{ext}").exists():
LOGGER.error(f"{dir}/prd{ext} does not exist")
exit(1)
# create new gro file without water
cmd_convert_gro = f"echo {settings.keep_selection} \
| {settings.cmd_gmx} trjconv \
- -f {dir}/input.gro \
- -s {dir}/input.gro \
+ -f {dir}/prd{ext} \
+ -s {settings.each_replica(_cycle=0, _replica=1)}/prd.tpr \
-n {settings.index_file} \
-o {dir}/rmmol_top.pdb \
+ -b 0 \
+ -e 0 \
1> {dir}/convert_gro.log 2>&1" # NOQA: E221
res_convert_gro = subprocess.run(cmd_convert_gro, shell=True)
if res_convert_gro.returncode != 0:
LOGGER.error("error occurred at trjconv command")
exit(1)
+
+ # create a new topology file
+ cmd_convert_tpr = f"echo {settings.keep_selection} \
+ | {settings.cmd_gmx} convert-tpr \
+ -s {settings.each_replica(_cycle=0, _replica=1)}/prd.tpr \
+ -o {dir}/rmmol_top.tpr \
+ -n {settings.index_file} \
+ 1> {dir}/convert_tpr.log 2>&1" # NOQA: E221
+ res_convert_tpr = subprocess.run(cmd_convert_tpr, shell=True)
+ if res_convert_tpr.returncode != 0:
+ LOGGER.error("error occurred at convert-tpr command")
+ exit(1)
LOGGER.info(f"topology file rmmol_top.pdb has been created in {dir}")
@@ -96,9 +108,7 @@ def make_top_cpptraj(settings: MDsettings) -> None:
ext = settings.trajectory_extension
if Path(f"{dir}/rmmol_top.pdb").exists():
LOGGER.warn(f"{dir}/rmmol_top.pdb already exists. continue")
- if Path(f"{dir}/prd{ext}").exists():
- pass
- else:
+ if not Path(f"{dir}/prd{ext}").exists():
LOGGER.error(f"{dir}/prd{ext} does not exist")
exit(1)
cmd_cpptraj = [
@@ -204,17 +214,24 @@ def rmmol_replica_gmx(
return
# create new trajectory without water
+ if settings.nojump is True:
+ pbc_option = "-pbc nojump"
+ else:
+ pbc_option = "-pbc mol -ur compact"
+
cmd_trjconv = f"echo {settings.keep_selection} \
| {settings.cmd_gmx} trjconv \
-f {dir}/prd{ext} \
- -s {dir}/input.gro \
+ -s {dir}/prd.tpr \
-o {dir}/prd_rmmol{ext} \
-n {settings.index_file} \
+ {pbc_option} \
1> {dir}/rmmol.log 2>&1" # NOQA: E221
res_trjconv = subprocess.run(cmd_trjconv, shell=True)
if res_trjconv.returncode != 0:
LOGGER.error("error occurred at trjconv command")
exit(1)
+
# remove previous trajectory
if not last_cycle:
res_rm = subprocess.run(f"rm {dir}/prd{ext}", shell=True)
@@ -262,6 +279,27 @@ def rmmol_replica_cpptraj(
exit(1)
+def rmmol_log_add_info(settings: MDsettings) -> None:
+ if settings.analyzer == "mdtraj":
+ pass
+ elif settings.analyzer == "gromacs":
+ rmmol_log_add_info_gmx(settings)
+ elif settings.analyzer == "cpptraj":
+ pass
+ else:
+ LOGGER.error("analyze tool is not specified")
+ exit(1)
+
+
+def rmmol_log_add_info_gmx(settings: MDsettings) -> None:
+ LOGGER.info("prd.tpr files are probably no longer needed")
+ LOGGER.info("it is recommended to remove prd.tpr files to save disk space")
+ LOGGER.info(
+ "you can manually regenerate the prd.tpr files from input.gro files \
+ even if you need them later"
+ )
+
+
def rmmol_all(settings: MDsettings) -> None:
max_cycle = detect_n_cycle(settings)
for cycle in range(max_cycle + 1):
diff --git a/requirements.txt b/requirements.txt
index b4a2799..2f78a46 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,10 @@
-mdtraj>=1.9.9
+tomli>=2.0.1
+mdtraj==1.9.9
scipy>=1.7.3
scikit-learn>=1.0.2
matplotlib>=3.5.3
+numpy<2.0.0
pandas>=1.1.5
+deeptime==0.4.4
# pyemma>=2.5.12
ipykernel>=6.16.2
diff --git a/run_fmt_lint.sh b/run_fmt_lint.sh
old mode 100644
new mode 100755
diff --git a/setup.py b/setup.py
index 603acb4..f0b02d7 100644
--- a/setup.py
+++ b/setup.py
@@ -10,8 +10,8 @@
version=__version__, # type: ignore[name-defined] # NOQA: F821
packages=find_packages(),
description=(
- "PaCS-ToolKit:",
- "Tool Kit for Parallel Cascade Selection Molecular Dynamic Simulation",
+ "PaCS-Toolkit:",
+ "Tool kit for Parallel Cascade Selection Molecular Dynamic Simulation",
),
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
@@ -27,33 +27,36 @@
"License :: OSI Approved :: MIT License",
"Operating System :: Unix",
"Programming Language :: Python",
- "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
],
install_requires=[
- "numpy>=1.16.0",
+ "numpy<2.0.0",
+ "tomli>=2.0.1",
],
extras_require={
"mdtraj": [
- "mdtraj>=1.9.9",
+ "mdtraj==1.9.9",
"scipy>=1.7.3",
"scikit-learn>=1.0.2",
],
"msm": [
- "mdtraj>=1.9.9",
+ "mdtraj==1.9.9",
"matplotlib>=3.5.3",
"scipy>=1.7.3",
"pandas>=1.1.5",
# "pyemma>=2.5.12",
+ "deeptime==0.4.4",
"ipykernel>=6.16.2",
"scikit-learn>=1.0.2",
],
"all": [
- "mdtraj>=1.9.9",
+ "mdtraj==1.9.9",
"scipy>=1.7.3",
"scikit-learn>=1.0.2",
"matplotlib>=3.5.3",
"pandas>=1.1.5",
# "pyemma>=2.5.12",
+ "deeptime==0.4.4",
"ipykernel>=6.16.2",
],
},