temporary undo of multi flexible dihedral option

selimsami · Sep 16, 2024 · b9d2f8d · b9d2f8d
1 parent 92a5c62
commit b9d2f8d
Show file tree

Hide file tree

Showing 20 changed files with 362 additions and 449 deletions.
diff --git a/qforce/additionalstructures.py b/qforce/additionalstructures.py
@@ -1,6 +1,7 @@
 import os
 from shutil import copy2 as copy
 from itertools import chain
+import numpy as np
 #
 from colt import Colt
 from ase.io import read
@@ -19,7 +20,7 @@ def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
         cls.__classes[cls.name] = cls
         cls._user_input += ("\n# Weight of the structures in the forcefield fit"
-                            "\n weight = 1 :: int :: >1 \n")
+                            "\n weight = 1 :: float \n")
 
     @classmethod
     def classes(cls):
@@ -151,24 +152,35 @@ class MetaDynamics(AdditionalStructureCreator):
 
     _user_input = """
     compute = none :: str :: [none, en, grad]
-
+    
+    # Number of frames used for fitting
+    n_fit = 100 :: int
+    # Number of frames used for validation
+    n_valid = 0 :: int 
     # temperature (K)
     temp = 800 :: float :: >0
-    # total simulation time (ps)
-    time = 50.0 :: float :: >0
     # interval for trajectory printout (fs)
     dump = 500.0 :: float :: >0
     # time step for propagation (fs)
     step = 2.0 :: float :: >0
     # Bond constraints (0: none, 1: H-only, 2: all)
     shake = 0
+
     """
 
     def __init__(self, weight, compute, config):
         self.compute = compute
         self.weight = weight
-        self.xtbinput = {key: value for key, value in config.items()
-                         if key not in ('weight', 'compute')}
+        self.n_fit = config['n_fit']
+        self.n_valid = config['n_valid']
+        self.total_frames = self.n_fit + self.n_valid
+
+        time = config['dump'] * self.total_frames / 1e3
+
+        self.xtbinput = {'time': time}
+        for item in ['temp', 'dump', 'step', 'shake']:
+            self.xtbinput[item] = config[item]
+
         self._data = {
                 'en':  {'calculations': [], 'results': []},
                 'grad':  {'calculations': [], 'results': []},
@@ -232,14 +244,16 @@ def parse(self, qm):
         grad = self._data['grad']
 
         en_results = []
-        for calculation in en['calculations']:
+        for i, calculation in enumerate(en['calculations']):
             files = calculation.check()
-            en_results.append(qm._read_energy(files))
+            if i < self.n_fit:
+                en_results.append(qm._read_energy(files))
 
         grad_results = []
-        for calculation in grad['calculations']:
+        for i, calculation in enumerate(grad['calculations']):
             files = calculation.check()
-            grad_results.append(qm._read_gradient(files))
+            if i < self.n_fit:
+                grad_results.append(qm._read_gradient(files))
 
         en['results'] = en_results
         grad['results'] = grad_results
@@ -316,12 +330,12 @@ def minval(itr, value=None):
 class AdditionalStructures(Colt):
 
     _user_input = """
-    energy_element_weights = 1 :: int :: >1
-    gradient_element_weights = 1 :: int :: >1
-    hessian_element_weights = 1 :: int :: >1
+    energy_element_weights = 1 :: float
+    gradient_element_weights = 1 :: float
+    hessian_element_weights = 1 :: float
 
-    hessian_weight = 1 :: int :: >1
-    dihedral_weight = 1 :: int :: >1
+    hessian_weight = 1 :: float
+    dihedral_weight = 1 :: float
 
     """
 
@@ -377,30 +391,24 @@ def create(self, qm):
         for name, creator in self.creators.items():
             creator.parse(qm)
 
-    def lowest_energy(self, only_hessian=True):
-        minimum = 10000.0
-        if only_hessian is True:
-            for creator in self.creators.values():
-                min_h = minval((out.energy for out in creator.hessouts()), minimum)
-                if min_h < minimum:
-                    minimum = min_h
-            return minimum
+    def get_lowest_energy_and_coords(self):
+        minimum = np.inf
+        coords = None
 
         for creator in self.creators.values():
-            min_e = minval((out.energy for out in creator.enouts()), minimum)
-            if min_e < minimum:
-                minimum = min_e
-            min_g = minval((out.energy for out in creator.gradouts()), minimum)
-            if min_g < minimum:
-                minimum = min_g
-            min_h = minval((out.energy for out in creator.hessouts()), minimum)
-            if min_h < minimum:
-                minimum = min_h
-        return minimum
+            for calctype in [creator.enouts(), creator.gradouts(), creator.hessouts()]:
+                if calctype:
+                    argmin = np.argmin((out.energy for out in calctype))
+                    lowest = calctype[argmin].energy
+                    if lowest < minimum:
+                        minimum = lowest
+                        coords = calctype[argmin].coords
+        return minimum, coords
 
     def normalize(self):
-        emin = self.lowest_energy(only_hessian=True)
+        emin, coords = self.get_lowest_energy_and_coords()
         self.subtract_energy(emin)
+        return emin, coords
 
     def subtract_energy(self, energy):
         for creator in self.creators.values():

diff --git a/qforce/data/bond_dissociation_energy.csv b/qforce/data/bond_dissociation_energy.csv
@@ -44,14 +44,15 @@
 6,8,1,*,6_1.0 6_1.0,351,ether
 6,8,1,8_2.0 8_1.0,6_1.0 6_1.0,423,ester
 6,8,1,8_2.0 8_1.0,1_1.0 6_1.0,458,acetic acid
-6,8,1.5,7_1.0,*,600,amide (guess)
+6,8,1.5,*,*,600,amide (guess)
 6,8,2,*,*,732,aldehyde
 6,8,2,8_2.0 8_2.0,*,799,CO2
 6,9,1,*,*,460,fluorine
 6,16,1,*,*,310,generic C-S
 6,17,1,*,*,350,chlorine
 6,35,1,*,*,294,bromine
 6,53,1,*,*,232,iodine
+7,7,1,*,*,297,diamine
 7,8,1,*,*,305,NO2
 7,8,1.5,*,*,400,NO3-
 7,8,2,*,*,514,HN=O
@@ -60,4 +61,4 @@
 8,16,2,*,6_1.0 6_1.0 8_2.0 8_2.0 ,389,DMSO
 9,9,1,*,*,157,F-F
 15,16,1,*,*,350,Generic P-O
-15,16,2,*,*,575,Generic P=O
+15,16,2,*,*,575,Generic P=O
diff --git a/qforce/fit.py b/qforce/fit.py
@@ -0,0 +1,169 @@
+from sklearn.linear_model import Ridge
+import numpy as np
+#
+from .molecule.bond_and_angle_terms import MorseBondTerm
+
+
+def multi_fit(logger, config, mol, structs):
+    """
+    Doing linear/Ridge regression with all energies, forces and hessian terms:
+    Ax = B
+
+    A: MM contributions to each e/f/h item by each fitted FF term (shape: n_items x n_fit_terms)
+    B: Total QM e/f/h minus MM contributions from non-fitted terms (like non-bonded) (shape: n_items)
+
+    """
+
+    A = []
+    B = []
+    weights = []
+
+    # Compute energies for the lowest energy structure, to subtract from other energies
+    e_lowest, f_lowest = calc_forces(mol.qm_minimum_coords, mol)
+    # f_lowest *= -1  # convert from force to gradient
+    # f_lowest = f_lowest.reshape(mol.terms.n_fitted_terms+1, mol.topo.n_atoms*3)
+    # qm_force = np.zeros((mol.n_atoms, 3))
+    # A += list(f_lowest[:-1].T)
+    # B += list(qm_force.flatten() - f_lowest[-1])
+    # weights += [1e7]*qm_force.size
+    # A.append(e_lowest[:-1] - e_lowest[:-1])
+    # B.append(0 - e_lowest[-1] + e_lowest[-1])
+    # weights.append(1e7)
+
+    logger.info("Calculating the MM hessian matrix elements for all structures ...")
+    for weight, qm in structs.hessitr():
+        hessian, full_md_hessian_1d = [], []
+        non_fit = []
+        qm_hessian = np.copy(qm.hessian)
+
+        full_md_hessian = calc_hessian(qm.coords, mol)
+        count = 0
+        for i in range(mol.topo.n_atoms*3):
+            for j in range(i+1):
+                hes = (full_md_hessian[i, j] + full_md_hessian[j, i]) / 2
+                if all([h == 0 for h in hes]) or np.abs(qm_hessian[count]) < 0.0001:
+                    qm_hessian = np.delete(qm_hessian, count)
+                    full_md_hessian_1d.append(np.zeros(mol.terms.n_fitted_terms))
+                else:
+                    count += 1
+                    hessian.append(hes[:-1])
+                    full_md_hessian_1d.append(hes[:-1])
+                    non_fit.append(hes[-1])
+
+        difference = qm_hessian - np.array(non_fit)
+        A += hessian
+        B += list(difference)
+        weights += [weight]*difference.size
+
+    logger.info("Calculating energies/forces for all additional structures...")
+    for weight_e, qmen in structs.enitr():
+        mm_energy, _ = calc_forces(qmen.coords, mol)
+        B.append(qmen.energy - mm_energy[-1] + e_lowest[-1])
+        A.append(mm_energy[:-1] - e_lowest[:-1])
+        weights.append(weight_e)
+
+    full_qm_forces = []
+    full_mm_forces = []
+    full_qm_energies = []
+    full_mm_energies = []
+
+    # scale for energy structs, grad structs only have weight_f stored, but the ratio is constant!
+    factor_e = structs.energy_weight / structs.gradient_weight
+
+    for weight_f, qmgrad in structs.graditr():
+        weight_e = factor_e * weight_f
+        mm_energy, mm_force = calc_forces(qmgrad.coords, mol)
+        mm_force *= -1  # convert from force to gradient
+        mm_force = mm_force.reshape(mol.terms.n_fitted_terms+1, mol.topo.n_atoms*3)
+
+        full_qm_forces.append(qmgrad.gradient)
+        full_mm_forces.append(mm_force[:-1].T)
+        full_qm_energies.append(qmgrad.energy)
+        full_mm_energies.append(mm_energy[:-1]-e_lowest[:-1])
+
+        A += list(mm_force[:-1].T)
+        B += list(qmgrad.gradient.flatten() - mm_force[-1])
+        weights += [weight_f]*qmgrad.gradient.size
+
+        A.append(mm_energy[:-1] - e_lowest[:-1])
+        B.append(qmgrad.energy - mm_energy[-1] + e_lowest[-1])
+        weights.append(weight_e)
+
+    logger.info("Fitting the force field parameters...")
+    reg = Ridge(alpha=1e-6, fit_intercept=False).fit(A, B, sample_weight=weights)
+    fit = reg.coef_
+    logger.info("Done!\n")
+
+    with mol.terms.add_ignore('charge_flux'):
+        for term in mol.terms:
+            if term.idx < len(fit):
+                term.set_fitparameters(fit)
+
+    full_md_hessian_1d = np.sum(full_md_hessian_1d * fit, axis=1)
+
+    nqmgrads = sum(1 for _ in structs.graditr())
+
+    if nqmgrads > 0:
+        full_qm_energies = np.array(full_qm_energies)
+        print(full_qm_energies.shape)
+        full_mm_energies = np.array(full_mm_energies)
+        print(full_mm_energies.shape)
+        full_qm_forces = np.array(full_qm_forces)
+        print(full_qm_forces.shape)
+        full_mm_forces = np.array(full_mm_forces)
+        print(full_mm_forces.shape)
+        full_mm_forces = np.sum(full_mm_forces * fit, axis=2)
+        full_mm_energies = np.sum(full_mm_energies * fit, axis=1)
+
+        for struct in range(nqmgrads):
+            print('struct', struct)
+            print('QM:\n', full_qm_energies[struct], '\n', full_qm_forces[struct])
+            print('MM:\n', full_mm_energies[struct], '\n', full_mm_forces[struct].reshape((mol.n_atoms, 3)))
+            print('\n')
+
+    err = full_md_hessian_1d-qm.hessian
+    mae = np.abs(err).mean()
+    rmse = (err**2).mean()**0.5
+    max_err = np.max(np.abs(err))
+    print('mae:', mae*0.2390057361376673)
+    print('rmse:', rmse*0.2390057361376673)
+    print('max_err:', max_err*0.2390057361376673)
+
+    return full_md_hessian_1d
+
+
+def calc_hessian(coords, mol):
+    """
+    Scope:
+    -----
+    Perform displacements to calculate the MD hessian numerically.
+    """
+    full_hessian = np.zeros((3*mol.topo.n_atoms, 3*mol.topo.n_atoms,
+                             mol.terms.n_fitted_terms+1))
+
+    with mol.terms.add_ignore('charge_flux'):
+        for a in range(mol.topo.n_atoms):
+            for xyz in range(3):
+                coords[a][xyz] += 1e-5
+                _, f_plus = calc_forces(coords, mol)
+                coords[a][xyz] -= 2e-5
+                _, f_minus = calc_forces(coords, mol)
+                coords[a][xyz] += 1e-5
+                diff = - (f_plus - f_minus) / 2e-5
+                full_hessian[a*3+xyz] = diff.reshape(mol.terms.n_fitted_terms+1, 3*mol.topo.n_atoms).T
+    return full_hessian
+
+
+def calc_forces(coords, mol):
+    """
+    Scope:
+    ------
+    For each displacement, calculate the forces from all terms.
+
+    """
+    energies = np.zeros(mol.terms.n_fitted_terms+1)
+    force = np.zeros((mol.terms.n_fitted_terms+1, mol.topo.n_atoms, 3))
+
+    for term in mol.terms:
+        term.do_fitting(coords, energies, force)
+
diff --git a/qforce/forcefield/forcefield.py b/qforce/forcefield/forcefield.py
@@ -119,6 +119,7 @@ def _get_bond_dissociation_energies(self, md_data):
         bde_dict = self._read_bond_dissociation_energy_csv(md_data)
 
         for a1, a2, props in self.topo.graph.edges(data=True):
+            print(a1, a2, props)
             if props['type'] not in bde_dict:
                 raise ValueError('Morse potential chosen, but dissociation energy not known for this atom number pair '
                                  f'with the bond order in parenthesis: {props["type"]}. '

diff --git a/qforce/forcefield/openmm.py b/qforce/forcefield/openmm.py
@@ -18,7 +18,11 @@ class OpenMM(ForcefieldSettings):
             'cross_dihed_angle': ('periodic', 'cos_cube', False),
             'cross_dihed_bond': ('periodic', 'cos_cube', False),
             'cross_dihed_angle_angle': ('periodic', 'cos_cube', False),
-            'dihedral/flexible': ('periodic', 'cos_cube', False),
+
+            # 'dihedral/flexible': ('periodic', 'cos_cube', False),
+            'dihedral/flexible': True,
+            'dihedral/cos_cube': True,
+
             'dihedral/rigid': True,
             'dihedral/improper': True,
             'dihedral/inversion': True,
@@ -313,7 +317,7 @@ def write_cross_dihed_bond_term(self, term, writer):
                                        'param1': k, 'param2': equ, 'param3': n, 'param4': phi0})
 
     def write_cross_cos_cube_dihed_bond_header(self, forces):
-        db_cross_eq = 'k * (cos(dihedral(p1,p2,p3,p4))+1)^3 * (distance(p6,p5)-r0)'
+        db_cross_eq = 'k * (cos(dihedral(p1,p2,p3,p4))+1)^4 * (distance(p6,p5)-r0)'
         db_force = ET.SubElement(forces, 'Force', {'energy': db_cross_eq, 'name': 'DihedralBond', 'usesPeriodic': '0',
                                                    'type': 'CustomCompoundBondForce', 'forceGroup': '0',
                                                    'particles': '6', 'version': '3'})
@@ -369,7 +373,7 @@ def write_cross_dihed_angle_term(self, term, writer):
                                        'param1': k, 'param2': equ, 'param3': n, 'param4': phi0})
 
     def write_cross_cos_cube_dihed_angle_header(self, forces):
-        da_cross_eq = 'k * (cos(dihedral(p1,p2,p3,p4))+1)^3 * (angle(p5,p6,p7)-theta0)'
+        da_cross_eq = 'k * (cos(dihedral(p1,p2,p3,p4))+1)^4 * (angle(p5,p6,p7)-theta0)'
         da_force = ET.SubElement(forces, 'Force', {'energy': da_cross_eq, 'name': 'DihedralAngle', 'usesPeriodic': '0',
                                                    'type': 'CustomCompoundBondForce', 'forceGroup': '0',
                                                    'particles': '7', 'version': '3'})
@@ -426,7 +430,7 @@ def write_cross_dihed_angle_angle_term(self, term, writer):
                                        'param1': k, 'param2': equ1, 'param3': equ2, 'param4': n, 'param5': phi0})
 
     def write_cross_cos_cube_dihed_angle_angle_header(self, forces):
-        da_cross_eq = 'k * (cos(dihedral(p1,p2,p3,p4))+1)^3 * (angle(p1,p2,p3)-theta0_1) * (angle(p2,p3,p4)-theta0_2)'
+        da_cross_eq = 'k * (cos(dihedral(p1,p2,p3,p4))+1)^4 * (angle(p1,p2,p3)-theta0_1) * (angle(p2,p3,p4)-theta0_2)'
         da_force = ET.SubElement(forces, 'Force', {'energy': da_cross_eq, 'name': 'DihedralAngle', 'usesPeriodic': '0',
                                                    'type': 'CustomCompoundBondForce', 'forceGroup': '0',
                                                    'particles': '4', 'version': '3'})
@@ -526,7 +530,7 @@ def write_inversion_dihedral_term(self, term, writer):
                                           'param1': equ, 'param2': k})
 
     def write_cos_cube_dihedral_header(self, forces):
-        inv_dih_eq = 'k*(cos(theta)+1)^3'
+        inv_dih_eq = 'k*(cos(theta)+1)^4'
         inv_dih_force = ET.SubElement(forces, 'Force', {'energy': inv_dih_eq, 'name': 'CosCubeDihedral', 'usesPeriodic': '0',
                                                         'type': 'CustomTorsionForce', 'forceGroup': '0', 'version': '3',
                                                         })