Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Multi species #724

Merged
merged 14 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions arc/job/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def determine_job_array_parameters(self):
ARC will allocate, e.g., 8 workers, to simultaneously get processes (one by one) from the HDF5 bank
and execute them. On average, each worker in this example executes 125 jobs.
"""
if self.execution_type == 'incore':
if self.execution_type == 'incore' or self.run_multi_species:
return None
if len(self.job_types) > 1:
self.iterate_by.append('job_types')
Expand Down Expand Up @@ -533,7 +533,10 @@ def set_file_paths(self):
Set local and remote job file paths.
"""
folder_name = 'TS_guesses' if self.reactions is not None else 'TSs' if self.species[0].is_ts else 'Species'
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
if self.run_multi_species == False:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
else:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species[0].multi_species, self.job_name)
self.local_path_to_output_file = os.path.join(self.local_path, 'output.out')
self.local_path_to_orbitals_file = os.path.join(self.local_path, 'orbitals.fchk')
self.local_path_to_check_file = os.path.join(self.local_path, 'check.chk')
Expand All @@ -545,7 +548,8 @@ def set_file_paths(self):

if self.server is not None:
# Parentheses don't play well in folder names:
species_name_remote = self.species_label.replace('(', '_').replace(')', '_')
species_name_remote = self.species_label if isinstance(self.species_label, str) else self.species[0].multi_species
species_name_remote = species_name_remote.replace('(', '_').replace(')', '_')
path = servers[self.server].get('path', '').lower()
path = os.path.join(path, servers[self.server]['un']) if path else ''
self.remote_path = os.path.join(path, 'runs', 'ARC_Projects', self.project,
Expand Down
3 changes: 3 additions & 0 deletions arc/job/adapters/cfour.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class CFourAdapter(JobAdapter):
job_status (list, optional): The job's server and ESS statuses.
level (Level, optional): The level of theory to use.
max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to add this to all of our job adapters (ess and ts), not only gaussian and cfour

reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
server (str): The server to run on.
Expand Down Expand Up @@ -122,6 +123,7 @@ def __init__(self,
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand Down Expand Up @@ -170,6 +172,7 @@ def __init__(self,
job_status=job_status,
level=level,
max_job_time=max_job_time,
run_multi_species=run_multi_species,
reactions=reactions,
rotor_index=rotor_index,
server=server,
Expand Down
72 changes: 58 additions & 14 deletions arc/job/adapters/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def _initialize_adapter(obj: 'JobAdapter',
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand All @@ -114,7 +115,7 @@ def _initialize_adapter(obj: 'JobAdapter',
times_rerun: int = 0,
torsions: Optional[List[List[int]]] = None,
tsg: Optional[int] = None,
xyz: Optional[dict] = None,
xyz: Optional[Union[dict,List[dict]]] = None,
):
"""
A common Job adapter initializer function.
Expand Down Expand Up @@ -164,6 +165,7 @@ def _initialize_adapter(obj: 'JobAdapter',
# When restarting ARC and re-setting the jobs, ``level`` is a string, convert it to a Level object instance
obj.level = Level(repr=level) if not isinstance(level, Level) and level is not None else level
obj.max_job_time = max_job_time or default_job_settings.get('job_time_limit_hrs', 120)
obj.run_multi_species = run_multi_species
obj.number_of_processes = 0
obj.reactions = [reactions] if reactions is not None and not isinstance(reactions, list) else reactions
obj.remote_path = None
Expand All @@ -180,18 +182,34 @@ def _initialize_adapter(obj: 'JobAdapter',
obj.pivots = [[tor[1] + 1, tor[2] + 1] for tor in obj.torsions] if obj.torsions is not None else None
obj.tsg = tsg
obj.workers = None
obj.xyz = obj.species[0].get_xyz() if obj.species is not None and xyz is None else xyz
if not obj.run_multi_species:
obj.xyz = obj.species[0].get_xyz() if obj.species is not None and xyz is None else xyz
else:
obj.xyz = list()
if obj.species is not None:
for spc in obj.species:
obj.xyz.append(spc.get_xyz() if xyz is None else xyz)

if obj.job_num is None or obj.job_name is None or obj.job_server_name:
obj._set_job_number()

if obj.species is not None:
obj.charge = obj.species[0].charge
obj.multiplicity = obj.species[0].multiplicity
obj.is_ts = obj.species[0].is_ts
obj.species_label = obj.species[0].label
if len(obj.species) > 1:
obj.species_label += f'_and_{len(obj.species) - 1}_others'
if not obj.run_multi_species:
obj.charge = obj.species[0].charge
obj.multiplicity = obj.species[0].multiplicity
obj.is_ts = obj.species[0].is_ts
obj.species_label = obj.species[0].label
if len(obj.species) > 1:
obj.species_label += f'_and_{len(obj.species) - 1}_others'
else:
obj.charge = list()
obj.multiplicity = list()
obj.is_ts = obj.species[0].is_ts
obj.species_label = list()
for spc in obj.species:
obj.charge.append(spc.charge)
obj.multiplicity.append(spc.multiplicity)
obj.species_label.append(spc.label)
elif obj.reactions is not None:
obj.charge = obj.reactions[0].charge
obj.multiplicity = obj.reactions[0].multiplicity
Expand Down Expand Up @@ -233,25 +251,51 @@ def _initialize_adapter(obj: 'JobAdapter',
check_argument_consistency(obj)


def is_restricted(obj) -> bool:
def is_restricted(obj: 'JobAdapter') -> Union[bool, List[bool]]:
alongd marked this conversation as resolved.
Show resolved Hide resolved
"""
Check whether a Job Adapter should be executed as restricted or unrestricted.
If the job adapter contains a list of species, return True or False per species.

Args:
obj: The job adapter object.

Returns:
Union[bool, List[bool]]: Whether to run as restricted (``True``) or not (``False``).
"""
if not obj.run_multi_species:
return is_species_restricted(obj)
else:
return [is_species_restricted(obj, species) for species in obj.species]


def is_species_restricted(obj: 'JobAdapter',
species: Optional['ARCSpecies'] = None,
) -> bool:
"""
Check whether a species should be executed as restricted or unrestricted.

Args:
obj: The job adapter object.
species (ARCSpecies, optional): The species to check.

Returns:
bool: Whether to run as restricted (``True``) or not (``False``).
"""
if (obj.multiplicity > 1 and obj.level.method_type != 'composite') \
or (obj.species[0].number_of_radicals is not None and obj.species[0].number_of_radicals > 1):

if obj.level.method_type in ['force_field','composite','semiempirical']:
return True

multiplicity = obj.multiplicity if species is None else species.multiplicity
number_of_radicals = obj.species[0].number_of_radicals if species is None else species.number_of_radicals
species_label = obj.species[0].label if species is None else species.label
if multiplicity > 1 or (number_of_radicals is not None and number_of_radicals > 1):
# run an unrestricted electronic structure calculation if the spin multiplicity is greater than one,
# or if it is one but the number of radicals is greater than one (e.g., bi-rad singlet)
# don't run unrestricted for composite methods such as CBS-QB3, it'll be done automatically if the
# multiplicity is greater than one, but do specify uCBS-QB3 for example for bi-rad singlets.
if obj.species[0].number_of_radicals is not None and obj.species[0].number_of_radicals > 1:
logger.info(f'Using an unrestricted method for species {obj.species_label} which has '
f'{obj.species[0].number_of_radicals} radicals and multiplicity {obj.multiplicity}.')
if number_of_radicals is not None and number_of_radicals > 1:
logger.info(f'Using an unrestricted method for species {species_label} which has '
f'{number_of_radicals} radicals and multiplicity {multiplicity}.')
return False
return True

Expand Down
14 changes: 14 additions & 0 deletions arc/job/adapters/common_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,26 @@ def setUpClass(cls):
testing=True,
args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
)
cls.job_multi = GaussianAdapter(execution_type='incore',
job_type='scan',
torsions=[[1, 2, 3, 4]],
level=Level(method='wb97xd', basis='def2tzvp'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_GaussianAdapter'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'], multiplicity=1, number_of_radicals=2, multi_species='mltspc1'),
ARCSpecies(label='spc1', xyz=['O 0 0 1'], multiplicity=1, number_of_radicals=1, multi_species='mltspc1')],
testing=True,
args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
run_multi_species = True,
)

def test_is_restricted(self):
"""Test the is_restricted() function"""
self.assertTrue(common.is_restricted(self.job_1))
self.assertFalse(common.is_restricted(self.job_2))
self.assertFalse(common.is_restricted(self.job_3))
benchmark_list = [False, True]
self.assertEqual(common.is_restricted(self.job_multi),benchmark_list)

def test_check_argument_consistency(self):
"""Test the check_argument_consistency() function"""
Expand Down
40 changes: 31 additions & 9 deletions arc/job/adapters/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class GaussianAdapter(JobAdapter):
job_status (list, optional): The job's server and ESS statuses.
level (Level, optional): The level of theory to use.
max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
server (str): The server to run on.
Expand All @@ -106,7 +107,7 @@ class GaussianAdapter(JobAdapter):
times_rerun (int, optional): Number of times this job was re-run with the same arguments (no trsh methods).
torsions (List[List[int]], optional): The 0-indexed atom indices of the torsion(s).
tsg (int, optional): TSGuess number if optimizing TS guesses.
xyz (dict, optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
xyz (Union[dict,List[dict]], optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
"""

def __init__(self,
Expand Down Expand Up @@ -136,6 +137,7 @@ def __init__(self,
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand All @@ -145,7 +147,7 @@ def __init__(self,
times_rerun: int = 0,
torsions: Optional[List[List[int]]] = None,
tsg: Optional[int] = None,
xyz: Optional[dict] = None,
xyz: Optional[Union[dict,List[dict]]] = None,
):

self.incore_capacity = 1
Expand Down Expand Up @@ -184,6 +186,7 @@ def __init__(self,
job_status=job_status,
level=level,
max_job_time=max_job_time,
run_multi_species=run_multi_species,
reactions=reactions,
rotor_index=rotor_index,
server=server,
Expand Down Expand Up @@ -232,10 +235,10 @@ def write_input_file(self) -> None:
input_dict['memory'] = self.input_file_memory
input_dict['method'] = self.level.method
input_dict['multiplicity'] = self.multiplicity
input_dict['xyz'] = xyz_to_str(self.xyz)
input_dict['scan_trsh'] = self.args['keyword']['scan_trsh'] if 'scan_trsh' in self.args['keyword'] else ''
integral_algorithm = 'Acc2E=14' if 'Acc2E=14' in input_dict['trsh'] else 'Acc2E=12'
input_dict['trsh'] = input_dict['trsh'].replace('int=(Acc2E=14)', '') if 'Acc2E=14' in input_dict['trsh'] else input_dict['trsh']
input_dict['xyz'] = [xyz_to_str(xyz) for xyz in self.xyz] if self.run_multi_species else xyz_to_str(self.xyz)

if self.level.basis is not None:
input_dict['slash_1'] = '/'
Expand All @@ -245,9 +248,6 @@ def write_input_file(self) -> None:
if self.level.method_type in ['semiempirical', 'force_field']:
self.checkfile = None

if not is_restricted(self):
input_dict['restricted'] = 'u'

if self.level.dispersion is not None:
input_dict['dispersion'] = self.level.dispersion

Expand All @@ -263,8 +263,9 @@ def write_input_file(self) -> None:
self.level.method = 'cbs-qb3'

# Job type specific options
max_c = self.args['trsh'].split()[1] if 'max_cycles' in self.args['trsh'] else 100
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be maxcycles = self.args['trsh']['max_cycles'].split()[1] if 'max_cycles' in self.args['trsh'].keys() else 100

BTW, why does the comment say 500 and the code has 100? We should remove the comment (comments tend to lie...) and decide which number is correct

Also, is this part of the current PR on purpose or is it a leftover (from rebasing)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You helped me make this change for the PR; it somehow helped us get the job on the cluster to run successfully. However, based on the current main branch, the default is also 100. Does that mean we don't really need this change?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this comment addressed?

if self.job_type in ['opt', 'conformers', 'optfreq', 'composite']:
keywords = ['ts', 'calcfc', 'noeigentest', 'maxcycles=100'] if self.is_ts else ['calcfc']
keywords = ['ts', 'calcfc', 'noeigentest', f'maxcycles={max_c}'] if self.is_ts else ['calcfc']
if self.level.method in ['rocbs-qb3']:
# There are no analytical 2nd derivatives (FC) for this method.
keywords = ['ts', 'noeigentest', 'maxcycles=100'] if self.is_ts else []
Expand All @@ -291,7 +292,8 @@ def write_input_file(self) -> None:
keywords.extend(['tight', 'maxstep=5'])
else:
keywords.extend(['tight', 'maxstep=5'])
input_dict['job_type_1'] = f"opt=({', '.join(key for key in keywords)})"
input_dict['job_type_1'] = "opt" if self.level.method_type not in ['dft', 'composite', 'wavefunction']\
else f"opt=({', '.join(key for key in keywords)})"

elif self.job_type == 'freq':
input_dict['job_type_2'] = f'freq IOp(7/33=1) scf=(tight, direct) integral=(grid=ultrafine, {integral_algorithm})'
Expand Down Expand Up @@ -391,9 +393,29 @@ def write_input_file(self) -> None:

# Remove double spaces
input_dict['job_type_1'] = input_dict['job_type_1'].replace(' ', ' ')

input_file = ''
input_dict_origin = input_dict.copy()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we need to copy the dict here. Looks like we don't need the original (un-copied) dict later

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the code below, we do use it. The idea here is to create the copy input_dict_origin, and use it as the source. Later the input_dict is updated for each of the single species to append the new template.

 with open(os.path.join(self.local_path, input_filenames[self.job_adapter]), 'w') as f:
            f.write(Template(input_template).render(**input_dict))
            if not self.run_multi_species:
                input_dict['restricted'] = restricted_list[0]
                f.write(Template(input_template).render(**input_dict))
            else:
                for index, spc in enumerate(self.species):
                    input_dict['charge'] = input_dict_origin['charge'][index]
                    input_dict['label'] = input_dict_origin['label'][index]
                    input_dict['multiplicity'] = input_dict_origin['multiplicity'][index]
                    input_dict['xyz'] = input_dict_origin['xyz'][index]
                    input_dict['restricted'] = restricted_list[index]
                    input_file += Template(input_template).render(**input_dict)
                    if index < len(self.species) - 1:
                        input_file += '\n--link1--\n'
                f.write(input_file)


restricted_list_bool = is_restricted(self)
restricted_list = ["" if flag else 'u' for flag in ([restricted_list_bool]
if isinstance(restricted_list_bool, bool) else restricted_list_bool)]

with open(os.path.join(self.local_path, input_filenames[self.job_adapter]), 'w') as f:
f.write(Template(input_template).render(**input_dict))
if not self.run_multi_species:
input_dict['restricted'] = restricted_list[0]
f.write(Template(input_template).render(**input_dict))
else:
for index, spc in enumerate(self.species):
input_dict['charge'] = input_dict_origin['charge'][index]
input_dict['label'] = input_dict_origin['label'][index]
input_dict['multiplicity'] = input_dict_origin['multiplicity'][index]
input_dict['xyz'] = input_dict_origin['xyz'][index]
input_dict['restricted'] = restricted_list[index]
input_file += Template(input_template).render(**input_dict)
if index < len(self.species) - 1:
input_file += '\n--link1--\n'
f.write(input_file)

def set_files(self) -> None:
"""
Expand Down
Loading
Loading