Skip to content

Commit

Permalink
Fix pymatgen's Composition hash
Browse files Browse the repository at this point in the history
  • Loading branch information
kavanase committed Oct 23, 2024
1 parent 23d1921 commit 5d038ef
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 136 deletions.
43 changes: 22 additions & 21 deletions doped/chemical_potentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,8 @@ def get_entries_in_chemsys(
bulk_composition: Optional[Union[str, Composition]] = None,
**kwargs,
):
"""
Convenience function to get a list of ``ComputedStructureEntry``s for an
r"""
Convenience function to get a list of ``ComputedStructureEntry``\s for an
input chemical system, using ``MPRester.get_entries_in_chemsys()``.
Automatically uses the appropriate format and syntax required for the
Expand Down Expand Up @@ -480,8 +480,8 @@ def get_entries(
bulk_composition: Optional[Union[str, Composition]] = None,
**kwargs,
):
"""
Convenience function to get a list of ``ComputedStructureEntry``s for an
r"""
Convenience function to get a list of ``ComputedStructureEntry``\s for an
input single composition/formula, chemical system, MPID or full criteria,
using ``MPRester.get_entries()``.
Expand Down Expand Up @@ -621,13 +621,13 @@ def get_MP_summary_docs(
data_fields: Optional[list[str]] = None,
**kwargs,
):
"""
r"""
Get the corresponding Materials Project (MP) ``SummaryDoc`` documents for
computed entries in the input ``entries`` list or ``chemsys`` chemical
system.
If ``entries`` is provided (which should be a list of ``ComputedEntry``\s
from the Materials Project), then only ``SummaryDoc``s in this chemical
from the Materials Project), then only ``SummaryDoc``\s in this chemical
system which match one of these entries (based on the MPIDs given in
``ComputedEntry.entry_id``/``ComputedEntry.data["material_id"]`` and
``SummaryDoc.material_id``) are returned.
Expand All @@ -644,7 +644,7 @@ def get_MP_summary_docs(
Args:
entries (list[ComputedEntry]):
Optional input; list of ``ComputedEntry`` objects for the input chemical
system. If provided, only ``SummaryDoc``s which match one of these entries
system. If provided, only ``SummaryDoc``\s which match one of these entries
(based on the MPIDs given in ``ComputedEntry.entry_id``/
``ComputedEntry.data["material_id"]`` and ``SummaryDoc.material_id``) are
returned. Moreover, all data fields listed in ``data_fields`` will be copied
Expand Down Expand Up @@ -729,15 +729,13 @@ def _entries_sort_func(
use_e_per_atom: bool = False,
bulk_composition: Optional[Union[str, Composition, dict, list]] = None,
):
"""
Function to sort ``ComputedEntry``s by energy above hull, then by the
number of elements in the formula, then by the position of elements in the
r"""
Function to sort ``ComputedEntry``\s by energy above hull, then if
composition matches ``bulk_composition`` (if provided), then by the number
of elements in the formula, then by the position of elements in the
periodic table (main group elements, then transition metals, sorted by
row), then alphabetically.
If ``bulk_composition`` is provided, then entries matching the bulk
composition are sorted first, followed by all other entries.
Usage: ``entries_list.sort(key=_entries_sort_func)``
Args:
Expand All @@ -759,8 +757,8 @@ def _entries_sort_func(
"""
bulk_reduced_comp = Composition(bulk_composition).reduced_composition if bulk_composition else None
return (
entry.composition.reduced_composition == bulk_reduced_comp,
entry.energy_per_atom if use_e_per_atom else _get_e_above_hull(entry.data),
entry.composition.reduced_composition != bulk_reduced_comp, # goes from False to True
len(Composition(entry.name).as_dict()),
sorted([_element_sort_func(i.symbol) for i in Composition(entry.name).elements]),
entry.name,
Expand Down Expand Up @@ -1233,9 +1231,12 @@ def __init__(

# sort by energy above hull, host composition match, num_species, then by periodic table positioning:
self.entries.sort(key=lambda x: _entries_sort_func(x, bulk_composition=self.composition))
print([entry.name for entry in self.entries])
print([_entries_sort_func(x, bulk_composition=self.composition) for x in self.entries])
print(self.composition)
_name_entries_and_handle_duplicates(self.entries) # set entry names

if not self.legacy_MP: # need to pull ``SummaryDoc``s to get band_gap and magnetization info
if not self.legacy_MP: # need to pull ``SummaryDoc``\s to get band_gap and magnetization info
self.MP_docs = get_MP_summary_docs(
entries=self.entries, # sets "band_gap", "total_magnetization" and "database_IDs" fields
api_key=self.api_key,
Expand Down Expand Up @@ -1478,14 +1479,14 @@ def _set_default_metal_smearing(self, incar_settings, user_incar_settings):
incar_settings["SIGMA"] = user_incar_settings.get("SIGMA", 0.2)

def _generate_elemental_diatomic_phases(self, entries: list[ComputedEntry]):
"""
r"""
Given an input list of ``ComputedEntry`` objects, adds a
``ComputedStructureEntry`` for each diatomic elemental phase (O2, N2,
H2, F2, Cl2) to ``entries`` using ``make_molecular_entry``, and
generates an output list of
``ComputedEntry``/``ComputedStructureEntry``s containing all entries in
``entries``, with all diatomic elemental phases replaced by the single
molecule-in-a-box entry.
``ComputedEntry``/``ComputedStructureEntry``\s containing all entries
in ``entries``, with all diatomic elemental phases replaced by the
single molecule-in-a-box entry.
Also sets the ``ComputedEntry.data["molecule"]`` flag for each entry
in ``entries`` (``True`` for diatomic gases, ``False`` for all others).
Expand Down Expand Up @@ -1822,7 +1823,7 @@ def __init__(
)
_name_entries_and_handle_duplicates(self.entries) # set entry names

if not self.legacy_MP: # need to pull ``SummaryDoc``s to get band_gap and magnetization info
if not self.legacy_MP: # need to pull ``SummaryDoc``\s to get band_gap and magnetization info
self.intrinsic_MP_docs = deepcopy(self.MP_docs)
self.MP_docs = get_MP_summary_docs(
entries=self.entries, # sets "band_gap", "total_magnetization" and "database_IDs" fields
Expand Down Expand Up @@ -2621,7 +2622,7 @@ def _cplap_input(self, dependent_variable: Optional[str] = None, filename: PathL
"""
Generates an ``input.dat`` file for the ``CPLAP`` ``FORTRAN`` code
(legacy code for computing and analysing chemical potential limits, no
longer recommended).
longer recommended, tested or supported).
Args:
dependent_variable (str):
Expand Down
23 changes: 19 additions & 4 deletions doped/utils/efficiency.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@
from doped.utils import symmetry

# Make composition comparisons faster (used in structure matching etc)
pmg_Comp_eq = Composition.__eq__


def _composition__hash__(self):
"""
Custom ``__hash__`` method for ``Composition`` instances.
``pymatgen`` composition has just hashes the chemical system
(without stoichiometry), which cannot then be used to
distinguish different compositions.
"""
return hash(frozenset(self._data.items()))


@lru_cache(maxsize=int(1e8))
Expand Down Expand Up @@ -51,7 +61,7 @@ def fast_Comp_eq(self, other):
return False

for el, amt in self.items(): # noqa: SIM110
if abs(amt - other[el]) > Composition.amount_tolerance:
if abs(amt - other[el]) > type(self).amount_tolerance:
return False

return True
Expand All @@ -62,8 +72,12 @@ def _Comp__eq__(self, other):
Custom ``__eq__`` method for ``Composition`` instances, using a cached
equality function to speed up comparisons.
"""
self_hash = self.__hash__() # object hash with instances to avoid recursion issues (for class method)
other_hash = other.__hash__()
if not isinstance(other, type(self) | dict):
return NotImplemented

# use object hash with instances to avoid recursion issues (for class method)
self_hash = _composition__hash__(self)
other_hash = _composition__hash__(other)

Composition.__instances__[self_hash] = self # Ensure instances are stored for caching
Composition.__instances__[other_hash] = other
Expand All @@ -73,6 +87,7 @@ def _Comp__eq__(self, other):

Composition.__instances__ = {}
Composition.__eq__ = _Comp__eq__
Composition.__hash__ = _composition__hash__


class Hashabledict(dict):
Expand Down
12 changes: 6 additions & 6 deletions examples/CompetingPhases/zro2_competing_phase_energies.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Formula,Formation Energy (eV/fu),Formation Energy (eV/atom),DFT Energy (eV/atom),DFT Energy (eV/fu),DFT Energy (eV),k-points,Zr,O
ZrO2,-10.975428440000002,-3.658476146666667,-11.610769603333333,-34.83230881,-139.32923524,3x3x3,1.0,2.0
O2,0.0,0.0,-7.006602065,-14.01320413,-14.01320413,2x2x2,0.0,2.0
ZrO2,-10.97542844,-3.6584761466666667,-11.610769603333333,-34.83230881,-139.32923524,3x3x3,1.0,2.0
Zr,0.0,0.0,-9.84367624,-9.84367624,-19.68735248,9x9x5,1.0,0.0
Zr,0.025160013333334064,0.025160013333334064,-9.818516226666667,-9.818516226666667,-58.91109736,6x6x6,1.0,0.0
Zr2O,-5.728958971666668,-1.909652990555556,-10.807637838888889,-32.42291351666667,-194.5374811,5x5x2,2.0,1.0
Zr3O,-5.986573519999993,-1.4966433799999983,-10.63105107625,-42.524204305,-85.04840861,5x5x5,3.0,1.0
Zr3O,-5.935114089999992,-1.483778522499998,-10.61818621875,-42.472744875,-84.94548975,5x5x5,3.0,1.0
ZrO2,-10.951109995000003,-3.650369998333334,-11.602663455,-34.807990365,-278.46392292,3x3x1,1.0,2.0
O2,0.0,0.0,-7.006602065,-14.01320413,-14.01320413,2x2x2,0.0,2.0
ZrO2,-10.951109995000001,-3.6503699983333338,-11.602663455,-34.807990365,-278.46392292,3x3x1,1.0,2.0
Zr2O,-5.728958971666667,-1.9096529905555555,-10.807637838888889,-32.42291351666667,-194.5374811,5x5x2,2.0,1.0
Zr3O,-5.986573519999996,-1.496643379999999,-10.63105107625,-42.524204305,-85.04840861,5x5x5,3.0,1.0
Zr3O,-5.935114089999995,-1.4837785224999986,-10.61818621875,-42.472744875,-84.94548975,5x5x5,3.0,1.0
Loading

0 comments on commit 5d038ef

Please sign in to comment.