Skip to content

Commit

Permalink
Explicitly use int64 in Numpy/cython code to avoid OS inconsistency (
Browse files Browse the repository at this point in the history
…#3992)

* use explicit int64 to avoid OS inconsistency

* pin np <1 to run tests

* add missing replacement in cython code

* more missing spglib api replacement

* globally replace dtype=int with int64

* pre-commit auto-fixes

* use int64 in coord_cython

* fix assert dtype

* NEED confirmation: skip chgnet tests

* docstring tweaks of linear assignment

* enough docstring tweaks, go back to real business

* last batch of docstring tweak and remove isort tag as it doesn't seem to be used

* revert to NP2 in dependency

* try to add a np1 test for windows

* try another way to install np1

* fix opt dep name

* add comment in NP1 install
  • Loading branch information
DanielYang59 authored Aug 24, 2024
1 parent 6f280a9 commit 41e4c69
Show file tree
Hide file tree
Showing 25 changed files with 114 additions and 116 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ jobs:
python: "3.10"
resolution: highest
extras: ci,optional
- os: windows-latest
python: "3.10"
resolution: highest
extras: ci,optional,numpy1 # Test NP1 on Windows (quite buggy ATM)
- os: ubuntu-latest
python: ">3.10"
resolution: lowest-direct
Expand Down
9 changes: 4 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ dependencies = [
"matplotlib>=3.8",
"monty>=2024.7.29",
"networkx>=2.2",
# NumPy documentation suggests pinning the current major version as the C API is used
# https://numpy.org/devdocs/dev/depending_on_numpy.html#runtime-dependency-version-ranges
"palettable>=3.3.3",
"pandas>=2",
"plotly>=4.5.0",
Expand All @@ -73,7 +71,9 @@ dependencies = [
"tabulate>=0.9",
"tqdm>=4.60",
"uncertainties>=3.1.4",
'numpy>=1.25.0,<3.0',
# NumPy documentation suggests pinning the current major version as the C API is used
# https://numpy.org/devdocs/dev/depending_on_numpy.html#runtime-dependency-version-ranges
"numpy>=1.25.0,<3",
]
version = "2024.8.9"

Expand Down Expand Up @@ -115,6 +115,7 @@ optional = [
# "openbabel>=3.1.1; platform_system=='Linux'",
]
numba = ["numba>=0.55"]
numpy1 = ["numpy>=1.25.0,<2"] # Test NP1 on Windows (quite buggy ATM)

[project.scripts]
pmg = "pymatgen.cli.pmg:main"
Expand Down Expand Up @@ -266,9 +267,7 @@ exclude_also = [
"@np.deprecate",
"def __repr__",
"except ImportError:",
"if 0:",
"if TYPE_CHECKING:",
"if __name__ == .__main__.:",
"if self.debug:",
"if settings.DEBUG",
"if typing.TYPE_CHECKING:",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1728,7 +1728,7 @@ def coordination_geometry_symmetry_measures_separation_plane_optim(
continue
if sep not in nb_set.separations:
nb_set.separations[sep] = {}
_sep = [np.array(ss, dtype=int) for ss in separation]
_sep = [np.array(ss, dtype=np.int64) for ss in separation]
nb_set.separations[sep][separation] = (plane, _sep)
if sep == separation_plane_algo.separation:
new_seps.append(_sep)
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chemenv/utils/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def get_delta(node1, node2, edge_data):
edge_data:
"""
if node1.isite == edge_data["start"] and node2.isite == edge_data["end"]:
return np.array(edge_data["delta"], dtype=int)
return np.array(edge_data["delta"], dtype=np.int64)
if node2.isite == edge_data["start"] and node1.isite == edge_data["end"]:
return -np.array(edge_data["delta"], dtype=int)
return -np.array(edge_data["delta"], dtype=np.int64)
raise ValueError("Trying to find a delta between two nodes with an edge that seems not to link these nodes.")


Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/local_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ def get_all_voronoi_polyhedra(self, structure: Structure):
indices.extend([(x[2],) + x[3] for x in neighs])

# Get the non-duplicates (using the site indices for numerical stability)
indices = np.array(indices, dtype=int)
indices = np.array(indices, dtype=np.int64)
indices, uniq_inds = np.unique(indices, return_index=True, axis=0) # type: ignore[assignment]
sites = [sites[idx] for idx in uniq_inds]

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/molecule_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@ def permutations(p_atoms, p_centroid, p_weights, q_atoms, q_centroid, q_weights)
p_centroid_test = np.dot(p_centroid, U)

# generate full view from q shape to fill in atom view on the fly
perm_inds = np.zeros(len(p_atoms), dtype=int)
perm_inds = np.zeros(len(p_atoms), dtype=np.int64)

# Find unique atoms
species = np.unique(p_atoms)
Expand All @@ -1067,7 +1067,7 @@ def permutations(p_atoms, p_centroid, p_weights, q_atoms, q_centroid, q_weights)
p_centroid_test = np.dot(p_centroid, U)

# generate full view from q shape to fill in atom view on the fly
perm_inds = np.zeros(len(p_atoms), dtype=int)
perm_inds = np.zeros(len(p_atoms), dtype=np.int64)

# Find unique atoms
species = np.unique(p_atoms)
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/structure_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def _get_mask(self, struct1, struct2, fu, s1_supercell):
if s1_supercell:
# remove the symmetrically equivalent s1 indices
inds = inds[::fu]
return np.array(mask, dtype=int), inds, idx
return np.array(mask, dtype=np.int64), inds, idx

def fit(
self, struct1: Structure, struct2: Structure, symmetric: bool = False, skip_structure_reduction: bool = False
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/topological/spillage.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def orth(A):
n_rows, n_cols = A.shape
eps = np.finfo(float).eps
tol = max(n_rows, n_cols) * np.amax(s) * eps
num = np.sum(s > tol, dtype=int)
num = np.sum(s > tol, dtype=np.int64)
orthonormal_basis = u[:, :num]
return orthonormal_basis, num

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/core/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,7 +1269,7 @@ def get_trans_mat(
r_matrix = np.dot(np.dot(np.linalg.inv(trans_cry.T), r_matrix), trans_cry.T)
# Set one vector of the basis to the rotation axis direction, and
# obtain the corresponding transform matrix
eye = np.eye(3, dtype=int)
eye = np.eye(3, dtype=np.int64)
hh = kk = ll = None
for hh in range(3):
if abs(r_axis[hh]) != 0:
Expand Down Expand Up @@ -2107,7 +2107,7 @@ def slab_from_csl(
# Quickly generate a supercell, normal is not working in this way
if quick_gen:
scale_factor = []
eye = np.eye(3, dtype=int)
eye = np.eye(3, dtype=np.int64)
for i, j in enumerate(miller):
if j == 0:
scale_factor.append(eye[i])
Expand Down
24 changes: 12 additions & 12 deletions src/pymatgen/core/lattice.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ def get_angles(v1, v2, l1, l2):
for idx, all_j in enumerate(gamma_b):
inds = np.logical_and(all_j[:, None], np.logical_and(alpha_b, beta_b[idx][None, :]))
for j, k in np.argwhere(inds):
scale_m = np.array((f_a[idx], f_b[j], f_c[k]), dtype=int) # type: ignore[index]
scale_m = np.array((f_a[idx], f_b[j], f_c[k]), dtype=np.int64) # type: ignore[index]
if abs(np.linalg.det(scale_m)) < 1e-8:
continue

Expand Down Expand Up @@ -1381,7 +1381,7 @@ def get_points_in_sphere(
frac_points = np.ascontiguousarray(frac_points, dtype=float)
latt_matrix = np.ascontiguousarray(self.matrix, dtype=float)
cart_coords = np.ascontiguousarray(self.get_cartesian_coords(frac_points), dtype=float)
pbc = np.ascontiguousarray(self.pbc, dtype=int)
pbc = np.ascontiguousarray(self.pbc, dtype=np.int64)
center_coords = np.ascontiguousarray([center], dtype=float)

_, indices, images, distances = find_points_in_spheres(
Expand Down Expand Up @@ -1510,12 +1510,12 @@ def get_points_in_sphere_old(
# Generate all possible images that could be within `r` of `center`
mins = np.floor(pcoords - nmax)
maxes = np.ceil(pcoords + nmax)
arange = np.arange(start=mins[0], stop=maxes[0], dtype=int)
brange = np.arange(start=mins[1], stop=maxes[1], dtype=int)
crange = np.arange(start=mins[2], stop=maxes[2], dtype=int)
arange = arange[:, None] * np.array([1, 0, 0], dtype=int)[None, :]
brange = brange[:, None] * np.array([0, 1, 0], dtype=int)[None, :]
crange = crange[:, None] * np.array([0, 0, 1], dtype=int)[None, :]
arange = np.arange(start=mins[0], stop=maxes[0], dtype=np.int64)
brange = np.arange(start=mins[1], stop=maxes[1], dtype=np.int64)
crange = np.arange(start=mins[2], stop=maxes[2], dtype=np.int64)
arange = arange[:, None] * np.array([1, 0, 0], dtype=np.int64)[None, :]
brange = brange[:, None] * np.array([0, 1, 0], dtype=np.int64)[None, :]
crange = crange[:, None] * np.array([0, 0, 1], dtype=np.int64)[None, :]
images = arange[:, None, None] + brange[None, :, None] + crange[None, None, :]

# Generate the coordinates of all atoms within these images
Expand Down Expand Up @@ -1629,7 +1629,7 @@ def get_distance_and_image(
if jimage is None:
v, d2 = pbc_shortest_vectors(self, frac_coords1, frac_coords2, return_d2=True)
fc = self.get_fractional_coords(v[0][0]) + frac_coords1 - frac_coords2
fc = np.array(np.round(fc), dtype=int)
fc = np.array(np.round(fc), dtype=np.int64)
return np.sqrt(d2[0, 0]), fc

jimage = np.array(jimage)
Expand Down Expand Up @@ -1866,7 +1866,7 @@ def get_points_in_spheres(
neighbors: list[list[tuple[np.ndarray, float, int, np.ndarray]]] = []

for ii, jj in zip(center_coords, site_neighbors, strict=True):
l1 = np.array(_three_to_one(jj, ny, nz), dtype=int).ravel()
l1 = np.array(_three_to_one(jj, ny, nz), dtype=np.int64).ravel()
# Use the cube index map to find the all the neighboring
# coords, images, and indices
ks = [k for k in l1 if k in cube_to_coords]
Expand Down Expand Up @@ -1905,7 +1905,7 @@ def _compute_cube_index(
Returns:
np.ndarray: nx3 array int indices
"""
return np.array(np.floor((coords - global_min) / radius), dtype=int)
return np.array(np.floor((coords - global_min) / radius), dtype=np.int64)


def _one_to_three(label1d: np.ndarray, ny: int, nz: int) -> np.ndarray:
Expand Down Expand Up @@ -1943,7 +1943,7 @@ def find_neighbors(label: np.ndarray, nx: int, ny: int, nz: int) -> list[np.ndar
Neighbor cell indices.
"""
array = [[-1, 0, 1]] * 3
neighbor_vectors = np.array(list(itertools.product(*array)), dtype=int)
neighbor_vectors = np.array(list(itertools.product(*array)), dtype=np.int64)
label3d = _one_to_three(label, ny, nz) if np.shape(label)[1] == 1 else label
all_labels = label3d[:, None, :] - neighbor_vectors[None, :, :]
filtered_labels = []
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/core/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,7 +1778,7 @@ def get_neighbor_list(
site_coords = np.ascontiguousarray([site.coords for site in sites], dtype=float)
cart_coords = np.ascontiguousarray(self.cart_coords, dtype=float)
lattice_matrix = np.ascontiguousarray(self.lattice.matrix, dtype=float)
pbc = np.ascontiguousarray(self.pbc, dtype=int)
pbc = np.ascontiguousarray(self.pbc, dtype=np.int64)
center_indices, points_indices, images, distances = find_points_in_spheres(
cart_coords,
site_coords,
Expand Down
6 changes: 3 additions & 3 deletions src/pymatgen/core/surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,9 +949,9 @@ def add_site_types() -> None:
or "bulk_equivalent" not in initial_structure.site_properties
):
spg_analyzer = SpacegroupAnalyzer(initial_structure)
initial_structure.add_site_property("bulk_wyckoff", spg_analyzer.get_symmetry_dataset()["wyckoffs"])
initial_structure.add_site_property("bulk_wyckoff", spg_analyzer.get_symmetry_dataset().wyckoffs)
initial_structure.add_site_property(
"bulk_equivalent", spg_analyzer.get_symmetry_dataset()["equivalent_atoms"].tolist()
"bulk_equivalent", spg_analyzer.get_symmetry_dataset().equivalent_atoms.tolist()
)

def calculate_surface_normal() -> np.ndarray:
Expand All @@ -971,7 +971,7 @@ def calculate_scaling_factor() -> np.ndarray:
"""
slab_scale_factor = []
non_orth_ind = []
eye = np.eye(3, dtype=int)
eye = np.eye(3, dtype=np.int64)
for idx, miller_idx in enumerate(miller_index):
if miller_idx == 0:
# If lattice vector is perpendicular to surface normal, i.e.,
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/electronic_structure/boltztrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def write_struct(self, output_file) -> None:
+ "\n"
)

ops = [np.eye(3)] if self._symprec is None else sym.get_symmetry_dataset()["rotations"] # type: ignore[reportPossiblyUnboundVariable]
ops = [np.eye(3)] if self._symprec is None else sym.get_symmetry_dataset().rotations # type: ignore[reportPossiblyUnboundVariable]

file.write(f"{len(ops)}\n")

Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/abinit/abiobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def structure_from_abivars(cls=None, *args, **kwargs) -> Structure:
raise ValueError(f"{len(typat)=} must equal {len(coords)=}")

# Note conversion to int and Fortran --> C indexing
typat = np.array(typat, dtype=int)
typat = np.array(typat, dtype=np.int64)
species = [znucl_type[typ - 1] for typ in typat]

return cls(
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/lmto.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def as_dict(self):
# The following is to find the classes (atoms that are not symmetry equivalent,
# and create labels. Note that LMTO only attaches numbers with the second atom
# of the same species, e.g. "Bi", "Bi1", "Bi2", etc.
eq_atoms = sga.get_symmetry_dataset()["equivalent_atoms"]
eq_atoms = sga.get_symmetry_dataset().equivalent_atoms
ineq_sites_index = list(set(eq_atoms))
sites = []
classes = []
Expand Down
47 changes: 20 additions & 27 deletions src/pymatgen/optimization/linear_assignment.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
# cython: language_level=3
"""
This module contains an algorithm to solve the Linear Assignment Problem
This module contains the LAPJV algorithm to solve the Linear Assignment Problem.
"""

# isort: dont-add-imports

import numpy as np

cimport cython
Expand Down Expand Up @@ -38,26 +36,21 @@ class LinearAssignment:
rectangular
epsilon: Tolerance for determining if solution vector is < 0
.. attribute: min_cost:
The minimum cost of the matching
.. attribute: solution:
The matching of the rows to columns. i.e solution = [1, 2, 0]
would match row 0 to column 1, row 1 to column 2 and row 2
to column 0. Total cost would be c[0, 1] + c[1, 2] + c[2, 0]
Attributes:
min_cost: The minimum cost of the matching.
solution: The matching of the rows to columns. i.e solution = [1, 2, 0]
would match row 0 to column 1, row 1 to column 2 and row 2
to column 0. Total cost would be c[0, 1] + c[1, 2] + c[2, 0].
"""

def __init__(self, costs, epsilon=1e-13):
# https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword
def __init__(self, costs: np.ndarray, epsilon: float=1e-13) -> None:
self.orig_c = np.asarray(costs, dtype=np.float64, order="C")
self.nx, self.ny = self.orig_c.shape
self.n = self.ny

self.epsilon = fabs(epsilon)

# check that cost matrix is square
# Check that cost matrix is square
if self.nx > self.ny:
raise ValueError("cost matrix must have at least as many columns as rows")

Expand All @@ -67,9 +60,9 @@ class LinearAssignment:
self.c = np.zeros((self.n, self.n), dtype=np.float64)
self.c[:self.nx] = self.orig_c

# initialize solution vectors
self._x = np.empty(self.n, dtype=int)
self._y = np.empty(self.n, dtype=int)
# Initialize solution vectors
self._x = np.empty(self.n, dtype=np.int64)
self._y = np.empty(self.n, dtype=np.int64)

self.min_cost = compute(self.n, self.c, self._x, self._y, self.epsilon)
self.solution = self._x[:self.nx]
Expand All @@ -79,7 +72,7 @@ class LinearAssignment:
@cython.wraparound(False)
cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_t[:] y, np.float_t eps) nogil:

# augment
# Augment
cdef int i, j, k, i1, j1, f, f0, cnt, low, up, z, last, nrr
cdef int n = size
cdef bint b
Expand All @@ -93,7 +86,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
for i in range(n):
x[i] = -1

# column reduction
# Column reduction
for j from n > j >= 0:
col[j] = j
h = c[0, j]
Expand All @@ -107,12 +100,12 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
x[i1] = j
y[j] = i1
else:
# in the paper its x[i], but likely a typo
# NOTE: in the paper it's x[i], but likely a typo
if x[i1] > -1:
x[i1] = -2 - x[i1]
y[j] = -1

# reduction transfer
# Reduction transfer
f = -1
for i in range(n):
if x[i] == -1:
Expand All @@ -129,12 +122,12 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
m = c[i, j] - v[j]
v[j1] = v[j1] - m

# augmenting row reduction
# Augmenting row reduction
for cnt in range(2):
k = 0
f0 = f
f = -1
# this step isn't strictly necessary, and
# This step isn't strictly necessary, and
# time is proportional to 1/eps in the worst case,
# so break early by keeping track of nrr
nrr = 0
Expand Down Expand Up @@ -172,7 +165,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
x[i] = j1
y[j1] = i

# augmentation
# Augmentation
f0 = f
for f in range(f0 + 1):
i1 = fre[f]
Expand All @@ -182,7 +175,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
d[j] = c[i1, j] - v[j]
pred[j] = i1
while True:
# the pascal code ends when a single augmentation is found
# The pascal code ends when a single augmentation is found
# really we need to get back to the for f in range(f0+1) loop
b = False
if up == low:
Expand Down Expand Up @@ -231,7 +224,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
pred[j] = i
if fabs(h - m) < eps:
if y[j] == -1:
# augment
# Augment
for k in range(last+1):
j1 = col[k]
v[j1] = v[j1] + d[j1] - m
Expand Down
Loading

0 comments on commit 41e4c69

Please sign in to comment.