Skip to content

Commit

Permalink
Explicitly use int64 in Numpy/cython code to avoid OS inconsistency (
Browse files Browse the repository at this point in the history
…#3992)

* use explicit int64 to avoid OS inconsistency

* pin np <1 to run tests

* add missing replacement in cython code

* more missing spglib api replacement

* globally replace dtype=int with int64

* pre-commit auto-fixes

* use int64 in coord_cython

* fix assert dtype

* NEED confirmation: skip chgnet tests

* docstring tweaks of linear assignment

* enough docstring tweaks, go back to real business

* last batch of docstring tweak and remove isort tag as it doesn't seem to be used

* revert to NP2 in dependency

* try to add a np1 test for windows

* try another way to install np1

* fix opt dep name

* add comment in NP1 install
  • Loading branch information
DanielYang59 authored Aug 24, 2024
1 parent 6f280a9 commit 41e4c69
Show file tree
Hide file tree
Showing 25 changed files with 114 additions and 116 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ jobs:
python: "3.10"
resolution: highest
extras: ci,optional
- os: windows-latest
python: "3.10"
resolution: highest
extras: ci,optional,numpy1 # Test NP1 on Windows (quite buggy ATM)
- os: ubuntu-latest
python: ">3.10"
resolution: lowest-direct
Expand Down
9 changes: 4 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ dependencies = [
"matplotlib>=3.8",
"monty>=2024.7.29",
"networkx>=2.2",
# NumPy documentation suggests pinning the current major version as the C API is used
# https://numpy.org/devdocs/dev/depending_on_numpy.html#runtime-dependency-version-ranges
"palettable>=3.3.3",
"pandas>=2",
"plotly>=4.5.0",
Expand All @@ -73,7 +71,9 @@ dependencies = [
"tabulate>=0.9",
"tqdm>=4.60",
"uncertainties>=3.1.4",
'numpy>=1.25.0,<3.0',
# NumPy documentation suggests pinning the current major version as the C API is used
# https://numpy.org/devdocs/dev/depending_on_numpy.html#runtime-dependency-version-ranges
"numpy>=1.25.0,<3",
]
version = "2024.8.9"

Expand Down Expand Up @@ -115,6 +115,7 @@ optional = [
# "openbabel>=3.1.1; platform_system=='Linux'",
]
numba = ["numba>=0.55"]
numpy1 = ["numpy>=1.25.0,<2"] # Test NP1 on Windows (quite buggy ATM)

[project.scripts]
pmg = "pymatgen.cli.pmg:main"
Expand Down Expand Up @@ -266,9 +267,7 @@ exclude_also = [
"@np.deprecate",
"def __repr__",
"except ImportError:",
"if 0:",
"if TYPE_CHECKING:",
"if __name__ == .__main__.:",
"if self.debug:",
"if settings.DEBUG",
"if typing.TYPE_CHECKING:",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1728,7 +1728,7 @@ def coordination_geometry_symmetry_measures_separation_plane_optim(
continue
if sep not in nb_set.separations:
nb_set.separations[sep] = {}
_sep = [np.array(ss, dtype=int) for ss in separation]
_sep = [np.array(ss, dtype=np.int64) for ss in separation]
nb_set.separations[sep][separation] = (plane, _sep)
if sep == separation_plane_algo.separation:
new_seps.append(_sep)
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chemenv/utils/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def get_delta(node1, node2, edge_data):
edge_data:
"""
if node1.isite == edge_data["start"] and node2.isite == edge_data["end"]:
return np.array(edge_data["delta"], dtype=int)
return np.array(edge_data["delta"], dtype=np.int64)
if node2.isite == edge_data["start"] and node1.isite == edge_data["end"]:
return -np.array(edge_data["delta"], dtype=int)
return -np.array(edge_data["delta"], dtype=np.int64)
raise ValueError("Trying to find a delta between two nodes with an edge that seems not to link these nodes.")


Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/local_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ def get_all_voronoi_polyhedra(self, structure: Structure):
indices.extend([(x[2],) + x[3] for x in neighs])

# Get the non-duplicates (using the site indices for numerical stability)
indices = np.array(indices, dtype=int)
indices = np.array(indices, dtype=np.int64)
indices, uniq_inds = np.unique(indices, return_index=True, axis=0) # type: ignore[assignment]
sites = [sites[idx] for idx in uniq_inds]

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/molecule_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@ def permutations(p_atoms, p_centroid, p_weights, q_atoms, q_centroid, q_weights)
p_centroid_test = np.dot(p_centroid, U)

# generate full view from q shape to fill in atom view on the fly
perm_inds = np.zeros(len(p_atoms), dtype=int)
perm_inds = np.zeros(len(p_atoms), dtype=np.int64)

# Find unique atoms
species = np.unique(p_atoms)
Expand All @@ -1067,7 +1067,7 @@ def permutations(p_atoms, p_centroid, p_weights, q_atoms, q_centroid, q_weights)
p_centroid_test = np.dot(p_centroid, U)

# generate full view from q shape to fill in atom view on the fly
perm_inds = np.zeros(len(p_atoms), dtype=int)
perm_inds = np.zeros(len(p_atoms), dtype=np.int64)

# Find unique atoms
species = np.unique(p_atoms)
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/structure_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def _get_mask(self, struct1, struct2, fu, s1_supercell):
if s1_supercell:
# remove the symmetrically equivalent s1 indices
inds = inds[::fu]
return np.array(mask, dtype=int), inds, idx
return np.array(mask, dtype=np.int64), inds, idx

def fit(
self, struct1: Structure, struct2: Structure, symmetric: bool = False, skip_structure_reduction: bool = False
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/topological/spillage.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def orth(A):
n_rows, n_cols = A.shape
eps = np.finfo(float).eps
tol = max(n_rows, n_cols) * np.amax(s) * eps
num = np.sum(s > tol, dtype=int)
num = np.sum(s > tol, dtype=np.int64)
orthonormal_basis = u[:, :num]
return orthonormal_basis, num

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/core/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,7 +1269,7 @@ def get_trans_mat(
r_matrix = np.dot(np.dot(np.linalg.inv(trans_cry.T), r_matrix), trans_cry.T)
# Set one vector of the basis to the rotation axis direction, and
# obtain the corresponding transform matrix
eye = np.eye(3, dtype=int)
eye = np.eye(3, dtype=np.int64)
hh = kk = ll = None
for hh in range(3):
if abs(r_axis[hh]) != 0:
Expand Down Expand Up @@ -2107,7 +2107,7 @@ def slab_from_csl(
# Quickly generate a supercell, normal is not working in this way
if quick_gen:
scale_factor = []
eye = np.eye(3, dtype=int)
eye = np.eye(3, dtype=np.int64)
for i, j in enumerate(miller):
if j == 0:
scale_factor.append(eye[i])
Expand Down
24 changes: 12 additions & 12 deletions src/pymatgen/core/lattice.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ def get_angles(v1, v2, l1, l2):
for idx, all_j in enumerate(gamma_b):
inds = np.logical_and(all_j[:, None], np.logical_and(alpha_b, beta_b[idx][None, :]))
for j, k in np.argwhere(inds):
scale_m = np.array((f_a[idx], f_b[j], f_c[k]), dtype=int) # type: ignore[index]
scale_m = np.array((f_a[idx], f_b[j], f_c[k]), dtype=np.int64) # type: ignore[index]
if abs(np.linalg.det(scale_m)) < 1e-8:
continue

Expand Down Expand Up @@ -1381,7 +1381,7 @@ def get_points_in_sphere(
frac_points = np.ascontiguousarray(frac_points, dtype=float)
latt_matrix = np.ascontiguousarray(self.matrix, dtype=float)
cart_coords = np.ascontiguousarray(self.get_cartesian_coords(frac_points), dtype=float)
pbc = np.ascontiguousarray(self.pbc, dtype=int)
pbc = np.ascontiguousarray(self.pbc, dtype=np.int64)
center_coords = np.ascontiguousarray([center], dtype=float)

_, indices, images, distances = find_points_in_spheres(
Expand Down Expand Up @@ -1510,12 +1510,12 @@ def get_points_in_sphere_old(
# Generate all possible images that could be within `r` of `center`
mins = np.floor(pcoords - nmax)
maxes = np.ceil(pcoords + nmax)
arange = np.arange(start=mins[0], stop=maxes[0], dtype=int)
brange = np.arange(start=mins[1], stop=maxes[1], dtype=int)
crange = np.arange(start=mins[2], stop=maxes[2], dtype=int)
arange = arange[:, None] * np.array([1, 0, 0], dtype=int)[None, :]
brange = brange[:, None] * np.array([0, 1, 0], dtype=int)[None, :]
crange = crange[:, None] * np.array([0, 0, 1], dtype=int)[None, :]
arange = np.arange(start=mins[0], stop=maxes[0], dtype=np.int64)
brange = np.arange(start=mins[1], stop=maxes[1], dtype=np.int64)
crange = np.arange(start=mins[2], stop=maxes[2], dtype=np.int64)
arange = arange[:, None] * np.array([1, 0, 0], dtype=np.int64)[None, :]
brange = brange[:, None] * np.array([0, 1, 0], dtype=np.int64)[None, :]
crange = crange[:, None] * np.array([0, 0, 1], dtype=np.int64)[None, :]
images = arange[:, None, None] + brange[None, :, None] + crange[None, None, :]

# Generate the coordinates of all atoms within these images
Expand Down Expand Up @@ -1629,7 +1629,7 @@ def get_distance_and_image(
if jimage is None:
v, d2 = pbc_shortest_vectors(self, frac_coords1, frac_coords2, return_d2=True)
fc = self.get_fractional_coords(v[0][0]) + frac_coords1 - frac_coords2
fc = np.array(np.round(fc), dtype=int)
fc = np.array(np.round(fc), dtype=np.int64)
return np.sqrt(d2[0, 0]), fc

jimage = np.array(jimage)
Expand Down Expand Up @@ -1866,7 +1866,7 @@ def get_points_in_spheres(
neighbors: list[list[tuple[np.ndarray, float, int, np.ndarray]]] = []

for ii, jj in zip(center_coords, site_neighbors, strict=True):
l1 = np.array(_three_to_one(jj, ny, nz), dtype=int).ravel()
l1 = np.array(_three_to_one(jj, ny, nz), dtype=np.int64).ravel()
# Use the cube index map to find the all the neighboring
# coords, images, and indices
ks = [k for k in l1 if k in cube_to_coords]
Expand Down Expand Up @@ -1905,7 +1905,7 @@ def _compute_cube_index(
Returns:
np.ndarray: nx3 array int indices
"""
return np.array(np.floor((coords - global_min) / radius), dtype=int)
return np.array(np.floor((coords - global_min) / radius), dtype=np.int64)


def _one_to_three(label1d: np.ndarray, ny: int, nz: int) -> np.ndarray:
Expand Down Expand Up @@ -1943,7 +1943,7 @@ def find_neighbors(label: np.ndarray, nx: int, ny: int, nz: int) -> list[np.ndar
Neighbor cell indices.
"""
array = [[-1, 0, 1]] * 3
neighbor_vectors = np.array(list(itertools.product(*array)), dtype=int)
neighbor_vectors = np.array(list(itertools.product(*array)), dtype=np.int64)
label3d = _one_to_three(label, ny, nz) if np.shape(label)[1] == 1 else label
all_labels = label3d[:, None, :] - neighbor_vectors[None, :, :]
filtered_labels = []
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/core/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,7 +1778,7 @@ def get_neighbor_list(
site_coords = np.ascontiguousarray([site.coords for site in sites], dtype=float)
cart_coords = np.ascontiguousarray(self.cart_coords, dtype=float)
lattice_matrix = np.ascontiguousarray(self.lattice.matrix, dtype=float)
pbc = np.ascontiguousarray(self.pbc, dtype=int)
pbc = np.ascontiguousarray(self.pbc, dtype=np.int64)
center_indices, points_indices, images, distances = find_points_in_spheres(
cart_coords,
site_coords,
Expand Down
6 changes: 3 additions & 3 deletions src/pymatgen/core/surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,9 +949,9 @@ def add_site_types() -> None:
or "bulk_equivalent" not in initial_structure.site_properties
):
spg_analyzer = SpacegroupAnalyzer(initial_structure)
initial_structure.add_site_property("bulk_wyckoff", spg_analyzer.get_symmetry_dataset()["wyckoffs"])
initial_structure.add_site_property("bulk_wyckoff", spg_analyzer.get_symmetry_dataset().wyckoffs)
initial_structure.add_site_property(
"bulk_equivalent", spg_analyzer.get_symmetry_dataset()["equivalent_atoms"].tolist()
"bulk_equivalent", spg_analyzer.get_symmetry_dataset().equivalent_atoms.tolist()
)

def calculate_surface_normal() -> np.ndarray:
Expand All @@ -971,7 +971,7 @@ def calculate_scaling_factor() -> np.ndarray:
"""
slab_scale_factor = []
non_orth_ind = []
eye = np.eye(3, dtype=int)
eye = np.eye(3, dtype=np.int64)
for idx, miller_idx in enumerate(miller_index):
if miller_idx == 0:
# If lattice vector is perpendicular to surface normal, i.e.,
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/electronic_structure/boltztrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def write_struct(self, output_file) -> None:
+ "\n"
)

ops = [np.eye(3)] if self._symprec is None else sym.get_symmetry_dataset()["rotations"] # type: ignore[reportPossiblyUnboundVariable]
ops = [np.eye(3)] if self._symprec is None else sym.get_symmetry_dataset().rotations # type: ignore[reportPossiblyUnboundVariable]

file.write(f"{len(ops)}\n")

Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/abinit/abiobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def structure_from_abivars(cls=None, *args, **kwargs) -> Structure:
raise ValueError(f"{len(typat)=} must equal {len(coords)=}")

# Note conversion to int and Fortran --> C indexing
typat = np.array(typat, dtype=int)
typat = np.array(typat, dtype=np.int64)
species = [znucl_type[typ - 1] for typ in typat]

return cls(
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/lmto.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def as_dict(self):
# The following is to find the classes (atoms that are not symmetry equivalent,
# and create labels. Note that LMTO only attaches numbers with the second atom
# of the same species, e.g. "Bi", "Bi1", "Bi2", etc.
eq_atoms = sga.get_symmetry_dataset()["equivalent_atoms"]
eq_atoms = sga.get_symmetry_dataset().equivalent_atoms
ineq_sites_index = list(set(eq_atoms))
sites = []
classes = []
Expand Down
47 changes: 20 additions & 27 deletions src/pymatgen/optimization/linear_assignment.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
# cython: language_level=3
"""
This module contains an algorithm to solve the Linear Assignment Problem
This module contains the LAPJV algorithm to solve the Linear Assignment Problem.
"""

# isort: dont-add-imports

import numpy as np

cimport cython
Expand Down Expand Up @@ -38,26 +36,21 @@ class LinearAssignment:
rectangular
epsilon: Tolerance for determining if solution vector is < 0
.. attribute: min_cost:
The minimum cost of the matching
.. attribute: solution:
The matching of the rows to columns. i.e solution = [1, 2, 0]
would match row 0 to column 1, row 1 to column 2 and row 2
to column 0. Total cost would be c[0, 1] + c[1, 2] + c[2, 0]
Attributes:
min_cost: The minimum cost of the matching.
solution: The matching of the rows to columns. i.e solution = [1, 2, 0]
would match row 0 to column 1, row 1 to column 2 and row 2
to column 0. Total cost would be c[0, 1] + c[1, 2] + c[2, 0].
"""

def __init__(self, costs, epsilon=1e-13):
# https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword
def __init__(self, costs: np.ndarray, epsilon: float=1e-13) -> None:
self.orig_c = np.asarray(costs, dtype=np.float64, order="C")
self.nx, self.ny = self.orig_c.shape
self.n = self.ny

self.epsilon = fabs(epsilon)

# check that cost matrix is square
# Check that cost matrix is square
if self.nx > self.ny:
raise ValueError("cost matrix must have at least as many columns as rows")

Expand All @@ -67,9 +60,9 @@ class LinearAssignment:
self.c = np.zeros((self.n, self.n), dtype=np.float64)
self.c[:self.nx] = self.orig_c

# initialize solution vectors
self._x = np.empty(self.n, dtype=int)
self._y = np.empty(self.n, dtype=int)
# Initialize solution vectors
self._x = np.empty(self.n, dtype=np.int64)
self._y = np.empty(self.n, dtype=np.int64)

self.min_cost = compute(self.n, self.c, self._x, self._y, self.epsilon)
self.solution = self._x[:self.nx]
Expand All @@ -79,7 +72,7 @@ class LinearAssignment:
@cython.wraparound(False)
cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_t[:] y, np.float_t eps) nogil:

# augment
# Augment
cdef int i, j, k, i1, j1, f, f0, cnt, low, up, z, last, nrr
cdef int n = size
cdef bint b
Expand All @@ -93,7 +86,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
for i in range(n):
x[i] = -1

# column reduction
# Column reduction
for j from n > j >= 0:
col[j] = j
h = c[0, j]
Expand All @@ -107,12 +100,12 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
x[i1] = j
y[j] = i1
else:
# in the paper its x[i], but likely a typo
# NOTE: in the paper it's x[i], but likely a typo
if x[i1] > -1:
x[i1] = -2 - x[i1]
y[j] = -1

# reduction transfer
# Reduction transfer
f = -1
for i in range(n):
if x[i] == -1:
Expand All @@ -129,12 +122,12 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
m = c[i, j] - v[j]
v[j1] = v[j1] - m

# augmenting row reduction
# Augmenting row reduction
for cnt in range(2):
k = 0
f0 = f
f = -1
# this step isn't strictly necessary, and
# This step isn't strictly necessary, and
# time is proportional to 1/eps in the worst case,
# so break early by keeping track of nrr
nrr = 0
Expand Down Expand Up @@ -172,7 +165,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
x[i] = j1
y[j1] = i

# augmentation
# Augmentation
f0 = f
for f in range(f0 + 1):
i1 = fre[f]
Expand All @@ -182,7 +175,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
d[j] = c[i1, j] - v[j]
pred[j] = i1
while True:
# the pascal code ends when a single augmentation is found
# The pascal code ends when a single augmentation is found
# really we need to get back to the for f in range(f0+1) loop
b = False
if up == low:
Expand Down Expand Up @@ -231,7 +224,7 @@ cdef np.float_t compute(int size, np.float_t[:, :] c, np.int64_t[:] x, np.int64_
pred[j] = i
if fabs(h - m) < eps:
if y[j] == -1:
# augment
# Augment
for k in range(last+1):
j1 = col[k]
v[j1] = v[j1] + d[j1] - m
Expand Down
Loading

0 comments on commit 41e4c69

Please sign in to comment.