Skip to content

Commit

Permalink
fixed various bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
vincent-grande committed Aug 16, 2024
1 parent 6526043 commit 199b0ee
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 27 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "topf"
version = "1.0.0"
version = "1.0.1"
authors = [
{ name="Vincent P. Grande", email="grande@cs.rwth-aachen.de" },
]
Expand Down
98 changes: 72 additions & 26 deletions src/topf/topfmain.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def topf(
fixed_num_features="Off",
return_dict=False,
):

"""
Returns Topological point features of a point cloud.
Expand Down Expand Up @@ -101,7 +101,7 @@ def topf(
eigenvector_threshold : float, optional
The threshold for the eigenvector components. The default is 0.07.
clustering_method : str, optional
The clustering method to be used. Can be 'kmeans', 'spectral', 'agglomerative'. The default is 'spectral'.
The clustering method to be used. Can be 'kmeans', 'spectral' or 'agglomerative'. The default is 'auto'.
exponential_interpolation : bool, optional
Whether to use exponential interpolation opposed to linear interpolation for computing the simplicial complexes. The default is True.
max_total_quot : float, optional
Expand Down Expand Up @@ -145,6 +145,44 @@ def topf(
The topological features.
(Optional if return_dict = True) output_dict: dict
"""
if not isinstance(interpolation_coefficient, float):
raise ValueError("interpolation_coefficient must be a float.")
if interpolation_coefficient <= 0 or interpolation_coefficient >= 1:
raise ValueError("interpolation_coefficient must be in (0,1).")
if not isinstance(n_clusters, int) and n_clusters != "auto":
raise ValueError("n_clusters must be an integer or 'auto'.")
if clustering_method not in ["kmeans", "spectral", "agglomerative"]:
raise ValueError(
"clustering_method must be 'kmeans', 'spectral', or 'agglomerative'."
)
if draw_signature_heatmaps not in ["Off", "One plot", "Separate plots"]:
raise ValueError(
"draw_signature_heatmaps must be 'Off', 'One plot', or 'Separate plots'."
)
if not isinstance(draw_reps, bool):
raise ValueError("draw_reps must be a boolean.")
if not isinstance(draw_scaled_vecs, bool):
raise ValueError("draw_scaled_vecs must be a boolean.")
if not isinstance(draw_final_clustering, bool):
raise ValueError("draw_final_clustering must be a boolean.")
if not isinstance(draw_signatures, bool):
raise ValueError("draw_signatures must be a boolean.")
if not isinstance(eigenvector_tresholding, bool):
raise ValueError("eigenvector_tresholding must be a boolean.")
if not isinstance(quotient_life_times, bool):
raise ValueError("quotient_life_times must be a boolean.")
if sparsify_input not in ["landmark", "random", "mixed", "off", "auto"]:
raise ValueError(
"sparsify_input must be 'landmark', 'random', 'mixed', 'off', or 'auto'."
)
if not isinstance(return_dict, bool):
raise ValueError("return_dict must be a boolean.")
if not isinstance(verbose, bool):
raise ValueError("verbose must be a boolean.")
if not isinstance(fixed_num_features, list) and fixed_num_features != "Off":
raise ValueError("fixed_num_features must be a list or 'Off'.")
if complex_type not in ["alpha", "rips", "auto"]:
raise ValueError("complex_type must be 'alpha', 'rips', or 'auto.")
base_points = noisify_input_points(np.array(base_points))
ambient_dim = base_points.shape[1]
if max_hom_dim >= ambient_dim:
Expand Down Expand Up @@ -333,7 +371,7 @@ def topf(
multi_inds,
max_hom_dim,
simplex_threshs,
max_num_simplices_drawn = max_num_simplices_drawn
max_num_simplices_drawn=max_num_simplices_drawn,
)
if verbose:
print(
Expand All @@ -360,14 +398,15 @@ def topf(
short_flat_signatures = neigh.predict(original_points)
if verbose:
print("KNeighborsRegressor done")
labels = cluster_points(
short_flat_signatures,
original_points,
n_clusters=n_clusters,
show_plots=draw_final_clustering,
clustering_method=clustering_method,
verbose=verbose,
)
if draw_final_clustering or return_dict:
labels = cluster_points(
short_flat_signatures,
original_points,
n_clusters=n_clusters,
show_plots=draw_final_clustering,
clustering_method=clustering_method,
verbose=verbose,
)
if draw_signatures:
plot_top_features(short_flat_signatures, labels)
if (
Expand All @@ -384,11 +423,6 @@ def topf(
plot_signatures_in_one(original_points, short_flat_signatures_s)
else:
plot_signatures(original_points, short_flat_signatures_s)
elif draw_signature_heatmaps != "Off" and draw_signature_heatmaps != False:
warnings.warn(
"draw_signature_heatmaps parameter should be in range ['Off', 'One plot', Separate plots']. No heatmaps plotted.",
category=RuntimeWarning,
)
if return_dict:
birth_times = []
death_times = []
Expand Down Expand Up @@ -807,15 +841,17 @@ def cluster_points(
verbose=False,
):
"""
Clusters the points using TOPF signatures, clusters on at most 5000 points at the same time.
Clusters the points using TOPF signatures, clusters on at most 3000 points at the same time.
"""
if len(topfeatures) > 5000:
if len(topfeatures) > 3000:
if verbose:
print(
"Sparsifying from " + str(len(topfeatures)) + " to 5000 top. features."
"Sparsifying from "
+ str(len(topfeatures))
+ " to 3000 top. features for clustering. If you want to cluster on all top. features, run clustering manually on the returned top. features."
)
base_points_s, sparse_signatures = random_sampling_two_lists(
base_points, topfeatures, 5000
base_points, topfeatures, 3000
)
if clustering_method == "kmeans":
clustering = sklearn.cluster.KMeans(n_clusters, n_init="auto").fit(
Expand Down Expand Up @@ -1007,7 +1043,7 @@ def plot_vecs(
simplex = multi_all_simplices[0][i][0][index]
starting_point = simplex[0]
starting_point_coord = multi_multi_points[0][i][starting_point]
cur_colour = matplotlib.colors.to_hex(my_colour_maps(i+1)(i))
cur_colour = matplotlib.colors.to_hex(my_colour_maps(i + 1)(i))
xline = [starting_point_coord[0]]
yline = [starting_point_coord[1]]
if ambient_dim >= 3:
Expand Down Expand Up @@ -1035,7 +1071,9 @@ def plot_vecs(
multi_scaled_vecs[1][i] = np.abs(multi_scaled_vecs[1][i]) / np.max(
np.abs(multi_scaled_vecs[1][i])
)
cur_chance = max_num_simplices_drawn / np.sum(multi_scaled_vecs[1][i] > threshold[0])
cur_chance = max_num_simplices_drawn / np.sum(
multi_scaled_vecs[1][i] > threshold[0]
)
if cur_chance < 1 and not already_printed:
already_printed = True
print(
Expand Down Expand Up @@ -1085,7 +1123,9 @@ def plot_vecs(
)
if max_hom_dim == 2:
for i in range(len(multi_inds[2])):
cur_chance = max_num_simplices_drawn / np.sum(multi_scaled_vecs[2][i] > threshold[1])
cur_chance = max_num_simplices_drawn / np.sum(
multi_scaled_vecs[2][i] > threshold[1]
)
if cur_chance < 1 and not already_printed:
already_printed = True
print(
Expand Down Expand Up @@ -1121,7 +1161,10 @@ def plot_vecs(
y=yline,
z=zline,
mode="lines",
line=dict(color=cur_colour, width=1 + multi_scaled_vecs[2][i][index] * 10),
line=dict(
color=cur_colour,
width=1 + multi_scaled_vecs[2][i][index] * 10,
),
)
)
else:
Expand All @@ -1130,7 +1173,10 @@ def plot_vecs(
x=xline,
y=yline,
mode="lines",
line=dict(color=cur_colour, width=1 + multi_scaled_vecs[2][i][index] * 10),
line=dict(
color=cur_colour,
width=1 + multi_scaled_vecs[2][i][index] * 10,
),
)
)
figsimplices.update_xaxes(visible=False)
Expand Down Expand Up @@ -2141,5 +2187,5 @@ def setup_julia():
)


#if __package__ is not None:
# if __package__ is not None:
# setup_julia()

0 comments on commit 199b0ee

Please sign in to comment.