Skip to content

Commit

Permalink
Show tqdm progress bars only at the DEBUG log level
Browse files (browse the repository at this point in the history)
  • Loading branch information
FabianGroeger96 committed Oct 31, 2024
1 parent 130e135 commit 584253f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def parse_requirements(filename):
name=PACKAGE_NAME,
packages=proj_packages,
package_dir={PACKAGE_NAME: SOURCE_DIRECTORY},
version="0.0.30",
version="0.0.31",
author="Fabian Groeger",
author_email="fabian.groeger@unibas.ch",
description="A holistic self-supervised data cleaning strategy to detect irrelevant samples, near duplicates and label errors.",
Expand Down
34 changes: 26 additions & 8 deletions src/cleaner/selfclean_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def __init__(
log_level: str = "INFO",
**kwargs,
):
set_log_level(min_log_level=log_level)
self.log_level = log_level
set_log_level(min_log_level=self.log_level)
fix_random_seeds(seed=random_seed)

self.memmap = memmap
Expand Down Expand Up @@ -119,12 +120,17 @@ def fit(

# create the distance matrix in chunks
n_chunks = math.ceil(self.N / self.chunk_size)
for i in tqdm(
range(n_chunks),
desc="Creating distance matrix",
total=n_chunks,
position=0,
leave=True,
iterator = range(n_chunks)
for i in (
tqdm(
iterator,
desc="Creating distance matrix",
total=n_chunks,
position=0,
leave=True,
)
if self.log_level == "DEBUG"
else iterator
):
chunk_slice = slice(i * self.chunk_size, (i + 1) * self.chunk_size, 1)
X_emb = emb_space[chunk_slice]
Expand Down Expand Up @@ -164,7 +170,19 @@ def fit(
)
triu_indices = np.triu_indices(self.N, k=1)
# create the upper triangular matrix of the distance matrix
for start_idx in range(0, len(triu_indices[0]), self.chunk_size):
n_chunks = math.ceil(len(triu_indices[0]) / self.chunk_size)
iterator = range(0, len(triu_indices[0]), self.chunk_size)
for start_idx in (
tqdm(
iterator,
desc="Creating upper triangular distance matrix",
total=n_chunks,
position=0,
leave=True,
)
if self.log_level == "DEBUG"
else iterator
):
end_idx = min(start_idx + self.chunk_size, len(triu_indices[0]))
self.p_distances[start_idx:end_idx] = self.distance_matrix[
triu_indices[0][start_idx:end_idx], triu_indices[1][start_idx:end_idx]
Expand Down

0 comments on commit 584253f

Please sign in to comment.