Skip to content

Commit

Permalink
Merge pull request #88 from life4/small-fixes
Browse files (browse the repository at this point in the history)
Small fixes
  • Loading branch information
orsinium authored Sep 28, 2023
2 parents 5f793e1 + 03be186 commit 53d4a92
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 6 deletions.
2 changes: 1 addition & 1 deletion tests/test_compression/test_entropy_ncd.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_simmetry_compressor(text):
@hypothesis.given(text=hypothesis.strategies.text(min_size=1))
def test_idempotency_compressor(text):
# I've modified idempotency to some kind of distributivity for constant.
# Now it indicates that compressor really compress.
# Now it indicates that compressor actually does compression.
assert ALG._get_size(text * 2) < ALG._get_size(text) * 2


Expand Down
2 changes: 1 addition & 1 deletion textdistance/algorithms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def _sum_counters(self, *sequences: Counter[T]) -> Counter[T]:
result += s
return result

def _count_counters(self, counter: Counter) -> float:
def _count_counters(self, counter: Counter) -> int:
"""Return all elements count from Counter
"""
if getattr(self, 'as_set', False):
Expand Down
5 changes: 2 additions & 3 deletions textdistance/algorithms/compression_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ def _make_probs(self, *sequences) -> dict[str, tuple[Fraction, Fraction]]:

prob_pairs = {}
cumulative_count = 0
counts = sorted(counts.items(), key=lambda x: (x[1], x[0]), reverse=True)
for char, current_count in counts:
for char, current_count in counts.most_common():
prob_pairs[char] = (
Fraction(cumulative_count, total_letters),
Fraction(current_count, total_letters),
Expand Down Expand Up @@ -216,7 +215,7 @@ def _get_size(self, data: Sequence) -> float:
class EntropyNCD(_NCDBase):
"""Entropy based NCD
Get Entropy of input secueance as a size of compressed data.
Get Entropy of input sequence as a size of compressed data.
https://en.wikipedia.org/wiki/Entropy_(information_theory)
https://en.wikipedia.org/wiki/Entropy_encoding
Expand Down
2 changes: 1 addition & 1 deletion textdistance/algorithms/token_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def __call__(self, *sequences: Sequence) -> float:
sequences = [self._count_counters(s) for s in sequences] # ints
ks = list(islice(self.ks, len(sequences)))

if len(sequences) == 2 or self.bias is None:
if len(sequences) != 2 or self.bias is None:
result = intersection
for k, s in zip(ks, sequences):
result += k * (s - intersection)
Expand Down

0 comments on commit 53d4a92

Please sign in to comment.