From 510cd02202653e813f0231c79b82440e6f4cb04a Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Wed, 31 Jul 2024 09:14:37 -0500
Subject: [PATCH 1/9] Heavily optimize `Sqids.__to_id()`

Converting the alphabet to a list is very costly at scale.
Getting the length of the alphabet repeatedly is a little costly.
Comparing `result == 0` vs `not result` is measurably costly.
These have all been eliminated.

Python's timeit module suggests a performance improvement of ~300%.
---
 sqids/sqids.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sqids/sqids.py b/sqids/sqids.py
index ad173cf..88dc463 100644
--- a/sqids/sqids.py
+++ b/sqids/sqids.py
@@ -136,13 +136,13 @@ def __shuffle(self, alphabet: str) -> str:
 
     def __to_id(self, num: int, alphabet: str) -> str:
         id_chars: List[str] = []
-        chars = list(alphabet)
         result = num
+        alphabet_length = len(alphabet)  # computed once, outside the loop
 
         while True:
-            id_chars.insert(0, chars[result % len(chars)])
-            result = result // len(chars)
-            if result == 0:
+            id_chars.insert(0, alphabet[result % alphabet_length])
+            result = result // alphabet_length
+            if not result:  # quotient is 0: every digit has been emitted
                 break
 
         return "".join(id_chars)

From ef28bef54cf22c4696364fb5764db046ed3fd9c1 Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Thu, 1 Aug 2024 07:24:56 -0500
Subject: [PATCH 2/9] Fail fast when checking numbers to encode

Previous behavior required checking the entire list
even if the first number is invalid.
---
 sqids/sqids.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sqids/sqids.py b/sqids/sqids.py
index 88dc463..f2bce86 100644
--- a/sqids/sqids.py
+++ b/sqids/sqids.py
@@ -44,8 +44,7 @@ def encode(self, numbers: List[int]) -> str:
         if not numbers:
             return ""
 
-        in_range_numbers = [n for n in numbers if 0 <= n <= sys.maxsize]
-        if len(in_range_numbers) != len(numbers):
+        if not all(0 <= number <= sys.maxsize for number in numbers):
             raise ValueError(f"Encoding supports numbers between 0 and {sys.maxsize}")
 
         return self.__encode_numbers(numbers, 0)

From 1955cbca454f4d6ab7825c929e2f2209dbb994db Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Thu, 1 Aug 2024 09:55:40 -0500
Subject: [PATCH 3/9] Use `any()` to eliminate a block indent

---
 sqids/sqids.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sqids/sqids.py b/sqids/sqids.py
index f2bce86..286f999 100644
--- a/sqids/sqids.py
+++ b/sqids/sqids.py
@@ -10,9 +10,8 @@ def __init__(
         min_length: int = DEFAULT_MIN_LENGTH,
         blocklist: List[str] = DEFAULT_BLOCKLIST,
     ):
-        for char in alphabet:
-            if ord(char) > 127:
-                raise ValueError("Alphabet cannot contain multibyte characters")
+        if any(ord(char) > 127 for char in alphabet):
+            raise ValueError("Alphabet cannot contain multibyte characters")
 
         if len(alphabet) < 3:
             raise ValueError("Alphabet length must be at least 3")

From 39698a801bdc5a8d2abb0ddacabf859addc35441 Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Thu, 1 Aug 2024 09:59:29 -0500
Subject: [PATCH 4/9] Eliminate blocklist looping when checking if an ID is
 blocked

By filtering the blocklist once during instantiation,
a significant amount of computation can be eliminated
when the same instance is reused over and over.

This additionally updates the hypothesis testing;
generated IDs are now confirmed to be blockable.
---
 sqids/sqids.py           | 56 +++++++++++++++++++++++++++-------------
 tests/test_round_trip.py | 19 +++++++++++---
 2 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/sqids/sqids.py b/sqids/sqids.py
index 286f999..1e3b8bc 100644
--- a/sqids/sqids.py
+++ b/sqids/sqids.py
@@ -2,6 +2,8 @@
 import sys
 from .constants import DEFAULT_ALPHABET, DEFAULT_BLOCKLIST, DEFAULT_MIN_LENGTH
 
+DIGITS = set("0123456789")
+
 
 class Sqids:
     def __init__(
@@ -28,16 +30,33 @@ def __init__(
                 f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}"
             )
 
-        filtered_blocklist: Set[str] = set()
-        alphabet_lower = alphabet.lower()
-        for word_lower in (w.lower() for w in blocklist if len(w) >= 3):
-            intersection = [c for c in word_lower if c in alphabet_lower]
-            if len(intersection) == len(word_lower):
-                filtered_blocklist.add(word_lower)
+        exact_match: Set[str] = set()
+        match_at_ends: Set[str] = set()
+        match_anywhere: Set[str] = set()
+        alphabet_lower = set(alphabet.lower())
+        for word in blocklist:
+            if len(word) < 3:
+                continue
+            elif len(word) == 3:
+                exact_match.add(word.lower())
+                continue
+
+            word_lower = word.lower()
+            word_lower_set = set(word_lower)
+            if word_lower_set & alphabet_lower != word_lower_set:
+                continue
+
+            if word_lower_set & DIGITS:
+                match_at_ends.add(word_lower)
+            else:
+                match_anywhere.add(word_lower)
 
         self.__alphabet = self.__shuffle(alphabet)
         self.__min_length = min_length
-        self.__blocklist = filtered_blocklist
+        self.__blocklist_exact_match = exact_match
+        # When matching at the ends, `.startswith()` and `.endswith()` need a tuple.
+        self.__blocklist_match_at_ends = tuple(match_at_ends)
+        self.__blocklist_match_anywhere = match_anywhere
 
     def encode(self, numbers: List[int]) -> str:
         if not numbers:
@@ -84,7 +103,7 @@ def __encode_numbers(self, numbers: List[int], increment: int = 0) -> str:
                 alphabet = self.__shuffle(alphabet)
                 id_ += alphabet[: min(self.__min_length - len(id_), len(alphabet))]
 
-        if self.__is_blocked_id(id_):
+        if len(id_) >= 3 and self.__is_blocked_id(id_):
             id_ = self.__encode_numbers(numbers, increment + 1)
 
         return id_
@@ -152,16 +171,17 @@ def __to_number(self, id_: str, alphabet: str) -> int:
     def __is_blocked_id(self, id_: str) -> bool:
         id_ = id_.lower()
 
-        for word in self.__blocklist:
-            if len(word) > len(id_):
-                continue
-            if len(id_) <= 3 or len(word) <= 3:
-                if id_ == word:
-                    return True
-            elif any(c.isdigit() for c in word):
-                if id_.startswith(word) or id_.endswith(word):
-                    return True
-            elif word in id_:
+        if len(id_) == 3:  # 3-character IDs are only compared for equality
+            return id_ in self.__blocklist_exact_match
+
+        if (  # words containing digits are only matched at either end of the ID
+            id_.startswith(self.__blocklist_match_at_ends)
+            or id_.endswith(self.__blocklist_match_at_ends)
+        ):
+            return True
+
+        for word in self.__blocklist_match_anywhere:  # digit-free words
+            if word in id_:
                 return True
 
         return False
diff --git a/tests/test_round_trip.py b/tests/test_round_trip.py
index 90732a2..3886f5f 100644
--- a/tests/test_round_trip.py
+++ b/tests/test_round_trip.py
@@ -6,10 +6,13 @@
 import hypothesis.strategies as st
 
 
-lists_of_integers = st.lists(elements=st.integers(min_value=0, max_value=sys.maxsize))
+lists_of_integers = st.lists(
+    elements=st.integers(min_value=0, max_value=sys.maxsize),
+    min_size=1,
+)
 min_lengths = st.integers(min_value=0, max_value=255)
 alphabets = st.text(
-    alphabet=st.characters(min_codepoint=0, max_codepoint=0x7f),
+    alphabet=st.characters(min_codepoint=0, max_codepoint=0x7F),
     min_size=3,
 )
 
@@ -23,5 +26,13 @@ def test_round_trip_encoding(numbers, min_length, alphabet):
     # Reject non-unique alphabets without failing the test.
     assume(len(set(alphabet)) == len(alphabet))
 
-    sqid = sqids.Sqids(min_length=min_length, alphabet=alphabet, blocklist=[])
-    assert sqid.decode(sqid.encode(numbers)) == numbers
+    sqid_1 = sqids.Sqids(min_length=min_length, alphabet=alphabet, blocklist=[])
+    id_1 = sqid_1.encode(numbers)
+    assert sqid_1.decode(id_1) == numbers
+
+    # If the ID is long enough, use it as a blocklist word and ensure it is blocked.
+    if len(id_1) >= 3:
+        sqid_2 = sqids.Sqids(min_length=min_length, alphabet=alphabet, blocklist=[id_1])
+        id_2 = sqid_2.encode(numbers)
+        assert id_1 != id_2
+        assert sqid_2.decode(id_2) == numbers

From ac069bd4e89f5fa46d9022b3efdae3eefdc15791 Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Sat, 3 Aug 2024 07:25:57 -0500
Subject: [PATCH 5/9] Add a changelog entry

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 672ab63..c6584ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 **Unreleased**
 - Support Python 3.12 and 3.13.
+- Speed up encoding by ~85% by optimizing blocklist checks.
+  This improvement requires more calculation when the `Sqids` class is instantiated,
+  so users are encouraged to instantiate `Sqids` once and always reuse the instance.
 
 **v0.4.1**
 - Compatibility with Python 3.6 (not officially supported)

From ec137019a9d41571cbe2a3ef2583c79c9e1f0a36 Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Sat, 3 Aug 2024 07:32:34 -0500
Subject: [PATCH 6/9] Add a basic performance measurement script

---
 assets/performance.py | 61 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 assets/performance.py

diff --git a/assets/performance.py b/assets/performance.py
new file mode 100644
index 0000000..9c22178
--- /dev/null
+++ b/assets/performance.py
@@ -0,0 +1,61 @@
+import sqids
+import timeit
+
+number = 100_000  # how many times each statement below is executed
+
+print(f"Iterations: {number:,d}")
+
+print(
+    "{0:<20s} {1:7.3f}".format(  # label, then total seconds for all iterations
+        "Instantiate:",
+        timeit.timeit(
+            stmt="sqids.Sqids()",
+            globals={"sqids": sqids},
+            number=number,
+        )
+    )
+)
+
+print(
+    "{0:<20s} {1:7.3f}".format(
+        "Encode [0]:",  # [0] -> 'bM'
+        timeit.timeit(
+            stmt="squid.encode([0])",
+            globals={"squid": sqids.Sqids()},  # built once, outside the timed statement
+            number=number,
+        )
+    )
+)
+
+print(
+    "{0:<20s} {1:7.3f}".format(
+        "Encode [0, 1, 2]:",  # [0, 1, 2] -> 'rSCtlB'
+        timeit.timeit(
+            stmt="squid.encode([0, 1, 2])",
+            globals={"squid": sqids.Sqids()},
+            number=number,
+        )
+    )
+)
+
+print(
+    "{0:<20s} {1:7.3f}".format(
+        "Decode 'bM':",  # 'bM' -> [0]
+        timeit.timeit(
+            stmt="squid.decode('bM')",
+            globals={"squid": sqids.Sqids()},
+            number=number,
+        )
+    )
+)
+
+print(
+    "{0:<20s} {1:7.3f}".format(
+        "Decode 'rSCtlB':",  # 'rSCtlB' -> [0, 1, 2]
+        timeit.timeit(
+            stmt="squid.decode('rSCtlB')",
+            globals={"squid": sqids.Sqids()},
+            number=number,
+        )
+    ),
+)

From 442f3167a9c5ac64266447a3aaf123b58a8f0431 Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Sat, 3 Aug 2024 07:37:00 -0500
Subject: [PATCH 7/9] Add a test to ensure short blocklist words are ignored

---
 tests/test_blocklist.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_blocklist.py b/tests/test_blocklist.py
index e2f9a67..a385070 100644
--- a/tests/test_blocklist.py
+++ b/tests/test_blocklist.py
@@ -82,3 +82,12 @@ def test_max_encoding_attempts():
 
     with pytest.raises(Exception):
         sqids.encode([0])
+
+
+def test_small_words_are_ignored():
+    """Blocklist words shorter than 3 characters must be ignored."""
+
+    id_ = Sqids().encode([0])  # a 2-character ID, so too short to be blockable
+    assert id_ == "bM"
+    id_ = Sqids(blocklist=[id_]).encode([0])
+    assert id_ == "bM"

From 5452169f03484a2fd57ccb143ea0c9727fe6e044 Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Sat, 3 Aug 2024 07:52:08 -0500
Subject: [PATCH 8/9] Don't track coverage in the generated roundtrip test
 conditional

---
 tests/test_round_trip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_round_trip.py b/tests/test_round_trip.py
index 3886f5f..f38ea18 100644
--- a/tests/test_round_trip.py
+++ b/tests/test_round_trip.py
@@ -31,7 +31,7 @@ def test_round_trip_encoding(numbers, min_length, alphabet):
     assert sqid_1.decode(id_1) == numbers
 
     # If the ID is long enough, use it as a blocklist word and ensure it is blocked.
-    if len(id_1) >= 3:
+    if len(id_1) >= 3:  # pragma: nocover
         sqid_2 = sqids.Sqids(min_length=min_length, alphabet=alphabet, blocklist=[id_1])
         id_2 = sqid_2.encode(numbers)
         assert id_1 != id_2

From 7c4ef18d2614c8f7433d10882ba68930573c98ac Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Mon, 5 Aug 2024 18:09:32 -0500
Subject: [PATCH 9/9] Make instantiation fast when using the default alphabet
 and blocklist

---
 assets/filter_blocklist.py |  99 +++++++
 sqids/constants.py         | 580 ++++++++++++++++++++++++++++++++++++-
 sqids/sqids.py             |  64 ++--
 tests/test_blocklist.py    |  26 ++
 4 files changed, 742 insertions(+), 27 deletions(-)
 create mode 100644 assets/filter_blocklist.py

diff --git a/assets/filter_blocklist.py b/assets/filter_blocklist.py
new file mode 100644
index 0000000..764e39a
--- /dev/null
+++ b/assets/filter_blocklist.py
@@ -0,0 +1,99 @@
+import pathlib
+import sys
+from typing import Set, Tuple
+
+
+repo_root = pathlib.Path(__file__).parent.parent
+this_file = pathlib.Path(__file__).relative_to(repo_root)
+constants_path = repo_root / "sqids/constants.py"
+import sqids.constants  # noqa
+
+
+DIGITS = set("0123456789")
+
+
+def filter_blocklist() -> Tuple[Set[str], Set[str], Set[str]]:
+    """Partition the default blocklist by how each word must be matched."""
+    exact_match, match_at_ends, match_anywhere = set(), set(), set()
+    alphabet = set(sqids.constants.DEFAULT_ALPHABET.lower())
+    # Mirror the runtime filter in `Sqids.__init__()` so both code paths agree.
+    for word in map(str.lower, sqids.constants.DEFAULT_BLOCKLIST):
+        if len(word) < 3 or not set(word) <= alphabet:
+            continue  # Too short, or uses characters no ID can contain.
+        if len(word) == 3:
+            exact_match.add(word)  # 3-character words only block identical IDs.
+        elif set(word) & DIGITS:
+            match_at_ends.add(word)  # Words with digits match at either end.
+        else:
+            match_anywhere.add(word)  # Digit-free words match anywhere.
+
+    return exact_match, match_at_ends, match_anywhere
+
+
+def generate_new_constants_file(
+    exact_match: Set[str],
+    match_at_ends: Set[str],
+    match_anywhere: Set[str],
+) -> str:
+    """Return the complete text of a regenerated constants file."""
+    script = this_file.as_posix()  # Forward slashes, even on Windows.
+    lines = [
+        f'DEFAULT_ALPHABET = "{sqids.constants.DEFAULT_ALPHABET}"',
+        f"DEFAULT_MIN_LENGTH = {sqids.constants.DEFAULT_MIN_LENGTH}",
+        "",
+        "# =======",
+        "#  NOTE",
+        "# =======",
+        "#",
+        f"# When updating the blocklist, run {script} to pre-filter constants.",
+        "# This is critical for performance.",
+        "#",
+        "",
+        "DEFAULT_BLOCKLIST = [",
+    ]
+    # Every word list is sorted so the generated file is deterministic.
+    for word in sorted(sqids.constants.DEFAULT_BLOCKLIST):
+        lines.append(f'    "{word}",')
+    lines.append("]")
+
+    # Exact-match words are emitted as a set literal.
+    lines.append("")
+    lines.append("_exact_match = {")
+    for word in sorted(exact_match):
+        lines.append(f'    "{word}",')
+    lines.append("}")
+
+    # Match-at-ends words are emitted as a tuple literal.
+    lines.append("")
+    lines.append("_match_at_ends = (")
+    for word in sorted(match_at_ends):
+        lines.append(f'    "{word}",')
+    lines.append(")")
+
+    # Match-anywhere words are emitted as a set literal.
+    lines.append("")
+    lines.append("_match_anywhere = {")
+    for word in sorted(match_anywhere):
+        lines.append(f'    "{word}",')
+    lines.append("}")
+
+    return "\n".join(lines).rstrip() + "\n"  # Include a trailing newline.
+
+
+def main() -> int:
+    """Rewrite the constants file if stale; return 1 if it changed, else 0."""
+    text = constants_path.read_text(encoding="utf-8")
+    exact_match, match_at_ends, match_anywhere = filter_blocklist()
+    new_text = generate_new_constants_file(exact_match, match_at_ends, match_anywhere)
+    if text == new_text:
+        print("No changes necessary")
+        return 0
+
+    print(f"Updating {constants_path.relative_to(repo_root)}")
+    # Bytes keep LF newlines everywhere; `write_text(newline=)` needs Python 3.10+.
+    constants_path.write_bytes(new_text.encode("utf-8"))
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/sqids/constants.py b/sqids/constants.py
index 601ffb2..07ce122 100644
--- a/sqids/constants.py
+++ b/sqids/constants.py
@@ -1,4 +1,14 @@
 DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+DEFAULT_MIN_LENGTH = 0
+
+# =======
+#  NOTE
+# =======
+#
+# When updating the blocklist, run assets/filter_blocklist.py to pre-filter constants.
+# This is critical for performance.
+#
+
 DEFAULT_BLOCKLIST = [
     "0rgasm",
     "1d10t",
@@ -561,4 +571,572 @@
     "zocco1a",
     "zoccola",
 ]
-DEFAULT_MIN_LENGTH = 0
+
+_exact_match = {
+    "ass",
+    "cum",
+    "fag",
+    "g00",
+    "g0o",
+    "go0",
+    "goo",
+}
+
+_match_at_ends = (
+    "0rgasm",
+    "1d10t",
+    "1d1ot",
+    "1di0t",
+    "1diot",
+    "1eccacu10",
+    "1eccacu1o",
+    "1eccacul0",
+    "1eccaculo",
+    "1mbec11e",
+    "1mbec1le",
+    "1mbeci1e",
+    "1mbecile",
+    "a11upat0",
+    "a11upato",
+    "a1lupat0",
+    "a1lupato",
+    "ah01e",
+    "ah0le",
+    "aho1e",
+    "al1upat0",
+    "al1upato",
+    "allupat0",
+    "ana1",
+    "ana1e",
+    "arrapat0",
+    "b00b",
+    "b00be",
+    "b01ata",
+    "b0ceta",
+    "b0iata",
+    "b0ob",
+    "b0obe",
+    "b0sta",
+    "b1tch",
+    "b1te",
+    "b1tte",
+    "ba1atkar",
+    "bastard0",
+    "batt0na",
+    "bo0b",
+    "bo0be",
+    "bo1ata",
+    "bran1age",
+    "bran1er",
+    "bran1ette",
+    "bran1eur",
+    "bran1euse",
+    "c0ck",
+    "c0g110ne",
+    "c0g11one",
+    "c0g1i0ne",
+    "c0g1ione",
+    "c0gl10ne",
+    "c0gl1one",
+    "c0gli0ne",
+    "c0glione",
+    "c0na",
+    "c0nnard",
+    "c0nnasse",
+    "c0nne",
+    "c0u111es",
+    "c0u11les",
+    "c0u1l1es",
+    "c0u1lles",
+    "c0ui11es",
+    "c0ui1les",
+    "c0uil1es",
+    "c0uilles",
+    "c11t",
+    "c11t0",
+    "c11to",
+    "c1it",
+    "c1it0",
+    "c1ito",
+    "cabr0n",
+    "cabra0",
+    "cara1h0",
+    "cara1ho",
+    "caracu10",
+    "caracu1o",
+    "caracul0",
+    "caralh0",
+    "cazz0",
+    "cazz1mma",
+    "ch00t1a",
+    "ch00t1ya",
+    "ch00tia",
+    "ch00tiya",
+    "ch0d",
+    "ch0ot1a",
+    "ch0ot1ya",
+    "ch0otia",
+    "ch0otiya",
+    "ch1asse",
+    "ch1avata",
+    "ch1er",
+    "ch1ng0",
+    "ch1ngadaz0s",
+    "ch1ngadazos",
+    "ch1ngader1ta",
+    "ch1ngaderita",
+    "ch1ngar",
+    "ch1ngo",
+    "ch1ngues",
+    "ch1nk",
+    "ching0",
+    "chingadaz0s",
+    "chingader1ta",
+    "cho0t1a",
+    "cho0t1ya",
+    "cho0tia",
+    "cho0tiya",
+    "choot1a",
+    "choot1ya",
+    "cl1t",
+    "cl1t0",
+    "cl1to",
+    "clit0",
+    "cog110ne",
+    "cog11one",
+    "cog1i0ne",
+    "cog1ione",
+    "cogl10ne",
+    "cogl1one",
+    "cogli0ne",
+    "cou111es",
+    "cou11les",
+    "cou1l1es",
+    "cou1lles",
+    "coui11es",
+    "coui1les",
+    "couil1es",
+    "cu10",
+    "cu1att0ne",
+    "cu1attone",
+    "cu1er0",
+    "cu1ero",
+    "cu1o",
+    "cul0",
+    "culatt0ne",
+    "culer0",
+    "d11d0",
+    "d11do",
+    "d1ck",
+    "d1ld0",
+    "d1ldo",
+    "de1ch",
+    "di1d0",
+    "di1do",
+    "dild0",
+    "encu1e",
+    "enf01re",
+    "enf0ire",
+    "enfo1re",
+    "estup1d0",
+    "estup1do",
+    "estupid0",
+    "etr0n",
+    "f0da",
+    "f0der",
+    "f0ttere",
+    "f0tters1",
+    "f0ttersi",
+    "f0tze",
+    "f0utre",
+    "f1ca",
+    "f1cker",
+    "f1ga",
+    "fotters1",
+    "fr0c10",
+    "fr0c1o",
+    "fr0ci0",
+    "fr0cio",
+    "fr0sc10",
+    "fr0sc1o",
+    "fr0sci0",
+    "fr0scio",
+    "froc10",
+    "froc1o",
+    "froci0",
+    "frosc10",
+    "frosc1o",
+    "frosci0",
+    "g0u1ne",
+    "g0uine",
+    "gou1ne",
+    "gr0gnasse",
+    "haram1",
+    "hund1n",
+    "id10t",
+    "id1ot",
+    "idi0t",
+    "imbec11e",
+    "imbec1le",
+    "imbeci1e",
+    "j1zz",
+    "k1ke",
+    "kam1ne",
+    "leccacu10",
+    "leccacu1o",
+    "leccacul0",
+    "m1erda",
+    "m1gn0tta",
+    "m1gnotta",
+    "m1nch1a",
+    "m1nchia",
+    "m1st",
+    "mam0n",
+    "mamahuev0",
+    "masturbat10n",
+    "masturbat1on",
+    "masturbati0n",
+    "merd0s0",
+    "merd0so",
+    "merdos0",
+    "mign0tta",
+    "minch1a",
+    "musch1",
+    "n1gger",
+    "negr0",
+    "nerch1a",
+    "p00p",
+    "p011a",
+    "p01la",
+    "p0l1a",
+    "p0lla",
+    "p0mp1n0",
+    "p0mp1no",
+    "p0mpin0",
+    "p0mpino",
+    "p0op",
+    "p0rca",
+    "p0rn",
+    "p0rra",
+    "p0uff1asse",
+    "p0uffiasse",
+    "p1p1",
+    "p1pi",
+    "p1r1a",
+    "p1rla",
+    "p1sc10",
+    "p1sc1o",
+    "p1sci0",
+    "p1scio",
+    "p1sser",
+    "pa11e",
+    "pa1le",
+    "pal1e",
+    "pane1e1r0",
+    "pane1e1ro",
+    "pane1eir0",
+    "pane1eiro",
+    "panele1r0",
+    "panele1ro",
+    "paneleir0",
+    "pec0r1na",
+    "pec0rina",
+    "pecor1na",
+    "pen1s",
+    "pendej0",
+    "pip1",
+    "pir1a",
+    "pisc10",
+    "pisc1o",
+    "pisci0",
+    "po0p",
+    "po11a",
+    "po1la",
+    "pol1a",
+    "pomp1n0",
+    "pomp1no",
+    "pompin0",
+    "pouff1asse",
+    "pr1ck",
+    "put1za",
+    "puta1n",
+    "r0mp1ba11e",
+    "r0mp1ba1le",
+    "r0mp1bal1e",
+    "r0mp1balle",
+    "r0mpiba11e",
+    "r0mpiba1le",
+    "r0mpibal1e",
+    "r0mpiballe",
+    "rand1",
+    "recch10ne",
+    "recch1one",
+    "recchi0ne",
+    "romp1ba11e",
+    "romp1ba1le",
+    "romp1bal1e",
+    "romp1balle",
+    "rompiba11e",
+    "rompiba1le",
+    "rompibal1e",
+    "ruff1an0",
+    "ruff1ano",
+    "ruffian0",
+    "s1ut",
+    "sa10pe",
+    "sa1aud",
+    "sa1ope",
+    "sal0pe",
+    "sb0rr0ne",
+    "sb0rra",
+    "sb0rrone",
+    "sbatters1",
+    "sborr0ne",
+    "sc0pare",
+    "sc0pata",
+    "sch1ampe",
+    "sche1se",
+    "sche1sse",
+    "schwachs1nn1g",
+    "schwachs1nnig",
+    "schwachsinn1g",
+    "sh1t",
+    "sp0mp1nare",
+    "sp0mpinare",
+    "spomp1nare",
+    "str0nz0",
+    "str0nza",
+    "str0nzo",
+    "stronz0",
+    "stup1d",
+    "succh1am1",
+    "succh1ami",
+    "succhiam1",
+    "t0pa",
+    "test1c1e",
+    "test1cle",
+    "testic1e",
+    "tr01a",
+    "tr0ia",
+    "tr0mbare",
+    "tr1ng1er",
+    "tr1ngler",
+    "tring1er",
+    "tro1a",
+    "vaffancu10",
+    "vaffancu1o",
+    "vaffancul0",
+    "vag1na",
+    "w1chsen",
+    "x0ch0ta",
+    "x0chota",
+    "xoch0ta",
+    "z0cc01a",
+    "z0cc0la",
+    "z0cco1a",
+    "z0ccola",
+    "z1z1",
+    "z1zi",
+    "ziz1",
+    "zocc01a",
+    "zocc0la",
+    "zocco1a",
+)
+
+_match_anywhere = {
+    "aand",
+    "ahole",
+    "allupato",
+    "anal",
+    "anale",
+    "anus",
+    "arrapato",
+    "arsch",
+    "arse",
+    "balatkar",
+    "bastardo",
+    "battona",
+    "bitch",
+    "bite",
+    "bitte",
+    "boceta",
+    "boiata",
+    "boob",
+    "boobe",
+    "bosta",
+    "branlage",
+    "branler",
+    "branlette",
+    "branleur",
+    "branleuse",
+    "cabrao",
+    "cabron",
+    "caca",
+    "cacca",
+    "cacete",
+    "cagante",
+    "cagar",
+    "cagare",
+    "cagna",
+    "caraculo",
+    "caralho",
+    "cazzata",
+    "cazzimma",
+    "cazzo",
+    "chatte",
+    "chiasse",
+    "chiavata",
+    "chier",
+    "chingadazos",
+    "chingaderita",
+    "chingar",
+    "chingo",
+    "chingues",
+    "chink",
+    "chod",
+    "chootia",
+    "chootiya",
+    "clit",
+    "clito",
+    "cock",
+    "coglione",
+    "cona",
+    "connard",
+    "connasse",
+    "conne",
+    "couilles",
+    "cracker",
+    "crap",
+    "culattone",
+    "culero",
+    "culo",
+    "cunt",
+    "damn",
+    "deich",
+    "depp",
+    "dick",
+    "dildo",
+    "dyke",
+    "encule",
+    "enema",
+    "enfoire",
+    "estupido",
+    "etron",
+    "fica",
+    "ficker",
+    "figa",
+    "foda",
+    "foder",
+    "fottere",
+    "fottersi",
+    "fotze",
+    "foutre",
+    "frocio",
+    "froscio",
+    "fuck",
+    "gandu",
+    "gouine",
+    "grognasse",
+    "harami",
+    "haramzade",
+    "hundin",
+    "idiot",
+    "imbecile",
+    "jerk",
+    "jizz",
+    "kamine",
+    "kike",
+    "leccaculo",
+    "mamahuevo",
+    "mamon",
+    "masturbate",
+    "masturbation",
+    "merda",
+    "merde",
+    "merdoso",
+    "mierda",
+    "mignotta",
+    "minchia",
+    "mist",
+    "muschi",
+    "neger",
+    "negre",
+    "negro",
+    "nerchia",
+    "nigger",
+    "orgasm",
+    "palle",
+    "paneleiro",
+    "patakha",
+    "pecorina",
+    "pendejo",
+    "penis",
+    "pipi",
+    "pirla",
+    "piscio",
+    "pisser",
+    "polla",
+    "pompino",
+    "poop",
+    "porca",
+    "porn",
+    "porra",
+    "pouffiasse",
+    "prick",
+    "pussy",
+    "puta",
+    "putain",
+    "pute",
+    "putiza",
+    "puttana",
+    "queca",
+    "randi",
+    "rape",
+    "recchione",
+    "retard",
+    "rompiballe",
+    "ruffiano",
+    "sacanagem",
+    "salaud",
+    "salope",
+    "saugnapf",
+    "sbattere",
+    "sbattersi",
+    "sborra",
+    "sborrone",
+    "scheise",
+    "scheisse",
+    "schlampe",
+    "schwachsinnig",
+    "schwanz",
+    "scopare",
+    "scopata",
+    "sexy",
+    "shit",
+    "slut",
+    "spompinare",
+    "stronza",
+    "stronzo",
+    "stupid",
+    "succhiami",
+    "sucker",
+    "tapette",
+    "testicle",
+    "tette",
+    "topa",
+    "tringler",
+    "troia",
+    "trombare",
+    "turd",
+    "twat",
+    "vaffanculo",
+    "vagina",
+    "verdammt",
+    "verga",
+    "wank",
+    "wichsen",
+    "xana",
+    "xochota",
+    "zizi",
+    "zoccola",
+}
diff --git a/sqids/sqids.py b/sqids/sqids.py
index 1e3b8bc..89d51e9 100644
--- a/sqids/sqids.py
+++ b/sqids/sqids.py
@@ -1,6 +1,13 @@
-from typing import List, Set
+from typing import List, Set, Tuple
 import sys
-from .constants import DEFAULT_ALPHABET, DEFAULT_BLOCKLIST, DEFAULT_MIN_LENGTH
+from .constants import (
+    DEFAULT_ALPHABET,
+    DEFAULT_BLOCKLIST,
+    DEFAULT_MIN_LENGTH,
+    _exact_match,
+    _match_at_ends,
+    _match_anywhere,
+)
 
 DIGITS = set("0123456789")
 
@@ -30,33 +37,38 @@ def __init__(
                 f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}"
             )
 
-        exact_match: Set[str] = set()
-        match_at_ends: Set[str] = set()
-        match_anywhere: Set[str] = set()
-        alphabet_lower = set(alphabet.lower())
-        for word in blocklist:
-            if len(word) < 3:
-                continue
-            elif len(word) == 3:
-                exact_match.add(word.lower())
-                continue
-
-            word_lower = word.lower()
-            word_lower_set = set(word_lower)
-            if word_lower_set & alphabet_lower != word_lower_set:
-                continue
-
-            if word_lower_set & DIGITS:
-                match_at_ends.add(word_lower)
-            else:
-                match_anywhere.add(word_lower)
+        # When the blocklist and alphabet are defaults, use pre-filtered blocklists.
+        if blocklist is DEFAULT_BLOCKLIST and alphabet is DEFAULT_ALPHABET:
+            self.__blocklist_exact_match: Set[str] = _exact_match
+            self.__blocklist_match_at_ends: Tuple[str, ...] = _match_at_ends
+            self.__blocklist_match_anywhere: Set[str] = _match_anywhere
+        else:
+            alphabet_lower = set(alphabet.lower())
+            exact_match: Set[str] = set()
+            match_at_ends: Set[str] = set()
+            match_anywhere: Set[str] = set()
+            for word in blocklist:
+                if len(word) < 3:
+                    continue
+                word_lower = word.lower()
+                word_lower_set = set(word_lower)
+                if word_lower_set & alphabet_lower != word_lower_set:
+                    continue
+
+                if len(word) == 3:
+                    exact_match.add(word.lower())
+                elif word_lower_set & DIGITS:
+                    match_at_ends.add(word_lower)
+                else:
+                    match_anywhere.add(word_lower)
+
+            self.__blocklist_exact_match = exact_match
+            # When matching at the ends, `.startswith()` and `.endswith()` need a tuple.
+            self.__blocklist_match_at_ends = tuple(match_at_ends)
+            self.__blocklist_match_anywhere = match_anywhere
 
         self.__alphabet = self.__shuffle(alphabet)
         self.__min_length = min_length
-        self.__blocklist_exact_match = exact_match
-        # When matching at the ends, `.startswith()` and `.endswith()` need a tuple.
-        self.__blocklist_match_at_ends = tuple(match_at_ends)
-        self.__blocklist_match_anywhere = match_anywhere
 
     def encode(self, numbers: List[int]) -> str:
         if not numbers:
diff --git a/tests/test_blocklist.py b/tests/test_blocklist.py
index a385070..594de59 100644
--- a/tests/test_blocklist.py
+++ b/tests/test_blocklist.py
@@ -1,3 +1,6 @@
+import pathlib
+import sys
+
 import pytest
 from sqids import Sqids
 
@@ -70,6 +73,16 @@ def test_blocklist_filtering_in_constructor():
     assert numbers == [1, 2, 3]
 
 
+@pytest.mark.parametrize("word", ("ab!", "abc!", "xyz"))
+def test_alphabet_is_not_superset_of_blocklist_word_characters(word):
+    """Verify that a non-subset blocklist word is ignored."""
+
+    sqids = Sqids(alphabet="abc", blocklist=[word])  # each word has a character outside "abc"
+    assert sqids._Sqids__blocklist_exact_match == set()  # name-mangled private attribute
+    assert sqids._Sqids__blocklist_match_at_ends == tuple()
+    assert sqids._Sqids__blocklist_match_anywhere == set()
+
+
 def test_max_encoding_attempts():
     alphabet = "abc"
     min_length = 3
@@ -91,3 +104,16 @@ def test_small_words_are_ignored():
     assert id_ == "bM"
     id_ = Sqids(blocklist=[id_]).encode([0])
     assert id_ == "bM"
+
+
+def test_constants_file_is_pristine():
+    """Verify the constants file is pristine."""
+
+    repo_root = pathlib.Path(__file__).parent.parent
+    sys.path.append(str(repo_root / "assets"))  # make assets/filter_blocklist.py importable
+    import filter_blocklist
+
+    sets = filter_blocklist.filter_blocklist()  # re-derive the three pre-filtered collections
+    new_text = filter_blocklist.generate_new_constants_file(*sets)
+    error_message = "You must run assets/filter_blocklist.py!"
+    assert filter_blocklist.constants_path.read_text() == new_text, error_message