Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
Add more confusable characters to the list of replacements.
Browse files Browse the repository at this point in the history
  • Loading branch information
finnbear committed Mar 17, 2021
1 parent 8e6ef79 commit eb9d89c
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Accuracy was evaluated based on the first 100,000 items from this [dataset of mo

|**Package**|**Time**|**Accuracy**|**Comment**|
|:-----:|:-----:|:-----:|:-----:|
|[finnbear/moderation](https://github.com/finnbear/moderation)|1.66s|91.12%|Current API version is not stable|
|[finnbear/moderation](https://github.com/finnbear/moderation)|1.62s|91.13%|Current API version is not stable|
|[TwinProduction/go-away](https://github.com/TwinProduction/go-away)|2.20s|82.14%|Many false positives from combined words like "push it"|


Expand Down
16 changes: 16 additions & 0 deletions moderation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"os"
"strings"
"testing"
)

Expand Down Expand Up @@ -74,6 +75,21 @@ func TestAnalyze(t *testing.T) {
}
}

// TestRedundantReplacement checks that no entry in replacements or
// runeReplacements maps a character to a string that contains that same
// character. Every offending entry is reported, so a single run lists all
// redundant replacements rather than stopping at the first one.
func TestRedundantReplacement(t *testing.T) {
	for c, s := range replacements {
		if strings.ContainsRune(s, rune(c)) {
			// Errorf already marks the test as failed; report the exact rune
			// that was checked, quoted so unprintable values stay readable.
			t.Errorf("byte replacement %q is redundant", rune(c))
		}
	}
	for c, s := range runeReplacements {
		if strings.ContainsRune(s, c) {
			t.Errorf("rune replacement %q is redundant", c)
		}
	}
}

func ExampleIs_shit_profane() {
fmt.Println(Is("shit", Profane))
// Output: true
Expand Down
40 changes: 39 additions & 1 deletion replacements.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ var (
'ℬ': "b",
'ℭ': "c",
'℮': "e",
'e': "e",
'ℰ': "e",
'ℱ': "f",
'ℳ': "m",
Expand All @@ -120,5 +119,44 @@ var (
'ⅇ': "e",
'ⅈ': "i",
'ⅉ': "ji",

// Confusable: http://www.unicode.org/reports/tr36/confusables.txt
'е': "e",
'о': "o",
'ѕ': "s",
'х': "x",
'і': "i",
'ј': "j",
'р': "p",
'с': "c",
'у': "y",
'ѵ': "v",
'ɑ': "a",
'ɡ': "g",
'ɩ': "li",
'ɒ': "o",
'г': "r",
'π': "n",
'ո': "n",
'հ': "h",
'ս': "u",
'ց': "g",
'ք': "fp",
'ყ': "y",
'୦': "o",
'০': "o",
'੦': "o",
'౦': "o",
'೦': "o",
'๐': "o",
'໐': "o",
'᠐': "o",
'〇': "o",
'օ': "o",
'б': "b",
'৪': "b",
'৭': "g",
'੧': "g",
'୨': "g",
}
)

0 comments on commit eb9d89c

Please sign in to comment.