Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
Tweaks and comparison code.
Browse files Browse the repository at this point in the history
finnbear committed Dec 30, 2020

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent d463687 commit 68353b4
Showing 6 changed files with 91 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -55,7 +55,7 @@ $ go run hello_world.go
Accuracy was evaluated based on a [dataset of moderated comments](https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv).
**Package**|**Accuracy**|**Comment**
:-----:|:-----:|:-----:
https://github.com/finnbear/moderation|90.50%|This repository
https://github.com/finnbear/moderation|90.50%|Current version is not stable
https://github.com/TwinProduction/go-away|74.83%|Many false positives from combined words like "push it"


71 changes: 71 additions & 0 deletions comparison/comparison_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package moderation

import (
"encoding/csv"
"io"
"os"
"testing"

"github.com/finnbear/moderation"
"github.com/TwinProduction/go-away"
)

// moderationIsInappropriate adapts the moderation package to the
// func(string) bool classifier shape consumed by testWikipedia: it analyzes
// phrase and reports whether the resulting analysis is inappropriate.
func moderationIsInappropriate(phrase string) bool {
	analysis := moderation.Analyze(phrase)
	return analysis.IsInappropriate()
}

func TestModerationWikipedia(t *testing.T) {
testWikipedia(t, moderationIsInappropriate)
}

func TestGoAwayWikipedia(t *testing.T) {
testWikipedia(t, goaway.IsProfane)
}

// testWikipedia measures how often isInappropriate agrees with the labels
// stored in ../wikipedia-test.csv and fails the test when the agreement rate
// is below the required accuracy.
//
// Each CSV record is read as (label, phrase): column 0 equal to "1" marks the
// phrase in column 1 as offensive. At most 50000 records are evaluated. The
// test is skipped when the dataset cannot be opened, and aborted on the first
// unreadable or too-short record so that a damaged dataset cannot produce a
// misleading accuracy figure.
func testWikipedia(t *testing.T, isInappropriate func(string) bool) {
	wikiModerationData, err := os.Open("../wikipedia-test.csv")
	if err != nil {
		t.Skipf("skipping: cannot open dataset: %v", err)
	}
	// Deferred so the file is released even when the test aborts early via
	// t.Fatal or the classifier panics.
	defer func() {
		if err := wikiModerationData.Close(); err != nil {
			t.Error(err)
		}
	}()

	reader := csv.NewReader(wikiModerationData)

	// Upper bound on the number of records evaluated, to keep the run short.
	const maxRecords = 50000

	correct := 0
	total := 0

	for total < maxRecords {
		fields, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// The record may be nil here, so it must not be indexed.
			t.Fatalf("reading record %d: %v", total+1, err)
		}
		if len(fields) < 2 {
			t.Fatalf("record %d has %d field(s), want at least 2", total+1, len(fields))
		}

		phrase := fields[1]
		offensive := fields[0] == "1"
		if isInappropriate(phrase) == offensive {
			correct++
		}
		// To inspect misclassifications, log phrase and offensive here when
		// the two disagree.

		total++
	}

	if total == 0 {
		// Avoids reporting a 0/0 (NaN) accuracy for an empty dataset.
		t.Fatal("dataset contains no records")
	}

	accuracy := 100 * float64(correct) / float64(total)

	// Wikipedia takes into account more than whether the text contains
	// bad words
	const requiredAccuracy = 90

	if accuracy >= requiredAccuracy {
		t.Logf("accuracy was %f%% (%d%% required)\n", accuracy, requiredAccuracy)
	} else {
		t.Errorf("accuracy was %f%% (%d%% required)\n", accuracy, requiredAccuracy)
	}
}
8 changes: 8 additions & 0 deletions comparison/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module comparison

go 1.15

require (
github.com/TwinProduction/go-away v1.0.1
github.com/finnbear/moderation v0.5.0
)
8 changes: 8 additions & 0 deletions comparison/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
github.com/TwinProduction/go-away v1.0.1 h1:LDe6jPktucIz/dftNGL5x2LYRB6VXjVUtRsrlVHo+Ag=
github.com/TwinProduction/go-away v1.0.1/go.mod h1:VB/lNzhkzh7Xw2QgU+tYBjMheldukJaIJzVaIx2rh30=
github.com/finnbear/moderation v0.5.0 h1:k00252U3XaworO6EN/VRf1hasn0pcANWe7tNjEoyzsc=
github.com/finnbear/moderation v0.5.0/go.mod h1:zoexQHUm4TZCb9x/Re0TqV8SgvnDPZjMRdSMAjEqmgE=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
2 changes: 1 addition & 1 deletion moderation.go
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ var replacements = [...]string{
'6': "b",
'(': "c",
'<': "c",
'3': "e",
'3': "eg",
'9': "gq",
'#': "h",
'1': "li",
3 changes: 2 additions & 1 deletion moderation_test.go
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ func TestAnalyze(t *testing.T) {
{"what a bunch of bullsh1t", true},
{"bitčh", true},
{"assassin", false},
{"push it", false},
{"carcass", false},
{"I had called upon my friend, Mr. Sherlock Holmes, one day in the autumn of last year and found him in deep conversation with a very stout, florid-faced, elderly gentleman with fiery red hair.", false},
{"With an apology for my intrusion, I was about to withdraw when Holmes pulled me abruptly into the room and closed the door behind me.", false},
@@ -92,7 +93,7 @@ func TestAnalyzeWikipedia(t *testing.T) {
if analysis.IsInappropriate() == offensive {
correct++
} else {
//t.Errorf("phrase=\"%s\" analysis offensive=%v actual offensive=%v", phrase, analysis.IsOffensive(), offensive)
//fmt.Printf("phrase=\"%s\" analysis offensive=%v actual offensive=%v", phrase, analysis.IsInappropriate(), offensive)
}

total++

0 comments on commit 68353b4

Please sign in to comment.