diff --git a/README.md b/README.md index ec3b3d3..4e62f2f 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ $ go run hello_world.go Accuracy was evaluated based on a [dataset of moderated comments](https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv). **Package**|**Accuracy**|**Comment** :-----:|:-----:|:-----: -https://github.com/finnbear/moderation|90.50%|This repository +https://github.com/finnbear/moderation|90.50%|Current version is not stable https://github.com/TwinProduction/go-away|74.83%|Many false positives from combined words like "push it" diff --git a/comparison/comparison_test.go b/comparison/comparison_test.go new file mode 100644 index 0000000..4124f7f --- /dev/null +++ b/comparison/comparison_test.go @@ -0,0 +1,71 @@ +package moderation + +import ( + "encoding/csv" + "io" + "os" + "testing" + + "github.com/finnbear/moderation" + "github.com/TwinProduction/go-away" +) + +func moderationIsInappropriate(phrase string) bool { + return moderation.Analyze(phrase).IsInappropriate() +} + +func TestModerationWikipedia(t *testing.T) { + testWikipedia(t, moderationIsInappropriate) +} + +func TestGoAwayWikipedia(t *testing.T) { + testWikipedia(t, goaway.IsProfane) +} + +func testWikipedia(t *testing.T, isInappropriate func(string) bool) { + wikiModerationData, err := os.Open("../wikipedia-test.csv") + if err != nil { + t.Skip() + } + reader := csv.NewReader(wikiModerationData) + + correct := 0 + total := 0 + + for total < 50000 { + fields, err := reader.Read() + if err != nil { + if err == io.EOF { + break + } + t.Error(err) + } + phrase := fields[1] + offensive := fields[0] == "1" + inappropriate := isInappropriate(phrase) + if inappropriate == offensive { + correct++ + } else { + //fmt.Printf("phrase=\"%s\" analysis offensive=%v actual offensive=%v", phrase, inappropriate, offensive) + } + + total++ + } + + accuracy := 100 * float64(correct) / float64(total) + + // Wikipedia takes into account more than whether the text contains + // bad words + const requiredAccuracy = 90 + + if accuracy >= requiredAccuracy { + t.Logf("accuracy was %f%% (%d%% required)\n", accuracy, requiredAccuracy) + } else { + t.Errorf("accuracy was %f%% (%d%% required)\n", accuracy, requiredAccuracy) + } + + err = wikiModerationData.Close() + if err != nil { + t.Error(err) + } +} diff --git a/comparison/go.mod b/comparison/go.mod new file mode 100644 index 0000000..391da39 --- /dev/null +++ b/comparison/go.mod @@ -0,0 +1,8 @@ +module comparison + +go 1.15 + +require ( + github.com/TwinProduction/go-away v1.0.1 + github.com/finnbear/moderation v0.5.0 +) diff --git a/comparison/go.sum b/comparison/go.sum new file mode 100644 index 0000000..6bef5d1 --- /dev/null +++ b/comparison/go.sum @@ -0,0 +1,8 @@ +github.com/TwinProduction/go-away v1.0.1 h1:LDe6jPktucIz/dftNGL5x2LYRB6VXjVUtRsrlVHo+Ag= +github.com/TwinProduction/go-away v1.0.1/go.mod h1:VB/lNzhkzh7Xw2QgU+tYBjMheldukJaIJzVaIx2rh30= +github.com/finnbear/moderation v0.5.0 h1:k00252U3XaworO6EN/VRf1hasn0pcANWe7tNjEoyzsc= +github.com/finnbear/moderation v0.5.0/go.mod h1:zoexQHUm4TZCb9x/Re0TqV8SgvnDPZjMRdSMAjEqmgE= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc= +golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/moderation.go b/moderation.go index a988fb8..0c0254f 100644 --- a/moderation.go +++ b/moderation.go @@ -26,7 +26,7 @@ var replacements = [...]string{ '6': "b", '(': "c", '<': "c", - '3': "e", + '3': "eg", '9': "gq", '#': "h", '1': "li", diff --git a/moderation_test.go b/moderation_test.go index 1ebad5d..c086159 100644 --- a/moderation_test.go +++ b/moderation_test.go @@ -34,6 +34,7 @@ func TestAnalyze(t *testing.T) { {"what a bunch of bullsh1t", true}, {"bitĨh", true}, {"assassin", false}, + {"push it", false}, {"carcass", false}, {"I had called upon my friend, Mr. Sherlock Holmes, one day in the autumn of last year and found him in deep conversation with a very stout, florid-faced, elderly gentleman with fiery red hair.", false}, {"With an apology for my intrusion, I was about to withdraw when Holmes pulled me abruptly into the room and closed the door behind me.", false}, @@ -92,7 +93,7 @@ func TestAnalyzeWikipedia(t *testing.T) { if analysis.IsInappropriate() == offensive { correct++ } else { - //t.Errorf("phrase=\"%s\" analysis offensive=%v actual offensive=%v", phrase, analysis.IsOffensive(), offensive) + //fmt.Printf("phrase=\"%s\" analysis offensive=%v actual offensive=%v", phrase, analysis.IsInappropriate(), offensive) } total++