Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
Update wordlists and tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
finnbear committed Feb 12, 2021
1 parent 29020fc commit 2f9547f
Show file tree
Hide file tree
Showing 4 changed files with 963 additions and 216 deletions.
1 change: 1 addition & 0 deletions generator/dictionary_blacklist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ livesex
(.*)nigger(.*)
nude(.*)
penises
penist
poop(.*)
porn(.?)
(.*)prostitut(.*)
Expand Down
9 changes: 7 additions & 2 deletions generator/profanity.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ balls,0,0,1,0
ballsack,0,0,2,0
bastard,1,2,0,0
biatch,0,1,1,0
bich,0,1,1,0
bitch,0,1,1,0
bloody,1,0,0,0
blowjob,0,0,2,0
Expand All @@ -19,14 +20,16 @@ bugger,1,0,0,0
bullshit,1,0,0,0
butt,1,0,0,0
clitoris,0,0,2,0
cock,1,0,0,0
cock,1,0,1,0
cok,1,0,1,0
condom,0,0,1,0
coon,0,3,0,0
crap,1,0,0,0
cum,0,0,2,0
cunt,0,1,1,0
dick,1,0,1,0
dildo,0,0,2,0
dong,0,0,1,0
dumb,0,0,0,1
dyke,1,1,1,0
effing,1,0,1,0
Expand All @@ -39,6 +42,7 @@ flange,0,0,2,0
frig,1,0,1,0
fuck,1,0,1,0
fudgepacker,0,0,2,0
fuk,1,0,1,0
gangbang,0,2,0,0
handjob,0,0,2,0
heshe,0,0,1,0
Expand All @@ -65,8 +69,9 @@ prick,2,0,0,0
prostitut,0,0,2,0
pube,0,0,2,0
pussies,0,1,1,0
pussy,0,1,1,0
pusy,0,1,1,0
queer,0,2,0,0
rape,0,0,1,0
retard,0,1,0,1
rimjob,0,0,2,0
scrotum,0,0,2,0
Expand Down
19 changes: 19 additions & 0 deletions moderation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ func TestAnalyzeWikipedia(t *testing.T) {
reader := csv.NewReader(wikiModerationData)

correct := 0
correctOk := 0
correctNok := 0
total := 0
totalOk := 0
totalNok := 0

for total < 50000 {
fields, err := reader.Read()
Expand All @@ -112,14 +116,26 @@ func TestAnalyzeWikipedia(t *testing.T) {
offensive := fields[0] == "1"
if Is(phrase, Profane|Offensive|Sexual|Mean) == offensive {
correct++
if offensive {
correctNok++
} else {
correctOk++
}
} else {
//fmt.Printf("phrase=\"%s\" analysis offensive=%v actual offensive=%v", phrase, analysis.IsInappropriate(), offensive)
}

total++
if offensive {
totalNok++
} else {
totalOk++
}
}

accuracy := 100 * float64(correct) / float64(total)
accuracyOk := 100 * float64(correctOk) / float64(totalOk)
accuracyNok := 100 * float64(correctNok) / float64(totalNok)

// Wikipedia takes into account more than whether the text contains
// bad words
Expand All @@ -131,6 +147,9 @@ func TestAnalyzeWikipedia(t *testing.T) {
t.Errorf("accuracy was %f%% (%d%% required)\n", accuracy, requiredAccuracy)
}

t.Logf("positive accuracy was %f%%\n", accuracyNok)
t.Logf("negative accuracy was %f%%\n", accuracyOk)

err = wikiModerationData.Close()
if err != nil {
t.Error(err)
Expand Down
Loading

0 comments on commit 2f9547f

Please sign in to comment.