Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
More robust filtering, word additions, bugfix.
Browse files Browse the repository at this point in the history
  • Loading branch information
finnbear committed Mar 4, 2021
1 parent 749a37d commit d4d42d7
Show file tree
Hide file tree
Showing 11 changed files with 21,105 additions and 23,389 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Accuracy was evaluated based on the first 100,000 items from this [dataset of mo

|**Package**|**Time**|**Accuracy**|**Comment**|
|:-----:|:-----:|:-----:|:-----:|
|[finnbear/moderation](https://github.com/finnbear/moderation)|1.23s|92.71%|Current API version is not stable|
|[finnbear/moderation](https://github.com/finnbear/moderation)|1.52s|91.18%|Current API version is not stable|
|[TwinProduction/go-away](https://github.com/TwinProduction/go-away)|2.20s|82.14%|Many false positives from combined words like "push it"|


Expand Down
3 changes: 2 additions & 1 deletion examples/detection_finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ func init() {

func main() {
fmt.Println("Original phrase: " + input)
fmt.Printf("Inappropriate: %t\n", moderation.IsInappropriate(input))
censored, numCensored := moderation.Censor(input, moderation.Inappropriate)
fmt.Printf("Censored phrase: %s (%d characters replaced)\n", censored, numCensored)

shorter := input
for moderation.Is(shorter, moderation.Any) { // satisfies all bitmasks
for moderation.Is(shorter, moderation.Any) {
input = shorter
shorter = shorter[:len(shorter)-1]
}
Expand Down
4 changes: 4 additions & 0 deletions generator/dictionary_blacklist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ acock
arsehole
asshole
(.*)bastard(.*)
bitchain
bitchar
bitchi
bitches(.*)
bitcho
Expand Down Expand Up @@ -31,6 +33,8 @@ jackass
livesex
masturbate
motherfuck(.*)
necrap
nidiot
(.*)nigger(.*)
nude(.*)
penises
Expand Down
9 changes: 3 additions & 6 deletions generator/generate.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

295 changes: 151 additions & 144 deletions generator/profanity.csv
Original file line number Diff line number Diff line change
@@ -1,160 +1,167 @@
word,profane,offensive,sexual,mean
anal,1,0,1,0
anus,1,0,0,0
arse,1,0,0,0
ass,1,0,0,0
anal,2,0,2,0
anus,2,0,0,0
arse,2,0,0,0
ass,2,0,0,0
balls,0,0,1,0
ballsack,0,0,2,0
bastard,1,2,0,0
biatch,0,1,1,0
bich,0,1,1,0
bitch,0,1,1,0
bloody,1,0,0,0
blowjob,0,0,2,0
bollock,1,0,1,0
bollok,1,0,1,0
boner,0,0,2,0
boob,0,0,2,0
breast,0,0,1,0
btch,0,1,1,0
bugger,1,0,0,0
bullshit,1,0,0,0
ballsack,0,0,3,0
bastard,2,3,0,0
biatch,0,2,2,0
bich,0,2,2,0
bitch,0,2,2,0
bloody,2,0,0,0
blowjob,0,0,3,0
bollock,2,0,2,0
bollok,2,0,2,0
boner,0,0,3,0
boob,0,0,3,0
breast,0,0,2,0
btch,0,2,2,0
bugger,2,0,0,0
bullshit,2,0,0,0
butt,1,0,0,0
bra,0,0,1,0
chink,0,2,0,0
clit,0,0,2,0
cock,1,0,1,0
commie,0,1,0,0
condom,0,0,1,0
coon,0,3,0,0
cracker,0,1,0,0
brainless,0,0,0,1
chink,0,3,0,0
clit,0,0,3,0
cock,2,0,2,0
commie,0,2,0,0
condom,0,0,2,0
coon,0,5,0,0
coward,0,0,0,1
cracker,0,2,0,0
crap,1,0,0,0
cum,0,0,2,0
cunt,0,1,1,0
cum,0,0,3,0
cunt,0,2,2,0
damn,1,0,0,0
daygo,0,2,0,0
dego,0,2,0,0
dick,1,0,1,0
dildo,0,0,2,0
dominatrics,0,0,2,0
dominatrix,0,0,2,0
dong,0,0,1,0
douche,1,1,1,1
daygo,0,3,0,0
dego,0,3,0,0
dick,2,0,2,0
dildo,0,0,3,0
dominatrics,0,0,3,0
dominatrix,0,0,3,0
dong,0,0,2,0
douche,1,1,2,2
dumb,0,0,0,1
dumbass,1,0,0,1
dyke,1,1,1,0
effing,1,0,1,0
ejaculat,0,0,1,0
fascist,0,1,0,0
fag,0,3,0,0
fatty,0,1,0,1
feck,1,0,1,0
felching,0,0,2,0
felate,0,0,2,0
felatio,0,0,2,0
flange,0,0,2,0
foursome,0,0,2,0
freak,0,0,0,1
dumbass,2,0,0,2
dyke,2,2,2,0
effing,2,0,2,0
ejaculat,0,0,2,0
fascist,0,2,0,0
fashist,0,2,0,0
fag,0,3,1,0
fatty,0,2,0,2
feck,2,0,2,0
felching,0,0,3,0
felate,0,0,3,0
felatio,0,0,3,0
flange,0,0,3,0
foursome,0,0,3,0
freak,0,0,0,2
frick,1,0,0,0
frig,1,0,1,0
fuck,1,0,1,0
fudgepacker,0,0,2,0
fuk,1,0,1,0
gangbang,0,2,0,0
gaay,0,1,0,0
genital,0,0,1,0
ghetto,0,1,0,0
handjob,0,0,2,0
hate,0,0,0,1
frig,1,0,2,0
fuck,2,0,2,0
fudgepacker,0,0,3,0
fuk,2,0,2,0
gangbang,0,3,0,0
gaay,0,2,0,0
genital,0,0,2,0
getlost,0,0,0,2
ghetto,0,2,0,0
handjob,0,0,3,0
hate,0,0,0,2
hell,1,0,0,0
heshe,0,0,1,0
hoar,0,2,2,0
hoe,0,1,1,0
homo,0,1,1,0
honkey,0,1,0,0
honkie,0,1,0,0
hooker,0,0,2,0
horny,0,0,2,0
heshe,0,0,2,0
hoar,0,3,2,0
hoe,0,2,1,0
homo,0,2,1,0
honkey,0,2,0,0
honkie,0,2,0,0
hooker,0,0,3,0
horny,0,0,3,0
idiot,0,0,0,1
incest,0,0,2,0
imecile,0,0,0,2
incest,0,0,3,0
jackass,1,0,0,1
jerk,1,0,0,0
jigaboo,0,2,0,0
jizz,0,0,2,0
killyourself,0,1,0,2
labia,0,0,2,0
loser,0,0,0,1
masterbat,0,0,1,0
masturbat,0,0,1,0
moron,0,0,0,1
motherfuck,1,1,1,0
muff,0,0,2,0
naked,0,0,1,0
nazi,0,1,0,0
nicca,1,2,0,0
nigga,1,2,0,0
nigger,2,3,0,0
nigglet,0,2,0,0
nigguh,1,2,0,0
nigr,2,3,0,0
nigs,1,2,0,0
nude,0,0,2,0
orgasm,0,0,1,0
peepee,1,0,1,0
penis,2,0,2,0
penus,2,0,2,0
phuc,1,0,1,0
phuk,1,0,1,0
jerk,0,0,0,1
jigaboo,0,3,0,0
jizz,0,0,3,0
killyourself,0,2,0,3
labia,0,0,3,0
loser,0,0,0,2
masterbat,0,0,2,0
masturbat,0,0,2,0
moron,0,0,0,2
motherfuck,2,2,2,0
muff,0,0,3,0
naked,0,0,2,0
nazi,0,2,0,0
nicca,2,3,0,0
nig,2,3,0,0
nigga,2,3,0,0
nigger,3,5,0,0
nigglet,0,3,0,0
nigguh,2,3,0,0
nigr,3,5,0,0
nigs,2,3,0,0
nude,0,0,3,0
orgasm,0,0,2,0
peepee,2,0,2,0
penis,1,0,3,0
penus,1,0,3,0
phuc,2,0,2,0
phuk,2,0,2,0
pieceofgarbage,0,0,0,1
piss,1,0,0,0
poop,1,0,0,0
porn,0,0,2,0
pp,0,0,1,0
pregnant,0,0,1,0
prick,2,0,0,0
prostitut,0,0,2,0
pube,0,0,2,0
pussies,0,1,1,0
pusy,0,1,1,0
queer,0,2,0,0
rape,0,0,1,0
rectum,0,0,1,0
recktum,0,0,1,0
retard,0,1,0,1
retart,0,1,0,1
rimjob,0,0,2,0
scrotum,0,0,2,0
scum,0,0,1,1
semen,0,0,2,0
porn,0,0,3,0
pp,0,0,2,0
pregnant,0,0,2,0
prick,2,0,0,1
prostitut,0,0,3,0
pube,0,0,3,0
pussies,0,2,2,0
pusy,0,2,2,0
queer,0,3,0,0
rape,0,0,3,0
rectum,0,0,2,0
recktum,0,0,2,0
retard,0,2,0,2
retart,0,2,0,2
rimjob,0,0,3,0
scrotum,0,0,3,0
scum,0,0,2,2
semen,0,0,3,0
sex,0,0,1,0
shagger,0,0,2,0
shagging,0,0,2,0
shit,1,0,0,0
sissy,0,0,0,1
skank,0,1,2,0
slut,0,1,1,0
sodomize,0,0,2,0
sodomy,0,0,2,0
spunk,0,0,2,0
stfu,0,0,0,1
stupid,0,0,0,1
suckit,0,0,1,1
suckmy,0,0,2,0
testical,0,0,2,0
testicle,0,0,2,0
threesome,0,0,2,0
tit,0,0,2,0
tohell,1,1,0,0
tosser,1,0,0,0
shagger,0,0,3,0
shagging,0,0,3,0
shit,2,0,0,0
sissy,0,0,0,2
skank,0,2,3,0
slut,0,2,2,0
sodomize,0,0,3,0
sodomy,0,0,3,0
spunk,0,0,3,0
stfu,0,0,0,2
stupid,0,0,0,2
suckit,0,0,2,2
suckmy,0,0,3,0
testical,0,0,3,0
testicle,0,0,3,0
threesome,0,0,3,0
tit,0,0,3,0
tohell,2,2,0,0
tosser,2,0,0,0
turd,1,0,0,0
twat,0,0,2,0
ugly,0,0,0,1
vagina,0,0,2,0
vulva,0,0,2,0
wank,0,0,2,0
whitetrash,0,1,0,0
whore,0,2,2,0
wigga,1,2,0,0
wigger,1,2,0,0
xrated,0,0,1,0
xxx,0,0,1,0
turnmeon,0,0,2,0
twat,0,0,3,0
ugly,0,0,0,2
vagina,0,0,3,0
vulva,0,0,3,0
wank,0,0,3,0
whitetrash,0,2,0,0
whore,0,3,3,0
wigga,2,3,0,0
wigger,2,3,0,0
xrated,0,0,2,0
xxx,0,0,2,0
12 changes: 12 additions & 0 deletions internal/radix/match.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package radix

// Match records a matched Node together with bookkeeping about how the
// match was formed.
type Match struct {
Node *Node // the node this match refers to (NOTE(review): presumably a node of this package's radix tree; confirm against Node's definition)
Length int // how many matchable characters contributed to the match
Replaced bool // whether a replacement character contributed to the match
Separate bool // false if the match came directly after another character (no space/separation)
}

// EqualsExceptLength reports whether match and other agree on every field
// other than Length: they must point at the same Node and carry the same
// Replaced and Separate flags.
func (match Match) EqualsExceptLength(other Match) bool {
	if match.Node != other.Node {
		return false
	}
	if match.Replaced != other.Replaced {
		return false
	}
	return match.Separate == other.Separate
}
Loading

0 comments on commit d4d42d7

Please sign in to comment.