Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
Heavily optimize cpu time and allocations.
Browse files Browse the repository at this point in the history
  • Loading branch information
finnbear committed Dec 21, 2020
1 parent e7bb3ce commit 3d18f94
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 33 deletions.
10 changes: 5 additions & 5 deletions examples/hello_world.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ func main() {
printResult("assassin")

/*
Expected results:
"hello world" is appropriate.
"$#1t" is NOT appropriate.
"a$$" is NOT appropriate.
"assassin" is appropriate.
Expected results:
"hello world" is appropriate.
"$#1t" is NOT appropriate.
"a$$" is NOT appropriate.
"assassin" is appropriate.
*/
}

Expand Down
26 changes: 26 additions & 0 deletions examples/pprof.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package main

import (
"github.com/finnbear/moderation"
"log"
"os"
"runtime/pprof"
)

// main writes a CPU profile to "moderation.prof" while repeatedly calling
// moderation.Analyze on a fixed set of inputs.
func main() {
	const rounds = 100000

	profile, err := os.Create("moderation.prof")
	if err != nil {
		log.Fatal("could not create CPU profile: ", err)
	}
	defer profile.Close()

	if startErr := pprof.StartCPUProfile(profile); startErr != nil {
		log.Fatal("could not start CPU profile: ", startErr)
	}
	defer pprof.StopCPUProfile()

	// The inputs analyzed on every round, in call order.
	samples := [...]string{
		"hello",
		"sh1t",
		"Hello John Doe, I hope you're feeling well, as I come today bearing shitty news regarding your favorite chocolate chip cookie brand",
	}

	for round := 0; round < rounds; round++ {
		for _, sample := range samples {
			moderation.Analyze(sample)
		}
	}
}
26 changes: 26 additions & 0 deletions internal/radix/buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package radix

// Buffer is a fixed-size, array-backed collection of *Node values.
// Append writes at the current position and advances it (wrapping to 0 at
// the end of Storage), Len reports that position, and Clear resets it.
type Buffer struct {
// Storage holds the nodes; it is sized at twice longestWord.
Storage [longestWord * 2]*Node // * 2 because some characters turn into 2 matches
// index is the slot the next Append writes to, and the value Len returns.
index int
}

// Append stores node in the current write slot and advances the write
// position. When the position would reach the end of Storage it wraps back
// to 0, so Len then reports 0 and subsequent appends overwrite from the start.
func (b *Buffer) Append(node *Node) {
	b.Storage[b.index] = node

	if next := b.index + 1; next < len(b.Storage) {
		b.index = next
	} else {
		b.index = 0
	}
}

// Clear resets the write position to 0 so that Len reports 0.
// The contents of Storage are left untouched.
func (b *Buffer) Clear() {
	b.index = 0
}

// Get returns the node held in slot index of Storage.
// The index is not compared against Len, so a slot that has not been written
// since the last Clear may yield a nil or previously stored node.
func (b *Buffer) Get(index int) *Node {
	return b.Storage[index]
}

// Len returns the current write position, i.e. the number of nodes appended
// since the buffer was last cleared or wrapped around.
func (b *Buffer) Len() int {
	return b.index
}
2 changes: 1 addition & 1 deletion internal/radix/radix.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package radix

const (
alphabet = 26
longestWord = 28
longestWord = 25
chMax = 1 + byte('z') - byte('a')
chOffset = byte('a')
)
Expand Down
62 changes: 35 additions & 27 deletions moderation.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func init() {
}
}

var replacements = map[byte]string{
var replacements = [...]string{
'!': "li",
'@': "a",
'4': "a",
Expand All @@ -45,11 +45,14 @@ var removeAccentsTransform = transform.Chain(norm.NFD, runes.Remove(runes.In(uni

// Analyze analyzes a given phrase for moderation purposes
func Analyze(text string) (analysis Analysis) {
text, _, _ = transform.String(removeAccentsTransform, text)
buf := make([]byte, 0, len(text))
_, n, _ := transform.Append(removeAccentsTransform, buf, []byte(text))
text = string(buf[:n])
lastSepMin := 0
lastSepMax := 0

matches := make([]*radix.Node, 0, len(text))
var matchesGet, matchesPut radix.Buffer

var lastMatchable byte
for _, textRune := range text {
if textRune >= 0x0020 && textRune <= 0x007E {
Expand All @@ -62,15 +65,18 @@ func Analyze(text string) (analysis Analysis) {
matchable := false
skippable := false

replacement, replaceable := replacements[textByte]
var replacement string
if int(textByte) < len(replacements) {
replacement = replacements[textByte]
}

switch {
case textByte >= 'a' && textByte <= 'z':
matchable = true
case textByte >= 'A' && textByte <= 'Z':
textByte += 'a' - 'A'
matchable = true
case replaceable:
case replacement != "":
textByte = replacement[0]
textBytes = replacement
matchable = true
Expand All @@ -85,49 +91,51 @@ func Analyze(text string) (analysis Analysis) {
}
}

if len(textBytes) < 1 {
textBytes += string(textByte)
}

if textByte == lastMatchable {
lastSepMin-- // this character doesn't count
}

if ok {
matches = append(matches, tree.Root())
if matchable {
for matchIndex, match := range matches {
if match == nil {
continue
matchesGet.Append(tree.Root())

for i := 0; i < matchesGet.Len(); i++ {
match := matchesGet.Get(i)

if textByte == lastMatchable {
matchesPut.Append(match)
}

for i := 0; i < len(textBytes); i++ {
textByte := textBytes[i]
next := match.Next(textByte)
// Process textBytes as multiple textBytes or textByte
loops := 1
if len(textBytes) > 1 {
loops = len(textBytes)
}

for i := 0; i < loops; i++ {
loopTextByte := textByte
if len(textBytes) > 0 {
loopTextByte = textBytes[i]
}
next := match.Next(loopTextByte)

if next != nil {
if next.Word() {
if next.Data() == 0 {
// clear
} else if next.Depth() > 4 || (next.Depth() > 3 && next.Start() != 's') || (next.Depth() >= lastSepMin && next.Depth() <= lastSepMax) {
if next.Depth() > 4 || (next.Depth() > 3 && next.Start() != 's') || (next.Depth() >= lastSepMin && next.Depth() <= lastSepMax) {
analysis.InappropriateLevel += int(next.Data())
}
}
}

if textByte == lastMatchable || i > 1 {
if next != nil {
matches = append(matches, next)
}
} else {
matches[matchIndex] = next
matchesPut.Append(next)
}
}
}

lastMatchable = textByte
matchesGet = matchesPut
matchesPut.Clear()
} else if !skippable {
matches = matches[:0]
matchesGet.Clear()
}
}
}
Expand Down
94 changes: 94 additions & 0 deletions moderation_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package moderation

import (
"testing"
)

// IsProfane reports whether the result of Analyze for phrase is
// considered inappropriate.
func IsProfane(phrase string) bool {
	analysis := Analyze(phrase)
	return analysis.IsInappropriate()
}

// from https://github.com/TwinProduction/go-away/blob/master/goaway_bench_test.go

// BenchmarkIsProfaneWhenShortStringHasNoProfanity measures IsProfane on a
// short phrase without profanity and reports allocations.
func BenchmarkIsProfaneWhenShortStringHasNoProfanity(b *testing.B) {
	const phrase = "aaaaaaaaaaaaaa"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenShortStringHasProfanityAtTheStart measures IsProfane
// on a short phrase that begins with profanity and reports allocations.
func BenchmarkIsProfaneWhenShortStringHasProfanityAtTheStart(b *testing.B) {
	const phrase = "fuckaaaaaaaaaa"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenShortStringHasProfanityInTheMiddle measures IsProfane
// on a short phrase with profanity in the middle and reports allocations.
func BenchmarkIsProfaneWhenShortStringHasProfanityInTheMiddle(b *testing.B) {
	const phrase = "aaaaafuckaaaaa"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenShortStringHasProfanityAtTheEnd measures IsProfane
// on a short phrase that ends with profanity and reports allocations.
func BenchmarkIsProfaneWhenShortStringHasProfanityAtTheEnd(b *testing.B) {
	const phrase = "aaaaaaaaaafuck"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenMediumStringHasNoProfanity measures IsProfane on a
// medium-length sentence without profanity and reports allocations.
func BenchmarkIsProfaneWhenMediumStringHasNoProfanity(b *testing.B) {
	const phrase = "How are you doing today?"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenMediumStringHasProfanityAtTheStart measures IsProfane
// on a medium-length sentence that begins with profanity and reports
// allocations.
func BenchmarkIsProfaneWhenMediumStringHasProfanityAtTheStart(b *testing.B) {
	const phrase = "Shit, you're cute today."

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenMediumStringHasProfanityInTheMiddle measures
// IsProfane on a medium-length sentence with space-separated profanity in the
// middle and reports allocations.
func BenchmarkIsProfaneWhenMediumStringHasProfanityInTheMiddle(b *testing.B) {
	const phrase = "How are you fu ck doing?"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenMediumStringHasProfanityAtTheEnd measures IsProfane
// on a medium-length sentence that ends with profanity and reports
// allocations.
func BenchmarkIsProfaneWhenMediumStringHasProfanityAtTheEnd(b *testing.B) {
	const phrase = "you're cute today. Fuck."

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenLongStringHasNoProfanity measures IsProfane on a long
// sentence without profanity and reports allocations.
func BenchmarkIsProfaneWhenLongStringHasNoProfanity(b *testing.B) {
	const phrase = "Hello John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie brand"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenLongStringHasProfanityAtTheStart measures IsProfane
// on a long sentence that begins with profanity and reports allocations.
func BenchmarkIsProfaneWhenLongStringHasProfanityAtTheStart(b *testing.B) {
	const phrase = "Fuck John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie brand"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenLongStringHasProfanityInTheMiddle measures IsProfane
// on a long sentence with profanity in the middle and reports allocations.
func BenchmarkIsProfaneWhenLongStringHasProfanityInTheMiddle(b *testing.B) {
	const phrase = "Hello John Doe, I hope you're feeling well, as I come today bearing shitty news regarding your favorite chocolate chip cookie brand"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

// BenchmarkIsProfaneWhenLongStringHasProfanityAtTheEnd measures IsProfane on
// a long sentence that ends with profanity and reports allocations.
func BenchmarkIsProfaneWhenLongStringHasProfanityAtTheEnd(b *testing.B) {
	const phrase = "Hello John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie bitch"

	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		IsProfane(phrase)
	}
}

0 comments on commit 3d18f94

Please sign in to comment.