Skip to content

Commit

Permalink
Speedup Trilogy#escape by 3 to 5x
Browse files Browse the repository at this point in the history
Copying characters one by one is very inefficient, especially since the
buffer capacity has to be checked again for every single character.

Using `memcpy` for chunks of bytes that don't need any escaping, we can
save a lot of time.

Before:

```
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
            2k-plain    23.802k i/100ms
       2k-few-escape    22.693k i/100ms
Calculating -------------------------------------
            2k-plain    239.746k (± 0.5%) i/s    (4.17 μs/i) -      1.214M in   5.063403s
       2k-few-escape    227.900k (± 0.6%) i/s    (4.39 μs/i) -      1.157M in   5.078454s
```

After:

```
Warming up --------------------------------------
            2k-plain    94.975k i/100ms
       2k-few-escape    61.998k i/100ms
Calculating -------------------------------------
            2k-plain      1.039M (± 1.0%) i/s -      5.224M in   5.025882s
       2k-few-escape    658.756k (± 1.3%) i/s -      3.348M in   5.082975s
```

Bench:

```ruby

require "trilogy"
require "benchmark/ips"
t = Trilogy.new(database: "test")

plain = "A" * 2000
few_escapes = (("a" * 19) + "\n") * 100
Benchmark.ips do |x|
  x.report("2k-plain") { t.escape(plain) }
  x.report("2k-few-escape") { t.escape(few_escapes) }
end
```
  • Loading branch information
byroot committed Dec 3, 2024
1 parent d22ed28 commit c0815c2
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 13 deletions.
13 changes: 13 additions & 0 deletions inc/trilogy/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ int trilogy_buffer_expand(trilogy_buffer_t *buffer, size_t needed);
*/
int trilogy_buffer_putc(trilogy_buffer_t *buffer, uint8_t c);

/* trilogy_buffer_puts - Appends multiple bytes to the buffer, resizing the underlying
* allocation if necessary.
*
* buffer - A pointer to a pre-initialized trilogy_buffer_t.
* ptr - A pointer to the first of the bytes to append.
* len - How many bytes to append.
*
* Return values:
* TRILOGY_OK - The bytes were appended to the buffer
* TRILOGY_SYSERR - A system error occurred, check errno.
*/
int trilogy_buffer_puts(trilogy_buffer_t *buffer, const uint8_t *ptr, size_t len);

/* trilogy_buffer_free - Free a trilogy_buffer_t's underlying storage. The buffer
* must be re-initialized with trilogy_buffer_init if it is to be reused. Any
* operations performed on an uninitialized or freed buffer are undefined.
Expand Down
14 changes: 14 additions & 0 deletions src/buffer.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "trilogy/buffer.h"
#include "trilogy/error.h"
Expand Down Expand Up @@ -57,6 +58,19 @@ int trilogy_buffer_putc(trilogy_buffer_t *buffer, uint8_t c)
return TRILOGY_OK;
}

/* Append `len` bytes starting at `ptr` to the end of `buffer`.
 *
 * Capacity for `len` more bytes is requested through trilogy_buffer_expand
 * before anything is copied.
 *
 * Returns TRILOGY_OK on success. If trilogy_buffer_expand reports an error,
 * its return code is passed through and no bytes are copied.
 */
int trilogy_buffer_puts(trilogy_buffer_t *buffer, const uint8_t *ptr, size_t len)
{
    int rc = trilogy_buffer_expand(buffer, len);
    if (rc) {
        return rc;
    }

    /* memcpy requires valid pointers even when the length is zero, so an
     * empty append (where `ptr` may legitimately be NULL) skips the copy. */
    if (len > 0) {
        memcpy(buffer->buff + buffer->len, ptr, len);
        buffer->len += len;
    }

    return TRILOGY_OK;
}

void trilogy_buffer_free(trilogy_buffer_t *buffer)
{
free(buffer->buff);
Expand Down
38 changes: 25 additions & 13 deletions src/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ int trilogy_drain_results(trilogy_conn_t *conn)
}
}

static uint8_t escape_lookup_table[256] = {
/* Escape table indexed by byte value.
 *
 * A non-zero entry is the character written after a backslash when the byte
 * is escaped; a zero entry (every byte not listed here) means the byte needs
 * no escaping.
 */
static const uint8_t escape_lookup_table[256] = {
    ['\0'] = '0',
    ['\n'] = 'n',
    ['\r'] = 'r',
    [0x1A] = 'Z', /* byte 26, ASCII SUB (Ctrl-Z) */
    ['"'] = '"',
    ['\''] = '\'',
    ['\\'] = '\\',
};

Expand All @@ -735,28 +735,40 @@ int trilogy_escape(trilogy_conn_t *conn, const char *str, size_t len, const char

b->len = 0;

if (conn->server_status & TRILOGY_SERVER_STATUS_NO_BACKSLASH_ESCAPES) {
for (size_t i = 0; i < len; i++) {
const uint8_t c = (uint8_t)str[i];
// Escaped string will be at least as large as the source string,
// so might as well pre-expand the buffer.
CHECKED(trilogy_buffer_expand(b, len));

uint8_t *cursor = (uint8_t *)str;
const uint8_t *end = cursor + len;

if (c == '\'') {
CHECKED(trilogy_buffer_putc(b, '\''));
CHECKED(trilogy_buffer_putc(b, '\''));
if (conn->server_status & TRILOGY_SERVER_STATUS_NO_BACKSLASH_ESCAPES) {
while (cursor < end) {
uint8_t *next_backslash = memchr(cursor, '\'', (size_t)(end - cursor));
if (next_backslash) {
CHECKED(trilogy_buffer_puts(b, cursor, next_backslash - cursor));
CHECKED(trilogy_buffer_puts(b, (uint8_t *)"\'\'", 2));
cursor = next_backslash + 1;
} else {
CHECKED(trilogy_buffer_putc(b, c));
CHECKED(trilogy_buffer_puts(b, cursor, (size_t)(end - cursor)));
break;
}
}
} else {
for (size_t i = 0; i < len; i++) {
const uint8_t c = (uint8_t)str[i];

uint8_t escaped = escape_lookup_table[(uint8_t)c];
while (cursor < end) {
uint8_t escaped;
const uint8_t *start = cursor;
while (cursor < end && !(escaped = escape_lookup_table[*cursor])) {
cursor++;
}

CHECKED(trilogy_buffer_puts(b, start, cursor - start));
if (escaped) {
CHECKED(trilogy_buffer_putc(b, '\\'));
CHECKED(trilogy_buffer_putc(b, escaped));
cursor++;
} else {
CHECKED(trilogy_buffer_putc(b, c));
break;
}
}
}
Expand Down
25 changes: 25 additions & 0 deletions test/buffer_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,31 @@ TEST test_buffer_putc()
PASS();
}

/* Checks that trilogy_buffer_puts appends the requested number of bytes and
 * that the buffer's len and cap fields take the expected values after each
 * append, starting from a buffer initialized with a capacity of 1.
 */
TEST test_buffer_puts()
{
    trilogy_buffer_t buff;

    int err = trilogy_buffer_init(&buff, 1);
    ASSERT_OK(err);
    ASSERT_EQ(0, buff.len);
    ASSERT_EQ(1, buff.cap);

    /* Only the first 4 bytes of the literal are appended. The cast avoids
     * passing a `char *` where `const uint8_t *` is expected. */
    err = trilogy_buffer_puts(&buff, (const uint8_t *)"aaaaBBBB", 4);
    ASSERT_OK(err);
    ASSERT_EQ(4, buff.len);
    ASSERT_EQ(4, buff.cap);

    /* 8 bytes: the seven 'c' characters plus the literal's terminating NUL. */
    err = trilogy_buffer_puts(&buff, (const uint8_t *)"ccccccc", 8);
    ASSERT_OK(err);
    ASSERT_EQ(12, buff.len);
    ASSERT_EQ(16, buff.cap);
    /* 12 bytes: "aaaa" + "ccccccc" + the NUL copied by the second append. */
    ASSERT_MEM_EQ(buff.buff, "aaaaccccccc", 12);

    trilogy_buffer_free(&buff);

    PASS();
}

int buffer_test()
{
RUN_TEST(test_buffer_expand);
Expand Down

0 comments on commit c0815c2

Please sign in to comment.