From 5a6c6b9ca42dfb4a04ad696c7924e037ad176e8f Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 3 Dec 2024 12:29:22 +0100 Subject: [PATCH] Speedup Trilogy#escape by 3 to 5x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copying characters one by one is very inefficient, especially since the buffer capacity has to be checked again for every single byte. By using `memcpy` for chunks of bytes that don't need any escaping, we can save a lot of time. Before: ``` ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- 2k-plain 23.802k i/100ms 2k-few-escape 22.693k i/100ms Calculating ------------------------------------- 2k-plain 239.746k (± 0.5%) i/s (4.17 μs/i) - 1.214M in 5.063403s 2k-few-escape 227.900k (± 0.6%) i/s (4.39 μs/i) - 1.157M in 5.078454s ``` After: ``` Warming up -------------------------------------- 2k-plain 94.975k i/100ms 2k-few-escape 61.998k i/100ms Calculating ------------------------------------- 2k-plain 1.039M (± 1.0%) i/s - 5.224M in 5.025882s 2k-few-escape 658.756k (± 1.3%) i/s - 3.348M in 5.082975s ``` Bench: ```ruby require "trilogy" require "benchmark/ips" t = Trilogy.new(database: "test") plain = "A" * 2000 few_escapes = (("a" * 19) + "\n") * 100 Benchmark.ips do |x| x.report("2k-plain") { t.escape(plain) } x.report("2k-few-escape") { t.escape(few_escapes) } end ``` --- inc/trilogy/buffer.h | 13 +++++++++++++ src/buffer.c | 14 ++++++++++++++ src/client.c | 34 +++++++++++++++++++++------------- test/buffer_test.c | 25 +++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 13 deletions(-) diff --git a/inc/trilogy/buffer.h b/inc/trilogy/buffer.h index 8e57a199..0ea35fa9 100644 --- a/inc/trilogy/buffer.h +++ b/inc/trilogy/buffer.h @@ -53,6 +53,19 @@ int trilogy_buffer_expand(trilogy_buffer_t *buffer, size_t needed); */ int trilogy_buffer_putc(trilogy_buffer_t *buffer, uint8_t c); +/* trilogy_buffer_puts - Appends multiple bytes to the 
buffer, resizing the underlying + * allocation if necessary. + * + * buffer - A pointer to a pre-initialized trilogy_buffer_t. + * ptr - The pointer to the byte array. + * len - How many bytes to append. + * + * Return values: + * TRILOGY_OK - The bytes were appended to the buffer + * TRILOGY_SYSERR - A system error occurred, check errno. + */ +int trilogy_buffer_puts(trilogy_buffer_t *buffer, const uint8_t *ptr, size_t len); + /* trilogy_buffer_free - Free an trilogy_buffer_t's underlying storage. The buffer * must be re-initialized with trilogy_buffer_init if it is to be reused. Any * operations performed on an unintialized or freed buffer are undefined. diff --git a/src/buffer.c b/src/buffer.c index a4c78393..f3195ba7 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,5 +1,6 @@ #include #include +#include <string.h> #include "trilogy/buffer.h" #include "trilogy/error.h" @@ -57,6 +58,19 @@ int trilogy_buffer_putc(trilogy_buffer_t *buffer, uint8_t c) return TRILOGY_OK; } +int trilogy_buffer_puts(trilogy_buffer_t *buffer, const uint8_t *ptr, size_t len) +{ + int rc = trilogy_buffer_expand(buffer, len); + if (rc) { + return rc; + } + + memcpy(buffer->buff + buffer->len, ptr, len); + buffer->len += len; + + return TRILOGY_OK; +} + void trilogy_buffer_free(trilogy_buffer_t *buffer) { free(buffer->buff); diff --git a/src/client.c b/src/client.c index fbd286c5..2b5b8f27 100644 --- a/src/client.c +++ b/src/client.c @@ -722,7 +722,7 @@ int trilogy_drain_results(trilogy_conn_t *conn) } } -static uint8_t escape_lookup_table[256] = { +static const uint8_t escape_lookup_table[256] = { ['"'] = '"', ['\0'] = '0', ['\''] = '\'', ['\\'] = '\\', ['\n'] = 'n', ['\r'] = 'r', [26] = 'Z', }; @@ -735,28 +735,36 @@ int trilogy_escape(trilogy_conn_t *conn, const char *str, size_t len, const char b->len = 0; - if (conn->server_status & TRILOGY_SERVER_STATUS_NO_BACKSLASH_ESCAPES) { - for (size_t i = 0; i < len; i++) { - const uint8_t c = (uint8_t)str[i]; + uint8_t *cursor = (uint8_t *)str; + const 
uint8_t *end = cursor + len; - if (c == '\'') { - CHECKED(trilogy_buffer_putc(b, '\'')); - CHECKED(trilogy_buffer_putc(b, '\'')); + if (conn->server_status & TRILOGY_SERVER_STATUS_NO_BACKSLASH_ESCAPES) { + while (cursor < end) { + uint8_t *next_backslash = memchr(cursor, '\'', end - cursor); + if (next_backslash) { + CHECKED(trilogy_buffer_puts(b, cursor, next_backslash - cursor)); + CHECKED(trilogy_buffer_puts(b, (uint8_t *)"\'\'", 2)); + cursor = next_backslash + 1; } else { - CHECKED(trilogy_buffer_putc(b, c)); + CHECKED(trilogy_buffer_puts(b, cursor, end - cursor)); + break; } } } else { - for (size_t i = 0; i < len; i++) { - const uint8_t c = (uint8_t)str[i]; - - uint8_t escaped = escape_lookup_table[(uint8_t)c]; + while (cursor < end) { + uint8_t escaped; + const uint8_t *start = cursor; + while (cursor < end && !(escaped = escape_lookup_table[*cursor])) { + cursor++; + } + CHECKED(trilogy_buffer_puts(b, start, cursor - start)); if (escaped) { CHECKED(trilogy_buffer_putc(b, '\\')); CHECKED(trilogy_buffer_putc(b, escaped)); + cursor++; } else { - CHECKED(trilogy_buffer_putc(b, c)); + break; } } } diff --git a/test/buffer_test.c b/test/buffer_test.c index edbf1ce4..21f500cb 100644 --- a/test/buffer_test.c +++ b/test/buffer_test.c @@ -56,6 +56,31 @@ TEST test_buffer_putc() PASS(); } +TEST test_buffer_puts() +{ + trilogy_buffer_t buff; + + int err = trilogy_buffer_init(&buff, 1); + ASSERT_OK(err); + ASSERT_EQ(0, buff.len); + ASSERT_EQ(1, buff.cap); + + err = trilogy_buffer_puts(&buff, "aaaaBBBB", 4); + ASSERT_OK(err); + ASSERT_EQ(4, buff.len); + ASSERT_EQ(4, buff.cap); + + err = trilogy_buffer_puts(&buff, "ccccccc", 8); + ASSERT_OK(err); + ASSERT_EQ(12, buff.len); + ASSERT_EQ(16, buff.cap); + ASSERT_MEM_EQ(buff.buff, "aaaaccccccc", 12); + + trilogy_buffer_free(&buff); + + PASS(); +} + int buffer_test() { RUN_TEST(test_buffer_expand);