Skip to content

Commit

Permalink
Provide support for text-2
Browse files Browse the repository at this point in the history
  • Loading branch information
nikita-volkov committed Nov 21, 2022
1 parent d53399b commit 663bc97
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 24 deletions.
30 changes: 19 additions & 11 deletions .github/workflows/integrate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: mrkkrp/ormolu-action@v4
- uses: mrkkrp/ormolu-action@v7

build:

strategy:
matrix:
ghc: ['8.4.2', '9.0.1']
include:
- ghc: 8.4.2
text: 1.2.5.0
- ghc: 9.4.2
text: 2.0.1

runs-on: ubuntu-latest

Expand All @@ -26,15 +30,19 @@ jobs:
- uses: actions/checkout@v2

- name: Setup Haskell
uses: haskell/actions/setup@v1.2.1
uses: haskell/actions/setup@v2
with:
ghc-version: ${{ matrix.ghc }}
cabal-version: 3.4
cabal-version: 3.8

- run: cabal v2-update --enable-tests --enable-benchmarks
- run: cabal v2-freeze --enable-tests --enable-benchmarks

- uses: actions/cache@v2.1.6
- run: |
echo "packages: ." > cabal.project
echo "constraints:" >> cabal.project
echo " text == ${{ matrix.text }}" >> cabal.project
- run: cabal update --enable-tests --enable-benchmarks
- run: cabal freeze --enable-tests --enable-benchmarks

- uses: actions/cache@v2
with:
path: |
~/.cabal/store
Expand All @@ -43,7 +51,7 @@ jobs:
restore-keys: |
${{ runner.os }}-${{ matrix.ghc }}-
- run: cabal v2-build --enable-tests --enable-benchmarks
- run: cabal v2-test --enable-tests --enable-benchmarks
- run: cabal v2-haddock --enable-tests --enable-benchmarks
- run: cabal build --enable-tests --enable-benchmarks
- run: cabal test --enable-tests --enable-benchmarks

- run: cabal haddock --enable-tests --enable-benchmarks
3 changes: 1 addition & 2 deletions bench/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ main =
. Char8ByteString.length
. encodeWithJsonifier
sample sampleName sampleData =
"- " <> TextBuilder.text sampleName <> ": "
<> sampleDataSize sampleData
"- " <> TextBuilder.text sampleName <> ": " <> sampleDataSize sampleData
in "Input data sizes report:\n"
<> sample "twitter with 1 objects" twitter1Data
<> "\n"
Expand Down
52 changes: 49 additions & 3 deletions cbits/json_allocation.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
static const int allocation_by_septet[128] =
{6,6,6,6,6,6,6,6,6,2,2,6,6,2,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};

int count_string_allocation
// UTF-16

int measure_utf16_text_
(
const uint16_t *src_ptr,
const uint16_t *end_ptr
Expand Down Expand Up @@ -42,7 +44,7 @@ int count_string_allocation
return allocation;
}

int count_string_allocation_off_len
int measure_utf16_text_off_len
(
const uint16_t *src_ptr,
size_t src_off,
Expand All @@ -52,5 +54,49 @@ int count_string_allocation_off_len
src_ptr += src_off;
const uint16_t *end_ptr = src_ptr + src_len;

return count_string_allocation(src_ptr, end_ptr);
return measure_utf16_text_(src_ptr, end_ptr);
}

// UTF-8

/*
 * Walk the bytes in [src_ptr, end_ptr) and add up a per-sequence size.
 *
 * For a byte below 0x80 the size comes from the allocation_by_septet table.
 * For any other byte the size is the sequence width implied by that byte
 * alone (2, 3 or 4), and the cursor skips that many bytes.
 *
 * No validation of the input is performed: the trailing bytes of a
 * multi-byte sequence are never inspected.
 * NOTE(review): presumably the caller guarantees well-formed UTF-8 -- confirm.
 *
 * The total is accumulated as size_t and converted to int on return.
 */
int measure_utf8_text_
(
  const uint8_t *src_ptr,
  const uint8_t *end_ptr
)
{
  size_t total = 0;
  const uint8_t *cursor = src_ptr;

  while (cursor < end_ptr) {
    const uint8_t lead = *cursor;

    if (lead >= 0x80) {
      // Width is decided by the lead byte only.
      const size_t width = lead < 0xE0 ? 2 : (lead < 0xF0 ? 3 : 4);
      total += width;
      cursor += width;
    } else {
      // Single-byte input: the table says how much it takes.
      total += allocation_by_septet[lead];
      cursor += 1;
    }
  }

  return total;
}

/*
 * Offset/length front end for measure_utf8_text_.
 *
 * Measures the src_len bytes that start src_off bytes past src_ptr and
 * returns whatever measure_utf8_text_ returns for that range.
 */
int measure_utf8_text_off_len
(
  const uint8_t *src_ptr,
  size_t src_off,
  size_t src_len
)
{
  const uint8_t *const first = src_ptr + src_off;

  return measure_utf8_text_(first, first + src_len);
}
68 changes: 67 additions & 1 deletion cbits/json_encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static const bool pass_through_by_septet[128] =
static const uint16_t two_byte_seq_by_septet[128] =
{0,0,0,0,0,0,0,0,0,slash_t_seq_def,slash_n_seq_def,0,0,slash_r_seq_def,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,slash_doublequote_seq_def,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,slash_slash_seq_def};

uint8_t* encode_utf16_as_string
uint8_t* encode_utf16_text
(
uint8_t *dest,
const uint16_t *src,
Expand Down Expand Up @@ -93,3 +93,69 @@ uint8_t* encode_utf16_as_string

return dest;
}

/*
 * Write the src_length bytes found src_offset bytes into src to dest,
 * wrapped in double quotes (byte 34), and return the position right after
 * the closing quote.
 *
 * Bytes below 0x80 are handled via the lookup tables:
 *   - pass_through_by_septet[x] set: the byte is copied as is;
 *   - otherwise a non-zero two_byte_seq_by_septet[x] is stored as a
 *     two-byte sequence;
 *   - otherwise slash_u_seq followed by four characters taken from digits
 *     is written (six bytes in total).
 * Bytes of 0x80 and above start a sequence of 2, 3 or 4 bytes (chosen by
 * that first byte alone) which is copied unchanged.
 *
 * No bounds checking is performed on dest and the input is not validated.
 * NOTE(review): presumably the caller sizes dest beforehand and passes
 * well-formed UTF-8 -- confirm against the call sites.
 */
uint8_t* encode_utf8_text
(
  uint8_t *dest,
  const uint8_t *src,
  size_t src_offset,
  size_t src_length
)
{
  const uint8_t *cursor = src + src_offset;
  const uint8_t *const limit = cursor + src_length;

  // Opening double quote
  *dest++ = 34;

  while (cursor < limit) {
    const uint8_t lead = *cursor;

    if (lead >= 0x80) {
      // Multi-byte sequence: copied through byte for byte.
      const size_t width = lead < 0xE0 ? 2 : (lead < 0xF0 ? 3 : 4);

      dest[0] = lead;
      for (size_t i = 1; i < width; i++) {
        dest[i] = cursor[i];
      }

      dest += width;
      cursor += width;
      continue;
    }

    cursor++;

    if (pass_through_by_septet[lead]) {
      *dest++ = lead;
      continue;
    }

    // NOTE(review): the 16-bit stores below go through a cast pointer and
    // dest may sit at an odd address here -- confirm this is acceptable on
    // every target platform.
    const uint16_t short_escape = two_byte_seq_by_septet[lead];
    if (short_escape) {
      *((uint16_t*) dest) = short_escape;
      dest += 2;
      continue;
    }

    // \u
    *((uint16_t*) dest) = slash_u_seq;

    // Four nibbles; lead is below 0x80 here, so the upper two are zero.
    dest[2] = digits[(lead >> 12) & 0xF];
    dest[3] = digits[(lead >> 8) & 0xF];
    dest[4] = digits[(lead >> 4) & 0xF];
    dest[5] = digits[lead & 0xF];
    dest += 6;
  }

  // Closing double quote
  *dest++ = 34;

  return dest;
}
2 changes: 1 addition & 1 deletion jsonifier.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ library
, bytestring >=0.10.10 && <0.12
, ptr-poker ^>=0.1.2.2
, scientific ^>=0.3.6.2
, text >=1 && <2
, text >=1 && <3

test-suite demo
import: base-settings
Expand Down
21 changes: 17 additions & 4 deletions library/Jsonifier/Ffi.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE UnliftedFFITypes #-}

module Jsonifier.Ffi where
Expand All @@ -6,8 +7,20 @@ import Foreign.C
import GHC.Base (ByteArray#, MutableByteArray#)
import Jsonifier.Prelude

foreign import ccall unsafe "static count_string_allocation_off_len"
countStringAllocationSize :: ByteArray# -> CSize -> CSize -> IO CInt
#if MIN_VERSION_text (2, 0, 0)

foreign import ccall unsafe "static encode_utf16_as_string"
encodeString :: Ptr Word8 -> ByteArray# -> CSize -> CSize -> IO (Ptr Word8)
foreign import ccall unsafe "static measure_utf8_text_off_len"
countTextEncoding :: ByteArray# -> CSize -> CSize -> IO CInt

foreign import ccall unsafe "static encode_utf8_text"
encodeText :: Ptr Word8 -> ByteArray# -> CSize -> CSize -> IO (Ptr Word8)

#else

foreign import ccall unsafe "static measure_utf16_text_off_len"
countTextEncoding :: ByteArray# -> CSize -> CSize -> IO CInt

foreign import ccall unsafe "static encode_utf16_text"
encodeText :: Ptr Word8 -> ByteArray# -> CSize -> CSize -> IO (Ptr Word8)

#endif
2 changes: 1 addition & 1 deletion library/Jsonifier/Poke.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ string :: Text -> Poke
string =
Text.destruct $ \arr off len ->
Poke $ \ptr ->
Ffi.encodeString ptr arr (fromIntegral off) (fromIntegral len)
Ffi.encodeText ptr arr (fromIntegral off) (fromIntegral len)

-- |
-- > "key":value
Expand Down
2 changes: 1 addition & 1 deletion library/Jsonifier/Size.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ commas rowsAmount =
stringBody :: Text -> Int
stringBody =
Text.destruct $ \arr off len ->
Ffi.countStringAllocationSize
Ffi.countTextEncoding
arr
(fromIntegral off)
(fromIntegral len)
Expand Down

0 comments on commit 663bc97

Please sign in to comment.