Skip to content

Commit

Permalink
all tests ok
Browse files Browse the repository at this point in the history
  • Loading branch information
Joao Paulo Magalhaes committed Jan 27, 2024
1 parent e87adc9 commit 1f6d83e
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 63 deletions.
48 changes: 45 additions & 3 deletions src/c4/yml/filter_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ struct FilterProcessorSrcDst
++wpos;
rpos += 2;
}
C4_ALWAYS_INLINE void translate_esc(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
C4_ALWAYS_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
{
RYML_ASSERT(nw > 0);
RYML_ASSERT(nr > 0);
Expand All @@ -111,6 +111,10 @@ struct FilterProcessorSrcDst
wpos += nw;
rpos += 1 + nr;
}
/** Translate an escape sequence whose replacement may be longer than
 * the escape sequence itself (eg \L, which the caller replaces with
 * three bytes). In this processor the call is forwarded unchanged to
 * translate_esc_bulk().
 *
 * NOTE(review): presumably no extra handling is needed here because
 * this processor writes to a destination separate from the source --
 * confirm against the struct definition.
 *
 * @param s the replacement bytes
 * @param nw the number of bytes to write from @p s
 * @param nr the number of characters read after the escape character */
C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
{
// same arguments, same order: the bulk version does all the work
translate_esc_bulk(s, nw, nr);
}
};


Expand Down Expand Up @@ -226,7 +230,7 @@ struct FilterProcessorInplaceEndExtending
++wpos;
}

C4_NO_INLINE void translate_esc(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
{
RYML_ASSERT(nw > 0);
RYML_ASSERT(nr > 0);
Expand All @@ -241,9 +245,25 @@ struct FilterProcessorInplaceEndExtending
rpos = rpos_next;
wpos = wpos_next;
}

/** Translate an escape sequence whose replacement may be longer than
 * the escape sequence itself (eg \L, which the caller replaces with
 * three bytes). In this processor the call is forwarded unchanged to
 * translate_esc_bulk(), so both entry points behave identically.
 *
 * @param s the replacement bytes
 * @param nw the number of bytes to write from @p s
 * @param nr the number of characters read after the escape character */
C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
{
// same arguments, same order: the bulk version does all the work
translate_esc_bulk(s, nw, nr);
}
};


/** Filters in place. The result may be larger than the source, and
* extending may happen anywhere. As a result, when there is no slack
* in the buffer and the write-position would overlap the
* read-position, some characters may be left unfiltered. In YAML, this
* happens only with double-quoted strings, and only with a small
* number of escape sequences such as \L, which is substituted by three
* bytes. These escape sequences cause a call to translate_esc_extending(),
* which is the only entry point to this unfiltered situation.
*
* @see FilterProcessorInplaceEndExtending */
struct FilterProcessorInplaceMidExtending
{
substr src; ///< the subject string
Expand Down Expand Up @@ -388,7 +408,29 @@ struct FilterProcessorInplaceMidExtending
maxcap = wpos > maxcap ? wpos : maxcap;
}

C4_NO_INLINE void translate_esc(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
/** Replace an escape sequence with its translation, for the case where
 * the translation is not longer than the escape sequence (asserted:
 * nw <= nr+1, so the write position cannot overtake the read position
 * because of this call).
 *
 * The bytes are copied only when the write fits in the write-capacity,
 * the write position is not ahead of the read position, and no earlier
 * step has flagged unfiltered characters. When the write would go past
 * the write-capacity, nothing is copied and unfiltered_chars is set.
 * The read/write positions and maxcap are advanced in every case.
 *
 * @param s the replacement bytes
 * @param nw the number of bytes to write from @p s
 * @param nr the number of characters read after the escape character */
C4_NO_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
{
    RYML_ASSERT(nw > 0);
    RYML_ASSERT(nr > 0);
    RYML_ASSERT(nr+1u >= nw);
    const size_t wpos_next = wpos + nw;
    const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character
    if(wpos_next <= wcap) // respect write-capacity
    {
        if((wpos <= rpos) && !unfiltered_chars) // write only if wpos is behind rpos
            memcpy(src.str + wpos, s, nw);
    }
    else
    {
        // one argument per placeholder: the (wpos<wcap) comparison gets its
        // own ={} slot, and maxcap is reported as old->new
        _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}<wcap={})={} maxcap={}->{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap, maxcap, wpos_next > maxcap ? wpos_next : maxcap);
        unfiltered_chars = true;
    }
    rpos = rpos_next;
    wpos = wpos_next;
    // track the largest write position, even when the write itself was skipped
    maxcap = wpos > maxcap ? wpos : maxcap;
}

C4_NO_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept
{
RYML_ASSERT(nw > 0);
RYML_ASSERT(nr > 0);
Expand Down
14 changes: 7 additions & 7 deletions src/c4/yml/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4896,7 +4896,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
uint8_t byteval = {};
if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
_c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
proc.translate_esc((const char*)&byteval, 1u, /*nread*/3u);
proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
_c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
}
else if(next == 'u') // UTF16
Expand All @@ -4910,7 +4910,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
_c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
C4_ASSERT(numbytes <= 4);
proc.translate_esc(readbuf, numbytes, /*nread*/5u);
proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
}
else if(next == 'U') // UTF32
{
Expand All @@ -4923,7 +4923,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
_c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
C4_ASSERT(numbytes <= 4);
proc.translate_esc(readbuf, numbytes, /*nread*/9u);
proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
}
// https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
else if(next == '0')
Expand Down Expand Up @@ -4957,7 +4957,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
_RYML_CHCONST(-0x3e, 0xc2),
_RYML_CHCONST(-0x60, 0xa0),
};
proc.translate_esc(payload, /*nwrite*/2, /*nread*/1);
proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
}
else if(next == 'N') // unicode next line \u0085
{
Expand All @@ -4966,7 +4966,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
_RYML_CHCONST(-0x3e, 0xc2),
_RYML_CHCONST(-0x7b, 0x85),
};
proc.translate_esc(payload, /*nwrite*/2, /*nread*/1);
proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
}
else if(next == 'L') // unicode line separator \u2028
{
Expand All @@ -4976,7 +4976,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
_RYML_CHCONST(-0x80, 0x80),
_RYML_CHCONST(-0x58, 0xa8),
};
proc.translate_esc(payload, /*nwrite*/3, /*nread*/1);
proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
}
else if(next == 'P') // unicode paragraph separator \u2029
{
Expand All @@ -4986,7 +4986,7 @@ void Parser::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
_RYML_CHCONST(-0x80, 0x80),
_RYML_CHCONST(-0x57, 0xa9),
};
proc.translate_esc(payload, /*nwrite*/3, /*nread*/1);
proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
}
else if(next == '\0')
{
Expand Down
32 changes: 28 additions & 4 deletions test/test_block_folded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,16 @@ void test_filter_inplace(blockfolded_case const& blcase)
if(blcase.input.len >= blcase.expected.len)
{
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
c4::substr dst = to_substr(subject_);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_folded_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
Parser parser1 = {};
FilterResult result = parser1.filter_scalar_block_folded_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_TRUE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_folded(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
const csubstr out = result.get();
if(blcase.chomp != CHOMP_CLIP)
{
Expand All @@ -54,13 +60,19 @@ void test_filter_inplace(blockfolded_case const& blcase)
{
SCOPED_TRACE("spare size");
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
subject_.resize(blcase.expected.len + 30);
c4::substr dst = to_substr(subject_).first(blcase.input.len);
c4::substr rem = to_substr(subject_).sub(blcase.expected.len);
rem.fill('^');
Parser proc = {};
FilterResult result = proc.filter_scalar_block_folded_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
Parser parser1 = {};
FilterResult result = parser1.filter_scalar_block_folded_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_TRUE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_folded(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
const csubstr out = result.get();
if(blcase.chomp != CHOMP_CLIP)
{
Expand All @@ -75,11 +87,17 @@ void test_filter_inplace(blockfolded_case const& blcase)
{
SCOPED_TRACE("trimmed size");
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
subject_.resize(blcase.expected.len);
c4::substr dst = to_substr(subject_).first(blcase.input.len);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_folded_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_TRUE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_folded(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
const csubstr out = result.get();
if(blcase.chomp != CHOMP_CLIP)
{
Expand All @@ -93,9 +111,15 @@ void test_filter_inplace(blockfolded_case const& blcase)
{
SCOPED_TRACE("insufficient size");
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
c4::substr dst = to_substr(subject_);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_folded_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_folded(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
if(blcase.chomp != CHOMP_CLIP)
{
EXPECT_EQ(result.required_len(), blcase.expected.len);
Expand Down
25 changes: 25 additions & 0 deletions test/test_block_literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ void test_filter_src_dst(blocklit_case const& blcase)
RYML_TRACE_FMT("\nstr=[{}]~~~{}~~~\nexp=[{}]~~~{}~~~", blcase.input.len, blcase.input, blcase.expected.len, blcase.expected);
std::string subject_;
subject_.resize(2 * blcase.input.size());
std::string subject_2 = subject_;
c4::substr dst = to_substr(subject_);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_literal(blcase.input, dst, blcase.indentation, blcase.chomp);
Expand All @@ -36,10 +37,16 @@ void test_filter_inplace(blocklit_case const& blcase)
if(blcase.input.len >= blcase.expected.len)
{
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
c4::substr dst = to_substr(subject_);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_literal_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_TRUE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_literal(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
const csubstr out = result.get();
if(blcase.chomp != CHOMP_CLIP)
{
Expand All @@ -55,13 +62,19 @@ void test_filter_inplace(blocklit_case const& blcase)
{
SCOPED_TRACE("spare size");
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
subject_.resize(blcase.expected.len + 30);
c4::substr dst = to_substr(subject_).first(blcase.input.len);
c4::substr rem = to_substr(subject_).sub(blcase.expected.len);
rem.fill('^');
Parser proc = {};
FilterResult result = proc.filter_scalar_block_literal_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_TRUE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_literal(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
const csubstr out = result.get();
if(blcase.chomp != CHOMP_CLIP)
{
Expand All @@ -76,11 +89,17 @@ void test_filter_inplace(blocklit_case const& blcase)
{
SCOPED_TRACE("trimmed size");
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
subject_.resize(blcase.expected.len);
c4::substr dst = to_substr(subject_).first(blcase.input.len);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_literal_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_TRUE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_literal(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
const csubstr out = result.get();
if(blcase.chomp != CHOMP_CLIP)
{
Expand All @@ -94,10 +113,16 @@ void test_filter_inplace(blocklit_case const& blcase)
{
SCOPED_TRACE("insufficient size");
std::string subject_(blcase.input.str, blcase.input.len);
std::string subject_2 = subject_;
c4::substr dst = to_substr(subject_);
Parser proc = {};
FilterResult result = proc.filter_scalar_block_literal_in_place(dst, subject_.size(), blcase.indentation, blcase.chomp);
ASSERT_FALSE(result.valid());
Parser parser2 = {};
Tree tree = parser2.parse_in_arena("file", "# set the tree in the parser");
csubstr sresult = parser2._filter_scalar_block_literal(to_substr(subject_2), blcase.chomp, blcase.indentation);
EXPECT_GE(result.required_len(), blcase.expected.len);
EXPECT_EQ(sresult.len, result.str.len);
if(blcase.chomp != CHOMP_CLIP)
{
EXPECT_EQ(result.required_len(), blcase.expected.len);
Expand Down
Loading

0 comments on commit 1f6d83e

Please sign in to comment.