diff --git a/src/c4/yml/filter_processor.hpp b/src/c4/yml/filter_processor.hpp index c5993a7a0..f5380a1f1 100644 --- a/src/c4/yml/filter_processor.hpp +++ b/src/c4/yml/filter_processor.hpp @@ -138,7 +138,7 @@ struct FilterProcessorInplace , wcap(wcap_) , rpos(0) , wpos(0) - , maxcap(0) + , maxcap(src.len) , unfiltered_chars(false) { RYML_ASSERT(wcap >= src.len); @@ -153,10 +153,10 @@ struct FilterProcessorInplace C4_ALWAYS_INLINE FilterResultInPlace result() const noexcept { - _c4dbgip("inplace: wpos={} wcap={} unfiltered={}", this->wpos, this->wcap, this->unfiltered_chars); + _c4dbgip("inplace: wpos={} wcap={} unfiltered={} maxcap={}", this->wpos, this->wcap, this->unfiltered_chars, this->maxcap); FilterResultInPlace ret; ret.str.str = (wpos <= wcap && !unfiltered_chars) ? src.str : nullptr; - ret.str.len = wpos + unfiltered_chars; + ret.str.len = wpos; ret.reqlen = maxcap; return ret; } @@ -186,7 +186,7 @@ struct FilterProcessorInplace } else { - _c4dbgip("inplace: add unwritten {}->{}!", unfiltered_chars, true); + _c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap)); unfiltered_chars = true; } ++wpos; @@ -202,7 +202,7 @@ struct FilterProcessorInplace } else { - _c4dbgip("inplace: add unwritten {}->{}!", unfiltered_chars, true); + _c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+num > maxcap ? wpos+num : maxcap)); unfiltered_chars = true; } wpos += num; @@ -219,7 +219,7 @@ struct FilterProcessorInplace } else { - _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? 
wpos+1u : maxcap)); unfiltered_chars = true; } ++rpos; @@ -242,7 +242,7 @@ struct FilterProcessorInplace } else { - _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos{}!", unfiltered_chars, true); + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap)); unfiltered_chars = true; } rpos += 2; @@ -278,13 +279,15 @@ struct FilterProcessorInplace { if(wpos_next <= wcap) memcpy(src.str + wpos, s, nw); - wpos = wpos_next; rpos = rpos_next; + wpos = wpos_next; + maxcap = wpos > maxcap ? wpos : maxcap; } else // there is overlap. move the (to-be-read) string to the right. { const size_t excess = wpos_next - rpos_next; - if(src.len + excess <= wcap) // ensure we do not go past the end. + RYML_ASSERT(wpos_next > rpos_next); + if(src.len + excess <= wcap) // ensure we do not go past the end { RYML_ASSERT(rpos+nr+excess <= src.len); if(wpos_next <= wcap) @@ -296,23 +299,28 @@ struct FilterProcessorInplace else { rpos = rpos_next; - const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; - _c4dbgip("inplace: add unfiltered {}->{}!", unfiltered_chars, unfiltered_chars+unw); - unfiltered_chars += unw; + //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; + /* log the maxcap that is assigned from wpos_next right after this branch */ _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos_next > maxcap ? wpos_next : maxcap)); + unfiltered_chars = true; } + wpos = wpos_next; // extend the string up to capacity src.len += excess; + maxcap = wpos > maxcap ? wpos : maxcap; } else { + //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; + RYML_ASSERT(rpos_next <= src.len); + const size_t required_size = wpos_next + (src.len - rpos_next); + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, required_size); + RYML_ASSERT(required_size > wcap); + unfiltered_chars = true; + maxcap = required_size > maxcap ? 
required_size : maxcap; + wpos = wpos_next; rpos = rpos_next; - const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; - _c4dbgip("inplace: add unfiltered {}->{}!", unfiltered_chars, unfiltered_chars+unw); - unfiltered_chars += unw; } - wpos = wpos_next; } - maxcap = wpos > maxcap ? wpos : maxcap; } }; diff --git a/src/c4/yml/parse.cpp b/src/c4/yml/parse.cpp index fef37e10d..d5bf37e5f 100644 --- a/src/c4/yml/parse.cpp +++ b/src/c4/yml/parse.cpp @@ -5670,7 +5670,6 @@ csubstr Parser::_filter_scalar_dquot(substr s) else { const size_t len = r.required_len(); - _RYML_CB_ASSERT(this->callbacks(), s.len < len); _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len); _RYML_CB_ASSERT(this->callbacks(), m_tree); substr dst = m_tree->alloc_arena(len); @@ -5678,10 +5677,10 @@ csubstr Parser::_filter_scalar_dquot(substr s) _RYML_CB_ASSERT(this->callbacks(), dst.len == len); FilterResult rsd = this->filter_scalar_dquoted(s, dst); _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len); - _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() == len); + _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller! _RYML_CB_CHECK(m_stack.m_callbacks, rsd.valid()); _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); - return r.get(); + return rsd.get(); } } @@ -5704,7 +5703,7 @@ csubstr Parser::_filter_scalar_block_literal(substr s, BlockChomp_e chomp, size_ FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp); _RYML_CB_CHECK(m_stack.m_callbacks, rsd.valid()); _c4dbgpf("filtering block literal scalar: success! 
s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); - return r.get(); + return rsd.get(); } } @@ -5727,7 +5726,7 @@ csubstr Parser::_filter_scalar_block_folded(substr s, BlockChomp_e chomp, size_t FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp); _RYML_CB_CHECK(m_stack.m_callbacks, rsd.valid()); _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); - return r.get(); + return rsd.get(); } } diff --git a/test/test_double_quoted.cpp b/test/test_double_quoted.cpp index 16f247e1e..c1f6651be 100644 --- a/test/test_double_quoted.cpp +++ b/test/test_double_quoted.cpp @@ -84,8 +84,8 @@ std::cout << "WTF1: input_sz=" << input_sz << " --> expected_sz=" << expected_ std::cout << "WTF2: input_sz=" << input_sz << " --> expected_sz=" << expected_sz << " " << subject_2.size() << "\n"; csubstr sresult = parser2._filter_scalar_dquot(to_substr(subject_2)); std::cout << "WTF3: input_sz=" << input_sz << " --> expected_sz=" << expected_sz << " " << subject_2.size() << "\n"; - EXPECT_EQ(result.required_len(), expected_sz); - EXPECT_EQ(sresult.len, expected_sz); + EXPECT_GE(result.required_len(), expected_sz); + EXPECT_EQ(sresult.len, result.str.len); if(result.valid()) { const csubstr out = result.get(); diff --git a/test/test_filter.cpp b/test/test_filter.cpp index 52385d2ed..1bc27a757 100644 --- a/test/test_filter.cpp +++ b/test/test_filter.cpp @@ -810,6 +810,215 @@ TEST(FilterProcessorInplace, set_after_translate_esc_bulk_excess__trimmed_capaci } } +TEST(FilterProcessorInplace, translate_esc_with_temporary_excess_requirement__trimmed_capacity) +{ + InplaceTester t("00112233445566"); + t.trim_capacity(); + EXPECT_EQ(t.proc.wcap, 14); + EXPECT_EQ(t.proc.rpos, 0); + EXPECT_EQ(t.proc.wpos, 0); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_FALSE(t.proc.unfiltered_chars); + EXPECT_EQ(t.proc.sofar(), ""); + EXPECT_EQ(t.proc.maxcap, 14); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc("aaaa", /*nw*/4, /*nr*/1); 
+ EXPECT_EQ(t.proc.rpos, 2); + EXPECT_EQ(t.proc.wpos, 4); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "0011"); + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 4); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "00112233445566"); + EXPECT_EQ(t.proc.maxcap, 16); // increased! + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc('b'); // do not write! + EXPECT_EQ(t.proc.rpos, 4); + EXPECT_EQ(t.proc.wpos, 5); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "00112"); // must not set 'b' + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 5); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "00112233445566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc('c'); // do not write! + EXPECT_EQ(t.proc.rpos, 6); + EXPECT_EQ(t.proc.wpos, 6); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122"); // must not set 'c' + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 6); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "00112233445566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc('d'); // can write + EXPECT_EQ(t.proc.rpos, 8); + EXPECT_EQ(t.proc.wpos, 7); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122d"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 7); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122d3445566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc('e'); // can write + 
EXPECT_EQ(t.proc.rpos, 10); + EXPECT_EQ(t.proc.wpos, 8); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122de"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 8); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122de445566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc('f'); // can write + EXPECT_EQ(t.proc.rpos, 12); + EXPECT_EQ(t.proc.wpos, 9); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122def"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 9); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122def45566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.translate_esc('g'); // can write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 10); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defg"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 10); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defg5566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.set('h'); // can write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 11); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defgh"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 11); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defgh566"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + 
t.proc.set('i'); // can write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 12); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defghi"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 12); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defghi66"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.set('j'); // can write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 13); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defghij"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 13); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defghij6"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.set('k'); // can write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 14); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 14); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defghijk"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.set('!'); // cannot write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 15); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 15); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defghijk"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + 
// ^ (rpos) + // ^ (wpos) + t.proc.set('!'); // cannot write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 16); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 16); + EXPECT_EQ(t.proc.result().required_len(), 16); + EXPECT_EQ(t.subject, "001122defghijk"); + EXPECT_EQ(t.proc.maxcap, 16); + EXPECT_TRUE(t.proc.unfiltered_chars); + // 00112233445566 + // ^ (rpos) + // ^ (wpos) + t.proc.set('!'); // cannot write + EXPECT_EQ(t.proc.rpos, 14); + EXPECT_EQ(t.proc.wpos, 17); + EXPECT_EQ(t.proc.src.len, 14); + EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // can set because now wpos < rpos + EXPECT_EQ(t.proc.result().str.str, nullptr); + EXPECT_EQ(t.proc.result().str.len, 17); + EXPECT_EQ(t.proc.result().required_len(), 17); // increased! + EXPECT_EQ(t.subject, "001122defghijk"); + EXPECT_EQ(t.proc.maxcap, 17); + EXPECT_TRUE(t.proc.unfiltered_chars); +} + TEST(FilterProcessorInplace, translate_esc_after_translate_esc_bulk_excess__trimmed_capacity) { {