Skip to content

Commit

Permalink
fix wip
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed Jan 25, 2024
1 parent e7b0662 commit 6b9d5d9
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 25 deletions.
44 changes: 26 additions & 18 deletions src/c4/yml/filter_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ struct FilterProcessorInplace
, wcap(wcap_)
, rpos(0)
, wpos(0)
, maxcap(0)
, maxcap(src.len)
, unfiltered_chars(false)
{
RYML_ASSERT(wcap >= src.len);
Expand All @@ -153,10 +153,10 @@ struct FilterProcessorInplace

/** Package the processor's current state into a FilterResultInPlace.
 *
 * - str.str is src.str only when the write position is within the write
 *   capacity (wpos <= wcap) and no character was left unwritten
 *   (unfiltered_chars is false); otherwise it is nullptr.
 * - reqlen is set to maxcap.
 */
C4_ALWAYS_INLINE FilterResultInPlace result() const noexcept
{
// NOTE(review): this listing comes from a diff view without +/- markers; the
// two debug calls below look like the old and new versions of the same
// statement -- confirm against the repository which one is live.
_c4dbgip("inplace: wpos={} wcap={} unfiltered={}", this->wpos, this->wcap, this->unfiltered_chars);
_c4dbgip("inplace: wpos={} wcap={} unfiltered={} maxcap={}", this->wpos, this->wcap, this->unfiltered_chars, this->maxcap);
FilterResultInPlace ret;
ret.str.str = (wpos <= wcap && !unfiltered_chars) ? src.str : nullptr;
// NOTE(review): likewise, these look like the old and new versions of one
// assignment. As written, the second one wins, so the reported length is wpos.
ret.str.len = wpos + unfiltered_chars;
ret.str.len = wpos;
ret.reqlen = maxcap;
return ret;
}
Expand Down Expand Up @@ -186,7 +186,7 @@ struct FilterProcessorInplace
}
else
{
_c4dbgip("inplace: add unwritten {}->{}!", unfiltered_chars, true);
_c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap));
unfiltered_chars = true;
}
++wpos;
Expand All @@ -202,7 +202,7 @@ struct FilterProcessorInplace
}
else
{
_c4dbgip("inplace: add unwritten {}->{}!", unfiltered_chars, true);
_c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+num > maxcap ? wpos+num : maxcap));
unfiltered_chars = true;
}
wpos += num;
Expand All @@ -219,7 +219,7 @@ struct FilterProcessorInplace
}
else
{
_c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}<wcap={})!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap);
_c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}<wcap={}) maxcap={}->{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap));
unfiltered_chars = true;
}
++rpos;
Expand All @@ -242,7 +242,7 @@ struct FilterProcessorInplace
}
else
{
_c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}<wcap={})!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap);
_c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}<wcap={}) maxcap={}->{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap);
unfiltered_chars = true;
}
rpos += num;
Expand All @@ -252,14 +252,15 @@ struct FilterProcessorInplace

void translate_esc(char c) noexcept
{
RYML_ASSERT(rpos + 2 <= src.len);
if(wpos < wcap) // respect write-capacity
{
if(wpos <= rpos)
src.str[wpos] = c;
}
else
{
_c4dbgip("inplace: add unfiltered {}->{}!", unfiltered_chars, true);
_c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap));
unfiltered_chars = true;
}
rpos += 2;
Expand All @@ -278,13 +279,15 @@ struct FilterProcessorInplace
{
if(wpos_next <= wcap)
memcpy(src.str + wpos, s, nw);
wpos = wpos_next;
rpos = rpos_next;
wpos = wpos_next;
maxcap = wpos > maxcap ? wpos : maxcap;
}
else // there is overlap. move the (to-be-read) string to the right.
{
const size_t excess = wpos_next - rpos_next;
if(src.len + excess <= wcap) // ensure we do not go past the end.
RYML_ASSERT(wpos_next > rpos_next);
if(src.len + excess <= wcap) // ensure we do not go past the end
{
RYML_ASSERT(rpos+nr+excess <= src.len);
if(wpos_next <= wcap)
Expand All @@ -296,23 +299,28 @@ struct FilterProcessorInplace
else
{
rpos = rpos_next;
const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0;
_c4dbgip("inplace: add unfiltered {}->{}!", unfiltered_chars, unfiltered_chars+unw);
unfiltered_chars += unw;
//const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0;

Check notice

Code scanning / CodeQL

Commented-out code Note

This comment appears to contain commented-out code.
_c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true);
unfiltered_chars = true;
}
wpos = wpos_next;
// extend the string up to capacity
src.len += excess;
maxcap = wpos > maxcap ? wpos : maxcap;
}
else
{
//const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0;

Check notice

Code scanning / CodeQL

Commented-out code Note

This comment appears to contain commented-out code.
RYML_ASSERT(rpos_next <= src.len);
const size_t required_size = wpos_next + (src.len - rpos_next);
_c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, required_size);
RYML_ASSERT(required_size > wcap);
unfiltered_chars = true;
maxcap = required_size > maxcap ? required_size : maxcap;
wpos = wpos_next;
rpos = rpos_next;
const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0;
_c4dbgip("inplace: add unfiltered {}->{}!", unfiltered_chars, unfiltered_chars+unw);
unfiltered_chars += unw;
}
wpos = wpos_next;
}
maxcap = wpos > maxcap ? wpos : maxcap;
}
};

Expand Down
9 changes: 4 additions & 5 deletions src/c4/yml/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5670,18 +5670,17 @@ csubstr Parser::_filter_scalar_dquot(substr s)
else
{
const size_t len = r.required_len();
_RYML_CB_ASSERT(this->callbacks(), s.len < len);
_c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
_RYML_CB_ASSERT(this->callbacks(), m_tree);
substr dst = m_tree->alloc_arena(len);
_c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
_RYML_CB_ASSERT(this->callbacks(), dst.len == len);
FilterResult rsd = this->filter_scalar_dquoted(s, dst);
_c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
_RYML_CB_ASSERT(this->callbacks(), rsd.required_len() == len);
_RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
_RYML_CB_CHECK(m_stack.m_callbacks, rsd.valid());
_c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
return r.get();
return rsd.get();
}
}

Expand All @@ -5704,7 +5703,7 @@ csubstr Parser::_filter_scalar_block_literal(substr s, BlockChomp_e chomp, size_
FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
_RYML_CB_CHECK(m_stack.m_callbacks, rsd.valid());
_c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
return r.get();
return rsd.get();
}
}

Expand All @@ -5727,7 +5726,7 @@ csubstr Parser::_filter_scalar_block_folded(substr s, BlockChomp_e chomp, size_t
FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
_RYML_CB_CHECK(m_stack.m_callbacks, rsd.valid());
_c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
return r.get();
return rsd.get();
}
}

Expand Down
4 changes: 2 additions & 2 deletions test/test_double_quoted.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ std::cout << "WTF1: input_sz=" << input_sz << " --> expected_sz=" << expected_
std::cout << "WTF2: input_sz=" << input_sz << " --> expected_sz=" << expected_sz << " " << subject_2.size() << "\n";
csubstr sresult = parser2._filter_scalar_dquot(to_substr(subject_2));
std::cout << "WTF3: input_sz=" << input_sz << " --> expected_sz=" << expected_sz << " " << subject_2.size() << "\n";
EXPECT_EQ(result.required_len(), expected_sz);
EXPECT_EQ(sresult.len, expected_sz);
EXPECT_GE(result.required_len(), expected_sz);
EXPECT_EQ(sresult.len, result.str.len);
if(result.valid())
{
const csubstr out = result.get();
Expand Down
209 changes: 209 additions & 0 deletions test/test_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,215 @@ TEST(FilterProcessorInplace, set_after_translate_esc_bulk_excess__trimmed_capaci
}
}

// Walks a FilterProcessorInplace through a sequence of operations on a
// 14-char subject whose write capacity was trimmed to 14, checking the full
// processor state (rpos, wpos, src.len, sofar(), result(), subject, maxcap,
// unfiltered_chars) after every single step:
//  1. a bulk translate_esc("aaaa", nw=4, nr=1) that is not written to the
//     buffer: the subject stays untouched, unfiltered_chars becomes true and
//     the required length jumps from 14 to 16;
//  2. single-char translate_esc() calls, which are skipped while wpos is
//     ahead of rpos and written once rpos has caught up;
//  3. set() calls up to the capacity (written) and beyond it (not written).
// Throughout, result().str.str must stay nullptr, and the required length
// must stay at 16 until wpos itself exceeds it (17 at the last step).
TEST(FilterProcessorInplace, translate_esc_with_temporary_excess_requirement__trimmed_capacity)
{
InplaceTester t("00112233445566");
t.trim_capacity();
EXPECT_EQ(t.proc.wcap, 14);
EXPECT_EQ(t.proc.rpos, 0);
EXPECT_EQ(t.proc.wpos, 0);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_FALSE(t.proc.unfiltered_chars);
EXPECT_EQ(t.proc.sofar(), "");
EXPECT_EQ(t.proc.maxcap, 14);
// 00112233445566
// ^ (rpos=0)
// ^ (wpos=0)
t.proc.translate_esc("aaaa", /*nw*/4, /*nr*/1);
EXPECT_EQ(t.proc.rpos, 2);
EXPECT_EQ(t.proc.wpos, 4);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "0011"); // "aaaa" was not written
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 4);
EXPECT_EQ(t.proc.result().required_len(), 16); // presumably 4 written + 12 still to read -- confirm
EXPECT_EQ(t.subject, "00112233445566");
EXPECT_EQ(t.proc.maxcap, 16); // increased!
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//   ^ (rpos=2)
//     ^ (wpos=4)
t.proc.translate_esc('b'); // do not write!
EXPECT_EQ(t.proc.rpos, 4);
EXPECT_EQ(t.proc.wpos, 5);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "00112"); // must not set 'b'
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 5);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "00112233445566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//     ^ (rpos=4)
//      ^ (wpos=5)
t.proc.translate_esc('c'); // do not write!
EXPECT_EQ(t.proc.rpos, 6);
EXPECT_EQ(t.proc.wpos, 6);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122"); // must not set 'c'
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 6);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "00112233445566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//       ^ (rpos=6)
//       ^ (wpos=6)
t.proc.translate_esc('d'); // can write
EXPECT_EQ(t.proc.rpos, 8);
EXPECT_EQ(t.proc.wpos, 7);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122d"); // can set because wpos is no longer ahead of rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 7);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122d3445566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//         ^ (rpos=8)
//        ^ (wpos=7)
t.proc.translate_esc('e'); // can write
EXPECT_EQ(t.proc.rpos, 10);
EXPECT_EQ(t.proc.wpos, 8);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122de"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 8);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122de445566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//           ^ (rpos=10)
//         ^ (wpos=8)
t.proc.translate_esc('f'); // can write
EXPECT_EQ(t.proc.rpos, 12);
EXPECT_EQ(t.proc.wpos, 9);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122def"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 9);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122def45566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//             ^ (rpos=12)
//          ^ (wpos=9)
t.proc.translate_esc('g'); // can write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 10);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defg"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 10);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122defg5566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14, one past the end)
//           ^ (wpos=10)
t.proc.set('h'); // can write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 11);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defgh"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 11);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122defgh566");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14)
//            ^ (wpos=11)
t.proc.set('i'); // can write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 12);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defghi"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 12);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122defghi66");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14)
//             ^ (wpos=12)
t.proc.set('j'); // can write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 13);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defghij"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 13);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122defghij6");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14)
//              ^ (wpos=13)
t.proc.set('k'); // can write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 14);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // can set because now wpos < rpos
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 14);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122defghijk");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14)
//               ^ (wpos=14, equal to wcap: the buffer is full)
t.proc.set('!'); // cannot write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 15);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // unchanged: '!' was not written
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 15);
EXPECT_EQ(t.proc.result().required_len(), 16);
EXPECT_EQ(t.subject, "001122defghijk");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14)
//                ^ (wpos=15, past wcap)
t.proc.set('!'); // cannot write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 16);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // unchanged: '!' was not written
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 16);
EXPECT_EQ(t.proc.result().required_len(), 16); // wpos has only now reached the earlier requirement
EXPECT_EQ(t.subject, "001122defghijk");
EXPECT_EQ(t.proc.maxcap, 16);
EXPECT_TRUE(t.proc.unfiltered_chars);
// 00112233445566
//               ^ (rpos=14)
//                 ^ (wpos=16, past wcap)
t.proc.set('!'); // cannot write
EXPECT_EQ(t.proc.rpos, 14);
EXPECT_EQ(t.proc.wpos, 17);
EXPECT_EQ(t.proc.src.len, 14);
EXPECT_EQ(t.proc.sofar(), "001122defghijk"); // unchanged: '!' was not written
EXPECT_EQ(t.proc.result().str.str, nullptr);
EXPECT_EQ(t.proc.result().str.len, 17);
EXPECT_EQ(t.proc.result().required_len(), 17); // increased!
EXPECT_EQ(t.subject, "001122defghijk");
EXPECT_EQ(t.proc.maxcap, 17);
EXPECT_TRUE(t.proc.unfiltered_chars);
}

TEST(FilterProcessorInplace, translate_esc_after_translate_esc_bulk_excess__trimmed_capacity)
{
{
Expand Down

0 comments on commit 6b9d5d9

Please sign in to comment.