From bee1e6e4db3c947c7b129e8a400ea44002fe5c26 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 5 Oct 2024 16:04:11 -0400 Subject: [PATCH] test: update more tests to reflect new libxml2 HTML5 behaviors - starting to deprecate HTML element description data - working around changed error generation - updated CRLF test --- test/html4/sax/test_document_error.rb | 9 ++------- test/html4/test_document.rb | 18 ++++++++---------- test/html4/test_document_encoding.rb | 2 +- test/html4/test_element_description.rb | 17 ++++++++++++++--- test/html4/test_node.rb | 5 +---- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/test/html4/sax/test_document_error.rb b/test/html4/sax/test_document_error.rb index 513164148fd..fdd15ef694a 100644 --- a/test/html4/sax/test_document_error.rb +++ b/test/html4/sax/test_document_error.rb @@ -20,15 +20,10 @@ def start_document end def test_warning_document_encounters_error_but_terminates_normally - # Probably I'm doing something wrong, but I can't make nekohtml report errors, - # despite setting http://cyberneko.org/html/features/report-errors. - # See https://nekohtml.sourceforge.net/settings.html for more info. - # I'd love some help here if someone finds this comment and cares enough to dig in. - skip_unless_libxml2("nekohtml sax parser does not seem to report errors?") - warning_parser = Nokogiri::HTML4::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new) warning_parser.parse("<
-
one") - doc2 = Nokogiri::HTML4("two") + doc1 = Nokogiri::HTML4("
one") + doc2 = Nokogiri::HTML4("
two") node1 = doc1.at_css("#unique") node2 = doc2.at_css("#unique") original_errors1 = doc1.errors.dup original_errors2 = doc2.errors.dup - assert(original_errors1.any? { |e| e.to_s.include?("Tag diva invalid") }, "it should complain about the tag name") - assert(original_errors2.any? { |e| e.to_s.include?("Tag dive invalid") }, "it should complain about the tag name") + assert(original_errors1.any? { |e| e.to_s.include?("foo1") }, "it should complain about the tag name") + assert(original_errors2.any? { |e| e.to_s.include?("foo2") }, "it should complain about the tag name") node1.add_child(node2) @@ -804,7 +802,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 end describe "read memory" do - let(:input) { "
" } describe "strict parsing" do let(:parse_options) { html_strict } @@ -826,7 +824,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 end describe "read io" do - let(:input) { StringIO.new("
") } describe "strict parsing" do let(:parse_options) { html_strict } diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb index 0abc7057818..1cadf151589 100644 --- a/test/html4/test_document_encoding.rb +++ b/test/html4/test_document_encoding.rb @@ -148,7 +148,7 @@ def binopen(file) end describe "error handling" do - RAW = " RAW, "read_io" => StringIO.new(RAW) }.each do |flavor, input| it "#{flavor} should handle errors" do diff --git a/test/html4/test_element_description.rb b/test/html4/test_element_description.rb index bdc8d6ce654..fda891ab532 100644 --- a/test/html4/test_element_description.rb +++ b/test/html4/test_element_description.rb @@ -58,7 +58,9 @@ def test_description def test_subelements sub_elements = ElementDescription["body"].sub_elements - if Nokogiri.uses_libxml? + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_equal(0, sub_elements.length) + elsif Nokogiri.uses_libxml? assert_equal(65, sub_elements.length) else assert_equal(105, sub_elements.length) @@ -66,7 +68,12 @@ def test_subelements end def test_default_sub_element - assert_equal("div", ElementDescription["body"].default_sub_element) + sub_element = ElementDescription["body"].default_sub_element + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_nil(sub_element) + else + assert_equal("div", sub_element) + end end def test_null_default_sub_element @@ -86,7 +93,11 @@ def test_optional_attributes def test_deprecated_attributes attrs = ElementDescription["table"].deprecated_attributes assert(attrs) - assert_equal(2, attrs.length) + if Nokogiri.uses_libxml?(">= 2.14.0") + assert_equal(0, attrs.length) + else + assert_equal(2, attrs.length) + end end def test_required_attributes diff --git a/test/html4/test_node.rb b/test/html4/test_node.rb index 60759b9ceb3..45ee8feb193 100644 --- a/test/html4/test_node.rb +++ b/test/html4/test_node.rb @@ -168,13 +168,10 @@ def test_fragment_serialization end def test_to_html_does_not_contain_entities - # as generated by a tool like NKF html = "\r\n

test paragraph\r\nfoo bar

\r\n\r\n" nokogiri = Nokogiri::HTML4.parse(html) - if RUBY_PLATFORM.include?("java") - # NKF linebreak modes are not supported as of jruby 1.2 - # see http://jira.codehaus.org/browse/JRUBY-3602 for status + if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.14.0") assert_equal( "

testparagraph\nfoobar

", nokogiri.at("p").to_html.delete(" "),