From bee1e6e4db3c947c7b129e8a400ea44002fe5c26 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike@37signals.com>
Date: Sat, 5 Oct 2024 16:04:11 -0400
Subject: [PATCH] test: update more tests to reflect new libxml2 HTML5
 behaviors

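- start deprecating HTML element description data: with libxml2 >= 2.14.0
  the tests expect empty sub_elements and deprecated_attributes, and a
  nil default_sub_element
- work around changed error generation: error-handling tests now use a
  mismatched end tag (e.g. "<div></foo>") instead of unterminated tags
  or invalid tag names, and the SAX test asserts that end_document was
  called rather than that errors were collected
- update the CRLF test: libxml2 >= 2.14.0 now takes the same expectation
  branch as JRuby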
---
 test/html4/sax/test_document_error.rb  |  9 ++-------
 test/html4/test_document.rb            | 18 ++++++++----------
 test/html4/test_document_encoding.rb   |  2 +-
 test/html4/test_element_description.rb | 17 ++++++++++++++---
 test/html4/test_node.rb                |  5 +----
 5 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/test/html4/sax/test_document_error.rb b/test/html4/sax/test_document_error.rb
index 513164148fd..fdd15ef694a 100644
--- a/test/html4/sax/test_document_error.rb
+++ b/test/html4/sax/test_document_error.rb
@@ -20,15 +20,10 @@ def start_document
     end
 
     def test_warning_document_encounters_error_but_terminates_normally
-      # Probably I'm doing something wrong, but I can't make nekohtml report errors,
-      # despite setting http://cyberneko.org/html/features/report-errors.
-      # See https://nekohtml.sourceforge.net/settings.html for more info.
-      # I'd love some help here if someone finds this comment and cares enough to dig in.
-      skip_unless_libxml2("nekohtml sax parser does not seem to report errors?")
-
       warning_parser = Nokogiri::HTML4::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new)
       warning_parser.parse("<html><body><<div att=")
-      refute_empty(warning_parser.document.errors, "error collector did not collect an error")
+
+      assert(warning_parser.document.end_document_called)
     end
   end
 end
diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb
index 99061bba7cc..763a72af1f6 100644
--- a/test/html4/test_document.rb
+++ b/test/html4/test_document.rb
@@ -363,10 +363,8 @@ def test_document_has_error
           html = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
-                <div awesome="asdf>
-                  <p>inside div tag</p>
-                </div>
-                <p>outside div tag</p>
+                <div>
+                </foo>
               </body>
             </html>
           HTML
@@ -660,14 +658,14 @@ def test_capturing_nonparse_errors_during_document_clone
 
         def test_capturing_nonparse_errors_during_node_copy_between_docs
           # Errors should be emitted while parsing only, and should not change when moving nodes.
-          doc1 = Nokogiri::HTML4("<html><body><diva id='unique'>one</diva></body></html>")
-          doc2 = Nokogiri::HTML4("<html><body><dive id='unique'>two</dive></body></html>")
+          doc1 = Nokogiri::HTML4("<html><body><div id='unique'>one</foo1></body></html>")
+          doc2 = Nokogiri::HTML4("<html><body><div id='unique'>two</foo2></body></html>")
           node1 = doc1.at_css("#unique")
           node2 = doc2.at_css("#unique")
           original_errors1 = doc1.errors.dup
           original_errors2 = doc2.errors.dup
-          assert(original_errors1.any? { |e| e.to_s.include?("Tag diva invalid") }, "it should complain about the tag name")
-          assert(original_errors2.any? { |e| e.to_s.include?("Tag dive invalid") }, "it should complain about the tag name")
+          assert(original_errors1.any? { |e| e.to_s.include?("foo1") }, "it should complain about the tag name")
+          assert(original_errors2.any? { |e| e.to_s.include?("foo2") }, "it should complain about the tag name")
 
           node1.add_child(node2)
 
@@ -804,7 +802,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
           end
 
           describe "read memory" do
-            let(:input) { "<html><body><div" }
+            let(:input) { "<html><body><div></foo>" }
 
             describe "strict parsing" do
               let(:parse_options) { html_strict }
@@ -826,7 +824,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
           end
 
           describe "read io" do
-            let(:input) { StringIO.new("<html><body><div") }
+            let(:input) { StringIO.new("<html><body><div></foo>") }
 
             describe "strict parsing" do
               let(:parse_options) { html_strict }
diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb
index 0abc7057818..1cadf151589 100644
--- a/test/html4/test_document_encoding.rb
+++ b/test/html4/test_document_encoding.rb
@@ -148,7 +148,7 @@ def binopen(file)
         end
 
         describe "error handling" do
-          RAW = "<html><body><div"
+          RAW = "<html><body><div></foo>"
 
           { "read_memory" => RAW, "read_io" => StringIO.new(RAW) }.each do |flavor, input|
             it "#{flavor} should handle errors" do
diff --git a/test/html4/test_element_description.rb b/test/html4/test_element_description.rb
index bdc8d6ce654..fda891ab532 100644
--- a/test/html4/test_element_description.rb
+++ b/test/html4/test_element_description.rb
@@ -58,7 +58,9 @@ def test_description
 
       def test_subelements
         sub_elements = ElementDescription["body"].sub_elements
-        if Nokogiri.uses_libxml?
+        if Nokogiri.uses_libxml?(">= 2.14.0")
+          assert_equal(0, sub_elements.length)
+        elsif Nokogiri.uses_libxml?
           assert_equal(65, sub_elements.length)
         else
           assert_equal(105, sub_elements.length)
@@ -66,7 +68,12 @@ def test_subelements
       end
 
       def test_default_sub_element
-        assert_equal("div", ElementDescription["body"].default_sub_element)
+        sub_element = ElementDescription["body"].default_sub_element
+        if Nokogiri.uses_libxml?(">= 2.14.0")
+          assert_nil(sub_element)
+        else
+          assert_equal("div", sub_element)
+        end
       end
 
       def test_null_default_sub_element
@@ -86,7 +93,11 @@ def test_optional_attributes
       def test_deprecated_attributes
         attrs = ElementDescription["table"].deprecated_attributes
         assert(attrs)
-        assert_equal(2, attrs.length)
+        if Nokogiri.uses_libxml?(">= 2.14.0")
+          assert_equal(0, attrs.length)
+        else
+          assert_equal(2, attrs.length)
+        end
       end
 
       def test_required_attributes
diff --git a/test/html4/test_node.rb b/test/html4/test_node.rb
index 60759b9ceb3..45ee8feb193 100644
--- a/test/html4/test_node.rb
+++ b/test/html4/test_node.rb
@@ -168,13 +168,10 @@ def test_fragment_serialization
       end
 
       def test_to_html_does_not_contain_entities
-        # as generated by a tool like NKF
         html = "<html><body>\r\n<p> test paragraph\r\nfoo bar </p>\r\n</body></html>\r\n"
         nokogiri = Nokogiri::HTML4.parse(html)
 
-        if RUBY_PLATFORM.include?("java")
-          # NKF linebreak modes are not supported as of jruby 1.2
-          # see http://jira.codehaus.org/browse/JRUBY-3602 for status
+        if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.14.0")
           assert_equal(
             "<p>testparagraph\nfoobar</p>",
             nokogiri.at("p").to_html.delete(" "),