Skip to content

Commit

Permalink
test: update more tests to reflect new libxml2 HTML5 behaviors
Browse files Browse the repository at this point in the history
- starting to deprecate HTML element description data
- working around changed error generation
- updated CRLF test
  • Loading branch information
flavorjones committed Oct 5, 2024
1 parent 9e57ff0 commit bee1e6e
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 25 deletions.
9 changes: 2 additions & 7 deletions test/html4/sax/test_document_error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,10 @@ def start_document
end

def test_warning_document_encounters_error_but_terminates_normally
# Probably I'm doing something wrong, but I can't make nekohtml report errors,
# despite setting http://cyberneko.org/html/features/report-errors.
# See https://nekohtml.sourceforge.net/settings.html for more info.
# I'd love some help here if someone finds this comment and cares enough to dig in.
skip_unless_libxml2("nekohtml sax parser does not seem to report errors?")

warning_parser = Nokogiri::HTML4::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new)
warning_parser.parse("<html><body><<div att=")
refute_empty(warning_parser.document.errors, "error collector did not collect an error")

assert(warning_parser.document.end_document_called)
end
end
end
18 changes: 8 additions & 10 deletions test/html4/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -363,10 +363,8 @@ def test_document_has_error
html = Nokogiri::HTML4(<<~HTML)
<html>
<body>
<div awesome="asdf>
<p>inside div tag</p>
</div>
<p>outside div tag</p>
<div>
</foo>
</body>
</html>
HTML
Expand Down Expand Up @@ -660,14 +658,14 @@ def test_capturing_nonparse_errors_during_document_clone

def test_capturing_nonparse_errors_during_node_copy_between_docs
# Errors should be emitted while parsing only, and should not change when moving nodes.
doc1 = Nokogiri::HTML4("<html><body><diva id='unique'>one</diva></body></html>")
doc2 = Nokogiri::HTML4("<html><body><dive id='unique'>two</dive></body></html>")
doc1 = Nokogiri::HTML4("<html><body><div id='unique'>one</foo1></body></html>")
doc2 = Nokogiri::HTML4("<html><body><div id='unique'>two</foo2></body></html>")
node1 = doc1.at_css("#unique")
node2 = doc2.at_css("#unique")
original_errors1 = doc1.errors.dup
original_errors2 = doc2.errors.dup
assert(original_errors1.any? { |e| e.to_s.include?("Tag diva invalid") }, "it should complain about the tag name")
assert(original_errors2.any? { |e| e.to_s.include?("Tag dive invalid") }, "it should complain about the tag name")
assert(original_errors1.any? { |e| e.to_s.include?("foo1") }, "it should complain about the tag name")
assert(original_errors2.any? { |e| e.to_s.include?("foo2") }, "it should complain about the tag name")

node1.add_child(node2)

Expand Down Expand Up @@ -804,7 +802,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
end

describe "read memory" do
let(:input) { "<html><body><div" }
let(:input) { "<html><body><div></foo>" }

describe "strict parsing" do
let(:parse_options) { html_strict }
Expand All @@ -826,7 +824,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
end

describe "read io" do
let(:input) { StringIO.new("<html><body><div") }
let(:input) { StringIO.new("<html><body><div></foo>") }

describe "strict parsing" do
let(:parse_options) { html_strict }
Expand Down
2 changes: 1 addition & 1 deletion test/html4/test_document_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def binopen(file)
end

describe "error handling" do
RAW = "<html><body><div"
RAW = "<html><body><div></foo>"

{ "read_memory" => RAW, "read_io" => StringIO.new(RAW) }.each do |flavor, input|
it "#{flavor} should handle errors" do
Expand Down
17 changes: 14 additions & 3 deletions test/html4/test_element_description.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,22 @@ def test_description

def test_subelements
sub_elements = ElementDescription["body"].sub_elements
if Nokogiri.uses_libxml?
if Nokogiri.uses_libxml?(">= 2.14.0")
assert_equal(0, sub_elements.length)
elsif Nokogiri.uses_libxml?
assert_equal(65, sub_elements.length)
else
assert_equal(105, sub_elements.length)
end
end

def test_default_sub_element
assert_equal("div", ElementDescription["body"].default_sub_element)
sub_element = ElementDescription["body"].default_sub_element
if Nokogiri.uses_libxml?(">= 2.14.0")
assert_nil(sub_element)
else
assert_equal("div", sub_element)
end
end

def test_null_default_sub_element
Expand All @@ -86,7 +93,11 @@ def test_optional_attributes
def test_deprecated_attributes
attrs = ElementDescription["table"].deprecated_attributes
assert(attrs)
assert_equal(2, attrs.length)
if Nokogiri.uses_libxml?(">= 2.14.0")
assert_equal(0, attrs.length)
else
assert_equal(2, attrs.length)
end
end

def test_required_attributes
Expand Down
5 changes: 1 addition & 4 deletions test/html4/test_node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,10 @@ def test_fragment_serialization
end

def test_to_html_does_not_contain_entities
# as generated by a tool like NKF
html = "<html><body>\r\n<p> test paragraph\r\nfoo bar </p>\r\n</body></html>\r\n"
nokogiri = Nokogiri::HTML4.parse(html)

if RUBY_PLATFORM.include?("java")
# NKF linebreak modes are not supported as of jruby 1.2
# see http://jira.codehaus.org/browse/JRUBY-3602 for status
if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.14.0")
assert_equal(
"<p>testparagraph\nfoobar</p>",
nokogiri.at("p").to_html.delete(" "),
Expand Down

0 comments on commit bee1e6e

Please sign in to comment.