From d524c8d2db810fc18b648c9e46e5bfbb617b5d4d Mon Sep 17 00:00:00 2001 From: Ryan Ong Date: Tue, 15 Oct 2024 15:07:47 -0400 Subject: [PATCH 1/6] add syntax error node path --- ext/nokogiri/xml_syntax_error.c | 6 ++++++ lib/nokogiri/xml/syntax_error.rb | 9 +++++++++ test/xml/test_schema.rb | 4 ++++ test/xml/test_syntax_error.rb | 1 + 4 files changed, 20 insertions(+) diff --git a/ext/nokogiri/xml_syntax_error.c b/ext/nokogiri/xml_syntax_error.c index 0ccf43985dd..d0a3bf8d17e 100644 --- a/ext/nokogiri/xml_syntax_error.c +++ b/ext/nokogiri/xml_syntax_error.c @@ -44,6 +44,7 @@ noko__error_raise(void *ctx, xmlErrorConstPtr error) VALUE noko_xml_syntax_error__wrap(xmlErrorConstPtr error) { + xmlChar *c_path ; VALUE msg, e, klass; klass = cNokogiriXmlSyntaxError; @@ -61,16 +62,21 @@ noko_xml_syntax_error__wrap(xmlErrorConstPtr error) ); if (error) { + c_path = xmlGetNodePath(error->node); + rb_iv_set(e, "@domain", INT2NUM(error->domain)); rb_iv_set(e, "@code", INT2NUM(error->code)); rb_iv_set(e, "@level", INT2NUM((short)error->level)); rb_iv_set(e, "@file", RBSTR_OR_QNIL(error->file)); rb_iv_set(e, "@line", INT2NUM(error->line)); + rb_iv_set(e, "@path", RBSTR_OR_QNIL(c_path)); rb_iv_set(e, "@str1", RBSTR_OR_QNIL(error->str1)); rb_iv_set(e, "@str2", RBSTR_OR_QNIL(error->str2)); rb_iv_set(e, "@str3", RBSTR_OR_QNIL(error->str3)); rb_iv_set(e, "@int1", INT2NUM(error->int1)); rb_iv_set(e, "@column", INT2NUM(error->int2)); + + xmlFree(c_path); } return e; diff --git a/lib/nokogiri/xml/syntax_error.rb b/lib/nokogiri/xml/syntax_error.rb index 9db500e9dc6..7ed64b99d70 100644 --- a/lib/nokogiri/xml/syntax_error.rb +++ b/lib/nokogiri/xml/syntax_error.rb @@ -19,14 +19,23 @@ def aggregate(errors) end end + # What part of libxml2 raised this error (enum xmlErrorDomain) attr_reader :domain + # libxml2 error code (enum xmlParserErrors) attr_reader :code + # libxml2 error level (enum xmlErrorLevel) attr_reader :level attr_reader :file attr_reader :line + # libxml2 path of the node in the tree that caused the error + attr_reader :path + # libxml2 extra string information attr_reader :str1 + # libxml2 extra string information attr_reader :str2 + # libxml2 extra string information attr_reader :str3 + # libxml2 extra extra number information attr_reader :int1 attr_reader :column diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 4bc401390b0..d4120b34d6e 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -159,6 +159,10 @@ class TestNokogiriXMLSchema < Nokogiri::TestCase assert(errors = xsd.validate(doc)) assert_equal(2, errors.length) + assert_equal( + ["/purchaseOrder/billTo/state", "/purchaseOrder/shipTo/state"], + errors.map(&:path).sort, + ) end it "validate_invalid_file" do diff --git a/test/xml/test_syntax_error.rb b/test/xml/test_syntax_error.rb index 40709d71901..6fde1ea3b53 100644 --- a/test/xml/test_syntax_error.rb +++ b/test/xml/test_syntax_error.rb @@ -58,5 +58,6 @@ assert_nil error.column assert_nil error.level end + assert_nil error.path end end From bc2504d54e712930403145a066f676247b6178a8 Mon Sep 17 00:00:00 2001 From: Ryan Ong Date: Tue, 15 Oct 2024 16:50:44 -0400 Subject: [PATCH 2/6] add failing test --- test/xml/test_schema.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index d4120b34d6e..008c4135361 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -175,6 +175,10 @@ class TestNokogiriXMLSchema < Nokogiri::TestCase assert(errors = xsd.validate(tempfile.path)) assert_equal(2, errors.length) + assert_equal( + ["/purchaseOrder/billTo/state", "/purchaseOrder/shipTo/state"], + errors.map(&:path).sort, + ) end it "validate_non_document" do From 1058866f501c344899a7be090b9a3fdebe17a8c3 Mon Sep 17 00:00:00 2001 From: Ryan Ong Date: Wed, 16 Oct 2024 11:25:49 -0400 Subject: [PATCH 3/6] add more tests --- test/html4/test_document.rb | 3 +++ test/xml/test_document.rb | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 988989f5f10..7e05fc25b68 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -24,6 +24,7 @@ def test_exceptions_remove_newlines refute_empty(errors, "has errors") errors.each do |error| assert_equal(error.to_s.chomp, error.to_s) + assert_nil(error.path) end end @@ -813,6 +814,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 Nokogiri::HTML4.parse(input, nil, nil, parse_options) end assert_match(/Parser without recover option encountered error or warning/, exception.to_s) + assert_nil(exception.path) end end @@ -835,6 +837,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 Nokogiri::HTML4.parse(input, nil, "UTF-8", parse_options) end assert_match(/Parser without recover option encountered error or warning/, exception.to_s) + assert_nil(exception.path) end end diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 65589d80d3d..2860c99a798 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -1087,9 +1087,10 @@ def test_can_be_closed let(:parse_options) { xml_strict } it "raises exception on parse error" do - assert_raises Nokogiri::SyntaxError do + error = assert_raises Nokogiri::SyntaxError do Nokogiri::XML.parse(input, nil, nil, parse_options) end + assert_nil(error.path) end end From 7ef096534ae711f5abd1bca7f3be6012b6b1b081 Mon Sep 17 00:00:00 2001 From: Ryan Ong Date: Fri, 18 Oct 2024 12:16:07 -0400 Subject: [PATCH 4/6] update tests and documentation --- lib/nokogiri/xml/syntax_error.rb | 11 +++-------- test/html4/test_document.rb | 1 - test/xml/test_schema.rb | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/nokogiri/xml/syntax_error.rb b/lib/nokogiri/xml/syntax_error.rb index 7ed64b99d70..4444c775ce1 100644 --- a/lib/nokogiri/xml/syntax_error.rb +++ b/lib/nokogiri/xml/syntax_error.rb @@ -19,23 +19,18 @@ def aggregate(errors) end end - # What part of libxml2 raised this error (enum xmlErrorDomain) attr_reader :domain - # libxml2 error code (enum xmlParserErrors) attr_reader :code - # libxml2 error level (enum xmlErrorLevel) attr_reader :level attr_reader :file attr_reader :line - # libxml2 path of the node in the tree that caused the error + # ⚠ path functionality is not available when running JRuby. + # + # path of the node that caused the error when validating a `Nokogiri::XML::Document` attr_reader :path - # libxml2 extra string information attr_reader :str1 - # libxml2 extra string information attr_reader :str2 - # libxml2 extra string information attr_reader :str3 - # libxml2 extra extra number information attr_reader :int1 attr_reader :column diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 7e05fc25b68..e0ece6ada23 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -24,7 +24,6 @@ def test_exceptions_remove_newlines refute_empty(errors, "has errors") errors.each do |error| assert_equal(error.to_s.chomp, error.to_s) - assert_nil(error.path) end end diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 008c4135361..44ae0732ef1 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -176,7 +176,7 @@ class TestNokogiriXMLSchema < Nokogiri::TestCase assert(errors = xsd.validate(tempfile.path)) assert_equal(2, errors.length) assert_equal( - ["/purchaseOrder/billTo/state", "/purchaseOrder/shipTo/state"], + [nil, nil], errors.map(&:path).sort, ) end From 5e579ee785554994799ab86321f2b0c3a5d2d620 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 18 Oct 2024 12:54:29 -0400 Subject: [PATCH 5/6] doc: clarify SyntaxError#path docstring --- lib/nokogiri/xml/syntax_error.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/nokogiri/xml/syntax_error.rb b/lib/nokogiri/xml/syntax_error.rb index 4444c775ce1..290a1682bb7 100644 --- a/lib/nokogiri/xml/syntax_error.rb +++ b/lib/nokogiri/xml/syntax_error.rb @@ -24,9 +24,14 @@ def aggregate(errors) attr_reader :level attr_reader :file attr_reader :line - # ⚠ path functionality is not available when running JRuby. + + # The XPath path of the node that caused the error when validating a `Nokogiri::XML::Document`. + # + # This attribute will only be non-nil when the error is emitted by `Schema#validate` on + # Document objects. It will return `nil` for DOM parsing errors and for errors emitted during + # Schema validation of files. # - # path of the node that caused the error when validating a `Nokogiri::XML::Document` + # ⚠ `#path` is not supported on JRuby, where it will always return `nil`. attr_reader :path attr_reader :str1 attr_reader :str2 From 06e0f3efa549b0e286699c3b2e8f2928813f4e63 Mon Sep 17 00:00:00 2001 From: Ryan Ong Date: Fri, 18 Oct 2024 13:24:47 -0400 Subject: [PATCH 6/6] fix test for java --- test/xml/test_schema.rb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 44ae0732ef1..f242de6b84e 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -159,10 +159,17 @@ class TestNokogiriXMLSchema < Nokogiri::TestCase assert(errors = xsd.validate(doc)) assert_equal(2, errors.length) - assert_equal( - ["/purchaseOrder/billTo/state", "/purchaseOrder/shipTo/state"], - errors.map(&:path).sort, - ) + if Nokogiri.uses_libxml? + assert_equal( + ["/purchaseOrder/billTo/state", "/purchaseOrder/shipTo/state"], + errors.map(&:path).sort, + ) + else + assert_equal( + [nil, nil], + errors.map(&:path).sort, + ) + end end it "validate_invalid_file" do