From 2e97f750ef44190267b8cad8ea2ce195be8b9a7d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 19:14:59 -0400 Subject: [PATCH 1/8] dep: bump min libxml2 version to 2.7.7 --- ext/nokogiri/extconf.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index b97fe67b557..419b67449ef 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -12,7 +12,7 @@ # helpful constants PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", "..")) -REQUIRED_LIBXML_VERSION = "2.6.21" +REQUIRED_LIBXML_VERSION = "2.7.7" RECOMMENDED_LIBXML_VERSION = "2.9.3" REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.2" # keep this version in sync with the one in the gemspec @@ -1116,7 +1116,6 @@ def compile ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h") end -have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21 have_func("xmlFirstElementChild") # introduced in libxml 2.7.3 have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24 have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21 From cf899ef349b71db3148e7d07fc12da6825119df3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 19:16:43 -0400 Subject: [PATCH 2/8] tidy: remove workarounds for xmlFirstElementChild in older libxml2 --- ext/nokogiri/extconf.rb | 1 - ext/nokogiri/libxml2_backwards_compat.c | 121 ------------------------ ext/nokogiri/nokogiri.h | 7 -- nokogiri.gemspec | 1 - 4 files changed, 130 deletions(-) delete mode 100644 ext/nokogiri/libxml2_backwards_compat.c diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 419b67449ef..cc46f99a5b0 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -1116,7 +1116,6 @@ def compile ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h") end -have_func("xmlFirstElementChild") # introduced in libxml 2.7.3 have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24 have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21 have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 diff --git a/ext/nokogiri/libxml2_backwards_compat.c b/ext/nokogiri/libxml2_backwards_compat.c deleted file mode 100644 index f5255cb989c..00000000000 --- a/ext/nokogiri/libxml2_backwards_compat.c +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef HAVE_XMLFIRSTELEMENTCHILD -#include -/** - * xmlFirstElementChild: - * @parent: the parent node - * - * Finds the first child node of that element which is a Element node - * Note the handling of entities references is different than in - * the W3C DOM element traversal spec since we don't have back reference - * from entities content to entities references. - * - * Returns the first element child or NULL if not available - */ -xmlNodePtr -xmlFirstElementChild(xmlNodePtr parent) -{ - xmlNodePtr cur = NULL; - - if (parent == NULL) { - return (NULL); - } - switch (parent->type) { - case XML_ELEMENT_NODE: - case XML_ENTITY_NODE: - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - cur = parent->children; - break; - default: - return (NULL); - } - while (cur != NULL) { - if (cur->type == XML_ELEMENT_NODE) { - return (cur); - } - cur = cur->next; - } - return (NULL); -} - -/** - * xmlNextElementSibling: - * @node: the current node - * - * Finds the first closest next sibling of the node which is an - * element node. - * Note the handling of entities references is different than in - * the W3C DOM element traversal spec since we don't have back reference - * from entities content to entities references. - * - * Returns the next element sibling or NULL if not available - */ -xmlNodePtr -xmlNextElementSibling(xmlNodePtr node) -{ - if (node == NULL) { - return (NULL); - } - switch (node->type) { - case XML_ELEMENT_NODE: - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: - case XML_ENTITY_REF_NODE: - case XML_ENTITY_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_DTD_NODE: - case XML_XINCLUDE_START: - case XML_XINCLUDE_END: - node = node->next; - break; - default: - return (NULL); - } - while (node != NULL) { - if (node->type == XML_ELEMENT_NODE) { - return (node); - } - node = node->next; - } - return (NULL); -} - -/** - * xmlLastElementChild: - * @parent: the parent node - * - * Finds the last child node of that element which is a Element node - * Note the handling of entities references is different than in - * the W3C DOM element traversal spec since we don't have back reference - * from entities content to entities references. - * - * Returns the last element child or NULL if not available - */ -xmlNodePtr -xmlLastElementChild(xmlNodePtr parent) -{ - xmlNodePtr cur = NULL; - - if (parent == NULL) { - return (NULL); - } - switch (parent->type) { - case XML_ELEMENT_NODE: - case XML_ENTITY_NODE: - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - cur = parent->last; - break; - default: - return (NULL); - } - while (cur != NULL) { - if (cur->type == XML_ELEMENT_NODE) { - return (cur); - } - cur = cur->prev; - } - return (NULL); -} -#endif diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 0d82cb7c3f4..7a2e070af8a 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -56,13 +56,6 @@ #include -/* libxml2_backwards_compat.c */ -#ifndef HAVE_XMLFIRSTELEMENTCHILD -xmlNodePtr xmlFirstElementChild(xmlNodePtr parent); -xmlNodePtr xmlNextElementSibling(xmlNodePtr node); -xmlNodePtr xmlLastElementChild(xmlNodePtr parent); -#endif - #define XMLNS_PREFIX "xmlns" #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */ diff --git a/nokogiri.gemspec b/nokogiri.gemspec index 49477c6543f..f4f3941b4ed 100644 --- a/nokogiri.gemspec +++ b/nokogiri.gemspec @@ -152,7 +152,6 @@ Gem::Specification.new do |spec| "ext/nokogiri/html4_entity_lookup.c", "ext/nokogiri/html4_sax_parser_context.c", "ext/nokogiri/html4_sax_push_parser.c", - "ext/nokogiri/libxml2_backwards_compat.c", "ext/nokogiri/nokogiri.c", "ext/nokogiri/nokogiri.h", "ext/nokogiri/xml_attr.c", From e212a4f7ee67f653cc29032c3c66251080c9c506 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 19:17:49 -0400 Subject: [PATCH 3/8] tidy: remove conditional use of structured error handlers All supported libxml2 versions have these functions. --- ext/nokogiri/extconf.rb | 4 ---- ext/nokogiri/xml_relax_ng.c | 4 ---- ext/nokogiri/xml_schema.c | 6 ------ 3 files changed, 14 deletions(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index cc46f99a5b0..9f9ee8dd0a2 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -1116,10 +1116,6 @@ def compile ensure_func("gumbo_parse_with_options", "nokogiri_gumbo.h") end -have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24 -have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21 -have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 -have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 have_func("rb_category_warning") # introduced in Ruby 3.0 other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",") diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index 2586e9617b3..0161115b94d 100644 --- a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -43,13 +43,11 @@ validate_document(VALUE self, VALUE document) rb_raise(rb_eRuntimeError, "Could not create a validation context"); } -#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS xmlRelaxNGSetValidStructuredErrors( valid_ctxt, Nokogiri_error_array_pusher, (void *)errors ); -#endif xmlRelaxNGValidateDoc(valid_ctxt, doc); @@ -79,13 +77,11 @@ xml_relax_ng_parse_schema( rb_errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher); -#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS xmlRelaxNGSetParserStructuredErrors( c_parser_context, Nokogiri_error_array_pusher, (void *)rb_errors ); -#endif c_schema = xmlRelaxNGParse(c_parser_context); diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index 7663bf153f1..dfe7543d711 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -43,13 +43,11 @@ validate_document(VALUE self, VALUE document) rb_raise(rb_eRuntimeError, "Could not create a validation context"); } -#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS xmlSchemaSetValidStructuredErrors( valid_ctxt, Nokogiri_error_array_pusher, (void *)errors ); -#endif xmlSchemaValidateDoc(valid_ctxt, doc); @@ -84,13 +82,11 @@ validate_file(VALUE self, VALUE rb_filename) rb_raise(rb_eRuntimeError, "Could not create a validation context"); } -#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS xmlSchemaSetValidStructuredErrors( valid_ctxt, Nokogiri_error_array_pusher, (void *)errors ); -#endif xmlSchemaValidateFile(valid_ctxt, filename, 0); @@ -122,13 +118,11 @@ xml_schema_parse_schema( rb_errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher); -#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS xmlSchemaSetParserStructuredErrors( c_parser_context, Nokogiri_error_array_pusher, (void *)rb_errors ); -#endif parse_options_int = (int)NUM2INT(rb_funcall(rb_parse_options, rb_intern("to_i"), 0)); if (parse_options_int & XML_PARSE_NONET) { From 3043338d735b7be876df7fe1e3a49a520c77c7e7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 19:18:27 -0400 Subject: [PATCH 4/8] tidy: remove conditional use of htmlHandleOmittedElem which has been handled since libxml 2.7.7 --- ext/nokogiri/xml_node.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 07db41d72bb..1170e0bc9db 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -2165,15 +2165,6 @@ in_context(VALUE self, VALUE _str, VALUE _options) xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher); - /* Twiddle global variable because of a bug in libxml2. - * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7 - * - * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7 - */ -#ifndef HTML_PARSE_NOIMPLIED - htmlHandleOmittedElem(0); -#endif - /* This function adds a fake node to the child of +node+. If the parser * does not exit cleanly with XML_ERR_OK, the list is freed. This can * leave the child pointers in a bad state if they were originally empty. @@ -2202,10 +2193,6 @@ in_context(VALUE self, VALUE _str, VALUE _options) child_iter = child_iter->next; } -#ifndef HTML_PARSE_NOIMPLIED - htmlHandleOmittedElem(1); -#endif - xmlSetStructuredErrorFunc(NULL, NULL); /* From 9da62898f95b6c92c13722088f6c214b1ff1ddf7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 19:19:16 -0400 Subject: [PATCH 5/8] tidy: remove workaround for libxml with broken serialization only broken in the 2.6.x series --- lib/nokogiri/xml/node.rb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index 735bd50d3db..7f0761217d8 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -1577,19 +1577,12 @@ def add_sibling(next_or_previous, node_or_tags) node_or_tags end - USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze - private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION - def to_format(save_option, options) - return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION - options[:save_with] = save_option unless options[:save_with] serialize(options) end def write_format_to(save_option, io, options) - return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION - options[:save_with] ||= save_option write_to(io, options) end From c6880377b27a99783682fcec945a2a7dbf2409ab Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 19:58:26 -0400 Subject: [PATCH 6/8] tidy: remove xmlPreviousElementSibling workaround fixed in libxml 2.7.7 --- ext/nokogiri/xml_node.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 1170e0bc9db..0c59ae49e4a 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1066,17 +1066,10 @@ previous_element(VALUE self) xmlNodePtr node, sibling; Noko_Node_Get_Struct(self, xmlNode, node); - /* - * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7. - */ - sibling = node->prev; + sibling = xmlPreviousElementSibling(node); if (!sibling) { return Qnil; } - while (sibling && sibling->type != XML_ELEMENT_NODE) { - sibling = sibling->prev; - } - - return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ; + return noko_xml_node_wrap(Qnil, sibling); } /* :nodoc: */ From 7ed778eab51b9966e1eeadb7a452bd600a5aa574 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 21:47:28 -0400 Subject: [PATCH 7/8] tidy: remove conditional tests for unsupported versions of libxml2 --- test/html4/sax/test_parser.rb | 6 +----- test/html4/test_document.rb | 2 -- test/html4/test_document_fragment.rb | 14 ++------------ test/html4/test_element_description.rb | 4 +--- test/xml/test_document.rb | 8 +++----- 5 files changed, 7 insertions(+), 27 deletions(-) diff --git a/test/html4/sax/test_parser.rb b/test/html4/sax/test_parser.rb index 49254298666..efdd27a5bb4 100644 --- a/test/html4/sax/test_parser.rb +++ b/test/html4/sax/test_parser.rb @@ -116,11 +116,7 @@ def test_parser_attributes assert(block_called) - noshade_value = if Nokogiri.uses_libxml?("< 2.7.7") - ["noshade", "noshade"] - else - ["noshade", nil] - end + noshade_value = ["noshade", nil] assert_equal( [ diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 0372db438ba..cafca528ea5 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -74,14 +74,12 @@ def test_empty_string_returns_empty_doc end def test_to_xhtml_with_indent - skip if Nokogiri.uses_libxml?("~> 2.6.0") doc = Nokogiri::HTML4("foo") doc = Nokogiri::HTML4(doc.to_xhtml(indent: 2)) assert_indent(2, doc) end def test_write_to_xhtml_with_indent - skip if Nokogiri.uses_libxml?("~> 2.6.0") io = StringIO.new doc = Nokogiri::HTML4("foo") doc.write_xhtml_to(io, indent: 5) diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb index 5638890b4b0..323a33e05fe 100644 --- a/test/html4/test_document_fragment.rb +++ b/test/html4/test_document_fragment.rb @@ -100,11 +100,7 @@ def test_html_fragment def test_html_fragment_has_outer_text doc = "a
b
c" fragment = Nokogiri::HTML4::Document.new.fragment(doc) - if Nokogiri.uses_libxml?("<= 2.6.16") - assert_equal("a
b

c

", fragment.to_s) - else - assert_equal("a
b
c", fragment.to_s) - end + assert_equal("a
b
c", fragment.to_s) end def test_html_fragment_case_insensitivity @@ -162,13 +158,7 @@ def test_to_html def test_to_xhtml doc = "foo
bar

" fragment = Nokogiri::HTML4::Document.new.fragment(doc) - if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.7.0") - assert_equal("foo
bar

", fragment.to_xhtml) - else - # FIXME: why are we doing this ? this violates the spec, - # see http://www.w3.org/TR/xhtml1/#C_2 - assert_equal("foo
bar

", fragment.to_xhtml) - end + assert_equal("foo
bar

", fragment.to_xhtml) end def test_to_xml diff --git a/test/html4/test_element_description.rb b/test/html4/test_element_description.rb index c4638c6a347..4f9d7f3bc63 100644 --- a/test/html4/test_element_description.rb +++ b/test/html4/test_element_description.rb @@ -58,10 +58,8 @@ def test_description def test_subelements sub_elements = ElementDescription["body"].sub_elements - if Nokogiri.uses_libxml?(">= 2.7.7") + if Nokogiri.uses_libxml? assert_equal(65, sub_elements.length) - elsif Nokogiri.uses_libxml? - assert_equal(61, sub_elements.length) else assert_equal(105, sub_elements.length) end diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 4e275273e08..76262a1fcb2 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -646,11 +646,9 @@ def test_write_xml_to_with_indent assert_indent(5, doc) end - unless Nokogiri.uses_libxml?("~> 2.6.0") - def test_encoding - xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, "UTF-8") - assert_equal("UTF-8", xml.encoding) - end + def test_encoding + xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, "UTF-8") + assert_equal("UTF-8", xml.encoding) end def test_memory_explosion_on_invalid_xml From e57e4066f536f4921eb4260322543db63eb5c8a9 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 13 Jun 2024 21:53:09 -0400 Subject: [PATCH 8/8] doc: update CHANGELOG --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78af34ba50b..3fe891bc2b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,9 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Dependencies -* [CRuby] Vendored libxml2 is updated to [v2.13.0](https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.13.0). @flavorjones -* [CRuby] Vendored libxslt is updated to [v1.1.40](https://gitlab.gnome.org/GNOME/libxslt/-/releases/v1.1.40). @flavorjones +* [CRuby] Vendored libxml2 is updated to [v2.13.0](https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.13.0). [#3230] @flavorjones +* [CRuby] Vendored libxslt is updated to [v1.1.40](https://gitlab.gnome.org/GNOME/libxslt/-/releases/v1.1.40). [#3230] @flavorjones +* [CRuby] Minimum supported version of libxml2 raised to v2.7.7 (released 2010-03-15) from v2.6.21. @flavorjones ### Added