diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index c142273d49..80a4f51315 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -144,13 +144,6 @@ noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri) xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri); - VALUE registered_namespaces = rb_iv_get(rb_context, "@registered_namespaces"); - if (NIL_P(uri)) { - rb_hash_delete(registered_namespaces, prefix); - } else { - rb_hash_aset(registered_namespaces, prefix, Qtrue); - } - return rb_context; } @@ -179,13 +172,6 @@ noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE val xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue); - VALUE registered_variables = rb_iv_get(rb_context, "@registered_variables"); - if (NIL_P(value)) { - rb_hash_delete(registered_variables, name); - } else { - rb_hash_aset(registered_variables, name, Qtrue); - } - return rb_context; } @@ -426,6 +412,78 @@ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context) return rb_xpath_object; } +static xmlXPathContextPtr +_noko_xml_xpath_context_initialize(void) +{ + xmlXPathContextPtr c_context = xmlXPathNewContext(NULL); + + xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI); + xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI); + + xmlXPathRegisterFuncNS(c_context, + (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI, + noko_xml_xpath_context_xpath_func_css_class); + xmlXPathRegisterFuncNS(c_context, + (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI, + noko_xml_xpath_context_xpath_func_local_name_is); + + return c_context; +} + +static VALUE +_noko_xml_xpath_context_protocontext(void) +{ + VALUE rb_context = rb_iv_get(cNokogiriXmlXpathContext, "@protocontext"); + + if (NIL_P(rb_context)) { + xmlXPathContextPtr c_context = _noko_xml_xpath_context_initialize(); + rb_context = TypedData_Wrap_Struct(cNokogiriXmlXpathContext, &_noko_xml_xpath_context_type, c_context); + rb_iv_set(cNokogiriXmlXpathContext, "@protocontext", rb_context); + } + + return rb_context; +} + +static void * +_noko_xml_xpath_context__xmlHashCopyPtr(void *payload, const xmlChar *name) +{ + return payload; +} + +static void * +_noko_xml_xpath_context__xmlHashCopyString(void *payload, const xmlChar *name) +{ + return xmlStrdup(payload); +} + +static xmlHashTablePtr +_noko_xml_xpath_context_copy_nsHash(xmlHashTablePtr hash) +{ + return xmlHashCopy(hash, _noko_xml_xpath_context__xmlHashCopyString); +} + +static xmlHashTablePtr +_noko_xml_xpath_context_copy_funcHash(xmlHashTablePtr hash) +{ + return xmlHashCopy(hash, _noko_xml_xpath_context__xmlHashCopyPtr); +} + +static xmlXPathContextPtr +_noko_xml_xpath_context_fast_initialize(void) +{ + xmlXPathContextPtr c_protocontext; + VALUE rb_protocontext = _noko_xml_xpath_context_protocontext(); + TypedData_Get_Struct(rb_protocontext, xmlXPathContext, &_noko_xml_xpath_context_type, c_protocontext); + + xmlXPathContextPtr c_context = xmlMalloc(sizeof(xmlXPathContext)); + memcpy(c_context, c_protocontext, sizeof(xmlXPathContext)); + + c_context->funcHash = _noko_xml_xpath_context_copy_funcHash(c_protocontext->funcHash); + c_context->nsHash = _noko_xml_xpath_context_copy_nsHash(c_protocontext->nsHash); + + return c_context; +} + /* * call-seq: * new(node) @@ -445,24 +503,12 @@ noko_xml_xpath_context_new(VALUE klass, VALUE rb_node) xmlXPathInit(); /* deprecated in 40483d0 */ #endif - c_context = xmlXPathNewContext(c_node->doc); + c_context = _noko_xml_xpath_context_fast_initialize(); + c_context->doc = c_node->doc; c_context->node = c_node; - xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI); - xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI); - - xmlXPathRegisterFuncNS(c_context, - (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI, - noko_xml_xpath_context_xpath_func_css_class); - xmlXPathRegisterFuncNS(c_context, - (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI, - noko_xml_xpath_context_xpath_func_local_name_is); - rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context); - rb_iv_set(rb_context, "@registered_namespaces", rb_hash_new()); - rb_iv_set(rb_context, "@registered_variables", rb_hash_new()); - return rb_context; } @@ -493,6 +539,8 @@ noko_init_xml_xpath_context(void) rb_undef_alloc_func(cNokogiriXmlXpathContext); + rb_iv_set(cNokogiriXmlXpathContext, "@protocontext", Qnil); + rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1); rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1); diff --git a/lib/nokogiri/xml/searchable.rb b/lib/nokogiri/xml/searchable.rb index 11c0532639..2985fa63d1 100644 --- a/lib/nokogiri/xml/searchable.rb +++ b/lib/nokogiri/xml/searchable.rb @@ -261,36 +261,13 @@ def xpath_internal(node, paths, handler, ns, binds) end def xpath_impl(node, path, handler, ns, binds) - get_xpath_context(node) do |context| - context.register_namespaces(ns) - context.register_variables(binds) + context = XPathContext.new(node) + context.register_namespaces(ns) + context.register_variables(binds) - path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml? + path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml? - context.evaluate(path, handler) - end - end - - if Nokogiri.uses_libxml? && ENV["NOKOGIRI_DEOPTIMIZE_XPATH"].nil? # env var is an escape hatch - # optimized path - def get_xpath_context(node) - context = Thread.current.thread_variable_get(:nokogiri_xpath_context) - if context - context.node = node - else - context = Thread.current.thread_variable_set(:nokogiri_xpath_context, XPathContext.new(node)) - end - - begin - yield context - ensure - context.reset - end - end - else - def get_xpath_context(node) - yield XPathContext.new(node) - end + context.evaluate(path, handler) end end end diff --git a/lib/nokogiri/xml/xpath_context.rb b/lib/nokogiri/xml/xpath_context.rb index 2078f40098..033af04b02 100644 --- a/lib/nokogiri/xml/xpath_context.rb +++ b/lib/nokogiri/xml/xpath_context.rb @@ -22,28 +22,6 @@ def register_variables(binds) register_variable(key, value) end end - - if Nokogiri.uses_libxml? - def reset - return unless - - @registered_namespaces.each do |key, _| - register_ns(key, nil) - end - unless @registered_namespaces.empty? - warn "Nokogiri::XML::XPathContext#reset: unexpected registered namespaces: #{@registered_namespaces.keys}" - @registered_namespaces.clear - end - - @registered_variables.each do |key, _| - register_variable(key, nil) - end - unless @registered_variables.empty? - warn "Nokogiri::XML::XPathContext#reset: unexpected registered variables: #{@registered_variables.keys}" - @registered_variables.clear - end - end - end end end end diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 790f0094d0..de94b5cd4e 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -699,83 +699,27 @@ def collision(nodes) end end - describe "re-using XPathContext objects within a thread" do - # see https://github.com/sparklemotion/nokogiri/issues/3266 - # this optimization is CRuby-only, but we run the tests on JRuby for consistency - let(:doc) { - Nokogiri::XML::Document.parse(<<~XML) - - default - ns1 - - XML - } - - it "clears registered namespaces" do - # make sure the thread-local XPathContext is initialized - doc.xpath("//xmlns:child") - - # do namespaces behave the way we expect? - node = doc.at_xpath("//ns:child", { "ns" => "http://nokogiri.org/ns1" }) - assert_equal("ns1", node.text) - - assert_raises(XPath::SyntaxError) { - doc.at_xpath("//ns:child") - } - - node = doc.at_xpath("//child") - assert_nil(node) - - # make sure the nokogiri and nokogiri-builting namespaces are re-registered - doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler) - assert_equal(1, @handler.things.length) - - if Nokogiri.uses_libxml? - nodes = doc.xpath("//xmlns:child[nokogiri-builtin:local-name-is('child')]") - assert_equal(1, nodes.length) - end - end - - it "clears registered handlers and functions" do - # make sure the thread-local XPathContext is initialized - doc.xpath("//xmlns:child") - - doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler) - assert_equal(1, @handler.things.length) - - assert_raises(XPath::SyntaxError) { - doc.xpath("//xmlns:child[nokogiri:thing(.)]") - } - - doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler) - assert_equal(2, @handler.things.length) - - if Nokogiri.uses_libxml? - nodes = doc.xpath("//xmlns:child[nokogiri-builtin:local-name-is('child')]") - assert_equal(1, nodes.length) + it "handles recursion OK" do + handler = Class.new do + def has_sizzle(node) + node.css("foo.sizzle").any? end end - it "clears registered variables" do - # make sure the thread-local XPathContext is initialized - doc.xpath("//xmlns:child") - - nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Yes") - assert_equal(4, nodes.length) - - assert_raises(XPath::SyntaxError) { - @xml.xpath("//address[@domestic=$value]") - } - - nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Qwerty") - assert_empty(nodes) + doc = Nokogiri::XML::Document.parse(<<~XML) + + + + + + XML - assert_raises(XPath::SyntaxError) { - @xml.xpath("//address[@domestic=$value]") - } + result = doc.xpath("//target[nokogiri:has_sizzle(.)]", handler.new) - nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Yes") - assert_equal(4, nodes.length) + assert_pattern do + result => [ + {name: "target", attributes: [{ name: "id", value: "3" }] }, + ] end end end diff --git a/test/xml/test_xpath_context.rb b/test/xml/test_xpath_context.rb new file mode 100644 index 0000000000..aa1d88a55c --- /dev/null +++ b/test/xml/test_xpath_context.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +require "helper" + +module Nokogiri + module XML + describe XPathContext do + it "can register and deregister namespaces" do + doc = Document.parse(<<~XML) + + default + ns1 + + XML + + xc = XPathContext.new(doc) + + assert_raises(XPath::SyntaxError) do + xc.evaluate("//xmlns:child") + end + + xc.register_namespaces({ "xmlns" => "http://nokogiri.org/default" }) + assert_pattern do + xc.evaluate("//xmlns:child") => [ + { name: "child", namespace: { href: "http://nokogiri.org/default" } } + ] + end + + xc.register_namespaces({ "xmlns" => nil }) + assert_raises(XPath::SyntaxError) do + xc.evaluate("//xmlns:child") + end + end + + it "can register and deregister variables" do + doc = Nokogiri::XML.parse(File.read(TestBase::XML_FILE), TestBase::XML_FILE) + + xc = XPathContext.new(doc) + + assert_raises(XPath::SyntaxError) do + xc.evaluate("//address[@domestic=$value]") + end + + xc.register_variables({ "value" => "Yes" }) + nodes = xc.evaluate("//address[@domestic=$value]") + assert_equal(4, nodes.length) + + xc.register_variables({ "value" => "Qwerty" }) + nodes = xc.evaluate("//address[@domestic=$value]") + assert_empty(nodes) + + xc.register_variables({ "value" => nil }) + assert_raises(XPath::SyntaxError) do + xc.evaluate("//address[@domestic=$value]") + end + end + + it "#node=" do + doc = Nokogiri::XML::Document.parse(<<~XML) + + one + two + three + + XML + + xc = XPathContext.new(doc) + results = xc.evaluate(".//foo") + assert_equal(3, results.length) + + xc.node = doc.root.elements[0] + assert_pattern { xc.evaluate(".//foo") => [{ name: "foo", inner_html: "one" }] } + + xc.node = doc.root.elements[1] + assert_pattern { xc.evaluate(".//foo") => [{ name: "foo", inner_html: "two" }] } + end + end + end +end