Skip to content

Commit

Permalink
perf: reuse XPathContext objects
Browse files Browse the repository at this point in the history
- the `XPathContext` object ...
  - can accept `nil` as the value arg to `#register_ns` and `#register_variable` to _deregister_ the namespace or variable
  - tracks namespaces and variables registered through those two methods
  - has a new `#reset` method that deregisters all namespaces and variables registered
  - has a new `#node=` method to set the current node being searched
- all the `Searchable` methods ...
  - use a thread-local XPathContext object that is only available in a block yielded by `#get_xpath_context`
  - and that context object is `#reset` when the block returns

Also, the JRuby impl now properly raises XPath::SyntaxError for unbound variables.
  • Loading branch information
flavorjones committed Dec 14, 2024
1 parent cffcbac commit f92c678
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 19 deletions.
2 changes: 1 addition & 1 deletion ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ public class XmlXpathContext extends RubyObject
final NokogiriXPathFunctionResolver fnResolver = NokogiriXPathFunctionResolver.create(handler);
try {
return tryGetNodeSet(context, expr, fnResolver);
} catch (TransformerException ex) {
} catch (TransformerException | RuntimeException ex) {
throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime,
(expr + ": " + ex.toString()),
ex).toThrowable();
Expand Down
64 changes: 54 additions & 10 deletions ext/nokogiri/xml_xpath_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,20 +124,33 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
* register_ns(prefix, uri) → Nokogiri::XML::XPathContext
*
* Register the namespace with +prefix+ and +uri+ for use in future queries.
* Passing a uri of +nil+ will unregister the namespace.
*
* [Returns] +self+
*/
static VALUE
rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
{
xmlXPathContextPtr c_context;
const xmlChar *ns_uri;

TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context);

xmlXPathRegisterNs(c_context,
(const xmlChar *)StringValueCStr(prefix),
(const xmlChar *)StringValueCStr(uri)
);
if (NIL_P(uri)) {
ns_uri = NULL;
} else {
ns_uri = (const xmlChar *)StringValueCStr(uri);
}

xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);

VALUE registered_namespaces = rb_iv_get(rb_context, "@registered_namespaces");
if (NIL_P(uri)) {
rb_hash_delete(registered_namespaces, prefix);
} else {
rb_hash_aset(registered_namespaces, prefix, Qtrue);
}

return rb_context;
}

Expand All @@ -146,6 +159,7 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
* register_variable(name, value) → Nokogiri::XML::XPathContext
*
* Register the variable +name+ with +value+ for use in future queries.
* Passing a value of +nil+ will unregister the variable.
*
* [Returns] +self+
*/
Expand All @@ -157,13 +171,20 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value

TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context);

xmlValue = xmlXPathNewCString(StringValueCStr(value));
if (NIL_P(value)) {
xmlValue = NULL;
} else {
xmlValue = xmlXPathNewCString(StringValueCStr(value));
}

xmlXPathRegisterVariable(
c_context,
(const xmlChar *)StringValueCStr(name),
xmlValue
);
xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);

VALUE registered_variables = rb_iv_get(rb_context, "@registered_variables");
if (NIL_P(value)) {
rb_hash_delete(registered_variables, name);
} else {
rb_hash_aset(registered_variables, name, Qtrue);
}

return rb_context;
}
Expand Down Expand Up @@ -394,6 +415,7 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)

xmlSetStructuredErrorFunc(NULL, NULL);
xmlSetGenericErrorFunc(NULL, NULL);
xmlXPathRegisterFuncLookup(c_context, NULL, NULL);

if (xpath == NULL) {
rb_exc_raise(rb_ary_entry(errors, 0));
Expand Down Expand Up @@ -447,9 +469,30 @@ rb_xml_xpath_context_new(VALUE klass, VALUE rb_node)
&xml_xpath_context_type,
c_context
);

rb_iv_set(rb_context, "@registered_namespaces", rb_hash_new());
rb_iv_set(rb_context, "@registered_variables", rb_hash_new());

return rb_context;
}


/* :nodoc: */
static VALUE
rb_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
{
xmlNodePtr c_node;
xmlXPathContextPtr c_context;

TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context);
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

c_context->doc = c_node->doc;
c_context->node = c_node;

return rb_node;
}

void
noko_init_xml_xpath_context(void)
{
Expand All @@ -465,4 +508,5 @@ noko_init_xml_xpath_context(void)
rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1);
rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2);
rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2);
rb_define_method(cNokogiriXmlXpathContext, "node=", rb_xml_xpath_context_set_node, 1);
}
34 changes: 26 additions & 8 deletions lib/nokogiri/xml/searchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -261,18 +261,36 @@ def xpath_internal(node, paths, handler, ns, binds)
end

def xpath_impl(node, path, handler, ns, binds)
ctx = get_xpath_context(node)
get_xpath_context(node) do |context|
context.register_namespaces(ns)
context.register_variables(binds)

ctx.register_namespaces(ns)
ctx.register_variables(binds)
path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?

path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?

ctx.evaluate(path, handler)
context.evaluate(path, handler)
end
end

def get_xpath_context(node)
XPathContext.new(node)
if Nokogiri.uses_libxml? && ENV["NOKOGIRI_DEOPTIMIZE_XPATH"].nil? # env var is an escape hatch
# optimized path
def get_xpath_context(node)
context = Thread.current.thread_variable_get(:nokogiri_xpath_context)
if context
context.node = node
else
context = Thread.current.thread_variable_set(:nokogiri_xpath_context, XPathContext.new(node))
end

begin
yield context
ensure
context.reset
end
end
else
def get_xpath_context(node)
yield XPathContext.new(node)
end
end
end
end
Expand Down
22 changes: 22 additions & 0 deletions lib/nokogiri/xml/xpath_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,28 @@ def register_variables(binds)
register_variable(key, value)
end
end

if Nokogiri.uses_libxml?
def reset
return unless

@registered_namespaces.each do |key, _|
register_ns(key, nil)
end
unless @registered_namespaces.empty?
warn "Nokogiri::XML::XPathContext#reset: unexpected registered namespaces: #{@registered_namespaces.keys}"
@registered_namespaces.clear
end

@registered_variables.each do |key, _|
register_variable(key, nil)
end
unless @registered_variables.empty?
warn "Nokogiri::XML::XPathContext#reset: unexpected registered variables: #{@registered_variables.keys}"
@registered_variables.clear
end
end
end
end
end
end
80 changes: 80 additions & 0 deletions test/xml/test_xpath.rb
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,86 @@ def collision(nodes)
assert_equal(3, doc.xpath("//self::*:child").length)
end
end

describe "re-using XPathContext objects within a thread" do
# see https://github.com/sparklemotion/nokogiri/issues/3266
# this optimization is CRuby-only, but we run the tests on JRuby for consistency
let(:doc) {
Nokogiri::XML::Document.parse(<<~XML)
<root xmlns="http://nokogiri.org/default" xmlns:ns1="http://nokogiri.org/ns1">
<child>default</child>
<ns1:child>ns1</ns1:child>
</root>
XML
}

it "clears registered namespaces" do
# make sure the thread-local XPathContext is initialized
doc.xpath("//xmlns:child")

# do namespaces behave the way we expect?
node = doc.at_xpath("//ns:child", { "ns" => "http://nokogiri.org/ns1" })
assert_equal("ns1", node.text)

assert_raises(XPath::SyntaxError) {
doc.at_xpath("//ns:child")
}

node = doc.at_xpath("//child")
assert_nil(node)

# make sure the nokogiri and nokogiri-builting namespaces are re-registered
doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler)
assert_equal(1, @handler.things.length)

if Nokogiri.uses_libxml?
nodes = doc.xpath("//xmlns:child[nokogiri-builtin:local-name-is('child')]")
assert_equal(1, nodes.length)
end
end

it "clears registered handlers and functions" do
# make sure the thread-local XPathContext is initialized
doc.xpath("//xmlns:child")

doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler)
assert_equal(1, @handler.things.length)

assert_raises(XPath::SyntaxError) {
doc.xpath("//xmlns:child[nokogiri:thing(.)]")
}

doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler)
assert_equal(2, @handler.things.length)

if Nokogiri.uses_libxml?
nodes = doc.xpath("//xmlns:child[nokogiri-builtin:local-name-is('child')]")
assert_equal(1, nodes.length)
end
end

it "clears registered variables" do
# make sure the thread-local XPathContext is initialized
doc.xpath("//xmlns:child")

nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Yes")
assert_equal(4, nodes.length)

assert_raises(XPath::SyntaxError) {
@xml.xpath("//address[@domestic=$value]")
}

nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Qwerty")
assert_empty(nodes)

assert_raises(XPath::SyntaxError) {
@xml.xpath("//address[@domestic=$value]")
}

nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Yes")
assert_equal(4, nodes.length)
end
end
end
end
end

0 comments on commit f92c678

Please sign in to comment.