Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: relink_namespaces handles attributes properly #2310

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/
* [JRuby] `NodeSet#[]` now raises a TypeError if passed an invalid parameter type. [[#2211](https://github.com/sparklemotion/nokogiri/issues/2211)]


### Fixed

* [CRuby] When reparenting HTML nodes, do not attempt to relink namespaces. Previously, an HTML attribute with a colon might be interpreted as a prefixed/namespaced atttribute (for example, "xml:lang"). [[#1790](https://github.com/sparklemotion/nokogiri/issues/1790)]


### Improved

* Serialization of HTML5 documents and fragments has been re-implemented and is ~10x faster than previous versions. [[#2596](https://github.com/sparklemotion/nokogiri/issues/2596), [#2569](https://github.com/sparklemotion/nokogiri/issues/2569)]
Expand Down
42 changes: 22 additions & 20 deletions ext/java/nokogiri/XmlNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -440,24 +440,24 @@ public class XmlNode extends RubyObject
set_namespace(context, ((XmlNode)parent(context)).namespace(context));
}
return;
}

String currentPrefix = e.getParentNode().lookupPrefix(nsURI);
String currentURI = e.getParentNode().lookupNamespaceURI(prefix);
boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI);

// add xmlns attribute if this is a new root node or if the node's
// namespace isn't a default namespace in the new document
if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) {
// this is the root node, so we must set the namespaces attributes anyway
e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI);
} else if (prefix == null) {
// this is a default namespace but isn't the default where this node is being added
if (!isDefault) { e.setAttribute("xmlns", nsURI); }
} else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) {
// this is a prefixed namespace
// but doesn't have the same prefix or the prefix is set to a different URI
e.setAttribute("xmlns:" + prefix, nsURI);
} else {
String currentPrefix = e.getParentNode().lookupPrefix(nsURI);
String currentURI = e.getParentNode().lookupNamespaceURI(prefix);
boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI);

// add xmlns attribute if this is a new root node or if the node's
// namespace isn't a default namespace in the new document
if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) {
// this is the root node, so we must set the namespaces attributes anyway
e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI);
} else if (prefix == null) {
// this is a default namespace but isn't the default where this node is being added
if (!isDefault) { e.setAttribute("xmlns", nsURI); }
} else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) {
// this is a prefixed namespace
// but doesn't have the same prefix or the prefix is set to a different URI
e.setAttribute("xmlns:" + prefix, nsURI);
}
}

if (e.hasAttributes()) {
Expand Down Expand Up @@ -492,8 +492,10 @@ public class XmlNode extends RubyObject
}
}

if (this.node.hasChildNodes()) {
relink_namespace(context, getChildren());
if (nsURI != null && !nsURI.isEmpty()) {
if (this.node.hasChildNodes()) {
relink_namespace(context, getChildren());
}
}
}

Expand Down
29 changes: 16 additions & 13 deletions ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,25 +153,25 @@ relink_namespace(xmlNodePtr reparented)
}
}

/* Only walk all children if there actually is a namespace we need to */
/* reparent. */
if (NULL == reparented->ns) { return; }

/* When a node gets reparented, walk it's children to make sure that */
/* their namespaces are reparented as well. */
child = reparented->children;
while (NULL != child) {
relink_namespace(child);
child = child->next;
}

if (reparented->type == XML_ELEMENT_NODE) {
attr = reparented->properties;
while (NULL != attr) {
relink_namespace((xmlNodePtr)attr);
attr = attr->next;
}
}

/* Only walk all children if there actually is a namespace we need to */
/* reparent. */
if (reparented->ns) {
/* When a node gets reparented, walk it's children to make sure that */
/* their namespaces are reparented as well. */
child = reparented->children;
while (NULL != child) {
relink_namespace(child);
child = child->next;
}
}
}


Expand Down Expand Up @@ -408,7 +408,10 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func
/* if we've created a cycle, raise an exception */
raise_if_ancestor_of_self(reparented);

relink_namespace(reparented);
/* HTML doesn't have namespaces */
if (!rb_obj_is_kind_of(DOC_RUBY_OBJECT(reparented->doc), cNokogiriHtml4Document)) {
relink_namespace(reparented);
}

return reparented_obj ;
}
Expand Down
23 changes: 1 addition & 22 deletions lib/nokogiri/html5/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,29 +67,8 @@ def fragment(tags)

DocumentFragment.new(document, tags, self)
end

private

# HTML elements can have attributes that contain colons.
# Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
# and tries to create an attribute in a namespace. This is especially
# annoying with attribute names like xml:lang since libxml2 will
# actually create the xml namespace if it doesn't exist already.
def add_child_node_and_reparent_attrs(node)
return super(node) unless document.is_a?(HTML5::Document)

# I'm not sure what this method is supposed to do. Reparenting
# namespaces is handled by libxml2, including child namespaces which
# this method wouldn't handle.
# https://github.com/sparklemotion/nokogiri/issues/1790
add_child_node(node)
# node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
# attr.remove
# ns = attr.namespace
# a["#{ns.prefix}:#{attr.name}"] = attr.value
# end
end
end

# Monkey patch
XML::Node.prepend(HTML5::Node)
end
Expand Down
16 changes: 4 additions & 12 deletions lib/nokogiri/xml/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ def decorate!
def add_child(node_or_tags)
node_or_tags = coerce(node_or_tags)
if node_or_tags.is_a?(XML::NodeSet)
node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
node_or_tags.each { |n| add_child_node(n) }
else
add_child_node_and_reparent_attrs(node_or_tags)
add_child_node(node_or_tags)
end
node_or_tags
end
Expand Down Expand Up @@ -263,9 +263,9 @@ def children=(node_or_tags)
node_or_tags = coerce(node_or_tags)
children.unlink
if node_or_tags.is_a?(XML::NodeSet)
node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) }
node_or_tags.each { |n| add_child_node(n) }
else
add_child_node_and_reparent_attrs(node_or_tags)
add_child_node(node_or_tags)
end
end

Expand Down Expand Up @@ -1392,14 +1392,6 @@ def inspect_attributes
end

IMPLIED_XPATH_CONTEXTS = [".//"].freeze

def add_child_node_and_reparent_attrs(node)
add_child_node(node)
node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node|
attr_node.remove
node[attr_node.name] = attr_node.value
end
end
end
end
end
Expand Down
43 changes: 43 additions & 0 deletions test/namespaces/test_namespaces_in_created_doc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,49 @@ def test_created_namespaced_attribute_on_unparented_node
doc.root.add_child(node)
assert_equal("http://foo.io", doc.root.children.first.attribute_nodes.first.namespace.href)
end

def test_created_namespaced_attribute_on_unparented_node_is_renamespaced_in_xml_doc
doc1 = Nokogiri::XML("<root>")
doc2 = Nokogiri::XML("<root>")

child = doc1.create_element("child")
child["tempname"] = "en"
attr = child.attribute("tempname")
attr.name = "xml:lang"
assert_nil(child.attribute_nodes.first.namespace)

doc2.root.add_child(child)
assert(child.attribute_nodes.first.namespace)
end

def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html4_doc
doc1 = Nokogiri::HTML4("<html><body></body></html>")
doc2 = Nokogiri::HTML4("<html><body></body></html>")

child = doc1.create_element("div")
child["tempname"] = "en"
attr = child.attribute("tempname")
attr.name = "xml:lang"
assert_nil(child.attribute_nodes.first.namespace)

doc2.at_css("body").add_child(child)
assert_nil(child.attribute_nodes.first.namespace)
end

def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html5_doc
skip("HTML5 not supported on this platform yet") unless defined?(Nokogiri::HTML5)
doc1 = Nokogiri::HTML5("<html><body></body></html>")
doc2 = Nokogiri::HTML5("<html><body></body></html>")

child = doc1.create_element("div")
child["tempname"] = "en"
attr = child.attribute("tempname")
attr.name = "xml:lang"
assert_nil(child.attribute_nodes.first.namespace)

doc2.at_css("body").add_child(child)
assert_nil(child.attribute_nodes.first.namespace)
end
end
end
end