Skip to content

Commit

Permalink
fix: skip relinking namespaces in HTML4/5 documents
Browse files Browse the repository at this point in the history
Also fix both implementations of relink_namespaces to not skip
attributes when the node itself doesn't have a namespace.

Finally, the private method
XML::Node#add_child_node_and_reparent_attrs is no longer needed after
fixing relink_namespaces, so this commit essentially reverts 9fd03d8.
  • Loading branch information
flavorjones committed Aug 14, 2021
1 parent e9d52d2 commit cf0392e
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 77 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

## next / unreleased

### Fixed

* [CRuby] When reparenting HTML nodes, do not attempt to relink namespaces. Previously, an HTML attribute with a colon might be interpreted as a prefixed/namespaced atttribute (for example, "xml:lang"). [[#1790](https://github.com/sparklemotion/nokogiri/issues/1790)]


### Improved

* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
Expand Down
42 changes: 22 additions & 20 deletions ext/java/nokogiri/XmlNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -421,24 +421,24 @@ public class XmlNode extends RubyObject
String nsURI = e.lookupNamespaceURI(prefix);
this.node = NokogiriHelpers.renameNode(e, nsURI, e.getNodeName());

if (nsURI == null || nsURI.isEmpty()) { return; }

String currentPrefix = e.getParentNode().lookupPrefix(nsURI);
String currentURI = e.getParentNode().lookupNamespaceURI(prefix);
boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI);

// add xmlns attribute if this is a new root node or if the node's
// namespace isn't a default namespace in the new document
if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) {
// this is the root node, so we must set the namespaces attributes anyway
e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI);
} else if (prefix == null) {
// this is a default namespace but isn't the default where this node is being added
if (!isDefault) { e.setAttribute("xmlns", nsURI); }
} else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) {
// this is a prefixed namespace
// but doesn't have the same prefix or the prefix is set to a different URI
e.setAttribute("xmlns:" + prefix, nsURI);
if (nsURI != null && !nsURI.isEmpty()) {
String currentPrefix = e.getParentNode().lookupPrefix(nsURI);
String currentURI = e.getParentNode().lookupNamespaceURI(prefix);
boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI);

// add xmlns attribute if this is a new root node or if the node's
// namespace isn't a default namespace in the new document
if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) {
// this is the root node, so we must set the namespaces attributes anyway
e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI);
} else if (prefix == null) {
// this is a default namespace but isn't the default where this node is being added
if (!isDefault) { e.setAttribute("xmlns", nsURI); }
} else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) {
// this is a prefixed namespace
// but doesn't have the same prefix or the prefix is set to a different URI
e.setAttribute("xmlns:" + prefix, nsURI);
}
}

if (e.hasAttributes()) {
Expand Down Expand Up @@ -473,8 +473,10 @@ public class XmlNode extends RubyObject
}
}

if (this.node.hasChildNodes()) {
relink_namespace(context, getChildren());
if (nsURI != null && !nsURI.isEmpty()) {
if (this.node.hasChildNodes()) {
relink_namespace(context, getChildren());
}
}
}

Expand Down
29 changes: 16 additions & 13 deletions ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,25 +115,25 @@ relink_namespace(xmlNodePtr reparented)
}
}

/* Only walk all children if there actually is a namespace we need to */
/* reparent. */
if (NULL == reparented->ns) { return; }

/* When a node gets reparented, walk it's children to make sure that */
/* their namespaces are reparented as well. */
child = reparented->children;
while (NULL != child) {
relink_namespace(child);
child = child->next;
}

if (reparented->type == XML_ELEMENT_NODE) {
attr = reparented->properties;
while (NULL != attr) {
relink_namespace((xmlNodePtr)attr);
attr = attr->next;
}
}

/* Only walk all children if there actually is a namespace we need to */
/* reparent. */
if (reparented->ns) {
/* When a node gets reparented, walk it's children to make sure that */
/* their namespaces are reparented as well. */
child = reparented->children;
while (NULL != child) {
relink_namespace(child);
child = child->next;
}
}
}

/* :nodoc: */
Expand Down Expand Up @@ -351,7 +351,10 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func
*/
DATA_PTR(reparentee_obj) = reparented ;

relink_namespace(reparented);
/* HTML doesn't have namespaces */
if (!rb_obj_is_kind_of(DOC_RUBY_OBJECT(reparented->doc), cNokogiriHtml4Document)) {
relink_namespace(reparented);
}

reparented_obj = noko_xml_node_wrap(Qnil, reparented);

Expand Down
22 changes: 1 addition & 21 deletions lib/nokogiri/html5/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,28 +63,8 @@ def fragment(tags)
return super(tags) unless document.is_a?(HTML5::Document)
DocumentFragment.new(document, tags, self)
end

private

# HTML elements can have attributes that contain colons.
# Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
# and tries to create an attribute in a namespace. This is especially
# annoying with attribute names like xml:lang since libxml2 will
# actually create the xml namespace if it doesn't exist already.
def add_child_node_and_reparent_attrs(node)
return super(node) unless document.is_a?(HTML5::Document)
# I'm not sure what this method is supposed to do. Reparenting
# namespaces is handled by libxml2, including child namespaces which
# this method wouldn't handle.
# https://github.com/sparklemotion/nokogiri/issues/1790
add_child_node(node)
# node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
# attr.remove
# ns = attr.namespace
# a["#{ns.prefix}:#{attr.name}"] = attr.value
# end
end
end

# Monkey patch
XML::Node.prepend(HTML5::Node)
end
Expand Down
16 changes: 4 additions & 12 deletions lib/nokogiri/xml/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ def >(selector)
def add_child(node_or_tags)
node_or_tags = coerce(node_or_tags)
if node_or_tags.is_a?(XML::NodeSet)
node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
node_or_tags.each { |n| add_child_node n }
else
add_child_node_and_reparent_attrs node_or_tags
add_child_node node_or_tags
end
node_or_tags
end
Expand Down Expand Up @@ -248,9 +248,9 @@ def children=(node_or_tags)
node_or_tags = coerce(node_or_tags)
children.unlink
if node_or_tags.is_a?(XML::NodeSet)
node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
node_or_tags.each { |n| add_child_node n }
else
add_child_node_and_reparent_attrs node_or_tags
add_child_node node_or_tags
end
node_or_tags
end
Expand Down Expand Up @@ -1223,14 +1223,6 @@ def inspect_attributes

# @private
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze

def add_child_node_and_reparent_attrs(node)
add_child_node node
node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
attr_node.remove
node[attr_node.name] = attr_node.value
end
end
end
end
end
Expand Down
22 changes: 11 additions & 11 deletions test/namespaces/test_namespaces_in_created_doc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,19 @@ def test_created_namespaced_attribute_on_unparented_node_is_renamespaced_in_xml_
assert(child.attribute_nodes.first.namespace)
end

# def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html4_doc
# doc1 = Nokogiri::HTML4("<html><body></body></html>")
# doc2 = Nokogiri::HTML4("<html><body></body></html>")
def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html4_doc
doc1 = Nokogiri::HTML4("<html><body></body></html>")
doc2 = Nokogiri::HTML4("<html><body></body></html>")

# child = doc1.create_element("div")
# child["tempname"] = "en"
# attr = child.attribute("tempname")
# attr.name = "xml:lang"
# assert_nil(child.attribute_nodes.first.namespace)
child = doc1.create_element("div")
child["tempname"] = "en"
attr = child.attribute("tempname")
attr.name = "xml:lang"
assert_nil(child.attribute_nodes.first.namespace)

# doc2.at_css("body").add_child(child)
# assert_nil(child.attribute_nodes.first.namespace)
# end
doc2.at_css("body").add_child(child)
assert_nil(child.attribute_nodes.first.namespace)
end

def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html5_doc
skip("HTML5 not supported on this platform yet") unless defined?(Nokogiri::HTML5)
Expand Down

0 comments on commit cf0392e

Please sign in to comment.