diff --git a/CHANGELOG.md b/CHANGELOG.md index 025306b9a0f..7be0b055201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ We've resolved many long-standing bugs in the various schema classes, validation * Introduce support for a new SAX callback `XML::SAX::Document#reference`, which is called to report some parsed XML entities when `XML::SAX::ParserContext#replace_entities` is set to the default value `false`. This is necessary functionality for some applications that were previously relying on incorrect entity error reporting which has been fixed (see below). For more information, read the docs for `Nokogiri::XML::SAX::Document`. [#1926] @flavorjones * `XML::SAX::Parser#parse_memory` and `#parse_file` now accept an optional `encoding` argument. When not provided, the parser will fall back to the encoding passed to the initializer, and then fall back to autodetection. [#3288] @flavorjones * `XML::SAX::ParserContext.memory` now accepts an optional `encoding` argument. When not provided, the encoding will be autodetected. [#3288] @flavorjones +* `XML::DocumentFragment#parse_options` and `HTML4::DocumentFragment#parse_options` return the options used to parse the document fragment. @flavorjones * [CRuby] `Nokogiri::HTML5::Builder` is similar to `HTML4::Builder` but returns an `HTML5::Document`. [#3119] @flavorjones * [CRuby] Attributes in an HTML5 document can be serialized individually, something that has always been supported by the HTML4 serializer. [#3125, #3127] @flavorjones * [CRuby] Introduce a compile-time option, `--disable-xml2-legacy`, to remove from libxml2 its dependencies on `zlib` and `liblzma` and disable implicit `HTTP` network requests. These all remain enabled by default, and are present in the precompiled native gems. This option is a precursor for removing these libraries in a future major release, but may be interesting for the security-minded who do not need features like automatic decompression and would like to remove these dependencies. You can read more and give feedback on these plans in #3168. [#3247] @flavorjones diff --git a/lib/nokogiri/html4/document_fragment.rb b/lib/nokogiri/html4/document_fragment.rb index 1681822acbb..eae79bcb14a 100644 --- a/lib/nokogiri/html4/document_fragment.rb +++ b/lib/nokogiri/html4/document_fragment.rb @@ -91,6 +91,7 @@ def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEF return self unless tags options = Nokogiri::XML::ParseOptions.new(options) if Integer === options + @parse_options = options yield options if block_given? if ctx diff --git a/lib/nokogiri/xml/document_fragment.rb b/lib/nokogiri/xml/document_fragment.rb index 40cc8f4fa11..dbdc46b4243 100644 --- a/lib/nokogiri/xml/document_fragment.rb +++ b/lib/nokogiri/xml/document_fragment.rb @@ -4,6 +4,11 @@ module Nokogiri module XML class DocumentFragment < Nokogiri::XML::Node + # The options used to parse the document fragment. Returns the value of any options that were + # passed into the constructor as a parameter or set in a config block, else the default + # options for the specific subclass. + attr_reader :parse_options + #### # Create a Nokogiri::XML::DocumentFragment from +tags+ def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block) @@ -20,6 +25,7 @@ def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_ return self unless tags options = Nokogiri::XML::ParseOptions.new(options) if Integer === options + @parse_options = options yield options if block_given? children = if ctx diff --git a/test/html4/sax/test_document_error.rb b/test/html4/sax/test_document_error.rb index 513164148fd..fdd15ef694a 100644 --- a/test/html4/sax/test_document_error.rb +++ b/test/html4/sax/test_document_error.rb @@ -20,15 +20,10 @@ def start_document end def test_warning_document_encounters_error_but_terminates_normally - # Probably I'm doing something wrong, but I can't make nekohtml report errors, - # despite setting http://cyberneko.org/html/features/report-errors. - # See https://nekohtml.sourceforge.net/settings.html for more info. - # I'd love some help here if someone finds this comment and cares enough to dig in. - skip_unless_libxml2("nekohtml sax parser does not seem to report errors?") - warning_parser = Nokogiri::HTML4::SAX::Parser.new(Nokogiri::SAX::TestCase::Doc.new) warning_parser.parse("
<test paragraph\r\nfoo bar
\r\n\r\n" nokogiri = Nokogiri::HTML4.parse(html) - if RUBY_PLATFORM.include?("java") - # NKF linebreak modes are not supported as of jruby 1.2 - # see http://jira.codehaus.org/browse/JRUBY-3602 for status + if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.14.0") assert_equal( "testparagraph\nfoobar
", nokogiri.at("p").to_html.delete(" "), diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb index 4a8e1fb40c6..abb11161a34 100644 --- a/test/xml/test_node.rb +++ b/test/xml/test_node.rb @@ -105,9 +105,19 @@ def test_node_context_parsing_of_malformed_html_fragment context_node = doc.at_css("div") nodeset = context_node.parse("