diff --git a/ext/nokogiri/html4_document.c b/ext/nokogiri/html4_document.c
index 2a308a109f9..e3e0ee0847f 100644
--- a/ext/nokogiri/html4_document.c
+++ b/ext/nokogiri/html4_document.c
@@ -7,9 +7,9 @@ static ID id_to_s;
/*
* call-seq:
- * new
+ * new(uri=nil, external_id=nil) → HTML4::Document
*
- * Create a new document
+ * Create a new empty document with base URI +uri+ and external ID +external_id+.
*/
static VALUE
rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c
index 66c40bda263..a43813b9ceb 100644
--- a/ext/nokogiri/nokogiri.c
+++ b/ext/nokogiri/nokogiri.c
@@ -185,8 +185,8 @@ Init_nokogiri(void)
{
mNokogiri = rb_define_module("Nokogiri");
mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
- mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
- mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c
index e1022f67f5f..740819300b5 100644
--- a/ext/nokogiri/xml_document.c
+++ b/ext/nokogiri/xml_document.c
@@ -370,6 +370,8 @@ noko_xml_document_s_read_io(VALUE rb_class,
VALUE rb_encoding,
VALUE rb_options)
{
+ /* TODO: deprecate this method; parse should be the preferred entry point. Then we can make this
+ private. */
libxmlStructuredErrorHandlerState handler_state;
VALUE rb_errors = rb_ary_new();
@@ -417,6 +419,8 @@ noko_xml_document_s_read_memory(VALUE rb_class,
VALUE rb_encoding,
VALUE rb_options)
{
+ /* TODO: deprecate this method; parse should be the preferred entry point. Then we can make this
+ private. */
VALUE rb_errors = rb_ary_new();
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
@@ -444,9 +448,9 @@ noko_xml_document_s_read_memory(VALUE rb_class,
/*
* call-seq:
- * new(version = default)
+ * new(version = "1.0")
*
- * Create a new document with +version+ (defaults to "1.0")
+ * Create a new empty document declaring XML version +version+.
*/
static VALUE
new (int argc, VALUE *argv, VALUE klass)
@@ -756,9 +760,7 @@ void
noko_init_xml_document(void)
{
assert(cNokogiriXmlNode);
- /*
- * Nokogiri::XML::Document wraps an xml document.
- */
+
cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc);
diff --git a/lib/nokogiri/html5.rb b/lib/nokogiri/html5.rb
index 9ca26db494a..5566e058ce0 100644
--- a/lib/nokogiri/html5.rb
+++ b/lib/nokogiri/html5.rb
@@ -46,11 +46,11 @@ def self.HTML5(...)
# The document and fragment parsing methods support options that are different from
# Nokogiri::HTML4::Document or Nokogiri::XML::Document.
#
- # - Nokogiri.HTML5(html, url:, encoding:, **parse_options)
- # - Nokogiri::HTML5.parse(html, url:, encoding:, **parse_options)
- # - Nokogiri::HTML5::Document.parse(html, url:, encoding:, **parse_options)
- # - Nokogiri::HTML5.fragment(html, encoding = nil, **parse_options)
- # - Nokogiri::HTML5::DocumentFragment.parse(html, encoding = nil, **parse_options)
+ # - Nokogiri.HTML5(input, url:, encoding:, **parse_options)
+ # - Nokogiri::HTML5.parse(input, url:, encoding:, **parse_options)
+ # - Nokogiri::HTML5::Document.parse(input, url:, encoding:, **parse_options)
+ # - Nokogiri::HTML5.fragment(input, encoding:, **parse_options)
+ # - Nokogiri::HTML5::DocumentFragment.parse(input, encoding:, **parse_options)
#
# The four currently supported parse options are
#
diff --git a/lib/nokogiri/html5/document.rb b/lib/nokogiri/html5/document.rb
index 51b77c23b36..8bb4f10d28c 100644
--- a/lib/nokogiri/html5/document.rb
+++ b/lib/nokogiri/html5/document.rb
@@ -50,8 +50,9 @@ class Document < Nokogiri::HTML4::Document
class << self
# :call-seq:
- # parse(input) { |parse_options| ... }
- # parse(input, url:, encoding:, **parse_options)
+ # parse(input) { |options| ... } → HTML5::Document
+ # parse(input, url:, encoding:) { |options| ... } → HTML5::Document
+ # parse(input, **options) → HTML5::Document
#
# Parse \HTML input with a parser compliant with the HTML5 spec. This method uses the
# encoding of +input+ if it can be determined, or else falls back to the +encoding:+
@@ -62,11 +63,25 @@ class << self
#
# [Optional Parameters]
# - +url:+ (String) the base URI of the document.
- # - +encoding+ (Encoding) The encoding that should be used when processing the
- # document. This option is only used as a fallback when the encoding of +input+ cannot be
- # determined.
- # - +parse_options+ (Hash) represents keywords arguments that control the behavior of the
- # parser. See rdoc-ref:HTML5@Parsing+options for a list of available options.
+ #
+ # [Optional Keyword Arguments]
+ # - +encoding:+ (Encoding) The encoding that should be used when processing the document.
+ # This option is only used as a fallback when the encoding of +input+ cannot be
+ # determined.
+ #
+ # - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
+ # +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
+ #
+ # - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
+ # +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
+ #
+ # - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
+ # element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
+ #
+ # - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
+ # elements as text. (default +false+)
+ #
+ # See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
#
# [Yields]
# If present, the block will be passed a Hash object to modify with parse options before the
diff --git a/lib/nokogiri/xml/document.rb b/lib/nokogiri/xml/document.rb
index 2ef98c9dd47..6c9d4949a12 100644
--- a/lib/nokogiri/xml/document.rb
+++ b/lib/nokogiri/xml/document.rb
@@ -5,12 +5,12 @@
module Nokogiri
module XML
- # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
- # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
- # on parsing.
+ # Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
+ # is created by parsing \XML content from a String or an IO object. See
+ # Nokogiri::XML::Document.parse for more information on parsing.
#
- # For searching a Document, see Nokogiri::XML::Searchable#css and
- # Nokogiri::XML::Searchable#xpath
+ # Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
+ # please read that class's documentation as well.
class Document < Nokogiri::XML::Node
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
@@ -25,34 +25,34 @@ class Document < Nokogiri::XML::Node
class << self
# call-seq:
- # parse(input, url: nil, encoding: nil, options: DEFAULT_XML) { |options| } => Nokogiri::XML::Document
+ # parse(input) { |options| ... } => Nokogiri::XML::Document
+ # parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
#
- # Parse XML input from a String or IO object, and return a new Document object.
+ # Parse \XML input from a String or IO object, and return a new XML::Document.
#
- # By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
+ # 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
# or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
# that module's DEFAULT_XML constant for what's set (and not set) by default.
#
- # See also: Nokogiri.XML() which is a convenience method which will call this method.
+ # [Required Parameters]
+ # - +input+ (String | IO) The content to be parsed.
#
- # [Parameters]
- # - +input+ (String, IO) The content to be parsed.
- #
- # [Keyword arguments]
- # - +url:+ (String) The URI where this document is located.
+ # [Optional Keyword Arguments]
+ # - +url:+ (String) The base URI for this document.
#
# - +encoding:+ (String) The name of the encoding that should be used when processing the
- # document. (default +nil+ means that the encoding will be determined based on the
- # document content)
+ # document. When not provided, the encoding will be determined based on the document
+ # content.
#
- # - +options+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
- # behaviors during parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
- # Nokogiri::XML::ParseOptions for more information.
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
+ # behaviors during parsing. See ParseOptions for more information. The default value is
+ # +ParseOptions::DEFAULT_XML+.
#
# [Yields]
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
- # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
#
+ # [Returns] Nokogiri::XML::Document
def parse(
string_or_io,
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
@@ -72,6 +72,7 @@ def parse(
end
doc = if string_or_io.respond_to?(:read)
+ # TODO: should we instead check for respond_to?(:to_path)?
if string_or_io.is_a?(Pathname)
# resolve the Pathname to the file and open it as an IO object, see #2110
string_or_io = string_or_io.expand_path.open