diff --git a/README.md b/README.md
index ea6af5f..162d8cc 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,8 @@ backtracking regular expression engines like those used in PCRE, Perl, and
Python".
**Current version:** 2.4.3
-**Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
**Bundled RE2 version:** libre2.11 (2023-11-01)
+**Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
**Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
Installation
@@ -68,9 +68,10 @@ Documentation
Full documentation automatically generated from the latest version is
available at <http://mudge.name/re2/>.
-Note that RE2's regular expression syntax differs from PCRE and Ruby's
-built-in [`Regexp`][Regexp] library, see the [official syntax page][] for more
-details.
+> [!IMPORTANT]
+> Note that RE2's regular expression syntax differs from PCRE and Ruby's
+> built-in [`Regexp`][Regexp] library, see the [official syntax page][] for
+> more details.
Usage
-----
@@ -80,27 +81,19 @@ library (with [`Regexp`](http://mudge.name/re2/RE2/Regexp.html) and
[`MatchData`](http://mudge.name/re2/RE2/MatchData.html)), its API is slightly
different:
-```console
-$ irb -rubygems
-> require 're2'
-> r = RE2::Regexp.new('w(\d)(\d+)')
-=> #<RE2::Regexp /w(\d)(\d+)/>
-> m = r.match("w1234")
-=> #<RE2::MatchData "w1234" 1:"1" 2:"234">
-> m[1]
-=> "1"
-> m.string
-=> "w1234"
-> m.begin(1)
-=> 1
-> m.end(1)
-=> 2
-> r =~ "w1234"
-=> true
-> r !~ "bob"
-=> true
-> r.match("bob")
-=> nil
+```ruby
+require "re2"
+
+r = RE2::Regexp.new('w(\d)(\d+)') # => #<RE2::Regexp /w(\d)(\d+)/>
+m = r.match("w1234") # => #<RE2::MatchData "w1234" 1:"1" 2:"234">
+m[1] # => "1"
+
+# Improve performance by requesting fewer submatches
+m = r.match("w1234", 1) # => #<RE2::MatchData "w1234" 1:"1">
+
+# Or no submatches at all
+r.match("w1234", 0) # => true
+r =~ "w1234" # => true
```
As
@@ -109,30 +102,25 @@ As
defined against `Kernel` so you can use a shorter version to create regular
expressions:
-```console
-> RE2('(\d+)')
-=> #<RE2::Regexp /(\d+)/>
+```ruby
+RE2('(\d+)') # => #<RE2::Regexp /(\d+)/>
```
Note the use of *single quotes* as double quotes will interpret `\d` as `d` as
in the following example:
-```console
-> RE2("(\d+)")
-=> #<RE2::Regexp /(d+)/>
+```ruby
+RE2("(\d+)") # => #<RE2::Regexp /(d+)/>
```
As of 0.3.0, you can use named groups:
-```console
-> r = RE2::Regexp.new('(?P<name>\w+) (?P<age>\d+)')
-=> #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
-> m = r.match("Bob 40")
-=> #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
-> m[:name]
-=> "Bob"
-> m["age"]
-=> "40"
+```ruby
+r = RE2::Regexp.new('(?P<name>\w+) (?P<age>\d+)')
+# => #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
+m = r.match("Bob 40") # => #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
+m[:name] # => "Bob"
+m["age"] # => "40"
```
As of 0.6.0, you can use `RE2::Regexp#scan` to incrementally scan text for
@@ -197,9 +185,10 @@ end
Encoding
--------
-Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
-returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
-`RE2::Regexp` is set to false (any other encoding's behaviour is undefined).
+> [!IMPORTANT]
+> Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
+> returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
+> `RE2::Regexp` is set to false (any other encoding's behaviour is undefined).
For backward compatibility: re2 won't automatically convert string inputs to
the right encoding so this is the responsibility of the caller, e.g.
@@ -220,8 +209,8 @@ Features
`RE2::Regexp.compile(re)` or `RE2(re)` (including specifying options, e.g.
`RE2::Regexp.new("pattern", :case_sensitive => false)`)
-* Extracting matches with `re2.match(text)` (and an exact number of matches
- with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`)
+* Extracting matches with `re2.match(text)` (and an exact number of submatches
+ with `re2.match(text, number_of_submatches)` such as `re2.match("123-234", 2)`)
* Extracting matches by name (both with strings and symbols)
diff --git a/ext/re2/re2.cc b/ext/re2/re2.cc
index 1815bf3..c59bb54 100644
--- a/ext/re2/re2.cc
+++ b/ext/re2/re2.cc
@@ -1309,15 +1309,20 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
}
/*
- * Match the pattern against the given +text+ and return either
- * a boolean (if no submatches are required) or a {RE2::MatchData}
- * instance.
+ * Match the pattern against the given +text+ and return either a boolean (if
+ * no submatches are required) or a {RE2::MatchData} instance with the
+ * specified number of submatches (defaults to the total number of capturing
+ * groups).
+ *
+ * The number of submatches has a significant impact on performance: requesting
+ * one submatch is much faster than requesting more than one and requesting
+ * zero submatches is faster still.
*
* @return [Boolean, RE2::MatchData]
*
* @overload match(text)
* Returns an {RE2::MatchData} containing the matching pattern and all
- * subpatterns resulting from looking for the regexp in +text+ if the pattern
+ * submatches resulting from looking for the regexp in +text+ if the pattern
* contains capturing groups.
*
* Returns either true or false indicating whether a successful match was
@@ -1326,7 +1331,7 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
* @param [String] text the text to search
* @return [RE2::MatchData] if the pattern contains capturing groups
* @return [Boolean] if the pattern does not contain capturing groups
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
+ * @raise [NoMemoryError] if there was not enough memory to allocate the submatches
* @example Matching with capturing groups
* r = RE2::Regexp.new('w(o)(o)')
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
@@ -1340,20 +1345,20 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
*
* @param [String] text the text to search
* @return [Boolean] whether the match was successful
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
+ * @raise [NoMemoryError] if there was not enough memory to allocate the submatches
* @example
* r = RE2::Regexp.new('w(o)(o)')
* r.match('woo', 0) #=> true
* r.match('bob', 0) #=> false
*
- * @overload match(text, number_of_matches)
+ * @overload match(text, number_of_submatches)
* See +match(text)+ but with a specific number of
- * matches returned (padded with nils if necessary).
+ * submatches returned (padded with nils if necessary).
*
* @param [String] text the text to search
- * @param [Integer] number_of_matches the number of matches to return
- * @return [RE2::MatchData] the matches
- * @raise [ArgumentError] if given a negative number of matches
+ * @param [Integer] number_of_submatches the number of submatches to return
+ * @return [RE2::MatchData] the submatches
+ * @raise [ArgumentError] if given a negative number of submatches
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
* @example
* r = RE2::Regexp.new('w(o)(o)')
@@ -1363,9 +1368,9 @@ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
re2_pattern *p;
re2_matchdata *m;
- VALUE text, number_of_matches;
+ VALUE text, number_of_submatches;
- rb_scan_args(argc, argv, "11", &text, &number_of_matches);
+ rb_scan_args(argc, argv, "11", &text, &number_of_submatches);
/* Ensure text is a string. */
StringValue(text);
@@ -1374,8 +1379,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
int n;
- if (RTEST(number_of_matches)) {
- n = NUM2INT(number_of_matches);
+ if (RTEST(number_of_submatches)) {
+ n = NUM2INT(number_of_submatches);
if (n < 0) {
rb_raise(rb_eArgError, "number of matches should be >= 0");