aboutsummaryrefslogtreecommitdiffstats
path: root/string.c
diff options
context:
space:
mode:
authorYusuke Endoh <mame@ruby-lang.org>2019-08-12 23:12:27 +0900
committerYusuke Endoh <mame@ruby-lang.org>2019-08-12 23:28:35 +0900
commit8d302c914c15af4a29c8b8af801281fa117a7ad2 (patch)
treebe29ca349642c0c03ea537746b106461b6388afa /string.c
parent3fc10eff1e6dd63bc0daa753108072954adc89ac (diff)
downloadruby-8d302c914c15af4a29c8b8af801281fa117a7ad2.tar.gz
string.c (rb_str_sub, _gsub): improve the rdoc
This change: * Added an explanation about back references except \n and \k<n> (\` \& \' \+ \0) * Added an explanation about an escape (\\) * Added some rdoc references * Rephrased and clarified the reason why double escape is needed, added some examples, and moved the note to the last (because it is not specific to the method itself).
Diffstat (limited to 'string.c')
-rw-r--r--string.c79
1 files changed, 58 insertions, 21 deletions
diff --git a/string.c b/string.c
index 2890f46e24..77d67577a0 100644
--- a/string.c
+++ b/string.c
@@ -5129,27 +5129,31 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
* Returns a copy of +str+ with the _first_ occurrence of +pattern+
* replaced by the second argument. The +pattern+ is typically a Regexp; if
* given as a String, any regular expression metacharacters it contains will
- * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
+ * be interpreted literally, e.g. <code>\d</code> will match a backslash
* followed by 'd', instead of a digit.
*
* If +replacement+ is a String it will be substituted for the matched text.
* It may contain back-references to the pattern's capture groups of the form
- * <code>"\\d"</code>, where <i>d</i> is a group number, or
- * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
- * double-quoted string, both back-references must be preceded by an
- * additional backslash. However, within +replacement+ the special match
- * variables, such as <code>$&</code>, will not refer to the current match.
- * If +replacement+ is a String that looks like a pattern's capture group but
- * is actually not a pattern capture group e.g. <code>"\\'"</code>, then it
- * will have to be preceded by two backslashes like so <code>"\\\\'"</code>.
+ * <code>\d</code>, where <i>d</i> is a group number, or
+ * <code>\k<n></code>, where <i>n</i> is a group name.
+ * Similarly, <code>\&</code>, <code>\'</code>, <code>\`</code>, and
+ * <code>\+</code> correspond to the special variables <code>$&</code>,
+ * <code>$'</code>, <code>$`</code>, and <code>$+</code>, respectively.
+ * (See rdoc-ref:regexp.rdoc for details.)
+ * <code>\0</code> is the same as <code>\&</code>.
+ * <code>\\\\</code> is interpreted as an escape, i.e., a single backslash.
+ * Note that within +replacement+, the special match variables, such as
+ * <code>$&</code>, will not refer to the current match.
*
* If the second argument is a Hash, and the matched text is one of its keys,
* the corresponding value is the replacement string.
*
* In the block form, the current match string is passed in as a parameter,
* and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
- * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
- * returned by the block will be substituted for the match on each call.
+ * <code>$&</code>, and <code>$'</code> will be set appropriately.
+ * (See rdoc-ref:regexp.rdoc for details.)
+ * The value returned by the block will be substituted for the match on each
+ * call.
*
* The result inherits any tainting in the original string or any supplied
* replacement string.
@@ -5160,6 +5164,19 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
* "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
* 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
* #=> "Is /bin/bash your preferred shell?"
+ *
+ * Note that a string literal consumes backslashes.
+ * (See rdoc-ref:syntax/literals.rdoc for details about string literals.)
+ * So, back-references are typically preceded by an additional backslash.
+ * For example, if you want to write a back-reference <code>\&</code> in
+ * +replacement+ with a double-quoted string literal, you need to write:
+ * <code>"..\\\\&.."</code>.
+ * If you want to write a non-back-reference string <code>\&</code> in
+ * +replacement+, you first need to escape the backslash to prevent
+ * this method from interpreting it as a back-reference, and then you
+ * need to escape the backslashes again to prevent a string literal from
+ * consuming them: <code>"..\\\\\\\\&.."</code>.
+ * You may want to use the block form to avoid a lot of backslashes.
*/
static VALUE
@@ -5332,24 +5349,31 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
* <i>pattern</i> substituted for the second argument. The <i>pattern</i> is
* typically a Regexp; if given as a String, any
* regular expression metacharacters it contains will be interpreted
- * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
+ * literally, e.g. <code>\d</code> will match a backslash followed by 'd',
* instead of a digit.
*
- * If <i>replacement</i> is a String it will be substituted for
- * the matched text. It may contain back-references to the pattern's capture
- * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
- * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
- * double-quoted string, both back-references must be preceded by an
- * additional backslash. However, within <i>replacement</i> the special match
- * variables, such as <code>$&</code>, will not refer to the current match.
+ * If +replacement+ is a String it will be substituted for the matched text.
+ * It may contain back-references to the pattern's capture groups of the form
+ * <code>\d</code>, where <i>d</i> is a group number, or
+ * <code>\k<n></code>, where <i>n</i> is a group name.
+ * Similarly, <code>\&</code>, <code>\'</code>, <code>\`</code>, and
+ * <code>\+</code> correspond to the special variables <code>$&</code>,
+ * <code>$'</code>, <code>$`</code>, and <code>$+</code>, respectively.
+ * (See rdoc-ref:regexp.rdoc for details.)
+ * <code>\0</code> is the same as <code>\&</code>.
+ * <code>\\\\</code> is interpreted as an escape, i.e., a single backslash.
+ * Note that within +replacement+, the special match variables, such as
+ * <code>$&</code>, will not refer to the current match.
*
* If the second argument is a Hash, and the matched text is one
* of its keys, the corresponding value is the replacement string.
*
* In the block form, the current match string is passed in as a parameter,
* and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
- * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
- * returned by the block will be substituted for the match on each call.
+ * <code>$&</code>, and <code>$'</code> will be set appropriately.
+ * (See rdoc-ref:regexp.rdoc for details.)
+ * The value returned by the block will be substituted for the match on each
+ * call.
*
* The result inherits any tainting in the original string or any supplied
* replacement string.
@@ -5362,6 +5386,19 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
* "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
* "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
* 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
+ *
+ * Note that a string literal consumes backslashes.
+ * (See rdoc-ref:syntax/literals.rdoc for details about string literals.)
+ * So, back-references are typically preceded by an additional backslash.
+ * For example, if you want to write a back-reference <code>\&</code> in
+ * +replacement+ with a double-quoted string literal, you need to write:
+ * <code>"..\\\\&.."</code>.
+ * If you want to write a non-back-reference string <code>\&</code> in
+ * +replacement+, you first need to escape the backslash to prevent
+ * this method from interpreting it as a back-reference, and then you
+ * need to escape the backslashes again to prevent a string literal from
+ * consuming them: <code>"..\\\\\\\\&.."</code>.
+ * You may want to use the block form to avoid a lot of backslashes.
*/
static VALUE