aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-06-08 12:28:42 +0000
committer duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-06-08 12:28:42 +0000
commit 39a2ad0f4b427e58c934388b59c4a8803aa49ed7 (patch)
tree e70acb99167bd9f78c20e5a5b61a57b5e6fb17fb
parent f03b5439f423b1c169b36b564bf55c967d4b12d9 (diff)
download ruby-39a2ad0f4b427e58c934388b59c4a8803aa49ed7.tar.gz
* string.c: New static function rb_str_ascii_casemap; special-casing
  :ascii option in rb_str_upcase_bang and rb_str_downcase_bang.

* regenc.c: Fix a bug (wrong use of unnecessary slack at end of string).

* regenc.h -> include/ruby/oniguruma.h: Move declaration of
  onigenc_ascii_only_case_map so that it is visible in string.c.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55329 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 10
-rw-r--r-- include/ruby/oniguruma.h 8
-rw-r--r-- regenc.c 4
-rw-r--r-- regenc.h 1
-rw-r--r-- string.c 40
5 files changed, 51 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 9d00b68313..23806d8355 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Wed Jun 8 21:28:36 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * string.c: New static function rb_str_ascii_casemap; special-casing
+ :ascii option in rb_str_upcase_bang and rb_str_downcase_bang.
+
+ * regenc.c: Fix a bug (wrong use of unnecessary slack at end of string).
+
+ * regenc.h -> include/ruby/oniguruma.h: Move declaration of
+ onigenc_ascii_only_case_map so that it is visible in string.c.
+
Wed Jun 8 20:33:44 2016 Naohisa Goto <ngotogenome@gmail.com>
* include/ruby/intern.h: Remove excess semicolons in PUREFUNC().
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index 51c200a351..523fc7089a 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -229,6 +229,14 @@ ONIG_EXTERN const OnigEncodingType OnigEncodingASCII;
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
+#ifdef ONIG_CASE_MAPPING
+ /* this declaration needs to be here because it is used in string.c */
+ ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP,
+ const OnigUChar** pp, const OnigUChar* end,
+ OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc));
+#endif /* ONIG_CASE_MAPPING */
+
/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
diff --git a/regenc.c b/regenc.c
index abc0c029c8..d345b0f8d5 100644
--- a/regenc.c
+++ b/regenc.c
@@ -968,9 +968,7 @@ onigenc_ascii_only_case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, cons
OnigCaseFoldType flags = *flagP;
int codepoint_length;
- to_end -= 4; /* longest possible length of a single character */
-
- while (*pp<end && to<=to_end) {
+ while (*pp<end && to<to_end) {
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
if (codepoint_length < 0)
return codepoint_length; /* encoding invalid */
diff --git a/regenc.h b/regenc.h
index bc4ac4fa33..019bcd9611 100644
--- a/regenc.h
+++ b/regenc.h
@@ -133,7 +133,6 @@ CONSTFUNC(ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype
PUREFUNC(ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc)));
#ifdef ONIG_CASE_MAPPING
- ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
#endif /* ONIG_CASE_MAPPING */
diff --git a/string.c b/string.c
index 5c62a28fca..e1cedf42aa 100644
--- a/string.c
+++ b/string.c
@@ -5833,6 +5833,29 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
return target;
}
+/*
+ * Case-maps +source+ in place by calling onigenc_ascii_only_case_map().
+ *
+ * The string's own buffer serves as both input and output, so this
+ * relies on the mapped text never being longer than the original
+ * (presumably guaranteed because only ASCII letters are changed --
+ * confirm against regenc.c).  +flags+ is handed to the mapper as is;
+ * rb_str_downcase_bang tests ONIGENC_CASE_MODIFIED in it afterwards.
+ * An empty string is left untouched.  Raises ArgumentError when the
+ * mapper returns a negative value.  The length returned by the mapper
+ * is not compared against the original length.
+ */
+static void
+rb_str_ascii_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
+{
+    OnigUChar *source_current, *source_end;
+    /* RSTRING_LEN() is a long; storing it in an int could truncate a
+     * huge length to 0 and wrongly take the early return below. */
+    long old_length = RSTRING_LEN(source);
+    int length_or_invalid;
+
+    if (old_length == 0) return;
+
+    source_current = (OnigUChar*)RSTRING_PTR(source);
+    source_end = (OnigUChar*)RSTRING_END(source);
+
+    /* source_current is passed twice: by address as the read position
+     * and by value as the start of the output area, so the output
+     * overwrites the input buffer. */
+    length_or_invalid = onigenc_ascii_only_case_map(flags,
+                               (const OnigUChar**)&source_current, source_end,
+                               source_current, source_end, enc);
+    if (length_or_invalid < 0)
+        rb_raise(rb_eArgError, "input string invalid");
+}
+
/*
* call-seq:
* str.upcase! -> str or nil
@@ -5855,7 +5878,8 @@ rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
str_modify_keep_cr(str);
enc = STR_ENC_GET(str);
rb_str_check_dummy_enc(enc);
- if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) {
+ if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1)
+ || (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) {
char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
while (s < send) {
@@ -5914,14 +5938,14 @@ static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
rb_encoding *enc;
- int modify = 0;
OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
flags = check_case_options(argc, argv, flags);
str_modify_keep_cr(str);
enc = STR_ENC_GET(str);
rb_str_check_dummy_enc(enc);
- if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) {
+ if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1)
+ || (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) {
char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
while (s < send) {
@@ -5929,17 +5953,17 @@ rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
*s = 'a' + (c - 'A');
- modify = 1;
+ flags |= ONIGENC_CASE_MODIFIED;
}
s++;
}
}
- else {
+ else if (flags&ONIGENC_CASE_ASCII_ONLY)
+ rb_str_ascii_casemap(str, &flags, enc);
+ else
str_shared_replace(str, rb_str_casemap(str, &flags, enc));
- modify = ONIGENC_CASE_MODIFIED & flags;
- }
- if (modify) return str;
+ if (ONIGENC_CASE_MODIFIED&flags) return str;
return Qnil;
}