diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-01-08 20:42:45 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-01-08 20:42:45 +0000 |
commit | 800f04c6a521c007e9837813b3564ae6b491e31c (patch) | |
tree | e91082d62cad55b091d9e7f42a1dad6090dc227e /string.c | |
parent | 5393622dad7f5204333b8aa6a39263a961bdc77a (diff) | |
download | ruby-800f04c6a521c007e9837813b3564ae6b491e31c.tar.gz |
* numeric.c (rb_enc_uint_char): raise RangeError when added codepoint
is invalid. [Feature #5855] [Bug #5863] [Bug #5864]
* string.c (rb_str_concat): ditto.
* string.c (rb_str_concat): set encoding as ASCII-8BIT when the string
is US-ASCII and the argument is an integer greater than 127.
* regenc.c (onigenc_mb2_code_to_mbclen): rearrange error code.
* enc/euc_jp.c (code_to_mbclen): ditto.
* enc/shift_jis.c (code_to_mbclen): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 43 |
1 files changed, 34 insertions, 9 deletions
@@ -2074,10 +2074,11 @@ rb_str_append(VALUE str, VALUE str2) VALUE rb_str_concat(VALUE str1, VALUE str2) { - unsigned int lc; + unsigned int code; + rb_encoding *enc = STR_ENC_GET(str1); if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) { - if (rb_num_to_uint(str2, &lc) == 0) { + if (rb_num_to_uint(str2, &code) == 0) { } else if (FIXNUM_P(str2)) { rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2)); @@ -2089,22 +2090,46 @@ rb_str_concat(VALUE str1, VALUE str2) else { return rb_str_append(str1, str2); } - { - rb_encoding *enc = STR_ENC_GET(str1); + + if (enc == rb_usascii_encoding()) { + /* US-ASCII automatically extended to ASCII-8BIT */ + char buf[1] = {(char)code}; + if (code > 0xFF) { + rb_raise(rb_eRangeError, "%u out of char range", code); + } + rb_str_cat(str1, buf, 1); + if (code > 127) { + rb_enc_associate(str1, rb_ascii8bit_encoding()); + ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID); + } + } + else { long pos = RSTRING_LEN(str1); int cr = ENC_CODERANGE(str1); int len; + char *buf; - if ((len = rb_enc_codelen(lc, enc)) <= 0) { - rb_raise(rb_eRangeError, "%u invalid char", lc); + switch (len = rb_enc_codelen(code, enc)) { + case ONIGERR_INVALID_CODE_POINT_VALUE: + rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc)); + break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: + case 0: + rb_raise(rb_eRangeError, "%u out of char range", code); + break; + } + buf = ALLOCA_N(char, len + 1); + rb_enc_mbcput(code, buf, enc); + if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) { + rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc)); } rb_str_resize(str1, pos+len); - rb_enc_mbcput(lc, RSTRING_PTR(str1)+pos, enc); - if (cr == ENC_CODERANGE_7BIT && lc > 127) + strncpy(RSTRING_PTR(str1) + pos, buf, len); + if (cr == ENC_CODERANGE_7BIT && code > 127) cr = ENC_CODERANGE_VALID; ENC_CODERANGE_SET(str1, cr); - return str1; } + return str1; } /* |