diff options
author | shyouhei <shyouhei@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-11-21 08:51:39 +0000 |
---|---|---|
committer | shyouhei <shyouhei@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-11-21 08:51:39 +0000 |
commit | 953091a4b1b862340e59ab8a9c9755342f7488c3 (patch) | |
tree | f119733cddad27f873d41c1ddd11d14f6d45fb62 | |
parent | f1ed4b713b2a6adf1cca30eaf0f7874ea4f1577a (diff) | |
download | ruby-953091a4b1b862340e59ab8a9c9755342f7488c3.tar.gz |
char is not unsigned
It seems that decades ago, ruby was written under the assumption that
char is unsigned, which is of course a false assumption. We
need to explicitly store a numeric value into an unsigned char
variable to indicate that we expect a value in the range 0..255.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65900 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | re.c | 9 | ||||
-rw-r--r-- | string.c | 58 |
2 files changed, 34 insertions, 33 deletions
@@ -2539,7 +2539,7 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc, VALUE buf, rb_encoding **encp, int *has_property, onig_errmsg_buffer err) { - char c; + unsigned char c; char smallbuf[2]; while (p < end) { @@ -2602,8 +2602,9 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc, p = p-2; if (enc == rb_usascii_encoding()) { const char *pbeg = p; - c = read_escaped_byte(&p, end, err); - if (c == (char)-1) return -1; + int byte = read_escaped_byte(&p, end, err); + if (byte == -1) return -1; + c = byte; rb_str_buf_cat(buf, pbeg, p-pbeg); } else { @@ -2652,7 +2653,7 @@ escape_asis: break; default: - rb_str_buf_cat(buf, &c, 1); + rb_str_buf_cat(buf, (char *)&c, 1); break; } } @@ -6190,7 +6190,7 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en unsigned int c; int codelen; size_t hexlen; - char buf[6]; + unsigned char buf[6]; static rb_encoding *enc_utf8 = NULL; switch (*s) { @@ -6208,8 +6208,8 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en case 'b': case 'a': case 'e': - *buf = (char)unescape_ascii(*s); - rb_str_cat(undumped, buf, 1); + *buf = unescape_ascii(*s); + rb_str_cat(undumped, (char *)buf, 1); s++; break; case 'u': @@ -6249,8 +6249,8 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en if (0xd800 <= c && c <= 0xdfff) { rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); } - codelen = rb_enc_mbcput(c, buf, *penc); - rb_str_cat(undumped, buf, codelen); + codelen = rb_enc_mbcput(c, (char *)buf, *penc); + rb_str_cat(undumped, (char *)buf, codelen); s += hexlen; } } @@ -6262,8 +6262,8 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en if (0xd800 <= c && c <= 0xdfff) { rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); } - codelen = rb_enc_mbcput(c, buf, *penc); - rb_str_cat(undumped, buf, codelen); + codelen = rb_enc_mbcput(c, (char *)buf, *penc); + rb_str_cat(undumped, (char *)buf, 
codelen); s += hexlen; } break; @@ -6279,7 +6279,7 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en if (hexlen != 2) { rb_raise(rb_eRuntimeError, "invalid hex escape"); } - rb_str_cat(undumped, buf, 1); + rb_str_cat(undumped, (char *)buf, 1); s += hexlen; break; default: @@ -6915,7 +6915,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) int cflag = 0; unsigned int c, c0, last = 0; int modify = 0, i, l; - char *s, *send; + unsigned char *s, *send; VALUE hash = 0; int singlebyte = single_byte_optimizable(str); int termlen; @@ -6999,18 +6999,18 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) if (cr == ENC_CODERANGE_VALID && rb_enc_asciicompat(e1)) cr = ENC_CODERANGE_7BIT; str_modify_keep_cr(str); - s = RSTRING_PTR(str); send = RSTRING_END(str); + s = (unsigned char *)RSTRING_PTR(str); send = (unsigned char *)RSTRING_END(str); termlen = rb_enc_mbminlen(enc); if (sflag) { int clen, tlen; long offset, max = RSTRING_LEN(str); unsigned int save = -1; - char *buf = ALLOC_N(char, max + termlen), *t = buf; + unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf; while (s < send) { int may_modify = 0; - c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); + c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1); tlen = enc == e1 ? 
clen : rb_enc_codelen(c, enc); s += clen; @@ -7046,7 +7046,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) if ((offset = t - buf) + tlen > max) { size_t MAYBE_UNUSED(old) = max + termlen; max = offset + tlen + (send - s); - SIZED_REALLOC_N(buf, char, max + termlen, old); + SIZED_REALLOC_N(buf, unsigned char, max + termlen, old); t = buf + offset; } rb_enc_mbcput(c, t, enc); @@ -7059,8 +7059,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) if (!STR_EMBED_P(str)) { ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); } - TERM_FILL(t, termlen); - RSTRING(str)->as.heap.ptr = buf; + TERM_FILL((char *)t, termlen); + RSTRING(str)->as.heap.ptr = (char *)buf; RSTRING(str)->as.heap.len = t - buf; STR_SET_NOEMBED(str); RSTRING(str)->as.heap.aux.capa = max; @@ -7086,11 +7086,11 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) else { int clen, tlen; long offset, max = (long)((send - s) * 1.2); - char *buf = ALLOC_N(char, max + termlen), *t = buf; + unsigned char *buf = ALLOC_N(unsigned char, max + termlen), *t = buf; while (s < send) { int may_modify = 0; - c0 = c = rb_enc_codepoint_len(s, send, &clen, e1); + c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1); tlen = enc == e1 ? 
clen : rb_enc_codelen(c, enc); if (c < 256) { @@ -7119,7 +7119,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) if ((offset = t - buf) + tlen > max) { size_t MAYBE_UNUSED(old) = max + termlen; max = offset + tlen + (long)((send - s) * 1.2); - SIZED_REALLOC_N(buf, char, max + termlen, old); + SIZED_REALLOC_N(buf, unsigned char, max + termlen, old); t = buf + offset; } if (s != t) { @@ -7135,8 +7135,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) if (!STR_EMBED_P(str)) { ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str)); } - TERM_FILL(t, termlen); - RSTRING(str)->as.heap.ptr = buf; + TERM_FILL((char *)t, termlen); + RSTRING(str)->as.heap.ptr = (char *)buf; RSTRING(str)->as.heap.len = t - buf; STR_SET_NOEMBED(str); RSTRING(str)->as.heap.aux.capa = max; @@ -7405,7 +7405,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) char squeez[TR_TABLE_SIZE]; rb_encoding *enc = 0; VALUE del = 0, nodel = 0; - char *s, *send, *t; + unsigned char *s, *send, *t; int i, modify = 0; int ascompat, singlebyte = single_byte_optimizable(str); unsigned int save; @@ -7426,15 +7426,15 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) } str_modify_keep_cr(str); - s = t = RSTRING_PTR(str); + s = t = (unsigned char *)RSTRING_PTR(str); if (!s || RSTRING_LEN(str) == 0) return Qnil; - send = RSTRING_END(str); + send = (unsigned char *)RSTRING_END(str); save = -1; ascompat = rb_enc_asciicompat(enc); if (singlebyte) { while (s < send) { - unsigned int c = *(unsigned char*)s++; + unsigned int c = *s++; if (c != save || (argc > 0 && !squeez[c])) { *t++ = save = c; } @@ -7445,14 +7445,14 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) unsigned int c; int clen; - if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (ascompat && (c = *s) < 0x80) { if (c != save || (argc > 0 && !squeez[c])) { *t++ = save = c; } s++; } else { - c = rb_enc_codepoint_len(s, send, &clen, enc); + c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc); if (c != save || 
(argc > 0 && !tr_find(c, squeez, del, nodel))) { if (t != s) rb_enc_mbcput(c, t, enc); @@ -7464,9 +7464,9 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) } } - TERM_FILL(t, TERM_LEN(str)); - if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) { - STR_SET_LEN(str, t - RSTRING_PTR(str)); + TERM_FILL((char *)t, TERM_LEN(str)); + if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) { + STR_SET_LEN(str, (char *)t - RSTRING_PTR(str)); modify = 1; } |