From f1b7e60cb90a7e1a392d4ffccd07dd06eeff5345 Mon Sep 17 00:00:00 2001 From: akr Date: Sat, 8 Dec 2007 02:50:43 +0000 Subject: * encoding.c (rb_enc_mbclen): make it never fail. (rb_enc_nth): don't check the return value of rb_enc_mbclen. (rb_enc_strlen): ditto. (rb_enc_precise_mbclen): return needmore(1) if e <= p. (rb_enc_get_ascii): new function for extracting ASCII character. * include/ruby/encoding.h (rb_enc_get_ascii): declared. * include/ruby/regex.h (ismbchar): removed. * re.c (rb_reg_expr_str): use rb_enc_get_ascii. (unescape_escaped_nonascii): use rb_enc_precise_mbclen to determine the termination of escaped non-ASCII character. (unescape_nonascii): use rb_enc_precise_mbclen. (rb_reg_quote): use rb_enc_get_ascii. (rb_reg_regsub): use rb_enc_get_ascii. * string.c (rb_str_reverse) don't check the return value of rb_enc_mbclen. (rb_str_split_m): don't call rb_enc_mbclen with e <= p. * parse.y (is_identchar): use ISASCII. (parser_ismbchar): removed. (parser_precise_mbclen): new macro. (parser_isascii): new macro. (parser_tokadd_mbchar): use parser_precise_mbclen to check invalid character precisely. (parser_tokadd_string): use parser_isascii. (parser_yylex): ditto. (is_special_global_name): don't call is_identchar with e <= p. (rb_enc_symname_p): ditto. [ruby-dev:32455] * ext/tk/sample/tkextlib/vu/canvSticker2.rb: remove coding cookie because the encoding is not UTF-8. [ruby-dev:32475] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 39 ++++++++++++++++++++++ encoding.c | 32 ++++++++++++++---- ext/tk/sample/tkextlib/vu/canvSticker2.rb | 1 - include/ruby/encoding.h | 3 ++ include/ruby/regex.h | 1 - parse.y | 22 +++++++------ re.c | 54 +++++++++++++++++-------------- string.c | 8 ++--- test/ruby/test_m17n.rb | 6 ++-- test/ruby/test_regexp.rb | 6 ++-- 10 files changed, 119 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 349fc9c6d7..8ce4954bac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,42 @@ +Sat Dec 8 11:06:29 2007 Tanaka Akira + + * encoding.c (rb_enc_mbclen): make it never fail. + (rb_enc_nth): don't check the return value of rb_enc_mbclen. + (rb_enc_strlen): ditto. + (rb_enc_precise_mbclen): return needmore(1) if e <= p. + (rb_enc_get_ascii): new function for extracting ASCII character. + + * include/ruby/encoding.h (rb_enc_get_ascii): declared. + + * include/ruby/regex.h (ismbchar): removed. + + * re.c (rb_reg_expr_str): use rb_enc_get_ascii. + (unescape_escaped_nonascii): use rb_enc_precise_mbclen to determine + the termination of escaped non-ASCII character. + (unescape_nonascii): use rb_enc_precise_mbclen. + (rb_reg_quote): use rb_enc_get_ascii. + (rb_reg_regsub): use rb_enc_get_ascii. + + * string.c (rb_str_reverse) don't check the return value of + rb_enc_mbclen. + (rb_str_split_m): don't call rb_enc_mbclen with e <= p. + + * parse.y (is_identchar): use ISASCII. + (parser_ismbchar): removed. + (parser_precise_mbclen): new macro. + (parser_isascii): new macro. + (parser_tokadd_mbchar): use parser_precise_mbclen to check invalid + character precisely. + (parser_tokadd_string): use parser_isascii. + (parser_yylex): ditto. + (is_special_global_name): don't call is_identchar with e <= p. + (rb_enc_symname_p): ditto. + + [ruby-dev:32455] + + * ext/tk/sample/tkextlib/vu/canvSticker2.rb: remove coding cookie + because the encoding is not UTF-8. [ruby-dev:32475] + Fri Dec 7 20:21:35 2007 GOTOU Yuuzou * ext/openssl/lib/net/ftptls.rb, ext/openssl/lib/net/telnets.rb: diff --git a/encoding.c b/encoding.c index 53ceac851d..540aa88701 100644 --- a/encoding.c +++ b/encoding.c @@ -459,7 +459,6 @@ rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc) for (c=0; p 0x00..0x7f, -1 */ +int rb_enc_get_ascii(const char*, const char *, rb_encoding*); + /* code,encoding -> codelen */ int rb_enc_codelen(int, rb_encoding*); diff --git a/include/ruby/regex.h b/include/ruby/regex.h index b214c63d3e..d0a670f283 100644 --- a/include/ruby/regex.h +++ b/include/ruby/regex.h @@ -29,7 +29,6 @@ extern "C" { ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -#define ismbchar(p, e, enc) (mbclen((p),(e),(enc)) != 1) #define mbclen(p,e,enc) rb_enc_mbclen((p),(e),(enc)) #endif /* ifndef ONIG_RUBY_M17N */ diff --git a/parse.y b/parse.y index 78cae5a367..0becb9280d 100644 --- a/parse.y +++ b/parse.y @@ -4583,10 +4583,12 @@ ripper_dispatch_delayed_token(struct parser_params *parser, int t) #endif #define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) -#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || ismbchar(p,e,enc)) -#define parser_ismbchar() ismbchar((lex_p-1), lex_pend, parser->enc) +#define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc) +#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) #define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) +#define parser_isascii() ISASCII(*(lex_p-1)) + static int parser_yyerror(struct parser_params *parser, const char *msg) { @@ -5305,8 +5307,8 @@ dispose_string(VALUE str) static int parser_tokadd_mbchar(struct parser_params *parser, int c) { - int len = parser_mbclen(); - if (len <= 0 || lex_p + len - 1 > lex_pend) { + int len = parser_precise_mbclen(); + if (!MBCLEN_CHARFOUND(len)) { compile_error(PARSER_ARG "illegal multibyte char"); return -1; } @@ -5414,7 +5416,7 @@ parser_tokadd_string(struct parser_params *parser, } } } - else if (parser_ismbchar()) { + else if (!parser_isascii()) { has_nonascii = 1; if (enc != *encp) { mixed_error(enc, *encp); @@ -6306,7 +6308,7 @@ parser_yylex(struct parser_params *parser) } newtok(); enc = parser->enc; - if (parser_ismbchar()) { + if (!parser_isascii()) { if (tokadd_mbchar(c) == -1) return 0; } else if ((rb_enc_isalnum(c, parser->enc) || c == '_') && @@ -6889,7 +6891,7 @@ parser_yylex(struct parser_params *parser) } else { term = nextc(); - if (rb_enc_isalnum(term, parser->enc) || parser_ismbchar()) { + if (rb_enc_isalnum(term, parser->enc) || !parser_isascii()) { yyerror("unknown type of %string"); return 0; } @@ -8693,7 +8695,7 @@ is_special_global_name(const char *m, const char *e, rb_encoding *enc) break; case '-': ++m; - if (is_identchar(m, e, enc)) { + if (m < e && is_identchar(m, e, enc)) { if (!ISASCII(*m)) mb = 1; m += rb_enc_mbclen(m, e, enc); } @@ -8776,9 +8778,9 @@ rb_enc_symname_p(const char *name, rb_encoding *enc) default: localid = !rb_enc_isupper(*m, enc); id: - if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, e, enc)) + if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m))) return Qfalse; - while (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); + while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); if (localid) { switch (*m) { case '!': case '?': case '=': ++m; diff --git a/re.c b/re.c index fca7f3a791..c30453591f 100644 --- a/re.c +++ b/re.c @@ -218,10 +218,12 @@ rb_reg_expr_str(VALUE str, const char *s, long len) rb_encoding *enc = rb_enc_get(str); const char *p, *pend; int need_escape = 0; + int c; p = s; pend = p + len; while (p