diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-08 02:50:43 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-08 02:50:43 +0000 |
commit | f1b7e60cb90a7e1a392d4ffccd07dd06eeff5345 (patch) | |
tree | 8135b8dc1f1ef8a6bcd08a86c4106c83941780fa /encoding.c | |
parent | 990bec97020bfabd09ebfd92581f505b4f09a78a (diff) | |
download | ruby-f1b7e60cb90a7e1a392d4ffccd07dd06eeff5345.tar.gz |
* encoding.c (rb_enc_mbclen): make it never fail.
(rb_enc_nth): don't check the return value of rb_enc_mbclen.
(rb_enc_strlen): ditto.
(rb_enc_precise_mbclen): return needmore(1) if e <= p.
(rb_enc_get_ascii): new function for extracting ASCII character.
* include/ruby/encoding.h (rb_enc_get_ascii): declared.
* include/ruby/regex.h (ismbchar): removed.
* re.c (rb_reg_expr_str): use rb_enc_get_ascii.
(unescape_escaped_nonascii): use rb_enc_precise_mbclen to determine
the termination of escaped non-ASCII character.
(unescape_nonascii): use rb_enc_precise_mbclen.
(rb_reg_quote): use rb_enc_get_ascii.
(rb_reg_regsub): use rb_enc_get_ascii.
* string.c (rb_str_reverse): don't check the return value of
rb_enc_mbclen.
(rb_str_split_m): don't call rb_enc_mbclen with e <= p.
* parse.y (is_identchar): use ISASCII.
(parser_ismbchar): removed.
(parser_precise_mbclen): new macro.
(parser_isascii): new macro.
(parser_tokadd_mbchar): use parser_precise_mbclen to check invalid
character precisely.
(parser_tokadd_string): use parser_isascii.
(parser_yylex): ditto.
(is_special_global_name): don't call is_identchar with e <= p.
(rb_enc_symname_p): ditto.
[ruby-dev:32455]
* ext/tk/sample/tkextlib/vu/canvSticker2.rb: remove coding cookie
because the encoding is not UTF-8. [ruby-dev:32475]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'encoding.c')
-rw-r--r-- | encoding.c | 32 |
1 files changed, 25 insertions, 7 deletions
```diff
diff --git a/encoding.c b/encoding.c
index 53ceac851d..540aa88701 100644
--- a/encoding.c
+++ b/encoding.c
@@ -459,7 +459,6 @@ rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc)
         for (c=0; p<e && nth--; c++) {
             int n = rb_enc_mbclen(p, e, enc);
 
-            if (n == 0) return 0;
             p += n;
         }
     }
@@ -478,7 +477,6 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
     for (c=0; p<e; c++) {
         int n = rb_enc_mbclen(p, e, enc);
 
-        if (n == 0) return -1;
         p += n;
     }
     return c;
@@ -487,19 +485,39 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
 int
 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
-    int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-    if (n == 0) {
-        rb_raise(rb_eArgError, "invalid mbstring sequence");
-    }
-    return n;
+    int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
+    if (MBCLEN_CHARFOUND(n))
+        return n;
+    else
+        return 1;
 }
 
 int
 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
+    if (e <= p)
+        return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
     return ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
 }
 
+int rb_enc_get_ascii(const char *p, const char *e, rb_encoding *enc)
+{
+    int c, l;
+    if (e <= p)
+        return -1;
+    if (rb_enc_asciicompat(enc)) {
+        c = (unsigned char)*p;
+        return ISASCII(c) ? c : -1;
+    }
+    l = rb_enc_precise_mbclen(p, e, enc);
+    if (!MBCLEN_CHARFOUND(l))
+        return -1;
+    c = rb_enc_codepoint(p, e, enc);
+    if (rb_enc_isascii(c, enc))
+        return c;
+    return -1;
+}
+
 int
 rb_enc_codelen(int c, rb_encoding *enc)
 {
```