diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2002-02-01 08:49:02 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2002-02-01 08:49:02 +0000 |
commit | d9b49e39b2f3380cb6c4bb68a175a230c4702b58 (patch) | |
tree | d2563ee6af8604892dfa8fca3a4d751cedc5cfdc | |
parent | dad91ce6d8974ed959146ce252749a9fa0d7ebfe (diff) | |
download | ruby-d9b49e39b2f3380cb6c4bb68a175a230c4702b58.tar.gz |
* regex.c (mbc_startpos): become macro.
* regex.c (euc_startpos): added for improvement.
* regex.c (sjis_startpos): ditto.
* regex.c (utf8_startpos): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2040 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | regex.c | 152 |
2 files changed, 84 insertions, 78 deletions
@@ -1,3 +1,13 @@ +Fri Feb 1 17:46:39 2002 Nobuyoshi Nakada <nobu.nakada@nifty.ne.jp> + + * regex.c (mbc_startpos): become macro. + + * regex.c (euc_startpos): added for improvement. + + * regex.c (sjis_startpos): ditto. + + * regex.c (utf8_startpos): ditto. + Fri Feb 1 00:03:30 2002 Yukihiro Matsumoto <matz@ruby-lang.org> * file.c (rb_stat_inspect): print dev, rdev in hexadecimal. @@ -478,7 +478,9 @@ re_set_syntax(syntax) #define WC2MBC1ST(c) \ ((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c)) -int mbc_startpos _((const char *start, int pos)); +typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos)); +const mbc_startpos_func_t mbc_startpos_func[]; +#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos)) static unsigned int utf8_firstbyte(c) @@ -4384,7 +4386,6 @@ re_free_registers(regs) Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto) Last change: Jul. 9, 1993 by t^2 */ static const unsigned char mbctab_ascii[] = { - /* forward scan */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -4401,28 +4402,9 @@ static const unsigned char mbctab_ascii[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - /* reverse scan */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */ - /* forward scan */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -4439,28 +4421,9 @@ static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, - - /* reverse scan */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; -static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ - /* forward scan */ +static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFC */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -4476,9 +4439,10 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 +}; - /* reverse scan */ +static const unsigned char mbctab_sjis_trail[] = { /* 0x40-0x7E,0x80-0xFC */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -4498,7 +4462,6 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ }; static const unsigned char mbctab_utf8[] = { - /* forward scan */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -4515,24 +4478,6 @@ static const unsigned char mbctab_utf8[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0, - - /* reverse scan */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; const unsigned char *re_mbctab = mbctab_ascii; @@ -4561,36 +4506,87 @@ re_mbcinit(mbctype) } } -int -mbc_startpos(string, pos) +#define mbc_isfirst(t, c) (t)[(unsigned char)(c)] +#define mbc_len(t, c) ((t)[(unsigned char)(c)]+1) + +static unsigned int asc_startpos _((const char *string, unsigned int pos)); +static unsigned int +asc_startpos(string, pos) const char *string; - int pos; + unsigned int pos; { - int i = pos, w; + return pos; +} + +#define euc_islead(c) ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1) +#define euc_mbclen(c) mbc_len(mbctab_euc, (c)) +static unsigned int euc_startpos _((const char *string, unsigned int pos)); +static unsigned int +euc_startpos(string, pos) + const char *string; + unsigned int pos; +{ + unsigned int i = pos, w; - while (i > 0 && re_mbctab[(unsigned char)string[i]+256]) { + while (i > 0 && !euc_islead(string[i])) { --i; } - if (i == pos || i + (w = mbclen(string[i])) > pos) return i; + if (i == pos || i + (w = euc_mbclen(string[i])) > pos) { + return i; + } i += w; + return i + ((pos - i) & ~1); +} - switch (current_mbctype) { - case MBCTYPE_EUC: - return i + ((pos - i) & ~1); +#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c)) +#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)] +#define sjis_mbclen(c) mbc_len(mbctab_sjis, (c)) +static unsigned int sjis_startpos _((const char *string, unsigned int pos)); +static unsigned int +sjis_startpos(string, pos) + const char *string; + unsigned int pos; +{ + unsigned int i = pos, w; - case MBCTYPE_SJIS: - while (i + (w = mbclen(string[i])) < pos) { - i += w; - } + if (i > 0 && sjis_istrail(string[i])) { + do { + if (!sjis_isfirst(string[--i])) { + ++i; + break; + } + } while (i > 0); + } + if (i == pos || i + (w = sjis_mbclen(string[i])) > pos) { return i; + } + i += w; + return i + ((pos - i) & ~1); +} - case MBCTYPE_UTF8: +#define utf8_islead(c) ((unsigned char)((c) & 0xc0) != 0x80) +#define utf8_mbclen(c) mbc_len(mbctab_utf8, (c)) +static unsigned int utf8_startpos _((const char *string, unsigned int pos)); +static unsigned int +utf8_startpos(string, pos) + const char *string; + unsigned int pos; +{ + unsigned int i = pos, w; + + while (i > 0 && !utf8_islead(string[i])) { + --i; + } + if (i == pos || i + (w = utf8_mbclen(string[i])) > pos) { return i; - default: - return pos; } + return i + w; } +const mbc_startpos_func_t mbc_startpos_func[4] = { + asc_startpos, euc_startpos, sjis_startpos, utf8_startpos +}; + /* vi: sw=2 ts=8 Local variables: |