From 7ddde51c68b572b2a94a4946d2bf9b7d0e12a65a Mon Sep 17 00:00:00 2001 From: nobu Date: Tue, 29 Jan 2002 19:33:11 +0000 Subject: * regex.c (re_adjust_startpos): search start of multibyte backward. * regex.c (mbc_startpos): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2031 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- regex.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 118 insertions(+), 22 deletions(-) (limited to 'regex.c') diff --git a/regex.c b/regex.c index 64d32151ed..eb14a38a41 100644 --- a/regex.c +++ b/regex.c @@ -478,6 +478,8 @@ re_set_syntax(syntax) #define WC2MBC1ST(c) \ ((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c)) +int mbc_startpos _((const char *start, int pos)); + static unsigned int utf8_firstbyte(c) unsigned long c; @@ -3076,28 +3078,13 @@ re_adjust_startpos(bufp, string, size, startpos, range) /* Adjust startpos for mbc string */ if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) { - int i = 0; + int i = mbc_startpos(string, startpos); - if (range > 0) { - while (i 0) { + startpos = i + mbclen(string[i]); } else { - int w; - - while (i 0 && re_mbctab[(unsigned char)string[i]+256]) { + --i; + } + if (i == pos || i + (w = mbclen(string[i])) > pos) return i; + i += w; + + switch (current_mbctype) { + case MBCTYPE_EUC: + case MBCTYPE_SJIS: + /* double byte char only */ + return i + ((pos - i) & ~1); + case MBCTYPE_UTF8: + default: + return pos; + } +} + +/* + vi: sw=2 ts=8 + Local variables: + mode : C + c-file-style : "gnu" + tab-width : 8 + End : +*/ -- cgit v1.2.3