aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog60
-rw-r--r--array.c2
-rw-r--r--bignum.c2
-rw-r--r--class.c2
-rw-r--r--common.mk4
-rw-r--r--compar.c2
-rw-r--r--dir.c2
-rw-r--r--dln.c2
-rw-r--r--dln.h2
-rw-r--r--enum.c2
-rw-r--r--error.c2
-rw-r--r--euc_jp.c5
-rw-r--r--eval.c2
-rw-r--r--ext/socket/socket.c2
-rw-r--r--ext/stringio/stringio.c79
-rw-r--r--ext/strscan/strscan.c5
-rw-r--r--ext/syck/emitter.c2
-rw-r--r--ext/tk/sample/demos-jp/anilabel.rb1
-rw-r--r--ext/tk/sample/demos-jp/aniwave.rb1
-rw-r--r--ext/tk/sample/demos-jp/arrow.rb1
-rw-r--r--ext/tk/sample/demos-jp/bind.rb1
-rw-r--r--ext/tk/sample/demos-jp/bitmap.rb1
-rw-r--r--ext/tk/sample/demos-jp/button.rb1
-rw-r--r--ext/tk/sample/demos-jp/check.rb1
-rw-r--r--ext/tk/sample/demos-jp/check2.rb1
-rw-r--r--ext/tk/sample/demos-jp/clrpick.rb1
-rw-r--r--ext/tk/sample/demos-jp/colors.rb1
-rw-r--r--ext/tk/sample/demos-jp/cscroll.rb1
-rw-r--r--ext/tk/sample/demos-jp/ctext.rb1
-rw-r--r--ext/tk/sample/demos-jp/dialog1.rb1
-rw-r--r--ext/tk/sample/demos-jp/dialog2.rb1
-rw-r--r--ext/tk/sample/demos-jp/entry1.rb1
-rw-r--r--ext/tk/sample/demos-jp/entry2.rb1
-rw-r--r--ext/tk/sample/demos-jp/entry3.rb1
-rw-r--r--ext/tk/sample/demos-jp/filebox.rb1
-rw-r--r--ext/tk/sample/demos-jp/floor.rb1
-rw-r--r--ext/tk/sample/demos-jp/floor2.rb1
-rw-r--r--ext/tk/sample/demos-jp/form.rb1
-rw-r--r--ext/tk/sample/demos-jp/goldberg.rb1
-rw-r--r--ext/tk/sample/demos-jp/hello1
-rw-r--r--ext/tk/sample/demos-jp/hscale.rb1
-rw-r--r--ext/tk/sample/demos-jp/icon.rb1
-rw-r--r--ext/tk/sample/demos-jp/image1.rb1
-rw-r--r--ext/tk/sample/demos-jp/image2.rb1
-rw-r--r--ext/tk/sample/demos-jp/image3.rb1
-rw-r--r--ext/tk/sample/demos-jp/items.rb1
-rw-r--r--ext/tk/sample/demos-jp/ixset21
-rw-r--r--ext/tk/sample/demos-jp/label.rb1
-rw-r--r--ext/tk/sample/demos-jp/labelframe.rb2
-rw-r--r--ext/tk/sample/demos-jp/menu.rb1
-rw-r--r--ext/tk/sample/demos-jp/menu84.rb1
-rw-r--r--ext/tk/sample/demos-jp/menu8x.rb1
-rw-r--r--ext/tk/sample/demos-jp/menubu.rb1
-rw-r--r--ext/tk/sample/demos-jp/msgbox.rb1
-rw-r--r--ext/tk/sample/demos-jp/paned1.rb2
-rw-r--r--ext/tk/sample/demos-jp/paned2.rb2
-rw-r--r--ext/tk/sample/demos-jp/pendulum.rb1
-rw-r--r--ext/tk/sample/demos-jp/plot.rb1
-rw-r--r--ext/tk/sample/demos-jp/puzzle.rb1
-rw-r--r--ext/tk/sample/demos-jp/radio.rb1
-rw-r--r--ext/tk/sample/demos-jp/radio2.rb2
-rw-r--r--ext/tk/sample/demos-jp/radio3.rb2
-rw-r--r--ext/tk/sample/demos-jp/rolodex-j1
-rw-r--r--ext/tk/sample/demos-jp/ruler.rb1
-rw-r--r--ext/tk/sample/demos-jp/sayings.rb1
-rw-r--r--ext/tk/sample/demos-jp/search.rb1
-rw-r--r--ext/tk/sample/demos-jp/spin.rb2
-rw-r--r--ext/tk/sample/demos-jp/states.rb1
-rw-r--r--ext/tk/sample/demos-jp/style.rb1
-rw-r--r--ext/tk/sample/demos-jp/tcolor1
-rw-r--r--ext/tk/sample/demos-jp/text.rb1
-rw-r--r--ext/tk/sample/demos-jp/twind.rb1
-rw-r--r--ext/tk/sample/demos-jp/twind2.rb1
-rw-r--r--ext/tk/sample/demos-jp/unicodeout.rb2
-rw-r--r--ext/tk/sample/demos-jp/vscale.rb1
-rw-r--r--ext/tk/sample/demos-jp/widget1
-rw-r--r--ext/tk/sample/tkextlib/vu/canvSticker2.rb1
-rw-r--r--file.c2
-rw-r--r--gc.c2
-rw-r--r--hash.c2
-rw-r--r--include/ruby/intern.h7
-rw-r--r--include/ruby/io.h4
-rw-r--r--include/ruby/node.h2
-rw-r--r--include/ruby/re.h2
-rw-r--r--include/ruby/regex.h8
-rw-r--r--include/ruby/ruby.h30
-rw-r--r--include/ruby/signal.h2
-rw-r--r--include/ruby/util.h2
-rw-r--r--inits.c2
-rw-r--r--insns.def2
-rw-r--r--io.c290
-rw-r--r--main.c2
-rw-r--r--marshal.c4
-rw-r--r--math.c2
-rw-r--r--numeric.c2
-rw-r--r--object.c2
-rw-r--r--pack.c2
-rw-r--r--parse.y199
-rw-r--r--prec.c2
-rw-r--r--process.c2
-rw-r--r--random.c2
-rw-r--r--range.c2
-rw-r--r--re.c145
-rw-r--r--regint.h1
-rw-r--r--ruby.c17
-rw-r--r--signal.c2
-rw-r--r--sprintf.c67
-rw-r--r--string.c1111
-rw-r--r--struct.c2
-rw-r--r--time.c2
-rw-r--r--util.c2
-rw-r--r--variable.c2
-rw-r--r--version.c2
113 files changed, 1423 insertions, 752 deletions
diff --git a/ChangeLog b/ChangeLog
index 34603b33c3..30dbd45853 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,63 @@
+Sat Aug 25 11:45:37 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * encoding.c: provide basic features for M17N.
+
+ * parse.y: encoding aware parsing.
+
+ * parse.y (pragma_encoding): encoding specification pragma.
+
+ * parse.y (rb_intern3): encoding specified symbols.
+
+ * string.c (rb_str_length): length based on characters.
+ for older behavior, bytesize method added.
+
+ * string.c (rb_str_index_m): index based on characters. rindex as
+ well.
+
+ * string.c (succ_char): encoding aware succeeding string.
+
+ * string.c (rb_str_reverse): reverse based on characters.
+
+ * string.c (rb_str_inspect): encoding aware string description.
+
+ * string.c (rb_str_upcase_bang): encoding aware case conversion.
+ downcase, capitalize, swapcase as well.
+
+ * string.c (rb_str_tr_bang): tr based on characters. delete,
+ squeeze, tr_s, count as well.
+
+ * string.c (rb_str_split_m): split based on characters.
+
+ * string.c (rb_str_each_line): encoding aware each_line.
+
+ * string.c (rb_str_each_char): added. iteration based on
+ characters.
+
+ * string.c (rb_str_strip_bang): encoding aware whitespace
+ stripping. lstrip, rstrip as well.
+
+ * string.c (rb_str_justify): encoding aware justifying (ljust,
+ rjust, center).
+
+ * string.c (str_encoding): get encoding attribute from a string.
+
+ * re.c (rb_reg_initialize): encoding aware regular expression
+
+ * sprintf.c (rb_str_format): formatting (i.e. length count) based
+ on characters.
+
+ * io.c (rb_io_getc): getc to return one-character string.
+ for older behavior, getbyte method added.
+
+ * ext/stringio/stringio.c (strio_getc): ditto.
+
+ * io.c (rb_io_ungetc): allow pushing arbitrary string at the
+ current reading point.
+
+ * ext/stringio/stringio.c (strio_ungetc): ditto.
+
+ * ext/strscan/strscan.c: encoding support.
+
Sat Aug 25 10:59:19 2007 Koichi Sasada <ko1@atdot.net>
* cont.c: separate Continuation and Fiber from core.
diff --git a/array.c b/array.c
index a7361661c4..0b4ffa14d7 100644
--- a/array.c
+++ b/array.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Aug 6 09:46:12 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/bignum.c b/bignum.c
index fcfe64983a..c944907c64 100644
--- a/bignum.c
+++ b/bignum.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Jun 10 00:48:55 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/class.c b/class.c
index f881e690e4..dff402c759 100644
--- a/class.c
+++ b/class.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Aug 10 15:05:44 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/common.mk b/common.mk
index 75c649a75f..6fdaa9e82e 100644
--- a/common.mk
+++ b/common.mk
@@ -25,6 +25,7 @@ OBJS = array.$(OBJEXT) \
compar.$(OBJEXT) \
dir.$(OBJEXT) \
dln.$(OBJEXT) \
+ encoding.$(OBJEXT) \
enum.$(OBJEXT) \
enumerator.$(OBJEXT) \
error.$(OBJEXT) \
@@ -401,6 +402,7 @@ dmydln.$(OBJEXT): {$(VPATH)}dmydln.c {$(VPATH)}dln.c {$(VPATH)}ruby.h \
{$(VPATH)}config.h {$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}dln.h
dmyext.$(OBJEXT): {$(VPATH)}dmyext.c
+encoding.$(OBJEXT): {$(VPATH)}encoding.c {$(VPATH)}encoding.h
enum.$(OBJEXT): {$(VPATH)}enum.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
{$(VPATH)}node.h {$(VPATH)}util.h
@@ -523,7 +525,7 @@ sprintf.$(OBJEXT): {$(VPATH)}sprintf.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
st.$(OBJEXT): {$(VPATH)}st.c {$(VPATH)}config.h {$(VPATH)}st.h {$(VPATH)}defines.h
string.$(OBJEXT): {$(VPATH)}string.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h \
- {$(VPATH)}re.h {$(VPATH)}regex.h
+ {$(VPATH)}re.h {$(VPATH)}regex.h {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}struct.c {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h
thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \
diff --git a/compar.c b/compar.c
index 7802567070..a8952b2d1c 100644
--- a/compar.c
+++ b/compar.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Aug 26 14:39:48 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/dir.c b/dir.c
index f7b7eb1933..b26df63f71 100644
--- a/dir.c
+++ b/dir.c
@@ -6,7 +6,7 @@
$Date$
created at: Wed Jan 5 09:51:01 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/dln.c b/dln.c
index 8b46954002..6403e9a87c 100644
--- a/dln.c
+++ b/dln.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Jan 18 17:05:06 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/dln.h b/dln.h
index 4fd51cbee0..6905a36d5a 100644
--- a/dln.h
+++ b/dln.h
@@ -6,7 +6,7 @@
$Date$
created at: Wed Jan 19 16:53:09 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/enum.c b/enum.c
index 73b2767863..fa4b480f9f 100644
--- a/enum.c
+++ b/enum.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Oct 1 15:15:19 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/error.c b/error.c
index 77d63d7b38..e972bdd1b0 100644
--- a/error.c
+++ b/error.c
@@ -6,7 +6,7 @@
$Date$
created at: Mon Aug 9 16:11:34 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/euc_jp.c b/euc_jp.c
index 65729feda8..f39c4de628 100644
--- a/euc_jp.c
+++ b/euc_jp.c
@@ -78,8 +78,9 @@ static int
code_to_mbclen(OnigCodePoint code)
{
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
- else if ((code & 0xff0000) != 0) return 3;
- else if ((code & 0xff00) != 0) return 2;
+ else if (code > 0xffffff) return 0;
+ else if ((code & 0xff0000) >= 0x800000) return 3;
+ else if ((code & 0xff00) >= 0x8000) return 2;
else return 0;
}
diff --git a/eval.c b/eval.c
index 721ac6302c..63579c4e4e 100644
--- a/eval.c
+++ b/eval.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Jun 10 14:22:17 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/ext/socket/socket.c b/ext/socket/socket.c
index aee4b21567..514ba3fd59 100644
--- a/ext/socket/socket.c
+++ b/ext/socket/socket.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Mar 31 12:21:29 JST 1994
- Copyright (C) 1993-2001 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
************************************************/
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c
index 480c9ed378..08a6341b70 100644
--- a/ext/stringio/stringio.c
+++ b/ext/stringio/stringio.c
@@ -13,7 +13,7 @@
**********************************************************************/
#include "ruby.h"
-#include "rubyio.h"
+#include "ruby/io.h"
#if defined(HAVE_FCNTL_H) || defined(_WIN32)
#include <fcntl.h>
#elif defined(HAVE_SYS_FCNTL_H)
@@ -84,6 +84,18 @@ get_strio(VALUE self)
return ptr;
}
+static VALUE
+strio_substr(struct StringIO *ptr, int pos, int len)
+{
+ VALUE str = ptr->string;
+ rb_encoding *enc = rb_enc_get(str);
+ int rlen = RSTRING_LEN(str) - pos;
+
+ if (len > rlen) len = rlen;
+ if (len < 0) len = 0;
+ return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc);
+}
+
#define StringIO(obj) get_strio(obj)
#define CLOSED(ptr) (!((ptr)->flags & FMODE_READWRITE))
@@ -603,7 +615,7 @@ strio_each_byte(VALUE self)
/*
* call-seq:
- * strio.getc -> fixnum or nil
+ * strio.getc -> string or nil
*
* See IO#getc.
*/
@@ -611,15 +623,17 @@ static VALUE
strio_getc(VALUE self)
{
struct StringIO *ptr = readable(StringIO(self));
- int c;
- char ch;
+ rb_encoding *enc = rb_enc_get(ptr->string);
+ int len;
+ char *p;
if (ptr->pos >= RSTRING_LEN(ptr->string)) {
return Qnil;
}
- c = RSTRING_PTR(ptr->string)[ptr->pos++];
- ch = c & 0xff;
- return rb_str_new(&ch, 1);
+ p = RSTRING_PTR(ptr->string)+ptr->pos;
+ len = rb_enc_mbclen(p, enc);
+ ptr->pos += len;
+ return rb_enc_str_new(p, len, rb_enc_get(ptr->string));
}
/*
@@ -671,30 +685,34 @@ static VALUE
strio_ungetc(VALUE self, VALUE c)
{
struct StringIO *ptr = readable(StringIO(self));
- int cc;
- long len, pos = ptr->pos;
+ long lpos, clen;
+ char *p, *pend;
+ rb_encoding *enc;
if (NIL_P(c)) return Qnil;
if (FIXNUM_P(c)) {
- cc = FIX2INT(c);
+ int cc = FIX2INT(c);
+ char buf[16];
+
+ enc = rb_enc_get(ptr->string);
+ rb_enc_mbcput(cc, buf, enc);
+ c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc);
}
else {
SafeStringValue(c);
- if (RSTRING_LEN(c) > 1) {
- rb_warn("IO#ungetc pushes back only one byte");
- }
- cc = (unsigned char)RSTRING_PTR(c)[0];
+ enc = rb_enc_check(ptr->string, c);
}
- if (cc != EOF && pos > 0) {
- if ((len = RSTRING_LEN(ptr->string)) < pos-- ||
- (unsigned char)RSTRING_PTR(ptr->string)[pos] !=
- (unsigned char)cc) {
- strio_extend(ptr, pos, 1);
- RSTRING_PTR(ptr->string)[pos] = cc;
- OBJ_INFECT(ptr->string, self);
- }
- --ptr->pos;
+ /* get logical position */
+ lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos - 1;
+ for (;;) {
+ clen = rb_enc_mbclen(p, enc);
+ if (p+clen >= pend) break;
+ p += clen;
+ lpos++;
}
+ rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c);
+ ptr->pos = p - RSTRING_PTR(ptr->string);
+
return Qnil;
}
@@ -800,7 +818,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
e = s + limit;
}
if (NIL_P(str)) {
- str = rb_str_substr(ptr->string, ptr->pos, e - s);
+ str = strio_substr(ptr, ptr->pos, e - s);
}
else if ((n = RSTRING_LEN(str)) == 0) {
p = s;
@@ -816,13 +834,13 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
break;
}
}
- str = rb_str_substr(ptr->string, s - RSTRING_PTR(ptr->string), e - s);
+ str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s);
}
else if (n == 1) {
if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) {
e = p + 1;
}
- str = rb_str_substr(ptr->string, ptr->pos, e - s);
+ str = strio_substr(ptr, ptr->pos, e - s);
}
else {
if (n < e - s) {
@@ -843,7 +861,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
}
}
}
- str = rb_str_substr(ptr->string, ptr->pos, e - s);
+ str = strio_substr(ptr, ptr->pos, e - s);
}
ptr->pos = e - RSTRING_PTR(ptr->string);
ptr->lineno++;
@@ -944,7 +962,7 @@ strio_write(VALUE self, VALUE str)
if (TYPE(str) != T_STRING)
str = rb_obj_as_string(str);
len = RSTRING_LEN(str);
- if (!len) return INT2FIX(0);
+ if (len == 0) return INT2FIX(0);
check_modifiable(ptr);
olen = RSTRING_LEN(ptr->string);
if (ptr->flags & FMODE_APPEND) {
@@ -955,7 +973,8 @@ strio_write(VALUE self, VALUE str)
}
else {
strio_extend(ptr, ptr->pos, len);
- rb_str_update(ptr->string, ptr->pos, len, str);
+ memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len);
+ OBJ_INFECT(ptr->string, str);
}
OBJ_INFECT(ptr->string, self);
ptr->pos += len;
@@ -1070,7 +1089,7 @@ strio_read(int argc, VALUE *argv, VALUE self)
rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc);
}
if (NIL_P(str)) {
- str = rb_str_substr(ptr->string, ptr->pos, len);
+ str = strio_substr(ptr, ptr->pos, len);
}
else {
long rest = RSTRING_LEN(ptr->string) - ptr->pos;
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 63a0f1185e..44cd0f4131 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -10,6 +10,7 @@
#include "ruby/ruby.h"
#include "ruby/re.h"
+#include "ruby/encoding.h"
#define STRSCAN_VERSION "0.7.0"
@@ -189,6 +190,7 @@ strscan_initialize(int argc, VALUE *argv, VALUE self)
rb_scan_args(argc, argv, "11", &str, &need_dup);
StringValue(str);
p->str = str;
+ rb_enc_associate(self, rb_enc_get(str));
return self;
}
@@ -652,13 +654,14 @@ strscan_getch(VALUE self)
{
struct strscanner *p;
long len;
+ rb_encoding *enc = rb_enc_get(self);
GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);
if (EOS_P(p))
return Qnil;
- len = mbclen(*CURPTR(p));
+ len = rb_enc_mbclen(CURPTR(p), enc);
if (p->curr + len > S_LEN(p)) {
len = S_LEN(p) - p->curr;
}
diff --git a/ext/syck/emitter.c b/ext/syck/emitter.c
index 03bdaa2dd7..cadee64789 100644
--- a/ext/syck/emitter.c
+++ b/ext/syck/emitter.c
@@ -7,7 +7,7 @@
* Copyright (C) 2003 why the lucky stiff
*
* All Base64 code from Ruby's pack.c.
- * Ruby is Copyright (C) 1993-2003 Yukihiro Matsumoto
+ * Ruby is Copyright (C) 1993-2007 Yukihiro Matsumoto
*/
#include "ruby/ruby.h"
diff --git a/ext/tk/sample/demos-jp/anilabel.rb b/ext/tk/sample/demos-jp/anilabel.rb
index 97781fbe77..c6e5c7385b 100644
--- a/ext/tk/sample/demos-jp/anilabel.rb
+++ b/ext/tk/sample/demos-jp/anilabel.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# animated label widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/aniwave.rb b/ext/tk/sample/demos-jp/aniwave.rb
index 81e2d76b30..5f94add111 100644
--- a/ext/tk/sample/demos-jp/aniwave.rb
+++ b/ext/tk/sample/demos-jp/aniwave.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# animated wave demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/arrow.rb b/ext/tk/sample/demos-jp/arrow.rb
index 477a0abf6f..43c6eef4eb 100644
--- a/ext/tk/sample/demos-jp/arrow.rb
+++ b/ext/tk/sample/demos-jp/arrow.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# arrowhead widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/bind.rb b/ext/tk/sample/demos-jp/bind.rb
index e1e23b9893..779e395826 100644
--- a/ext/tk/sample/demos-jp/bind.rb
+++ b/ext/tk/sample/demos-jp/bind.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# text (tag bindings) widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/bitmap.rb b/ext/tk/sample/demos-jp/bitmap.rb
index b71c67d3fd..4594892c81 100644
--- a/ext/tk/sample/demos-jp/bitmap.rb
+++ b/ext/tk/sample/demos-jp/bitmap.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# bitmap widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/button.rb b/ext/tk/sample/demos-jp/button.rb
index 20f8cae299..7e9457f5b4 100644
--- a/ext/tk/sample/demos-jp/button.rb
+++ b/ext/tk/sample/demos-jp/button.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# button widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/check.rb b/ext/tk/sample/demos-jp/check.rb
index be675b9042..7545df80fa 100644
--- a/ext/tk/sample/demos-jp/check.rb
+++ b/ext/tk/sample/demos-jp/check.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# checkbutton widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/check2.rb b/ext/tk/sample/demos-jp/check2.rb
index f681a4d684..90c6dd736f 100644
--- a/ext/tk/sample/demos-jp/check2.rb
+++ b/ext/tk/sample/demos-jp/check2.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# checkbutton widget demo2 (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/clrpick.rb b/ext/tk/sample/demos-jp/clrpick.rb
index de8cd80fcd..d81ecebc83 100644
--- a/ext/tk/sample/demos-jp/clrpick.rb
+++ b/ext/tk/sample/demos-jp/clrpick.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# widget demo prompts the user to select a color (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/colors.rb b/ext/tk/sample/demos-jp/colors.rb
index c6128f9c00..68b40e69f0 100644
--- a/ext/tk/sample/demos-jp/colors.rb
+++ b/ext/tk/sample/demos-jp/colors.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# listbox widget demo 'colors' (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/cscroll.rb b/ext/tk/sample/demos-jp/cscroll.rb
index 0d6db69af6..0be26133c5 100644
--- a/ext/tk/sample/demos-jp/cscroll.rb
+++ b/ext/tk/sample/demos-jp/cscroll.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# simple scrollable canvas widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/ctext.rb b/ext/tk/sample/demos-jp/ctext.rb
index 66e1fe8107..35d43febbc 100644
--- a/ext/tk/sample/demos-jp/ctext.rb
+++ b/ext/tk/sample/demos-jp/ctext.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# Canvas Text widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/dialog1.rb b/ext/tk/sample/demos-jp/dialog1.rb
index 0d6181bfc6..07e50306ab 100644
--- a/ext/tk/sample/demos-jp/dialog1.rb
+++ b/ext/tk/sample/demos-jp/dialog1.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# a dialog box with a local grab (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/dialog2.rb b/ext/tk/sample/demos-jp/dialog2.rb
index a934378dda..f747f8d6a8 100644
--- a/ext/tk/sample/demos-jp/dialog2.rb
+++ b/ext/tk/sample/demos-jp/dialog2.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# a dialog box with a global grab (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/entry1.rb b/ext/tk/sample/demos-jp/entry1.rb
index edf3b5f71d..d794282284 100644
--- a/ext/tk/sample/demos-jp/entry1.rb
+++ b/ext/tk/sample/demos-jp/entry1.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# entry (no scrollbars) widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/entry2.rb b/ext/tk/sample/demos-jp/entry2.rb
index 7d5740e663..528ad6dec4 100644
--- a/ext/tk/sample/demos-jp/entry2.rb
+++ b/ext/tk/sample/demos-jp/entry2.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# entry (with scrollbars) widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/entry3.rb b/ext/tk/sample/demos-jp/entry3.rb
index f57dc13553..46426af6ae 100644
--- a/ext/tk/sample/demos-jp/entry3.rb
+++ b/ext/tk/sample/demos-jp/entry3.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
# entry3.rb --
#
# This demonstration script creates several entry widgets whose
diff --git a/ext/tk/sample/demos-jp/filebox.rb b/ext/tk/sample/demos-jp/filebox.rb
index f3608ab70f..04b4810b3b 100644
--- a/ext/tk/sample/demos-jp/filebox.rb
+++ b/ext/tk/sample/demos-jp/filebox.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# widget demo prompts the user to select a file (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/floor.rb b/ext/tk/sample/demos-jp/floor.rb
index b029580bd6..b7d07bdafa 100644
--- a/ext/tk/sample/demos-jp/floor.rb
+++ b/ext/tk/sample/demos-jp/floor.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# floorDisplay widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/floor2.rb b/ext/tk/sample/demos-jp/floor2.rb
index a20b31d45c..b7571a592f 100644
--- a/ext/tk/sample/demos-jp/floor2.rb
+++ b/ext/tk/sample/demos-jp/floor2.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# floorDisplay widget demo 2 (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/form.rb b/ext/tk/sample/demos-jp/form.rb
index fe456d3943..637dd9a8ea 100644
--- a/ext/tk/sample/demos-jp/form.rb
+++ b/ext/tk/sample/demos-jp/form.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# form widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/goldberg.rb b/ext/tk/sample/demos-jp/goldberg.rb
index 592b69f775..8bf0104c16 100644
--- a/ext/tk/sample/demos-jp/goldberg.rb
+++ b/ext/tk/sample/demos-jp/goldberg.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# Ruby/Tk Goldverg demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/hello b/ext/tk/sample/demos-jp/hello
index 859ebd950e..08f154d499 100644
--- a/ext/tk/sample/demos-jp/hello
+++ b/ext/tk/sample/demos-jp/hello
@@ -1,4 +1,5 @@
#!/usr/bin/env ruby
+# -*- coding: euc-jp -*-
require 'tk'
TkButton.new(nil,
diff --git a/ext/tk/sample/demos-jp/hscale.rb b/ext/tk/sample/demos-jp/hscale.rb
index 37d215435c..690479d6d1 100644
--- a/ext/tk/sample/demos-jp/hscale.rb
+++ b/ext/tk/sample/demos-jp/hscale.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
require "tkcanvas"
if defined?($hscale_demo) && $hscale_deom
diff --git a/ext/tk/sample/demos-jp/icon.rb b/ext/tk/sample/demos-jp/icon.rb
index e4fef5cae7..26382a57a7 100644
--- a/ext/tk/sample/demos-jp/icon.rb
+++ b/ext/tk/sample/demos-jp/icon.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# iconic button widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/image1.rb b/ext/tk/sample/demos-jp/image1.rb
index 3d47f844e5..3b56d240dc 100644
--- a/ext/tk/sample/demos-jp/image1.rb
+++ b/ext/tk/sample/demos-jp/image1.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# two image widgets demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/image2.rb b/ext/tk/sample/demos-jp/image2.rb
index e2e2a2b036..de627448c1 100644
--- a/ext/tk/sample/demos-jp/image2.rb
+++ b/ext/tk/sample/demos-jp/image2.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# widget demo 'load image' (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/image3.rb b/ext/tk/sample/demos-jp/image3.rb
index d9f378c116..36c1823745 100644
--- a/ext/tk/sample/demos-jp/image3.rb
+++ b/ext/tk/sample/demos-jp/image3.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
# image3.rb
#
# This demonstration script creates a simple collection of widgets
diff --git a/ext/tk/sample/demos-jp/items.rb b/ext/tk/sample/demos-jp/items.rb
index 38774d10d2..d538fac75f 100644
--- a/ext/tk/sample/demos-jp/items.rb
+++ b/ext/tk/sample/demos-jp/items.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# canvas item types widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/ixset2 b/ext/tk/sample/demos-jp/ixset2
index 8947daa4b4..5b816e40b1 100644
--- a/ext/tk/sample/demos-jp/ixset2
+++ b/ext/tk/sample/demos-jp/ixset2
@@ -1,4 +1,5 @@
#!/usr/bin/env ruby
+# -*- coding: euc-jp -*-
#
# ixset --
# A nice interface to "xset" to change X server settings
diff --git a/ext/tk/sample/demos-jp/label.rb b/ext/tk/sample/demos-jp/label.rb
index 59626289fc..a1ecc2ec80 100644
--- a/ext/tk/sample/demos-jp/label.rb
+++ b/ext/tk/sample/demos-jp/label.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# label widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/labelframe.rb b/ext/tk/sample/demos-jp/labelframe.rb
index 23c974dcc2..f16b601ffd 100644
--- a/ext/tk/sample/demos-jp/labelframe.rb
+++ b/ext/tk/sample/demos-jp/labelframe.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# labelframe.rb
#
# This demonstration script creates a toplevel window containing
diff --git a/ext/tk/sample/demos-jp/menu.rb b/ext/tk/sample/demos-jp/menu.rb
index add85f7f7b..6b9e5c9e5e 100644
--- a/ext/tk/sample/demos-jp/menu.rb
+++ b/ext/tk/sample/demos-jp/menu.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# menus widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/menu84.rb b/ext/tk/sample/demos-jp/menu84.rb
index 8c2a815d78..762cfa53b8 100644
--- a/ext/tk/sample/demos-jp/menu84.rb
+++ b/ext/tk/sample/demos-jp/menu84.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# menus widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/menu8x.rb b/ext/tk/sample/demos-jp/menu8x.rb
index 050f0decb4..23efa7e790 100644
--- a/ext/tk/sample/demos-jp/menu8x.rb
+++ b/ext/tk/sample/demos-jp/menu8x.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# menus widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/menubu.rb b/ext/tk/sample/demos-jp/menubu.rb
index aa90a3087f..e73c393aa5 100644
--- a/ext/tk/sample/demos-jp/menubu.rb
+++ b/ext/tk/sample/demos-jp/menubu.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
require "tkcanvas"
def optionMenu(menubutton, varName, firstValue, *rest)
diff --git a/ext/tk/sample/demos-jp/msgbox.rb b/ext/tk/sample/demos-jp/msgbox.rb
index 983e6b6589..0fe5db7dd6 100644
--- a/ext/tk/sample/demos-jp/msgbox.rb
+++ b/ext/tk/sample/demos-jp/msgbox.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# message boxes widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/paned1.rb b/ext/tk/sample/demos-jp/paned1.rb
index 8d16d03c08..137e187417 100644
--- a/ext/tk/sample/demos-jp/paned1.rb
+++ b/ext/tk/sample/demos-jp/paned1.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# paned1.rb
#
# This demonstration script creates a toplevel window containing
diff --git a/ext/tk/sample/demos-jp/paned2.rb b/ext/tk/sample/demos-jp/paned2.rb
index 1e82eddda4..b394432b1c 100644
--- a/ext/tk/sample/demos-jp/paned2.rb
+++ b/ext/tk/sample/demos-jp/paned2.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# paned2.rb --
#
# This demonstration script creates a toplevel window containing
diff --git a/ext/tk/sample/demos-jp/pendulum.rb b/ext/tk/sample/demos-jp/pendulum.rb
index d703c74d5a..c245136d5c 100644
--- a/ext/tk/sample/demos-jp/pendulum.rb
+++ b/ext/tk/sample/demos-jp/pendulum.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# This demonstration illustrates how Tcl/Tk can be used to construct
# simulations of physical systems.
diff --git a/ext/tk/sample/demos-jp/plot.rb b/ext/tk/sample/demos-jp/plot.rb
index 902b144f72..09a3446836 100644
--- a/ext/tk/sample/demos-jp/plot.rb
+++ b/ext/tk/sample/demos-jp/plot.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# 2-D plot widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/puzzle.rb b/ext/tk/sample/demos-jp/puzzle.rb
index ad69775aab..6a3c8c8ef6 100644
--- a/ext/tk/sample/demos-jp/puzzle.rb
+++ b/ext/tk/sample/demos-jp/puzzle.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# widet demo 'puzzle' (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/radio.rb b/ext/tk/sample/demos-jp/radio.rb
index 5858b4222a..3a11c394a3 100644
--- a/ext/tk/sample/demos-jp/radio.rb
+++ b/ext/tk/sample/demos-jp/radio.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# radiobutton widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/radio2.rb b/ext/tk/sample/demos-jp/radio2.rb
index 5ac877d99a..b89520cdc5 100644
--- a/ext/tk/sample/demos-jp/radio2.rb
+++ b/ext/tk/sample/demos-jp/radio2.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# radio2.rb
#
# This demonstration script creates a toplevel window containing
diff --git a/ext/tk/sample/demos-jp/radio3.rb b/ext/tk/sample/demos-jp/radio3.rb
index 6e9a0f750b..a223a19bc2 100644
--- a/ext/tk/sample/demos-jp/radio3.rb
+++ b/ext/tk/sample/demos-jp/radio3.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# radio3.rb
#
# This demonstration script creates a toplevel window containing
diff --git a/ext/tk/sample/demos-jp/rolodex-j b/ext/tk/sample/demos-jp/rolodex-j
index dcc18cfa51..6c3ea7a484 100644
--- a/ext/tk/sample/demos-jp/rolodex-j
+++ b/ext/tk/sample/demos-jp/rolodex-j
@@ -1,4 +1,5 @@
#!/usr/bin/env ruby
+# -*- coding: euc-jp -*-
#
# rolodex --
# このスクリプトは Tom LaStrange の rolodex の一部です。
diff --git a/ext/tk/sample/demos-jp/ruler.rb b/ext/tk/sample/demos-jp/ruler.rb
index 94b4c921d3..c913e247d1 100644
--- a/ext/tk/sample/demos-jp/ruler.rb
+++ b/ext/tk/sample/demos-jp/ruler.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# ruler widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/sayings.rb b/ext/tk/sample/demos-jp/sayings.rb
index ce195a0e53..24b011f5ab 100644
--- a/ext/tk/sample/demos-jp/sayings.rb
+++ b/ext/tk/sample/demos-jp/sayings.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# listbox widget demo 'sayings' (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/search.rb b/ext/tk/sample/demos-jp/search.rb
index f5268f987f..adb72fd809 100644
--- a/ext/tk/sample/demos-jp/search.rb
+++ b/ext/tk/sample/demos-jp/search.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# Text Search widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/spin.rb b/ext/tk/sample/demos-jp/spin.rb
index c7b8096723..b8eb99c4ed 100644
--- a/ext/tk/sample/demos-jp/spin.rb
+++ b/ext/tk/sample/demos-jp/spin.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# spin.rb --
#
# This demonstration script creates several spinbox widgets.
diff --git a/ext/tk/sample/demos-jp/states.rb b/ext/tk/sample/demos-jp/states.rb
index dfae821261..3c58711bd1 100644
--- a/ext/tk/sample/demos-jp/states.rb
+++ b/ext/tk/sample/demos-jp/states.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# listbox widget demo 'states' (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/style.rb b/ext/tk/sample/demos-jp/style.rb
index 59e406bc80..50855a549a 100644
--- a/ext/tk/sample/demos-jp/style.rb
+++ b/ext/tk/sample/demos-jp/style.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# text (display styles) widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/tcolor b/ext/tk/sample/demos-jp/tcolor
index c7f7ec7289..17f7e1347e 100644
--- a/ext/tk/sample/demos-jp/tcolor
+++ b/ext/tk/sample/demos-jp/tcolor
@@ -1,4 +1,5 @@
#!/usr/bin/env ruby
+# -*- coding: euc-jp -*-
#
# tcolor --
# ¤³¤Î¥¹¥¯¥ê¥×¥È¤ÏRGB,HSB,CYM·Á¼°¤ò¥µ¥Ý¡¼¥È¤¹¤ë
diff --git a/ext/tk/sample/demos-jp/text.rb b/ext/tk/sample/demos-jp/text.rb
index 0057d5dbdc..25e0e64e9a 100644
--- a/ext/tk/sample/demos-jp/text.rb
+++ b/ext/tk/sample/demos-jp/text.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# text (basic facilities) widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/twind.rb b/ext/tk/sample/demos-jp/twind.rb
index 2b228e4acd..166a44cdb2 100644
--- a/ext/tk/sample/demos-jp/twind.rb
+++ b/ext/tk/sample/demos-jp/twind.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# text (embedded windows) widget demo (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/twind2.rb b/ext/tk/sample/demos-jp/twind2.rb
index b634f07b4b..e8009cef19 100644
--- a/ext/tk/sample/demos-jp/twind2.rb
+++ b/ext/tk/sample/demos-jp/twind2.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
#
# text (embedded windows) widget demo 2 (called by 'widget')
#
diff --git a/ext/tk/sample/demos-jp/unicodeout.rb b/ext/tk/sample/demos-jp/unicodeout.rb
index 090cdf3059..7ab415fe57 100644
--- a/ext/tk/sample/demos-jp/unicodeout.rb
+++ b/ext/tk/sample/demos-jp/unicodeout.rb
@@ -1,3 +1,5 @@
+# -*- coding: euc-jp -*-
+#
# unicodeout.rb --
#
# This demonstration script shows how you can produce output (in label
diff --git a/ext/tk/sample/demos-jp/vscale.rb b/ext/tk/sample/demos-jp/vscale.rb
index 86f6f7cdee..eb0cea250d 100644
--- a/ext/tk/sample/demos-jp/vscale.rb
+++ b/ext/tk/sample/demos-jp/vscale.rb
@@ -1,3 +1,4 @@
+# -*- coding: euc-jp -*-
require "tkcanvas"
if defined?($vscale_demo) && $vscale_demo
diff --git a/ext/tk/sample/demos-jp/widget b/ext/tk/sample/demos-jp/widget
index 11495dda54..b369bfba96 100644
--- a/ext/tk/sample/demos-jp/widget
+++ b/ext/tk/sample/demos-jp/widget
@@ -1,4 +1,5 @@
#!/usr/bin/env ruby
+# -*- coding: euc-jp -*-
# 漢字コード設定 ( tk.rb のロード時の encoding 推定/設定に使われる )
$KCODE = 'euc'
diff --git a/ext/tk/sample/tkextlib/vu/canvSticker2.rb b/ext/tk/sample/tkextlib/vu/canvSticker2.rb
index f54e748660..21f098a196 100644
--- a/ext/tk/sample/tkextlib/vu/canvSticker2.rb
+++ b/ext/tk/sample/tkextlib/vu/canvSticker2.rb
@@ -1,4 +1,5 @@
#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
require 'tk'
require 'tkextlib/vu/charts'
diff --git a/file.c b/file.c
index 2f4643dcd3..260de7c822 100644
--- a/file.c
+++ b/file.c
@@ -6,7 +6,7 @@
$Date$
created at: Mon Nov 15 12:24:34 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/gc.c b/gc.c
index a9683e7493..8054d9acb3 100644
--- a/gc.c
+++ b/gc.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Oct 5 09:44:46 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/hash.c b/hash.c
index d9198f678a..27c0c5bdfc 100644
--- a/hash.c
+++ b/hash.c
@@ -6,7 +6,7 @@
$Date$
created at: Mon Nov 22 18:51:18 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index aefee7f674..ae231c7d91 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -6,7 +6,7 @@
$Date$
created at: Thu Jun 10 14:22:17 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
@@ -337,7 +337,7 @@ RUBY_EXTERN VALUE rb_default_rs;
RUBY_EXTERN VALUE rb_output_rs;
VALUE rb_io_write(VALUE, VALUE);
VALUE rb_io_gets(VALUE);
-VALUE rb_io_getc(VALUE);
+VALUE rb_io_getbyte(VALUE);
VALUE rb_io_ungetc(VALUE, VALUE);
VALUE rb_io_close(VALUE);
VALUE rb_io_flush(VALUE);
@@ -444,7 +444,7 @@ VALUE rb_reg_last_match(VALUE);
VALUE rb_reg_match_pre(VALUE);
VALUE rb_reg_match_post(VALUE);
VALUE rb_reg_match_last(VALUE);
-VALUE rb_reg_new(const char*, long, int);
+VALUE rb_reg_new(VALUE, int);
VALUE rb_reg_match(VALUE, VALUE);
VALUE rb_reg_match2(VALUE);
int rb_reg_options(VALUE);
@@ -498,6 +498,7 @@ VALUE rb_str_unlocktmp(VALUE);
VALUE rb_str_dup_frozen(VALUE);
VALUE rb_str_plus(VALUE, VALUE);
VALUE rb_str_times(VALUE, VALUE);
+int rb_str_sublen(VALUE, int);
VALUE rb_str_substr(VALUE, long, long);
void rb_str_modify(VALUE);
VALUE rb_str_freeze(VALUE);
diff --git a/include/ruby/io.h b/include/ruby/io.h
index 6d22de8df1..becf262eca 100644
--- a/include/ruby/io.h
+++ b/include/ruby/io.h
@@ -6,7 +6,7 @@
$Date$
created at: Fri Nov 12 16:47:09 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
@@ -22,6 +22,7 @@ extern "C" {
#include <stdio.h>
#include <errno.h>
+#include "ruby/encoding.h"
#if defined(HAVE_STDIO_EXT_H)
#include <stdio_ext.h>
@@ -44,6 +45,7 @@ typedef struct rb_io_t {
int rbuf_off;
int rbuf_len;
int rbuf_capa;
+ rb_encoding *enc;
} rb_io_t;
#define HAVE_RB_IO_T 1
diff --git a/include/ruby/node.h b/include/ruby/node.h
index e3722e0c41..4d6d73c7fd 100644
--- a/include/ruby/node.h
+++ b/include/ruby/node.h
@@ -6,7 +6,7 @@
$Date$
created at: Fri May 28 15:14:02 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/include/ruby/re.h b/include/ruby/re.h
index ae6e0357b6..5b0cc24e9a 100644
--- a/include/ruby/re.h
+++ b/include/ruby/re.h
@@ -6,7 +6,7 @@
$Date$
created at: Thu Sep 30 14:18:32 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/include/ruby/regex.h b/include/ruby/regex.h
index 118c37c480..ad736775fe 100644
--- a/include/ruby/regex.h
+++ b/include/ruby/regex.h
@@ -5,7 +5,7 @@
$Author$
$Date$
- Copyright (C) 1993-2005 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
@@ -29,10 +29,8 @@ extern "C" {
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
-#undef ismbchar
-#define ismbchar(c) (mbclen((c)) != 1)
-#define mbclen(c) \
- ONIGENC_MBC_ENC_LEN(OnigEncDefaultCharEncoding, (UChar* )(&c))
+#define ismbchar(p, enc) (mbclen((p),(enc)) != 1)
+#define mbclen(p,enc) rb_enc_mbclen((p), (enc))
#endif /* ifndef ONIG_RUBY_M17N */
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index ea3265505b..2dbf29956e 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -5,7 +5,7 @@
$Author$
created at: Thu Jun 10 14:26:32 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
@@ -455,6 +455,7 @@ struct RString {
(!(RBASIC(str)->flags & RSTRING_NOEMBED) ? \
RSTRING(str)->as.ary : \
RSTRING(str)->as.heap.ptr)
+#define RSTRING_END(str) (RSTRING_PTR(str)+RSTRING_LEN(str))
struct RArray {
struct RBasic basic;
@@ -598,6 +599,32 @@ enum ruby_value_flags {
#define FL_USER6 RUBY_FL_USER6
RUBY_FL_USER7 = (1<<(FL_USHIFT+7)),
#define FL_USER7 RUBY_FL_USER7
+ RUBY_FL_USER8 = (1<<(FL_USHIFT+8)),
+#define FL_USER8 RUBY_FL_USER8
+ RUBY_FL_USER9 = (1<<(FL_USHIFT+9)),
+#define FL_USER9 RUBY_FL_USER9
+ RUBY_FL_USER10 = (1<<(FL_USHIFT+10)),
+#define FL_USER10 RUBY_FL_USER10
+ RUBY_FL_USER11 = (1<<(FL_USHIFT+11)),
+#define FL_USER11 RUBY_FL_USER11
+ RUBY_FL_USER12 = (1<<(FL_USHIFT+12)),
+#define FL_USER12 RUBY_FL_USER12
+ RUBY_FL_USER13 = (1<<(FL_USHIFT+13)),
+#define FL_USER13 RUBY_FL_USER13
+ RUBY_FL_USER14 = (1<<(FL_USHIFT+14)),
+#define FL_USER14 RUBY_FL_USER14
+ RUBY_FL_USER15 = (1<<(FL_USHIFT+15)),
+#define FL_USER15 RUBY_FL_USER15
+ RUBY_FL_USER16 = (1<<(FL_USHIFT+16)),
+#define FL_USER16 RUBY_FL_USER16
+ RUBY_FL_USER17 = (1<<(FL_USHIFT+17)),
+#define FL_USER17 RUBY_FL_USER17
+ RUBY_FL_USER18 = (1<<(FL_USHIFT+18)),
+#define FL_USER18 RUBY_FL_USER18
+ RUBY_FL_USER19 = (1<<(FL_USHIFT+19)),
+#define FL_USER19 RUBY_FL_USER19
+ RUBY_FL_USER20 = (1<<(FL_USHIFT+20)),
+#define FL_USER20 RUBY_FL_USER20
};
#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x))
@@ -667,6 +694,7 @@ void rb_gc_unregister_address(VALUE*);
ID rb_intern(const char*);
ID rb_intern2(const char*, long);
+ID rb_intern_str(VALUE str);
const char *rb_id2name(ID);
ID rb_to_id(VALUE);
VALUE rb_id2str(ID);
diff --git a/include/ruby/signal.h b/include/ruby/signal.h
index 29ffcd9f11..23db123d92 100644
--- a/include/ruby/signal.h
+++ b/include/ruby/signal.h
@@ -6,7 +6,7 @@
$Date$
created at: Wed Aug 16 01:15:38 JST 1995
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/include/ruby/util.h b/include/ruby/util.h
index 8437872479..f9ce983269 100644
--- a/include/ruby/util.h
+++ b/include/ruby/util.h
@@ -6,7 +6,7 @@
$Date$
created at: Thu Mar 9 11:55:53 JST 1995
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/inits.c b/inits.c
index 7c53fb7aa2..ce7172987f 100644
--- a/inits.c
+++ b/inits.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Dec 28 16:01:58 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/insns.def b/insns.def
index 60d9d9a634..685f49f606 100644
--- a/insns.def
+++ b/insns.def
@@ -406,7 +406,7 @@ toregexp
(VALUE val)
{
volatile VALUE tmp = str; /* for GC */
- val = rb_reg_new(RSTRING_PTR(str), RSTRING_LEN(str), flag);
+ val = rb_reg_new(str, flag);
}
/**
diff --git a/io.c b/io.c
index 850c1a657e..e9fd2a0b07 100644
--- a/io.c
+++ b/io.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Oct 15 18:08:59 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
@@ -278,27 +278,38 @@ io_unread(rb_io_t *fptr)
return;
}
-static int
-io_ungetc(int c, rb_io_t *fptr)
+static void
+io_ungetc(VALUE str, rb_io_t *fptr)
{
+ int len = RSTRING_LEN(str);
+
if (fptr->rbuf == NULL) {
fptr->rbuf_off = 0;
fptr->rbuf_len = 0;
- fptr->rbuf_capa = 8192;
+ if (len > 8192)
+ fptr->rbuf_capa = len;
+ else
+ fptr->rbuf_capa = 8192;
fptr->rbuf = ALLOC_N(char, fptr->rbuf_capa);
}
- if (c < 0 || fptr->rbuf_len == fptr->rbuf_capa) {
- return -1;
- }
if (fptr->rbuf_off == 0) {
- if (fptr->rbuf_len)
- MEMMOVE(fptr->rbuf+1, fptr->rbuf, char, fptr->rbuf_len);
- fptr->rbuf_off = 1;
+ if (fptr->rbuf_len) {
+ MEMMOVE(fptr->rbuf+len, fptr->rbuf, char, fptr->rbuf_len);
+ }
+ fptr->rbuf_off = len;
}
- fptr->rbuf_off--;
- fptr->rbuf_len++;
- fptr->rbuf[fptr->rbuf_off] = c;
- return c;
+ else if (fptr->rbuf_off < len) {
+ int capa = fptr->rbuf_len + len;
+ char *buf = ALLOC_N(char, capa);
+
+ if (fptr->rbuf_len) {
+ MEMMOVE(buf+len, fptr->rbuf+fptr->rbuf_off, char, fptr->rbuf_len);
+ }
+ fptr->rbuf_off = len;
+ }
+ fptr->rbuf_off-=len;
+ fptr->rbuf_len+=len;
+ MEMMOVE(fptr->rbuf+fptr->rbuf_off, RSTRING_PTR(str), char, len);
}
static rb_io_t *
@@ -875,16 +886,10 @@ rb_io_rewind(VALUE io)
}
static int
-io_getc(rb_io_t *fptr)
+io_fillbuf(rb_io_t *fptr)
{
int r;
- if (fptr->fd == 0 && (fptr->mode & FMODE_TTY) && TYPE(rb_stdout) == T_FILE) {
- rb_io_t *ofp;
- GetOpenFile(rb_stdout, ofp);
- if (ofp->mode & FMODE_TTY) {
- rb_io_flush(rb_stdout);
- }
- }
+
if (fptr->rbuf == NULL) {
fptr->rbuf_off = 0;
fptr->rbuf_len = 0;
@@ -906,9 +911,7 @@ io_getc(rb_io_t *fptr)
if (r == 0)
return -1; /* EOF */
}
- fptr->rbuf_off++;
- fptr->rbuf_len--;
- return (unsigned char)fptr->rbuf[fptr->rbuf_off-1];
+ return 0;
}
/*
@@ -947,20 +950,16 @@ VALUE
rb_io_eof(VALUE io)
{
rb_io_t *fptr;
- int ch;
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
if (READ_DATA_PENDING(fptr)) return Qfalse;
READ_CHECK(fptr);
- ch = io_getc(fptr);
-
- if (ch != EOF) {
- io_ungetc(ch, fptr);
- return Qfalse;
+ if (io_fillbuf(fptr) < 0) {
+ return Qtrue;
}
- return Qtrue;
+ return Qfalse;
}
/*
@@ -1167,13 +1166,9 @@ io_fread(VALUE str, long offset, rb_io_t *fptr)
}
rb_thread_wait_fd(fptr->fd);
rb_io_check_closed(fptr);
- c = io_getc(fptr);
- if (c < 0) {
+ if (io_fillbuf(fptr) < 0) {
break;
}
- RSTRING_PTR(str)[offset++] = c;
- if (offset > RSTRING_LEN(str)) break;
- n--;
}
return len - n;
}
@@ -1599,9 +1594,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
}
rb_thread_wait_fd(fptr->fd);
rb_io_check_closed(fptr);
- c = io_getc(fptr);
- limit--;
- if (c < 0) {
+ if (io_fillbuf(fptr) < 0) {
*lp = limit;
return c;
}
@@ -1640,10 +1633,8 @@ swallow(rb_io_t *fptr, int term)
}
rb_thread_wait_fd(fptr->fd);
rb_io_check_closed(fptr);
- c = io_getc(fptr);
- if (c != term) {
- io_ungetc(c, fptr);
- return Qtrue;
+ if (io_fillbuf(fptr) < 0) {
+ break;
}
} while (c != EOF);
return Qfalse;
@@ -2020,20 +2011,24 @@ static VALUE
rb_io_each_byte(VALUE io)
{
rb_io_t *fptr;
- int c;
+ char *p, *e;
RETURN_ENUMERATOR(io, 0, 0);
GetOpenFile(io, fptr);
for (;;) {
+ p = fptr->rbuf+fptr->rbuf_off;
+ e = p + fptr->rbuf_len;
+ while (p < e) {
+ rb_yield(INT2FIX(*p & 0xff));
+ p++;
+ }
rb_io_check_readable(fptr);
READ_CHECK(fptr);
- c = io_getc(fptr);
- if (c < 0) {
+ if (io_fillbuf(fptr) < 0) {
break;
}
- rb_yield(INT2FIX(c & 0xff));
- }
+ }
return io;
}
@@ -2070,54 +2065,54 @@ rb_io_bytes(VALUE str)
return rb_enumeratorize(str, ID2SYM(rb_intern("each_byte")), 0, 0);
}
-VALUE
-rb_io_getc(VALUE io)
-{
- rb_io_t *fptr;
- int c;
-
- GetOpenFile(io, fptr);
- rb_io_check_readable(fptr);
-
- READ_CHECK(fptr);
- c = io_getc(fptr);
-
- if (c < 0) {
- return Qnil;
- }
- return INT2FIX(c & 0xff);
-}
-
/*
* call-seq:
- * ios.getc => string or nil
- *
+ * ios.getc => string or nil
+ *
* Reads a one-character string from <em>ios</em>. Returns
* <code>nil</code> if called at end of file.
- *
+ *
* f = File.new("testfile")
* f.getc #=> "8"
* f.getc #=> "1"
*/
-VALUE
-rb_io_getc_m(VALUE io)
+static VALUE
+rb_io_getc(VALUE io)
{
- char ch;
+ rb_encoding *enc;
rb_io_t *fptr;
- int c;
+ int n, left;
+ VALUE str;
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
+ enc = rb_enc_get(io);
READ_CHECK(fptr);
- c = io_getc(fptr);
-
- if (c < 0) {
- return Qnil;
+ if (io_fillbuf(fptr) < 0) {
+ rb_eof_error();
+ }
+ n = rb_enc_mbclen(fptr->rbuf+fptr->rbuf_off, enc);
+ if (n < fptr->rbuf_len) {
+ str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
+ fptr->rbuf_off += n;
+ fptr->rbuf_len -= n;
+ }
+ else {
+ str = rb_str_new(0, n);
+ left = fptr->rbuf_len;
+ MEMCPY(RSTRING_PTR(str), fptr->rbuf+fptr->rbuf_off, char, left);
+ if (io_fillbuf(fptr) < 0) {
+ rb_eof_error();
+ }
+ MEMCPY(RSTRING_PTR(str)+left, fptr->rbuf, char, n-left);
+ fptr->rbuf_off += left;
+ fptr->rbuf_len -= left;
}
- ch = c & 0xff;
- return rb_str_new(&ch, 1);
+ rb_enc_associate(str, enc);
+
+ return str;
}
int
@@ -2139,14 +2134,74 @@ rb_getc(FILE *f)
* call-seq:
* ios.readchar => string
*
- * Reads a character as with <code>IO#getc</code>, but raises an
+ * Reads a one-character string from <em>ios</em>. Raises an
* <code>EOFError</code> on end of file.
+ *
+ * f = File.new("testfile")
+ * f.readchar #=> "8"
+ * f.readchar #=> "1"
*/
static VALUE
rb_io_readchar(VALUE io)
{
- VALUE c = rb_io_getc_m(io);
+ VALUE c = rb_io_getc(io);
+
+ if (NIL_P(c)) {
+ rb_eof_error();
+ }
+ return c;
+}
+
+/*
+ * call-seq:
+ * ios.getbyte => fixnum or nil
+ *
+ * Gets the next 8-bit byte (0..255) from <em>ios</em>. Returns
+ * <code>nil</code> if called at end of file.
+ *
+ * f = File.new("testfile")
+ * f.getbyte #=> 84
+ * f.getbyte #=> 104
+ */
+
+VALUE
+rb_io_getbyte(VALUE io)
+{
+ rb_io_t *fptr;
+ int c;
+
+ GetOpenFile(io, fptr);
+ rb_io_check_readable(fptr);
+ READ_CHECK(fptr);
+ if (fptr->fd == 0 && (fptr->mode & FMODE_TTY) && TYPE(rb_stdout) == T_FILE) {
+ rb_io_t *ofp;
+ GetOpenFile(rb_stdout, ofp);
+ if (ofp->mode & FMODE_TTY) {
+ rb_io_flush(rb_stdout);
+ }
+ }
+ if (io_fillbuf(fptr) < 0) {
+ return Qnil;
+ }
+ fptr->rbuf_off++;
+ fptr->rbuf_len--;
+ c = (unsigned char)fptr->rbuf[fptr->rbuf_off-1];
+ return INT2FIX(c & 0xff);
+}
+
+/*
+ * call-seq:
+ * ios.readbyte => fixnum
+ *
+ * Reads a byte as with <code>IO#getbyte</code>, but raises an
+ * <code>EOFError</code> on end of file.
+ */
+
+static VALUE
+rb_io_readbyte(VALUE io)
+{
+ VALUE c = rb_io_getbyte(io);
if (NIL_P(c)) {
rb_eof_error();
@@ -2173,25 +2228,24 @@ rb_io_readchar(VALUE io)
VALUE
rb_io_ungetc(VALUE io, VALUE c)
{
+ rb_encoding *enc;
rb_io_t *fptr;
- int cc;
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
if (NIL_P(c)) return Qnil;
+ enc = rb_enc_get(io);
if (FIXNUM_P(c)) {
- cc = FIX2INT(c);
+ int cc = FIX2INT(c);
+ char buf[16];
+
+ rb_enc_mbcput(cc, buf, enc);
+ c = rb_str_new(buf, rb_enc_codelen(cc, enc));
}
else {
SafeStringValue(c);
- if (RSTRING_LEN(c) > 1) {
- rb_warn("IO#ungetc pushes back only one byte");
- }
- cc = (unsigned char)RSTRING_PTR(c)[0];
- }
- if (io_ungetc(cc, fptr) == EOF && cc != EOF) {
- rb_raise(rb_eIOError, "ungetc failed");
}
+ io_ungetc(c, fptr);
return Qnil;
}
@@ -5465,7 +5519,29 @@ argf_getc(void)
ch = rb_funcall3(current_file, rb_intern("getc"), 0, 0);
}
else {
- ch = rb_io_getc_m(current_file);
+ ch = rb_io_getc(current_file);
+ }
+ if (NIL_P(ch) && next_p != -1) {
+ argf_close(current_file);
+ next_p = 1;
+ goto retry;
+ }
+
+ return ch;
+}
+
+static VALUE
+argf_getbyte(void)
+{
+ VALUE ch;
+
+ retry:
+ if (!next_argv()) return Qnil;
+ if (TYPE(current_file) != T_FILE) {
+ ch = rb_funcall3(current_file, rb_intern("getbyte"), 0, 0);
+ }
+ else {
+ ch = rb_io_getbyte(current_file);
}
if (NIL_P(ch) && next_p != -1) {
argf_close(current_file);
@@ -5479,10 +5555,32 @@ argf_getc(void)
static VALUE
argf_readchar(void)
{
+ VALUE ch;
+
+ retry:
+ if (!next_argv()) return Qnil;
+ if (TYPE(current_file) != T_FILE) {
+ ch = rb_funcall3(current_file, rb_intern("getc"), 0, 0);
+ }
+ else {
+ ch = rb_io_getc(current_file);
+ }
+ if (NIL_P(ch) && next_p != -1) {
+ argf_close(current_file);
+ next_p = 1;
+ goto retry;
+ }
+
+ return ch;
+}
+
+static VALUE
+argf_readbyte(void)
+{
VALUE c;
NEXT_ARGF_FORWARD(0, 0);
- c = argf_getc();
+ c = argf_getbyte();
if (NIL_P(c)) {
rb_eof_error();
}
@@ -5780,8 +5878,10 @@ Init_IO(void)
rb_define_method(rb_cIO, "write", io_write, 1);
rb_define_method(rb_cIO, "gets", rb_io_gets_m, -1);
rb_define_method(rb_cIO, "readline", rb_io_readline, -1);
- rb_define_method(rb_cIO, "getc", rb_io_getc_m, 0);
+ rb_define_method(rb_cIO, "getc", rb_io_getc, 0);
+ rb_define_method(rb_cIO, "getbyte", rb_io_getbyte, 0);
rb_define_method(rb_cIO, "readchar", rb_io_readchar, 0);
+ rb_define_method(rb_cIO, "readbyte", rb_io_readbyte, 0);
rb_define_method(rb_cIO, "ungetc",rb_io_ungetc, 1);
rb_define_method(rb_cIO, "<<", rb_io_addstr, 1);
rb_define_method(rb_cIO, "flush", rb_io_flush, 0);
@@ -5851,7 +5951,9 @@ Init_IO(void)
rb_define_singleton_method(argf, "gets", rb_f_gets, -1);
rb_define_singleton_method(argf, "readline", rb_f_readline, -1);
rb_define_singleton_method(argf, "getc", argf_getc, 0);
+ rb_define_singleton_method(argf, "getbyte", argf_getbyte, 0);
rb_define_singleton_method(argf, "readchar", argf_readchar, 0);
+ rb_define_singleton_method(argf, "readbyte", argf_readbyte, 0);
rb_define_singleton_method(argf, "tell", argf_tell, 0);
rb_define_singleton_method(argf, "seek", argf_seek_m, -1);
rb_define_singleton_method(argf, "rewind", argf_rewind, 0);
diff --git a/main.c b/main.c
index 09c0bdb8bf..b5c5bb3351 100644
--- a/main.c
+++ b/main.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Aug 19 13:19:58 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/marshal.c b/marshal.c
index c6c512bdb0..b4ba9b6140 100644
--- a/marshal.c
+++ b/marshal.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Apr 27 16:30:01 JST 1995
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
@@ -1105,7 +1105,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
{
volatile VALUE str = r_bytes(arg);
int options = r_byte(arg);
- v = r_entry(rb_reg_new(RSTRING_PTR(str), RSTRING_LEN(str), options), arg);
+ v = r_entry(rb_reg_new(str, options), arg);
}
break;
diff --git a/math.c b/math.c
index d0c2eee487..1e55902db8 100644
--- a/math.c
+++ b/math.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Jan 25 14:12:56 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/numeric.c b/numeric.c
index 9b723d8581..f6782aebd8 100644
--- a/numeric.c
+++ b/numeric.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Aug 13 18:33:09 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/object.c b/object.c
index bcff31eebc..c6fa1176f3 100644
--- a/object.c
+++ b/object.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Jul 15 12:01:24 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/pack.c b/pack.c
index 726048fae0..ff0cdae971 100644
--- a/pack.c
+++ b/pack.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Feb 10 15:17:05 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/parse.y b/parse.y
index e76c7c3187..093d8c3593 100644
--- a/parse.y
+++ b/parse.y
@@ -6,7 +6,7 @@
$Date$
created at: Fri May 28 18:02:42 JST 1993
- Copyright (C) 1993-2004 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
@@ -20,6 +20,8 @@
#include "ruby/intern.h"
#include "ruby/node.h"
#include "ruby/st.h"
+#include "ruby/encoding.h"
+#include "regenc.h"
#include <stdio.h>
#include <errno.h>
#include <ctype.h>
@@ -255,8 +257,13 @@ struct parser_params {
VALUE parsing_thread;
int toplevel_p;
#endif
+
+ rb_encoding *enc;
};
+#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
+#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
+
#ifdef YYMALLOC
void *rb_parser_malloc(struct parser_params *, size_t);
void *rb_parser_realloc(struct parser_params *, void *, size_t);
@@ -3555,7 +3562,7 @@ strings : string
/*%%%*/
NODE *node = $1;
if (!node) {
- node = NEW_STR(rb_str_new(0, 0));
+ node = NEW_STR(STR_NEW(0, 0));
}
else {
node = evstr2dstr(node);
@@ -3594,7 +3601,7 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END
/*%%%*/
NODE *node = $2;
if (!node) {
- node = NEW_XSTR(rb_str_new(0, 0));
+ node = NEW_XSTR(STR_NEW(0, 0));
}
else {
switch (nd_type(node)) {
@@ -3605,7 +3612,7 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END
nd_set_type(node, NODE_DXSTR);
break;
default:
- node = NEW_NODE(NODE_DXSTR, rb_str_new(0, 0), 1, NEW_LIST(node));
+ node = NEW_NODE(NODE_DXSTR, STR_NEW(0, 0), 1, NEW_LIST(node));
break;
}
}
@@ -3622,20 +3629,18 @@ regexp : tREGEXP_BEG xstring_contents tREGEXP_END
int options = $3;
NODE *node = $2;
if (!node) {
- node = NEW_LIT(reg_compile("", 0, options));
+ node = NEW_LIT(rb_reg_compile(0, options & ~RE_OPTION_ONCE));
}
else switch (nd_type(node)) {
case NODE_STR:
{
VALUE src = node->nd_lit;
nd_set_type(node, NODE_LIT);
- node->nd_lit = reg_compile(RSTRING_PTR(src),
- RSTRING_LEN(src),
- options);
+ node->nd_lit = rb_reg_compile(src, options&~RE_OPTION_ONCE);
}
break;
default:
- node = NEW_NODE(NODE_DSTR, rb_str_new(0, 0), 1, NEW_LIST(node));
+ node = NEW_NODE(NODE_DSTR, STR_NEW(0, 0), 1, NEW_LIST(node));
case NODE_DSTR:
if (options & RE_OPTION_ONCE) {
nd_set_type(node, NODE_DREGX_ONCE);
@@ -3880,7 +3885,7 @@ dsym : tSYMBEG xstring_contents tSTRING_END
nd_set_type($$, NODE_LIT);
break;
default:
- $$ = NEW_NODE(NODE_DSYM, rb_str_new(0, 0), 1, NEW_LIST($$));
+ $$ = NEW_NODE(NODE_DSYM, STR_NEW(0, 0), 1, NEW_LIST($$));
break;
}
}
@@ -4518,7 +4523,7 @@ ripper_dispatch_scan_event(struct parser_params *parser, int t)
if (lex_p < parser->tokp) rb_raise(rb_eRuntimeError, "lex_p < tokp");
if (lex_p == parser->tokp) return;
- str = rb_str_new(parser->tokp, lex_p - parser->tokp);
+ str = STR_NEW(parser->tokp, lex_p - parser->tokp);
yylval.val = ripper_dispatch1(parser, ripper_token2eventid(t), str);
ripper_flush(parser);
}
@@ -4552,7 +4557,11 @@ ripper_dispatch_delayed_token(struct parser_params *parser, int t)
/* As in Harbison and Steele. */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
-#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_' || ismbchar(c)))
+
+#define parser_mbclen() mbclen((lex_p-1),parser->enc)
+#define is_identchar(p, enc) (rb_enc_isalnum(*p, enc) || (*p) == '_' || ismbchar(p, enc))
+#define parser_ismbchar() ismbchar((lex_p-1), parser->enc)
+#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),parser->enc))
static int
parser_yyerror(struct parser_params *parser, const char *msg)
@@ -4596,7 +4605,7 @@ parser_yyerror(struct parser_params *parser, const char *msg)
rb_compile_error_append("%s", buf);
}
#else
- dispatch1(parse_error, rb_str_new2(msg));
+ dispatch1(parse_error, STR_NEW2(msg));
#endif /* !RIPPER */
return 0;
}
@@ -4634,7 +4643,7 @@ yycompile(struct parser_params *parser, const char *f, int line)
if (!compile_for_eval && rb_safe_level() == 0) {
ruby_debug_lines = ruby_suppress_tracing(debug_lines, (VALUE)f);
if (ruby_debug_lines && line > 1) {
- VALUE str = rb_str_new(0,0);
+ VALUE str = STR_NEW(0,0);
n = line - 1;
do {
rb_ary_push(ruby_debug_lines, str);
@@ -4660,7 +4669,15 @@ yycompile(struct parser_params *parser, const char *f, int line)
tree = NEW_NIL();
}
if (ruby_eval_tree_begin) {
- tree->nd_body = NEW_PRELUDE(ruby_eval_tree_begin, tree->nd_body);
+ NODE *scope = ruby_eval_tree;
+
+ if (scope) {
+ scope->nd_body = NEW_PRELUDE(ruby_eval_tree_begin, scope->nd_body);
+ }
+ return scope;
+ }
+ else {
+ return ruby_eval_tree;
}
return tree;
}
@@ -4682,7 +4699,7 @@ lex_get_str(struct parser_params *parser, VALUE s)
if (*end++ == '\n') break;
}
lex_gets_ptr = end - RSTRING_PTR(s);
- return rb_str_new(beg, end - beg);
+ return STR_NEW(beg, end - beg);
}
static VALUE
@@ -5173,8 +5190,8 @@ parser_tokadd_string(struct parser_params *parser,
}
}
}
- else if (ismbchar(uc)) {
- int i, len = mbclen(uc)-1;
+ else if (parser_ismbchar()) {
+ int i, len = parser_mbclen()-1;
for (i = 0; i < len; i++) {
tokadd(c);
@@ -5252,7 +5269,7 @@ parser_parse_string(struct parser_params *parser, NODE *quote)
}
tokfix();
- set_yylval_str(rb_str_new(tok(), toklen()));
+ set_yylval_str(STR_NEW(tok(), toklen()));
return tSTRING_CONTENT;
}
@@ -5278,8 +5295,7 @@ parser_heredoc_identifier(struct parser_params *parser)
tokadd(func);
term = c;
while ((c = nextc()) != -1 && c != term) {
- uc = (unsigned int)c;
- len = mbclen(uc);
+ len = parser_mbclen();
do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1);
}
if (c == -1) {
@@ -5289,8 +5305,7 @@ parser_heredoc_identifier(struct parser_params *parser)
break;
default:
- uc = (unsigned int)c;
- if (!is_identchar(uc)) {
+ if (!parser_is_identchar()) {
pushback(c);
if (func & STR_FUNC_INDENT) {
pushback('-');
@@ -5301,11 +5316,9 @@ parser_heredoc_identifier(struct parser_params *parser)
term = '"';
tokadd(func |= str_dquote);
do {
- uc = (unsigned int)c;
- len = mbclen(uc);
+ len = parser_mbclen();
do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1);
- } while ((c = nextc()) != -1 &&
- (uc = (unsigned char)c, is_identchar(uc)));
+ } while ((c = nextc()) != -1 && parser_is_identchar());
pushback(c);
break;
}
@@ -5317,7 +5330,7 @@ parser_heredoc_identifier(struct parser_params *parser)
len = lex_p - lex_pbeg;
lex_goto_eol(parser);
lex_strterm = rb_node_newnode(NODE_HEREDOC,
- rb_str_new(tok(), toklen()), /* nd_lit */
+ STR_NEW(tok(), toklen()), /* nd_lit */
len, /* nd_nth */
lex_lastline); /* nd_orig */
nd_set_line(lex_strterm, ruby_sourceline);
@@ -5410,7 +5423,7 @@ parser_here_document(struct parser_params *parser, NODE *here)
if (str)
rb_str_cat(str, p, pend - p);
else
- str = rb_str_new(p, pend - p);
+ str = STR_NEW(p, pend - p);
if (pend < lex_pend) rb_str_cat(str, "\n", 1);
lex_goto_eol(parser);
if (nextc() == -1) {
@@ -5436,13 +5449,13 @@ parser_here_document(struct parser_params *parser, NODE *here)
pushback(c);
if ((c = tokadd_string(func, '\n', 0, NULL)) == -1) goto error;
if (c != '\n') {
- set_yylval_str(rb_str_new(tok(), toklen()));
+ set_yylval_str(STR_NEW(tok(), toklen()));
return tSTRING_CONTENT;
}
tokadd(nextc());
if ((c = nextc()) == -1) goto error;
} while (!whole_match_p(eos, len, indent));
- str = rb_str_new(tok(), toklen());
+ str = STR_NEW(tok(), toklen());
}
heredoc_restore(lex_strterm);
lex_strterm = NEW_STRTERM(-1, 0, 0);
@@ -5487,6 +5500,7 @@ pragma_encoding(struct parser_params *parser, const char *name, const char *val)
if (parser && parser->line_count != (parser->has_shebang ? 2 : 1))
return;
rb_set_kcode(val);
+ parser->enc = rb_enc_find(val);
}
struct pragma {
@@ -5540,7 +5554,7 @@ parser_pragma(struct parser_params *parser, const char *str, int len)
#define str_copy(_s, _p, _n) ((_s) \
? (rb_str_resize((_s), (_n)), \
MEMCPY(RSTRING_PTR(_s), (_p), char, (_n)), (_s)) \
- : ((_s) = rb_str_new((_p), (_n))))
+ : ((_s) = STR_NEW((_p), (_n))))
if (len <= 7) return Qfalse;
if (!(beg = pragma_marker(str, len))) return Qfalse;
@@ -5934,8 +5948,7 @@ parser_yylex(struct parser_params *parser)
compile_error(PARSER_ARG "incomplete character syntax");
return 0;
}
- uc = (unsigned char)c;
- if (ISSPACE(c)){
+ if (rb_enc_isspace(c, parser->enc)){
if (!IS_ARG()){
int c2 = 0;
switch (c) {
@@ -5968,8 +5981,8 @@ parser_yylex(struct parser_params *parser)
return '?';
}
newtok();
- if (ismbchar(uc)) {
- int i, len = mbclen(uc)-1;
+ if (parser_ismbchar()) {
+ int i, len = parser_mbclen()-1;
tokadd(c);
for (i = 0; i < len; i++) {
@@ -5977,7 +5990,8 @@ parser_yylex(struct parser_params *parser)
tokadd(c);
}
}
- else if ((ISALNUM(c) || c == '_') && lex_p < lex_pend && is_identchar(*lex_p)) {
+ else if ((rb_enc_isalnum(c, parser->enc) || c == '_') &&
+ lex_p < lex_pend && is_identchar(lex_p, parser->enc)) {
goto ternary;
}
else if (c == '\\') {
@@ -5988,7 +6002,7 @@ parser_yylex(struct parser_params *parser)
tokadd(c);
}
tokfix();
- set_yylval_str(rb_str_new(tok(), toklen()));
+ set_yylval_str(STR_NEW(tok(), toklen()));
lex_state = EXPR_ENDARG;
return tCHAR;
@@ -6544,8 +6558,7 @@ parser_yylex(struct parser_params *parser)
}
else {
term = nextc();
- uc = (unsigned char)c;
- if (ISALNUM(term) || ismbchar(uc)) {
+ if (rb_enc_isalnum(term, parser->enc) || parser_ismbchar()) {
yyerror("unknown type of %string");
return 0;
}
@@ -6625,8 +6638,7 @@ parser_yylex(struct parser_params *parser)
switch (c) {
case '_': /* $_: last read line string */
c = nextc();
- uc = (unsigned char)c;
- if (is_identchar(uc)) {
+ if (parser_is_identchar()) {
tokadd('$');
tokadd('_');
break;
@@ -6660,8 +6672,7 @@ parser_yylex(struct parser_params *parser)
tokadd('$');
tokadd(c);
c = nextc();
- uc = (unsigned char)c;
- if (is_identchar(uc)) {
+ if (parser_is_identchar()) {
tokadd(c);
}
else {
@@ -6703,8 +6714,7 @@ parser_yylex(struct parser_params *parser)
return tNTH_REF;
default:
- uc = (unsigned char)c;
- if (!is_identchar(uc)) {
+ if (!parser_is_identchar()) {
pushback(c);
return '$';
}
@@ -6730,8 +6740,7 @@ parser_yylex(struct parser_params *parser)
}
return 0;
}
- uc = (unsigned char)c;
- if (!is_identchar(uc)) {
+ if (!parser_is_identchar()) {
pushback(c);
return '@';
}
@@ -6753,9 +6762,8 @@ parser_yylex(struct parser_params *parser)
break;
default:
- uc = (unsigned char)c;
- if (!is_identchar(uc)) {
- compile_error(PARSER_ARG "Invalid char `\\%03o' in expression", c);
+ if (!parser_is_identchar()) {
+ rb_compile_error(PARSER_ARG "Invalid char `\\%03o' in expression", c);
goto retry;
}
@@ -6763,21 +6771,18 @@ parser_yylex(struct parser_params *parser)
break;
}
- uc = (unsigned char)c;
do {
+ int i, len;
tokadd(c);
- if (ismbchar(uc)) {
- int i, len = mbclen(uc)-1;
- for (i = 0; i < len; i++) {
- c = nextc();
- tokadd(c);
- }
+ len = parser_mbclen()-1;
+ for (i = 0; i < len; i++) {
+ c = nextc();
+ tokadd(c);
}
c = nextc();
- uc = (unsigned char)c;
- } while (is_identchar(uc));
- if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) {
+ } while (parser_is_identchar());
+ if ((c == '!' || c == '?') && !peek('=')) {
tokadd(c);
}
else {
@@ -7214,7 +7219,7 @@ gettable_gen(struct parser_params *parser, ID id)
return NEW_FALSE();
}
else if (id == keyword__FILE__) {
- return NEW_STR(rb_str_new2(ruby_sourcefile));
+ return NEW_STR(STR_NEW2(ruby_sourcefile));
}
else if (id == keyword__LINE__) {
return NEW_LIT(INT2FIX(ruby_sourceline));
@@ -8115,8 +8120,7 @@ dvar_curr_gen(struct parser_params *parser, ID id)
static VALUE
reg_compile_gen(struct parser_params* parser, const char *ptr, long len, int options)
{
- VALUE rb_reg_compile(const char *, long, int);
- VALUE re = rb_reg_compile(ptr, len, (options) & ~RE_OPTION_ONCE);
+ VALUE re = rb_reg_compile(STR_NEW(ptr, len), (options) & ~RE_OPTION_ONCE);
if (NIL_P(re)) {
RB_GC_GUARD(re) = rb_obj_as_string(rb_errinfo());
@@ -8316,7 +8320,7 @@ internal_id_gen(struct parser_params *parser)
}
static int
-is_special_global_name(const char *m)
+is_special_global_name(const char *m, rb_encoding *enc)
{
switch (*m) {
case '~': case '*': case '$': case '?': case '!': case '@':
@@ -8328,11 +8332,11 @@ is_special_global_name(const char *m)
break;
case '-':
++m;
- if (is_identchar(*m)) m += mbclen(*m);
+ if (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc);
break;
default:
- if (!ISDIGIT(*m)) return 0;
- do ++m; while (ISDIGIT(*m));
+ if (!rb_enc_isdigit(*m, enc)) return 0;
+ do ++m; while (rb_enc_isdigit(*m, enc));
}
return !*m;
}
@@ -8342,6 +8346,7 @@ rb_symname_p(const char *name)
{
const char *m = name;
int localid = Qfalse;
+ rb_encoding *enc = rb_enc_from_index(0);
if (!m) return Qfalse;
switch (*m) {
@@ -8349,7 +8354,7 @@ rb_symname_p(const char *name)
return Qfalse;
case '$':
- if (is_special_global_name(++m)) return Qtrue;
+ if (is_special_global_name(++m, enc)) return Qtrue;
goto id;
case '@':
@@ -8396,10 +8401,10 @@ rb_symname_p(const char *name)
break;
default:
- localid = !ISUPPER(*m);
+ localid = !rb_enc_isupper(*m, enc);
id:
- if (*m != '_' && !ISALPHA(*m) && !ismbchar(*m)) return Qfalse;
- while (is_identchar(*m)) m += mbclen(*m);
+ if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, enc)) return Qfalse;
+ while (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc);
if (localid) {
switch (*m) {
case '!': case '?': case '=': ++m;
@@ -8411,7 +8416,7 @@ rb_symname_p(const char *name)
}
ID
-rb_intern2(const char *name, long len)
+rb_intern3(const char *name, long len, rb_encoding *enc)
{
const char *m = name;
VALUE str;
@@ -8429,13 +8434,13 @@ rb_intern2(const char *name, long len)
last = len-1;
id = 0;
- switch (*name) {
+ switch (*m) {
case '$':
id |= ID_GLOBAL;
- if (is_special_global_name(++m)) goto new_id;
+ if (is_special_global_name(++m, enc)) goto new_id;
break;
case '@':
- if (name[1] == '@') {
+ if (m[1] == '@') {
m++;
id |= ID_CLASS;
}
@@ -8445,20 +8450,21 @@ rb_intern2(const char *name, long len)
m++;
break;
default:
- if (name[0] != '_' && ISASCII(name[0]) && !ISALNUM(name[0])) {
+ if (m[0] != '_' && rb_enc_isascii((unsigned char)m[0], enc)
+ && !rb_enc_isalnum(m[0], enc)) {
/* operators */
int i;
for (i=0; op_tbl[i].token; i++) {
- if (*op_tbl[i].name == *name &&
- strcmp(op_tbl[i].name, name) == 0) {
+ if (*op_tbl[i].name == *m &&
+ strcmp(op_tbl[i].name, m) == 0) {
id = op_tbl[i].token;
goto id_register;
}
}
}
- if (name[last] == '=') {
+ if (m[last] == '=') {
/* attribute assignment */
id = rb_intern2(name, last);
if (id > tLAST_TOKEN && !is_attrset_id(id)) {
@@ -8467,7 +8473,7 @@ rb_intern2(const char *name, long len)
}
id = ID_ATTRSET;
}
- else if (ISUPPER(name[0])) {
+ else if (rb_enc_isupper(m[0], enc)) {
id = ID_CONST;
}
else {
@@ -8475,9 +8481,9 @@ rb_intern2(const char *name, long len)
}
break;
}
- if (!ISDIGIT(*m)) {
- while (m <= name + last && is_identchar(*m)) {
- m += mbclen(*m);
+ if (!rb_enc_isdigit(*m, enc)) {
+ while (m <= name + last && is_identchar(m, enc)) {
+ m += rb_enc_mbclen(m, enc);
}
}
if (m - name < len) id = ID_JUNK;
@@ -8492,11 +8498,23 @@ rb_intern2(const char *name, long len)
}
ID
+rb_intern2(const char *name, long len)
+{
+ return rb_intern3(name, len, rb_enc_from_index(0));
+}
+
+ID
rb_intern(const char *name)
{
return rb_intern2(name, strlen(name));
}
+ID
+rb_intern_str(VALUE str)
+{
+ return rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
+}
+
VALUE
rb_id2str(ID id)
{
@@ -8662,6 +8680,7 @@ parser_initialize(struct parser_params *parser)
#ifdef YYMALLOC
parser->heap = NULL;
#endif
+ parser->enc = rb_enc_from_index(0);
}
extern void rb_mark_source_filename(char *);
@@ -9013,27 +9032,27 @@ ripper_compile_error(struct parser_params *parser, const char *fmt, ...)
static void
ripper_warn0(struct parser_params *parser, const char *fmt)
{
- rb_funcall(parser->value, rb_intern("warn"), 1, rb_str_new2(fmt));
+ rb_funcall(parser->value, rb_intern("warn"), 1, STR_NEW2(fmt));
}
static void
ripper_warnI(struct parser_params *parser, const char *fmt, int a)
{
rb_funcall(parser->value, rb_intern("warn"), 2,
- rb_str_new2(fmt), INT2NUM(a));
+ STR_NEW2(fmt), INT2NUM(a));
}
static void
ripper_warnS(struct parser_params *parser, const char *fmt, const char *str)
{
rb_funcall(parser->value, rb_intern("warn"), 2,
- rb_str_new2(fmt), rb_str_new2(str));
+ STR_NEW2(fmt), STR_NEW2(str));
}
static void
ripper_warning0(struct parser_params *parser, const char *fmt)
{
- rb_funcall(parser->value, rb_intern("warning"), 1, rb_str_new2(fmt));
+ rb_funcall(parser->value, rb_intern("warning"), 1, STR_NEW2(fmt));
}
#if 0 /* unused in ripper right now */
@@ -9041,7 +9060,7 @@ static void
ripper_warningS(struct parser_params *parser, const char *fmt, const char *str)
{
rb_funcall(parser->value, rb_intern("warning"), 2,
- rb_str_new2(fmt), rb_str_new2(str));
+ STR_NEW2(fmt), STR_NEW2(str));
}
#endif
@@ -9094,7 +9113,7 @@ ripper_initialize(int argc, VALUE *argv, VALUE self)
parser->parser_lex_input = src;
parser->eofp = Qfalse;
if (NIL_P(fname)) {
- fname = rb_str_new2("(ripper)");
+ fname = STR_NEW2("(ripper)");
}
else {
StringValue(fname);
diff --git a/prec.c b/prec.c
index 981dcbc85c..da394027c6 100644
--- a/prec.c
+++ b/prec.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Jan 26 02:40:41 2000
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/process.c b/process.c
index 5374375e1b..92aaefca8c 100644
--- a/process.c
+++ b/process.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Aug 10 14:30:50 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/random.c b/random.c
index d37995baff..077415420f 100644
--- a/random.c
+++ b/random.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Dec 24 16:39:21 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/range.c b/range.c
index 0d00c93cab..934f56b7a2 100644
--- a/range.c
+++ b/range.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Aug 19 17:46:47 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/re.c b/re.c
index 824d56210c..d44f274197 100644
--- a/re.c
+++ b/re.c
@@ -5,12 +5,13 @@
$Author$
created at: Mon Aug 9 18:24:49 JST 1993
- Copyright (C) 1993-2006 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
#include "ruby/ruby.h"
#include "ruby/re.h"
+#include "ruby/encoding.h"
#include "regint.h"
#include <ctype.h>
@@ -289,23 +290,27 @@ kcode_to_arg_value(unsigned int kcode)
static void
set_re_kcode_by_option(struct RRegexp *re, int options)
{
+ rb_encoding *enc = 0;
+
+ FL_UNSET(re, KCODE_MASK);
switch (options & ARG_KCODE_MASK) {
case ARG_KCODE_NONE:
- FL_UNSET(re, KCODE_MASK);
+ enc = rb_enc_from_index(0);
+ FL_SET(re, KCODE_NONE);
FL_SET(re, KCODE_FIXED);
break;
case ARG_KCODE_EUC:
- FL_UNSET(re, KCODE_MASK);
+ enc = rb_enc_find("euc-jp");
FL_SET(re, KCODE_EUC);
FL_SET(re, KCODE_FIXED);
break;
case ARG_KCODE_SJIS:
- FL_UNSET(re, KCODE_MASK);
- FL_SET(re, KCODE_SJIS);
+ enc = rb_enc_find("sjis");
FL_SET(re, KCODE_FIXED);
+ FL_SET(re, KCODE_SJIS);
break;
case ARG_KCODE_UTF8:
- FL_UNSET(re, KCODE_MASK);
+ enc = rb_enc_find("utf-8");
FL_SET(re, KCODE_UTF8);
FL_SET(re, KCODE_FIXED);
break;
@@ -315,6 +320,9 @@ set_re_kcode_by_option(struct RRegexp *re, int options)
FL_SET(re, reg_kcode);
break;
}
+ if (enc) {
+ rb_enc_associate((VALUE)re, enc);
+ }
}
static int
@@ -371,15 +379,9 @@ kcode_reset_option(void)
int
rb_reg_mbclen2(unsigned int c, VALUE re)
{
- int len;
unsigned char uc = (unsigned char)c;
- if (!FL_TEST(re, KCODE_FIXED))
- return mbclen(uc);
- kcode_set_option(re);
- len = mbclen(uc);
- kcode_reset_option();
- return len;
+ return rb_enc_mbclen(&uc, rb_enc_get(re));
}
static void
@@ -393,16 +395,17 @@ rb_reg_check(VALUE re)
static void
rb_reg_expr_str(VALUE str, const char *s, long len)
{
+ rb_encoding *enc = rb_enc_get(str);
const char *p, *pend;
int need_escape = 0;
p = s; pend = p + len;
while (p<pend) {
- if (*p == '/' || (!ISPRINT(*p) && !ismbchar(*p))) {
+ if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, enc))) {
need_escape = 1;
break;
}
- p += mbclen(*p);
+ p += mbclen(p, enc);
}
if (!need_escape) {
rb_str_buf_cat(str, s, len);
@@ -411,7 +414,7 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
p = s;
while (p<pend) {
if (*p == '\\') {
- int n = mbclen(p[1]) + 1;
+ int n = mbclen(p+1, enc) + 1;
rb_str_buf_cat(str, p, n);
p += n;
continue;
@@ -421,15 +424,15 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
rb_str_buf_cat(str, &c, 1);
rb_str_buf_cat(str, p, 1);
}
- else if (ismbchar(*p)) {
- rb_str_buf_cat(str, p, mbclen(*p));
- p += mbclen(*p);
+ else if (ismbchar(p, enc)) {
+ rb_str_buf_cat(str, p, mbclen(p, enc));
+ p += mbclen(p, enc);
continue;
}
- else if (ISPRINT(*p)) {
+ else if (rb_enc_isprint(*p, enc)) {
rb_str_buf_cat(str, p, 1);
}
- else if (!ISSPACE(*p)) {
+ else if (!rb_enc_isspace(*p, enc)) {
char b[8];
sprintf(b, "\\%03o", *p & 0377);
@@ -621,21 +624,13 @@ rb_reg_raise(const char *s, long len, const char *err, VALUE re)
rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING_PTR(desc));
}
-static VALUE
-rb_reg_error_desc(const char *s, long len, int options, onig_errmsg_buffer err)
+static void
+rb_reg_raise_str(VALUE str, const char *err, VALUE re)
{
- char opts[6];
- VALUE desc = rb_str_buf_new2(err);
-
- rb_str_buf_cat2(desc, ": /");
- rb_reg_expr_str(desc, s, len);
- opts[0] = '/';
- option_to_str(opts + 1, options);
- strlcat(opts, arg_kcode(options), sizeof(opts));
- rb_str_buf_cat2(desc, opts);
- return rb_exc_new3(rb_eRegexpError, desc);
+ rb_reg_raise(RSTRING_PTR(str), RSTRING_LEN(str), err, re);
}
+
/*
* call-seq:
* rxp.casefold? => true or false
@@ -1489,7 +1484,7 @@ match_inspect(VALUE match)
VALUE rb_cRegexp;
static int
-rb_reg_initialize(VALUE obj, const char *s, long len,
+rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
int options, onig_errmsg_buffer err)
{
struct RRegexp *re = RREGEXP(obj);
@@ -1504,7 +1499,12 @@ rb_reg_initialize(VALUE obj, const char *s, long len,
re->ptr = 0;
re->str = 0;
- set_re_kcode_by_option(re, options);
+ if (options & ARG_KCODE_MASK) {
+ set_re_kcode_by_option(re, options);
+ }
+ else {
+ rb_enc_associate((VALUE)re, enc);
+ }
if (options & ARG_KCODE_MASK) {
kcode_set_option((VALUE)re);
@@ -1525,6 +1525,13 @@ rb_reg_initialize(VALUE obj, const char *s, long len,
return 0;
}
+static int
+rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
+{
+ return rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str),
+ options, err);
+}
+
static VALUE
rb_reg_s_alloc(VALUE klass)
{
@@ -1539,27 +1546,35 @@ rb_reg_s_alloc(VALUE klass)
}
VALUE
-rb_reg_new(const char *s, long len, int options)
+rb_reg_new(VALUE s, int options)
{
VALUE re = rb_reg_s_alloc(rb_cRegexp);
onig_errmsg_buffer err;
- if (rb_reg_initialize(re, s, len, options, err) != 0) {
- rb_exc_raise(rb_reg_error_desc(s, len, options, err));
+ if (rb_reg_initialize_str(re, s, options, err) != 0) {
+ rb_reg_raise_str(s, err, re);
}
return re;
}
VALUE
-rb_reg_compile(const char *s, long len, int options)
+rb_reg_compile(VALUE str, int options)
{
VALUE re = rb_reg_s_alloc(rb_cRegexp);
onig_errmsg_buffer err;
- if (rb_reg_initialize(re, s, len, options, err) != 0) {
- rb_set_errinfo(rb_reg_error_desc(s, len, options, err));
- return Qnil;
+ if (!str) str = rb_str_new(0,0);
+ if (rb_reg_initialize_str(re, str, options, err) != 0) {
+ char opts[6];
+ VALUE desc = rb_str_buf_new2(err);
+
+ rb_str_buf_cat2(desc, ": /");
+ rb_reg_expr_str(desc, RSTRING_PTR(str), RSTRING_LEN(str));
+ opts[0] = '/';
+ option_to_str(opts + 1, options);
+ strlcat(opts, arg_kcode(options), sizeof(opts));
+ return rb_str_buf_cat2(desc, opts);
}
FL_SET(re, REG_LITERAL);
return re;
@@ -1581,8 +1596,7 @@ rb_reg_regcomp(VALUE str)
case_cache = ruby_ignorecase;
kcode_cache = reg_kcode;
- return reg_cache = rb_reg_new(RSTRING_PTR(save_str), RSTRING_LEN(save_str),
- ruby_ignorecase);
+ return reg_cache = rb_reg_new(save_str, ruby_ignorecase);
}
static int
@@ -1843,9 +1857,8 @@ static VALUE
rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
{
onig_errmsg_buffer err;
- const char *s;
- long len;
int flags = 0;
+ VALUE str;
if (argc == 0 || argc > 3) {
rb_raise(rb_eArgError, "wrong number of arguments");
@@ -1859,8 +1872,8 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
if (FL_TEST(argv[0], KCODE_FIXED)) {
flags |= re_to_kcode_arg_value(argv[0]);
}
- s = RREGEXP(argv[0])->str;
- len = RREGEXP(argv[0])->len;
+ str = rb_enc_str_new(RREGEXP(argv[0])->str, RREGEXP(argv[0])->len,
+ rb_enc_get(argv[0]));
}
else {
if (argc >= 2) {
@@ -1873,11 +1886,10 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
flags &= ~ARG_KCODE_MASK;
flags |= char_to_arg_kcode((int )kcode[0]);
}
- s = StringValuePtr(argv[0]);
- len = RSTRING_LEN(argv[0]);
+ str = argv[0];
}
- if (rb_reg_initialize(self, s, len, flags, err) != 0) {
- rb_exc_raise(rb_reg_error_desc(s, len, flags, err));
+ if (rb_reg_initialize_str(self, str, flags, err) != 0) {
+ rb_reg_raise_str(str, err, self);
}
return self;
}
@@ -1885,6 +1897,7 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
VALUE
rb_reg_quote(VALUE str)
{
+ rb_encoding *enc = rb_enc_get(str);
char *s, *send, *t;
VALUE tmp;
int c;
@@ -1893,8 +1906,8 @@ rb_reg_quote(VALUE str)
send = s + RSTRING_LEN(str);
for (; s < send; s++) {
c = *s;
- if (ismbchar(*s)) {
- int n = mbclen(*s);
+ if (ismbchar(s, enc)) {
+ int n = mbclen(s, enc);
while (n-- && s < send)
s++;
@@ -1922,8 +1935,8 @@ rb_reg_quote(VALUE str)
for (; s < send; s++) {
c = *s;
- if (ismbchar(*s)) {
- int n = mbclen(*s);
+ if (ismbchar(s, enc)) {
+ int n = mbclen(s, enc);
while (n-- && s < send)
*t++ = *s++;
@@ -2146,9 +2159,8 @@ rb_reg_init_copy(VALUE copy, VALUE re)
rb_reg_check(re);
s = RREGEXP(re)->str;
len = RREGEXP(re)->len;
- options = rb_reg_options(re);
- if (rb_reg_initialize(copy, s, len, options, err) != 0) {
- rb_exc_raise(rb_reg_error_desc(s, len, options, err));
+ if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re), err) != 0) {
+ rb_reg_raise(s, len, err, copy);
}
return copy;
}
@@ -2160,20 +2172,20 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
char *p, *s, *e;
unsigned char uc;
int no;
+ rb_encoding *enc = rb_enc_check(str, src);
-
+ rb_enc_check(str, regexp);
p = s = RSTRING_PTR(str);
e = s + RSTRING_LEN(str);
while (s < e) {
- char *ss = s;
+ char *ss = s++;
- uc = (unsigned char)*s++;
- if (ismbchar(uc)) {
- s += mbclen(uc) - 1;
+ if (ismbchar(ss, enc)) {
+ s += mbclen(ss, enc) - 1;
continue;
}
- if (uc != '\\' || s == e) continue;
+ if (*ss != '\\' || s == e) continue;
if (!val) {
val = rb_str_buf_new(ss-p);
@@ -2203,8 +2215,7 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
name_end = name = s + 1;
while (name_end < e) {
if (*name_end == '>') break;
- uc = (unsigned char)*name_end;
- name_end += mbclen(uc);
+ name_end += mbclen(name_end, enc);
}
if (name_end < e) {
no = name_to_backref_number(regs, regexp, name, name_end);
diff --git a/regint.h b/regint.h
index 6c6d2746fa..1705410eec 100644
--- a/regint.h
+++ b/regint.h
@@ -127,6 +127,7 @@
#define onig_st_nothing_key_free st_nothing_key_free
#define onig_st_is_member st_is_member
+#define USE_UPPER_CASE_TABLE
#else
#define st_init_table onig_st_init_table
diff --git a/ruby.c b/ruby.c
index 7fb36355fa..ad3c172971 100644
--- a/ruby.c
+++ b/ruby.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Aug 10 12:47:31 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
@@ -676,7 +676,7 @@ proc_options(int argc, char **argv)
case 'F':
if (*++s) {
- rb_fs = rb_reg_new(s, strlen(s), 0);
+ rb_fs = rb_reg_new(rb_str_new2(s), 0);
}
break;
@@ -962,10 +962,14 @@ load_file(VALUE parser, const char *fname, int script)
rb_raise(rb_eLoadError, "no Ruby script found in input");
}
- c = rb_io_getc(f);
+ c = rb_io_getbyte(f);
if (c == INT2FIX('#')) {
- c = rb_io_getc(f);
- if (c == INT2FIX('!') && !NIL_P(line = rb_io_gets(f))) {
+ c = rb_io_getbyte(f);
+ if (c == INT2FIX('!')) {
+ line = rb_io_gets(f);
+ if (NIL_P(line))
+ return 0;
+
if ((p = strstr(RSTRING_PTR(line), "ruby")) == 0) {
/* not ruby script, kick the program */
char **argv;
@@ -1011,8 +1015,7 @@ load_file(VALUE parser, const char *fname, int script)
}
/* push back shebang for pragma may exist in next line */
- rb_io_ungetc(f, INT2FIX('\n'));
- rb_io_ungetc(f, INT2FIX('!'));
+ rb_io_ungetc(f, rb_str_new2("!\n"));
}
else if (!NIL_P(c)) {
rb_io_ungetc(f, c);
diff --git a/signal.c b/signal.c
index 772e5ec890..53e2a0f19f 100644
--- a/signal.c
+++ b/signal.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Dec 20 10:13:44 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/sprintf.c b/sprintf.c
index 1c60837e78..7fbfe1b391 100644
--- a/sprintf.c
+++ b/sprintf.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Oct 15 10:39:26 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
@@ -14,7 +14,7 @@
#include "ruby/ruby.h"
#include "ruby/re.h"
-#include <ctype.h>
+#include "ruby/encoding.h"
#include <math.h>
#include <stdarg.h>
@@ -115,7 +115,7 @@ sign_bits(int base, const char *p)
((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth])
#define GETNUM(n, val) \
- for (; p < end && ISDIGIT(*p); p++) { \
+ for (; p < end && rb_enc_isdigit(*p, enc); p++) { \
int next_n = 10 * n + (*p - '0'); \
if (next_n / 10 != n) {\
rb_raise(rb_eArgError, #val " too big"); \
@@ -254,6 +254,7 @@ rb_f_sprintf(int argc, const VALUE *argv)
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
+ rb_encoding *enc;
const char *p, *end;
char *buf;
int blen, bsiz;
@@ -286,6 +287,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
--argv;
if (OBJ_TAINTED(fmt)) tainted = 1;
StringValue(fmt);
+ enc = rb_enc_get(fmt);
fmt = rb_str_new4(fmt);
p = RSTRING_PTR(fmt);
end = p + RSTRING_LEN(fmt);
@@ -311,7 +313,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
retry:
switch (*p) {
default:
- if (ISPRINT(*p))
+ if (rb_enc_isprint(*p, enc))
rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
else
rb_raise(rb_eArgError, "malformed format string");
@@ -409,24 +411,38 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
VALUE val = GETARG();
VALUE tmp;
- char c;
+ int c, n;
tmp = rb_check_string_type(val);
if (!NIL_P(tmp)) {
- if (RSTRING_LEN(tmp) != 1) {
+ if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
rb_raise(rb_eArgError, "%%c requires a character");
}
- c = RSTRING_PTR(tmp)[0];
+ c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc);
}
else {
- c = NUM2INT(val) & 0xff;
+ c = NUM2INT(val);
+ }
+ n = rb_enc_codelen(c, enc);
+ if (n == 0) {
+ rb_raise(rb_eArgError, "invalid character");
}
if (!(flags & FWIDTH)) {
- PUSH(&c, 1);
+ CHECK(n);
+ rb_enc_mbcput(c, &buf[blen], enc);
+ blen += n;
+ }
+ else if ((flags & FMINUS)) {
+ CHECK(n);
+ rb_enc_mbcput(c, &buf[blen], enc);
+ blen += n;
+ FILL(' ', width-1);
}
else {
- FILL(' ', width);
- buf[blen - ((flags & FMINUS) ? width : 1)] = c;
+ FILL(' ', width-1);
+ CHECK(n);
+ rb_enc_mbcput(c, &buf[blen], enc);
+ blen += n;
}
}
break;
@@ -435,30 +451,42 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
case 'p':
{
VALUE arg = GETARG();
- long len;
+ long len, slen;
if (*p == 'p') arg = rb_inspect(arg);
str = rb_obj_as_string(arg);
if (OBJ_TAINTED(str)) tainted = 1;
len = RSTRING_LEN(str);
+ enc = rb_enc_check(fmt, str);
+ if (flags&(FPREC|FWIDTH)) {
+ slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
+ if (slen < 0) {
+ rb_raise(rb_eArgError, "invalid mbstring sequence");
+ }
+ }
if (flags&FPREC) {
- if (prec < len) {
- len = prec;
+ if (prec < slen) {
+ char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
+ prec, enc);
+ slen = prec;
+ len = p - RSTRING_PTR(str);
}
}
/* need to adjust multi-byte string pos */
if (flags&FWIDTH) {
- if (width > len) {
- CHECK(width);
- width -= len;
+ if (width > slen) {
+ width -= slen;
if (!(flags&FMINUS)) {
+ CHECK(width);
while (width--) {
buf[blen++] = ' ';
}
}
+ CHECK(len);
memcpy(&buf[blen], RSTRING_PTR(str), len);
blen += len;
if (flags&FMINUS) {
+ CHECK(width);
while (width--) {
buf[blen++] = ' ';
}
@@ -666,8 +694,9 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
if (*p == 'X') {
char *pp = s;
- while (*pp) {
- *pp = toupper(*pp);
+ int c;
+ while (c = (int)*pp) {
+ *pp = rb_enc_toupper(c, enc);
pp++;
}
}
diff --git a/string.c b/string.c
index dd2d7473a8..d516d03438 100644
--- a/string.c
+++ b/string.c
@@ -6,7 +6,7 @@
$Date$
created at: Mon Aug 9 17:12:58 JST 1993
- Copyright (C) 1993-2006 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
@@ -14,6 +14,7 @@
#include "ruby/ruby.h"
#include "ruby/re.h"
+#include "ruby/encoding.h"
#define BEG(no) regs->beg[no]
#define END(no) regs->end[no]
@@ -30,6 +31,7 @@ VALUE rb_cSymbol;
#define STR_TMPLOCK FL_USER7
#define STR_NOEMBED FL_USER1
+#define STR_SHARED FL_USER2 /* = ELTS_SHARED */
#define STR_ASSOC FL_USER3
#define STR_SHARED_P(s) FL_ALL(s, STR_NOEMBED|ELTS_SHARED)
#define STR_ASSOC_P(s) FL_ALL(s, STR_NOEMBED|STR_ASSOC)
@@ -90,11 +92,6 @@ VALUE rb_cSymbol;
}\
} while (0)
-char *
-rb_str_ptr(VALUE str) {
- return RSTRING_PTR(str);
-}
-
VALUE rb_fs;
static inline void
@@ -160,6 +157,15 @@ rb_str_new(const char *ptr, long len)
}
VALUE
+rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
+{
+ VALUE str = str_new(rb_cString, ptr, len);
+
+ rb_enc_associate(str, enc);
+ return str;
+}
+
+VALUE
rb_str_new2(const char *ptr)
{
if (!ptr) {
@@ -203,6 +209,7 @@ str_new3(VALUE klass, VALUE str)
RSTRING(str2)->as.heap.aux.shared = str;
FL_SET(str2, ELTS_SHARED);
}
+ rb_enc_copy((VALUE)str2, str);
return str2;
}
@@ -233,6 +240,7 @@ str_new4(VALUE klass, VALUE str)
FL_SET(str, ELTS_SHARED);
RSTRING(str)->as.heap.aux.shared = str2;
}
+ rb_enc_copy(str2, str);
OBJ_INFECT(str2, str);
return str2;
}
@@ -392,18 +400,48 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
return str;
}
+static int
+str_strlen(VALUE str, rb_encoding *enc)
+{
+ int len;
+
+ if (!enc) enc = rb_enc_get(str);
+ len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc);
+ if (len < 0) {
+ rb_raise(rb_eArgError, "invalid mbstring sequence");
+ }
+ return len;
+}
+
/*
* call-seq:
* str.length => integer
+ * str.size => integer
*
- * Returns the length of <i>str</i>.
+ * Returns the character length of <i>str</i>.
*/
static VALUE
rb_str_length(VALUE str)
{
- long len = RSTRING_LEN(str);
- return LONG2NUM(len);
+ int len;
+
+ len = str_strlen(str, rb_enc_get(str));
+ return INT2NUM(len);
+}
+
+/*
+ * call-seq:
+ * str.bytesize => integer
+ *
+ * Returns the length of <i>str</i> in bytes.
+ */
+
+static VALUE
+rb_str_bytesize(str)
+ VALUE str;
+{
+ return INT2NUM(RSTRING_LEN(str));
}
/*
@@ -438,8 +476,10 @@ VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
VALUE str3;
+ rb_encoding *enc;
StringValue(str2);
+ enc = rb_enc_check(str1, str2);
str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
@@ -448,6 +488,7 @@ rb_str_plus(VALUE str1, VALUE str2)
if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
OBJ_TAINT(str3);
+ rb_enc_associate(str3, enc);
return str3;
}
@@ -481,8 +522,8 @@ rb_str_times(VALUE str, VALUE times)
RSTRING_PTR(str), RSTRING_LEN(str));
}
RSTRING_PTR(str2)[RSTRING_LEN(str2)] = '\0';
-
OBJ_INFECT(str2, str);
+ rb_enc_copy(str2, str);
return str2;
}
@@ -504,8 +545,10 @@ rb_str_times(VALUE str, VALUE times)
static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
- if (TYPE(arg) == T_ARRAY) {
- return rb_str_format(RARRAY_LEN(arg), RARRAY_PTR(arg), str);
+ VALUE tmp = rb_check_array_type(arg);
+
+ if (!NIL_P(tmp)) {
+ return rb_str_format(RARRAY_LEN(tmp), RARRAY_PTR(tmp), str);
}
return rb_str_format(1, &arg, str);
}
@@ -632,19 +675,66 @@ rb_str_s_try_convert(VALUE dummy, VALUE str)
return rb_check_string_type(str);
}
+static char*
+str_nth(const char *p, const char *e, int nth, rb_encoding *enc)
+{
+ p = rb_enc_nth(p, e, nth, enc);
+ if (!p) {
+ rb_raise(rb_eArgError, "invalid mbstring sequence");
+ }
+ if (p > e) {
+ rb_raise(rb_eIndexError, "index out of range");
+ }
+ return (char*)p;
+}
+
+static int
+str_offset(const char *p, const char *e, int nth, rb_encoding *enc)
+{
+ const char *pp = str_nth(p, e, nth, enc);
+
+ return pp - p;
+}
+
+static int
+str_sublen(VALUE str, int pos, rb_encoding *enc)
+{
+ if (rb_enc_mbmaxlen(enc) == 1 || pos < 0) return pos;
+ else {
+ char *p = RSTRING_PTR(str);
+ char *e = p + pos;
+ int i;
+
+ i = 0;
+ while (p < e) {
+ p += rb_enc_mbclen(p, enc);
+ i++;
+ }
+ return i;
+ }
+}
+
+int
+rb_str_sublen(VALUE str, int len)
+{
+ return str_sublen(str, len, rb_enc_get(str));
+}
+
VALUE
rb_str_substr(VALUE str, long beg, long len)
{
+ rb_encoding *enc = rb_enc_get(str);
VALUE str2;
+ int slen = str_strlen(str, enc);
if (len < 0) return Qnil;
- if (beg > RSTRING_LEN(str)) return Qnil;
+ if (beg > slen) return Qnil;
if (beg < 0) {
- beg += RSTRING_LEN(str);
+ beg += slen;
if (beg < 0) return Qnil;
}
- if (beg + len > RSTRING_LEN(str)) {
- len = RSTRING_LEN(str) - beg;
+ if (beg + len > slen) {
+ len = slen - beg;
}
if (len < 0) {
len = 0;
@@ -652,16 +742,11 @@ rb_str_substr(VALUE str, long beg, long len)
if (len == 0) {
str2 = rb_str_new5(str,0,0);
}
- else if (len > RSTRING_EMBED_LEN_MAX &&
- beg + len == RSTRING_LEN(str) && !STR_ASSOC_P(str)) {
- str2 = rb_str_new4(str);
- str2 = str_new3(rb_obj_class(str2), str2);
- RSTRING(str2)->as.heap.ptr += RSTRING_LEN(str2) - len;
- RSTRING(str2)->as.heap.len = len;
- }
else {
- str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
+ char *p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
+ str2 = rb_str_new5(str, p, str_offset(p, RSTRING_END(str), len, enc));
}
+ rb_enc_copy(str2, str);
OBJ_INFECT(str2, str);
return str2;
@@ -848,7 +933,10 @@ rb_str_buf_append(VALUE str, VALUE str2)
VALUE
rb_str_append(VALUE str, VALUE str2)
{
+ rb_encoding *enc;
+
StringValue(str2);
+ enc = rb_enc_check(str, str2);
rb_str_modify(str);
if (RSTRING_LEN(str2) > 0) {
if (STR_ASSOC_P(str)) {
@@ -863,6 +951,7 @@ rb_str_append(VALUE str, VALUE str2)
}
}
OBJ_INFECT(str, str2);
+ rb_enc_associate(str, enc);
return str;
}
@@ -875,8 +964,8 @@ rb_str_append(VALUE str, VALUE str2)
* str.concat(obj) => str
*
* Append---Concatenates the given object to <i>str</i>. If the object is a
- * <code>Fixnum</code> between 0 and 255, it is converted to a character before
- * concatenation.
+ * <code>Fixnum</code>, it is considered as a codepoint, and is converted
+ * to a character before concatenation.
*
* a = "hello "
* a << "world" #=> "hello world"
@@ -887,11 +976,17 @@ VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
if (FIXNUM_P(str2)) {
- int i = FIX2INT(str2);
- if (0 <= i && i <= 0xff) { /* byte */
- char c = i;
- return rb_str_cat(str1, &c, 1);
+ rb_encoding *enc = rb_enc_get(str1);
+ int c = FIX2INT(str2);
+ int pos = RSTRING_LEN(str1);
+ int len = rb_enc_codelen(c, enc);
+
+ if (len == 0) {
+ rb_raise(rb_eArgError, "invalid codepoint 0x%x", c);
}
+ rb_str_resize(str1, pos+len);
+ rb_enc_mbcput(c, RSTRING_PTR(str1)+pos, enc);
+ return str1;
}
return rb_str_append(str1, str2);
}
@@ -1048,6 +1143,7 @@ rb_str_cmp(VALUE str1, VALUE str2)
long len;
int retval;
+ rb_enc_check(str1, str2); /* xxxx error-less encoding check? */
len = lesser(RSTRING_LEN(str1), RSTRING_LEN(str2));
retval = rb_memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len);
if (retval == 0) {
@@ -1079,6 +1175,7 @@ rb_str_equal(VALUE str1, VALUE str2)
}
return rb_equal(str2, str1);
}
+ rb_enc_check(str1, str2); /* need weak check */
if (RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
rb_str_cmp(str1, str2) == 0) {
return Qtrue;
@@ -1194,15 +1291,23 @@ static long
rb_str_index(VALUE str, VALUE sub, long offset)
{
long pos;
+ char *s;
+ long len, slen;
+ rb_encoding *enc;
+ enc = rb_enc_check(str, sub);
+ len = str_strlen(str, enc);
+ slen = str_strlen(sub, enc);
if (offset < 0) {
- offset += RSTRING_LEN(str);
+ offset += len;
if (offset < 0) return -1;
}
- if (RSTRING_LEN(str) - offset < RSTRING_LEN(sub)) return -1;
- if (RSTRING_LEN(sub) == 0) return offset;
+ if (len - offset < slen) return -1;
+ if (slen == 0) return offset;
+ s = offset ? str_nth(RSTRING_PTR(str), RSTRING_END(str), offset, enc) : RSTRING_PTR(str);
+ /* need proceed one character at a time */
pos = rb_memsearch(RSTRING_PTR(sub), RSTRING_LEN(sub),
- RSTRING_PTR(str)+offset, RSTRING_LEN(str)-offset);
+ s, RSTRING_LEN(str)-(s - RSTRING_PTR(str)));
if (pos < 0) return pos;
return pos + offset;
}
@@ -1240,7 +1345,7 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str)
pos = 0;
}
if (pos < 0) {
- pos += RSTRING_LEN(str);
+ pos += str_strlen(str, rb_enc_get(str));
if (pos < 0) {
if (TYPE(sub) == T_REGEXP) {
rb_backref_set(Qnil);
@@ -1253,19 +1358,9 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str)
case T_REGEXP:
pos = rb_reg_adjust_startpos(sub, str, pos, 0);
pos = rb_reg_search(sub, str, pos, 0);
+ pos = rb_str_sublen(str, pos);
break;
- case T_FIXNUM: {
- int c = FIX2INT(sub);
- long len = RSTRING_LEN(str);
- char *p = RSTRING_PTR(str);
-
- for (;pos<len;pos++) {
- if ((unsigned char)p[pos] == c) return LONG2NUM(pos);
- }
- return Qnil;
- }
-
default: {
VALUE tmp;
@@ -1279,6 +1374,7 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str)
/* fall through */
case T_STRING:
pos = rb_str_index(str, sub, pos);
+ pos = rb_str_sublen(str, pos);
break;
}
@@ -1289,29 +1385,33 @@ rb_str_index_m(int argc, VALUE *argv, VALUE str)
static long
rb_str_rindex(VALUE str, VALUE sub, long pos)
{
- long len = RSTRING_LEN(sub);
- char *s, *sbeg, *t;
+ long len, slen;
+ char *s, *sbeg, *e, *t;
+ rb_encoding *enc;
+ enc = rb_enc_check(str, sub);
+ len = str_strlen(str, enc);
+ slen = str_strlen(sub, enc);
/* substring longer than string */
- if (RSTRING_LEN(str) < len) return -1;
- if (RSTRING_LEN(str) - pos < len) {
- pos = RSTRING_LEN(str) - len;
+ if (len < slen) return -1;
+ if (len - pos < slen) {
+ pos = len - slen;
+ }
+ if (len == 0) {
+ return pos;
}
sbeg = RSTRING_PTR(str);
- s = RSTRING_PTR(str) + pos;
+ e = RSTRING_END(str);
t = RSTRING_PTR(sub);
- if (len) {
- while (sbeg <= s) {
- if (rb_memcmp(s, t, len) == 0) {
- return s - RSTRING_PTR(str);
- }
- s--;
+ for (;;) {
+ s = str_nth(sbeg, e, pos, enc);
+ if (rb_memcmp(s, t, slen) == 0) {
+ return pos;
}
- return -1;
- }
- else {
- return pos;
+ if (pos == 0) break;
+ pos--;
}
+ return -1;
}
@@ -1338,11 +1438,11 @@ static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
VALUE sub;
- VALUE position;
+ VALUE vpos;
long pos;
- if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
- pos = NUM2LONG(position);
+ if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
+ pos = NUM2LONG(vpos);
if (pos < 0) {
pos += RSTRING_LEN(str);
if (pos < 0) {
@@ -1360,9 +1460,13 @@ rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
switch (TYPE(sub)) {
case T_REGEXP:
+ /* enc = rb_get_check(str, sub); */
+ pos++; /* xxx adjust for Oniguruma 5.x */
+ pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, rb_enc_get(str));
if (RREGEXP(sub)->len) {
pos = rb_reg_adjust_startpos(sub, str, pos, 1);
pos = rb_reg_search(sub, str, pos, 1);
+ pos = rb_str_sublen(str, pos);
}
if (pos >= 0) return LONG2NUM(pos);
break;
@@ -1382,23 +1486,6 @@ rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
pos = rb_str_rindex(str, sub, pos);
if (pos >= 0) return LONG2NUM(pos);
break;
-
- case T_FIXNUM: {
- int c = FIX2INT(sub);
- char *p = RSTRING_PTR(str) + pos;
- char *pbeg = RSTRING_PTR(str);
-
- if (pos == RSTRING_LEN(str)) {
- if (pos == 0) return Qnil;
- --p;
- }
- while (pbeg <= p) {
- if ((unsigned char)*p == c)
- return LONG2NUM((char*)p - RSTRING_PTR(str));
- p--;
- }
- return Qnil;
- }
}
return Qnil;
}
@@ -1462,7 +1549,7 @@ rb_str_match_m(int argc, VALUE *argv, VALUE str)
return rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
}
-static char
+static int
succ_char(char *s)
{
char c = *s;
@@ -1515,8 +1602,9 @@ succ_char(char *s)
VALUE
rb_str_succ(VALUE orig)
{
+ rb_encoding *enc;
VALUE str;
- char *sbeg, *s;
+ char *sbeg, *s, *e;
int c = -1;
long n = 0;
@@ -1524,10 +1612,13 @@ rb_str_succ(VALUE orig)
OBJ_INFECT(str, orig);
if (RSTRING_LEN(str) == 0) return str;
+ enc = rb_enc_get(orig);
sbeg = RSTRING_PTR(str); s = sbeg + RSTRING_LEN(str) - 1;
+ e = RSTRING_END(str);
while (sbeg <= s) {
- if (ISALNUM(*s)) {
+ unsigned int cc = rb_enc_codepoint(s, e, enc);
+ if (rb_enc_isalnum(cc, enc)) {
if ((c = succ_char(s)) == 0) break;
n = s - sbeg;
}
@@ -1642,13 +1733,9 @@ rb_str_aref(VALUE str, VALUE indx)
idx = FIX2LONG(indx);
num_index:
- if (idx < 0) {
- idx = RSTRING_LEN(str) + idx;
- }
- if (idx < 0 || RSTRING_LEN(str) <= idx) {
- return Qnil;
- }
- return rb_str_substr(str, idx, 1);
+ str = rb_str_substr(str, idx, 1);
+ if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
+ return str;
case T_REGEXP:
return rb_str_subpat(str, indx, 0);
@@ -1664,14 +1751,14 @@ rb_str_aref(VALUE str, VALUE indx)
long beg, len;
VALUE tmp;
- switch (rb_range_beg_len(indx, &beg, &len, RSTRING_LEN(str), 0)) {
+ len = str_strlen(str, rb_enc_get(str));
+ switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
case Qfalse:
break;
case Qnil:
return Qnil;
default:
tmp = rb_str_substr(str, beg, len);
- OBJ_INFECT(tmp, indx);
return tmp;
}
}
@@ -1745,27 +1832,8 @@ rb_str_aref_m(int argc, VALUE *argv, VALUE str)
}
static void
-rb_str_splice(VALUE str, long beg, long len, VALUE val)
+rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
{
- if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
-
- StringValue(val);
- rb_str_modify(str);
-
- if (RSTRING_LEN(str) < beg) {
- out_of_range:
- rb_raise(rb_eIndexError, "index %ld out of string", beg);
- }
- if (beg < 0) {
- if (-beg > RSTRING_LEN(str)) {
- goto out_of_range;
- }
- beg += RSTRING_LEN(str);
- }
- if (RSTRING_LEN(str) < beg + len) {
- len = RSTRING_LEN(str) - beg;
- }
-
if (len < RSTRING_LEN(val)) {
/* expand string */
RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
@@ -1776,7 +1844,7 @@ rb_str_splice(VALUE str, long beg, long len, VALUE val)
RSTRING_PTR(str) + beg + len,
RSTRING_LEN(str) - (beg + len));
}
- if (RSTRING_LEN(str) < beg && len < 0) {
+ if (RSTRING_LEN(val) < beg && len < 0) {
MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
}
if (RSTRING_LEN(val) > 0) {
@@ -1789,6 +1857,41 @@ rb_str_splice(VALUE str, long beg, long len, VALUE val)
OBJ_INFECT(str, val);
}
+static void
+rb_str_splice(VALUE str, long beg, long len, VALUE val)
+{
+ long slen;
+ char *p, *e;
+ rb_encoding *enc;
+
+ if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
+
+ StringValue(val);
+ rb_str_modify(str);
+ enc = rb_enc_check(str, val);
+ slen = str_strlen(str, enc);
+
+ if (slen < beg) {
+ out_of_range:
+ rb_raise(rb_eIndexError, "index %ld out of string", beg);
+ }
+ if (beg < 0) {
+ if (-beg > slen) {
+ goto out_of_range;
+ }
+ beg += slen;
+ }
+ if (slen < beg + len) {
+ len = slen - beg;
+ }
+ p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
+ e = str_nth(p, RSTRING_END(str), len, enc);
+ /* error check */
+ beg = p - RSTRING_PTR(str); /* physical position */
+ len = e - p; /* physical length */
+ return rb_str_splice_0(str, beg, len, val);
+}
+
void
rb_str_update(VALUE str, long beg, long len, VALUE val)
{
@@ -1822,7 +1925,8 @@ rb_str_subpat_set(VALUE str, VALUE re, int nth, VALUE val)
}
end = RMATCH(match)->END(nth);
len = end - start;
- rb_str_splice(str, start, len, val);
+ rb_enc_check(str, val);
+ rb_str_splice_0(str, start, len, val);
}
static VALUE
@@ -1834,16 +1938,7 @@ rb_str_aset(VALUE str, VALUE indx, VALUE val)
case T_FIXNUM:
idx = FIX2LONG(indx);
num_index:
- if (RSTRING_LEN(str) <= idx) {
- out_of_range:
- rb_raise(rb_eIndexError, "index %ld out of string", idx);
- }
- if (idx < 0) {
- if (-idx > RSTRING_LEN(str))
- goto out_of_range;
- idx += RSTRING_LEN(str);
- }
- rb_str_splice(str, idx, 1, val);
+ rb_str_splice(str, idx, 1, val);
return val;
case T_REGEXP:
@@ -1855,14 +1950,15 @@ rb_str_aset(VALUE str, VALUE indx, VALUE val)
if (beg < 0) {
rb_raise(rb_eIndexError, "string not matched");
}
- rb_str_splice(str, beg, RSTRING_LEN(indx), val);
+ beg = rb_str_sublen(str, beg);
+ rb_str_splice(str, beg, str_strlen(indx, 0), val);
return val;
default:
/* check if indx is Range */
{
long beg, len;
- if (rb_range_beg_len(indx, &beg, &len, RSTRING_LEN(str), 2)) {
+ if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
rb_str_splice(str, beg, len, val);
return val;
}
@@ -2352,6 +2448,7 @@ rb_str_replace(VALUE str, VALUE str2)
}
OBJ_INFECT(str, str2);
+ rb_enc_copy(str, str2);
return str;
}
@@ -2396,58 +2493,83 @@ rb_str_chr(VALUE str)
/*
* call-seq:
- * str.reverse! => str
+ * str.reverse => new_str
*
- * Reverses <i>str</i> in place.
+ * Returns a new string with the characters from <i>str</i> in reverse order.
+ *
+ * "stressed".reverse #=> "desserts"
*/
static VALUE
-rb_str_reverse_bang(VALUE str)
+rb_str_reverse(VALUE str)
{
- char *s, *e;
- char c;
+ rb_encoding *enc;
+ VALUE obj;
+ char *s, *e, *p;
+
+ if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
+ enc = rb_enc_get(str);
+ obj = rb_str_new5(str, 0, RSTRING_LEN(str));
+ s = RSTRING_PTR(str); e = RSTRING_END(str);
+ p = RSTRING_END(obj);
if (RSTRING_LEN(str) > 1) {
- rb_str_modify(str);
- s = RSTRING_PTR(str);
- e = s + RSTRING_LEN(str) - 1;
- while (s < e) {
- c = *s;
- *s++ = *e;
- *e-- = c;
+ if (rb_enc_mbmaxlen(enc) == 1) {
+ while (s < e) {
+ *--p = *s++;
+ }
+ }
+ else {
+ while (s < e) {
+ int clen = rb_enc_mbclen(s, enc);
+
+ if (clen == 0) {
+ rb_raise(rb_eArgError, "invalid mbstring sequence");
+ }
+ p -= clen;
+ memcpy(p, s, clen);
+ s += clen;
+ }
}
}
- return str;
+ STR_SET_LEN(obj, RSTRING_LEN(str));
+ OBJ_INFECT(obj, str);
+ rb_enc_associate(obj, enc);
+
+ return obj;
}
/*
* call-seq:
- * str.reverse => new_str
+ * str.reverse! => str
*
- * Returns a new string with the characters from <i>str</i> in reverse order.
- *
- * "stressed".reverse #=> "desserts"
+ * Reverses <i>str</i> in place.
*/
static VALUE
-rb_str_reverse(VALUE str)
+rb_str_reverse_bang(VALUE str)
{
- VALUE obj;
- char *s, *e, *p;
-
- if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
+ rb_encoding *enc = rb_enc_get(str);
+ char *s, *e, c;
- obj = rb_str_new5(str, 0, RSTRING_LEN(str));
- s = RSTRING_PTR(str); e = s + RSTRING_LEN(str) - 1;
- p = RSTRING_PTR(obj);
+ if (RSTRING_LEN(str) > 1) {
+ rb_str_modify(str);
+ s = RSTRING_PTR(str);
+ e = RSTRING_END(str) - 1;
- while (e >= s) {
- *p++ = *e--;
+ if (rb_enc_mbmaxlen(enc) == 1) {
+ while (s < e) {
+ c = *s;
+ *s++ = *e;
+ *e-- = c;
+ }
+ }
+ else {
+ rb_str_shared_replace(str, rb_str_reverse(str));
+ }
}
- OBJ_INFECT(obj, str);
-
- return obj;
+ return str;
}
@@ -2469,12 +2591,6 @@ rb_str_include(VALUE str, VALUE arg)
{
long i;
- if (FIXNUM_P(arg)) {
- if (memchr(RSTRING_PTR(str), FIX2INT(arg), RSTRING_LEN(str)))
- return Qtrue;
- return Qfalse;
- }
-
StringValue(arg);
i = rb_str_index(str, arg, 0);
@@ -2561,7 +2677,22 @@ rb_str_to_s(VALUE str)
return str;
}
-#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
+static void
+str_cat_char(VALUE str, int c, rb_encoding *enc)
+{
+ char s[16];
+ int n = rb_enc_codelen(c, enc);
+
+ rb_enc_mbcput(c, s, enc);
+ rb_str_buf_cat(str, s, n);
+}
+
+static void
+prefix_escape(VALUE str, int c, rb_encoding *enc)
+{
+ str_cat_char(str, '\\', enc);
+ str_cat_char(str, c, enc);
+}
/*
* call-seq:
@@ -2578,69 +2709,71 @@ rb_str_to_s(VALUE str)
VALUE
rb_str_inspect(VALUE str)
{
+ rb_encoding *enc = rb_enc_get(str);
char *p, *pend;
- VALUE result = rb_str_buf_new2("\"");
- char s[5];
+ VALUE result = rb_str_buf_new2("");
- p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+ str_cat_char(result, '"', enc);
+ p = RSTRING_PTR(str); pend = RSTRING_END(str);
while (p < pend) {
- char c = *p++;
- if (ismbchar(c) && p < pend) {
- int len = mbclen(c);
- rb_str_buf_cat(result, p - 1, len);
- p += len - 1;
- }
- else if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
- s[0] = '\\'; s[1] = c;
- rb_str_buf_cat(result, s, 2);
- }
- else if (ISPRINT(c)) {
- s[0] = c;
- rb_str_buf_cat(result, s, 1);
+ int c = rb_enc_codepoint(p, pend, enc);
+ int n = rb_enc_codelen(c, enc);
+ int cc;
+
+ p += n;
+ if (c == '"'|| c == '\\' ||
+ (c == '#' && (cc = rb_enc_codepoint(p,pend,enc),
+ (cc == '$' || cc == '@' || cc == '{')))) {
+ prefix_escape(result, c, enc);
}
else if (c == '\n') {
- s[0] = '\\'; s[1] = 'n';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'n', enc);
}
else if (c == '\r') {
- s[0] = '\\'; s[1] = 'r';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'r', enc);
}
else if (c == '\t') {
- s[0] = '\\'; s[1] = 't';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 't', enc);
}
else if (c == '\f') {
- s[0] = '\\'; s[1] = 'f';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'f', enc);
}
else if (c == '\013') {
- s[0] = '\\'; s[1] = 'v';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'v', enc);
}
else if (c == '\010') {
- s[0] = '\\'; s[1] = 'b';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'b', enc);
}
else if (c == '\007') {
- s[0] = '\\'; s[1] = 'a';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'a', enc);
}
else if (c == 033) {
- s[0] = '\\'; s[1] = 'e';
- rb_str_buf_cat(result, s, 2);
+ prefix_escape(result, 'e', enc);
+ }
+ else if (rb_enc_isprint(c, enc)) {
+ char buf[5];
+
+ rb_enc_mbcput(c, buf, enc);
+ rb_str_buf_cat(result, buf, n);
}
else {
- sprintf(s, "\\%03o", c & 0377);
- rb_str_buf_cat2(result, s);
+ char buf[5];
+ char *s = buf;
+
+ sprintf(buf, "\\%03o", c & 0377);
+ while (*s) {
+ str_cat_char(result, *s++, enc);
+ }
}
}
- rb_str_buf_cat2(result, "\"");
+ str_cat_char(result, '"', enc);
OBJ_INFECT(result, str);
+ rb_enc_associate(result, enc);
return result;
}
+#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
/*
* call-seq:
@@ -2653,6 +2786,7 @@ rb_str_inspect(VALUE str)
VALUE
rb_str_dump(VALUE str)
{
+ rb_encoding *enc = rb_enc_from_index(0);
long len;
char *p, *pend;
char *q, *qend;
@@ -2675,7 +2809,7 @@ rb_str_dump(VALUE str)
break;
default:
- if (ISPRINT(c)) {
+ if (rb_enc_isprint(c, enc)) {
len++;
}
else {
@@ -2701,9 +2835,6 @@ rb_str_dump(VALUE str)
if (IS_EVSTR(p, pend)) *q++ = '\\';
*q++ = '#';
}
- else if (ISPRINT(c)) {
- *q++ = c;
- }
else if (c == '\n') {
*q++ = '\\';
*q++ = 'n';
@@ -2736,6 +2867,9 @@ rb_str_dump(VALUE str)
*q++ = '\\';
*q++ = 'e';
}
+ else if (rb_enc_isprint(c, enc)) {
+ *q++ = c;
+ }
else {
*q++ = '\\';
sprintf(q, "%03o", c&0xff);
@@ -2745,6 +2879,8 @@ rb_str_dump(VALUE str)
*q++ = '"';
OBJ_INFECT(result, str);
+ /* result from dump is ASCII */
+ rb_enc_associate(result, enc);
return result;
}
@@ -2761,20 +2897,22 @@ rb_str_dump(VALUE str)
static VALUE
rb_str_upcase_bang(VALUE str)
{
+ rb_encoding *enc;
char *s, *send;
int modify = 0;
rb_str_modify(str);
- s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
+ enc = rb_enc_get(str);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
while (s < send) {
- if (ismbchar(*s)) {
- s+=mbclen(*s) - 1;
- }
- else if (ISLOWER(*s)) {
- *s = toupper(*s);
+ int c = rb_enc_codepoint(s, send, enc);
+
+ if (rb_enc_islower(c, enc)) {
+ /* assuming toupper returns codepoint with same size */
+ rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
modify = 1;
}
- s++;
+ s += rb_enc_codelen(c, enc);
}
if (modify) return str;
@@ -2815,20 +2953,22 @@ rb_str_upcase(VALUE str)
static VALUE
rb_str_downcase_bang(VALUE str)
{
+ rb_encoding *enc;
char *s, *send;
int modify = 0;
rb_str_modify(str);
- s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
+ enc = rb_enc_get(str);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
while (s < send) {
- if (ismbchar(*s)) {
- s+=mbclen(*s) - 1;
- }
- else if (ISUPPER(*s)) {
- *s = tolower(*s);
+ int c = rb_enc_codepoint(s, send, enc);
+
+ if (rb_enc_isupper(c, enc)) {
+ /* assuming tolower returns codepoint with same size */
+ rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
modify = 1;
}
- s++;
+ s += rb_enc_codelen(c, enc);
}
if (modify) return str;
@@ -2874,24 +3014,29 @@ rb_str_downcase(VALUE str)
static VALUE
rb_str_capitalize_bang(VALUE str)
{
+ rb_encoding *enc;
char *s, *send;
int modify = 0;
+ int c;
rb_str_modify(str);
+ enc = rb_enc_get(str);
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
- s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
- if (ISLOWER(*s)) {
- *s = toupper(*s);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
+
+ c = rb_enc_codepoint(s, send, enc);
+ if (rb_enc_islower(c, enc)) {
+ rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
modify = 1;
}
- while (++s < send) {
- if (ismbchar(*s)) {
- s+=mbclen(*s) - 1;
- }
- else if (ISUPPER(*s)) {
- *s = tolower(*s);
+ s += rb_enc_codelen(c, enc);
+ while (s < send) {
+ c = rb_enc_codepoint(s, send, enc);
+ if (rb_enc_isupper(c, enc)) {
+ rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
modify = 1;
}
+ s += rb_enc_codelen(c, enc);
}
if (modify) return str;
return Qnil;
@@ -2932,24 +3077,27 @@ rb_str_capitalize(VALUE str)
static VALUE
rb_str_swapcase_bang(VALUE str)
{
+ rb_encoding *enc;
char *s, *send;
int modify = 0;
rb_str_modify(str);
- s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
+ enc = rb_enc_get(str);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
while (s < send) {
- if (ismbchar(*s)) {
- s+=mbclen(*s) - 1;
- }
- else if (ISUPPER(*s)) {
- *s = tolower(*s);
+ int c = rb_enc_codepoint(s, send, enc);
+
+ if (rb_enc_isupper(c, enc)) {
+ /* assuming tolower returns codepoint with same size */
+ rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
modify = 1;
}
- else if (ISLOWER(*s)) {
- *s = toupper(*s);
+ else if (rb_enc_islower(c, enc)) {
+ /* assuming toupper returns codepoint with same size */
+ rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
modify = 1;
}
- s++;
+ s += rb_enc_codelen(c, enc);
}
if (modify) return str;
@@ -2985,24 +3133,21 @@ struct tr {
};
static int
-trnext(struct tr *t)
+trnext(struct tr *t, rb_encoding *enc)
{
for (;;) {
if (!t->gen) {
if (t->p == t->pend) return -1;
- if (t->p < t->pend - 1 && *t->p == '\\') {
- t->p++;
- }
- t->now = *(USTR)t->p++;
+ t->now = rb_enc_codepoint(t->p, t->pend, enc);
+ t->p += rb_enc_codelen(t->now, enc);
if (t->p < t->pend - 1 && *t->p == '-') {
t->p++;
if (t->p < t->pend) {
- if (t->now > *(USTR)t->p) {
- t->p++;
- continue;
- }
+ int c = rb_enc_codepoint(t->p, t->pend, enc);
+ t->p += rb_enc_codelen(c, enc);
+ if (t->now > c) continue;
t->gen = 1;
- t->max = *(USTR)t->p++;
+ t->max = c;
}
}
return t->now;
@@ -3022,11 +3167,12 @@ static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
static VALUE
tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
{
+ rb_encoding *enc;
struct tr trsrc, trrepl;
int cflag = 0;
- int trans[256];
- int i, c, modify = 0;
+ int c, last, modify = 0;
char *s, *send;
+ VALUE hash;
StringValue(src);
StringValue(repl);
@@ -3039,73 +3185,138 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if (RSTRING_LEN(repl) == 0) {
return rb_str_delete_bang(1, &src, str);
}
+ enc = rb_enc_check(str, src);
+ if (rb_enc_check(str, repl) != enc) {
+ rb_raise(rb_eArgError, "character encodings differ");
+ }
trrepl.p = RSTRING_PTR(repl);
trrepl.pend = trrepl.p + RSTRING_LEN(repl);
trsrc.gen = trrepl.gen = 0;
trsrc.now = trrepl.now = 0;
trsrc.max = trrepl.max = 0;
+ hash = rb_hash_new();
if (cflag) {
- for (i=0; i<256; i++) {
- trans[i] = 1;
- }
- while ((c = trnext(&trsrc)) >= 0) {
- trans[c & 0xff] = -1;
+ while ((c = trnext(&trsrc, enc)) >= 0) {
+ rb_hash_aset(hash, INT2NUM(c), Qtrue);
}
- while ((c = trnext(&trrepl)) >= 0)
+ while ((c = trnext(&trrepl, enc)) >= 0)
/* retrieve last replacer */;
- for (i=0; i<256; i++) {
- if (trans[i] >= 0) {
- trans[i] = trrepl.now;
- }
- }
+ last = trrepl.now;
}
else {
int r;
- for (i=0; i<256; i++) {
- trans[i] = -1;
- }
- while ((c = trnext(&trsrc)) >= 0) {
- r = trnext(&trrepl);
+ while ((c = trnext(&trsrc, enc)) >= 0) {
+ r = trnext(&trrepl, enc);
if (r == -1) r = trrepl.now;
- trans[c & 0xff] = r;
+ rb_hash_aset(hash, INT2NUM(c), INT2NUM(r));
}
}
rb_str_modify(str);
- s = RSTRING_PTR(str); send = s + RSTRING_LEN(str);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
if (sflag) {
- char *t = s;
- int c0, last = -1;
+ int clen, tlen, max = RSTRING_LEN(str);
+ int offset, save = -1;
+ char *buf = ALLOC_N(char, max), *t = buf;
+ VALUE v;
+ if (cflag) tlen = rb_enc_codelen(last, enc);
while (s < send) {
- c0 = *s++;
- if ((c = trans[c0 & 0xff]) >= 0) {
- if (last == c) continue;
- last = c;
- *t++ = c & 0xff;
+ c = rb_enc_codepoint(s, send, enc);
+ tlen = clen = rb_enc_codelen(c, enc);
+
+ s += clen;
+ v = rb_hash_aref(hash, INT2NUM(c));
+ if (!NIL_P(v)) {
+ if (!cflag) {
+ c = NUM2INT(v);
+ if (save == c) continue;
+ save = c;
+ tlen = rb_enc_codelen(c, enc);
+ modify = 1;
+ }
+ }
+ else if (cflag) {
+ save = c = last;
modify = 1;
}
else {
- last = -1;
- *t++ = c0;
+ save = -1;
}
+ while (t - buf + tlen >= max) {
+ offset = t - buf;
+ max *= 2;
+ REALLOC_N(buf, char, max);
+ t = buf + offset;
+ }
+ rb_enc_mbcput(c, t, enc);
+ t += tlen;
}
- if (RSTRING_LEN(str) > (t - RSTRING_PTR(str))) {
- STR_SET_LEN(str, (t - RSTRING_PTR(str)));
- modify = 1;
- *t = '\0';
+ *t = '\0';
+ RSTRING(str)->as.heap.ptr = buf;
+ RSTRING(str)->as.heap.len = t - buf;
+ STR_SET_NOEMBED(str);
+ RSTRING(str)->as.heap.aux.capa = max;
+ }
+ else if (rb_enc_mbmaxlen(enc) == 1) {
+ while (s < send) {
+ VALUE v = rb_hash_aref(hash, INT2FIX(*s));
+ if (!NIL_P(v)) {
+ if (cflag) {
+ *s = last;
+ }
+ else {
+ c = FIX2INT(v);
+ *s = c & 0xff;
+ }
+ modify = 1;
+ }
+ s++;
}
}
else {
+ int clen, tlen, max = RSTRING_LEN(str) * 1.2;
+ int offset;
+ char *buf = ALLOC_N(char, max), *t = buf;
+ VALUE v;
+
+ if (cflag) tlen = rb_enc_codelen(last, enc);
while (s < send) {
- if ((c = trans[*s & 0xff]) >= 0) {
- *s = c & 0xff;
+ c = rb_enc_codepoint(s, send, enc);
+ tlen = clen = rb_enc_codelen(c, enc);
+
+ v = rb_hash_aref(hash, INT2NUM(c));
+ if (!NIL_P(v)) {
+ if (!cflag) {
+ c = NUM2INT(v);
+ tlen = rb_enc_codelen(c, enc);
+ modify = 1;
+ }
+ }
+ else if (cflag) {
+ c = last;
modify = 1;
}
- s++;
+ while (t - buf + tlen >= max) {
+ offset = t - buf;
+ max *= 2;
+ REALLOC_N(buf, char, max);
+ t = buf + offset;
+ }
+ if (s != t) rb_enc_mbcput(c, t, enc);
+ s += clen;
+ t += tlen;
+ }
+ if (!STR_EMBED_P(str)) {
+ free(RSTRING(str)->as.heap.ptr);
}
+ *t = '\0';
+ RSTRING(str)->as.heap.ptr = buf;
+ RSTRING(str)->as.heap.len = t - buf;
+ STR_SET_NOEMBED(str);
+ RSTRING(str)->as.heap.aux.capa = max;
}
if (modify) return str;
@@ -3155,34 +3366,32 @@ rb_str_tr(VALUE str, VALUE src, VALUE repl)
}
static void
-tr_setup_table(VALUE str, char table[256], int init)
+tr_setup_table(VALUE str, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
{
- char buf[256];
struct tr tr;
- int i, c;
- int cflag = 0;
+ int c;
+ VALUE table, ptable;
tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
tr.gen = tr.now = tr.max = 0;
+ table = rb_hash_new();
if (RSTRING_LEN(str) > 1 && RSTRING_PTR(str)[0] == '^') {
- cflag = 1;
tr.p++;
+ ptable = *ctablep;
+ *ctablep = table;
+ }
+ else {
+ ptable = *tablep;
+ *tablep = table;
}
- if (init) {
- for (i=0; i<256; i++) {
- table[i] = 1;
+ while ((c = trnext(&tr, enc)) >= 0) {
+ VALUE key = INT2NUM(c);
+
+ if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) {
+ rb_hash_aset(table, key, Qtrue);
}
}
- for (i=0; i<256; i++) {
- buf[i] = cflag;
- }
- while ((c = trnext(&tr)) >= 0) {
- buf[c & 0xff] = !cflag;
- }
- for (i=0; i<256; i++) {
- table[i] = table[i] && buf[i];
- }
}
@@ -3197,10 +3406,10 @@ tr_setup_table(VALUE str, char table[256], int init)
static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
+ rb_encoding *enc;
char *s, *send, *t;
- char squeez[256];
+ VALUE del = 0, nodel = 0;
int modify = 0;
- int init = 1;
int i;
if (argc < 1) {
@@ -3210,20 +3419,28 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
VALUE s = argv[i];
StringValue(s);
- tr_setup_table(s, squeez, init);
- init = 0;
+ enc = rb_enc_check(str, s);
+ tr_setup_table(s, &del, &nodel, enc);
}
rb_str_modify(str);
s = t = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil;
- send = s + RSTRING_LEN(str);
+ send = RSTRING_END(str);
while (s < send) {
- if (squeez[*s & 0xff])
+ int c = rb_enc_codepoint(s, send, enc);
+ int clen = rb_enc_codelen(c, enc);
+ VALUE v = INT2NUM(c);
+
+ if ((del && !NIL_P(rb_hash_aref(del, v))) &&
+ (!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
modify = 1;
- else
- *t++ = *s;
- s++;
+ }
+ else {
+ if (t != s) rb_enc_mbcput(c, t, enc);
+ t += clen;
+ }
+ s += clen;
}
*t = '\0';
STR_SET_LEN(str, t - RSTRING_PTR(str));
@@ -3267,37 +3484,43 @@ rb_str_delete(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
- char squeez[256];
+ rb_encoding *enc;
+ VALUE del = 0, nodel = 0;
char *s, *send, *t;
- int c, save, modify = 0;
- int init = 1;
+ int save, modify = 0;
int i;
if (argc == 0) {
- for (i=0; i<256; i++) {
- squeez[i] = 1;
- }
+ enc = rb_enc_get(str);
}
else {
for (i=0; i<argc; i++) {
VALUE s = argv[i];
StringValue(s);
- tr_setup_table(s, squeez, init);
- init = 0;
+ enc = rb_enc_check(str, s);
+ tr_setup_table(s, &del, &nodel, enc);
}
}
rb_str_modify(str);
s = t = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil;
- send = s + RSTRING_LEN(str);
+ send = RSTRING_END(str);
save = -1;
while (s < send) {
- c = *s++ & 0xff;
- if (c != save || !squeez[c]) {
- *t++ = save = c;
+ int c = rb_enc_codepoint(s, send, enc);
+ int clen = rb_enc_codelen(c, enc);
+ VALUE v = INT2NUM(c);
+
+ if (c != save ||
+ ((del && NIL_P(rb_hash_aref(del, v))) &&
+ (!nodel || NIL_P(rb_hash_aref(nodel, v))))) {
+ if (t != s) rb_enc_mbcput(c, t, enc);
+ save = c;
+ t += clen;
}
+ s += clen;
}
*t = '\0';
if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
@@ -3390,9 +3613,9 @@ rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
- char table[256];
+ rb_encoding *enc;
+ VALUE del = 0, nodel = 0;
char *s, *send;
- int init = 1;
int i;
if (argc < 1) {
@@ -3402,18 +3625,24 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
VALUE s = argv[i];
StringValue(s);
- tr_setup_table(s, table, init);
- init = 0;
+ enc = rb_enc_check(str, s);
+ tr_setup_table(s, &del, &nodel, enc);
}
s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
- send = s + RSTRING_LEN(str);
+ send = RSTRING_END(str);
i = 0;
while (s < send) {
- if (table[*s++ & 0xff]) {
+ int c = rb_enc_codepoint(s, send, enc);
+ int clen = rb_enc_codelen(c, enc);
+ VALUE v = INT2NUM(c);
+
+ if ((del && !NIL_P(rb_hash_aref(del, v))) &&
+ (!nodel || NIL_P(rb_hash_aref(nodel, v)))) {
i++;
}
+ s += clen;
}
return INT2NUM(i);
}
@@ -3464,6 +3693,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
+ rb_encoding *enc;
VALUE spat;
VALUE limit;
int awk_split = Qfalse;
@@ -3482,6 +3712,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
i = 1;
}
+ enc = rb_enc_get(str);
if (NIL_P(spat)) {
if (!NIL_P(rb_fs)) {
spat = rb_fs;
@@ -3508,13 +3739,14 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
beg = 0;
if (awk_split) {
char *ptr = RSTRING_PTR(str);
- long len = RSTRING_LEN(str);
- char *eptr = ptr + len;
+ char *eptr = RSTRING_END(str);
int skip = 1;
+ int c;
- for (end = beg = 0; ptr<eptr; ptr++) {
+ while (ptr < eptr) {
+ c = rb_enc_codepoint(ptr, eptr, enc);
if (skip) {
- if (ISSPACE(*ptr)) {
+ if (rb_enc_isspace(c, enc)) {
beg++;
}
else {
@@ -3524,7 +3756,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
}
else {
- if (ISSPACE(*ptr)) {
+ if (rb_enc_isspace(c, enc)) {
rb_ary_push(result, rb_str_substr(str, beg, end-beg));
skip = 1;
beg = end + 1;
@@ -3534,6 +3766,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
end++;
}
}
+ ptr += rb_enc_codelen(c, enc);
}
}
else {
@@ -3542,6 +3775,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
int last_null = 0;
struct re_registers *regs;
+ enc = rb_enc_check(str, spat);
while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
regs = RMATCH(rb_backref_get())->regs;
if (start == end && BEG(0) == END(0)) {
@@ -3550,11 +3784,12 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
break;
}
else if (last_null == 1) {
- rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING_PTR(str)[beg],spat)));
+ rb_ary_push(result, rb_str_substr(str, beg,
+ rb_enc_mbclen(RSTRING_PTR(str)+beg,enc)));
beg = start;
}
else {
- start += mbclen2(RSTRING_PTR(str)[start],spat);
+ start += rb_enc_mbclen(RSTRING_PTR(str)+start,enc);
last_null = 1;
continue;
}
@@ -3652,9 +3887,10 @@ rb_str_split(VALUE str, const char *sep0)
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
+ rb_encoding *enc;
VALUE rs;
int newline;
- char *p = RSTRING_PTR(str), *pend = p + RSTRING_LEN(str), *s;
+ char *p = RSTRING_PTR(str), *pend = p + RSTRING_LEN(str), *s = p;
char *ptr = p;
long len = RSTRING_LEN(str), rslen;
VALUE line;
@@ -3662,7 +3898,6 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
if (rb_scan_args(argc, argv, "01", &rs) == 0) {
rs = rb_rs;
}
-
RETURN_ENUMERATOR(str, argc, argv);
if (NIL_P(rs)) {
@@ -3670,28 +3905,28 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
return str;
}
StringValue(rs);
+ enc = rb_enc_check(str, rs);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
newline = '\n';
}
else {
- newline = RSTRING_PTR(rs)[rslen-1];
+ newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
}
- for (s = p, p += rslen; p < pend; p++) {
- if (rslen == 0 && *p == '\n') {
- if (*++p != '\n') continue;
- while (*p == '\n') p++;
- }
- if (RSTRING_PTR(str) < p && p[-1] == newline &&
- (rslen <= 1 ||
- rb_memcmp(RSTRING_PTR(rs), p-rslen, rslen) == 0)) {
- line = rb_str_new5(str, s, p - s);
+ while (p < pend) {
+ int c = rb_enc_codepoint(p, pend, enc);
+ int n = rb_enc_codelen(c, enc);
+
+ if (c == newline &&
+ (rslen <= 1 || rb_memcmp(RSTRING_PTR(rs), p, rslen) == 0)) {
+ line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
OBJ_INFECT(line, str);
rb_yield(line);
str_mod_check(str, ptr, len);
- s = p;
+ s = p + n;
}
+ p += n;
}
if (s != pend) {
@@ -3745,6 +3980,44 @@ rb_str_each_byte(VALUE str)
/*
+ * Document-method: chars
+ * call-seq:
+ * str.chars => anEnumerator
+ * str.chars {|substr| block } => str
+ *
+ * Returns an enumerator that gives each character in the string.
+ * If a block is given, it iterates over each character in the string.
+ *
+ * "foo".chars.to_a #=> ["f","o","o"]
+ */
+
+/*
+ * Document-method: each_char
+ * call-seq:
+ * str.each_char {|cstr| block } => str
+ *
+ * Passes each character in <i>str</i> to the given block.
+ *
+ * "hello".each_char {|c| print c, ' ' }
+ *
+ * <em>produces:</em>
+ *
+ * h e l l o
+ */
+
+static VALUE
+rb_str_each_char(VALUE str)
+{
+ int i, len = str_strlen(str, 0);
+
+ RETURN_ENUMERATOR(str, 0, 0);
+ for (i=0; i<len; i++) {
+ rb_yield(rb_str_substr(str, i, 1));
+ }
+ return str;
+}
+
+/*
* call-seq:
* str.chop! => str or nil
*
@@ -3918,13 +4191,21 @@ rb_str_chomp(int argc, VALUE *argv, VALUE str)
static VALUE
rb_str_lstrip_bang(VALUE str)
{
+ rb_encoding *enc;
char *s, *t, *e;
+ rb_str_modify(str);
+ enc = rb_enc_get(str);
s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil;
- e = t = s + RSTRING_LEN(str);
+ e = t = RSTRING_END(str);
/* remove spaces at head */
- while (s < t && ISSPACE(*s)) s++;
+ while (s < e) {
+ int cc = rb_enc_codepoint(s, e, enc);
+
+ if (!rb_enc_isspace(cc, enc)) break;
+ s += rb_enc_codelen(cc, enc);
+ }
if (s > RSTRING_PTR(str)) {
rb_str_modify(str);
@@ -3972,21 +4253,30 @@ rb_str_lstrip(VALUE str)
static VALUE
rb_str_rstrip_bang(VALUE str)
{
+ rb_encoding *enc;
char *s, *t, *e;
+ int space_seen = Qfalse;
- s = RSTRING_PTR(str);
+ rb_str_modify(str);
+ enc = rb_enc_get(str);
+ s = t = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil;
- e = t = s + RSTRING_LEN(str);
-
- /* remove trailing '\0's */
- while (s < t && t[-1] == '\0') t--;
-
- /* remove trailing spaces */
- while (s < t && ISSPACE(*(t-1))) t--;
+ e = RSTRING_END(str);
+ while (s < e) {
+ int cc = rb_enc_codepoint(s, e, enc);
+ if (!cc || rb_enc_isspace(cc, enc)) {
+ if (!space_seen) t = s;
+ space_seen = Qtrue;
+ }
+ else {
+ space_seen = Qfalse;
+ }
+ s += rb_enc_codelen(cc, enc);
+ }
if (t < e) {
rb_str_modify(str);
- STR_SET_LEN(str, t-s);
+ STR_SET_LEN(str, t-RSTRING_PTR(str));
RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
return str;
}
@@ -4054,10 +4344,12 @@ rb_str_strip(VALUE str)
static VALUE
scan_once(VALUE str, VALUE pat, long *start)
{
+ rb_encoding *enc;
VALUE result, match;
struct re_registers *regs;
long i;
+ enc = rb_enc_check(str, pat);
if (rb_reg_search(pat, str, *start, 0) >= 0) {
match = rb_backref_get();
regs = RMATCH(match)->regs;
@@ -4066,7 +4358,7 @@ scan_once(VALUE str, VALUE pat, long *start)
* Always consume at least one character of the input string
*/
if (RSTRING_LEN(str) > END(0))
- *start = END(0)+mbclen2(RSTRING_PTR(str)[END(0)],pat);
+ *start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),enc);
else
*start = END(0)+1;
}
@@ -4251,7 +4543,7 @@ rb_str_intern(VALUE s)
if (OBJ_TAINTED(str) && rb_safe_level() >= 1) {
rb_raise(rb_eSecurityError, "Insecure: can't intern tainted string");
}
- id = rb_intern2(RSTRING_PTR(str), RSTRING_LEN(str));
+ id = rb_intern_str(str);
return ID2SYM(id);
}
@@ -4335,65 +4627,81 @@ rb_str_sum(int argc, VALUE *argv, VALUE str)
+/*
+ * Pads str out to a width given in characters.
+ *
+ * argv[0] is the width; the optional argv[1] is the pad string (a single
+ * space when omitted). jflag selects where the padding goes: 'l' puts it
+ * all on the right, 'r' puts it all on the left, and any other value puts
+ * n/2 characters on the left and the remainder on the right.
+ *
+ * Returns a duplicate of str when width is negative or str already has at
+ * least width characters. Raises ArgumentError ("zero width padding")
+ * when an empty pad string is given.
+ */
static VALUE
rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
{
+ rb_encoding *enc;
VALUE w;
- long width, flen = 0;
+ long width, len, flen = 1, fclen = 1;
VALUE res;
- char *p, *pend, *f = " ";
- long n;
- VALUE pad;
+ char *p, *f = " ";
+ long n, llen, rlen;
+ volatile VALUE pad;
rb_scan_args(argc, argv, "11", &w, &pad);
+ enc = rb_enc_get(str);
width = NUM2LONG(w);
if (argc == 2) {
StringValue(pad);
+ rb_enc_check(str, pad);
f = RSTRING_PTR(pad);
flen = RSTRING_LEN(pad);
+ /* flen is the pad's size in bytes, fclen its size in characters */
+ fclen = str_strlen(pad, enc);
if (flen == 0) {
rb_raise(rb_eArgError, "zero width padding");
}
}
- if (width < 0 || RSTRING_LEN(str) >= width) return rb_str_dup(str);
- res = rb_str_new5(str, 0, width);
+#if 0
+ else if (!m17n_asciicompat(enc)) {
+ rb_raise(rb_eArgError, "character encodings differ");
+ }
+#endif
+ len = str_strlen(str, enc);
+ if (width < 0 || len >= width) return rb_str_dup(str);
+ /* n = padding characters needed; llen/rlen = left/right share */
+ n = width - len;
+ llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
+ rlen = n - llen;
+ /* Each side is written as whole copies of the pad plus at most one
+ * leading fragment of it, so rounding each side up to whole copies is
+ * an upper bound on the bytes written. Sizing by the average bytes
+ * per pad character (n*flen/fclen) can come up short when a fragment
+ * consists of the pad's widest characters. STR_SET_LEN below records
+ * the length actually used. */
+ res = rb_str_new5(str, 0, RSTRING_LEN(str)+((llen+fclen-1)/fclen+(rlen+fclen-1)/fclen)*flen);
p = RSTRING_PTR(res);
- if (jflag != 'l') {
- n = width - RSTRING_LEN(str);
- pend = p + ((jflag == 'r') ? n : n/2);
+ /* left padding: single bytes, whole pad copies, then a final fragment */
+ while (llen) {
if (flen <= 1) {
- while (p < pend) {
- *p++ = *f;
- }
+ *p++ = *f;
+ llen--;
+ }
+ else if (llen > fclen) {
+ memcpy(p,f,flen);
+ p += flen;
+ llen -= fclen;
}
else {
- char *q = f;
- while (p + flen <= pend) {
- memcpy(p,f,flen);
- p += flen;
- }
- while (p < pend) {
- *p++ = *q++;
- }
+ /* copy only the bytes of the first llen pad characters */
+ char *fp = str_nth(f, f+flen, llen, enc);
+ n = fp - f;
+ memcpy(p,f,n);
+ p+=n;
+ break;
}
}
- memcpy(p, RSTRING_PTR(str), RSTRING_LEN(str)+1);
- if (jflag != 'r') {
- p += RSTRING_LEN(str); pend = RSTRING_PTR(res) + width;
+ memcpy(p, RSTRING_PTR(str), RSTRING_LEN(str));
+ p+=RSTRING_LEN(str);
+ /* right padding: same scheme as the left side */
+ while (rlen) {
if (flen <= 1) {
- while (p < pend) {
- *p++ = *f;
- }
+ *p++ = *f;
+ rlen--;
+ }
+ else if (rlen > fclen) {
+ memcpy(p,f,flen);
+ p += flen;
+ rlen -= fclen;
}
else {
- while (p + flen <= pend) {
- memcpy(p,f,flen);
- p += flen;
- }
- while (p < pend) {
- *p++ = *f++;
- }
+ /* copy only the bytes of the first rlen pad characters */
+ char *fp = str_nth(f, f+flen, rlen, enc);
+ n = fp - f;
+ memcpy(p,f,n);
+ p+=n;
+ break;
}
}
+ /* terminate and record the byte length actually written */
+ *p = '\0';
+ STR_SET_LEN(res, p-RSTRING_PTR(res));
OBJ_INFECT(res, str);
- if (flen > 0) OBJ_INFECT(res, pad);
+ if (!NIL_P(pad)) OBJ_INFECT(res, pad);
return res;
}
@@ -4493,6 +4801,7 @@ rb_str_partition(VALUE str, VALUE sep)
failed:
return rb_ary_new3(3, str, rb_str_new(0,0),rb_str_new(0,0));
}
+ pos = rb_str_sublen(str, pos);
if (regex) {
sep = rb_str_subpat(str, sep, 0);
if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
@@ -4534,11 +4843,13 @@ rb_str_rpartition(VALUE str, VALUE sep)
rb_raise(rb_eTypeError, "type mismatch: %s given",
rb_obj_classname(sep));
}
+ pos = rb_str_sublen(str, pos);
pos = rb_str_rindex(str, sep, pos);
}
if (pos < 0) {
return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str);
}
+ pos = rb_str_sublen(str, pos);
if (regex) {
sep = rb_reg_nth_match(0, rb_backref_get());
}
@@ -4563,6 +4874,7 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str)
for (i=0; i<argc; i++) {
VALUE tmp = rb_check_string_type(argv[i]);
if (NIL_P(tmp)) continue;
+ rb_enc_check(str, tmp);
if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
if (rb_memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
return Qtrue;
@@ -4585,6 +4897,7 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
for (i=0; i<argc; i++) {
VALUE tmp = rb_check_string_type(argv[i]);
if (NIL_P(tmp)) continue;
+ rb_enc_check(str, tmp);
if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
if (rb_memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - RSTRING_LEN(tmp),
RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
@@ -4603,6 +4916,12 @@ rb_str_setter(VALUE val, ID id, VALUE *var)
}
+/*
+ * Returns a new String holding the name of the encoding that
+ * rb_enc_get() reports for str.
+ */
+static VALUE
+str_encoding(VALUE str)
+{
+    rb_encoding *enc = rb_enc_get(str);
+    const char *enc_name = rb_enc_name(enc);
+
+    return rb_str_new2(enc_name);
+}
+
/**********************************************************************
* Document-class: Symbol
*
@@ -4910,6 +5229,7 @@ Init_String(void)
rb_define_method(rb_cString, "insert", rb_str_insert, 2);
rb_define_method(rb_cString, "length", rb_str_length, 0);
rb_define_method(rb_cString, "size", rb_str_length, 0);
+ rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
rb_define_method(rb_cString, "=~", rb_str_match, 1);
rb_define_method(rb_cString, "match", rb_str_match_m, -1);
@@ -4994,6 +5314,7 @@ Init_String(void)
rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
+ rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
rb_define_method(rb_cString, "sum", rb_str_sum, -1);
@@ -5003,6 +5324,8 @@ Init_String(void)
rb_define_method(rb_cString, "partition", rb_str_partition, 1);
rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
+ rb_define_method(rb_cString, "encoding", str_encoding, 0);
+
id_to_s = rb_intern("to_s");
rb_fs = Qnil;
@@ -5042,4 +5365,6 @@ Init_String(void)
rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
+
+ rb_define_method(rb_cSymbol, "encoding", str_encoding, 0);
}
diff --git a/struct.c b/struct.c
index 347b864a9b..f1d94945fc 100644
--- a/struct.c
+++ b/struct.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Mar 22 18:44:30 JST 1995
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/time.c b/time.c
index 798696cb22..59237f151b 100644
--- a/time.c
+++ b/time.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Dec 28 14:31:59 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/util.c b/util.c
index e41926e64f..34b54a79b0 100644
--- a/util.c
+++ b/util.c
@@ -6,7 +6,7 @@
$Date$
created at: Fri Mar 10 17:22:34 JST 1995
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/
diff --git a/variable.c b/variable.c
index 01a0bca527..58fd17b78c 100644
--- a/variable.c
+++ b/variable.c
@@ -6,7 +6,7 @@
$Date$
created at: Tue Apr 19 23:55:15 JST 1994
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
Copyright (C) 2000 Information-technology Promotion Agency, Japan
diff --git a/version.c b/version.c
index 8db37ecd3d..4e54d063f7 100644
--- a/version.c
+++ b/version.c
@@ -6,7 +6,7 @@
$Date$
created at: Thu Sep 30 20:08:01 JST 1993
- Copyright (C) 1993-2003 Yukihiro Matsumoto
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
**********************************************************************/