about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog                 9
-rw-r--r--  include/ruby/encoding.h   2
-rw-r--r--  io.c                     80
-rw-r--r--  string.c                 40
4 files changed, 85 insertions, 46 deletions
diff --git a/ChangeLog b/ChangeLog
index 902f7e8b83..2ca97f5c41 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Thu Jan 24 03:23:44 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * string.c (rb_str_each_line): use memchr(3) for faster newline
+ search.
+
+ * io.c (appendline): remove unused arguments
+
+ * io.c (rb_io_getline_fast): make much simpler (and faster).
+
Thu Jan 24 02:13:07 2008 Yusuke Endoh <mame@tsg.ne.jp>
* insns.def (expandarray): fix stack inc.
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 27abba1d53..ca06b4dd7a 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -134,7 +134,7 @@ int rb_enc_codelen(int code, rb_encoding *enc);
#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
/* ptr, ptr, encoding -> newline_or_not */
-#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,p,end)
+#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)p,(UChar*)end)
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
diff --git a/io.c b/io.c
index 1206b9bcaa..79f2396383 100644
--- a/io.c
+++ b/io.c
@@ -1686,21 +1686,19 @@ rscheck(const char *rsptr, long rslen, VALUE rs)
}
static int
-appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE *strp, long *lp)
+appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
{
VALUE str = *strp;
int c = EOF;
long limit = *lp;
- if (rsptr == 0)
- rslen = 1;
-
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
const char *p = READ_DATA_PENDING_PTR(fptr);
const char *e;
long last = 0, len = (c != EOF);
+ rb_encoding *enc = io_read_encoding(fptr);
if (limit > 0 && pending > limit) pending = limit;
e = memchr(p, delim, pending);
@@ -1720,7 +1718,7 @@ appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE *strp,
if (limit > 0 && limit == pending) {
char *p = fptr->rbuf+fptr->rbuf_off;
char *pp = p + limit;
- char *pl = rb_enc_left_char_head(p, pp, io_read_encoding(fptr));
+ char *pl = rb_enc_left_char_head(p, pp, enc);
if (pl < pp) {
int diff = pp - pl;
@@ -1790,27 +1788,53 @@ swallow(rb_io_t *fptr, int term)
}
static VALUE
-rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
+rb_io_getline_fast(rb_io_t *fptr)
{
VALUE str = Qnil;
- int c, nolimit = 0;
+ int len = 0;
+ rb_encoding *enc = io_read_encoding(fptr);
for (;;) {
- c = appendline(fptr, delim, 0, 0, &str, &limit);
- if (c == EOF || c == delim) break;
- if (limit == 0) {
- nolimit = 1;
+ long pending = READ_DATA_PENDING_COUNT(fptr);
+
+ if (pending > 0) {
+ const char *p = READ_DATA_PENDING_PTR(fptr);
+ const char *e;
+
+ e = memchr(p, '\n', pending);
+ if (e) {
+ const char *p0 = rb_enc_left_char_head(p, e, enc);
+ const char *pend = rb_enc_left_char_head(p, p+pending, enc);
+ if (rb_enc_is_newline(p0, pend, enc)) {
+ pending = p0 - p + rb_enc_mbclen(p0, pend, enc);
+ }
+ else {
+ e = 0;
+ }
+ }
+ if (NIL_P(str)) {
+ str = rb_str_new(p, pending);
+ fptr->rbuf_off += pending;
+ fptr->rbuf_len -= pending;
+ }
+ else {
+ rb_str_resize(str, len + pending);
+ read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
+ }
+ len += pending;
+ if (e) break;
+ }
+ rb_thread_wait_fd(fptr->fd);
+ rb_io_check_closed(fptr);
+ if (io_fillbuf(fptr) < 0) {
+ if (NIL_P(str)) return Qnil;
break;
}
}
- if (!NIL_P(str)) {
- str = io_enc_str(str, fptr);
- if (!nolimit) {
- fptr->lineno++;
- lineno = INT2FIX(fptr->lineno);
- }
- }
+ str = io_enc_str(str, fptr);
+ fptr->lineno++;
+ lineno = INT2FIX(fptr->lineno);
return str;
}
@@ -1838,11 +1862,12 @@ prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io)
}
}
}
- GetOpenFile(io, fptr);
- if (!NIL_P(rs)) {
- rb_encoding *enc_rs = rb_enc_get(rs);
- rb_encoding *enc_io = io_read_encoding(fptr);
+ if (!NIL_P(rs) && rs != rb_default_rs) {
+ rb_encoding *enc_rs, *enc_io;
+ GetOpenFile(io, fptr);
+ enc_rs = rb_enc_get(rs);
+ enc_io = io_read_encoding(fptr);
if (enc_io != enc_rs &&
(rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
!rb_enc_asciicompat(enc_io))) {
@@ -1876,8 +1901,8 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
else if (limit == 0) {
return rb_enc_str_new(0, 0, io_read_encoding(fptr));
}
- else if (rs == rb_default_rs) {
- return rb_io_getline_fast(fptr, '\n', limit);
+ else if (rs == rb_default_rs && limit < 0) {
+ return rb_io_getline_fast(fptr);
}
else {
int c, newline;
@@ -1893,15 +1918,12 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
swallow(fptr, '\n');
rs = 0;
}
- else if (rslen == 1) {
- return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit);
- }
else {
rsptr = RSTRING_PTR(rs);
}
newline = rsptr[rslen - 1];
- while ((c = appendline(fptr, newline, rsptr, rslen, &str, &limit)) != EOF) {
+ while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
if (c == newline) {
const char *s, *p, *pp;
@@ -1954,7 +1976,7 @@ rb_io_gets(VALUE io)
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
- return rb_io_getline_fast(fptr, '\n', 0);
+ return rb_io_getline_fast(fptr);
}
/*
diff --git a/string.c b/string.c
index eacced74ca..0dfb098679 100644
--- a/string.c
+++ b/string.c
@@ -4470,9 +4470,8 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
rb_encoding *enc;
VALUE rs;
int newline;
- char *p = RSTRING_PTR(str), *pend = p + RSTRING_LEN(str), *s = p;
- char *ptr = p;
- long len = RSTRING_LEN(str), rslen;
+ char *p, *pend, *s, *ptr;
+ long len, rslen;
VALUE line;
int n;
@@ -4480,29 +4479,39 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
rs = rb_rs;
}
RETURN_ENUMERATOR(str, argc, argv);
-
if (NIL_P(rs)) {
rb_yield(str);
return str;
}
+ str = rb_str_new4(str);
+ ptr = p = s = RSTRING_PTR(str);
+ pend = p + RSTRING_LEN(str);
+ len = RSTRING_LEN(str);
StringValue(rs);
- enc = rb_enc_check(str, rs);
if (rs == rb_default_rs) {
+ enc = rb_enc_get(str);
while (p < pend) {
- n = rb_enc_mbclen(p, pend, enc);
- if (rb_enc_is_newline(p, pend, enc)) {
- line = rb_str_new5(str, s, p - s + n);
- OBJ_INFECT(line, str);
- rb_enc_copy(line, str);
- rb_yield(line);
- str_mod_check(str, ptr, len);
- s = p + n;
+ char *p0;
+
+ p = memchr(p, '\n', pend - p);
+ if (!p) break;
+ p0 = rb_enc_left_char_head(s, p, enc);
+ if (!rb_enc_is_newline(p0, pend, enc)) {
+ p++;
+ continue;
}
- p += n;
+ p = p0 + rb_enc_mbclen(s, p0, enc);
+ line = rb_str_new5(str, s, p - s);
+ OBJ_INFECT(line, str);
+ rb_enc_copy(line, str);
+ rb_yield(line);
+ str_mod_check(str, ptr, len);
+ s = p;
}
goto finish;
}
+ enc = rb_enc_check(str, rs);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
newline = '\n';
@@ -4535,8 +4544,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
finish:
if (s != pend) {
- if (p > pend) p = pend;
- line = rb_str_new5(str, s, p - s);
+ line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_copy(line, str);
rb_yield(line);