about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-04-08 17:05:12 +0000
committer nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-04-08 17:05:12 +0000
commit 3b6c4dad0c0e3e7a27fd1e89f9cf9579e9f53079 (patch)
tree 585115bcfb27e47356a0d242c64fe3997ae614b5
parent 45c593d89ee29253e6c98427f58f32d55ff55aa1 (diff)
download ruby-3b6c4dad0c0e3e7a27fd1e89f9cf9579e9f53079.tar.gz
bignum.c: rb_cstr_parse_inum
* bignum.c (rb_cstr_parse_inum): [EXPERIMENTAL] new function to
  parse integer in C-string with length. the name and the
  arguments may be changed in the future.

* bignum.c (rb_str_to_inum): preserve encoding of the argument in
  error messages, and no longer needs to copy non-terminated
  strings.

* bignum.c (rb_str2big_{poweroftwo,normal,karatsuba,gmp}): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54518 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 12
-rw-r--r-- bignum.c 245
-rw-r--r-- internal.h 1
-rw-r--r-- test/ruby/test_integer.rb 2
4 files changed, 169 insertions, 91 deletions
diff --git a/ChangeLog b/ChangeLog
index 7cddd7ce23..b3b4d4d8da 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Sat Apr 9 02:05:10 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * bignum.c (rb_cstr_parse_inum): [EXPERIMENTAL] new function to
+ parse integer in C-string with length. the name and the
+ arguments may be changed in the future.
+
+ * bignum.c (rb_str_to_inum): preserve encoding of the argument in
+ error messages, and no longer needs to copy non-terminated
+ strings.
+
+ * bignum.c (rb_str2big_{poweroftwo,normal,karatsuba,gmp}): ditto.
+
Thu Apr 7 19:04:03 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* doc/regexp.rdoc (comments): [DOC] terminators cannot appear in
diff --git a/bignum.c b/bignum.c
index d6ebeb5ed5..a6d669e887 100644
--- a/bignum.c
+++ b/bignum.c
@@ -3686,6 +3686,7 @@ rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t na
#define conv_digit(c) (ruby_digit36_to_number_table[(unsigned char)(c)])
NORETURN(static inline void invalid_radix(int base));
+NORETURN(static inline void invalid_integer(VALUE s));
static inline int
valid_radix_p(int base)
@@ -3699,16 +3700,29 @@ invalid_radix(int base)
rb_raise(rb_eArgError, "invalid radix %d", base);
}
-static void
-str2big_scan_digits(const char *s, const char *str, int base, int badcheck, size_t *num_digits_p, size_t *len_p)
+static inline void
+invalid_integer(VALUE s)
+{
+ rb_raise(rb_eArgError, "invalid value for Integer(): %+"PRIsVALUE, s);
+}
+
+static int
+str2big_scan_digits(const char *s, const char *str, int base, int badcheck, size_t *num_digits_p, ssize_t *len_p)
{
char nondigit = 0;
size_t num_digits = 0;
const char *digits_start = str;
const char *digits_end = str;
+ ssize_t len = *len_p;
int c;
+ if (!len) {
+ *num_digits_p = 0;
+ *len_p = 0;
+ return TRUE;
+ }
+
if (badcheck && *str == '_') goto bad;
while ((c = *str++) != 0) {
@@ -3718,27 +3732,32 @@ str2big_scan_digits(const char *s, const char *str, int base, int badcheck, size
break;
}
nondigit = (char) c;
- continue;
}
- else if ((c = conv_digit(c)) < 0) {
+ else if ((c = conv_digit(c)) < 0 || c >= base) {
break;
}
- if (c >= base) break;
- nondigit = 0;
- num_digits++;
- digits_end = str;
+ else {
+ nondigit = 0;
+ num_digits++;
+ digits_end = str;
+ }
+ if (len > 0 && !--len) break;
}
- if (badcheck) {
+ if (badcheck && nondigit) goto bad;
+ if (badcheck && len) {
str--;
- if (s+1 < str && str[-1] == '_') goto bad;
- while (*str && ISSPACE(*str)) str++;
- if (*str) {
+ while (*str && ISSPACE(*str)) {
+ str++;
+ if (len > 0 && !--len) break;
+ }
+ if (len && *str) {
bad:
- rb_invalid_str(s, "Integer()");
+ return FALSE;
}
}
*num_digits_p = num_digits;
*len_p = digits_end - digits_start;
+ return TRUE;
}
static VALUE
@@ -3972,56 +3991,92 @@ str2big_gmp(
VALUE
rb_cstr_to_inum(const char *str, int base, int badcheck)
{
- const char *s = str;
+ char *end;
+ VALUE ret = rb_cstr_parse_inum(str, -1, (badcheck ? NULL : &end), base);
+ if (NIL_P(ret)) {
+ if (badcheck) rb_invalid_str(str, "Integer()");
+ ret = INT2FIX(0);
+ }
+ return ret;
+}
+
+/*
+ * Parse +str+ as Ruby Integer, i.e., underscores, 0d and 0b prefixes.
+ *
+ * str: pointer to the string to be parsed.
+ * should be NUL-terminated if +len+ is negative.
+ * len: length of +str+ if >= 0. if +len+ is negative, +str+ should
+ * be NUL-terminated.
+ * endp: if non-NULL, the address after parsed part is stored. if
+ * NULL, Qnil is returned when +str+ is not valid as an Integer.
+ * base: see +rb_cstr_to_inum+
+ */
+
+VALUE
+rb_cstr_parse_inum(const char *str, ssize_t len, char **endp, int base)
+{
+ const char *const s = str;
char sign = 1;
int c;
VALUE z;
- int bits_per_digit;
+ unsigned long val;
+ int ov;
const char *digits_start, *digits_end;
size_t num_digits;
size_t num_bdigits;
- size_t len;
+ const ssize_t len0 = len;
+ const int badcheck = !endp;
+
+#define ADV(n) do {\
+ if (len > 0 && len <= (n)) goto bad; \
+ str += (n); \
+ len -= (n); \
+ } while (0)
+#define ASSERT_LEN() do {\
+ assert(len != 0); \
+ if (len0 >= 0) assert(s + len0 == str + len); \
+ } while (0)
if (!str) {
- if (badcheck) {
- bad:
- rb_invalid_str(s, "Integer()");
- }
- return INT2FIX(0);
+ bad:
+ if (endp) *endp = (char *)str;
+ return Qnil;
}
- while (ISSPACE(*str)) str++;
+ if (len) {
+ while (ISSPACE(*str)) ADV(1);
- if (str[0] == '+') {
- str++;
- }
- else if (str[0] == '-') {
- str++;
- sign = 0;
- }
- if (str[0] == '+' || str[0] == '-') {
- if (badcheck) goto bad;
- return INT2FIX(0);
+ if (str[0] == '+') {
+ ADV(1);
+ }
+ else if (str[0] == '-') {
+ ADV(1);
+ sign = 0;
+ }
+ ASSERT_LEN();
+ if (str[0] == '+' || str[0] == '-') {
+ goto bad;
+ }
}
if (base <= 0) {
- if (str[0] == '0') {
+ if (str[0] == '0' && len > 1) {
switch (str[1]) {
case 'x': case 'X':
base = 16;
- str += 2;
+ ADV(2);
break;
case 'b': case 'B':
base = 2;
- str += 2;
+ ADV(2);
break;
case 'o': case 'O':
base = 8;
- str += 2;
+ ADV(2);
break;
case 'd': case 'D':
base = 10;
- str += 2;
+ ADV(2);
break;
default:
base = 8;
@@ -4034,31 +4089,36 @@ rb_cstr_to_inum(const char *str, int base, int badcheck)
base = 10;
}
}
+ else if (len == 1) {
+ /* no prefix */
+ }
else if (base == 2) {
if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
- str += 2;
+ ADV(2);
}
}
else if (base == 8) {
if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
- str += 2;
+ ADV(2);
}
}
else if (base == 10) {
if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
- str += 2;
+ ADV(2);
}
}
else if (base == 16) {
if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
- str += 2;
+ ADV(2);
}
}
if (!valid_radix_p(base)) {
invalid_radix(base);
}
- if (*str == '0') { /* squeeze preceding 0s */
+ if (!len) goto bad;
+ if (*str == '0' && len != 1) { /* squeeze preceding 0s */
int us = 0;
+ const char *end = len < 0 ? NULL : str + len;
while ((c = *++str) == '0' || c == '_') {
if (c == '_') {
if (++us >= 2)
@@ -4067,26 +4127,29 @@ rb_cstr_to_inum(const char *str, int base, int badcheck)
else {
us = 0;
}
+ if (str == end) break;
}
- if (!(c = *str) || ISSPACE(c)) --str;
+ if (!c || ISSPACE(c)) --str;
+ if (end) len = end - str;
+ ASSERT_LEN();
}
c = *str;
c = conv_digit(c);
if (c < 0 || c >= base) {
- if (badcheck) goto bad;
- return INT2FIX(0);
+ goto bad;
}
- bits_per_digit = bit_length(base-1);
- if (bits_per_digit * strlen(str) <= sizeof(long) * CHAR_BIT) {
- char *end;
- unsigned long val = STRTOUL(str, &end, base);
-
- if (str < end && *end == '_') goto bigparse;
+ val = ruby_scan_digits(str, len, base, &num_digits, &ov);
+ if (!ov) {
+ const char *end = &str[num_digits];
+ if (num_digits > 0 && *end == '_') goto bigparse;
+ if (endp) *endp = (char *)end;
if (badcheck) {
- if (end == str) goto bad; /* no number */
- while (*end && ISSPACE(*end)) end++;
- if (*end) goto bad; /* trailing garbage */
+ if (num_digits == 0) return Qnil; /* no number */
+ while (len < 0 ? *end : end < str + len) {
+ if (!ISSPACE(*end)) return Qnil; /* trailing garbage */
+ end++;
+ }
}
if (POSFIXABLE(val)) {
@@ -4105,12 +4168,13 @@ rb_cstr_to_inum(const char *str, int base, int badcheck)
bigparse:
digits_start = str;
- str2big_scan_digits(s, str, base, badcheck, &num_digits, &len);
+ if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
+ goto bad;
digits_end = digits_start + len;
if (POW2_P(base)) {
z = str2big_poweroftwo(sign, digits_start, digits_end, num_digits,
- bits_per_digit);
+ bit_length(base-1));
}
else {
int digits_per_bdigits_dbl;
@@ -4140,32 +4204,19 @@ rb_cstr_to_inum(const char *str, int base, int badcheck)
VALUE
rb_str_to_inum(VALUE str, int base, int badcheck)
{
- char *s;
- long len;
- VALUE v = 0;
VALUE ret;
+ const char *s;
+ long len;
+ char *end;
StringValue(str);
rb_must_asciicompat(str);
- if (badcheck) {
- s = StringValueCStr(str);
- }
- else {
- s = RSTRING_PTR(str);
+ RSTRING_GETMEM(str, s, len);
+ ret = rb_cstr_parse_inum(s, len, (badcheck ? NULL : &end), base);
+ if (NIL_P(ret)) {
+ if (badcheck) invalid_integer(str);
+ ret = INT2FIX(0);
}
- if (s) {
- len = RSTRING_LEN(str);
- if (s[len]) { /* no sentinel somehow */
- char *p = ALLOCV(v, len+1);
-
- MEMCPY(p, s, char, len);
- p[len] = '\0';
- s = p;
- }
- }
- ret = rb_cstr_to_inum(s, base, badcheck);
- if (v)
- ALLOCV_END(v);
return ret;
}
@@ -4176,7 +4227,7 @@ rb_str2big_poweroftwo(VALUE arg, int base, int badcheck)
const char *s, *str;
const char *digits_start, *digits_end;
size_t num_digits;
- size_t len;
+ ssize_t len;
VALUE z;
if (!valid_radix_p(base) || !POW2_P(base)) {
@@ -4185,13 +4236,16 @@ rb_str2big_poweroftwo(VALUE arg, int base, int badcheck)
rb_must_asciicompat(arg);
s = str = StringValueCStr(arg);
+ len = RSTRING_LEN(arg);
if (*str == '-') {
+ len--;
str++;
positive_p = 0;
}
digits_start = str;
- str2big_scan_digits(s, str, base, badcheck, &num_digits, &len);
+ if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
+ invalid_integer(arg);
digits_end = digits_start + len;
z = str2big_poweroftwo(positive_p, digits_start, digits_end, num_digits,
@@ -4209,7 +4263,7 @@ rb_str2big_normal(VALUE arg, int base, int badcheck)
const char *s, *str;
const char *digits_start, *digits_end;
size_t num_digits;
- size_t len;
+ ssize_t len;
VALUE z;
int digits_per_bdigits_dbl;
@@ -4220,14 +4274,17 @@ rb_str2big_normal(VALUE arg, int base, int badcheck)
}
rb_must_asciicompat(arg);
- s = str = StringValueCStr(arg);
- if (*str == '-') {
+ s = str = StringValuePtr(arg);
+ len = RSTRING_LEN(arg);
+ if (len > 0 && *str == '-') {
+ len--;
str++;
positive_p = 0;
}
digits_start = str;
- str2big_scan_digits(s, str, base, badcheck, &num_digits, &len);
+ if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
+ invalid_integer(arg);
digits_end = digits_start + len;
maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
@@ -4248,7 +4305,7 @@ rb_str2big_karatsuba(VALUE arg, int base, int badcheck)
const char *s, *str;
const char *digits_start, *digits_end;
size_t num_digits;
- size_t len;
+ ssize_t len;
VALUE z;
int digits_per_bdigits_dbl;
@@ -4259,14 +4316,17 @@ rb_str2big_karatsuba(VALUE arg, int base, int badcheck)
}
rb_must_asciicompat(arg);
- s = str = StringValueCStr(arg);
- if (*str == '-') {
+ s = str = StringValuePtr(arg);
+ len = RSTRING_LEN(arg);
+ if (len > 0 && *str == '-') {
+ len--;
str++;
positive_p = 0;
}
digits_start = str;
- str2big_scan_digits(s, str, base, badcheck, &num_digits, &len);
+ if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
+ invalid_integer(arg);
digits_end = digits_start + len;
maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
@@ -4288,7 +4348,7 @@ rb_str2big_gmp(VALUE arg, int base, int badcheck)
const char *s, *str;
const char *digits_start, *digits_end;
size_t num_digits;
- size_t len;
+ ssize_t len;
VALUE z;
int digits_per_bdigits_dbl;
@@ -4299,14 +4359,17 @@ rb_str2big_gmp(VALUE arg, int base, int badcheck)
}
rb_must_asciicompat(arg);
- s = str = StringValueCStr(arg);
- if (*str == '-') {
+ s = str = StringValuePtr(arg);
+ len = RSTRING_LEN(arg);
+ if (len > 0 && *str == '-') {
+ len--;
str++;
positive_p = 0;
}
digits_start = str;
- str2big_scan_digits(s, str, base, badcheck, &num_digits, &len);
+ if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
+ invalid_integer(arg);
digits_end = digits_start + len;
maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
diff --git a/internal.h b/internal.h
index 3970431fcb..86a0852cc1 100644
--- a/internal.h
+++ b/internal.h
@@ -782,6 +782,7 @@ VALUE rb_big_odd_p(VALUE);
VALUE rb_big_even_p(VALUE);
VALUE rb_integer_float_cmp(VALUE x, VALUE y);
VALUE rb_integer_float_eq(VALUE x, VALUE y);
+VALUE rb_cstr_parse_inum(const char *str, ssize_t len, char **endp, int base);
/* class.c */
VALUE rb_class_boot(VALUE);
diff --git a/test/ruby/test_integer.rb b/test/ruby/test_integer.rb
index 3f5fca75d7..a82562f887 100644
--- a/test/ruby/test_integer.rb
+++ b/test/ruby/test_integer.rb
@@ -98,6 +98,8 @@ class TestInteger < Test::Unit::TestCase
assert_raise(Encoding::CompatibilityError, bug6192) {Integer("0".encode("utf-32be"))}
assert_raise(Encoding::CompatibilityError, bug6192) {Integer("0".encode("utf-32le"))}
assert_raise(Encoding::CompatibilityError, bug6192) {Integer("0".encode("iso-2022-jp"))}
+
+ assert_raise_with_message(ArgumentError, /\u{1f4a1}/) {Integer("\u{1f4a1}")}
end
def test_int_p