aboutsummaryrefslogtreecommitdiffstats
path: root/yarp
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-09-15 10:40:18 -0400
committer: git <svn-admin@ruby-lang.org> 2023-09-15 22:38:58 +0000
commit: 18780c22f657be2a0251fbf174fb46fd8523fae7 (patch)
tree: 2c695c747141e517d0ef53bf8c3286ddbec5cd20 /yarp
parent: b848700ccfffc0d4c424daa10ca29e19106e8e3f (diff)
download: ruby-18780c22f657be2a0251fbf174fb46fd8523fae7.tar.gz
[ruby/yarp] Properly handle invalid underscores in number literals
https://github.com/ruby/yarp/commit/35da3d1a4c
Diffstat (limited to 'yarp')
-rw-r--r-- yarp/diagnostic.c 1
-rw-r--r-- yarp/diagnostic.h 1
-rw-r--r-- yarp/util/yp_char.c 66
-rw-r--r-- yarp/util/yp_char.h 26
-rw-r--r-- yarp/yarp.c 61
5 files changed, 127 insertions, 28 deletions
diff --git a/yarp/diagnostic.c b/yarp/diagnostic.c
index b6436f135c..9bbc30edee 100644
--- a/yarp/diagnostic.c
+++ b/yarp/diagnostic.c
@@ -164,6 +164,7 @@ static const char* const diagnostic_messages[YP_DIAGNOSTIC_ID_LEN] = {
[YP_ERR_INVALID_NUMBER_DECIMAL] = "Invalid decimal number",
[YP_ERR_INVALID_NUMBER_HEXADECIMAL] = "Invalid hexadecimal number",
[YP_ERR_INVALID_NUMBER_OCTAL] = "Invalid octal number",
+ [YP_ERR_INVALID_NUMBER_UNDERSCORE] = "Invalid underscore placement in number",
[YP_ERR_INVALID_PERCENT] = "Invalid `%` token", // TODO WHAT?
[YP_ERR_INVALID_TOKEN] = "Invalid token", // TODO WHAT?
[YP_ERR_INVALID_VARIABLE_GLOBAL] = "Invalid global variable",
diff --git a/yarp/diagnostic.h b/yarp/diagnostic.h
index 9aa21b0b0c..a4b030adfd 100644
--- a/yarp/diagnostic.h
+++ b/yarp/diagnostic.h
@@ -130,6 +130,7 @@ typedef enum {
YP_ERR_INVALID_NUMBER_DECIMAL,
YP_ERR_INVALID_NUMBER_HEXADECIMAL,
YP_ERR_INVALID_NUMBER_OCTAL,
+ YP_ERR_INVALID_NUMBER_UNDERSCORE,
YP_ERR_INVALID_PERCENT,
YP_ERR_INVALID_TOKEN,
YP_ERR_INVALID_VARIABLE_GLOBAL,
diff --git a/yarp/util/yp_char.c b/yarp/util/yp_char.c
index ae0ffea6b8..42c3896626 100644
--- a/yarp/util/yp_char.c
+++ b/yarp/util/yp_char.c
@@ -123,6 +123,9 @@ yp_char_is_inline_whitespace(const uint8_t b) {
return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
}
+// Scan through the string and return the number of characters at the start of
+// the string that match the given kind. Disallows searching past the given
+// maximum number of characters.
static inline size_t
yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
if (length <= 0) return 0;
@@ -134,20 +137,57 @@ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
return size;
}
+// Scan through the string and return the number of characters at the start of
+// the string that match the given kind. Disallows searching past the given
+// maximum number of characters.
+//
+// Additionally, store in *invalid the location of the last underscore that
+// follows another underscore or ends the match; otherwise leave it untouched.
+static inline size_t
+yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    bool underscore = false;
+    while (size < maximum && (yp_number_table[string[size]] & kind)) {
+        if (string[size] == '_') {
+            if (underscore) *invalid = string + size;
+            underscore = true;
+        } else {
+            underscore = false;
+        }
+        size++;
+    }
+
+    // Guard on size so that an empty match never reads string[-1].
+    if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
+    return size;
+}
+
// Returns the number of characters at the start of the string that are binary
// digits or underscores. Disallows searching past the given maximum number of
// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
size_t
-yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) {
- return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
+yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
}
// Returns the number of characters at the start of the string that are octal
-// digits or underscores. Disallows searching past the given maximum number of
+// digits or underscores. Disallows searching past the given maximum number of
// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
size_t
-yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) {
- return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
+yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
}
// Returns the number of characters at the start of the string that are decimal
@@ -160,9 +200,13 @@ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
// Returns the number of characters at the start of the string that are decimal
// digits or underscores. Disallows searching past the given maximum number of
// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
size_t
-yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) {
- return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
+yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
}
// Returns the number of characters at the start of the string that are
@@ -176,9 +220,13 @@ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
// Returns the number of characters at the start of the string that are
// hexadecimal digits or underscores. Disallows searching past the given maximum
// number of characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
size_t
-yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) {
- return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
+yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
}
static inline bool
diff --git a/yarp/util/yp_char.h b/yarp/util/yp_char.h
index e155b69d64..f08d6a8c9d 100644
--- a/yarp/util/yp_char.h
+++ b/yarp/util/yp_char.h
@@ -31,19 +31,31 @@ size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are octal
-// digits or underscores. Disallows searching past the given maximum number of
+// digits or underscores. Disallows searching past the given maximum number of
// characters.
-size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length);
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
+size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are decimal
// digits or underscores. Disallows searching past the given maximum number of
// characters.
-size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length);
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
+size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are
// hexadecimal digits or underscores. Disallows searching past the given maximum
// number of characters.
-size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length);
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
+size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns the number of characters at the start of the string that are regexp
// options. Disallows searching past the given maximum number of characters.
@@ -52,7 +64,11 @@ size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
// Returns the number of characters at the start of the string that are binary
// digits or underscores. Disallows searching past the given maximum number of
// characters.
-size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length);
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the
+// location of the last invalid underscore.
+size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
// Returns true if the given character is a whitespace character.
bool yp_char_is_whitespace(const uint8_t b);
diff --git a/yarp/yarp.c b/yarp/yarp.c
index bf3b3b79cb..b343566ee0 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -5330,6 +5330,45 @@ context_def_p(yp_parser_t *parser) {
/* Specific token lexers */
/******************************************************************************/
+// Record an invalid-underscore error for a number literal. When invalid is
+// non-NULL, append a YP_ERR_INVALID_NUMBER_UNDERSCORE diagnostic to the
+// parser's error list, passing invalid and invalid + 1 as its bounds. When
+// invalid is NULL, do nothing.
+static void
+yp_strspn_number_validate(yp_parser_t *parser, const uint8_t *invalid) {
+ if (invalid != NULL) {
+ yp_diagnostic_list_append(&parser->error_list, invalid, invalid + 1, YP_ERR_INVALID_NUMBER_UNDERSCORE);
+ }
+}
+
+// Scan the binary digits and underscores that begin at string, bounded by
+// parser->end, using yp_strspn_binary_number. Any invalid underscore location
+// it reports is handed to yp_strspn_number_validate so that an error is
+// recorded on the parser. Returns the number of characters scanned.
+static size_t
+yp_strspn_binary_number_validate(yp_parser_t *parser, const uint8_t *string) {
+ const uint8_t *invalid = NULL;
+ size_t length = yp_strspn_binary_number(string, parser->end - string, &invalid);
+ yp_strspn_number_validate(parser, invalid);
+ return length;
+}
+
+// Scan the octal digits and underscores that begin at string, bounded by
+// parser->end, using yp_strspn_octal_number. Any invalid underscore location
+// it reports is handed to yp_strspn_number_validate so that an error is
+// recorded on the parser. Returns the number of characters scanned.
+static size_t
+yp_strspn_octal_number_validate(yp_parser_t *parser, const uint8_t *string) {
+ const uint8_t *invalid = NULL;
+ size_t length = yp_strspn_octal_number(string, parser->end - string, &invalid);
+ yp_strspn_number_validate(parser, invalid);
+ return length;
+}
+
+// Scan the decimal digits and underscores that begin at string, bounded by
+// parser->end, using yp_strspn_decimal_number. Any invalid underscore location
+// it reports is handed to yp_strspn_number_validate so that an error is
+// recorded on the parser. Returns the number of characters scanned.
+static size_t
+yp_strspn_decimal_number_validate(yp_parser_t *parser, const uint8_t *string) {
+ const uint8_t *invalid = NULL;
+ size_t length = yp_strspn_decimal_number(string, parser->end - string, &invalid);
+ yp_strspn_number_validate(parser, invalid);
+ return length;
+}
+
+// Scan the hexadecimal digits and underscores that begin at string, bounded by
+// parser->end, using yp_strspn_hexadecimal_number. Any invalid underscore
+// location it reports is handed to yp_strspn_number_validate so that an error
+// is recorded on the parser. Returns the number of characters scanned.
+static size_t
+yp_strspn_hexadecimal_number_validate(yp_parser_t *parser, const uint8_t *string) {
+ const uint8_t *invalid = NULL;
+ size_t length = yp_strspn_hexadecimal_number(string, parser->end - string, &invalid);
+ yp_strspn_number_validate(parser, invalid);
+ return length;
+}
+
static yp_token_type_t
lex_optional_float_suffix(yp_parser_t *parser) {
yp_token_type_t type = YP_TOKEN_INTEGER;
@@ -5339,7 +5378,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
if (peek(parser) == '.') {
if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
parser->current.end += 2;
- parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
type = YP_TOKEN_FLOAT;
} else {
// If we had a . and then something else, then it's not a float suffix on
@@ -5355,7 +5394,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
if (yp_char_is_decimal_digit(*parser->current.end)) {
parser->current.end++;
- parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
type = YP_TOKEN_FLOAT;
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_FLOAT_EXPONENT);
@@ -5377,7 +5416,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'D':
parser->current.end++;
if (yp_char_is_decimal_digit(peek(parser))) {
- parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_DECIMAL);
}
@@ -5389,7 +5428,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'B':
parser->current.end++;
if (yp_char_is_binary_digit(peek(parser))) {
- parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_binary_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_BINARY);
}
@@ -5402,7 +5441,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'O':
parser->current.end++;
if (yp_char_is_octal_digit(peek(parser))) {
- parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_OCTAL);
}
@@ -5420,7 +5459,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case '5':
case '6':
case '7':
- parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end);
parser->integer_base = YP_INTEGER_BASE_FLAGS_OCTAL;
break;
@@ -5429,7 +5468,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
case 'X':
parser->current.end++;
if (yp_char_is_hexadecimal_digit(peek(parser))) {
- parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_hexadecimal_number_validate(parser, parser->current.end);
} else {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_HEXADECIMAL);
}
@@ -5453,18 +5492,12 @@ lex_numeric_prefix(yp_parser_t *parser) {
} else {
// If it didn't start with a 0, then we'll lex as far as we can into a
// decimal number.
- parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
+ parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end);
// Afterward, we'll lex as far as we can into an optional float suffix.
type = lex_optional_float_suffix(parser);
}
- // If the last character that we consumed was an underscore, then this is
- // actually an invalid integer value, and we should return an invalid token.
- if (peek_offset(parser, -1) == '_') {
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_NUMBER_LITERAL_UNDERSCORE);
- }
-
return type;
}