about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- prism/prism.c 31
1 files changed, 24 insertions, 7 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 96ed3989e2..1751857e1e 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -5908,6 +5908,19 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
}
/**
+ * Similar to char_is_identifier, but assumes the parser's encoding has not been
+ * changed. Returns 1 if the byte at b continues an identifier, 0 otherwise.
+ */
+static inline size_t
+char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
+ if (*b < 0x80) { // byte below 0x80: answered from the lookup table; *b is read without checking it against end
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0); // 1 for '_' or a byte flagged alphanumeric, else 0
+ } else {
+ return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u); // NOTE(review): `x || 1u` is always 1, so every byte >= 0x80 is accepted with width 1 regardless of the call's result - confirm intended
+ }
+}
+
+/**
* Like the above, this function is also used extremely frequently to lex all of
* the identifiers in a source file once the first character has been found. So
* it's important that it be as fast as possible.
@@ -5925,11 +5938,8 @@ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
} else {
return 0;
}
- } else if (*b < 0x80) {
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
- } else {
- return (size_t) (pm_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
}
+ return char_is_identifier_utf8(b, parser->end);
}
// Here we're defining a perfect hash for the characters that are allowed in
@@ -7003,9 +7013,16 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
const uint8_t *end = parser->end;
const uint8_t *current_start = parser->current.start;
const uint8_t *current_end = parser->current.end;
+ bool encoding_changed = parser->encoding_changed;
- while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
- current_end += width;
+ if (encoding_changed) {
+ while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
+ current_end += width;
+ }
+ } else {
+ while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
+ current_end += width;
+ }
}
parser->current.end = current_end;
@@ -7123,7 +7140,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
}
}
- if (parser->encoding_changed) {
+ if (encoding_changed) {
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
}
return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;