about summary refs log tree commit diff stats
path: root/prism/parser.h
diff options
context:
space:
mode:
Diffstat (limited to 'prism/parser.h')
-rw-r--r-- prism/parser.h 285
1 files changed, 143 insertions, 142 deletions
diff --git a/prism/parser.h b/prism/parser.h
index 89b0f2744b..0a5ba80819 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -1,13 +1,13 @@
-#ifndef YARP_PARSER_H
-#define YARP_PARSER_H
+#ifndef PRISM_PARSER_H
+#define PRISM_PARSER_H
-#include "yarp/ast.h"
-#include "yarp/defines.h"
-#include "yarp/enc/yp_encoding.h"
-#include "yarp/util/yp_constant_pool.h"
-#include "yarp/util/yp_list.h"
-#include "yarp/util/yp_newline_list.h"
-#include "yarp/util/yp_state_stack.h"
+#include "prism/ast.h"
+#include "prism/defines.h"
+#include "prism/enc/pm_encoding.h"
+#include "prism/util/pm_constant_pool.h"
+#include "prism/util/pm_list.h"
+#include "prism/util/pm_newline_list.h"
+#include "prism/util/pm_state_stack.h"
#include <stdbool.h>
@@ -15,88 +15,88 @@
// the lexer can track. This is used to determine which kind of token to return
// based on the context of the parser.
typedef enum {
- YP_LEX_STATE_BIT_BEG,
- YP_LEX_STATE_BIT_END,
- YP_LEX_STATE_BIT_ENDARG,
- YP_LEX_STATE_BIT_ENDFN,
- YP_LEX_STATE_BIT_ARG,
- YP_LEX_STATE_BIT_CMDARG,
- YP_LEX_STATE_BIT_MID,
- YP_LEX_STATE_BIT_FNAME,
- YP_LEX_STATE_BIT_DOT,
- YP_LEX_STATE_BIT_CLASS,
- YP_LEX_STATE_BIT_LABEL,
- YP_LEX_STATE_BIT_LABELED,
- YP_LEX_STATE_BIT_FITEM
-} yp_lex_state_bit_t;
+ PM_LEX_STATE_BIT_BEG,
+ PM_LEX_STATE_BIT_END,
+ PM_LEX_STATE_BIT_ENDARG,
+ PM_LEX_STATE_BIT_ENDFN,
+ PM_LEX_STATE_BIT_ARG,
+ PM_LEX_STATE_BIT_CMDARG,
+ PM_LEX_STATE_BIT_MID,
+ PM_LEX_STATE_BIT_FNAME,
+ PM_LEX_STATE_BIT_DOT,
+ PM_LEX_STATE_BIT_CLASS,
+ PM_LEX_STATE_BIT_LABEL,
+ PM_LEX_STATE_BIT_LABELED,
+ PM_LEX_STATE_BIT_FITEM
+} pm_lex_state_bit_t;
// This enum combines the various bits from the above enum into individual
// values that represent the various states of the lexer.
typedef enum {
- YP_LEX_STATE_NONE = 0,
- YP_LEX_STATE_BEG = (1 << YP_LEX_STATE_BIT_BEG),
- YP_LEX_STATE_END = (1 << YP_LEX_STATE_BIT_END),
- YP_LEX_STATE_ENDARG = (1 << YP_LEX_STATE_BIT_ENDARG),
- YP_LEX_STATE_ENDFN = (1 << YP_LEX_STATE_BIT_ENDFN),
- YP_LEX_STATE_ARG = (1 << YP_LEX_STATE_BIT_ARG),
- YP_LEX_STATE_CMDARG = (1 << YP_LEX_STATE_BIT_CMDARG),
- YP_LEX_STATE_MID = (1 << YP_LEX_STATE_BIT_MID),
- YP_LEX_STATE_FNAME = (1 << YP_LEX_STATE_BIT_FNAME),
- YP_LEX_STATE_DOT = (1 << YP_LEX_STATE_BIT_DOT),
- YP_LEX_STATE_CLASS = (1 << YP_LEX_STATE_BIT_CLASS),
- YP_LEX_STATE_LABEL = (1 << YP_LEX_STATE_BIT_LABEL),
- YP_LEX_STATE_LABELED = (1 << YP_LEX_STATE_BIT_LABELED),
- YP_LEX_STATE_FITEM = (1 << YP_LEX_STATE_BIT_FITEM),
- YP_LEX_STATE_BEG_ANY = YP_LEX_STATE_BEG | YP_LEX_STATE_MID | YP_LEX_STATE_CLASS,
- YP_LEX_STATE_ARG_ANY = YP_LEX_STATE_ARG | YP_LEX_STATE_CMDARG,
- YP_LEX_STATE_END_ANY = YP_LEX_STATE_END | YP_LEX_STATE_ENDARG | YP_LEX_STATE_ENDFN
-} yp_lex_state_t;
+ PM_LEX_STATE_NONE = 0,
+ PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
+ PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
+ PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
+ PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
+ PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
+ PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
+ PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
+ PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
+ PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
+ PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
+ PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
+ PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
+ PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
+ PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
+ PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
+ PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
+} pm_lex_state_t;
typedef enum {
- YP_HEREDOC_QUOTE_NONE,
- YP_HEREDOC_QUOTE_SINGLE = '\'',
- YP_HEREDOC_QUOTE_DOUBLE = '"',
- YP_HEREDOC_QUOTE_BACKTICK = '`',
-} yp_heredoc_quote_t;
+ PM_HEREDOC_QUOTE_NONE,
+ PM_HEREDOC_QUOTE_SINGLE = '\'',
+ PM_HEREDOC_QUOTE_DOUBLE = '"',
+ PM_HEREDOC_QUOTE_BACKTICK = '`',
+} pm_heredoc_quote_t;
typedef enum {
- YP_HEREDOC_INDENT_NONE,
- YP_HEREDOC_INDENT_DASH,
- YP_HEREDOC_INDENT_TILDE,
-} yp_heredoc_indent_t;
+ PM_HEREDOC_INDENT_NONE,
+ PM_HEREDOC_INDENT_DASH,
+ PM_HEREDOC_INDENT_TILDE,
+} pm_heredoc_indent_t;
// When lexing Ruby source, the lexer has a small amount of state to tell which
// kind of token it is currently lexing. For example, when we find the start of
// a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
-// that the lexer is now in the YP_LEX_STRING mode, and will return tokens that
+// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
// are found as part of a string.
-typedef struct yp_lex_mode {
+typedef struct pm_lex_mode {
enum {
// This state is used when any given token is being lexed.
- YP_LEX_DEFAULT,
+ PM_LEX_DEFAULT,
// This state is used when we're lexing as normal but inside an embedded
// expression of a string.
- YP_LEX_EMBEXPR,
+ PM_LEX_EMBEXPR,
// This state is used when we're lexing a variable that is embedded
// directly inside of a string with the # shorthand.
- YP_LEX_EMBVAR,
+ PM_LEX_EMBVAR,
// This state is used when you are inside the content of a heredoc.
- YP_LEX_HEREDOC,
+ PM_LEX_HEREDOC,
// This state is used when we are lexing a list of tokens, as in a %w
// word list literal or a %i symbol list literal.
- YP_LEX_LIST,
+ PM_LEX_LIST,
// This state is used when a regular expression has been begun and we
// are looking for the terminator.
- YP_LEX_REGEXP,
+ PM_LEX_REGEXP,
// This state is used when we are lexing a string or a string-like
// token, as in string content with either quote or an xstring.
- YP_LEX_STRING
+ PM_LEX_STRING
} mode;
union {
@@ -166,8 +166,8 @@ typedef struct yp_lex_mode {
const uint8_t *ident_start;
size_t ident_length;
- yp_heredoc_quote_t quote;
- yp_heredoc_indent_t indent;
+ pm_heredoc_quote_t quote;
+ pm_heredoc_indent_t indent;
// This is the pointer to the character where lexing should resume
// once the heredoc has been completely processed.
@@ -176,83 +176,83 @@ typedef struct yp_lex_mode {
} as;
// The previous lex state so that it knows how to pop.
- struct yp_lex_mode *prev;
-} yp_lex_mode_t;
+ struct pm_lex_mode *prev;
+} pm_lex_mode_t;
// We pre-allocate a certain number of lex states in order to avoid having to
// call malloc too many times while parsing. You really shouldn't need more than
// this because you only really nest deeply when doing string interpolation.
-#define YP_LEX_STACK_SIZE 4
+#define PM_LEX_STACK_SIZE 4
// A forward declaration since our error handler struct accepts a parser for
// each of its function calls.
-typedef struct yp_parser yp_parser_t;
+typedef struct pm_parser pm_parser_t;
// While parsing, we keep track of a stack of contexts. This is helpful for
// error recovery so that we can pop back to a previous context when we hit a
// token that is understood by a parent context but not by the current context.
typedef enum {
- YP_CONTEXT_BEGIN, // a begin statement
- YP_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
- YP_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
- YP_CONTEXT_CASE_WHEN, // a case when statements
- YP_CONTEXT_CASE_IN, // a case in statements
- YP_CONTEXT_CLASS, // a class declaration
- YP_CONTEXT_DEF, // a method definition
- YP_CONTEXT_DEF_PARAMS, // a method definition's parameters
- YP_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
- YP_CONTEXT_ELSE, // an else clause
- YP_CONTEXT_ELSIF, // an elsif clause
- YP_CONTEXT_EMBEXPR, // an interpolated expression
- YP_CONTEXT_ENSURE, // an ensure statement
- YP_CONTEXT_FOR, // a for loop
- YP_CONTEXT_IF, // an if statement
- YP_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
- YP_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
- YP_CONTEXT_MAIN, // the top level context
- YP_CONTEXT_MODULE, // a module declaration
- YP_CONTEXT_PARENS, // a parenthesized expression
- YP_CONTEXT_POSTEXE, // an END block
- YP_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
- YP_CONTEXT_PREEXE, // a BEGIN block
- YP_CONTEXT_RESCUE_ELSE, // a rescue else statement
- YP_CONTEXT_RESCUE, // a rescue statement
- YP_CONTEXT_SCLASS, // a singleton class definition
- YP_CONTEXT_UNLESS, // an unless statement
- YP_CONTEXT_UNTIL, // an until statement
- YP_CONTEXT_WHILE, // a while statement
-} yp_context_t;
+ PM_CONTEXT_BEGIN, // a begin statement
+ PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
+ PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
+ PM_CONTEXT_CASE_WHEN, // a case when statements
+ PM_CONTEXT_CASE_IN, // a case in statements
+ PM_CONTEXT_CLASS, // a class declaration
+ PM_CONTEXT_DEF, // a method definition
+ PM_CONTEXT_DEF_PARAMS, // a method definition's parameters
+ PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
+ PM_CONTEXT_ELSE, // an else clause
+ PM_CONTEXT_ELSIF, // an elsif clause
+ PM_CONTEXT_EMBEXPR, // an interpolated expression
+ PM_CONTEXT_ENSURE, // an ensure statement
+ PM_CONTEXT_FOR, // a for loop
+ PM_CONTEXT_IF, // an if statement
+ PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
+ PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
+ PM_CONTEXT_MAIN, // the top level context
+ PM_CONTEXT_MODULE, // a module declaration
+ PM_CONTEXT_PARENS, // a parenthesized expression
+ PM_CONTEXT_POSTEXE, // an END block
+ PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
+ PM_CONTEXT_PREEXE, // a BEGIN block
+ PM_CONTEXT_RESCUE_ELSE, // a rescue else statement
+ PM_CONTEXT_RESCUE, // a rescue statement
+ PM_CONTEXT_SCLASS, // a singleton class definition
+ PM_CONTEXT_UNLESS, // an unless statement
+ PM_CONTEXT_UNTIL, // an until statement
+ PM_CONTEXT_WHILE, // a while statement
+} pm_context_t;
// This is a node in a linked list of contexts.
-typedef struct yp_context_node {
- yp_context_t context;
- struct yp_context_node *prev;
-} yp_context_node_t;
+typedef struct pm_context_node {
+ pm_context_t context;
+ struct pm_context_node *prev;
+} pm_context_node_t;
// This is the type of a comment that we've found while parsing.
typedef enum {
- YP_COMMENT_INLINE,
- YP_COMMENT_EMBDOC,
- YP_COMMENT___END__
-} yp_comment_type_t;
+ PM_COMMENT_INLINE,
+ PM_COMMENT_EMBDOC,
+ PM_COMMENT___END__
+} pm_comment_type_t;
// This is a node in the linked list of comments that we've found while parsing.
-typedef struct yp_comment {
- yp_list_node_t node;
+typedef struct pm_comment {
+ pm_list_node_t node;
const uint8_t *start;
const uint8_t *end;
- yp_comment_type_t type;
-} yp_comment_t;
+ pm_comment_type_t type;
+} pm_comment_t;
-// When the encoding that is being used to parse the source is changed by YARP,
+// When the encoding that is being used to parse the source is changed by prism,
// we provide the ability here to call out to a user-defined function.
-typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
+typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
-// When an encoding is encountered that isn't understood by YARP, we provide
+// When an encoding is encountered that isn't understood by prism, we provide
// the ability here to call out to a user-defined function to get an encoding
// struct. If the function returns something that isn't NULL, we set that to
// our encoding and use it to parse identifiers.
-typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
+typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
// When you are lexing through a file, the lexer needs all of the information
// that the parser additionally provides (for example, the local table). So if
@@ -268,17 +268,17 @@ typedef struct {
// This is the callback that is called when a token is lexed. It is passed
// the opaque data pointer, the parser, and the token that was lexed.
- void (*callback)(void *data, yp_parser_t *parser, yp_token_t *token);
-} yp_lex_callback_t;
+ void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
+} pm_lex_callback_t;
// This struct represents a node in a linked list of scopes. Some scopes can see
// into their parent scopes, while others cannot.
-typedef struct yp_scope {
+typedef struct pm_scope {
// The IDs of the locals in the given scope.
- yp_constant_id_list_t locals;
+ pm_constant_id_list_t locals;
// A pointer to the previous scope in the linked list.
- struct yp_scope *previous;
+ struct pm_scope *previous;
// A boolean indicating whether or not this scope can see into its parent.
// If closed is true, then the scope cannot see into its parent.
@@ -293,14 +293,14 @@ typedef struct yp_scope {
// This is necessary to determine if child blocks are allowed to use
// numbered parameters.
bool numbered_params;
-} yp_scope_t;
+} pm_scope_t;
// This struct represents the overall parser. It contains a reference to the
// source file, as well as pointers that indicate where in the source it's
// currently parsing. It also contains the most recent and current token that
// it's considering.
-struct yp_parser {
- yp_lex_state_t lex_state; // the current state of the lexer
+struct pm_parser {
+ pm_lex_state_t lex_state; // the current state of the lexer
int enclosure_nesting; // tracks the current nesting of (), [], and {}
// Used to temporarily track the nesting of enclosures to determine if a {
@@ -313,22 +313,22 @@ struct yp_parser {
// the stack used to determine if a do keyword belongs to the predicate of a
// while, until, or for loop
- yp_state_stack_t do_loop_stack;
+ pm_state_stack_t do_loop_stack;
// the stack used to determine if a do keyword belongs to the beginning of a
// block
- yp_state_stack_t accepts_block_stack;
+ pm_state_stack_t accepts_block_stack;
struct {
- yp_lex_mode_t *current; // the current mode of the lexer
- yp_lex_mode_t stack[YP_LEX_STACK_SIZE]; // the stack of lexer modes
+ pm_lex_mode_t *current; // the current mode of the lexer
+ pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
size_t index; // the current index into the lexer mode stack
} lex_modes;
const uint8_t *start; // the pointer to the start of the source
const uint8_t *end; // the pointer to the end of the source
- yp_token_t previous; // the previous token we were considering
- yp_token_t current; // the current token we're considering
+ pm_token_t previous; // the previous token we were considering
+ pm_token_t current; // the current token we're considering
// This is a special field set on the parser when we need the parser to jump
// to a specific location when lexing the next token, as opposed to just
@@ -341,26 +341,27 @@ struct yp_parser {
// found on a line then this is NULL.
const uint8_t *heredoc_end;
- yp_list_t comment_list; // the list of comments that have been found while parsing
- yp_list_t warning_list; // the list of warnings that have been found while parsing
- yp_list_t error_list; // the list of errors that have been found while parsing
- yp_scope_t *current_scope; // the current local scope
+ pm_list_t comment_list; // the list of comments that have been found while parsing
+ pm_list_t warning_list; // the list of warnings that have been found while parsing
+ pm_list_t error_list; // the list of errors that have been found while parsing
+ pm_scope_t *current_scope; // the current local scope
- yp_context_node_t *current_context; // the current parsing context
+ pm_context_node_t *current_context; // the current parsing context
// The encoding functions for the current file is attached to the parser as
// it's parsing so that it can change with a magic comment.
- yp_encoding_t encoding;
+ pm_encoding_t encoding;
// When the encoding that is being used to parse the source is changed by
- // YARP, we provide the ability here to call out to a user-defined function.
- yp_encoding_changed_callback_t encoding_changed_callback;
+ // prism, we provide the ability here to call out to a user-defined
+ // function.
+ pm_encoding_changed_callback_t encoding_changed_callback;
- // When an encoding is encountered that isn't understood by YARP, we provide
- // the ability here to call out to a user-defined function to get an
+ // When an encoding is encountered that isn't understood by prism, we
+ // provide the ability here to call out to a user-defined function to get an
// encoding struct. If the function returns something that isn't NULL, we
// set that to our encoding and use it to parse identifiers.
- yp_encoding_decode_callback_t encoding_decode_callback;
+ pm_encoding_decode_callback_t encoding_decode_callback;
// This pointer indicates where a comment must start if it is to be
// considered an encoding comment.
@@ -368,24 +369,24 @@ struct yp_parser {
// This is an optional callback that can be attached to the parser that will
// be called whenever a new token is lexed by the parser.
- yp_lex_callback_t *lex_callback;
+ pm_lex_callback_t *lex_callback;
// This is the path of the file being parsed
// We use the filepath when constructing SourceFileNodes
- yp_string_t filepath_string;
+ pm_string_t filepath_string;
// This constant pool keeps all of the constants defined throughout the file
// so that we can reference them later.
- yp_constant_pool_t constant_pool;
+ pm_constant_pool_t constant_pool;
// This is the list of newline offsets in the source file.
- yp_newline_list_t newline_list;
+ pm_newline_list_t newline_list;
// We want to add a flag to integer nodes that indicates their base. We only
// want to parse these once, but we don't have space on the token itself to
// communicate this information. So we store it here and pass it through
// when we find tokens that we need it for.
- yp_node_flags_t integer_base;
+ pm_node_flags_t integer_base;
// Whether or not we're at the beginning of a command
bool command_start;
@@ -414,4 +415,4 @@ struct yp_parser {
bool frozen_string_literal;
};
-#endif // YARP_PARSER_H
+#endif // PRISM_PARSER_H