From 9a28a29b870b5f45d370bc8f16c431b435f0bbb3 Mon Sep 17 00:00:00 2001 From: nobu Date: Mon, 7 Dec 2015 14:39:52 +0000 Subject: parse.y: indented hereoc * parse.y: add heredoc <<~ syntax. [Feature #9098] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@52916 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- parse.y | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 182 insertions(+), 5 deletions(-) (limited to 'parse.y') diff --git a/parse.y b/parse.y index bc6de5a3ec..616418327a 100644 --- a/parse.y +++ b/parse.y @@ -257,6 +257,8 @@ struct parser_params { int toksiz; int tokline; int heredoc_end; + int heredoc_indent; + int heredoc_line_indent; char *tokenbuf; NODE *deferred_nodes; struct local_vars *lvtbl; @@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_params*, const char*); #define lex_p (parser->lex.pcur) #define lex_pend (parser->lex.pend) #define heredoc_end (parser->heredoc_end) +#define heredoc_indent (parser->heredoc_indent) +#define heredoc_line_indent (parser->heredoc_line_indent) #define command_start (parser->command_start) #define deferred_nodes (parser->deferred_nodes) #define lex_gets_ptr (parser->lex.gets_ptr) @@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct parser_params*, VALUE, int); static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match)) +static void parser_heredoc_dedent(struct parser_params*,NODE*); +# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str)) + #define get_id(id) (id) #define get_value(val) (val) #else @@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b) #define new_defined(expr) dispatch1(defined, (expr)) +static void parser_heredoc_dedent(struct parser_params*,VALUE); +# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str)) + #define FIXME 0 #endif /* RIPPER */ @@ -3887,6 +3897,7 @@ strings : string else { node = evstr2dstr(node); } + heredoc_indent = 0; $$ = node; /*% $$ = $1; @@ -3908,6 +3919,7 @@ string : tCHAR string1 : tSTRING_BEG string_contents tSTRING_END { + heredoc_dedent($2); /*%%%*/ $$ = $2; /*% @@ -3920,6 +3932,10 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END { /*%%%*/ NODE *node = $2; + /*% + %*/ + heredoc_dedent($2); + /*%%%*/ if (!node) { node = NEW_XSTR(STR_NEW0()); } @@ -4319,6 +4335,10 @@ string_content : tSTRING_CONTENT $$ = brace_nest; brace_nest = 0; } + { + $$ = heredoc_indent; + heredoc_indent = 0; + } compstmt tSTRING_DEND { cond_stack = $1; @@ -4326,11 +4346,13 @@ string_content : tSTRING_CONTENT lex_strterm = $3; lex_state = $4; brace_nest = $5; + heredoc_indent = $6; + heredoc_line_indent = -1; /*%%%*/ - if ($6) $6->flags &= ~NODE_FL_NEWLINE; - $$ = new_evstr($6); + if ($7) $7->flags &= ~NODE_FL_NEWLINE; + $$ = new_evstr($7); /*% - $$ = dispatch1(string_embexpr, $6); + $$ = dispatch1(string_embexpr, $7); %*/ } ; @@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_params *parser, } while (0) while ((c = nextc()) != -1) { + if (heredoc_indent > 0) { + if (heredoc_line_indent == -1) { + if (c == '\n') heredoc_line_indent = 0; + } + else { + if (c == ' ') { + heredoc_line_indent++; + } + else if (c == '\t') { + int w = (heredoc_line_indent / TAB_WIDTH) + 1; + heredoc_line_indent = w * TAB_WIDTH; + } + else if (c != '\n') { + if (heredoc_indent > heredoc_line_indent) { + heredoc_indent = heredoc_line_indent; + } + heredoc_line_indent = -1; + } + } + } + if (paren && c == paren) { ++*nest; } @@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_params *parser) c = nextc(); func = STR_FUNC_INDENT; } + else if (c == '~') { + c = nextc(); + func = STR_FUNC_INDENT; + heredoc_indent = INT_MAX; + heredoc_line_indent = 0; + } switch (c) { case '\'': func |= str_squote; goto quoted; @@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_params *parser) if (!parser_is_identchar()) { pushback(c); if (func & STR_FUNC_INDENT) { - pushback('-'); + pushback(heredoc_indent > 0 ? '~' : '-'); } return 0; } @@ -6534,6 +6583,114 @@ parser_heredoc_restore(struct parser_params *parser, NODE *here) ripper_flush(parser); } +static int +dedent_pos(const char *str, long len, int width) +{ + int i, col = 0; + + for (i = 0; i < len && col < width; i++) { + if (str[i] == ' ') { + col++; + } + else if (str[i] == '\t') { + int n = TAB_WIDTH * (col / TAB_WIDTH + 1); + if (n > width) break; + col = n; + } + else { + break; + } + } + return i; +} + +#ifndef RIPPER +static VALUE +parser_heredoc_dedent_string(VALUE input, int width, int first) +{ + long len; + int col; + char *str, *p, *out_p, *end, *t; + + RSTRING_GETMEM(input, str, len); + end = &str[len]; + + p = str; + if (!first) { + p = memchr(p, '\n', end - p); + if (!p) return input; + p++; + } + out_p = p; + while (p < end) { + col = dedent_pos(p, end - p, width); + p += col; + if (!(t = memchr(p, '\n', end - p))) + t = end; + else + ++t; + if (p > out_p) memmove(out_p, p, t - p); + out_p += t - p; + p = t; + } + rb_str_set_len(input, out_p - str); + + return input; +} + +static void +parser_heredoc_dedent(struct parser_params *parser, NODE *root) +{ + NODE *node, *str_node; + int first = TRUE; + int indent = heredoc_indent; + + if (indent <= 0) return; + + node = str_node = root; + + while (str_node) { + VALUE lit = str_node->nd_lit; + if (NIL_P(parser_heredoc_dedent_string(lit, indent, first))) + compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit); + first = FALSE; + + str_node = 0; + while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) { + if ((str_node = node->nd_head) != 0) { + enum node_type type = nd_type(str_node); + if (type == NODE_STR || type == NODE_DSTR) break; + } + } + } +} +#else /* RIPPER */ +static void +parser_heredoc_dedent(struct parser_params *parser, VALUE array) +{ + if (heredoc_indent <= 0) return; + + dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent)); +} + +static VALUE +parser_dedent_string(VALUE self, VALUE input, VALUE width) +{ + char *str; + long len; + int wid, col; + + StringValue(input); + wid = NUM2UINT(width); + rb_str_modify(input); + RSTRING_GETMEM(input, str, len); + col = dedent_pos(str, len, wid); + MEMMOVE(str, str + col, char, len - col); + rb_str_set_len(input, len - col); + return INT2NUM(col); +} +#endif + static int parser_whole_match_p(struct parser_params *parser, const char *eos, long len, int indent) @@ -6685,7 +6842,15 @@ parser_here_document(struct parser_params *parser, NODE *here) } if (!(func & STR_FUNC_EXPAND)) { + int end = 0; do { +#ifdef RIPPER + if (end && heredoc_indent > 0) { + set_yylval_str(str); + flush_string_content(enc); + return tSTRING_CONTENT; + } +#endif p = RSTRING_PTR(lex_lastline); pend = lex_pend; if (pend > p) { @@ -6712,7 +6877,7 @@ parser_here_document(struct parser_params *parser, NODE *here) } goto error; } - } while (!whole_match_p(eos, len, indent)); + } while (!(end = whole_match_p(eos, len, indent))); } else { /* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/ @@ -6730,11 +6895,20 @@ parser_here_document(struct parser_params *parser, NODE *here) goto restore; } if (c != '\n') { +#ifdef RIPPER + flush: +#endif set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); flush_string_content(enc); return tSTRING_CONTENT; } tokadd(nextc()); +#ifdef RIPPER + if (c == '\n' && heredoc_indent > 0) { + lex_goto_eol(parser); + goto flush; + } +#endif /* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/ if ((c = nextc()) == -1) goto error; } while (!whole_match_p(eos, len, indent)); @@ -11294,6 +11468,9 @@ InitVM_ripper(void) rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1); #endif + rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2); + rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2); + ripper_init_eventids1_table(Ripper); ripper_init_eventids2_table(Ripper); -- cgit v1.2.3