Ruby 4.0.5p0 (2026-05-20 revision 64336ffd0ee9e1f4c05891695a3d7b49cb709721)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
// NOTE(review): presumably the number of columns a tab counts for when
// measuring whitespace -- confirm against the users of this constant.
#define PM_TAB_WHITESPACE_SIZE 8

// Generic minimum/maximum helpers. Each argument may be evaluated more than
// once, so do not pass expressions that have side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
20
21/******************************************************************************/
22/* Helpful AST-related macros */
23/******************************************************************************/
24
// Short aliases for the node flag and node upcast macros.
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

// Accessors for the start/end pointers of a token (given as a pointer).
#define PM_TOKEN_START(token_) ((token_)->start)
#define PM_TOKEN_END(token_) ((token_)->end)

// Accessors for the start/end pointers of a node's location.
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_END(node_) (UP(node_)->location.end)

// Build pm_location_t values. The "null" value is an empty range positioned
// at the parser's start pointer; the optional-token value is a zeroed location
// when the token's type is PM_TOKEN_NOT_PROVIDED.
#define PM_LOCATION_NULL_VALUE(parser_) \
    ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start })
#define PM_LOCATION_TOKEN_VALUE(token_) \
    ((pm_location_t) { .start = ((token_)->start), .end = ((token_)->end) })
#define PM_LOCATION_NODE_VALUE(node_) \
    ((pm_location_t) { .start = (UP(node_)->location.start), .end = (UP(node_)->location.end) })
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token_) \
    ((token_)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token_))
38
39/******************************************************************************/
40/* Lex mode manipulations */
41/******************************************************************************/
42
/**
 * Map an opening delimiter to the byte stored as a lex mode's `incrementor`.
 *
 * @param start The opening delimiter.
 * @return `start` itself when it is one of `(`, `[`, `{` or `<`; otherwise
 *     '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool bracket_like = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return bracket_like ? start : (uint8_t) '\0';
}
59
/**
 * Map an opening delimiter to the byte that closes it.
 *
 * @param start The opening delimiter.
 * @return The matching closer for `(`, `[`, `{` and `<`; for every other byte
 *     the delimiter closes itself, so `start` is returned unchanged.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
79
85static bool
86lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
87 lex_mode.prev = parser->lex_modes.current;
88 parser->lex_modes.index++;
89
90 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
92 if (parser->lex_modes.current == NULL) return false;
93
94 *parser->lex_modes.current = lex_mode;
95 } else {
96 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
97 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
98 }
99
100 return true;
101}
102
106static inline bool
107lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
108 uint8_t incrementor = lex_mode_incrementor(delimiter);
109 uint8_t terminator = lex_mode_terminator(delimiter);
110
111 pm_lex_mode_t lex_mode = {
112 .mode = PM_LEX_LIST,
113 .as.list = {
114 .nesting = 0,
115 .interpolation = interpolation,
116 .incrementor = incrementor,
117 .terminator = terminator
118 }
119 };
120
121 // These are the places where we need to split up the content of the list.
122 // We'll use strpbrk to find the first of these characters.
123 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
124 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
125 size_t index = 7;
126
127 // Now we'll add the terminator to the list of breakpoints. If the
128 // terminator is not already a NULL byte, add it to the list.
129 if (terminator != '\0') {
130 breakpoints[index++] = terminator;
131 }
132
133 // If interpolation is allowed, then we're going to check for the #
134 // character. Otherwise we'll only look for escapes and the terminator.
135 if (interpolation) {
136 breakpoints[index++] = '#';
137 }
138
139 // If there is an incrementor, then we'll check for that as well.
140 if (incrementor != '\0') {
141 breakpoints[index++] = incrementor;
142 }
143
144 parser->explicit_encoding = NULL;
145 return lex_mode_push(parser, lex_mode);
146}
147
153static inline bool
154lex_mode_push_list_eof(pm_parser_t *parser) {
155 return lex_mode_push_list(parser, false, '\0');
156}
157
161static inline bool
162lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
163 pm_lex_mode_t lex_mode = {
164 .mode = PM_LEX_REGEXP,
165 .as.regexp = {
166 .nesting = 0,
167 .incrementor = incrementor,
168 .terminator = terminator
169 }
170 };
171
172 // These are the places where we need to split up the content of the
173 // regular expression. We'll use strpbrk to find the first of these
174 // characters.
175 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
176 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
177 size_t index = 4;
178
179 // First we'll add the terminator.
180 if (terminator != '\0') {
181 breakpoints[index++] = terminator;
182 }
183
184 // Next, if there is an incrementor, then we'll check for that as well.
185 if (incrementor != '\0') {
186 breakpoints[index++] = incrementor;
187 }
188
189 parser->explicit_encoding = NULL;
190 return lex_mode_push(parser, lex_mode);
191}
192
196static inline bool
197lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
198 pm_lex_mode_t lex_mode = {
199 .mode = PM_LEX_STRING,
200 .as.string = {
201 .nesting = 0,
202 .interpolation = interpolation,
203 .label_allowed = label_allowed,
204 .incrementor = incrementor,
205 .terminator = terminator
206 }
207 };
208
209 // These are the places where we need to split up the content of the
210 // string. We'll use strpbrk to find the first of these characters.
211 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
212 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
213 size_t index = 3;
214
215 // Now add in the terminator. If the terminator is not already a NULL byte,
216 // then we'll add it.
217 if (terminator != '\0') {
218 breakpoints[index++] = terminator;
219 }
220
221 // If interpolation is allowed, then we're going to check for the #
222 // character. Otherwise we'll only look for escapes and the terminator.
223 if (interpolation) {
224 breakpoints[index++] = '#';
225 }
226
227 // If we have an incrementor, then we'll add that in as a breakpoint as
228 // well.
229 if (incrementor != '\0') {
230 breakpoints[index++] = incrementor;
231 }
232
233 parser->explicit_encoding = NULL;
234 return lex_mode_push(parser, lex_mode);
235}
236
242static inline bool
243lex_mode_push_string_eof(pm_parser_t *parser) {
244 return lex_mode_push_string(parser, false, false, '\0', '\0');
245}
246
252static void
253lex_mode_pop(pm_parser_t *parser) {
254 if (parser->lex_modes.index == 0) {
255 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
256 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
257 parser->lex_modes.index--;
258 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
259 } else {
260 parser->lex_modes.index--;
261 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
262 xfree(parser->lex_modes.current);
263 parser->lex_modes.current = prev;
264 }
265}
266
270static inline bool
271lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
272 return parser->lex_state & state;
273}
274
/**
 * The possible results of lex_state_ignored_p.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
280
281static inline pm_ignored_newline_type_t
282lex_state_ignored_p(pm_parser_t *parser) {
283 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
284
285 if (ignored) {
286 return PM_IGNORED_NEWLINE_ALL;
287 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
288 return PM_IGNORED_NEWLINE_PATTERN;
289 } else {
290 return PM_IGNORED_NEWLINE_NONE;
291 }
292}
293
294static inline bool
295lex_state_beg_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
297}
298
299static inline bool
300lex_state_arg_p(pm_parser_t *parser) {
301 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
302}
303
304static inline bool
305lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
306 if (parser->current.end >= parser->end) {
307 return false;
308 }
309 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
310}
311
312static inline bool
313lex_state_end_p(pm_parser_t *parser) {
314 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
315}
316
320static inline bool
321lex_state_operator_p(pm_parser_t *parser) {
322 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
323}
324
329static inline void
330lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
331 parser->lex_state = state;
332}
333
// Debug logging of lex state transitions is off unless PM_DEBUG_LOGGING has
// already been defined (for example on the compiler command line).
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
341
342#if PM_DEBUG_LOGGING
343PRISM_ATTRIBUTE_UNUSED static void
344debug_state(pm_parser_t *parser) {
345 fprintf(stderr, "STATE: ");
346 bool first = true;
347
348 if (parser->lex_state == PM_LEX_STATE_NONE) {
349 fprintf(stderr, "NONE\n");
350 return;
351 }
352
353#define CHECK_STATE(state) \
354 if (parser->lex_state & state) { \
355 if (!first) fprintf(stderr, "|"); \
356 fprintf(stderr, "%s", #state); \
357 first = false; \
358 }
359
360 CHECK_STATE(PM_LEX_STATE_BEG)
361 CHECK_STATE(PM_LEX_STATE_END)
362 CHECK_STATE(PM_LEX_STATE_ENDARG)
363 CHECK_STATE(PM_LEX_STATE_ENDFN)
364 CHECK_STATE(PM_LEX_STATE_ARG)
365 CHECK_STATE(PM_LEX_STATE_CMDARG)
366 CHECK_STATE(PM_LEX_STATE_MID)
367 CHECK_STATE(PM_LEX_STATE_FNAME)
368 CHECK_STATE(PM_LEX_STATE_DOT)
369 CHECK_STATE(PM_LEX_STATE_CLASS)
370 CHECK_STATE(PM_LEX_STATE_LABEL)
371 CHECK_STATE(PM_LEX_STATE_LABELED)
372 CHECK_STATE(PM_LEX_STATE_FITEM)
373
374#undef CHECK_STATE
375
376 fprintf(stderr, "\n");
377}
378
379static void
380debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
381 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
382 debug_state(parser);
383 lex_state_set(parser, state);
384 fprintf(stderr, "Now: ");
385 debug_state(parser);
386 fprintf(stderr, "\n");
387}
388
389#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
390#endif
391
392/******************************************************************************/
393/* Command-line macro helpers */
394/******************************************************************************/
395
// Evaluates to a non-zero value when the given option bit is set in the
// parser's command_line field.
#define PM_PARSER_COMMAND_LINE_OPTION(parser_, option_) ((parser_)->command_line & (option_))

// Tests the PM_OPTIONS_COMMAND_LINE_A bit.
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_A)

// Tests the PM_OPTIONS_COMMAND_LINE_E bit.
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_E)

// Tests the PM_OPTIONS_COMMAND_LINE_L bit.
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_L)

// Tests the PM_OPTIONS_COMMAND_LINE_N bit.
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_N)

// Tests the PM_OPTIONS_COMMAND_LINE_P bit.
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_P)

// Tests the PM_OPTIONS_COMMAND_LINE_X bit.
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_X)
416
417/******************************************************************************/
418/* Diagnostic-related functions */
419/******************************************************************************/
420
424static inline void
425pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
426 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
427}
428
// Append an error with the given diagnostic id, covering [start, end), to the
// parser's error list, passing the trailing arguments on as the values for
// the diagnostic's format string. The parser argument is parenthesized so
// that expressions other than a plain identifier bind correctly.
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
434
439static inline void
440pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
441 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
442}
443
// Append a formatted error that covers the given location (a pointer to a
// structure with start and end members).
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
450
455static inline void
456pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
457 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
458}
459
// Append a formatted error that covers the location of the given node.
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )

// Append a formatted error that covers the location of the given node, and
// pass the node's source text as a (length, pointer) pair of format
// arguments.
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
473
478static inline void
479pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
480 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
481}
482
487static inline void
488pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
489 pm_parser_err(parser, token->start, token->end, diag_id);
490}
491
// Append a formatted error that covers the given token. Note that the token
// is taken by value here, not as a pointer.
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )

// Append a formatted error that covers the given token, and pass the token's
// source text as a (length, pointer) pair of format arguments.
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
505
509static inline void
510pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
511 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
512}
513
518static inline void
519pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
520 pm_parser_warn(parser, token->start, token->end, diag_id);
521}
522
527static inline void
528pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
529 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
530}
531
// Append a warning with the given diagnostic id, covering [start, end), to
// the parser's warning list, passing the trailing arguments on as the values
// for the diagnostic's format string. The parser argument is parenthesized so
// that expressions other than a plain identifier bind correctly.
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
537
// Append a formatted warning that covers the given token. Note that the token
// is taken by value here, not as a pointer.
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )

// Append a formatted warning that covers the given token, and pass the
// token's source text as a (length, pointer) pair of format arguments.
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )

// Append a formatted warning that covers the location of the given node.
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )
558
564static void
565pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
566 PM_PARSER_ERR_FORMAT(
567 parser,
568 ident_start,
569 ident_start + ident_length,
570 PM_ERR_HEREDOC_TERM,
571 (int) ident_length,
572 (const char *) ident_start
573 );
574}
575
576/******************************************************************************/
577/* Scope-related functions */
578/******************************************************************************/
579
583static bool
584pm_parser_scope_push(pm_parser_t *parser, bool closed) {
585 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
586 if (scope == NULL) return false;
587
588 *scope = (pm_scope_t) {
589 .previous = parser->current_scope,
590 .locals = { 0 },
591 .parameters = PM_SCOPE_PARAMETERS_NONE,
592 .implicit_parameters = { 0 },
593 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
594 .closed = closed
595 };
596
597 parser->current_scope = scope;
598 return true;
599}
600
605static bool
606pm_parser_scope_toplevel_p(pm_parser_t *parser) {
607 pm_scope_t *scope = parser->current_scope;
608
609 do {
610 if (scope->previous == NULL) return true;
611 if (scope->closed) return false;
612 } while ((scope = scope->previous) != NULL);
613
614 assert(false && "unreachable");
615 return true;
616}
617
621static pm_scope_t *
622pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
623 pm_scope_t *scope = parser->current_scope;
624
625 while (depth-- > 0) {
626 assert(scope != NULL);
627 scope = scope->previous;
628 }
629
630 return scope;
631}
632
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
638
639static pm_scope_forwarding_param_check_result_t
640pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
641 pm_scope_t *scope = parser->current_scope;
642 bool conflict = false;
643
644 while (scope != NULL) {
645 if (scope->parameters & mask) {
646 if (scope->closed) {
647 if (conflict) {
648 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
649 } else {
650 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
651 }
652 }
653
654 conflict = true;
655 }
656
657 if (scope->closed) break;
658 scope = scope->previous;
659 }
660
661 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
662}
663
664static void
665pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
666 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
668 // Pass.
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
672 break;
673 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
674 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
675 break;
676 }
677}
678
679static void
680pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
681 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
683 // Pass.
684 break;
685 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
686 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
687 break;
688 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
689 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
690 break;
691 }
692}
693
694static void
695pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
696 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
697 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
698 // Pass.
699 break;
700 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
701 // This shouldn't happen, because ... is not allowed in the
702 // declaration of blocks. If we get here, we assume we already have
703 // an error for this.
704 break;
705 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
706 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
707 break;
708 }
709}
710
711static void
712pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
713 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
714 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
715 // Pass.
716 break;
717 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
718 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
719 break;
720 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
721 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
722 break;
723 }
724}
725
730pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
731 return parser->current_scope->shareable_constant;
732}
733
738static void
739pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
740 pm_scope_t *scope = parser->current_scope;
741
742 do {
743 scope->shareable_constant = shareable_constant;
744 } while (!scope->closed && (scope = scope->previous) != NULL);
745}
746
747/******************************************************************************/
748/* Local variable-related functions */
749/******************************************************************************/
750
// A locals table whose capacity is below this value is treated as a plain
// list that is scanned linearly; at or above it, the table is treated as an
// open-addressed hash table.
#define PM_LOCALS_HASH_THRESHOLD 9
755
756static void
757pm_locals_free(pm_locals_t *locals) {
758 if (locals->capacity > 0) {
759 xfree(locals->locals);
760 }
761}
762
767static uint32_t
768pm_locals_hash(pm_constant_id_t name) {
769 name = ((name >> 16) ^ name) * 0x45d9f3b;
770 name = ((name >> 16) ^ name) * 0x45d9f3b;
771 name = (name >> 16) ^ name;
772 return name;
773}
774
779static void
780pm_locals_resize(pm_locals_t *locals) {
781 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
782 assert(next_capacity > locals->capacity);
783
784 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
785 if (next_locals == NULL) abort();
786
787 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
788 if (locals->size > 0) {
789 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
790 }
791 } else {
792 // If we just switched from a list to a hash, then we need to fill in
793 // the hash values of all of the locals.
794 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
795 uint32_t mask = next_capacity - 1;
796
797 for (uint32_t index = 0; index < locals->capacity; index++) {
798 pm_local_t *local = &locals->locals[index];
799
800 if (local->name != PM_CONSTANT_ID_UNSET) {
801 if (hash_needed) local->hash = pm_locals_hash(local->name);
802
803 uint32_t hash = local->hash;
804 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
805 next_locals[hash & mask] = *local;
806 }
807 }
808 }
809
810 pm_locals_free(locals);
811 locals->locals = next_locals;
812 locals->capacity = next_capacity;
813}
814
830static bool
831pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
832 if (locals->size >= (locals->capacity / 4 * 3)) {
833 pm_locals_resize(locals);
834 }
835
836 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
837 for (uint32_t index = 0; index < locals->capacity; index++) {
838 pm_local_t *local = &locals->locals[index];
839
840 if (local->name == PM_CONSTANT_ID_UNSET) {
841 *local = (pm_local_t) {
842 .name = name,
843 .location = { .start = start, .end = end },
844 .index = locals->size++,
845 .reads = reads,
846 .hash = 0
847 };
848 return true;
849 } else if (local->name == name) {
850 return false;
851 }
852 }
853 } else {
854 uint32_t mask = locals->capacity - 1;
855 uint32_t hash = pm_locals_hash(name);
856 uint32_t initial_hash = hash;
857
858 do {
859 pm_local_t *local = &locals->locals[hash & mask];
860
861 if (local->name == PM_CONSTANT_ID_UNSET) {
862 *local = (pm_local_t) {
863 .name = name,
864 .location = { .start = start, .end = end },
865 .index = locals->size++,
866 .reads = reads,
867 .hash = initial_hash
868 };
869 return true;
870 } else if (local->name == name) {
871 return false;
872 } else {
873 hash++;
874 }
875 } while ((hash & mask) != initial_hash);
876 }
877
878 assert(false && "unreachable");
879 return true;
880}
881
886static uint32_t
887pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
888 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
889 for (uint32_t index = 0; index < locals->size; index++) {
890 pm_local_t *local = &locals->locals[index];
891 if (local->name == name) return index;
892 }
893 } else {
894 uint32_t mask = locals->capacity - 1;
895 uint32_t hash = pm_locals_hash(name);
896 uint32_t initial_hash = hash & mask;
897
898 do {
899 pm_local_t *local = &locals->locals[hash & mask];
900
901 if (local->name == PM_CONSTANT_ID_UNSET) {
902 return UINT32_MAX;
903 } else if (local->name == name) {
904 return hash & mask;
905 } else {
906 hash++;
907 }
908 } while ((hash & mask) != initial_hash);
909 }
910
911 return UINT32_MAX;
912}
913
918static void
919pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
920 uint32_t index = pm_locals_find(locals, name);
921 assert(index != UINT32_MAX);
922
923 pm_local_t *local = &locals->locals[index];
924 assert(local->reads < UINT32_MAX);
925
926 local->reads++;
927}
928
933static void
934pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
935 uint32_t index = pm_locals_find(locals, name);
936 assert(index != UINT32_MAX);
937
938 pm_local_t *local = &locals->locals[index];
939 assert(local->reads > 0);
940
941 local->reads--;
942}
943
947static uint32_t
948pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
949 uint32_t index = pm_locals_find(locals, name);
950 assert(index != UINT32_MAX);
951
952 return locals->locals[index].reads;
953}
954
963static void
964pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
965 pm_constant_id_list_init_capacity(list, locals->size);
966
967 // If we're still below the threshold for switching to a hash, then we only
968 // need to loop over the locals until we hit the size because the locals are
969 // stored in a list.
970 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
971
972 // We will only warn for unused variables if we're not at the top level, or
973 // if we're parsing a file outside of eval or -e.
974 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
975
976 for (uint32_t index = 0; index < capacity; index++) {
977 pm_local_t *local = &locals->locals[index];
978
979 if (local->name != PM_CONSTANT_ID_UNSET) {
980 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
981
982 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
983 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
984
985 if (constant->length >= 1 && *constant->start != '_') {
986 PM_PARSER_WARN_FORMAT(
987 parser,
988 local->location.start,
989 local->location.end,
990 PM_WARN_UNUSED_LOCAL_VARIABLE,
991 (int) constant->length,
992 (const char *) constant->start
993 );
994 }
995 }
996 }
997 }
998}
999
1000/******************************************************************************/
1001/* Node-related functions */
1002/******************************************************************************/
1003
1007static inline pm_constant_id_t
1008pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1009 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1010}
1011
1015static inline pm_constant_id_t
1016pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1017 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1018}
1019
1023static inline pm_constant_id_t
1024pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1025 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1026}
1027
1031static inline pm_constant_id_t
1032pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1033 return pm_parser_constant_id_location(parser, token->start, token->end);
1034}
1035
1040static inline pm_constant_id_t
1041pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1042 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1043}
1044
1050static pm_node_t *
1051pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1052 pm_node_t *void_node = NULL;
1053
1054 while (node != NULL) {
1055 switch (PM_NODE_TYPE(node)) {
1056 case PM_RETURN_NODE:
1057 case PM_BREAK_NODE:
1058 case PM_NEXT_NODE:
1059 case PM_REDO_NODE:
1060 case PM_RETRY_NODE:
1061 case PM_MATCH_REQUIRED_NODE:
1062 return void_node != NULL ? void_node : node;
1063 case PM_MATCH_PREDICATE_NODE:
1064 return NULL;
1065 case PM_BEGIN_NODE: {
1066 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1067
1068 if (cast->ensure_clause != NULL) {
1069 if (cast->rescue_clause != NULL) {
1070 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1071 if (vn != NULL) return vn;
1072 }
1073
1074 if (cast->statements != NULL) {
1075 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1076 if (vn != NULL) return vn;
1077 }
1078
1079 node = UP(cast->ensure_clause);
1080 } else if (cast->rescue_clause != NULL) {
1081 if (cast->statements == NULL) return NULL;
1082
1083 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1084 if (vn == NULL) return NULL;
1085 if (void_node == NULL) void_node = vn;
1086
1087 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1088 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1089 if (vn == NULL) {
1090 void_node = NULL;
1091 break;
1092 }
1093 if (void_node == NULL) {
1094 void_node = vn;
1095 }
1096 }
1097
1098 if (cast->else_clause != NULL) {
1099 node = UP(cast->else_clause);
1100 } else {
1101 return void_node;
1102 }
1103 } else {
1104 node = UP(cast->statements);
1105 }
1106
1107 break;
1108 }
1109 case PM_ENSURE_NODE: {
1110 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1111 node = UP(cast->statements);
1112 break;
1113 }
1114 case PM_PARENTHESES_NODE: {
1115 pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
1116 node = UP(cast->body);
1117 break;
1118 }
1119 case PM_STATEMENTS_NODE: {
1120 pm_statements_node_t *cast = (pm_statements_node_t *) node;
1121 node = cast->body.nodes[cast->body.size - 1];
1122 break;
1123 }
1124 case PM_IF_NODE: {
1125 pm_if_node_t *cast = (pm_if_node_t *) node;
1126 if (cast->statements == NULL || cast->subsequent == NULL) {
1127 return NULL;
1128 }
1129 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1130 if (vn == NULL) {
1131 return NULL;
1132 }
1133 if (void_node == NULL) {
1134 void_node = vn;
1135 }
1136 node = cast->subsequent;
1137 break;
1138 }
1139 case PM_UNLESS_NODE: {
1140 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1141 if (cast->statements == NULL || cast->else_clause == NULL) {
1142 return NULL;
1143 }
1144 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1145 if (vn == NULL) {
1146 return NULL;
1147 }
1148 if (void_node == NULL) {
1149 void_node = vn;
1150 }
1151 node = UP(cast->else_clause);
1152 break;
1153 }
1154 case PM_ELSE_NODE: {
1155 pm_else_node_t *cast = (pm_else_node_t *) node;
1156 node = UP(cast->statements);
1157 break;
1158 }
1159 case PM_AND_NODE: {
1160 pm_and_node_t *cast = (pm_and_node_t *) node;
1161 node = cast->left;
1162 break;
1163 }
1164 case PM_OR_NODE: {
1165 pm_or_node_t *cast = (pm_or_node_t *) node;
1166 node = cast->left;
1167 break;
1168 }
1169 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1170 pm_local_variable_write_node_t *cast = (pm_local_variable_write_node_t *) node;
1171
1172 pm_scope_t *scope = parser->current_scope;
1173 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1174
1175 pm_locals_read(&scope->locals, cast->name);
1176 return NULL;
1177 }
1178 default:
1179 return NULL;
1180 }
1181 }
1182
1183 return NULL;
1184}
1185
1186static inline void
1187pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1188 pm_node_t *void_node = pm_check_value_expression(parser, node);
1189 if (void_node != NULL) {
1190 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1191 }
1192}
1193
1197static void
1198pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1199 const char *type = NULL;
1200 int length = 0;
1201
1202 switch (PM_NODE_TYPE(node)) {
1203 case PM_BACK_REFERENCE_READ_NODE:
1204 case PM_CLASS_VARIABLE_READ_NODE:
1205 case PM_GLOBAL_VARIABLE_READ_NODE:
1206 case PM_INSTANCE_VARIABLE_READ_NODE:
1207 case PM_LOCAL_VARIABLE_READ_NODE:
1208 case PM_NUMBERED_REFERENCE_READ_NODE:
1209 type = "a variable";
1210 length = 10;
1211 break;
1212 case PM_CALL_NODE: {
1213 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1214 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1215
1216 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1217 switch (message->length) {
1218 case 1:
1219 switch (message->start[0]) {
1220 case '+':
1221 case '-':
1222 case '*':
1223 case '/':
1224 case '%':
1225 case '|':
1226 case '^':
1227 case '&':
1228 case '>':
1229 case '<':
1230 type = (const char *) message->start;
1231 length = 1;
1232 break;
1233 }
1234 break;
1235 case 2:
1236 switch (message->start[1]) {
1237 case '=':
1238 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1239 type = (const char *) message->start;
1240 length = 2;
1241 }
1242 break;
1243 case '@':
1244 if (message->start[0] == '+' || message->start[0] == '-') {
1245 type = (const char *) message->start;
1246 length = 2;
1247 }
1248 break;
1249 case '*':
1250 if (message->start[0] == '*') {
1251 type = (const char *) message->start;
1252 length = 2;
1253 }
1254 break;
1255 }
1256 break;
1257 case 3:
1258 if (memcmp(message->start, "<=>", 3) == 0) {
1259 type = "<=>";
1260 length = 3;
1261 }
1262 break;
1263 }
1264
1265 break;
1266 }
1267 case PM_CONSTANT_PATH_NODE:
1268 type = "::";
1269 length = 2;
1270 break;
1271 case PM_CONSTANT_READ_NODE:
1272 type = "a constant";
1273 length = 10;
1274 break;
1275 case PM_DEFINED_NODE:
1276 type = "defined?";
1277 length = 8;
1278 break;
1279 case PM_FALSE_NODE:
1280 type = "false";
1281 length = 5;
1282 break;
1283 case PM_FLOAT_NODE:
1284 case PM_IMAGINARY_NODE:
1285 case PM_INTEGER_NODE:
1286 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1287 case PM_INTERPOLATED_STRING_NODE:
1288 case PM_RATIONAL_NODE:
1289 case PM_REGULAR_EXPRESSION_NODE:
1290 case PM_SOURCE_ENCODING_NODE:
1291 case PM_SOURCE_FILE_NODE:
1292 case PM_SOURCE_LINE_NODE:
1293 case PM_STRING_NODE:
1294 case PM_SYMBOL_NODE:
1295 type = "a literal";
1296 length = 9;
1297 break;
1298 case PM_NIL_NODE:
1299 type = "nil";
1300 length = 3;
1301 break;
1302 case PM_RANGE_NODE: {
1303 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1304
1305 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1306 type = "...";
1307 length = 3;
1308 } else {
1309 type = "..";
1310 length = 2;
1311 }
1312
1313 break;
1314 }
1315 case PM_SELF_NODE:
1316 type = "self";
1317 length = 4;
1318 break;
1319 case PM_TRUE_NODE:
1320 type = "true";
1321 length = 4;
1322 break;
1323 default:
1324 break;
1325 }
1326
1327 if (type != NULL) {
1328 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1329 }
1330}
1331
1336static void
1337pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1338 assert(node->body.size > 0);
1339 const size_t size = node->body.size - (last_value ? 1 : 0);
1340 for (size_t index = 0; index < size; index++) {
1341 pm_void_statement_check(parser, node->body.nodes[index]);
1342 }
1343}
1344
/**
 * The context in which a conditional predicate is being examined. It selects
 * the wording used by pm_parser_warn_conditional_predicate_literal.
 */
typedef enum {
    /** Literal warnings are worded with "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Literal warnings are worded with "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1355
1359static void
1360pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1361 switch (type) {
1362 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1363 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1364 break;
1365 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1366 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1367 break;
1368 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1369 break;
1370 }
1371}
1372
1377static bool
1378pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1379 switch (PM_NODE_TYPE(node)) {
1380 case PM_ARRAY_NODE: {
1381 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1382
1383 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1384 for (size_t index = 0; index < cast->elements.size; index++) {
1385 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1386 }
1387
1388 return true;
1389 }
1390 case PM_HASH_NODE: {
1391 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1392
1393 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1394 for (size_t index = 0; index < cast->elements.size; index++) {
1395 const pm_node_t *element = cast->elements.nodes[index];
1396 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1397
1398 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1399 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1400 }
1401
1402 return true;
1403 }
1404 case PM_FALSE_NODE:
1405 case PM_FLOAT_NODE:
1406 case PM_IMAGINARY_NODE:
1407 case PM_INTEGER_NODE:
1408 case PM_NIL_NODE:
1409 case PM_RATIONAL_NODE:
1410 case PM_REGULAR_EXPRESSION_NODE:
1411 case PM_SOURCE_ENCODING_NODE:
1412 case PM_SOURCE_FILE_NODE:
1413 case PM_SOURCE_LINE_NODE:
1414 case PM_STRING_NODE:
1415 case PM_SYMBOL_NODE:
1416 case PM_TRUE_NODE:
1417 return true;
1418 default:
1419 return false;
1420 }
1421}
1422
1427static inline void
1428pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1429 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1430 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1431 }
1432}
1433
1446static void
1447pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1448 switch (PM_NODE_TYPE(node)) {
1449 case PM_AND_NODE: {
1450 pm_and_node_t *cast = (pm_and_node_t *) node;
1451 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1452 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1453 break;
1454 }
1455 case PM_OR_NODE: {
1456 pm_or_node_t *cast = (pm_or_node_t *) node;
1457 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1458 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1459 break;
1460 }
1461 case PM_PARENTHESES_NODE: {
1462 pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
1463
1464 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1465 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1466 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1467 }
1468
1469 break;
1470 }
1471 case PM_BEGIN_NODE: {
1472 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1473 if (cast->statements != NULL) {
1474 pm_statements_node_t *statements = cast->statements;
1475 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1476 }
1477 break;
1478 }
1479 case PM_RANGE_NODE: {
1480 pm_range_node_t *cast = (pm_range_node_t *) node;
1481
1482 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1483 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1484
1485 // Here we change the range node into a flip flop node. We can do
1486 // this since the nodes are exactly the same except for the type.
1487 // We're only asserting against the size when we should probably
1488 // assert against the entire layout, but we'll assume tests will
1489 // catch this.
1490 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1491 node->type = PM_FLIP_FLOP_NODE;
1492
1493 break;
1494 }
1495 case PM_REGULAR_EXPRESSION_NODE:
1496 // Here we change the regular expression node into a match last line
1497 // node. We can do this since the nodes are exactly the same except
1498 // for the type.
1499 assert(sizeof(pm_regular_expression_node_t) == sizeof(pm_match_last_line_node_t));
1500 node->type = PM_MATCH_LAST_LINE_NODE;
1501
1502 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1503 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1504 }
1505
1506 break;
1507 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1508 // Here we change the interpolated regular expression node into an
1509 // interpolated match last line node. We can do this since the nodes
1510 // are exactly the same except for the type.
1511 assert(sizeof(pm_interpolated_regular_expression_node_t) == sizeof(pm_interpolated_match_last_line_node_t));
1512 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1513
1514 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1515 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1516 }
1517
1518 break;
1519 case PM_INTEGER_NODE:
1520 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1521 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1522 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1523 }
1524 } else {
1525 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1526 }
1527 break;
1528 case PM_STRING_NODE:
1529 case PM_SOURCE_FILE_NODE:
1530 case PM_INTERPOLATED_STRING_NODE:
1531 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1532 break;
1533 case PM_SYMBOL_NODE:
1534 case PM_INTERPOLATED_SYMBOL_NODE:
1535 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1536 break;
1537 case PM_SOURCE_LINE_NODE:
1538 case PM_SOURCE_ENCODING_NODE:
1539 case PM_FLOAT_NODE:
1540 case PM_RATIONAL_NODE:
1541 case PM_IMAGINARY_NODE:
1542 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1543 break;
1544 case PM_CLASS_VARIABLE_WRITE_NODE:
1545 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1546 break;
1547 case PM_CONSTANT_WRITE_NODE:
1548 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1549 break;
1550 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1551 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1552 break;
1553 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1554 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1555 break;
1556 case PM_LOCAL_VARIABLE_WRITE_NODE:
1557 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1558 break;
1559 case PM_MULTI_WRITE_NODE:
1560 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1561 break;
1562 default:
1563 break;
1564 }
1565}
1566
1575static inline pm_token_t
1576not_provided(pm_parser_t *parser) {
1577 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1578}
1579
1586typedef struct {
1589
1591 pm_arguments_node_t *arguments;
1592
1595
1597 pm_node_t *block;
1598
1602
1606static inline const uint8_t *
1607pm_arguments_end(pm_arguments_t *arguments) {
1608 if (arguments->block != NULL) {
1609 const uint8_t *end = arguments->block->location.end;
1610 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1611 end = arguments->closing_loc.end;
1612 }
1613 return end;
1614 }
1615 if (arguments->closing_loc.start != NULL) {
1616 return arguments->closing_loc.end;
1617 }
1618 if (arguments->arguments != NULL) {
1619 return arguments->arguments->base.location.end;
1620 }
1621 return arguments->closing_loc.end;
1622}
1623
1628static void
1629pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1630 // First, check that we have arguments and that we don't have a closing
1631 // location for them.
1632 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1633 return;
1634 }
1635
1636 // Next, check that we don't have a single parentheses argument. This would
1637 // look like:
1638 //
1639 // foo (1) {}
1640 //
1641 // In this case, it's actually okay for the block to be attached to the
1642 // call, even though it looks like it's attached to the argument.
1643 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1644 return;
1645 }
1646
1647 // If we didn't hit a case before this check, then at this point we need to
1648 // add a syntax error.
1649 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1650}
1651
1652/******************************************************************************/
1653/* Basic character checks */
1654/******************************************************************************/
1655
1662static inline size_t
1663char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1664 if (n <= 0) return 0;
1665
1666 if (parser->encoding_changed) {
1667 size_t width;
1668
1669 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1670 return width;
1671 } else if (*b == '_') {
1672 return 1;
1673 } else if (*b >= 0x80) {
1674 return parser->encoding->char_width(b, n);
1675 } else {
1676 return 0;
1677 }
1678 } else if (*b < 0x80) {
1679 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1680 } else {
1681 return pm_encoding_utf_8_char_width(b, n);
1682 }
1683}
1684
1689static inline size_t
1690char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1691 if (n <= 0) {
1692 return 0;
1693 } else if (*b < 0x80) {
1694 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1695 } else {
1696 return pm_encoding_utf_8_char_width(b, n);
1697 }
1698}
1699
1705static inline size_t
1706char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1707 if (n <= 0) {
1708 return 0;
1709 } else if (parser->encoding_changed) {
1710 size_t width;
1711
1712 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1713 return width;
1714 } else if (*b == '_') {
1715 return 1;
1716 } else if (*b >= 0x80) {
1717 return parser->encoding->char_width(b, n);
1718 } else {
1719 return 0;
1720 }
1721 } else {
1722 return char_is_identifier_utf8(b, n);
1723 }
1724}
1725
// Bitset of the punctuation characters that may follow a `$` to form a global
// name. The printable ASCII range 0x21..0x7e is split into 32-character words:
// word `idx` covers the characters c with c / 32 - 1 == idx, and a character
// occupies bit c % 32 of its word. BIT yields that bit when c belongs to word
// idx (0 otherwise); PUNCT ORs together the bits of every accepted character
// for one word.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT(0),
    PUNCT(1),
    PUNCT(2)
};

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and above 0x7e
 * are always rejected.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int code = (unsigned int) b;
    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];

    return ((word >> (code % 32)) & 1U) != 0;
}
1750
1751static inline bool
1752token_is_setter_name(pm_token_t *token) {
1753 return (
1754 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1755 ((token->type == PM_TOKEN_IDENTIFIER) &&
1756 (token->end - token->start >= 2) &&
1757 (token->end[-1] == '='))
1758 );
1759}
1760
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords listed below. Candidates are grouped by length so that only
 * keywords of exactly the requested length are compared.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    // Each table is terminated by a NULL sentinel.
    static const char *const keywords_2[] = { "do", "if", "in", "or", NULL };
    static const char *const keywords_3[] = { "and", "def", "end", "for", "nil", "not", NULL };
    static const char *const keywords_4[] = {
        "case", "else", "next", "redo", "self", "then", "true", "when", NULL
    };
    static const char *const keywords_5[] = {
        "alias", "begin", "break", "class", "elsif", "false", "retry",
        "super", "undef", "until", "while", "yield", NULL
    };
    static const char *const keywords_6[] = {
        "ensure", "module", "rescue", "return", "unless", NULL
    };
    static const char *const keywords_8[] = { "__LINE__", "__FILE__", NULL };
    static const char *const keywords_12[] = { "__ENCODING__", NULL };

    const char *const *candidate;

    switch (length) {
        case 2: candidate = keywords_2; break;
        case 3: candidate = keywords_3; break;
        case 4: candidate = keywords_4; break;
        case 5: candidate = keywords_5; break;
        case 6: candidate = keywords_6; break;
        case 8: candidate = keywords_8; break;
        case 12: candidate = keywords_12; break;
        default: return false;
    }

    for (; *candidate != NULL; candidate++) {
        if (memcmp(source, *candidate, length) == 0) return true;
    }

    return false;
}
1831
1832/******************************************************************************/
1833/* Node flag handling functions */
1834/******************************************************************************/
1835
1839static inline void
1840pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1841 node->flags |= flag;
1842}
1843
1847static inline void
1848pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1849 node->flags &= (pm_node_flags_t) ~flag;
1850}
1851
1855static inline void
1856pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1857 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1858 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1859 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1860 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1861 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1862 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1863 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1864 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1865
1866 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1867}
1868
1869/******************************************************************************/
1870/* Node creation functions */
1871/******************************************************************************/
1872
1878#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1879
1883static inline pm_node_flags_t
1884pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1885 pm_node_flags_t flags = 0;
1886
1887 if (closing->type == PM_TOKEN_REGEXP_END) {
1888 pm_buffer_t unknown_flags = { 0 };
1889
1890 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1891 switch (*flag) {
1892 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1893 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1894 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1895 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1896
1897 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1898 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1899 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1900 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1901
1902 default: pm_buffer_append_byte(&unknown_flags, *flag);
1903 }
1904 }
1905
1906 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1907 if (unknown_flags_length != 0) {
1908 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1909 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1910 }
1911 pm_buffer_free(&unknown_flags);
1912 }
1913
1914 return flags;
1915}
1916
1917#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1918
1919static pm_statements_node_t *
1920pm_statements_node_create(pm_parser_t *parser);
1921
1922static void
1923pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1924
1925static size_t
1926pm_statements_node_body_length(pm_statements_node_t *node);
1927
1932static inline void *
1933pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1934 void *memory = xcalloc(1, size);
1935 if (memory == NULL) {
1936 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1937 abort();
1938 }
1939 return memory;
1940}
1941
// Allocate storage for one node of the given struct type through
// pm_node_alloc and cast the result to a pointer to that type.
#define PM_NODE_ALLOC(parser_, type_) (type_ *) pm_node_alloc(parser_, sizeof(type_))

// Build a pm_node_t value with the given type, flags, and location bounds.
// Every expansion pre-increments the parser's node_id counter and stores the
// incremented value in the node.
#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \
    .type = (type_), \
    .flags = (flags_), \
    .node_id = ++(parser_)->node_id, \
    .location = { .start = (start_), .end = (end_) } \
}

// Location is left as NULL/NULL.
#define PM_NODE_INIT_UNSET(parser_, type_, flags_) \
    PM_NODE_INIT(parser_, type_, flags_, NULL, NULL)

// Location is the empty span at the start of the parser's source.
#define PM_NODE_INIT_BASE(parser_, type_, flags_) \
    PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start)

// Location spans a single token.
#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_))

// Location spans a single node.
#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_))

// Location runs from the start of the left token to the end of the right token.
#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_))

// Location runs from the start of the left node to the end of the right node.
#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_))

// Location runs from the start of the token to the end of the node.
#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_))

// Location runs from the start of the node to the end of the token.
#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) \
    PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_))
1959
1963static pm_missing_node_t *
1964pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1965 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1966
1967 *node = (pm_missing_node_t) {
1968 .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end)
1969 };
1970
1971 return node;
1972}
1973
1977static pm_alias_global_variable_node_t *
1978pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1979 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1980 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1981
1982 *node = (pm_alias_global_variable_node_t) {
1983 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name),
1984 .new_name = new_name,
1985 .old_name = old_name,
1986 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1987 };
1988
1989 return node;
1990}
1991
1995static pm_alias_method_node_t *
1996pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1997 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1998 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1999
2000 *node = (pm_alias_method_node_t) {
2001 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name),
2002 .new_name = new_name,
2003 .old_name = old_name,
2004 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2005 };
2006
2007 return node;
2008}
2009
2013static pm_alternation_pattern_node_t *
2014pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2015 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2016
2017 *node = (pm_alternation_pattern_node_t) {
2018 .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right),
2019 .left = left,
2020 .right = right,
2021 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2022 };
2023
2024 return node;
2025}
2026
2030static pm_and_node_t *
2031pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2032 pm_assert_value_expression(parser, left);
2033
2034 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2035
2036 *node = (pm_and_node_t) {
2037 .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right),
2038 .left = left,
2039 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2040 .right = right
2041 };
2042
2043 return node;
2044}
2045
2049static pm_arguments_node_t *
2050pm_arguments_node_create(pm_parser_t *parser) {
2051 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2052
2053 *node = (pm_arguments_node_t) {
2054 .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0),
2055 .arguments = { 0 }
2056 };
2057
2058 return node;
2059}
2060
2064static size_t
2065pm_arguments_node_size(pm_arguments_node_t *node) {
2066 return node->arguments.size;
2067}
2068
2072static void
2073pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2074 if (pm_arguments_node_size(node) == 0) {
2075 node->base.location.start = argument->location.start;
2076 }
2077
2078 if (node->base.location.end < argument->location.end) {
2079 node->base.location.end = argument->location.end;
2080 }
2081
2082 pm_node_list_append(&node->arguments, argument);
2083
2084 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2085 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2086 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2087 } else {
2088 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2089 }
2090 }
2091}
2092
2096static pm_array_node_t *
2097pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2098 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2099
2100 *node = (pm_array_node_t) {
2101 .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
2102 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104 .elements = { 0 }
2105 };
2106
2107 return node;
2108}
2109
2113static inline void
2114pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115 if (!node->elements.size && !node->opening_loc.start) {
2116 node->base.location.start = element->location.start;
2117 }
2118
2119 pm_node_list_append(&node->elements, element);
2120 node->base.location.end = element->location.end;
2121
2122 // If the element is not a static literal, then the array is not a static
2123 // literal. Turn that flag off.
2124 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2126 }
2127
2128 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130 }
2131}
2132
2136static void
2137pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139 node->base.location.end = closing->end;
2140 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141}
2142
2147static pm_array_pattern_node_t *
2148pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150
2151 *node = (pm_array_pattern_node_t) {
2152 .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2153 .constant = NULL,
2154 .rest = NULL,
2155 .requireds = { 0 },
2156 .posts = { 0 },
2157 .opening_loc = { 0 },
2158 .closing_loc = { 0 }
2159 };
2160
2161 // For now we're going to just copy over each pointer manually. This could be
2162 // much more efficient, as we could instead resize the node list.
2163 bool found_rest = false;
2164 pm_node_t *child;
2165
2166 PM_NODE_LIST_FOREACH(nodes, index, child) {
2167 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2168 node->rest = child;
2169 found_rest = true;
2170 } else if (found_rest) {
2171 pm_node_list_append(&node->posts, child);
2172 } else {
2173 pm_node_list_append(&node->requireds, child);
2174 }
2175 }
2176
2177 return node;
2178}
2179
2183static pm_array_pattern_node_t *
2184pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2185 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2186
2187 *node = (pm_array_pattern_node_t) {
2188 .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest),
2189 .constant = NULL,
2190 .rest = rest,
2191 .requireds = { 0 },
2192 .posts = { 0 },
2193 .opening_loc = { 0 },
2194 .closing_loc = { 0 }
2195 };
2196
2197 return node;
2198}
2199
2204static pm_array_pattern_node_t *
2205pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2206 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2207
2208 *node = (pm_array_pattern_node_t) {
2209 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing),
2210 .constant = constant,
2211 .rest = NULL,
2212 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2213 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2214 .requireds = { 0 },
2215 .posts = { 0 }
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing),
2231 .constant = NULL,
2232 .rest = NULL,
2233 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2234 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2235 .requireds = { 0 },
2236 .posts = { 0 }
2237 };
2238
2239 return node;
2240}
2241
2242static inline void
2243pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2244 pm_node_list_append(&node->requireds, inner);
2245}
2246
2250static pm_assoc_node_t *
2251pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2252 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2253 const uint8_t *end;
2254
2255 if (value != NULL && value->location.end > key->location.end) {
2256 end = value->location.end;
2257 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2258 end = operator->end;
2259 } else {
2260 end = key->location.end;
2261 }
2262
2263 // Hash string keys will be frozen, so we can mark them as frozen here so
2264 // that the compiler picks them up and also when we check for static literal
2265 // on the keys it gets factored in.
2266 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2267 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2268 }
2269
2270 // If the key and value of this assoc node are both static literals, then
2271 // we can mark this node as a static literal.
2272 pm_node_flags_t flags = 0;
2273 if (
2274 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2275 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2276 ) {
2277 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2278 }
2279
2280 *node = (pm_assoc_node_t) {
2281 .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, key->location.start, end),
2282 .key = key,
2283 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2284 .value = value
2285 };
2286
2287 return node;
2288}
2289
2293static pm_assoc_splat_node_t *
2294pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2295 assert(operator->type == PM_TOKEN_USTAR_STAR);
2296 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2297
2298 *node = (pm_assoc_splat_node_t) {
2299 .base = (
2300 (value == NULL)
2301 ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator)
2302 : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value)
2303 ),
2304 .value = value,
2305 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2306 };
2307
2308 return node;
2309}
2310
2314static pm_back_reference_read_node_t *
2315pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2316 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2317 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2318
2319 *node = (pm_back_reference_read_node_t) {
2320 .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name),
2321 .name = pm_parser_constant_id_token(parser, name)
2322 };
2323
2324 return node;
2325}
2326
2330static pm_begin_node_t *
2331pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2332 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2333
2334 *node = (pm_begin_node_t) {
2335 .base = (
2336 (statements == NULL)
2337 ? PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword)
2338 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements)
2339 ),
2340 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2341 .statements = statements,
2342 .end_keyword_loc = { 0 }
2343 };
2344
2345 return node;
2346}
2347
2351static void
2352pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2353 // If the begin keyword doesn't exist, we set the start on the begin_node
2354 if (!node->begin_keyword_loc.start) {
2355 node->base.location.start = rescue_clause->base.location.start;
2356 }
2357 node->base.location.end = rescue_clause->base.location.end;
2358 node->rescue_clause = rescue_clause;
2359}
2360
2364static void
2365pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2366 node->base.location.end = else_clause->base.location.end;
2367 node->else_clause = else_clause;
2368}
2369
2373static void
2374pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2375 node->base.location.end = ensure_clause->base.location.end;
2376 node->ensure_clause = ensure_clause;
2377}
2378
2382static void
2383pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2384 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2385
2386 node->base.location.end = end_keyword->end;
2387 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2388}
2389
2393static pm_block_argument_node_t *
2394pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2395 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2396
2397 *node = (pm_block_argument_node_t) {
2398 .base = (
2399 (expression == NULL)
2400 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator)
2401 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression)
2402 ),
2403 .expression = expression,
2404 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2405 };
2406
2407 return node;
2408}
2409
2413static pm_block_node_t *
2414pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2415 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2416
2417 *node = (pm_block_node_t) {
2418 .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing),
2419 .locals = *locals,
2420 .parameters = parameters,
2421 .body = body,
2422 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2423 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2424 };
2425
2426 return node;
2427}
2428
2432static pm_block_parameter_node_t *
2433pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2434 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2435 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2436
2437 *node = (pm_block_parameter_node_t) {
2438 .base = (
2439 (name->type == PM_TOKEN_NOT_PROVIDED)
2440 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator)
2441 : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name)
2442 ),
2443 .name = pm_parser_optional_constant_id_token(parser, name),
2444 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2445 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2446 };
2447
2448 return node;
2449}
2450
2454static pm_block_parameters_node_t *
2455pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2456 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2457
2458 const uint8_t *start;
2459 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2460 start = opening->start;
2461 } else if (parameters != NULL) {
2462 start = parameters->base.location.start;
2463 } else {
2464 start = NULL;
2465 }
2466
2467 const uint8_t *end;
2468 if (parameters != NULL) {
2469 end = parameters->base.location.end;
2470 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2471 end = opening->end;
2472 } else {
2473 end = NULL;
2474 }
2475
2476 *node = (pm_block_parameters_node_t) {
2477 .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end),
2478 .parameters = parameters,
2479 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2480 .closing_loc = { 0 },
2481 .locals = { 0 }
2482 };
2483
2484 return node;
2485}
2486
2490static void
2491pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2492 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2493
2494 node->base.location.end = closing->end;
2495 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2496}
2497
2501static pm_block_local_variable_node_t *
2502pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2503 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2504
2505 *node = (pm_block_local_variable_node_t) {
2506 .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name),
2507 .name = pm_parser_constant_id_token(parser, name)
2508 };
2509
2510 return node;
2511}
2512
2516static void
2517pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2518 pm_node_list_append(&node->locals, UP(local));
2519
2520 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2521 node->base.location.end = local->base.location.end;
2522}
2523
2527static pm_break_node_t *
2528pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2529 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2530 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2531
2532 *node = (pm_break_node_t) {
2533 .base = (
2534 (arguments == NULL)
2535 ? PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword)
2536 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments)
2537 ),
2538 .arguments = arguments,
2539 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2540 };
2541
2542 return node;
2543}
2544
// There are certain flags that we want to use internally but don't want to
// expose because they are not relevant beyond parsing. Therefore we'll define
// them here and not define them in config.yml/a header file.

// Internal flag for write nodes, occupying bit 2.
// NOTE(review): presumably marks a write whose value is an implicit array;
// confirm against the call sites.
static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);

// Internal flags for call nodes. Each one is derived from
// PM_CALL_NODE_FLAGS_LAST by subtracting one and shifting left by 1, 2, and 3
// respectively.
// NOTE(review): this presumably places them directly above the highest public
// call node flag; confirm against the definition of PM_CALL_NODE_FLAGS_LAST.
static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2553
2559static pm_call_node_t *
2560pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2561 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2562
2563 *node = (pm_call_node_t) {
2564 .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags),
2565 .receiver = NULL,
2566 .call_operator_loc = { 0 },
2567 .message_loc = { 0 },
2568 .opening_loc = { 0 },
2569 .arguments = NULL,
2570 .closing_loc = { 0 },
2571 .equal_loc = { 0 },
2572 .block = NULL,
2573 .name = 0
2574 };
2575
2576 return node;
2577}
2578
2583static inline pm_node_flags_t
2584pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2585 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2586}
2587
2592static pm_call_node_t *
2593pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2594 pm_assert_value_expression(parser, receiver);
2595
2596 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2597 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2598 flags |= PM_CALL_NODE_FLAGS_INDEX;
2599 }
2600
2601 pm_call_node_t *node = pm_call_node_create(parser, flags);
2602
2603 node->base.location.start = receiver->location.start;
2604 node->base.location.end = pm_arguments_end(arguments);
2605
2606 node->receiver = receiver;
2607 node->message_loc.start = arguments->opening_loc.start;
2608 node->message_loc.end = arguments->closing_loc.end;
2609
2610 node->opening_loc = arguments->opening_loc;
2611 node->arguments = arguments->arguments;
2612 node->closing_loc = arguments->closing_loc;
2613 node->block = arguments->block;
2614
2615 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2616 return node;
2617}
2618
2622static pm_call_node_t *
2623pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2624 pm_assert_value_expression(parser, receiver);
2625 pm_assert_value_expression(parser, argument);
2626
2627 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2628
2629 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2630 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2631
2632 node->receiver = receiver;
2633 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2634
2635 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2636 pm_arguments_node_arguments_append(arguments, argument);
2637 node->arguments = arguments;
2638
2639 node->name = pm_parser_constant_id_token(parser, operator);
2640 return node;
2641}
2642
// Forward declaration. pm_call_node_call_create below passes the result of
// this function as the third argument to pm_parser_constant_id_location when
// computing the name of a call from its message token.
static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2644
2648static pm_call_node_t *
2649pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2650 pm_assert_value_expression(parser, receiver);
2651
2652 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2653
2654 node->base.location.start = receiver->location.start;
2655 const uint8_t *end = pm_arguments_end(arguments);
2656 if (end == NULL) {
2657 end = message->end;
2658 }
2659 node->base.location.end = end;
2660
2661 node->receiver = receiver;
2662 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2663 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2664 node->opening_loc = arguments->opening_loc;
2665 node->arguments = arguments->arguments;
2666 node->closing_loc = arguments->closing_loc;
2667 node->block = arguments->block;
2668
2669 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2670 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2671 }
2672
2677 node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
2678 return node;
2679}
2680
2684static pm_call_node_t *
2685pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2686 pm_call_node_t *node = pm_call_node_create(parser, 0);
2687 node->base.location.start = parser->start;
2688 node->base.location.end = parser->end;
2689
2690 node->receiver = receiver;
2691 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2692 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2693 node->arguments = arguments;
2694
2695 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2696 return node;
2697}
2698
2703static pm_call_node_t *
2704pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2705 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2706
2707 node->base.location.start = message->start;
2708 node->base.location.end = pm_arguments_end(arguments);
2709
2710 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2711 node->opening_loc = arguments->opening_loc;
2712 node->arguments = arguments->arguments;
2713 node->closing_loc = arguments->closing_loc;
2714 node->block = arguments->block;
2715
2716 node->name = pm_parser_constant_id_token(parser, message);
2717 return node;
2718}
2719
2724static pm_call_node_t *
2725pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2726 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2727
2728 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2729 node->arguments = arguments;
2730
2731 node->name = name;
2732 return node;
2733}
2734
2738static pm_call_node_t *
2739pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2740 pm_assert_value_expression(parser, receiver);
2741 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2742
2743 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2744
2745 node->base.location.start = message->start;
2746 if (arguments->closing_loc.start != NULL) {
2747 node->base.location.end = arguments->closing_loc.end;
2748 } else {
2749 assert(receiver != NULL);
2750 node->base.location.end = receiver->location.end;
2751 }
2752
2753 node->receiver = receiver;
2754 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2755 node->opening_loc = arguments->opening_loc;
2756 node->arguments = arguments->arguments;
2757 node->closing_loc = arguments->closing_loc;
2758
2759 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2760 return node;
2761}
2762
2766static pm_call_node_t *
2767pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2768 pm_assert_value_expression(parser, receiver);
2769
2770 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2771
2772 node->base.location.start = receiver->location.start;
2773 node->base.location.end = pm_arguments_end(arguments);
2774
2775 node->receiver = receiver;
2776 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2777 node->opening_loc = arguments->opening_loc;
2778 node->arguments = arguments->arguments;
2779 node->closing_loc = arguments->closing_loc;
2780 node->block = arguments->block;
2781
2782 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2783 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2784 }
2785
2786 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2787 return node;
2788}
2789
2793static pm_call_node_t *
2794pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2795 pm_assert_value_expression(parser, receiver);
2796
2797 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2798
2799 node->base.location.start = operator->start;
2800 node->base.location.end = receiver->location.end;
2801
2802 node->receiver = receiver;
2803 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2804
2805 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2806 return node;
2807}
2808
2813static pm_call_node_t *
2814pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2815 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2816
2817 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2818 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819
2820 node->name = pm_parser_constant_id_token(parser, message);
2821 return node;
2822}
2823
2828static inline bool
2829pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2830 return (
2831 (node->message_loc.start != NULL) &&
2832 (node->message_loc.end[-1] != '!') &&
2833 (node->message_loc.end[-1] != '?') &&
2834 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2835 (node->opening_loc.start == NULL) &&
2836 (node->arguments == NULL) &&
2837 (node->block == NULL)
2838 );
2839}
2840
2844static void
2845pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2846 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2847
2848 if (write_constant->length > 0) {
2849 size_t length = write_constant->length - 1;
2850
2851 void *memory = xmalloc(length);
2852 memcpy(memory, write_constant->start, length);
2853
2854 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2855 } else {
2856 // We can get here if the message was missing because of a syntax error.
2857 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2858 }
2859}
2860
2864static pm_call_and_write_node_t *
2865pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2866 assert(target->block == NULL);
2867 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2868 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2869
2870 *node = (pm_call_and_write_node_t) {
2871 .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value),
2872 .receiver = target->receiver,
2873 .call_operator_loc = target->call_operator_loc,
2874 .message_loc = target->message_loc,
2875 .read_name = 0,
2876 .write_name = target->name,
2877 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2878 .value = value
2879 };
2880
2881 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2882
2883 // Here we're going to free the target, since it is no longer necessary.
2884 // However, we don't want to call `pm_node_destroy` because we want to keep
2885 // around all of its children since we just reused them.
2886 xfree(target);
2887
2888 return node;
2889}
2890
2895static void
2896pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2897 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2898 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2899 pm_node_t *node;
2900 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2901 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2902 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2903 break;
2904 }
2905 }
2906 }
2907
2908 if (block != NULL) {
2909 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2910 }
2911 }
2912}
2913
2917static pm_index_and_write_node_t *
2918pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2919 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2920 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2921
2922 pm_index_arguments_check(parser, target->arguments, target->block);
2923
2924 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2925 *node = (pm_index_and_write_node_t) {
2926 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value),
2927 .receiver = target->receiver,
2928 .call_operator_loc = target->call_operator_loc,
2929 .opening_loc = target->opening_loc,
2930 .arguments = target->arguments,
2931 .closing_loc = target->closing_loc,
2932 .block = (pm_block_argument_node_t *) target->block,
2933 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2934 .value = value
2935 };
2936
2937 // Here we're going to free the target, since it is no longer necessary.
2938 // However, we don't want to call `pm_node_destroy` because we want to keep
2939 // around all of its children since we just reused them.
2940 xfree(target);
2941
2942 return node;
2943}
2944
2948static pm_call_operator_write_node_t *
2949pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2950 assert(target->block == NULL);
2951 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
2952
2953 *node = (pm_call_operator_write_node_t) {
2954 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value),
2955 .receiver = target->receiver,
2956 .call_operator_loc = target->call_operator_loc,
2957 .message_loc = target->message_loc,
2958 .read_name = 0,
2959 .write_name = target->name,
2960 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2961 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2962 .value = value
2963 };
2964
2965 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2966
2967 // Here we're going to free the target, since it is no longer necessary.
2968 // However, we don't want to call `pm_node_destroy` because we want to keep
2969 // around all of its children since we just reused them.
2970 xfree(target);
2971
2972 return node;
2973}
2974
2978static pm_index_operator_write_node_t *
2979pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2980 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
2981
2982 pm_index_arguments_check(parser, target->arguments, target->block);
2983
2984 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2985 *node = (pm_index_operator_write_node_t) {
2986 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value),
2987 .receiver = target->receiver,
2988 .call_operator_loc = target->call_operator_loc,
2989 .opening_loc = target->opening_loc,
2990 .arguments = target->arguments,
2991 .closing_loc = target->closing_loc,
2992 .block = (pm_block_argument_node_t *) target->block,
2993 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2994 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2995 .value = value
2996 };
2997
2998 // Here we're going to free the target, since it is no longer necessary.
2999 // However, we don't want to call `pm_node_destroy` because we want to keep
3000 // around all of its children since we just reused them.
3001 xfree(target);
3002
3003 return node;
3004}
3005
3009static pm_call_or_write_node_t *
3010pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3011 assert(target->block == NULL);
3012 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3013 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3014
3015 *node = (pm_call_or_write_node_t) {
3016 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value),
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .message_loc = target->message_loc,
3020 .read_name = 0,
3021 .write_name = target->name,
3022 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3023 .value = value
3024 };
3025
3026 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3027
3028 // Here we're going to free the target, since it is no longer necessary.
3029 // However, we don't want to call `pm_node_destroy` because we want to keep
3030 // around all of its children since we just reused them.
3031 xfree(target);
3032
3033 return node;
3034}
3035
3039static pm_index_or_write_node_t *
3040pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3041 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3042 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3043
3044 pm_index_arguments_check(parser, target->arguments, target->block);
3045
3046 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3047 *node = (pm_index_or_write_node_t) {
3048 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value),
3049 .receiver = target->receiver,
3050 .call_operator_loc = target->call_operator_loc,
3051 .opening_loc = target->opening_loc,
3052 .arguments = target->arguments,
3053 .closing_loc = target->closing_loc,
3054 .block = (pm_block_argument_node_t *) target->block,
3055 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3056 .value = value
3057 };
3058
3059 // Here we're going to free the target, since it is no longer necessary.
3060 // However, we don't want to call `pm_node_destroy` because we want to keep
3061 // around all of its children since we just reused them.
3062 xfree(target);
3063
3064 return node;
3065}
3066
3071static pm_call_target_node_t *
3072pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3073 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3074
3075 *node = (pm_call_target_node_t) {
3076 .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target),
3077 .receiver = target->receiver,
3078 .call_operator_loc = target->call_operator_loc,
3079 .name = target->name,
3080 .message_loc = target->message_loc
3081 };
3082
3083 /* It is possible to get here where we have parsed an invalid syntax tree
3084 * where the call operator was not present. In that case we will have a
3085 * problem because it is a required location. In this case we need to fill
3086 * it in with a fake location so that the syntax tree remains valid. */
3087 if (node->call_operator_loc.start == NULL) {
3088 node->call_operator_loc = (pm_location_t) {
3089 .start = target->base.location.start,
3090 .end = target->base.location.start
3091 };
3092 }
3093
3094 // Here we're going to free the target, since it is no longer necessary.
3095 // However, we don't want to call `pm_node_destroy` because we want to keep
3096 // around all of its children since we just reused them.
3097 xfree(target);
3098
3099 return node;
3100}
3101
3106static pm_index_target_node_t *
3107pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3108 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3109
3110 pm_index_arguments_check(parser, target->arguments, target->block);
3111 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3112
3113 *node = (pm_index_target_node_t) {
3114 .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target),
3115 .receiver = target->receiver,
3116 .opening_loc = target->opening_loc,
3117 .arguments = target->arguments,
3118 .closing_loc = target->closing_loc,
3119 .block = (pm_block_argument_node_t *) target->block,
3120 };
3121
3122 // Here we're going to free the target, since it is no longer necessary.
3123 // However, we don't want to call `pm_node_destroy` because we want to keep
3124 // around all of its children since we just reused them.
3125 xfree(target);
3126
3127 return node;
3128}
3129
3133static pm_capture_pattern_node_t *
3134pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3135 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3136
3137 *node = (pm_capture_pattern_node_t) {
3138 .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target),
3139 .value = value,
3140 .target = target,
3141 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3142 };
3143
3144 return node;
3145}
3146
3150static pm_case_node_t *
3151pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3152 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3153
3154 *node = (pm_case_node_t) {
3155 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword),
3156 .predicate = predicate,
3157 .else_clause = NULL,
3158 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3159 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3160 .conditions = { 0 }
3161 };
3162
3163 return node;
3164}
3165
3169static void
3170pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3171 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3172
3173 pm_node_list_append(&node->conditions, condition);
3174 node->base.location.end = condition->location.end;
3175}
3176
3180static void
3181pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3182 node->else_clause = else_clause;
3183 node->base.location.end = else_clause->base.location.end;
3184}
3185
3189static void
3190pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3191 node->base.location.end = end_keyword->end;
3192 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3193}
3194
3198static pm_case_match_node_t *
3199pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3200 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3201
3202 *node = (pm_case_match_node_t) {
3203 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword),
3204 .predicate = predicate,
3205 .else_clause = NULL,
3206 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3207 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3208 .conditions = { 0 }
3209 };
3210
3211 return node;
3212}
3213
3217static void
3218pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3219 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3220
3221 pm_node_list_append(&node->conditions, condition);
3222 node->base.location.end = condition->location.end;
3223}
3224
3228static void
3229pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3230 node->else_clause = else_clause;
3231 node->base.location.end = else_clause->base.location.end;
3232}
3233
3237static void
3238pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3239 node->base.location.end = end_keyword->end;
3240 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3241}
3242
3246static pm_class_node_t *
3247pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3248 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3249
3250 *node = (pm_class_node_t) {
3251 .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword),
3252 .locals = *locals,
3253 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3254 .constant_path = constant_path,
3255 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3256 .superclass = superclass,
3257 .body = body,
3258 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3259 .name = pm_parser_constant_id_token(parser, name)
3260 };
3261
3262 return node;
3263}
3264
3268static pm_class_variable_and_write_node_t *
3269pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3270 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3271 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3272
3273 *node = (pm_class_variable_and_write_node_t) {
3274 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value),
3275 .name = target->name,
3276 .name_loc = target->base.location,
3277 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3278 .value = value
3279 };
3280
3281 return node;
3282}
3283
3287static pm_class_variable_operator_write_node_t *
3288pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3289 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3290
3291 *node = (pm_class_variable_operator_write_node_t) {
3292 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
3293 .name = target->name,
3294 .name_loc = target->base.location,
3295 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3296 .value = value,
3297 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3298 };
3299
3300 return node;
3301}
3302
3306static pm_class_variable_or_write_node_t *
3307pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3308 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3309 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3310
3311 *node = (pm_class_variable_or_write_node_t) {
3312 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value),
3313 .name = target->name,
3314 .name_loc = target->base.location,
3315 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3316 .value = value
3317 };
3318
3319 return node;
3320}
3321
3325static pm_class_variable_read_node_t *
3326pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3327 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3328 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3329
3330 *node = (pm_class_variable_read_node_t) {
3331 .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token),
3332 .name = pm_parser_constant_id_token(parser, token)
3333 };
3334
3335 return node;
3336}
3337
3344static inline pm_node_flags_t
3345pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3346 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3347 return flags;
3348 }
3349 return 0;
3350}
3351
3355static pm_class_variable_write_node_t *
3356pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3357 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3358 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3359
3360 *node = (pm_class_variable_write_node_t) {
3361 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value),
3362 .name = read_node->name,
3363 .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)),
3364 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3365 .value = value
3366 };
3367
3368 return node;
3369}
3370
3374static pm_constant_path_and_write_node_t *
3375pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3376 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3377 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3378
3379 *node = (pm_constant_path_and_write_node_t) {
3380 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value),
3381 .target = target,
3382 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3383 .value = value
3384 };
3385
3386 return node;
3387}
3388
3392static pm_constant_path_operator_write_node_t *
3393pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3394 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3395
3396 *node = (pm_constant_path_operator_write_node_t) {
3397 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value),
3398 .target = target,
3399 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3400 .value = value,
3401 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3402 };
3403
3404 return node;
3405}
3406
3410static pm_constant_path_or_write_node_t *
3411pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3412 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3413 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3414
3415 *node = (pm_constant_path_or_write_node_t) {
3416 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value),
3417 .target = target,
3418 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3419 .value = value
3420 };
3421
3422 return node;
3423}
3424
3428static pm_constant_path_node_t *
3429pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3430 pm_assert_value_expression(parser, parent);
3431 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3432
3433 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3434 if (name_token->type == PM_TOKEN_CONSTANT) {
3435 name = pm_parser_constant_id_token(parser, name_token);
3436 }
3437
3438 if (parent == NULL) {
3439 *node = (pm_constant_path_node_t) {
3440 .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token),
3441 .parent = parent,
3442 .name = name,
3443 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3444 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3445 };
3446 } else {
3447 *node = (pm_constant_path_node_t) {
3448 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token),
3449 .parent = parent,
3450 .name = name,
3451 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3452 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3453 };
3454 }
3455
3456 return node;
3457}
3458
3462static pm_constant_path_write_node_t *
3463pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3464 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3465 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3466
3467 *node = (pm_constant_path_write_node_t) {
3468 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value),
3469 .target = target,
3470 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3471 .value = value
3472 };
3473
3474 return node;
3475}
3476
3480static pm_constant_and_write_node_t *
3481pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3482 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3483 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3484
3485 *node = (pm_constant_and_write_node_t) {
3486 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value),
3487 .name = target->name,
3488 .name_loc = target->base.location,
3489 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3490 .value = value
3491 };
3492
3493 return node;
3494}
3495
3499static pm_constant_operator_write_node_t *
3500pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3501 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3502
3503 *node = (pm_constant_operator_write_node_t) {
3504 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value),
3505 .name = target->name,
3506 .name_loc = target->base.location,
3507 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3508 .value = value,
3509 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3510 };
3511
3512 return node;
3513}
3514
3518static pm_constant_or_write_node_t *
3519pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3520 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3521 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3522
3523 *node = (pm_constant_or_write_node_t) {
3524 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value),
3525 .name = target->name,
3526 .name_loc = target->base.location,
3527 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3528 .value = value
3529 };
3530
3531 return node;
3532}
3533
3537static pm_constant_read_node_t *
3538pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3539 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3540 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3541
3542 *node = (pm_constant_read_node_t) {
3543 .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name),
3544 .name = pm_parser_constant_id_token(parser, name)
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_write_node_t *
3554pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3556 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3557
3558 *node = (pm_constant_write_node_t) {
3559 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value),
3560 .name = target->name,
3561 .name_loc = target->base.location,
3562 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3563 .value = value
3564 };
3565
3566 return node;
3567}
3568
3572static void
3573pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3574 switch (PM_NODE_TYPE(node)) {
3575 case PM_BEGIN_NODE: {
3576 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3577 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3578 break;
3579 }
3580 case PM_PARENTHESES_NODE: {
3581 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3582 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3583 break;
3584 }
3585 case PM_STATEMENTS_NODE: {
3586 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3587 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3588 break;
3589 }
3590 case PM_ARRAY_NODE:
3591 case PM_FLOAT_NODE:
3592 case PM_IMAGINARY_NODE:
3593 case PM_INTEGER_NODE:
3594 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3595 case PM_INTERPOLATED_STRING_NODE:
3596 case PM_INTERPOLATED_SYMBOL_NODE:
3597 case PM_INTERPOLATED_X_STRING_NODE:
3598 case PM_RATIONAL_NODE:
3599 case PM_REGULAR_EXPRESSION_NODE:
3600 case PM_SOURCE_ENCODING_NODE:
3601 case PM_SOURCE_FILE_NODE:
3602 case PM_SOURCE_LINE_NODE:
3603 case PM_STRING_NODE:
3604 case PM_SYMBOL_NODE:
3605 case PM_X_STRING_NODE:
3606 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3607 break;
3608 default:
3609 break;
3610 }
3611}
3612
3616static pm_def_node_t *
3617pm_def_node_create(
3618 pm_parser_t *parser,
3619 pm_constant_id_t name,
3620 const pm_token_t *name_loc,
3621 pm_node_t *receiver,
3622 pm_parameters_node_t *parameters,
3623 pm_node_t *body,
3624 pm_constant_id_list_t *locals,
3625 const pm_token_t *def_keyword,
3626 const pm_token_t *operator,
3627 const pm_token_t *lparen,
3628 const pm_token_t *rparen,
3629 const pm_token_t *equal,
3630 const pm_token_t *end_keyword
3631) {
3632 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3633
3634 if (receiver != NULL) {
3635 pm_def_node_receiver_check(parser, receiver);
3636 }
3637
3638 *node = (pm_def_node_t) {
3639 .base = (
3640 (end_keyword->type == PM_TOKEN_NOT_PROVIDED)
3641 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body)
3642 : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword)
3643 ),
3644 .name = name,
3645 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3646 .receiver = receiver,
3647 .parameters = parameters,
3648 .body = body,
3649 .locals = *locals,
3650 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3651 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3652 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3653 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3654 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3655 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3656 };
3657
3658 return node;
3659}
3660
3664static pm_defined_node_t *
3665pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3666 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3667
3668 *node = (pm_defined_node_t) {
3669 .base = (
3670 (rparen->type == PM_TOKEN_NOT_PROVIDED)
3671 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value)
3672 : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen)
3673 ),
3674 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3675 .value = value,
3676 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3677 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
3678 };
3679
3680 return node;
3681}
3682
3686static pm_else_node_t *
3687pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3688 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3689
3690 *node = (pm_else_node_t) {
3691 .base = (
3692 ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL))
3693 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements)
3694 : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword)
3695 ),
3696 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3697 .statements = statements,
3698 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3699 };
3700
3701 return node;
3702}
3703
3707static pm_embedded_statements_node_t *
3708pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3709 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3710
3711 *node = (pm_embedded_statements_node_t) {
3712 .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing),
3713 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3714 .statements = statements,
3715 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3716 };
3717
3718 return node;
3719}
3720
3724static pm_embedded_variable_node_t *
3725pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3726 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3727
3728 *node = (pm_embedded_variable_node_t) {
3729 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable),
3730 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3731 .variable = variable
3732 };
3733
3734 return node;
3735}
3736
3740static pm_ensure_node_t *
3741pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3742 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
3743
3744 *node = (pm_ensure_node_t) {
3745 .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword),
3746 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
3747 .statements = statements,
3748 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
3749 };
3750
3751 return node;
3752}
3753
3757static pm_false_node_t *
3758pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3759 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3760 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
3761
3762 *node = (pm_false_node_t) {
3763 .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
3764 };
3765
3766 return node;
3767}
3768
3773static pm_find_pattern_node_t *
3774pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3775 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
3776
3777 pm_node_t *left = nodes->nodes[0];
3778 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3779 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3780
3781 pm_node_t *right;
3782
3783 if (nodes->size == 1) {
3784 right = UP(pm_missing_node_create(parser, left->location.end, left->location.end));
3785 } else {
3786 right = nodes->nodes[nodes->size - 1];
3787 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3788 }
3789
3790#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3791 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3792 // The resulting AST will anyway be ignored, but this file still needs to compile.
3793 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3794#else
3795 pm_node_t *right_splat_node = right;
3796#endif
3797 *node = (pm_find_pattern_node_t) {
3798 .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right),
3799 .constant = NULL,
3800 .left = left_splat_node,
3801 .right = right_splat_node,
3802 .requireds = { 0 },
3803 .opening_loc = { 0 },
3804 .closing_loc = { 0 }
3805 };
3806
3807 // For now we're going to just copy over each pointer manually. This could be
3808 // much more efficient, as we could instead resize the node list to only point
3809 // to 1...-1.
3810 for (size_t index = 1; index < nodes->size - 1; index++) {
3811 pm_node_list_append(&node->requireds, nodes->nodes[index]);
3812 }
3813
3814 return node;
3815}
3816
3821static double
3822pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3823 ptrdiff_t diff = token->end - token->start;
3824 if (diff <= 0) return 0.0;
3825
3826 // First, get a buffer of the content.
3827 size_t length = (size_t) diff;
3828 char *buffer = xmalloc(sizeof(char) * (length + 1));
3829 memcpy((void *) buffer, token->start, length);
3830
3831 // Next, determine if we need to replace the decimal point because of
3832 // locale-specific options, and then normalize them if we have to.
3833 char decimal_point = *localeconv()->decimal_point;
3834 if (decimal_point != '.') {
3835 for (size_t index = 0; index < length; index++) {
3836 if (buffer[index] == '.') buffer[index] = decimal_point;
3837 }
3838 }
3839
3840 // Next, handle underscores by removing them from the buffer.
3841 for (size_t index = 0; index < length; index++) {
3842 if (buffer[index] == '_') {
3843 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3844 length--;
3845 }
3846 }
3847
3848 // Null-terminate the buffer so that strtod cannot read off the end.
3849 buffer[length] = '\0';
3850
3851 // Now, call strtod to parse the value. Note that CRuby has their own
3852 // version of strtod which avoids locales. We're okay using the locale-aware
3853 // version because we've already validated through the parser that the token
3854 // is in a valid format.
3855 errno = 0;
3856 char *eptr;
3857 double value = strtod(buffer, &eptr);
3858
3859 // This should never happen, because we've already checked that the token
3860 // is in a valid format. However it's good to be safe.
3861 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3862 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
3863 xfree((void *) buffer);
3864 return 0.0;
3865 }
3866
3867 // If errno is set, then it should only be ERANGE. At this point we need to
3868 // check if it's infinity (it should be).
3869 if (errno == ERANGE && PRISM_ISINF(value)) {
3870 int warn_width;
3871 const char *ellipsis;
3872
3873 if (length > 20) {
3874 warn_width = 20;
3875 ellipsis = "...";
3876 } else {
3877 warn_width = (int) length;
3878 ellipsis = "";
3879 }
3880
3881 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3882 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3883 }
3884
3885 // Finally we can free the buffer and return the value.
3886 xfree((void *) buffer);
3887 return value;
3888}
3889
3893static pm_float_node_t *
3894pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3895 assert(token->type == PM_TOKEN_FLOAT);
3896 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
3897
3898 *node = (pm_float_node_t) {
3899 .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3900 .value = pm_double_parse(parser, token)
3901 };
3902
3903 return node;
3904}
3905
3909static pm_imaginary_node_t *
3910pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3911 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3912
3913 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3914 *node = (pm_imaginary_node_t) {
3915 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3916 .numeric = UP(pm_float_node_create(parser, &((pm_token_t) {
3917 .type = PM_TOKEN_FLOAT,
3918 .start = token->start,
3919 .end = token->end - 1
3920 })))
3921 };
3922
3923 return node;
3924}
3925
3929static pm_rational_node_t *
3930pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3931 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3932
3933 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
3934 *node = (pm_rational_node_t) {
3935 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token),
3936 .numerator = { 0 },
3937 .denominator = { 0 }
3938 };
3939
3940 const uint8_t *start = token->start;
3941 const uint8_t *end = token->end - 1; // r
3942
3943 while (start < end && *start == '0') start++; // 0.1 -> .1
3944 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3945
3946 size_t length = (size_t) (end - start);
3947 if (length == 1) {
3948 node->denominator.value = 1;
3949 return node;
3950 }
3951
3952 const uint8_t *point = memchr(start, '.', length);
3953 assert(point && "should have a decimal point");
3954
3955 uint8_t *digits = xmalloc(length);
3956 if (digits == NULL) {
3957 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
3958 abort();
3959 }
3960
3961 memcpy(digits, start, (unsigned long) (point - start));
3962 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3963 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
3964
3965 size_t fract_length = 0;
3966 for (const uint8_t *fract = point; fract < end; ++fract) {
3967 if (*fract != '_') ++fract_length;
3968 }
3969 digits[0] = '1';
3970 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
3971 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
3972 xfree(digits);
3973
3974 pm_integers_reduce(&node->numerator, &node->denominator);
3975 return node;
3976}
3977
3982static pm_imaginary_node_t *
3983pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3984 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
3985
3986 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3987 *node = (pm_imaginary_node_t) {
3988 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3989 .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
3990 .type = PM_TOKEN_FLOAT_RATIONAL,
3991 .start = token->start,
3992 .end = token->end - 1
3993 })))
3994 };
3995
3996 return node;
3997}
3998
4002static pm_for_node_t *
4003pm_for_node_create(
4004 pm_parser_t *parser,
4005 pm_node_t *index,
4006 pm_node_t *collection,
4007 pm_statements_node_t *statements,
4008 const pm_token_t *for_keyword,
4009 const pm_token_t *in_keyword,
4010 const pm_token_t *do_keyword,
4011 const pm_token_t *end_keyword
4012) {
4013 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4014
4015 *node = (pm_for_node_t) {
4016 .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword),
4017 .index = index,
4018 .collection = collection,
4019 .statements = statements,
4020 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4021 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4022 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4023 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4024 };
4025
4026 return node;
4027}
4028
4032static pm_forwarding_arguments_node_t *
4033pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4034 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4035 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4036
4037 *node = (pm_forwarding_arguments_node_t) {
4038 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token)
4039 };
4040
4041 return node;
4042}
4043
4047static pm_forwarding_parameter_node_t *
4048pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4049 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4050 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4051
4052 *node = (pm_forwarding_parameter_node_t) {
4053 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token)
4054 };
4055
4056 return node;
4057}
4058
4062static pm_forwarding_super_node_t *
4063pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4064 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4065 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4066 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4067
4068 pm_block_node_t *block = NULL;
4069 if (arguments->block != NULL) {
4070 block = (pm_block_node_t *) arguments->block;
4071 }
4072
4073 *node = (pm_forwarding_super_node_t) {
4074 .base = (
4075 (block == NULL)
4076 ? PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token)
4077 : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block)
4078 ),
4079 .block = block
4080 };
4081
4082 return node;
4083}
4084
4089static pm_hash_pattern_node_t *
4090pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4091 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4092
4093 *node = (pm_hash_pattern_node_t) {
4094 .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing),
4095 .constant = NULL,
4096 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4097 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4098 .elements = { 0 },
4099 .rest = NULL
4100 };
4101
4102 return node;
4103}
4104
4108static pm_hash_pattern_node_t *
4109pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4110 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4111
4112 const uint8_t *start;
4113 const uint8_t *end;
4114
4115 if (elements->size > 0) {
4116 if (rest) {
4117 start = MIN(rest->location.start, elements->nodes[0]->location.start);
4118 end = MAX(rest->location.end, elements->nodes[elements->size - 1]->location.end);
4119 } else {
4120 start = elements->nodes[0]->location.start;
4121 end = elements->nodes[elements->size - 1]->location.end;
4122 }
4123 } else {
4124 assert(rest != NULL);
4125 start = rest->location.start;
4126 end = rest->location.end;
4127 }
4128
4129 *node = (pm_hash_pattern_node_t) {
4130 .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end),
4131 .constant = NULL,
4132 .elements = { 0 },
4133 .rest = rest,
4134 .opening_loc = { 0 },
4135 .closing_loc = { 0 }
4136 };
4137
4138 pm_node_list_concat(&node->elements, elements);
4139 return node;
4140}
4141
4145static pm_constant_id_t
4146pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4147 switch (PM_NODE_TYPE(target)) {
4148 case PM_GLOBAL_VARIABLE_READ_NODE:
4149 return ((pm_global_variable_read_node_t *) target)->name;
4150 case PM_BACK_REFERENCE_READ_NODE:
4151 return ((pm_back_reference_read_node_t *) target)->name;
4152 case PM_NUMBERED_REFERENCE_READ_NODE:
4153 // This will only ever happen in the event of a syntax error, but we
4154 // still need to provide something for the node.
4155 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4156 default:
4157 assert(false && "unreachable");
4158 return (pm_constant_id_t) -1;
4159 }
4160}
4161
4165static pm_global_variable_and_write_node_t *
4166pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4167 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4168 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4169
4170 *node = (pm_global_variable_and_write_node_t) {
4171 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
4172 .name = pm_global_variable_write_name(parser, target),
4173 .name_loc = target->location,
4174 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4175 .value = value
4176 };
4177
4178 return node;
4179}
4180
4184static pm_global_variable_operator_write_node_t *
4185pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4186 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4187
4188 *node = (pm_global_variable_operator_write_node_t) {
4189 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4190 .name = pm_global_variable_write_name(parser, target),
4191 .name_loc = target->location,
4192 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4193 .value = value,
4194 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4195 };
4196
4197 return node;
4198}
4199
4203static pm_global_variable_or_write_node_t *
4204pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4205 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4206 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4207
4208 *node = (pm_global_variable_or_write_node_t) {
4209 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
4210 .name = pm_global_variable_write_name(parser, target),
4211 .name_loc = target->location,
4212 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4213 .value = value
4214 };
4215
4216 return node;
4217}
4218
4222static pm_global_variable_read_node_t *
4223pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4224 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4225
4226 *node = (pm_global_variable_read_node_t) {
4227 .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name),
4228 .name = pm_parser_constant_id_token(parser, name)
4229 };
4230
4231 return node;
4232}
4233
4237static pm_global_variable_read_node_t *
4238pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4239 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4240
4241 *node = (pm_global_variable_read_node_t) {
4242 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0),
4243 .name = name
4244 };
4245
4246 return node;
4247}
4248
4252static pm_global_variable_write_node_t *
4253pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4254 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4255 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4256
4257 *node = (pm_global_variable_write_node_t) {
4258 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value),
4259 .name = pm_global_variable_write_name(parser, target),
4260 .name_loc = PM_LOCATION_NODE_VALUE(target),
4261 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4262 .value = value
4263 };
4264
4265 return node;
4266}
4267
4271static pm_global_variable_write_node_t *
4272pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4273 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4274
4275 *node = (pm_global_variable_write_node_t) {
4276 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0),
4277 .name = name,
4278 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4279 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4280 .value = value
4281 };
4282
4283 return node;
4284}
4285
4289static pm_hash_node_t *
4290pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4291 assert(opening != NULL);
4292 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4293
4294 *node = (pm_hash_node_t) {
4295 .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4296 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4297 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4298 .elements = { 0 }
4299 };
4300
4301 return node;
4302}
4303
4307static inline void
4308pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4309 pm_node_list_append(&hash->elements, element);
4310
4311 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4312 if (static_literal) {
4313 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4314 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4315 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4316 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4317 }
4318
4319 if (!static_literal) {
4320 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4321 }
4322}
4323
4324static inline void
4325pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4326 hash->base.location.end = token->end;
4327 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4328}
4329
4333static pm_if_node_t *
4334pm_if_node_create(pm_parser_t *parser,
4335 const pm_token_t *if_keyword,
4336 pm_node_t *predicate,
4337 const pm_token_t *then_keyword,
4338 pm_statements_node_t *statements,
4339 pm_node_t *subsequent,
4340 const pm_token_t *end_keyword
4341) {
4342 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4343 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4344
4345 const uint8_t *end;
4346 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4347 end = end_keyword->end;
4348 } else if (subsequent != NULL) {
4349 end = subsequent->location.end;
4350 } else if (pm_statements_node_body_length(statements) != 0) {
4351 end = statements->base.location.end;
4352 } else {
4353 end = predicate->location.end;
4354 }
4355
4356 *node = (pm_if_node_t) {
4357 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end),
4358 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4359 .predicate = predicate,
4360 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4361 .statements = statements,
4362 .subsequent = subsequent,
4363 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4364 };
4365
4366 return node;
4367}
4368
4372static pm_if_node_t *
4373pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4374 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4375 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4376
4377 pm_statements_node_t *statements = pm_statements_node_create(parser);
4378 pm_statements_node_body_append(parser, statements, statement, true);
4379
4380 *node = (pm_if_node_t) {
4381 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
4382 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4383 .predicate = predicate,
4384 .then_keyword_loc = { 0 },
4385 .statements = statements,
4386 .subsequent = NULL,
4387 .end_keyword_loc = { 0 }
4388 };
4389
4390 return node;
4391}
4392
4396static pm_if_node_t *
4397pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4398 pm_assert_value_expression(parser, predicate);
4399 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4400
4401 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4402 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4403
4404 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4405 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4406
4407 pm_token_t end_keyword = not_provided(parser);
4408 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4409
4410 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4411
4412 *node = (pm_if_node_t) {
4413 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression),
4414 .if_keyword_loc = { 0 },
4415 .predicate = predicate,
4416 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4417 .statements = if_statements,
4418 .subsequent = UP(else_node),
4419 .end_keyword_loc = { 0 }
4420 };
4421
4422 return node;
4423
4424}
4425
4426static inline void
4427pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4428 node->base.location.end = keyword->end;
4429 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4430}
4431
4432static inline void
4433pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4434 node->base.location.end = keyword->end;
4435 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4436}
4437
4441static pm_implicit_node_t *
4442pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4443 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4444
4445 *node = (pm_implicit_node_t) {
4446 .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value),
4447 .value = value
4448 };
4449
4450 return node;
4451}
4452
4456static pm_implicit_rest_node_t *
4457pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4458 assert(token->type == PM_TOKEN_COMMA);
4459
4460 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4461
4462 *node = (pm_implicit_rest_node_t) {
4463 .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token)
4464 };
4465
4466 return node;
4467}
4468
4472static pm_integer_node_t *
4473pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4474 assert(token->type == PM_TOKEN_INTEGER);
4475 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4476
4477 *node = (pm_integer_node_t) {
4478 .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4479 .value = { 0 }
4480 };
4481
4482 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4483 switch (base) {
4484 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4485 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4486 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4487 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4488 default: assert(false && "unreachable"); break;
4489 }
4490
4491 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4492 return node;
4493}
4494
4499static pm_imaginary_node_t *
4500pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4501 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4502
4503 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4504 *node = (pm_imaginary_node_t) {
4505 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4506 .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4507 .type = PM_TOKEN_INTEGER,
4508 .start = token->start,
4509 .end = token->end - 1
4510 })))
4511 };
4512
4513 return node;
4514}
4515
4520static pm_rational_node_t *
4521pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4522 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4523
4524 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4525 *node = (pm_rational_node_t) {
4526 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4527 .numerator = { 0 },
4528 .denominator = { .value = 1, 0 }
4529 };
4530
4531 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4532 switch (base) {
4533 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4534 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4535 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4536 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4537 default: assert(false && "unreachable"); break;
4538 }
4539
4540 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4541
4542 return node;
4543}
4544
4549static pm_imaginary_node_t *
4550pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4551 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4552
4553 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4554 *node = (pm_imaginary_node_t) {
4555 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4556 .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4557 .type = PM_TOKEN_INTEGER_RATIONAL,
4558 .start = token->start,
4559 .end = token->end - 1
4560 })))
4561 };
4562
4563 return node;
4564}
4565
4569static pm_in_node_t *
4570pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4571 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4572
4573 const uint8_t *end;
4574 if (statements != NULL) {
4575 end = statements->base.location.end;
4576 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4577 end = then_keyword->end;
4578 } else {
4579 end = pattern->location.end;
4580 }
4581
4582 *node = (pm_in_node_t) {
4583 .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end),
4584 .pattern = pattern,
4585 .statements = statements,
4586 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4587 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
4588 };
4589
4590 return node;
4591}
4592
4596static pm_instance_variable_and_write_node_t *
4597pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4598 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4599 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
4600
4601 *node = (pm_instance_variable_and_write_node_t) {
4602 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value),
4603 .name = target->name,
4604 .name_loc = target->base.location,
4605 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4606 .value = value
4607 };
4608
4609 return node;
4610}
4611
4615static pm_instance_variable_operator_write_node_t *
4616pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4617 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
4618
4619 *node = (pm_instance_variable_operator_write_node_t) {
4620 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4621 .name = target->name,
4622 .name_loc = target->base.location,
4623 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4624 .value = value,
4625 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4626 };
4627
4628 return node;
4629}
4630
4634static pm_instance_variable_or_write_node_t *
4635pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4636 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4637 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
4638
4639 *node = (pm_instance_variable_or_write_node_t) {
4640 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value),
4641 .name = target->name,
4642 .name_loc = target->base.location,
4643 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4644 .value = value
4645 };
4646
4647 return node;
4648}
4649
4653static pm_instance_variable_read_node_t *
4654pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4655 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4656 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
4657
4658 *node = (pm_instance_variable_read_node_t) {
4659 .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token),
4660 .name = pm_parser_constant_id_token(parser, token)
4661 };
4662
4663 return node;
4664}
4665
4670static pm_instance_variable_write_node_t *
4671pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4672 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
4673 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4674
4675 *node = (pm_instance_variable_write_node_t) {
4676 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value),
4677 .name = read_node->name,
4678 .name_loc = PM_LOCATION_NODE_VALUE(read_node),
4679 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4680 .value = value
4681 };
4682
4683 return node;
4684}
4685
4691static void
4692pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4693 switch (PM_NODE_TYPE(part)) {
4694 case PM_STRING_NODE:
4695 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4696 break;
4697 case PM_EMBEDDED_STATEMENTS_NODE: {
4698 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4699 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4700
4701 if (embedded == NULL) {
4702 // If there are no statements or more than one statement, then
4703 // we lose the static literal flag.
4704 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4705 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4706 // If the embedded statement is a string, then we can keep the
4707 // static literal flag and mark the string as frozen.
4708 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4709 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4710 // If the embedded statement is an interpolated string and it's
4711 // a static literal, then we can keep the static literal flag.
4712 } else {
4713 // Otherwise we lose the static literal flag.
4714 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4715 }
4716
4717 break;
4718 }
4719 case PM_EMBEDDED_VARIABLE_NODE:
4720 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4721 break;
4722 default:
4723 assert(false && "unexpected node type");
4724 break;
4725 }
4726
4727 pm_node_list_append(parts, part);
4728}
4729
4733static pm_interpolated_regular_expression_node_t *
4734pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4735 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
4736
4737 *node = (pm_interpolated_regular_expression_node_t) {
4738 .base = PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4739 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4740 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
4741 .parts = { 0 }
4742 };
4743
4744 return node;
4745}
4746
4747static inline void
4748pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4749 if (node->base.location.start > part->location.start) {
4750 node->base.location.start = part->location.start;
4751 }
4752 if (node->base.location.end < part->location.end) {
4753 node->base.location.end = part->location.end;
4754 }
4755
4756 pm_interpolated_node_append(UP(node), &node->parts, part);
4757}
4758
4759static inline void
4760pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4761 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4762 node->base.location.end = closing->end;
4763 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4764}
4765
4789static inline void
4790pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
4791#define CLEAR_FLAGS(node) \
4792 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4793
4794#define MUTABLE_FLAGS(node) \
4795 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4796
4797 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4798 node->base.location.start = part->location.start;
4799 }
4800
4801 node->base.location.end = MAX(node->base.location.end, part->location.end);
4802
4803 switch (PM_NODE_TYPE(part)) {
4804 case PM_STRING_NODE:
4805 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4806 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4807 // as long as this interpolation only consists of other string literals.
4808 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4809 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4810 }
4811 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4812 break;
4813 case PM_INTERPOLATED_STRING_NODE:
4814 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4815 // If the string that we're concatenating is a static literal,
4816 // then we can keep the static literal flag for this string.
4817 } else {
4818 // Otherwise, we lose the static literal flag here and we should
4819 // also clear the mutability flags.
4820 CLEAR_FLAGS(node);
4821 }
4822 break;
4823 case PM_EMBEDDED_STATEMENTS_NODE: {
4824 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4825 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4826
4827 if (embedded == NULL) {
4828 // If we're embedding multiple statements or no statements, then
4829 // the string is not longer a static literal.
4830 CLEAR_FLAGS(node);
4831 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4832 // If the embedded statement is a string, then we can make that
4833 // string as frozen and static literal, and not touch the static
4834 // literal status of this string.
4835 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4836
4837 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4838 MUTABLE_FLAGS(node);
4839 }
4840 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4841 // If the embedded statement is an interpolated string, but that
4842 // string is marked as static literal, then we can keep our
4843 // static literal status for this string.
4844 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4845 MUTABLE_FLAGS(node);
4846 }
4847 } else {
4848 // In all other cases, we lose the static literal flag here and
4849 // become mutable.
4850 CLEAR_FLAGS(node);
4851 }
4852
4853 break;
4854 }
4855 case PM_EMBEDDED_VARIABLE_NODE:
4856 // Embedded variables clear static literal, which means we also
4857 // should clear the mutability flags.
4858 CLEAR_FLAGS(node);
4859 break;
4860 case PM_X_STRING_NODE:
4861 case PM_INTERPOLATED_X_STRING_NODE:
4862 case PM_SYMBOL_NODE:
4863 case PM_INTERPOLATED_SYMBOL_NODE:
4864 // These will only happen in error cases. But we want to handle it
4865 // here so that we don't fail the assertion.
4866 CLEAR_FLAGS(node);
4867 break;
4868 default:
4869 assert(false && "unexpected node type");
4870 break;
4871 }
4872
4873 pm_node_list_append(&node->parts, part);
4874
4875#undef CLEAR_FLAGS
4876#undef MUTABLE_FLAGS
4877}
4878
4882static pm_interpolated_string_node_t *
4883pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4884 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
4885 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4886
4887 switch (parser->frozen_string_literal) {
4888 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4889 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4890 break;
4891 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4892 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4893 break;
4894 }
4895
4896 *node = (pm_interpolated_string_node_t) {
4897 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing),
4898 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4899 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4900 .parts = { 0 }
4901 };
4902
4903 if (parts != NULL) {
4904 pm_node_t *part;
4905 PM_NODE_LIST_FOREACH(parts, index, part) {
4906 pm_interpolated_string_node_append(node, part);
4907 }
4908 }
4909
4910 return node;
4911}
4912
4916static void
4917pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4918 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4919 node->base.location.end = closing->end;
4920}
4921
4922static void
4923pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4924 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4925 node->base.location.start = part->location.start;
4926 }
4927
4928 pm_interpolated_node_append(UP(node), &node->parts, part);
4929 node->base.location.end = MAX(node->base.location.end, part->location.end);
4930}
4931
4932static void
4933pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4934 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4935 node->base.location.end = closing->end;
4936}
4937
4941static pm_interpolated_symbol_node_t *
4942pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4943 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
4944
4945 *node = (pm_interpolated_symbol_node_t) {
4946 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
4947 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4948 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4949 .parts = { 0 }
4950 };
4951
4952 if (parts != NULL) {
4953 pm_node_t *part;
4954 PM_NODE_LIST_FOREACH(parts, index, part) {
4955 pm_interpolated_symbol_node_append(node, part);
4956 }
4957 }
4958
4959 return node;
4960}
4961
4965static pm_interpolated_x_string_node_t *
4966pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4967 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
4968
4969 *node = (pm_interpolated_x_string_node_t) {
4970 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing),
4971 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4972 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4973 .parts = { 0 }
4974 };
4975
4976 return node;
4977}
4978
4979static inline void
4980pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
4981 pm_interpolated_node_append(UP(node), &node->parts, part);
4982 node->base.location.end = part->location.end;
4983}
4984
4985static inline void
4986pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
4987 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4988 node->base.location.end = closing->end;
4989}
4990
4994static pm_it_local_variable_read_node_t *
4995pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4996 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
4997
4998 *node = (pm_it_local_variable_read_node_t) {
4999 .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name),
5000 };
5001
5002 return node;
5003}
5004
5008static pm_it_parameters_node_t *
5009pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5010 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5011
5012 *node = (pm_it_parameters_node_t) {
5013 .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing),
5014 };
5015
5016 return node;
5017}
5018
5022static pm_keyword_hash_node_t *
5023pm_keyword_hash_node_create(pm_parser_t *parser) {
5024 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5025
5026 *node = (pm_keyword_hash_node_t) {
5027 .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS),
5028 .elements = { 0 }
5029 };
5030
5031 return node;
5032}
5033
5037static void
5038pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5039 // If the element being added is not an AssocNode or does not have a symbol
5040 // key, then we want to turn the SYMBOL_KEYS flag off.
5041 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5042 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5043 }
5044
5045 pm_node_list_append(&hash->elements, element);
5046 if (hash->base.location.start == NULL) {
5047 hash->base.location.start = element->location.start;
5048 }
5049 hash->base.location.end = element->location.end;
5050}
5051
5055static pm_required_keyword_parameter_node_t *
5056pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5057 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5058
5059 *node = (pm_required_keyword_parameter_node_t) {
5060 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name),
5061 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5062 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5063 };
5064
5065 return node;
5066}
5067
5071static pm_optional_keyword_parameter_node_t *
5072pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5073 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5074
5075 *node = (pm_optional_keyword_parameter_node_t) {
5076 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value),
5077 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5078 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5079 .value = value
5080 };
5081
5082 return node;
5083}
5084
5088static pm_keyword_rest_parameter_node_t *
5089pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5090 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5091
5092 *node = (pm_keyword_rest_parameter_node_t) {
5093 .base = (
5094 (name->type == PM_TOKEN_NOT_PROVIDED)
5095 ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator)
5096 : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name)
5097 ),
5098 .name = pm_parser_optional_constant_id_token(parser, name),
5099 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5100 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5101 };
5102
5103 return node;
5104}
5105
5109static pm_lambda_node_t *
5110pm_lambda_node_create(
5111 pm_parser_t *parser,
5112 pm_constant_id_list_t *locals,
5113 const pm_token_t *operator,
5114 const pm_token_t *opening,
5115 const pm_token_t *closing,
5116 pm_node_t *parameters,
5117 pm_node_t *body
5118) {
5119 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5120
5121 *node = (pm_lambda_node_t) {
5122 .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing),
5123 .locals = *locals,
5124 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5125 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5126 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5127 .parameters = parameters,
5128 .body = body
5129 };
5130
5131 return node;
5132}
5133
5137static pm_local_variable_and_write_node_t *
5138pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5139 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5140 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5141 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5142
5143 *node = (pm_local_variable_and_write_node_t) {
5144 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
5145 .name_loc = target->location,
5146 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5147 .value = value,
5148 .name = name,
5149 .depth = depth
5150 };
5151
5152 return node;
5153}
5154
5158static pm_local_variable_operator_write_node_t *
5159pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5160 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5161
5162 *node = (pm_local_variable_operator_write_node_t) {
5163 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
5164 .name_loc = target->location,
5165 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5166 .value = value,
5167 .name = name,
5168 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5169 .depth = depth
5170 };
5171
5172 return node;
5173}
5174
5178static pm_local_variable_or_write_node_t *
5179pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5180 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5181 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5182 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5183
5184 *node = (pm_local_variable_or_write_node_t) {
5185 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
5186 .name_loc = target->location,
5187 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5188 .value = value,
5189 .name = name,
5190 .depth = depth
5191 };
5192
5193 return node;
5194}
5195
5199static pm_local_variable_read_node_t *
5200pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5201 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5202
5203 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5204
5205 *node = (pm_local_variable_read_node_t) {
5206 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name),
5207 .name = name_id,
5208 .depth = depth
5209 };
5210
5211 return node;
5212}
5213
5217static pm_local_variable_read_node_t *
5218pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5219 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5220 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5221}
5222
5227static pm_local_variable_read_node_t *
5228pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5229 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5230 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5231}
5232
5236static pm_local_variable_write_node_t *
5237pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5238 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5239 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5240
5241 *node = (pm_local_variable_write_node_t) {
5242 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value),
5243 .name = name,
5244 .depth = depth,
5245 .value = value,
5246 .name_loc = *name_loc,
5247 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5248 };
5249
5250 return node;
5251}
5252
/**
 * Returns true if the bytes in the half-open range [start, end) are exactly
 * the two characters "it".
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5260
/**
 * Returns true if the bytes in the half-open range [start, end) are exactly
 * two characters long and consist of an underscore followed by a decimal
 * digit other than 0 (i.e. _1 through _9).
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5269
5274static inline void
5275pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5276 if (pm_token_is_numbered_parameter(start, end)) {
5277 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5278 }
5279}
5280
5285static pm_local_variable_target_node_t *
5286pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5287 pm_refute_numbered_parameter(parser, location->start, location->end);
5288 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5289
5290 *node = (pm_local_variable_target_node_t) {
5291 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location),
5292 .name = name,
5293 .depth = depth
5294 };
5295
5296 return node;
5297}
5298
5302static pm_match_predicate_node_t *
5303pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5304 pm_assert_value_expression(parser, value);
5305
5306 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5307
5308 *node = (pm_match_predicate_node_t) {
5309 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern),
5310 .value = value,
5311 .pattern = pattern,
5312 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5313 };
5314
5315 return node;
5316}
5317
5321static pm_match_required_node_t *
5322pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5323 pm_assert_value_expression(parser, value);
5324
5325 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5326
5327 *node = (pm_match_required_node_t) {
5328 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern),
5329 .value = value,
5330 .pattern = pattern,
5331 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5332 };
5333
5334 return node;
5335}
5336
5340static pm_match_write_node_t *
5341pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5342 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5343
5344 *node = (pm_match_write_node_t) {
5345 .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call),
5346 .call = call,
5347 .targets = { 0 }
5348 };
5349
5350 return node;
5351}
5352
5356static pm_module_node_t *
5357pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5358 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5359
5360 *node = (pm_module_node_t) {
5361 .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, module_keyword, end_keyword),
5362 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5363 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5364 .constant_path = constant_path,
5365 .body = body,
5366 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5367 .name = pm_parser_constant_id_token(parser, name)
5368 };
5369
5370 return node;
5371}
5372
5376static pm_multi_target_node_t *
5377pm_multi_target_node_create(pm_parser_t *parser) {
5378 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5379
5380 *node = (pm_multi_target_node_t) {
5381 .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0),
5382 .lefts = { 0 },
5383 .rest = NULL,
5384 .rights = { 0 },
5385 .lparen_loc = { 0 },
5386 .rparen_loc = { 0 }
5387 };
5388
5389 return node;
5390}
5391
5395static void
5396pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5397 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5398 if (node->rest == NULL) {
5399 node->rest = target;
5400 } else {
5401 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5402 pm_node_list_append(&node->rights, target);
5403 }
5404 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5405 if (node->rest == NULL) {
5406 node->rest = target;
5407 } else {
5408 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5409 pm_node_list_append(&node->rights, target);
5410 }
5411 } else if (node->rest == NULL) {
5412 pm_node_list_append(&node->lefts, target);
5413 } else {
5414 pm_node_list_append(&node->rights, target);
5415 }
5416
5417 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5418 node->base.location.start = target->location.start;
5419 }
5420
5421 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
5422 node->base.location.end = target->location.end;
5423 }
5424}
5425
5429static void
5430pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
5431 node->base.location.start = lparen->start;
5432 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
5433}
5434
5438static void
5439pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
5440 node->base.location.end = rparen->end;
5441 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
5442}
5443
5447static pm_multi_write_node_t *
5448pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5449 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
5450 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5451
5452 *node = (pm_multi_write_node_t) {
5453 .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value),
5454 .lefts = target->lefts,
5455 .rest = target->rest,
5456 .rights = target->rights,
5457 .lparen_loc = target->lparen_loc,
5458 .rparen_loc = target->rparen_loc,
5459 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5460 .value = value
5461 };
5462
5463 // Explicitly do not call pm_node_destroy here because we want to keep
5464 // around all of the information within the MultiWriteNode node.
5465 xfree(target);
5466
5467 return node;
5468}
5469
5473static pm_next_node_t *
5474pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5475 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5476 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
5477
5478 *node = (pm_next_node_t) {
5479 .base = (
5480 (arguments == NULL)
5481 ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword)
5482 : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments)
5483 ),
5484 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5485 .arguments = arguments
5486 };
5487
5488 return node;
5489}
5490
5494static pm_nil_node_t *
5495pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5496 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5497 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
5498
5499 *node = (pm_nil_node_t) {
5500 .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
5501 };
5502
5503 return node;
5504}
5505
5509static pm_no_keywords_parameter_node_t *
5510pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5511 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5512 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5513 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
5514
5515 *node = (pm_no_keywords_parameter_node_t) {
5516 .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword),
5517 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5518 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
5519 };
5520
5521 return node;
5522}
5523
5527static pm_numbered_parameters_node_t *
5528pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
5529 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
5530
5531 *node = (pm_numbered_parameters_node_t) {
5532 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location),
5533 .maximum = maximum
5534 };
5535
5536 return node;
5537}
5538
5543#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5544
5551static uint32_t
5552pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5553 const uint8_t *start = token->start + 1;
5554 const uint8_t *end = token->end;
5555
5556 ptrdiff_t diff = end - start;
5557 assert(diff > 0);
5558#if PTRDIFF_MAX > SIZE_MAX
5559 assert(diff < (ptrdiff_t) SIZE_MAX);
5560#endif
5561 size_t length = (size_t) diff;
5562
5563 char *digits = xcalloc(length + 1, sizeof(char));
5564 memcpy(digits, start, length);
5565 digits[length] = '\0';
5566
5567 char *endptr;
5568 errno = 0;
5569 unsigned long value = strtoul(digits, &endptr, 10);
5570
5571 if ((digits == endptr) || (*endptr != '\0')) {
5572 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
5573 value = 0;
5574 }
5575
5576 xfree(digits);
5577
5578 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5579 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5580 value = 0;
5581 }
5582
5583 return (uint32_t) value;
5584}
5585
5586#undef NTH_REF_MAX
5587
5591static pm_numbered_reference_read_node_t *
5592pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5593 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5594 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
5595
5596 *node = (pm_numbered_reference_read_node_t) {
5597 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name),
5598 .number = pm_numbered_reference_read_node_number(parser, name)
5599 };
5600
5601 return node;
5602}
5603
5607static pm_optional_parameter_node_t *
5608pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5609 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
5610
5611 *node = (pm_optional_parameter_node_t) {
5612 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value),
5613 .name = pm_parser_constant_id_token(parser, name),
5614 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5615 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5616 .value = value
5617 };
5618
5619 return node;
5620}
5621
5625static pm_or_node_t *
5626pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5627 pm_assert_value_expression(parser, left);
5628
5629 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
5630
5631 *node = (pm_or_node_t) {
5632 .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right),
5633 .left = left,
5634 .right = right,
5635 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5636 };
5637
5638 return node;
5639}
5640
5644static pm_parameters_node_t *
5645pm_parameters_node_create(pm_parser_t *parser) {
5646 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
5647
5648 *node = (pm_parameters_node_t) {
5649 .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0),
5650 .rest = NULL,
5651 .keyword_rest = NULL,
5652 .block = NULL,
5653 .requireds = { 0 },
5654 .optionals = { 0 },
5655 .posts = { 0 },
5656 .keywords = { 0 }
5657 };
5658
5659 return node;
5660}
5661
5665static void
5666pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5667 if (params->base.location.start == NULL) {
5668 params->base.location.start = param->location.start;
5669 } else {
5670 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
5671 }
5672
5673 if (params->base.location.end == NULL) {
5674 params->base.location.end = param->location.end;
5675 } else {
5676 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
5677 }
5678}
5679
5683static void
5684pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
5685 pm_parameters_node_location_set(params, param);
5686 pm_node_list_append(&params->requireds, param);
5687}
5688
5692static void
5693pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5694 pm_parameters_node_location_set(params, UP(param));
5695 pm_node_list_append(&params->optionals, UP(param));
5696}
5697
5701static void
5702pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
5703 pm_parameters_node_location_set(params, param);
5704 pm_node_list_append(&params->posts, param);
5705}
5706
5710static void
5711pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5712 pm_parameters_node_location_set(params, param);
5713 params->rest = param;
5714}
5715
5719static void
5720pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
5721 pm_parameters_node_location_set(params, param);
5722 pm_node_list_append(&params->keywords, param);
5723}
5724
5728static void
5729pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5730 assert(params->keyword_rest == NULL);
5731 pm_parameters_node_location_set(params, param);
5732 params->keyword_rest = param;
5733}
5734
5738static void
5739pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
5740 assert(params->block == NULL);
5741 pm_parameters_node_location_set(params, UP(param));
5742 params->block = param;
5743}
5744
5748static pm_program_node_t *
5749pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5750 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
5751
5752 *node = (pm_program_node_t) {
5753 .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements),
5754 .locals = *locals,
5755 .statements = statements
5756 };
5757
5758 return node;
5759}
5760
5764static pm_parentheses_node_t *
5765pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5766 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
5767
5768 *node = (pm_parentheses_node_t) {
5769 .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing),
5770 .body = body,
5771 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5772 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5773 };
5774
5775 return node;
5776}
5777
5781static pm_pinned_expression_node_t *
5782pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5783 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
5784
5785 *node = (pm_pinned_expression_node_t) {
5786 .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen),
5787 .expression = expression,
5788 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5789 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
5790 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
5791 };
5792
5793 return node;
5794}
5795
5799static pm_pinned_variable_node_t *
5800pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5801 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
5802
5803 *node = (pm_pinned_variable_node_t) {
5804 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable),
5805 .variable = variable,
5806 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5807 };
5808
5809 return node;
5810}
5811
5815static pm_post_execution_node_t *
5816pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5817 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
5818
5819 *node = (pm_post_execution_node_t) {
5820 .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing),
5821 .statements = statements,
5822 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5823 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5824 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5825 };
5826
5827 return node;
5828}
5829
5833static pm_pre_execution_node_t *
5834pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5835 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
5836
5837 *node = (pm_pre_execution_node_t) {
5838 .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing),
5839 .statements = statements,
5840 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5841 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5842 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5843 };
5844
5845 return node;
5846}
5847
5851static pm_range_node_t *
5852pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5853 pm_assert_value_expression(parser, left);
5854 pm_assert_value_expression(parser, right);
5855
5856 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
5857 pm_node_flags_t flags = 0;
5858
5859 // Indicate that this node is an exclusive range if the operator is `...`.
5860 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5861 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5862 }
5863
5864 // Indicate that this node is a static literal (i.e., can be compiled with
5865 // a putobject in CRuby) if the left and right are implicit nil, explicit
5866 // nil, or integers.
5867 if (
5868 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5869 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5870 ) {
5871 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5872 }
5873
5874 *node = (pm_range_node_t) {
5875 .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)),
5876 .left = left,
5877 .right = right,
5878 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5879 };
5880
5881 return node;
5882}
5883
5887static pm_redo_node_t *
5888pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5889 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5890 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
5891
5892 *node = (pm_redo_node_t) {
5893 .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token)
5894 };
5895
5896 return node;
5897}
5898
5903static pm_regular_expression_node_t *
5904pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5905 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
5906 pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
5907
5908 *node = (pm_regular_expression_node_t) {
5909 .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing),
5910 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5911 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
5912 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5913 .unescaped = *unescaped
5914 };
5915
5916 return node;
5917}
5918
5922static inline pm_regular_expression_node_t *
5923pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5924 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5925}
5926
5930static pm_required_parameter_node_t *
5931pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5932 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
5933
5934 *node = (pm_required_parameter_node_t) {
5935 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token),
5936 .name = pm_parser_constant_id_token(parser, token)
5937 };
5938
5939 return node;
5940}
5941
5945static pm_rescue_modifier_node_t *
5946pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5947 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
5948
5949 *node = (pm_rescue_modifier_node_t) {
5950 .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression),
5951 .expression = expression,
5952 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5953 .rescue_expression = rescue_expression
5954 };
5955
5956 return node;
5957}
5958
5962static pm_rescue_node_t *
5963pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5964 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
5965
5966 *node = (pm_rescue_node_t) {
5967 .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword),
5968 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5969 .operator_loc = { 0 },
5970 .then_keyword_loc = { 0 },
5971 .reference = NULL,
5972 .statements = NULL,
5973 .subsequent = NULL,
5974 .exceptions = { 0 }
5975 };
5976
5977 return node;
5978}
5979
5980static inline void
5981pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
5982 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
5983}
5984
5988static void
5989pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5990 node->reference = reference;
5991 node->base.location.end = reference->location.end;
5992}
5993
5997static void
5998pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
5999 node->statements = statements;
6000 if (pm_statements_node_body_length(statements) > 0) {
6001 node->base.location.end = statements->base.location.end;
6002 }
6003}
6004
6008static void
6009pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6010 node->subsequent = subsequent;
6011 node->base.location.end = subsequent->base.location.end;
6012}
6013
6017static void
6018pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6019 pm_node_list_append(&node->exceptions, exception);
6020 node->base.location.end = exception->location.end;
6021}
6022
6026static pm_rest_parameter_node_t *
6027pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6028 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6029
6030 *node = (pm_rest_parameter_node_t) {
6031 .base = (
6032 (name->type == PM_TOKEN_NOT_PROVIDED)
6033 ? PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator)
6034 : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name)
6035 ),
6036 .name = pm_parser_optional_constant_id_token(parser, name),
6037 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6038 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6039 };
6040
6041 return node;
6042}
6043
6047static pm_retry_node_t *
6048pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6049 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6050 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6051
6052 *node = (pm_retry_node_t) {
6053 .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token)
6054 };
6055
6056 return node;
6057}
6058
6062static pm_return_node_t *
6063pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6064 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6065
6066 *node = (pm_return_node_t) {
6067 .base = (
6068 (arguments == NULL)
6069 ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword)
6070 : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments)
6071 ),
6072 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6073 .arguments = arguments
6074 };
6075
6076 return node;
6077}
6078
6082static pm_self_node_t *
6083pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6084 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6085 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6086
6087 *node = (pm_self_node_t) {
6088 .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token)
6089 };
6090
6091 return node;
6092}
6093
6097static pm_shareable_constant_node_t *
6098pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6099 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6100
6101 *node = (pm_shareable_constant_node_t) {
6102 .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write),
6103 .write = write
6104 };
6105
6106 return node;
6107}
6108
6112static pm_singleton_class_node_t *
6113pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6114 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6115
6116 *node = (pm_singleton_class_node_t) {
6117 .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword),
6118 .locals = *locals,
6119 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6120 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6121 .expression = expression,
6122 .body = body,
6123 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6124 };
6125
6126 return node;
6127}
6128
6132static pm_source_encoding_node_t *
6133pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6134 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6135 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6136
6137 *node = (pm_source_encoding_node_t) {
6138 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6139 };
6140
6141 return node;
6142}
6143
6147static pm_source_file_node_t*
6148pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6149 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6150 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6151
6152 pm_node_flags_t flags = 0;
6153
6154 switch (parser->frozen_string_literal) {
6155 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6156 flags |= PM_STRING_FLAGS_MUTABLE;
6157 break;
6158 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6159 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6160 break;
6161 }
6162
6163 *node = (pm_source_file_node_t) {
6164 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword),
6165 .filepath = parser->filepath
6166 };
6167
6168 return node;
6169}
6170
6174static pm_source_line_node_t *
6175pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6176 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6177 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6178
6179 *node = (pm_source_line_node_t) {
6180 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6181 };
6182
6183 return node;
6184}
6185
6189static pm_splat_node_t *
6190pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6191 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6192
6193 *node = (pm_splat_node_t) {
6194 .base = (
6195 (expression == NULL)
6196 ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator)
6197 : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression)
6198 ),
6199 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6200 .expression = expression
6201 };
6202
6203 return node;
6204}
6205
6209static pm_statements_node_t *
6210pm_statements_node_create(pm_parser_t *parser) {
6211 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6212
6213 *node = (pm_statements_node_t) {
6214 .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0),
6215 .body = { 0 }
6216 };
6217
6218 return node;
6219}
6220
6224static size_t
6225pm_statements_node_body_length(pm_statements_node_t *node) {
6226 return node && node->body.size;
6227}
6228
6232static void
6233pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6234 node->base.location = (pm_location_t) { .start = start, .end = end };
6235}
6236
6241static inline void
6242pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6243 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6244 node->base.location.start = statement->location.start;
6245 }
6246
6247 if (statement->location.end > node->base.location.end) {
6248 node->base.location.end = statement->location.end;
6249 }
6250}
6251
6255static void
6256pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6257 pm_statements_node_body_update(node, statement);
6258
6259 if (node->body.size > 0) {
6260 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6261
6262 switch (PM_NODE_TYPE(previous)) {
6263 case PM_BREAK_NODE:
6264 case PM_NEXT_NODE:
6265 case PM_REDO_NODE:
6266 case PM_RETRY_NODE:
6267 case PM_RETURN_NODE:
6268 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6269 break;
6270 default:
6271 break;
6272 }
6273 }
6274
6275 pm_node_list_append(&node->body, statement);
6276 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6277}
6278
6282static void
6283pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
6284 pm_statements_node_body_update(node, statement);
6285 pm_node_list_prepend(&node->body, statement);
6286 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6287}
6288
6292static inline pm_string_node_t *
6293pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6294 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
6295 pm_node_flags_t flags = 0;
6296
6297 switch (parser->frozen_string_literal) {
6298 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6299 flags = PM_STRING_FLAGS_MUTABLE;
6300 break;
6301 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6302 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6303 break;
6304 }
6305
6306 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start);
6307 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end);
6308
6309 *node = (pm_string_node_t) {
6310 .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end),
6311 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6312 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6313 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6314 .unescaped = *string
6315 };
6316
6317 return node;
6318}
6319
6323static pm_string_node_t *
6324pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6325 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6326}
6327
6332static pm_string_node_t *
6333pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6334 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6335 parser->current_string = PM_STRING_EMPTY;
6336 return node;
6337}
6338
6342static pm_super_node_t *
6343pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6344 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6345 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
6346
6347 const uint8_t *end = pm_arguments_end(arguments);
6348 if (end == NULL) {
6349 assert(false && "unreachable");
6350 }
6351
6352 *node = (pm_super_node_t) {
6353 .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end),
6354 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6355 .lparen_loc = arguments->opening_loc,
6356 .arguments = arguments->arguments,
6357 .rparen_loc = arguments->closing_loc,
6358 .block = arguments->block
6359 };
6360
6361 return node;
6362}
6363
6368static bool
6369pm_ascii_only_p(const pm_string_t *contents) {
6370 const size_t length = pm_string_length(contents);
6371 const uint8_t *source = pm_string_source(contents);
6372
6373 for (size_t index = 0; index < length; index++) {
6374 if (source[index] & 0x80) return false;
6375 }
6376
6377 return true;
6378}
6379
6383static void
6384parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6385 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6386 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6387
6388 if (width == 0) {
6389 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6390 break;
6391 }
6392
6393 cursor += width;
6394 }
6395}
6396
6401static void
6402parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6403 const pm_encoding_t *encoding = parser->encoding;
6404
6405 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6406 size_t width = encoding->char_width(cursor, end - cursor);
6407
6408 if (width == 0) {
6409 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6410 break;
6411 }
6412
6413 cursor += width;
6414 }
6415}
6416
6426static inline pm_node_flags_t
6427parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6428 if (parser->explicit_encoding != NULL) {
6429 // A Symbol may optionally have its encoding explicitly set. This will
6430 // happen if an escape sequence results in a non-ASCII code point.
6431 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6432 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6433 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6434 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6435 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6436 } else if (validate) {
6437 parse_symbol_encoding_validate_other(parser, location, contents);
6438 }
6439 } else if (pm_ascii_only_p(contents)) {
6440 // Ruby stipulates that all source files must use an ASCII-compatible
6441 // encoding. Thus, all symbols appearing in source are eligible for
6442 // "downgrading" to US-ASCII.
6443 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6444 } else if (validate) {
6445 parse_symbol_encoding_validate_other(parser, location, contents);
6446 }
6447
6448 return 0;
6449}
6450
/**
 * Validate a regular expression source against a single encoding modifier and
 * return the flags the regular expression should carry.
 *
 * The modifier character and modifier_encoding must agree (n/ASCII-8BIT,
 * u/UTF-8, e/EUC-JP, s/Windows-31J); this is asserted. Errors are recorded on
 * the parser against its current token. Every path returns the incoming flags
 * unchanged except one: an ASCII-only source with no explicit encoding and the
 * 'n' modifier returns PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING.
 */
static pm_node_flags_t
parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
    assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
            (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
            (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
            (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));

    // There's special validation logic used if a string does not contain any character escape sequences.
    if (parser->explicit_encoding == NULL) {
        // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
        // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
        // the US-ASCII encoding.
        if (ascii_only) {
            return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
        }

        // From here on ascii_only is known to be false, so the !ascii_only
        // checks below always hold.
        if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
            if (!ascii_only) {
                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
            }
        } else if (parser->encoding != modifier_encoding) {
            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);

            if (modifier == 'n' && !ascii_only) {
                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
            }
        }

        return flags;
    }

    // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
    // Placeholder: always false for now, so the first branch below never runs.
    bool mixed_encoding = false;

    if (mixed_encoding) {
        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
    } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
        // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
        // Placeholder: always true for now, so no error is reported here.
        bool valid_string_in_modifier_encoding = true;

        if (!valid_string_in_modifier_encoding) {
            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
        }
    } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
        // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
        if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
        }
    }

    // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
    return flags;
}
6504
/**
 * Validate the encoding of a regular expression source and return the flags
 * the regular expression should carry.
 *
 * If one of the encoding modifier flags is present in flags, the result comes
 * from parse_and_validate_regular_expression_encoding_modifier for the first
 * modifier found. Otherwise the function returns a forced US-ASCII flag for an
 * ASCII-only source, a forced UTF-8 or forced binary flag depending on the
 * explicit and source encodings, or 0 when no encoding needs to be forced.
 * Errors are recorded on the parser against its current token.
 */
static pm_node_flags_t
parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
    // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
    // Placeholder: always true for now, so the branch below never runs.
    bool valid_unicode_range = true;
    if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
        return flags;
    }

    // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
    // to multi-byte characters are allowed.
    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
        // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
        // following error message appearing twice. We do the same for compatibility.
        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
    }

    // The modifier flags are tested in a fixed order (n, u, e, s) and the
    // first one present decides the result.

    if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
    }

    if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
    }

    if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
    }

    if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
    }

    // At this point no encoding modifiers will be present on the regular expression as they would have already
    // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
    // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
    if (ascii_only) {
        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
    }

    // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
    // or by specifying a modifier.
    //
    // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
    if (parser->explicit_encoding != NULL) {
        if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
        } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
            return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
        }
    }

    return 0;
}
6573
6578static pm_symbol_node_t *
6579pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6580 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6581
6582 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start);
6583 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end);
6584
6585 *node = (pm_symbol_node_t) {
6586 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end),
6587 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6588 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
6589 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6590 .unescaped = *unescaped
6591 };
6592
6593 return node;
6594}
6595
6599static inline pm_symbol_node_t *
6600pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6601 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6602}
6603
6607static pm_symbol_node_t *
6608pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6609 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6610 parser->current_string = PM_STRING_EMPTY;
6611 return node;
6612}
6613
6617static pm_symbol_node_t *
6618pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6619 pm_symbol_node_t *node;
6620
6621 switch (token->type) {
6622 case PM_TOKEN_LABEL: {
6623 pm_token_t opening = not_provided(parser);
6624 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6625
6626 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6627 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6628
6629 assert((label.end - label.start) >= 0);
6630 pm_string_shared_init(&node->unescaped, label.start, label.end);
6631 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6632
6633 break;
6634 }
6635 case PM_TOKEN_MISSING: {
6636 pm_token_t opening = not_provided(parser);
6637 pm_token_t closing = not_provided(parser);
6638
6639 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
6640 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6641 break;
6642 }
6643 default:
6644 assert(false && "unreachable");
6645 node = NULL;
6646 break;
6647 }
6648
6649 return node;
6650}
6651
6655static pm_symbol_node_t *
6656pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6657 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6658
6659 *node = (pm_symbol_node_t) {
6660 .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING),
6661 .value_loc = PM_LOCATION_NULL_VALUE(parser),
6662 .unescaped = { 0 }
6663 };
6664
6665 pm_string_constant_init(&node->unescaped, content, strlen(content));
6666 return node;
6667}
6668
6672static bool
6673pm_symbol_node_label_p(pm_node_t *node) {
6674 const uint8_t *end = NULL;
6675
6676 switch (PM_NODE_TYPE(node)) {
6677 case PM_SYMBOL_NODE:
6678 end = ((pm_symbol_node_t *) node)->closing_loc.end;
6679 break;
6680 case PM_INTERPOLATED_SYMBOL_NODE:
6681 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
6682 break;
6683 default:
6684 return false;
6685 }
6686
6687 return (end != NULL) && (end[-1] == ':');
6688}
6689
6693static pm_symbol_node_t *
6694pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6695 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6696
6697 *new_node = (pm_symbol_node_t) {
6698 .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
6699 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6700 .value_loc = node->content_loc,
6701 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6702 .unescaped = node->unescaped
6703 };
6704
6705 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
6706 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6707
6708 // We are explicitly _not_ using pm_node_destroy here because we don't want
6709 // to trash the unescaped string. We could instead copy the string if we
6710 // know that it is owned, but we're taking the fast path for now.
6711 xfree(node);
6712
6713 return new_node;
6714}
6715
6719static pm_string_node_t *
6720pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6721 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
6722 pm_node_flags_t flags = 0;
6723
6724 switch (parser->frozen_string_literal) {
6725 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6726 flags = PM_STRING_FLAGS_MUTABLE;
6727 break;
6728 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6729 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6730 break;
6731 }
6732
6733 *new_node = (pm_string_node_t) {
6734 .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node),
6735 .opening_loc = node->opening_loc,
6736 .content_loc = node->value_loc,
6737 .closing_loc = node->closing_loc,
6738 .unescaped = node->unescaped
6739 };
6740
6741 // We are explicitly _not_ using pm_node_destroy here because we don't want
6742 // to trash the unescaped string. We could instead copy the string if we
6743 // know that it is owned, but we're taking the fast path for now.
6744 xfree(node);
6745
6746 return new_node;
6747}
6748
6752static pm_true_node_t *
6753pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6754 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6755 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6756
6757 *node = (pm_true_node_t) {
6758 .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6759 };
6760
6761 return node;
6762}
6763
6767static pm_true_node_t *
6768pm_true_node_synthesized_create(pm_parser_t *parser) {
6769 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6770
6771 *node = (pm_true_node_t) {
6772 .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL)
6773 };
6774
6775 return node;
6776}
6777
6781static pm_undef_node_t *
6782pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6783 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6784 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
6785
6786 *node = (pm_undef_node_t) {
6787 .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token),
6788 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
6789 .names = { 0 }
6790 };
6791
6792 return node;
6793}
6794
6798static void
6799pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
6800 node->base.location.end = name->location.end;
6801 pm_node_list_append(&node->names, name);
6802}
6803
6807static pm_unless_node_t *
6808pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6809 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6810
6811 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6812 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6813
6814 *node = (pm_unless_node_t) {
6815 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end),
6816 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6817 .predicate = predicate,
6818 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
6819 .statements = statements,
6820 .else_clause = NULL,
6821 .end_keyword_loc = { 0 }
6822 };
6823
6824 return node;
6825}
6826
6830static pm_unless_node_t *
6831pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6832 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6833 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6834
6835 pm_statements_node_t *statements = pm_statements_node_create(parser);
6836 pm_statements_node_body_append(parser, statements, statement, true);
6837
6838 *node = (pm_unless_node_t) {
6839 .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
6840 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
6841 .predicate = predicate,
6842 .then_keyword_loc = { 0 },
6843 .statements = statements,
6844 .else_clause = NULL,
6845 .end_keyword_loc = { 0 }
6846 };
6847
6848 return node;
6849}
6850
6851static inline void
6852pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
6853 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
6854 node->base.location.end = end_keyword->end;
6855}
6856
6862static void
6863pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6864 assert(parser->current_block_exits != NULL);
6865
6866 // All of the block exits that we want to remove should be within the
6867 // statements, and since we are modifying the statements, we shouldn't have
6868 // to check the end location.
6869 const uint8_t *start = statements->base.location.start;
6870
6871 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6872 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6873 if (block_exit->location.start < start) break;
6874
6875 // Implicitly remove from the list by lowering the size.
6876 parser->current_block_exits->size--;
6877 }
6878}
6879
6883static pm_until_node_t *
6884pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6885 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6886 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6887
6888 *node = (pm_until_node_t) {
6889 .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing),
6890 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6891 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6892 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6893 .predicate = predicate,
6894 .statements = statements
6895 };
6896
6897 return node;
6898}
6899
6903static pm_until_node_t *
6904pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6905 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6906 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6907 pm_loop_modifier_block_exits(parser, statements);
6908
6909 *node = (pm_until_node_t) {
6910 .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate),
6911 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6912 .do_keyword_loc = { 0 },
6913 .closing_loc = { 0 },
6914 .predicate = predicate,
6915 .statements = statements
6916 };
6917
6918 return node;
6919}
6920
6924static pm_when_node_t *
6925pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6926 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
6927
6928 *node = (pm_when_node_t) {
6929 .base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword),
6930 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6931 .statements = NULL,
6932 .then_keyword_loc = { 0 },
6933 .conditions = { 0 }
6934 };
6935
6936 return node;
6937}
6938
6942static void
6943pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
6944 node->base.location.end = condition->location.end;
6945 pm_node_list_append(&node->conditions, condition);
6946}
6947
6951static inline void
6952pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
6953 node->base.location.end = then_keyword->end;
6954 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
6955}
6956
6960static void
6961pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6962 if (statements->base.location.end > node->base.location.end) {
6963 node->base.location.end = statements->base.location.end;
6964 }
6965
6966 node->statements = statements;
6967}
6968
6972static pm_while_node_t *
6973pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6974 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6975 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6976
6977 *node = (pm_while_node_t) {
6978 .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, flags, keyword, closing),
6979 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6980 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6981 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6982 .predicate = predicate,
6983 .statements = statements
6984 };
6985
6986 return node;
6987}
6988
6992static pm_while_node_t *
6993pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6994 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6995 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6996 pm_loop_modifier_block_exits(parser, statements);
6997
6998 *node = (pm_while_node_t) {
6999 .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate),
7000 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7001 .do_keyword_loc = { 0 },
7002 .closing_loc = { 0 },
7003 .predicate = predicate,
7004 .statements = statements
7005 };
7006
7007 return node;
7008}
7009
7013static pm_while_node_t *
7014pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7015 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7016
7017 *node = (pm_while_node_t) {
7018 .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0),
7019 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7020 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7021 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7022 .predicate = predicate,
7023 .statements = statements
7024 };
7025
7026 return node;
7027}
7028
7033static pm_x_string_node_t *
7034pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7035 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7036
7037 *node = (pm_x_string_node_t) {
7038 .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing),
7039 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7040 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7041 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7042 .unescaped = *unescaped
7043 };
7044
7045 return node;
7046}
7047
7051static inline pm_x_string_node_t *
7052pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7053 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7054}
7055
7059static pm_yield_node_t *
7060pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7061 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7062
7063 const uint8_t *end;
7064 if (rparen_loc->start != NULL) {
7065 end = rparen_loc->end;
7066 } else if (arguments != NULL) {
7067 end = arguments->base.location.end;
7068 } else if (lparen_loc->start != NULL) {
7069 end = lparen_loc->end;
7070 } else {
7071 end = keyword->end;
7072 }
7073
7074 *node = (pm_yield_node_t) {
7075 .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end),
7076 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7077 .lparen_loc = *lparen_loc,
7078 .arguments = arguments,
7079 .rparen_loc = *rparen_loc
7080 };
7081
7082 return node;
7083}
7084
7089static int
7090pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7091 pm_scope_t *scope = parser->current_scope;
7092 int depth = 0;
7093
7094 while (scope != NULL) {
7095 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7096 if (scope->closed) break;
7097
7098 scope = scope->previous;
7099 depth++;
7100 }
7101
7102 return -1;
7103}
7104
7110static inline int
7111pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7112 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7113}
7114
7118static inline void
7119pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7120 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7121}
7122
7126static pm_constant_id_t
7127pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7128 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7129 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7130 return constant_id;
7131}
7132
7136static inline pm_constant_id_t
7137pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7138 return pm_parser_local_add_location(parser, token->start, token->end, reads);
7139}
7140
7144static pm_constant_id_t
7145pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7146 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7147 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7148 return constant_id;
7149}
7150
7154static pm_constant_id_t
7155pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7156 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7157 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7158 return constant_id;
7159}
7160
7168static bool
7169pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7170 // We want to check whether the parameter name is a numbered parameter or
7171 // not.
7172 pm_refute_numbered_parameter(parser, name->start, name->end);
7173
7174 // Otherwise we'll fetch the constant id for the parameter name and check
7175 // whether it's already in the current scope.
7176 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7177
7178 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7179 // Add an error if the parameter doesn't start with _ and has been seen before
7180 if ((name->start < name->end) && (*name->start != '_')) {
7181 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7182 }
7183 return true;
7184 }
7185 return false;
7186}
7187
7191static void
7192pm_parser_scope_pop(pm_parser_t *parser) {
7193 pm_scope_t *scope = parser->current_scope;
7194 parser->current_scope = scope->previous;
7195 pm_locals_free(&scope->locals);
7196 pm_node_list_free(&scope->implicit_parameters);
7197 xfree(scope);
7198}
7199
7200/******************************************************************************/
7201/* Stack helpers */
7202/******************************************************************************/
7203
7207static inline void
7208pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7209 *stack = (*stack << 1) | (value & 1);
7210}
7211
7215static inline void
7216pm_state_stack_pop(pm_state_stack_t *stack) {
7217 *stack >>= 1;
7218}
7219
7223static inline bool
7224pm_state_stack_p(const pm_state_stack_t *stack) {
7225 return *stack & 1;
7226}
7227
7228static inline void
7229pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7230 // Use the negation of the value to prevent stack overflow.
7231 pm_state_stack_push(&parser->accepts_block_stack, !value);
7232}
7233
7234static inline void
7235pm_accepts_block_stack_pop(pm_parser_t *parser) {
7236 pm_state_stack_pop(&parser->accepts_block_stack);
7237}
7238
7239static inline bool
7240pm_accepts_block_stack_p(pm_parser_t *parser) {
7241 return !pm_state_stack_p(&parser->accepts_block_stack);
7242}
7243
7244static inline void
7245pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7246 pm_state_stack_push(&parser->do_loop_stack, value);
7247}
7248
7249static inline void
7250pm_do_loop_stack_pop(pm_parser_t *parser) {
7251 pm_state_stack_pop(&parser->do_loop_stack);
7252}
7253
7254static inline bool
7255pm_do_loop_stack_p(pm_parser_t *parser) {
7256 return pm_state_stack_p(&parser->do_loop_stack);
7257}
7258
7259/******************************************************************************/
7260/* Lexer check helpers */
7261/******************************************************************************/
7262
7267static inline uint8_t
7268peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7269 if (cursor < parser->end) {
7270 return *cursor;
7271 } else {
7272 return '\0';
7273 }
7274}
7275
7281static inline uint8_t
7282peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7283 return peek_at(parser, parser->current.end + offset);
7284}
7285
7290static inline uint8_t
7291peek(const pm_parser_t *parser) {
7292 return peek_at(parser, parser->current.end);
7293}
7294
7299static inline bool
7300match(pm_parser_t *parser, uint8_t value) {
7301 if (peek(parser) == value) {
7302 parser->current.end++;
7303 return true;
7304 }
7305 return false;
7306}
7307
7312static inline size_t
7313match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7314 if (peek_at(parser, cursor) == '\n') {
7315 return 1;
7316 }
7317 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7318 return 2;
7319 }
7320 return 0;
7321}
7322
7328static inline size_t
7329match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7330 return match_eol_at(parser, parser->current.end + offset);
7331}
7332
7338static inline size_t
7339match_eol(pm_parser_t *parser) {
7340 return match_eol_at(parser, parser->current.end);
7341}
7342
/**
 * Return a pointer to the first '\n' within the `length` bytes starting at
 * cursor, or NULL when there is none. The length must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is sufficient here: per the original note, none of
    // the supported encodings use \n as part of a multi-byte character.
    return memchr(cursor, '\n', span);
}
7355
7359static inline bool
7360ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7361 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7362}
7363
7368static bool
7369parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7370 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7371
7372 if (encoding != NULL) {
7373 if (parser->encoding != encoding) {
7374 parser->encoding = encoding;
7375 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7376 }
7377
7378 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7379 return true;
7380 }
7381
7382 return false;
7383}
7384
7389static void
7390parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7391 const uint8_t *cursor = parser->current.start + 1;
7392 const uint8_t *end = parser->current.end;
7393
7394 bool separator = false;
7395 while (true) {
7396 if (end - cursor <= 6) return;
7397 switch (cursor[6]) {
7398 case 'C': case 'c': cursor += 6; continue;
7399 case 'O': case 'o': cursor += 5; continue;
7400 case 'D': case 'd': cursor += 4; continue;
7401 case 'I': case 'i': cursor += 3; continue;
7402 case 'N': case 'n': cursor += 2; continue;
7403 case 'G': case 'g': cursor += 1; continue;
7404 case '=': case ':':
7405 separator = true;
7406 cursor += 6;
7407 break;
7408 default:
7409 cursor += 6;
7410 if (pm_char_is_whitespace(*cursor)) break;
7411 continue;
7412 }
7413 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7414 separator = false;
7415 }
7416
7417 while (true) {
7418 do {
7419 if (++cursor >= end) return;
7420 } while (pm_char_is_whitespace(*cursor));
7421
7422 if (separator) break;
7423 if (*cursor != '=' && *cursor != ':') return;
7424
7425 separator = true;
7426 cursor++;
7427 }
7428
7429 const uint8_t *value_start = cursor;
7430 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7431
7432 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7433 // If we were unable to parse the encoding value, then we've got an
7434 // issue because we didn't understand the encoding that the user was
7435 // trying to use. In this case we'll keep using the default encoding but
7436 // add an error to the parser to indicate an unsuccessful parse.
7437 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7438 }
7439}
7440
/**
 * The result of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
7446
7451static pm_magic_comment_boolean_value_t
7452parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7453 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7454 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7455 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7456 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7457 } else {
7458 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7459 }
7460}
7461
/**
 * Return true if the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7466
7472static inline const uint8_t *
7473parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7474 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7475 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7476 return cursor;
7477 }
7478 cursor++;
7479 }
7480 return NULL;
7481}
7482
7493static inline bool
7494parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7495 bool result = true;
7496
7497 const uint8_t *start = parser->current.start + 1;
7498 const uint8_t *end = parser->current.end;
7499 if (end - start <= 7) return false;
7500
7501 const uint8_t *cursor;
7502 bool indicator = false;
7503
7504 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7505 start = cursor + 3;
7506
7507 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7508 end = cursor;
7509 indicator = true;
7510 } else {
7511 // If we have a start marker but not an end marker, then we cannot
7512 // have a magic comment.
7513 return false;
7514 }
7515 }
7516
7517 cursor = start;
7518 while (cursor < end) {
7519 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7520
7521 const uint8_t *key_start = cursor;
7522 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7523
7524 const uint8_t *key_end = cursor;
7525 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7526 if (cursor == end) break;
7527
7528 if (*cursor == ':') {
7529 cursor++;
7530 } else {
7531 if (!indicator) return false;
7532 continue;
7533 }
7534
7535 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7536 if (cursor == end) break;
7537
7538 const uint8_t *value_start;
7539 const uint8_t *value_end;
7540
7541 if (*cursor == '"') {
7542 value_start = ++cursor;
7543 for (; cursor < end && *cursor != '"'; cursor++) {
7544 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7545 }
7546 value_end = cursor;
7547 if (cursor < end && *cursor == '"') cursor++;
7548 } else {
7549 value_start = cursor;
7550 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7551 value_end = cursor;
7552 }
7553
7554 if (indicator) {
7555 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7556 } else {
7557 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7558 if (cursor != end) return false;
7559 }
7560
7561 // Here, we need to do some processing on the key to swap out dashes for
7562 // underscores. We only need to do this if there _is_ a dash in the key.
7563 pm_string_t key;
7564 const size_t key_length = (size_t) (key_end - key_start);
7565 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7566
7567 if (dash == NULL) {
7568 pm_string_shared_init(&key, key_start, key_end);
7569 } else {
7570 uint8_t *buffer = xmalloc(key_length);
7571 if (buffer == NULL) break;
7572
7573 memcpy(buffer, key_start, key_length);
7574 buffer[dash - key_start] = '_';
7575
7576 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7577 buffer[dash - key_start] = '_';
7578 }
7579
7580 pm_string_owned_init(&key, buffer, key_length);
7581 }
7582
7583 // Finally, we can start checking the key against the list of known
7584 // magic comment keys, and potentially change state based on that.
7585 const uint8_t *key_source = pm_string_source(&key);
7586 uint32_t value_length = (uint32_t) (value_end - value_start);
7587
7588 // We only want to attempt to compare against encoding comments if it's
7589 // the first line in the file (or the second in the case of a shebang).
7590 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7591 if (
7592 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7593 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7594 ) {
7595 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7596 }
7597 }
7598
7599 if (key_length == 11) {
7600 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7601 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7602 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7603 PM_PARSER_WARN_TOKEN_FORMAT(
7604 parser,
7605 parser->current,
7606 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7607 (int) key_length,
7608 (const char *) key_source,
7609 (int) value_length,
7610 (const char *) value_start
7611 );
7612 break;
7613 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7614 parser->warn_mismatched_indentation = false;
7615 break;
7616 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7617 parser->warn_mismatched_indentation = true;
7618 break;
7619 }
7620 }
7621 } else if (key_length == 21) {
7622 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7623 // We only want to handle frozen string literal comments if it's
7624 // before any semantic tokens have been seen.
7625 if (semantic_token_seen) {
7626 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7627 } else {
7628 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7629 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7630 PM_PARSER_WARN_TOKEN_FORMAT(
7631 parser,
7632 parser->current,
7633 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7634 (int) key_length,
7635 (const char *) key_source,
7636 (int) value_length,
7637 (const char *) value_start
7638 );
7639 break;
7640 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7642 break;
7643 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7645 break;
7646 }
7647 }
7648 }
7649 } else if (key_length == 24) {
7650 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7651 const uint8_t *cursor = parser->current.start;
7652 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7653
7654 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7655 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7656 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7657 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7658 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7659 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7660 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7661 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7662 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7663 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7664 } else {
7665 PM_PARSER_WARN_TOKEN_FORMAT(
7666 parser,
7667 parser->current,
7668 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7669 (int) key_length,
7670 (const char *) key_source,
7671 (int) value_length,
7672 (const char *) value_start
7673 );
7674 }
7675 }
7676 }
7677
7678 // When we're done, we want to free the string in case we had to
7679 // allocate memory for it.
7680 pm_string_free(&key);
7681
7682 // Allocate a new magic comment node to append to the parser's list.
7684 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7685 magic_comment->key_start = key_start;
7686 magic_comment->value_start = value_start;
7687 magic_comment->key_length = (uint32_t) key_length;
7688 magic_comment->value_length = value_length;
7689 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7690 }
7691 }
7692
7693 return result;
7694}
7695
7696/******************************************************************************/
7697/* Context manipulations */
7698/******************************************************************************/
7699
7700static const uint32_t context_terminators[] = {
7701 [PM_CONTEXT_NONE] = 0,
7702 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7703 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7704 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7705 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7706 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7707 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7708 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7709 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7710 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7711 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7712 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7713 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7714 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7715 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7716 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7717 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7718 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7719 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7720 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7721 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7722 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7723 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7724 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7725 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7726 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7727 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7728 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7729 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7730 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7731 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7732 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7733 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7734 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7735 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7736 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7737 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7738 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7739 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7740 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7741 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7742 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7743 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7744 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7745 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7746 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7747 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7748 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7749 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7750 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7751 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7752 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7753 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7754 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7755 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7756};
7757
7758static inline bool
7759context_terminator(pm_context_t context, pm_token_t *token) {
7760 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7761}
7762
7767static pm_context_t
7768context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7769 pm_context_node_t *context_node = parser->current_context;
7770
7771 while (context_node != NULL) {
7772 if (context_terminator(context_node->context, token)) return context_node->context;
7773 context_node = context_node->prev;
7774 }
7775
7776 return PM_CONTEXT_NONE;
7777}
7778
7779static bool
7780context_push(pm_parser_t *parser, pm_context_t context) {
7781 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7782 if (context_node == NULL) return false;
7783
7784 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7785
7786 if (parser->current_context == NULL) {
7787 parser->current_context = context_node;
7788 } else {
7789 context_node->prev = parser->current_context;
7790 parser->current_context = context_node;
7791 }
7792
7793 return true;
7794}
7795
7796static void
7797context_pop(pm_parser_t *parser) {
7798 pm_context_node_t *prev = parser->current_context->prev;
7799 xfree(parser->current_context);
7800 parser->current_context = prev;
7801}
7802
7803static bool
7804context_p(const pm_parser_t *parser, pm_context_t context) {
7805 pm_context_node_t *context_node = parser->current_context;
7806
7807 while (context_node != NULL) {
7808 if (context_node->context == context) return true;
7809 context_node = context_node->prev;
7810 }
7811
7812 return false;
7813}
7814
7815static bool
7816context_def_p(const pm_parser_t *parser) {
7817 pm_context_node_t *context_node = parser->current_context;
7818
7819 while (context_node != NULL) {
7820 switch (context_node->context) {
7821 case PM_CONTEXT_DEF:
7826 return true;
7827 case PM_CONTEXT_CLASS:
7831 case PM_CONTEXT_MODULE:
7835 case PM_CONTEXT_SCLASS:
7839 return false;
7840 default:
7841 context_node = context_node->prev;
7842 }
7843 }
7844
7845 return false;
7846}
7847
7852static const char *
7853context_human(pm_context_t context) {
7854 switch (context) {
7855 case PM_CONTEXT_NONE:
7856 assert(false && "unreachable");
7857 return "";
7858 case PM_CONTEXT_BEGIN: return "begin statement";
7859 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7860 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7861 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7862 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7863 case PM_CONTEXT_CASE_IN: return "'in' clause";
7864 case PM_CONTEXT_CLASS: return "class definition";
7865 case PM_CONTEXT_DEF: return "method definition";
7866 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7867 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7868 case PM_CONTEXT_DEFINED: return "'defined?' expression";
7869 case PM_CONTEXT_ELSE:
7876 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7877 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7878 case PM_CONTEXT_EMBEXPR: return "embedded expression";
7885 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7886 case PM_CONTEXT_FOR: return "for loop";
7887 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7888 case PM_CONTEXT_IF: return "if statement";
7889 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7890 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7891 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7892 case PM_CONTEXT_MAIN: return "top level context";
7893 case PM_CONTEXT_MODULE: return "module definition";
7894 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7895 case PM_CONTEXT_PARENS: return "parentheses";
7896 case PM_CONTEXT_POSTEXE: return "'END' block";
7897 case PM_CONTEXT_PREDICATE: return "predicate";
7898 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
7906 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7907 case PM_CONTEXT_SCLASS: return "singleton class definition";
7908 case PM_CONTEXT_TERNARY: return "ternary expression";
7909 case PM_CONTEXT_UNLESS: return "unless statement";
7910 case PM_CONTEXT_UNTIL: return "until statement";
7911 case PM_CONTEXT_WHILE: return "while statement";
7912 }
7913
7914 assert(false && "unreachable");
7915 return "";
7916}
7917
7918/******************************************************************************/
7919/* Specific token lexers */
7920/******************************************************************************/
7921
7922static inline void
7923pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7924 if (invalid != NULL) {
7925 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7926 pm_parser_err(parser, invalid, invalid + 1, diag_id);
7927 }
7928}
7929
7930static size_t
7931pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7932 const uint8_t *invalid = NULL;
7933 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7934 pm_strspn_number_validate(parser, string, length, invalid);
7935 return length;
7936}
7937
7938static size_t
7939pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7940 const uint8_t *invalid = NULL;
7941 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7942 pm_strspn_number_validate(parser, string, length, invalid);
7943 return length;
7944}
7945
7946static size_t
7947pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7948 const uint8_t *invalid = NULL;
7949 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
7950 pm_strspn_number_validate(parser, string, length, invalid);
7951 return length;
7952}
7953
7954static size_t
7955pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7956 const uint8_t *invalid = NULL;
7957 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
7958 pm_strspn_number_validate(parser, string, length, invalid);
7959 return length;
7960}
7961
7962static pm_token_type_t
7963lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
7964 pm_token_type_t type = PM_TOKEN_INTEGER;
7965
7966 // Here we're going to attempt to parse the optional decimal portion of a
7967 // float. If it's not there, then it's okay and we'll just continue on.
7968 if (peek(parser) == '.') {
7969 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7970 parser->current.end += 2;
7971 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7972 type = PM_TOKEN_FLOAT;
7973 } else {
7974 // If we had a . and then something else, then it's not a float
7975 // suffix on a number it's a method call or something else.
7976 return type;
7977 }
7978 }
7979
7980 // Here we're going to attempt to parse the optional exponent portion of a
7981 // float. If it's not there, it's okay and we'll just continue on.
7982 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
7983 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
7984 parser->current.end += 2;
7985
7986 if (pm_char_is_decimal_digit(peek(parser))) {
7987 parser->current.end++;
7988 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7989 } else {
7990 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7991 }
7992 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7993 parser->current.end++;
7994 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7995 } else {
7996 return type;
7997 }
7998
7999 *seen_e = true;
8000 type = PM_TOKEN_FLOAT;
8001 }
8002
8003 return type;
8004}
8005
8006static pm_token_type_t
8007lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8008 pm_token_type_t type = PM_TOKEN_INTEGER;
8009 *seen_e = false;
8010
8011 if (peek_offset(parser, -1) == '0') {
8012 switch (*parser->current.end) {
8013 // 0d1111 is a decimal number
8014 case 'd':
8015 case 'D':
8016 parser->current.end++;
8017 if (pm_char_is_decimal_digit(peek(parser))) {
8018 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8019 } else {
8020 match(parser, '_');
8021 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8022 }
8023
8024 break;
8025
8026 // 0b1111 is a binary number
8027 case 'b':
8028 case 'B':
8029 parser->current.end++;
8030 if (pm_char_is_binary_digit(peek(parser))) {
8031 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8032 } else {
8033 match(parser, '_');
8034 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8035 }
8036
8037 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8038 break;
8039
8040 // 0o1111 is an octal number
8041 case 'o':
8042 case 'O':
8043 parser->current.end++;
8044 if (pm_char_is_octal_digit(peek(parser))) {
8045 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8046 } else {
8047 match(parser, '_');
8048 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8049 }
8050
8051 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8052 break;
8053
8054 // 01111 is an octal number
8055 case '_':
8056 case '0':
8057 case '1':
8058 case '2':
8059 case '3':
8060 case '4':
8061 case '5':
8062 case '6':
8063 case '7':
8064 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8065 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8066 break;
8067
8068 // 0x1111 is a hexadecimal number
8069 case 'x':
8070 case 'X':
8071 parser->current.end++;
8072 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8073 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8074 } else {
8075 match(parser, '_');
8076 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8077 }
8078
8079 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8080 break;
8081
8082 // 0.xxx is a float
8083 case '.': {
8084 type = lex_optional_float_suffix(parser, seen_e);
8085 break;
8086 }
8087
8088 // 0exxx is a float
8089 case 'e':
8090 case 'E': {
8091 type = lex_optional_float_suffix(parser, seen_e);
8092 break;
8093 }
8094 }
8095 } else {
8096 // If it didn't start with a 0, then we'll lex as far as we can into a
8097 // decimal number.
8098 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8099
8100 // Afterward, we'll lex as far as we can into an optional float suffix.
8101 type = lex_optional_float_suffix(parser, seen_e);
8102 }
8103
8104 // At this point we have a completed number, but we want to provide the user
8105 // with a good experience if they put an additional .xxx fractional
8106 // component on the end, so we'll check for that here.
8107 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8108 const uint8_t *fraction_start = parser->current.end;
8109 const uint8_t *fraction_end = parser->current.end + 2;
8110 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8111 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8112 }
8113
8114 return type;
8115}
8116
8117static pm_token_type_t
8118lex_numeric(pm_parser_t *parser) {
8119 pm_token_type_t type = PM_TOKEN_INTEGER;
8120 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8121
8122 if (parser->current.end < parser->end) {
8123 bool seen_e = false;
8124 type = lex_numeric_prefix(parser, &seen_e);
8125
8126 const uint8_t *end = parser->current.end;
8127 pm_token_type_t suffix_type = type;
8128
8129 if (type == PM_TOKEN_INTEGER) {
8130 if (match(parser, 'r')) {
8131 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8132
8133 if (match(parser, 'i')) {
8134 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8135 }
8136 } else if (match(parser, 'i')) {
8137 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8138 }
8139 } else {
8140 if (!seen_e && match(parser, 'r')) {
8141 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8142
8143 if (match(parser, 'i')) {
8144 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8145 }
8146 } else if (match(parser, 'i')) {
8147 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8148 }
8149 }
8150
8151 const uint8_t b = peek(parser);
8152 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8153 parser->current.end = end;
8154 } else {
8155 type = suffix_type;
8156 }
8157 }
8158
8159 return type;
8160}
8161
8162static pm_token_type_t
8163lex_global_variable(pm_parser_t *parser) {
8164 if (parser->current.end >= parser->end) {
8165 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8166 return PM_TOKEN_GLOBAL_VARIABLE;
8167 }
8168
8169 // True if multiple characters are allowed after the declaration of the
8170 // global variable. Not true when it starts with "$-".
8171 bool allow_multiple = true;
8172
8173 switch (*parser->current.end) {
8174 case '~': // $~: match-data
8175 case '*': // $*: argv
8176 case '$': // $$: pid
8177 case '?': // $?: last status
8178 case '!': // $!: error string
8179 case '@': // $@: error position
8180 case '/': // $/: input record separator
8181 case '\\': // $\: output record separator
8182 case ';': // $;: field separator
8183 case ',': // $,: output field separator
8184 case '.': // $.: last read line number
8185 case '=': // $=: ignorecase
8186 case ':': // $:: load path
8187 case '<': // $<: reading filename
8188 case '>': // $>: default output handle
8189 case '\"': // $": already loaded files
8190 parser->current.end++;
8191 return PM_TOKEN_GLOBAL_VARIABLE;
8192
8193 case '&': // $&: last match
8194 case '`': // $`: string before last match
8195 case '\'': // $': string after last match
8196 case '+': // $+: string matches last paren.
8197 parser->current.end++;
8198 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8199
8200 case '0': {
8201 parser->current.end++;
8202 size_t width;
8203
8204 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8205 do {
8206 parser->current.end += width;
8207 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8208
8209 // $0 isn't allowed to be followed by anything.
8210 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8211 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8212 }
8213
8214 return PM_TOKEN_GLOBAL_VARIABLE;
8215 }
8216
8217 case '1':
8218 case '2':
8219 case '3':
8220 case '4':
8221 case '5':
8222 case '6':
8223 case '7':
8224 case '8':
8225 case '9':
8226 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8227 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8228
8229 case '-':
8230 parser->current.end++;
8231 allow_multiple = false;
8233 default: {
8234 size_t width;
8235
8236 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8237 do {
8238 parser->current.end += width;
8239 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8240 } else if (pm_char_is_whitespace(peek(parser))) {
8241 // If we get here, then we have a $ followed by whitespace,
8242 // which is not allowed.
8243 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8244 } else {
8245 // If we get here, then we have a $ followed by something that
8246 // isn't recognized as a global variable.
8247 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8248 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8249 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
8250 }
8251
8252 return PM_TOKEN_GLOBAL_VARIABLE;
8253 }
8254 }
8255}
8256
8269static inline pm_token_type_t
8270lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8271 if (memcmp(current_start, value, vlen) == 0) {
8272 pm_lex_state_t last_state = parser->lex_state;
8273
8274 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8275 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8276 } else {
8277 lex_state_set(parser, state);
8278 if (state == PM_LEX_STATE_BEG) {
8279 parser->command_start = true;
8280 }
8281
8282 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8283 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8284 return modifier_type;
8285 }
8286 }
8287
8288 return type;
8289 }
8290
8291 return PM_TOKEN_EOF;
8292}
8293
8294static pm_token_type_t
8295lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8296 // Lex as far as we can into the current identifier.
8297 size_t width;
8298 const uint8_t *end = parser->end;
8299 const uint8_t *current_start = parser->current.start;
8300 const uint8_t *current_end = parser->current.end;
8301 bool encoding_changed = parser->encoding_changed;
8302
8303 if (encoding_changed) {
8304 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8305 current_end += width;
8306 }
8307 } else {
8308 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8309 current_end += width;
8310 }
8311 }
8312 parser->current.end = current_end;
8313
8314 // Now cache the length of the identifier so that we can quickly compare it
8315 // against known keywords.
8316 width = (size_t) (current_end - current_start);
8317
8318 if (current_end < end) {
8319 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8320 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8321 // check if we're returning the defined? keyword or just an identifier.
8322 width++;
8323
8324 if (
8325 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8326 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8327 ) {
8328 // If we're in a position where we can accept a : at the end of an
8329 // identifier, then we'll optionally accept it.
8330 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8331 (void) match(parser, ':');
8332 return PM_TOKEN_LABEL;
8333 }
8334
8335 if (parser->lex_state != PM_LEX_STATE_DOT) {
8336 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8337 return PM_TOKEN_KEYWORD_DEFINED;
8338 }
8339 }
8340
8341 return PM_TOKEN_METHOD_NAME;
8342 }
8343
8344 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8345 // If we're in a position where we can accept a = at the end of an
8346 // identifier, then we'll optionally accept it.
8347 return PM_TOKEN_IDENTIFIER;
8348 }
8349
8350 if (
8351 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8352 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8353 ) {
8354 // If we're in a position where we can accept a : at the end of an
8355 // identifier, then we'll optionally accept it.
8356 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8357 (void) match(parser, ':');
8358 return PM_TOKEN_LABEL;
8359 }
8360 }
8361
8362 if (parser->lex_state != PM_LEX_STATE_DOT) {
8363 pm_token_type_t type;
8364 switch (width) {
8365 case 2:
8366 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8367 if (pm_do_loop_stack_p(parser)) {
8368 return PM_TOKEN_KEYWORD_DO_LOOP;
8369 }
8370 return PM_TOKEN_KEYWORD_DO;
8371 }
8372
8373 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8374 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8375 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8376 break;
8377 case 3:
8378 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8379 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8380 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8381 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8382 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8383 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8384 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8385 break;
8386 case 4:
8387 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8388 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8389 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8390 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8391 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8392 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8393 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8394 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8395 break;
8396 case 5:
8397 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8398 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8399 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8400 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8401 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8402 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8403 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8404 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8405 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8406 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8407 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8408 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8409 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8410 break;
8411 case 6:
8412 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8413 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8414 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8415 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8416 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8417 break;
8418 case 8:
8419 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8420 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8421 break;
8422 case 12:
8423 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8424 break;
8425 }
8426 }
8427
8428 if (encoding_changed) {
8429 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8430 }
8431 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8432}
8433
8438static bool
8439current_token_starts_line(pm_parser_t *parser) {
8440 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8441}
8442
8457static pm_token_type_t
8458lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8459 // If there is no content following this #, then we're at the end of
8460 // the string and we can safely return string content.
8461 if (pound + 1 >= parser->end) {
8462 parser->current.end = pound + 1;
8463 return PM_TOKEN_STRING_CONTENT;
8464 }
8465
8466 // Now we'll check against the character that follows the #. If it constitutes
8467 // valid interplation, we'll handle that, otherwise we'll return
8468 // PM_TOKEN_NOT_PROVIDED.
8469 switch (pound[1]) {
8470 case '@': {
8471 // In this case we may have hit an embedded instance or class variable.
8472 if (pound + 2 >= parser->end) {
8473 parser->current.end = pound + 1;
8474 return PM_TOKEN_STRING_CONTENT;
8475 }
8476
8477 // If we're looking at a @ and there's another @, then we'll skip past the
8478 // second @.
8479 const uint8_t *variable = pound + 2;
8480 if (*variable == '@' && pound + 3 < parser->end) variable++;
8481
8482 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8483 // At this point we're sure that we've either hit an embedded instance
8484 // or class variable. In this case we'll first need to check if we've
8485 // already consumed content.
8486 if (pound > parser->current.start) {
8487 parser->current.end = pound;
8488 return PM_TOKEN_STRING_CONTENT;
8489 }
8490
8491 // Otherwise we need to return the embedded variable token
8492 // and then switch to the embedded variable lex mode.
8493 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8494 parser->current.end = pound + 1;
8495 return PM_TOKEN_EMBVAR;
8496 }
8497
8498 // If we didn't get a valid interpolation, then this is just regular
8499 // string content. This is like if we get "#@-". In this case the caller
8500 // should keep lexing.
8501 parser->current.end = pound + 1;
8502 return PM_TOKEN_NOT_PROVIDED;
8503 }
8504 case '$':
8505 // In this case we may have hit an embedded global variable. If there's
8506 // not enough room, then we'll just return string content.
8507 if (pound + 2 >= parser->end) {
8508 parser->current.end = pound + 1;
8509 return PM_TOKEN_STRING_CONTENT;
8510 }
8511
8512 // This is the character that we're going to check to see if it is the
8513 // start of an identifier that would indicate that this is a global
8514 // variable.
8515 const uint8_t *check = pound + 2;
8516
8517 if (pound[2] == '-') {
8518 if (pound + 3 >= parser->end) {
8519 parser->current.end = pound + 2;
8520 return PM_TOKEN_STRING_CONTENT;
8521 }
8522
8523 check++;
8524 }
8525
8526 // If the character that we're going to check is the start of an
8527 // identifier, or we don't have a - and the character is a decimal number
8528 // or a global name punctuation character, then we've hit an embedded
8529 // global variable.
8530 if (
8531 char_is_identifier_start(parser, check, parser->end - check) ||
8532 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8533 ) {
8534 // In this case we've hit an embedded global variable. First check to
8535 // see if we've already consumed content. If we have, then we need to
8536 // return that content as string content first.
8537 if (pound > parser->current.start) {
8538 parser->current.end = pound;
8539 return PM_TOKEN_STRING_CONTENT;
8540 }
8541
8542 // Otherwise, we need to return the embedded variable token and switch
8543 // to the embedded variable lex mode.
8544 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8545 parser->current.end = pound + 1;
8546 return PM_TOKEN_EMBVAR;
8547 }
8548
8549 // In this case we've hit a #$ that does not indicate a global variable.
8550 // In this case we'll continue lexing past it.
8551 parser->current.end = pound + 1;
8552 return PM_TOKEN_NOT_PROVIDED;
8553 case '{':
8554 // In this case it's the start of an embedded expression. If we have
8555 // already consumed content, then we need to return that content as string
8556 // content first.
8557 if (pound > parser->current.start) {
8558 parser->current.end = pound;
8559 return PM_TOKEN_STRING_CONTENT;
8560 }
8561
8562 parser->enclosure_nesting++;
8563
8564 // Otherwise we'll skip past the #{ and begin lexing the embedded
8565 // expression.
8566 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8567 parser->current.end = pound + 2;
8568 parser->command_start = true;
8569 pm_do_loop_stack_push(parser, false);
8570 return PM_TOKEN_EMBEXPR_BEGIN;
8571 default:
8572 // In this case we've hit a # that doesn't constitute interpolation. We'll
8573 // mark that by returning the not provided token type. This tells the
8574 // consumer to keep lexing forward.
8575 parser->current.end = pound + 1;
8576 return PM_TOKEN_NOT_PROVIDED;
8577 }
8578}
8579
// Bit flags that describe the context an escape sequence is being read in.
// They are combined with | and tested with &.

// No flags set.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x00;

// The escape is inside a control sequence (reported as "\C-").
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x01;

// The escape is inside a meta sequence (reported as "\M-").
static const uint8_t PM_ESCAPE_FLAG_META = 0x02;

// The escape is being read for a character literal.
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x04;

// The escape is being read for a regular expression.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x08;
8585
/**
 * Lookup table indexed by a 7-bit byte value. An entry is true for the bytes
 * 0x09 through 0x0D and for 0x20 through 0x7E, with the exception of the
 * backslash (0x5C).
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the byte is below 0x80 and marked in the
 * ascii_printable_chars table.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    // Bytes with the high bit set are outside of the table.
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
8604
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * input is not validated here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Everything up to '9' is treated as a decimal digit.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For letters, the low three bits are 1 through 6 for both 'a'..'f' and
    // 'A'..'F', so adding 9 yields 10 through 15.
    return (uint8_t) ((value & 0x7) + 9);
}
8613
8619static inline uint32_t
8620escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
8621 uint32_t value = 0;
8622 for (size_t index = 0; index < length; index++) {
8623 if (index != 0) value <<= 4;
8624 value |= escape_hexadecimal_digit(string[index]);
8625 }
8626
8627 // Here we're going to verify that the value is actually a valid Unicode
8628 // codepoint and not a surrogate pair.
8629 if (value >= 0xD800 && value <= 0xDFFF) {
8630 if (error_location != NULL) {
8631 pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE);
8632 } else {
8633 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
8634 }
8635 return 0xFFFD;
8636 }
8637
8638 return value;
8639}
8640
8644static inline uint8_t
8645escape_byte(uint8_t value, const uint8_t flags) {
8646 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8647 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8648 return value;
8649}
8650
8654static inline void
8655escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8656 // \u escape sequences in string-like structures implicitly change the
8657 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8658 // literal.
8659 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8660 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8661 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8662 }
8663
8665 }
8666
8667 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8668 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
8669 pm_buffer_append_byte(buffer, 0xEF);
8670 pm_buffer_append_byte(buffer, 0xBF);
8671 pm_buffer_append_byte(buffer, 0xBD);
8672 }
8673}
8674
8679static inline void
8680escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
8681 if (byte >= 0x80) {
8682 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8683 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8684 }
8685
8686 parser->explicit_encoding = parser->encoding;
8687 }
8688
8689 pm_buffer_append_byte(buffer, byte);
8690}
8691
8707static inline void
8708escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8709 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8710 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8711 }
8712
8713 escape_write_byte_encoded(parser, buffer, byte);
8714}
8715
8719static inline void
8720escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8721 size_t width;
8722 if (parser->encoding_changed) {
8723 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8724 } else {
8725 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8726 }
8727
8728 if (width == 1) {
8729 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8730 } else if (width > 1) {
8731 // Valid multibyte character. Just ignore escape.
8732 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8733 pm_buffer_append_bytes(b, parser->current.end, width);
8734 parser->current.end += width;
8735 } else {
8736 // Assume the next character wasn't meant to be part of this escape
8737 // sequence since it is invalid. Add an error and move on.
8738 parser->current.end++;
8739 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8740 }
8741}
8742
8748static void
8749escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8750#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8751
8752 PM_PARSER_WARN_TOKEN_FORMAT(
8753 parser,
8754 parser->current,
8755 PM_WARN_INVALID_CHARACTER,
8756 FLAG(flags),
8757 FLAG(flag),
8758 type
8759 );
8760
8761#undef FLAG
8762}
8763
8767static void
8768escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8769 uint8_t peeked = peek(parser);
8770 switch (peeked) {
8771 case '\\': {
8772 parser->current.end++;
8773 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8774 return;
8775 }
8776 case '\'': {
8777 parser->current.end++;
8778 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8779 return;
8780 }
8781 case 'a': {
8782 parser->current.end++;
8783 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8784 return;
8785 }
8786 case 'b': {
8787 parser->current.end++;
8788 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8789 return;
8790 }
8791 case 'e': {
8792 parser->current.end++;
8793 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8794 return;
8795 }
8796 case 'f': {
8797 parser->current.end++;
8798 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8799 return;
8800 }
8801 case 'n': {
8802 parser->current.end++;
8803 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8804 return;
8805 }
8806 case 'r': {
8807 parser->current.end++;
8808 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8809 return;
8810 }
8811 case 's': {
8812 parser->current.end++;
8813 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8814 return;
8815 }
8816 case 't': {
8817 parser->current.end++;
8818 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8819 return;
8820 }
8821 case 'v': {
8822 parser->current.end++;
8823 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8824 return;
8825 }
8826 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8827 uint8_t value = (uint8_t) (*parser->current.end - '0');
8828 parser->current.end++;
8829
8830 if (pm_char_is_octal_digit(peek(parser))) {
8831 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8832 parser->current.end++;
8833
8834 if (pm_char_is_octal_digit(peek(parser))) {
8835 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8836 parser->current.end++;
8837 }
8838 }
8839
8840 value = escape_byte(value, flags);
8841 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8842 return;
8843 }
8844 case 'x': {
8845 const uint8_t *start = parser->current.end - 1;
8846
8847 parser->current.end++;
8848 uint8_t byte = peek(parser);
8849
8850 if (pm_char_is_hexadecimal_digit(byte)) {
8851 uint8_t value = escape_hexadecimal_digit(byte);
8852 parser->current.end++;
8853
8854 byte = peek(parser);
8855 if (pm_char_is_hexadecimal_digit(byte)) {
8856 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8857 parser->current.end++;
8858 }
8859
8860 value = escape_byte(value, flags);
8861 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8862 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8863 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8864 } else {
8865 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8866 }
8867 }
8868
8869 escape_write_byte_encoded(parser, buffer, value);
8870 } else {
8871 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8872 }
8873
8874 return;
8875 }
8876 case 'u': {
8877 const uint8_t *start = parser->current.end - 1;
8878 parser->current.end++;
8879
8880 if (parser->current.end == parser->end) {
8881 const uint8_t *start = parser->current.end - 2;
8882 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8883 } else if (peek(parser) == '{') {
8884 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8885 parser->current.end++;
8886
8887 size_t whitespace;
8888 while (true) {
8889 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8890 parser->current.end += whitespace;
8891 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8892 // This is super hacky, but it gets us nicer error
8893 // messages because we can still pass it off to the
8894 // regular expression engine even if we hit an
8895 // unterminated regular expression.
8896 parser->current.end += 2;
8897 } else {
8898 break;
8899 }
8900 }
8901
8902 const uint8_t *extra_codepoints_start = NULL;
8903 int codepoints_count = 0;
8904
8905 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8906 const uint8_t *unicode_start = parser->current.end;
8907 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8908
8909 if (hexadecimal_length > 6) {
8910 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8911 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8912 } else if (hexadecimal_length == 0) {
8913 // there are not hexadecimal characters
8914
8915 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8916 // If this is a regular expression, we are going to
8917 // let the regular expression engine handle this
8918 // error instead of us.
8919 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8920 } else {
8921 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
8922 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8923 }
8924
8925 return;
8926 }
8927
8928 parser->current.end += hexadecimal_length;
8929 codepoints_count++;
8930 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8931 extra_codepoints_start = unicode_start;
8932 }
8933
8934 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
8935 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8936
8937 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8938 }
8939
8940 // ?\u{nnnn} character literal should contain only one codepoint
8941 // and cannot be like ?\u{nnnn mmmm}.
8942 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8943 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8944 }
8945
8946 if (parser->current.end == parser->end) {
8947 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
8948 } else if (peek(parser) == '}') {
8949 parser->current.end++;
8950 } else {
8951 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8952 // If this is a regular expression, we are going to let
8953 // the regular expression engine handle this error
8954 // instead of us.
8955 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8956 } else {
8957 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8958 }
8959 }
8960
8961 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8962 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
8963 }
8964 } else {
8965 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
8966
8967 if (length == 0) {
8968 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8969 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8970 } else {
8971 const uint8_t *start = parser->current.end - 2;
8972 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8973 }
8974 } else if (length == 4) {
8975 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
8976
8977 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8978 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
8979 }
8980
8981 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
8982 parser->current.end += 4;
8983 } else {
8984 parser->current.end += length;
8985
8986 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8987 // If this is a regular expression, we are going to let
8988 // the regular expression engine handle this error
8989 // instead of us.
8990 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8991 } else {
8992 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8993 }
8994 }
8995 }
8996
8997 return;
8998 }
8999 case 'c': {
9000 parser->current.end++;
9001 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9002 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9003 }
9004
9005 if (parser->current.end == parser->end) {
9006 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9007 return;
9008 }
9009
9010 uint8_t peeked = peek(parser);
9011 switch (peeked) {
9012 case '?': {
9013 parser->current.end++;
9014 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9015 return;
9016 }
9017 case '\\':
9018 parser->current.end++;
9019
9020 if (match(parser, 'u') || match(parser, 'U')) {
9021 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9022 return;
9023 }
9024
9025 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9026 return;
9027 case ' ':
9028 parser->current.end++;
9029 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9030 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9031 return;
9032 case '\t':
9033 parser->current.end++;
9034 escape_read_warn(parser, flags, 0, "\\t");
9035 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9036 return;
9037 default: {
9038 if (!char_is_ascii_printable(peeked)) {
9039 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9040 return;
9041 }
9042
9043 parser->current.end++;
9044 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9045 return;
9046 }
9047 }
9048 }
9049 case 'C': {
9050 parser->current.end++;
9051 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9052 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9053 }
9054
9055 if (peek(parser) != '-') {
9056 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9057 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9058 return;
9059 }
9060
9061 parser->current.end++;
9062 if (parser->current.end == parser->end) {
9063 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9064 return;
9065 }
9066
9067 uint8_t peeked = peek(parser);
9068 switch (peeked) {
9069 case '?': {
9070 parser->current.end++;
9071 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9072 return;
9073 }
9074 case '\\':
9075 parser->current.end++;
9076
9077 if (match(parser, 'u') || match(parser, 'U')) {
9078 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9079 return;
9080 }
9081
9082 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9083 return;
9084 case ' ':
9085 parser->current.end++;
9086 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9087 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9088 return;
9089 case '\t':
9090 parser->current.end++;
9091 escape_read_warn(parser, flags, 0, "\\t");
9092 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9093 return;
9094 default: {
9095 if (!char_is_ascii_printable(peeked)) {
9096 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9097 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9098 return;
9099 }
9100
9101 parser->current.end++;
9102 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9103 return;
9104 }
9105 }
9106 }
9107 case 'M': {
9108 parser->current.end++;
9109 if (flags & PM_ESCAPE_FLAG_META) {
9110 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9111 }
9112
9113 if (peek(parser) != '-') {
9114 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9115 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9116 return;
9117 }
9118
9119 parser->current.end++;
9120 if (parser->current.end == parser->end) {
9121 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9122 return;
9123 }
9124
9125 uint8_t peeked = peek(parser);
9126 switch (peeked) {
9127 case '\\':
9128 parser->current.end++;
9129
9130 if (match(parser, 'u') || match(parser, 'U')) {
9131 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9132 return;
9133 }
9134
9135 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9136 return;
9137 case ' ':
9138 parser->current.end++;
9139 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9140 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9141 return;
9142 case '\t':
9143 parser->current.end++;
9144 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9145 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9146 return;
9147 default:
9148 if (!char_is_ascii_printable(peeked)) {
9149 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9150 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9151 return;
9152 }
9153
9154 parser->current.end++;
9155 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9156 return;
9157 }
9158 }
9159 case '\r': {
9160 if (peek_offset(parser, 1) == '\n') {
9161 parser->current.end += 2;
9162 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9163 return;
9164 }
9166 }
9167 default: {
9168 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9169 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9170 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9171 return;
9172 }
9173 if (parser->current.end < parser->end) {
9174 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9175 } else {
9176 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9177 }
9178 return;
9179 }
9180 }
9181}
9182
9208static pm_token_type_t
9209lex_question_mark(pm_parser_t *parser) {
9210 if (lex_state_end_p(parser)) {
9211 lex_state_set(parser, PM_LEX_STATE_BEG);
9212 return PM_TOKEN_QUESTION_MARK;
9213 }
9214
9215 if (parser->current.end >= parser->end) {
9216 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9217 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9218 return PM_TOKEN_CHARACTER_LITERAL;
9219 }
9220
9221 if (pm_char_is_whitespace(*parser->current.end)) {
9222 lex_state_set(parser, PM_LEX_STATE_BEG);
9223 return PM_TOKEN_QUESTION_MARK;
9224 }
9225
9226 lex_state_set(parser, PM_LEX_STATE_BEG);
9227
9228 if (match(parser, '\\')) {
9229 lex_state_set(parser, PM_LEX_STATE_END);
9230
9231 pm_buffer_t buffer;
9232 pm_buffer_init_capacity(&buffer, 3);
9233
9234 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9235 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
9236
9237 return PM_TOKEN_CHARACTER_LITERAL;
9238 } else {
9239 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9240
9241 // Ternary operators can have a ? immediately followed by an identifier
9242 // which starts with an underscore. We check for this case here.
9243 if (
9244 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9245 (
9246 (parser->current.end + encoding_width >= parser->end) ||
9247 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9248 )
9249 ) {
9250 lex_state_set(parser, PM_LEX_STATE_END);
9251 parser->current.end += encoding_width;
9252 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9253 return PM_TOKEN_CHARACTER_LITERAL;
9254 }
9255 }
9256
9257 return PM_TOKEN_QUESTION_MARK;
9258}
9259
9264static pm_token_type_t
9265lex_at_variable(pm_parser_t *parser) {
9266 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9267 const uint8_t *end = parser->end;
9268
9269 size_t width;
9270 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9271 parser->current.end += width;
9272
9273 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9274 parser->current.end += width;
9275 }
9276 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9277 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9278 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9279 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9280 }
9281
9282 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9283 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9284 } else {
9285 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9286 pm_parser_err_token(parser, &parser->current, diag_id);
9287 }
9288
9289 // If we're lexing an embedded variable, then we need to pop back into the
9290 // parent lex context.
9291 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9292 lex_mode_pop(parser);
9293 }
9294
9295 return type;
9296}
9297
9301static inline void
9302parser_lex_callback(pm_parser_t *parser) {
9303 if (parser->lex_callback) {
9304 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9305 }
9306}
9307
9311static inline pm_comment_t *
9312parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9313 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9314 if (comment == NULL) return NULL;
9315
9316 *comment = (pm_comment_t) {
9317 .type = type,
9318 .location = { parser->current.start, parser->current.end }
9319 };
9320
9321 return comment;
9322}
9323
9329static pm_token_type_t
9330lex_embdoc(pm_parser_t *parser) {
9331 // First, lex out the EMBDOC_BEGIN token.
9332 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9333
9334 if (newline == NULL) {
9335 parser->current.end = parser->end;
9336 } else {
9337 pm_newline_list_append(&parser->newline_list, newline);
9338 parser->current.end = newline + 1;
9339 }
9340
9341 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9342 parser_lex_callback(parser);
9343
9344 // Now, create a comment that is going to be attached to the parser.
9345 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9346 if (comment == NULL) return PM_TOKEN_EOF;
9347
9348 // Now, loop until we find the end of the embedded documentation or the end
9349 // of the file.
9350 while (parser->current.end + 4 <= parser->end) {
9351 parser->current.start = parser->current.end;
9352
9353 // If we've hit the end of the embedded documentation then we'll return
9354 // that token here.
9355 if (
9356 (memcmp(parser->current.end, "=end", 4) == 0) &&
9357 (
9358 (parser->current.end + 4 == parser->end) || // end of file
9359 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9360 (parser->current.end[4] == '\0') || // NUL or end of script
9361 (parser->current.end[4] == '\004') || // ^D
9362 (parser->current.end[4] == '\032') // ^Z
9363 )
9364 ) {
9365 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9366
9367 if (newline == NULL) {
9368 parser->current.end = parser->end;
9369 } else {
9370 pm_newline_list_append(&parser->newline_list, newline);
9371 parser->current.end = newline + 1;
9372 }
9373
9374 parser->current.type = PM_TOKEN_EMBDOC_END;
9375 parser_lex_callback(parser);
9376
9377 comment->location.end = parser->current.end;
9378 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9379
9380 return PM_TOKEN_EMBDOC_END;
9381 }
9382
9383 // Otherwise, we'll parse until the end of the line and return a line of
9384 // embedded documentation.
9385 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9386
9387 if (newline == NULL) {
9388 parser->current.end = parser->end;
9389 } else {
9390 pm_newline_list_append(&parser->newline_list, newline);
9391 parser->current.end = newline + 1;
9392 }
9393
9394 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9395 parser_lex_callback(parser);
9396 }
9397
9398 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9399
9400 comment->location.end = parser->current.end;
9401 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9402
9403 return PM_TOKEN_EOF;
9404}
9405
9411static inline void
9412parser_lex_ignored_newline(pm_parser_t *parser) {
9413 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9414 parser_lex_callback(parser);
9415}
9416
9426static inline void
9427parser_flush_heredoc_end(pm_parser_t *parser) {
9428 assert(parser->heredoc_end <= parser->end);
9429 parser->next_start = parser->heredoc_end;
9430 parser->heredoc_end = NULL;
9431}
9432
9436static bool
9437parser_end_of_line_p(const pm_parser_t *parser) {
9438 const uint8_t *cursor = parser->current.end;
9439
9440 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9441 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9442 }
9443
9444 return true;
9445}
9446
9465typedef struct {
9471
9476 const uint8_t *cursor;
9478
9498
9502static inline void
9503pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9504 pm_buffer_append_byte(&token_buffer->buffer, byte);
9505}
9506
9507static inline void
9508pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9509 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9510}
9511
9515static inline size_t
9516parser_char_width(const pm_parser_t *parser) {
9517 size_t width;
9518 if (parser->encoding_changed) {
9519 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9520 } else {
9521 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9522 }
9523
9524 // TODO: If the character is invalid in the given encoding, then we'll just
9525 // push one byte into the buffer. This should actually be an error.
9526 return (width == 0 ? 1 : width);
9527}
9528
9532static void
9533pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9534 size_t width = parser_char_width(parser);
9535 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9536 parser->current.end += width;
9537}
9538
9539static void
9540pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9541 size_t width = parser_char_width(parser);
9542 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
9543 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
9544 parser->current.end += width;
9545}
9546
/**
 * Returns true if none of the first length bytes of value has its high bit
 * set. An empty slice is ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    const uint8_t *cursor = value;
    size_t remaining = length;

    while (remaining-- > 0) {
        if (*cursor++ >= 0x80) return false;
    }

    return true;
}
9555
9562static inline void
9563pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9564 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
9565}
9566
9567static inline void
9568pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9569 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
9570 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
9571 pm_buffer_free(&token_buffer->regexp_buffer);
9572}
9573
9583static void
9584pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9585 if (token_buffer->cursor == NULL) {
9586 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9587 } else {
9588 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9589 pm_token_buffer_copy(parser, token_buffer);
9590 }
9591}
9592
9593static void
9594pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9595 if (token_buffer->base.cursor == NULL) {
9596 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9597 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
9598 } else {
9599 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9600 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9601 pm_regexp_token_buffer_copy(parser, token_buffer);
9602 }
9603}
9604
9605#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9606
9615static void
9616pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9617 const uint8_t *start;
9618 if (token_buffer->cursor == NULL) {
9619 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9620 start = parser->current.start;
9621 } else {
9622 start = token_buffer->cursor;
9623 }
9624
9625 const uint8_t *end = parser->current.end - 1;
9626 assert(end >= start);
9627 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9628
9629 token_buffer->cursor = end;
9630}
9631
9632static void
9633pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9634 const uint8_t *start;
9635 if (token_buffer->base.cursor == NULL) {
9636 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9637 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9638 start = parser->current.start;
9639 } else {
9640 start = token_buffer->base.cursor;
9641 }
9642
9643 const uint8_t *end = parser->current.end - 1;
9644 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9645 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9646
9647 token_buffer->base.cursor = end;
9648}
9649
9650#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9651
9656static inline size_t
9657pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9658 size_t whitespace = 0;
9659
9660 switch (indent) {
9661 case PM_HEREDOC_INDENT_NONE:
9662 // Do nothing, we can't match a terminator with
9663 // indentation and there's no need to calculate common
9664 // whitespace.
9665 break;
9666 case PM_HEREDOC_INDENT_DASH:
9667 // Skip past inline whitespace.
9668 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9669 break;
9670 case PM_HEREDOC_INDENT_TILDE:
9671 // Skip past inline whitespace and calculate common
9672 // whitespace.
9673 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9674 if (**cursor == '\t') {
9675 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9676 } else {
9677 whitespace++;
9678 }
9679 (*cursor)++;
9680 }
9681
9682 break;
9683 }
9684
9685 return whitespace;
9686}
9687
9692static uint8_t
9693pm_lex_percent_delimiter(pm_parser_t *parser) {
9694 size_t eol_length = match_eol(parser);
9695
9696 if (eol_length) {
9697 if (parser->heredoc_end) {
9698 // If we have already lexed a heredoc, then the newline has already
9699 // been added to the list. In this case we want to just flush the
9700 // heredoc end.
9701 parser_flush_heredoc_end(parser);
9702 } else {
9703 // Otherwise, we'll add the newline to the list of newlines.
9704 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
9705 }
9706
9707 uint8_t delimiter = *parser->current.end;
9708
9709 // If our delimiter is \r\n, we want to treat it as if it's \n.
9710 // For example, %\r\nfoo\r\n should be "foo"
9711 if (eol_length == 2) {
9712 delimiter = *(parser->current.end + 1);
9713 }
9714
9715 parser->current.end += eol_length;
9716 return delimiter;
9717 }
9718
9719 return *parser->current.end++;
9720}
9721
/**
 * Set the type of the current token, report it to the lex callback, and return
 * from the enclosing function. The body is wrapped in do/while so that the
 * macro expands to exactly one statement even under an unbraced `if`, and the
 * argument is parenthesized so that any expression can be passed.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
9727
9734static void
9735parser_lex(pm_parser_t *parser) {
9736 assert(parser->current.end <= parser->end);
9737 parser->previous = parser->current;
9738
9739 // This value mirrors cmd_state from CRuby.
9740 bool previous_command_start = parser->command_start;
9741 parser->command_start = false;
9742
9743 // This is used to communicate to the newline lexing function that we've
9744 // already seen a comment.
9745 bool lexed_comment = false;
9746
9747 // Here we cache the current value of the semantic token seen flag. This is
9748 // used to reset it in case we find a token that shouldn't flip this flag.
9749 unsigned int semantic_token_seen = parser->semantic_token_seen;
9750 parser->semantic_token_seen = true;
9751
9752 switch (parser->lex_modes.current->mode) {
9753 case PM_LEX_DEFAULT:
9754 case PM_LEX_EMBEXPR:
9755 case PM_LEX_EMBVAR:
9756
9757 // We have a specific named label here because we are going to jump back to
9758 // this location in the event that we have lexed a token that should not be
9759 // returned to the parser. This includes comments, ignored newlines, and
9760 // invalid tokens of some form.
9761 lex_next_token: {
9762 // If we have the special next_start pointer set, then we're going to jump
9763 // to that location and start lexing from there.
9764 if (parser->next_start != NULL) {
9765 parser->current.end = parser->next_start;
9766 parser->next_start = NULL;
9767 }
9768
9769 // This value mirrors space_seen from CRuby. It tracks whether or not
9770 // space has been eaten before the start of the next token.
9771 bool space_seen = false;
9772
9773 // First, we're going to skip past any whitespace at the front of the next
9774 // token.
9775 bool chomping = true;
9776 while (parser->current.end < parser->end && chomping) {
9777 switch (*parser->current.end) {
9778 case ' ':
9779 case '\t':
9780 case '\f':
9781 case '\v':
9782 parser->current.end++;
9783 space_seen = true;
9784 break;
9785 case '\r':
9786 if (match_eol_offset(parser, 1)) {
9787 chomping = false;
9788 } else {
9789 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9790 parser->current.end++;
9791 space_seen = true;
9792 }
9793 break;
9794 case '\\': {
9795 size_t eol_length = match_eol_offset(parser, 1);
9796 if (eol_length) {
9797 if (parser->heredoc_end) {
9798 parser->current.end = parser->heredoc_end;
9799 parser->heredoc_end = NULL;
9800 } else {
9801 parser->current.end += eol_length + 1;
9802 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9803 space_seen = true;
9804 }
9805 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9806 parser->current.end += 2;
9807 } else {
9808 chomping = false;
9809 }
9810
9811 break;
9812 }
9813 default:
9814 chomping = false;
9815 break;
9816 }
9817 }
9818
9819 // Next, we'll set to start of this token to be the current end.
9820 parser->current.start = parser->current.end;
9821
9822 // We'll check if we're at the end of the file. If we are, then we
9823 // need to return the EOF token.
9824 if (parser->current.end >= parser->end) {
9825 // If we hit EOF, but the EOF came immediately after a newline,
9826 // set the start of the token to the newline. This way any EOF
9827 // errors will be reported as happening on that line rather than
9828 // a line after. For example "foo(\n" should report an error
9829 // on line 1 even though EOF technically occurs on line 2.
9830 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9831 parser->current.start -= 1;
9832 }
9833 LEX(PM_TOKEN_EOF);
9834 }
9835
9836 // Finally, we'll check the current character to determine the next
9837 // token.
9838 switch (*parser->current.end++) {
9839 case '\0': // NUL or end of script
9840 case '\004': // ^D
9841 case '\032': // ^Z
9842 parser->current.end--;
9843 LEX(PM_TOKEN_EOF);
9844
9845 case '#': { // comments
9846 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9847 parser->current.end = ending == NULL ? parser->end : ending;
9848
9849 // If we found a comment while lexing, then we're going to
9850 // add it to the list of comments in the file and keep
9851 // lexing.
9852 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9853 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9854
9855 if (ending) parser->current.end++;
9856 parser->current.type = PM_TOKEN_COMMENT;
9857 parser_lex_callback(parser);
9858
9859 // Here, parse the comment to see if it's a magic comment
9860 // and potentially change state on the parser.
9861 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9862 ptrdiff_t length = parser->current.end - parser->current.start;
9863
9864 // If we didn't find a magic comment within the first
9865 // pass and we're at the start of the file, then we need
9866 // to do another pass to potentially find other patterns
9867 // for encoding comments.
9868 if (length >= 10 && !parser->encoding_locked) {
9869 parser_lex_magic_comment_encoding(parser);
9870 }
9871 }
9872
9873 lexed_comment = true;
9874 }
9876 case '\r':
9877 case '\n': {
9878 parser->semantic_token_seen = semantic_token_seen & 0x1;
9879 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9880
9881 if (eol_length) {
9882 // The only way you can have carriage returns in this
9883 // particular loop is if you have a carriage return
9884 // followed by a newline. In that case we'll just skip
9885 // over the carriage return and continue lexing, in
9886 // order to make it so that the newline token
9887 // encapsulates both the carriage return and the
9888 // newline. Note that we need to check that we haven't
9889 // already lexed a comment here because that falls
9890 // through into here as well.
9891 if (!lexed_comment) {
9892 parser->current.end += eol_length - 1; // skip CR
9893 }
9894
9895 if (parser->heredoc_end == NULL) {
9896 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9897 }
9898 }
9899
9900 if (parser->heredoc_end) {
9901 parser_flush_heredoc_end(parser);
9902 }
9903
9904 // If this is an ignored newline, then we can continue lexing after
9905 // calling the callback with the ignored newline token.
9906 switch (lex_state_ignored_p(parser)) {
9907 case PM_IGNORED_NEWLINE_NONE:
9908 break;
9909 case PM_IGNORED_NEWLINE_PATTERN:
9910 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9911 if (!lexed_comment) parser_lex_ignored_newline(parser);
9912 lex_state_set(parser, PM_LEX_STATE_BEG);
9913 parser->command_start = true;
9914 parser->current.type = PM_TOKEN_NEWLINE;
9915 return;
9916 }
9918 case PM_IGNORED_NEWLINE_ALL:
9919 if (!lexed_comment) parser_lex_ignored_newline(parser);
9920 lexed_comment = false;
9921 goto lex_next_token;
9922 }
9923
9924 // Here we need to look ahead and see if there is a call operator
9925 // (either . or &.) that starts the next line. If there is, then this
9926 // is going to become an ignored newline and we're going to instead
9927 // return the call operator.
9928 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9929 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9930
9931 if (next_content < parser->end) {
9932 // If we hit a comment after a newline, then we're going to check
9933 // if it's ignored or if it's followed by a method call ('.').
9934 // If it is, then we're going to call the
9935 // callback with an ignored newline and then continue lexing.
9936 // Otherwise we'll return a regular newline.
9937 if (next_content[0] == '#') {
9938 // Here we look for a "." or "&." following a "\n".
9939 const uint8_t *following = next_newline(next_content, parser->end - next_content);
9940
9941 while (following && (following + 1 < parser->end)) {
9942 following++;
9943 following += pm_strspn_inline_whitespace(following, parser->end - following);
9944
9945 // If this is not followed by a comment, then we can break out
9946 // of this loop.
9947 if (peek_at(parser, following) != '#') break;
9948
9949 // If there is a comment, then we need to find the end of the
9950 // comment and continue searching from there.
9951 following = next_newline(following, parser->end - following);
9952 }
9953
9954 // If the lex state was ignored, we will lex the
9955 // ignored newline.
9956 if (lex_state_ignored_p(parser)) {
9957 if (!lexed_comment) parser_lex_ignored_newline(parser);
9958 lexed_comment = false;
9959 goto lex_next_token;
9960 }
9961
9962 // If we hit a '.' or a '&.' we will lex the ignored
9963 // newline.
9964 if (following && (
9965 (peek_at(parser, following) == '.') ||
9966 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
9967 )) {
9968 if (!lexed_comment) parser_lex_ignored_newline(parser);
9969 lexed_comment = false;
9970 goto lex_next_token;
9971 }
9972
9973
9974 // If we are parsing as CRuby 4.0 or later and we
9975 // hit a '&&' or a '||' then we will lex the ignored
9976 // newline.
9977 if (
9979 following && (
9980 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
9981 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
9982 (
9983 peek_at(parser, following) == 'a' &&
9984 peek_at(parser, following + 1) == 'n' &&
9985 peek_at(parser, following + 2) == 'd' &&
9986 peek_at(parser, next_content + 3) != '!' &&
9987 peek_at(parser, next_content + 3) != '?' &&
9988 !char_is_identifier(parser, following + 3, parser->end - (following + 3))
9989 ) ||
9990 (
9991 peek_at(parser, following) == 'o' &&
9992 peek_at(parser, following + 1) == 'r' &&
9993 peek_at(parser, next_content + 2) != '!' &&
9994 peek_at(parser, next_content + 2) != '?' &&
9995 !char_is_identifier(parser, following + 2, parser->end - (following + 2))
9996 )
9997 )
9998 ) {
9999 if (!lexed_comment) parser_lex_ignored_newline(parser);
10000 lexed_comment = false;
10001 goto lex_next_token;
10002 }
10003 }
10004
10005 // If we hit a . after a newline, then we're in a call chain and
10006 // we need to return the call operator.
10007 if (next_content[0] == '.') {
10008 // To match ripper, we need to emit an ignored newline even though
10009 // it's a real newline in the case that we have a beginless range
10010 // on a subsequent line.
10011 if (peek_at(parser, next_content + 1) == '.') {
10012 if (!lexed_comment) parser_lex_ignored_newline(parser);
10013 lex_state_set(parser, PM_LEX_STATE_BEG);
10014 parser->command_start = true;
10015 parser->current.type = PM_TOKEN_NEWLINE;
10016 return;
10017 }
10018
10019 if (!lexed_comment) parser_lex_ignored_newline(parser);
10020 lex_state_set(parser, PM_LEX_STATE_DOT);
10021 parser->current.start = next_content;
10022 parser->current.end = next_content + 1;
10023 parser->next_start = NULL;
10024 LEX(PM_TOKEN_DOT);
10025 }
10026
10027 // If we hit a &. after a newline, then we're in a call chain and
10028 // we need to return the call operator.
10029 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10030 if (!lexed_comment) parser_lex_ignored_newline(parser);
10031 lex_state_set(parser, PM_LEX_STATE_DOT);
10032 parser->current.start = next_content;
10033 parser->current.end = next_content + 2;
10034 parser->next_start = NULL;
10035 LEX(PM_TOKEN_AMPERSAND_DOT);
10036 }
10037
10038 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10039 // If we hit an && then we are in a logical chain
10040 // and we need to return the logical operator.
10041 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10042 if (!lexed_comment) parser_lex_ignored_newline(parser);
10043 lex_state_set(parser, PM_LEX_STATE_BEG);
10044 parser->current.start = next_content;
10045 parser->current.end = next_content + 2;
10046 parser->next_start = NULL;
10047 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10048 }
10049
10050 // If we hit a || then we are in a logical chain and
10051 // we need to return the logical operator.
10052 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10053 if (!lexed_comment) parser_lex_ignored_newline(parser);
10054 lex_state_set(parser, PM_LEX_STATE_BEG);
10055 parser->current.start = next_content;
10056 parser->current.end = next_content + 2;
10057 parser->next_start = NULL;
10058 LEX(PM_TOKEN_PIPE_PIPE);
10059 }
10060
10061 // If we hit an 'and' then we are in a logical chain
10062 // and we need to return the logical operator.
10063 if (
10064 peek_at(parser, next_content) == 'a' &&
10065 peek_at(parser, next_content + 1) == 'n' &&
10066 peek_at(parser, next_content + 2) == 'd' &&
10067 peek_at(parser, next_content + 3) != '!' &&
10068 peek_at(parser, next_content + 3) != '?' &&
10069 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10070 ) {
10071 if (!lexed_comment) parser_lex_ignored_newline(parser);
10072 lex_state_set(parser, PM_LEX_STATE_BEG);
10073 parser->current.start = next_content;
10074 parser->current.end = next_content + 3;
10075 parser->next_start = NULL;
10076 parser->command_start = true;
10077 LEX(PM_TOKEN_KEYWORD_AND);
10078 }
10079
10080 // If we hit a 'or' then we are in a logical chain
10081 // and we need to return the logical operator.
10082 if (
10083 peek_at(parser, next_content) == 'o' &&
10084 peek_at(parser, next_content + 1) == 'r' &&
10085 peek_at(parser, next_content + 2) != '!' &&
10086 peek_at(parser, next_content + 2) != '?' &&
10087 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10088 ) {
10089 if (!lexed_comment) parser_lex_ignored_newline(parser);
10090 lex_state_set(parser, PM_LEX_STATE_BEG);
10091 parser->current.start = next_content;
10092 parser->current.end = next_content + 2;
10093 parser->next_start = NULL;
10094 parser->command_start = true;
10095 LEX(PM_TOKEN_KEYWORD_OR);
10096 }
10097 }
10098 }
10099
10100 // At this point we know this is a regular newline, and we can set the
10101 // necessary state and return the token.
10102 lex_state_set(parser, PM_LEX_STATE_BEG);
10103 parser->command_start = true;
10104 parser->current.type = PM_TOKEN_NEWLINE;
10105 if (!lexed_comment) parser_lex_callback(parser);
10106 return;
10107 }
10108
10109 // ,
10110 case ',':
10111 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10112 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10113 }
10114
10115 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10116 LEX(PM_TOKEN_COMMA);
10117
10118 // (
10119 case '(': {
10120 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10121
10122 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10123 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10124 }
10125
10126 parser->enclosure_nesting++;
10127 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10128 pm_do_loop_stack_push(parser, false);
10129 LEX(type);
10130 }
10131
10132 // )
10133 case ')':
10134 parser->enclosure_nesting--;
10135 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10136 pm_do_loop_stack_pop(parser);
10137 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10138
10139 // ;
10140 case ';':
10141 lex_state_set(parser, PM_LEX_STATE_BEG);
10142 parser->command_start = true;
10143 LEX(PM_TOKEN_SEMICOLON);
10144
10145 // [ [] []=
10146 case '[':
10147 parser->enclosure_nesting++;
10148 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10149
10150 if (lex_state_operator_p(parser)) {
10151 if (match(parser, ']')) {
10152 parser->enclosure_nesting--;
10153 lex_state_set(parser, PM_LEX_STATE_ARG);
10154 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10155 }
10156
10157 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10158 LEX(type);
10159 }
10160
10161 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10162 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10163 }
10164
10165 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10166 pm_do_loop_stack_push(parser, false);
10167 LEX(type);
10168
10169 // ]
10170 case ']':
10171 parser->enclosure_nesting--;
10172 lex_state_set(parser, PM_LEX_STATE_END);
10173 pm_do_loop_stack_pop(parser);
10174 LEX(PM_TOKEN_BRACKET_RIGHT);
10175
10176 // {
10177 case '{': {
10178 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10179
10180 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10181 // This { begins a lambda
10182 parser->command_start = true;
10183 lex_state_set(parser, PM_LEX_STATE_BEG);
10184 type = PM_TOKEN_LAMBDA_BEGIN;
10185 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10186 // This { begins a hash literal
10187 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10188 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10189 // This { begins a block
10190 parser->command_start = true;
10191 lex_state_set(parser, PM_LEX_STATE_BEG);
10192 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10193 // This { begins a block on a command
10194 parser->command_start = true;
10195 lex_state_set(parser, PM_LEX_STATE_BEG);
10196 } else {
10197 // This { begins a hash literal
10198 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10199 }
10200
10201 parser->enclosure_nesting++;
10202 parser->brace_nesting++;
10203 pm_do_loop_stack_push(parser, false);
10204
10205 LEX(type);
10206 }
10207
10208 // }
10209 case '}':
10210 parser->enclosure_nesting--;
10211 pm_do_loop_stack_pop(parser);
10212
10213 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10214 lex_mode_pop(parser);
10215 LEX(PM_TOKEN_EMBEXPR_END);
10216 }
10217
10218 parser->brace_nesting--;
10219 lex_state_set(parser, PM_LEX_STATE_END);
10220 LEX(PM_TOKEN_BRACE_RIGHT);
10221
10222 // * ** **= *=
10223 case '*': {
10224 if (match(parser, '*')) {
10225 if (match(parser, '=')) {
10226 lex_state_set(parser, PM_LEX_STATE_BEG);
10227 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10228 }
10229
10230 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10231
10232 if (lex_state_spcarg_p(parser, space_seen)) {
10233 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10234 type = PM_TOKEN_USTAR_STAR;
10235 } else if (lex_state_beg_p(parser)) {
10236 type = PM_TOKEN_USTAR_STAR;
10237 } else if (ambiguous_operator_p(parser, space_seen)) {
10238 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10239 }
10240
10241 if (lex_state_operator_p(parser)) {
10242 lex_state_set(parser, PM_LEX_STATE_ARG);
10243 } else {
10244 lex_state_set(parser, PM_LEX_STATE_BEG);
10245 }
10246
10247 LEX(type);
10248 }
10249
10250 if (match(parser, '=')) {
10251 lex_state_set(parser, PM_LEX_STATE_BEG);
10252 LEX(PM_TOKEN_STAR_EQUAL);
10253 }
10254
10255 pm_token_type_t type = PM_TOKEN_STAR;
10256
10257 if (lex_state_spcarg_p(parser, space_seen)) {
10258 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10259 type = PM_TOKEN_USTAR;
10260 } else if (lex_state_beg_p(parser)) {
10261 type = PM_TOKEN_USTAR;
10262 } else if (ambiguous_operator_p(parser, space_seen)) {
10263 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10264 }
10265
10266 if (lex_state_operator_p(parser)) {
10267 lex_state_set(parser, PM_LEX_STATE_ARG);
10268 } else {
10269 lex_state_set(parser, PM_LEX_STATE_BEG);
10270 }
10271
10272 LEX(type);
10273 }
10274
10275 // ! != !~ !@
10276 case '!':
10277 if (lex_state_operator_p(parser)) {
10278 lex_state_set(parser, PM_LEX_STATE_ARG);
10279 if (match(parser, '@')) {
10280 LEX(PM_TOKEN_BANG);
10281 }
10282 } else {
10283 lex_state_set(parser, PM_LEX_STATE_BEG);
10284 }
10285
10286 if (match(parser, '=')) {
10287 LEX(PM_TOKEN_BANG_EQUAL);
10288 }
10289
10290 if (match(parser, '~')) {
10291 LEX(PM_TOKEN_BANG_TILDE);
10292 }
10293
10294 LEX(PM_TOKEN_BANG);
10295
10296 // = => =~ == === =begin
10297 case '=':
10298 if (
10299 current_token_starts_line(parser) &&
10300 (parser->current.end + 5 <= parser->end) &&
10301 memcmp(parser->current.end, "begin", 5) == 0 &&
10302 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10303 ) {
10304 pm_token_type_t type = lex_embdoc(parser);
10305 if (type == PM_TOKEN_EOF) {
10306 LEX(type);
10307 }
10308
10309 goto lex_next_token;
10310 }
10311
10312 if (lex_state_operator_p(parser)) {
10313 lex_state_set(parser, PM_LEX_STATE_ARG);
10314 } else {
10315 lex_state_set(parser, PM_LEX_STATE_BEG);
10316 }
10317
10318 if (match(parser, '>')) {
10319 LEX(PM_TOKEN_EQUAL_GREATER);
10320 }
10321
10322 if (match(parser, '~')) {
10323 LEX(PM_TOKEN_EQUAL_TILDE);
10324 }
10325
10326 if (match(parser, '=')) {
10327 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10328 }
10329
10330 LEX(PM_TOKEN_EQUAL);
10331
10332 // < << <<= <= <=>
10333 case '<':
10334 if (match(parser, '<')) {
10335 if (
10336 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10337 !lex_state_end_p(parser) &&
10338 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10339 ) {
10340 const uint8_t *end = parser->current.end;
10341
10342 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10343 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10344
10345 if (match(parser, '-')) {
10346 indent = PM_HEREDOC_INDENT_DASH;
10347 }
10348 else if (match(parser, '~')) {
10349 indent = PM_HEREDOC_INDENT_TILDE;
10350 }
10351
10352 if (match(parser, '`')) {
10353 quote = PM_HEREDOC_QUOTE_BACKTICK;
10354 }
10355 else if (match(parser, '"')) {
10356 quote = PM_HEREDOC_QUOTE_DOUBLE;
10357 }
10358 else if (match(parser, '\'')) {
10359 quote = PM_HEREDOC_QUOTE_SINGLE;
10360 }
10361
10362 const uint8_t *ident_start = parser->current.end;
10363 size_t width = 0;
10364
10365 if (parser->current.end >= parser->end) {
10366 parser->current.end = end;
10367 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10368 parser->current.end = end;
10369 } else {
10370 if (quote == PM_HEREDOC_QUOTE_NONE) {
10371 parser->current.end += width;
10372
10373 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10374 parser->current.end += width;
10375 }
10376 } else {
10377 // If we have quotes, then we're going to go until we find the
10378 // end quote.
10379 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10380 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10381 parser->current.end++;
10382 }
10383 }
10384
10385 size_t ident_length = (size_t) (parser->current.end - ident_start);
10386 bool ident_error = false;
10387
10388 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10389 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10390 ident_error = true;
10391 }
10392
10393 parser->explicit_encoding = NULL;
10394 lex_mode_push(parser, (pm_lex_mode_t) {
10395 .mode = PM_LEX_HEREDOC,
10396 .as.heredoc = {
10397 .base = {
10398 .ident_start = ident_start,
10399 .ident_length = ident_length,
10400 .quote = quote,
10401 .indent = indent
10402 },
10403 .next_start = parser->current.end,
10404 .common_whitespace = NULL,
10405 .line_continuation = false
10406 }
10407 });
10408
10409 if (parser->heredoc_end == NULL) {
10410 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10411
10412 if (body_start == NULL) {
10413 // If there is no newline after the heredoc identifier, then
10414 // this is not a valid heredoc declaration. In this case we
10415 // will add an error, but we will still return a heredoc
10416 // start.
10417 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10418 body_start = parser->end;
10419 } else {
10420 // Otherwise, we want to indicate that the body of the
10421 // heredoc starts on the character after the next newline.
10422 pm_newline_list_append(&parser->newline_list, body_start);
10423 body_start++;
10424 }
10425
10426 parser->next_start = body_start;
10427 } else {
10428 parser->next_start = parser->heredoc_end;
10429 }
10430
10431 LEX(PM_TOKEN_HEREDOC_START);
10432 }
10433 }
10434
10435 if (match(parser, '=')) {
10436 lex_state_set(parser, PM_LEX_STATE_BEG);
10437 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10438 }
10439
10440 if (ambiguous_operator_p(parser, space_seen)) {
10441 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10442 }
10443
10444 if (lex_state_operator_p(parser)) {
10445 lex_state_set(parser, PM_LEX_STATE_ARG);
10446 } else {
10447 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10448 lex_state_set(parser, PM_LEX_STATE_BEG);
10449 }
10450
10451 LEX(PM_TOKEN_LESS_LESS);
10452 }
10453
10454 if (lex_state_operator_p(parser)) {
10455 lex_state_set(parser, PM_LEX_STATE_ARG);
10456 } else {
10457 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10458 lex_state_set(parser, PM_LEX_STATE_BEG);
10459 }
10460
10461 if (match(parser, '=')) {
10462 if (match(parser, '>')) {
10463 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10464 }
10465
10466 LEX(PM_TOKEN_LESS_EQUAL);
10467 }
10468
10469 LEX(PM_TOKEN_LESS);
10470
10471 // > >> >>= >=
10472 case '>':
10473 if (match(parser, '>')) {
10474 if (lex_state_operator_p(parser)) {
10475 lex_state_set(parser, PM_LEX_STATE_ARG);
10476 } else {
10477 lex_state_set(parser, PM_LEX_STATE_BEG);
10478 }
10479 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10480 }
10481
10482 if (lex_state_operator_p(parser)) {
10483 lex_state_set(parser, PM_LEX_STATE_ARG);
10484 } else {
10485 lex_state_set(parser, PM_LEX_STATE_BEG);
10486 }
10487
10488 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10489
10490 // double-quoted string literal
10491 case '"': {
10492 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10493 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10494 LEX(PM_TOKEN_STRING_BEGIN);
10495 }
10496
10497 // xstring literal
10498 case '`': {
10499 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10500 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10501 LEX(PM_TOKEN_BACKTICK);
10502 }
10503
10504 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10505 if (previous_command_start) {
10506 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10507 } else {
10508 lex_state_set(parser, PM_LEX_STATE_ARG);
10509 }
10510
10511 LEX(PM_TOKEN_BACKTICK);
10512 }
10513
10514 lex_mode_push_string(parser, true, false, '\0', '`');
10515 LEX(PM_TOKEN_BACKTICK);
10516 }
10517
10518 // single-quoted string literal
10519 case '\'': {
10520 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10521 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10522 LEX(PM_TOKEN_STRING_BEGIN);
10523 }
10524
10525 // ? character literal
10526 case '?':
10527 LEX(lex_question_mark(parser));
10528
10529 // & && &&= &=
10530 case '&': {
10531 if (match(parser, '&')) {
10532 lex_state_set(parser, PM_LEX_STATE_BEG);
10533
10534 if (match(parser, '=')) {
10535 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10536 }
10537
10538 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10539 }
10540
10541 if (match(parser, '=')) {
10542 lex_state_set(parser, PM_LEX_STATE_BEG);
10543 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10544 }
10545
10546 if (match(parser, '.')) {
10547 lex_state_set(parser, PM_LEX_STATE_DOT);
10548 LEX(PM_TOKEN_AMPERSAND_DOT);
10549 }
10550
10551 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10552 if (lex_state_spcarg_p(parser, space_seen)) {
10553 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10554 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10555 } else {
10556 const uint8_t delim = peek_offset(parser, 1);
10557
10558 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10559 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10560 }
10561 }
10562
10563 type = PM_TOKEN_UAMPERSAND;
10564 } else if (lex_state_beg_p(parser)) {
10565 type = PM_TOKEN_UAMPERSAND;
10566 } else if (ambiguous_operator_p(parser, space_seen)) {
10567 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10568 }
10569
10570 if (lex_state_operator_p(parser)) {
10571 lex_state_set(parser, PM_LEX_STATE_ARG);
10572 } else {
10573 lex_state_set(parser, PM_LEX_STATE_BEG);
10574 }
10575
10576 LEX(type);
10577 }
10578
10579 // | || ||= |=
10580 case '|':
10581 if (match(parser, '|')) {
10582 if (match(parser, '=')) {
10583 lex_state_set(parser, PM_LEX_STATE_BEG);
10584 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10585 }
10586
10587 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10588 parser->current.end--;
10589 LEX(PM_TOKEN_PIPE);
10590 }
10591
10592 lex_state_set(parser, PM_LEX_STATE_BEG);
10593 LEX(PM_TOKEN_PIPE_PIPE);
10594 }
10595
10596 if (match(parser, '=')) {
10597 lex_state_set(parser, PM_LEX_STATE_BEG);
10598 LEX(PM_TOKEN_PIPE_EQUAL);
10599 }
10600
10601 if (lex_state_operator_p(parser)) {
10602 lex_state_set(parser, PM_LEX_STATE_ARG);
10603 } else {
10604 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10605 }
10606
10607 LEX(PM_TOKEN_PIPE);
10608
10609 // + += +@
10610 case '+': {
10611 if (lex_state_operator_p(parser)) {
10612 lex_state_set(parser, PM_LEX_STATE_ARG);
10613
10614 if (match(parser, '@')) {
10615 LEX(PM_TOKEN_UPLUS);
10616 }
10617
10618 LEX(PM_TOKEN_PLUS);
10619 }
10620
10621 if (match(parser, '=')) {
10622 lex_state_set(parser, PM_LEX_STATE_BEG);
10623 LEX(PM_TOKEN_PLUS_EQUAL);
10624 }
10625
10626 if (
10627 lex_state_beg_p(parser) ||
10628 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10629 ) {
10630 lex_state_set(parser, PM_LEX_STATE_BEG);
10631
10632 if (pm_char_is_decimal_digit(peek(parser))) {
10633 parser->current.end++;
10634 pm_token_type_t type = lex_numeric(parser);
10635 lex_state_set(parser, PM_LEX_STATE_END);
10636 LEX(type);
10637 }
10638
10639 LEX(PM_TOKEN_UPLUS);
10640 }
10641
10642 if (ambiguous_operator_p(parser, space_seen)) {
10643 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10644 }
10645
10646 lex_state_set(parser, PM_LEX_STATE_BEG);
10647 LEX(PM_TOKEN_PLUS);
10648 }
10649
10650 // - -= -@
10651 case '-': {
10652 if (lex_state_operator_p(parser)) {
10653 lex_state_set(parser, PM_LEX_STATE_ARG);
10654
10655 if (match(parser, '@')) {
10656 LEX(PM_TOKEN_UMINUS);
10657 }
10658
10659 LEX(PM_TOKEN_MINUS);
10660 }
10661
10662 if (match(parser, '=')) {
10663 lex_state_set(parser, PM_LEX_STATE_BEG);
10664 LEX(PM_TOKEN_MINUS_EQUAL);
10665 }
10666
10667 if (match(parser, '>')) {
10668 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10669 LEX(PM_TOKEN_MINUS_GREATER);
10670 }
10671
10672 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10673 bool is_beg = lex_state_beg_p(parser);
10674 if (!is_beg && spcarg) {
10675 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10676 }
10677
10678 if (is_beg || spcarg) {
10679 lex_state_set(parser, PM_LEX_STATE_BEG);
10680 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10681 }
10682
10683 if (ambiguous_operator_p(parser, space_seen)) {
10684 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10685 }
10686
10687 lex_state_set(parser, PM_LEX_STATE_BEG);
10688 LEX(PM_TOKEN_MINUS);
10689 }
10690
10691 // . .. ...
10692 case '.': {
10693 bool beg_p = lex_state_beg_p(parser);
10694
10695 if (match(parser, '.')) {
10696 if (match(parser, '.')) {
10697 // If we're _not_ inside a range within default parameters
10698 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10699 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10700 lex_state_set(parser, PM_LEX_STATE_BEG);
10701 } else {
10702 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10703 }
10704 LEX(PM_TOKEN_UDOT_DOT_DOT);
10705 }
10706
10707 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10708 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10709 }
10710
10711 lex_state_set(parser, PM_LEX_STATE_BEG);
10712 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10713 }
10714
10715 lex_state_set(parser, PM_LEX_STATE_BEG);
10716 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10717 }
10718
10719 lex_state_set(parser, PM_LEX_STATE_DOT);
10720 LEX(PM_TOKEN_DOT);
10721 }
10722
10723 // integer
10724 case '0':
10725 case '1':
10726 case '2':
10727 case '3':
10728 case '4':
10729 case '5':
10730 case '6':
10731 case '7':
10732 case '8':
10733 case '9': {
10734 pm_token_type_t type = lex_numeric(parser);
10735 lex_state_set(parser, PM_LEX_STATE_END);
10736 LEX(type);
10737 }
10738
10739 // :: symbol
10740 case ':':
10741 if (match(parser, ':')) {
10742 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10743 lex_state_set(parser, PM_LEX_STATE_BEG);
10744 LEX(PM_TOKEN_UCOLON_COLON);
10745 }
10746
10747 lex_state_set(parser, PM_LEX_STATE_DOT);
10748 LEX(PM_TOKEN_COLON_COLON);
10749 }
10750
10751 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10752 lex_state_set(parser, PM_LEX_STATE_BEG);
10753 LEX(PM_TOKEN_COLON);
10754 }
10755
10756 if (peek(parser) == '"' || peek(parser) == '\'') {
10757 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10758 parser->current.end++;
10759 }
10760
10761 lex_state_set(parser, PM_LEX_STATE_FNAME);
10762 LEX(PM_TOKEN_SYMBOL_BEGIN);
10763
10764 // / /=
10765 case '/':
10766 if (lex_state_beg_p(parser)) {
10767 lex_mode_push_regexp(parser, '\0', '/');
10768 LEX(PM_TOKEN_REGEXP_BEGIN);
10769 }
10770
10771 if (match(parser, '=')) {
10772 lex_state_set(parser, PM_LEX_STATE_BEG);
10773 LEX(PM_TOKEN_SLASH_EQUAL);
10774 }
10775
10776 if (lex_state_spcarg_p(parser, space_seen)) {
10777 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10778 lex_mode_push_regexp(parser, '\0', '/');
10779 LEX(PM_TOKEN_REGEXP_BEGIN);
10780 }
10781
10782 if (ambiguous_operator_p(parser, space_seen)) {
10783 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10784 }
10785
10786 if (lex_state_operator_p(parser)) {
10787 lex_state_set(parser, PM_LEX_STATE_ARG);
10788 } else {
10789 lex_state_set(parser, PM_LEX_STATE_BEG);
10790 }
10791
10792 LEX(PM_TOKEN_SLASH);
10793
10794 // ^ ^=
10795 case '^':
10796 if (lex_state_operator_p(parser)) {
10797 lex_state_set(parser, PM_LEX_STATE_ARG);
10798 } else {
10799 lex_state_set(parser, PM_LEX_STATE_BEG);
10800 }
10801 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10802
10803 // ~ ~@
10804 case '~':
10805 if (lex_state_operator_p(parser)) {
10806 (void) match(parser, '@');
10807 lex_state_set(parser, PM_LEX_STATE_ARG);
10808 } else {
10809 lex_state_set(parser, PM_LEX_STATE_BEG);
10810 }
10811
10812 LEX(PM_TOKEN_TILDE);
10813
10814 // % %= %i %I %q %Q %r %s %w %W %x
10815 case '%': {
10816 // If there is no subsequent character then we have an
10817 // invalid token. We're going to say it's the percent
10818 // operator because we don't want to move into the string
10819 // lex mode unnecessarily.
10820 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10821 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10822 LEX(PM_TOKEN_PERCENT);
10823 }
10824
10825 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10826 lex_state_set(parser, PM_LEX_STATE_BEG);
10827 LEX(PM_TOKEN_PERCENT_EQUAL);
10828 } else if (
10829 lex_state_beg_p(parser) ||
10830 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10831 lex_state_spcarg_p(parser, space_seen)
10832 ) {
10833 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10834 if (*parser->current.end >= 0x80) {
10835 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10836 }
10837
10838 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10839 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10840 LEX(PM_TOKEN_STRING_BEGIN);
10841 }
10842
10843 // Delimiters for %-literals cannot be alphanumeric. We
10844 // validate that here.
10845 uint8_t delimiter = peek_offset(parser, 1);
10846 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10847 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10848 goto lex_next_token;
10849 }
10850
10851 switch (peek(parser)) {
10852 case 'i': {
10853 parser->current.end++;
10854
10855 if (parser->current.end < parser->end) {
10856 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10857 } else {
10858 lex_mode_push_list_eof(parser);
10859 }
10860
10861 LEX(PM_TOKEN_PERCENT_LOWER_I);
10862 }
10863 case 'I': {
10864 parser->current.end++;
10865
10866 if (parser->current.end < parser->end) {
10867 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10868 } else {
10869 lex_mode_push_list_eof(parser);
10870 }
10871
10872 LEX(PM_TOKEN_PERCENT_UPPER_I);
10873 }
10874 case 'r': {
10875 parser->current.end++;
10876
10877 if (parser->current.end < parser->end) {
10878 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10879 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10880 } else {
10881 lex_mode_push_regexp(parser, '\0', '\0');
10882 }
10883
10884 LEX(PM_TOKEN_REGEXP_BEGIN);
10885 }
10886 case 'q': {
10887 parser->current.end++;
10888
10889 if (parser->current.end < parser->end) {
10890 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10891 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10892 } else {
10893 lex_mode_push_string_eof(parser);
10894 }
10895
10896 LEX(PM_TOKEN_STRING_BEGIN);
10897 }
10898 case 'Q': {
10899 parser->current.end++;
10900
10901 if (parser->current.end < parser->end) {
10902 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10903 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10904 } else {
10905 lex_mode_push_string_eof(parser);
10906 }
10907
10908 LEX(PM_TOKEN_STRING_BEGIN);
10909 }
10910 case 's': {
10911 parser->current.end++;
10912
10913 if (parser->current.end < parser->end) {
10914 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10915 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10916 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10917 } else {
10918 lex_mode_push_string_eof(parser);
10919 }
10920
10921 LEX(PM_TOKEN_SYMBOL_BEGIN);
10922 }
10923 case 'w': {
10924 parser->current.end++;
10925
10926 if (parser->current.end < parser->end) {
10927 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10928 } else {
10929 lex_mode_push_list_eof(parser);
10930 }
10931
10932 LEX(PM_TOKEN_PERCENT_LOWER_W);
10933 }
10934 case 'W': {
10935 parser->current.end++;
10936
10937 if (parser->current.end < parser->end) {
10938 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10939 } else {
10940 lex_mode_push_list_eof(parser);
10941 }
10942
10943 LEX(PM_TOKEN_PERCENT_UPPER_W);
10944 }
10945 case 'x': {
10946 parser->current.end++;
10947
10948 if (parser->current.end < parser->end) {
10949 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10950 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10951 } else {
10952 lex_mode_push_string_eof(parser);
10953 }
10954
10955 LEX(PM_TOKEN_PERCENT_LOWER_X);
10956 }
10957 default:
10958 // If we get to this point, then we have a % that is completely
10959 // unparsable. In this case we'll just drop it from the parser
10960 // and skip past it and hope that the next token is something
10961 // that we can parse.
10962 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10963 goto lex_next_token;
10964 }
10965 }
10966
10967 if (ambiguous_operator_p(parser, space_seen)) {
10968 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
10969 }
10970
10971 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10972 LEX(PM_TOKEN_PERCENT);
10973 }
10974
10975 // global variable
10976 case '$': {
10977 pm_token_type_t type = lex_global_variable(parser);
10978
10979 // If we're lexing an embedded variable, then we need to pop back into
10980 // the parent lex context.
10981 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10982 lex_mode_pop(parser);
10983 }
10984
10985 lex_state_set(parser, PM_LEX_STATE_END);
10986 LEX(type);
10987 }
10988
10989 // instance variable, class variable
10990 case '@':
10991 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10992 LEX(lex_at_variable(parser));
10993
10994 default: {
10995 if (*parser->current.start != '_') {
10996 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
10997
10998 // If this isn't the beginning of an identifier, then
10999 // it's an invalid token as we've exhausted all of the
11000 // other options. We'll skip past it and return the next
11001 // token after adding an appropriate error message.
11002 if (!width) {
11003 if (*parser->current.start >= 0x80) {
11004 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11005 } else if (*parser->current.start == '\\') {
11006 switch (peek_at(parser, parser->current.start + 1)) {
11007 case ' ':
11008 parser->current.end++;
11009 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11010 break;
11011 case '\f':
11012 parser->current.end++;
11013 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11014 break;
11015 case '\t':
11016 parser->current.end++;
11017 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11018 break;
11019 case '\v':
11020 parser->current.end++;
11021 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11022 break;
11023 case '\r':
11024 if (peek_at(parser, parser->current.start + 2) != '\n') {
11025 parser->current.end++;
11026 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11027 break;
11028 }
11030 default:
11031 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11032 break;
11033 }
11034 } else if (char_is_ascii_printable(*parser->current.start)) {
11035 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11036 } else {
11037 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11038 }
11039
11040 goto lex_next_token;
11041 }
11042
11043 parser->current.end = parser->current.start + width;
11044 }
11045
11046 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11047
11048 // If we've hit a __END__ and it was at the start of the
11049 // line or the start of the file and it is followed by
11050 // either a \n or a \r\n, then this is the last token of the
11051 // file.
11052 if (
11053 ((parser->current.end - parser->current.start) == 7) &&
11054 current_token_starts_line(parser) &&
11055 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11056 (parser->current.end == parser->end || match_eol(parser))
11057 ) {
11058 // Since we know we're about to add an __END__ comment,
11059 // we know we need to add all of the newlines to get the
11060 // correct column information for it.
11061 const uint8_t *cursor = parser->current.end;
11062 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11063 pm_newline_list_append(&parser->newline_list, cursor++);
11064 }
11065
11066 parser->current.end = parser->end;
11067 parser->current.type = PM_TOKEN___END__;
11068 parser_lex_callback(parser);
11069
11070 parser->data_loc.start = parser->current.start;
11071 parser->data_loc.end = parser->current.end;
11072
11073 LEX(PM_TOKEN_EOF);
11074 }
11075
11076 pm_lex_state_t last_state = parser->lex_state;
11077
11078 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11079 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11080 if (previous_command_start) {
11081 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11082 } else {
11083 lex_state_set(parser, PM_LEX_STATE_ARG);
11084 }
11085 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11086 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11087 } else {
11088 lex_state_set(parser, PM_LEX_STATE_END);
11089 }
11090 }
11091
11092 if (
11093 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11094 (type == PM_TOKEN_IDENTIFIER) &&
11095 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11096 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11097 ) {
11098 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11099 }
11100
11101 LEX(type);
11102 }
11103 }
11104 }
11105 case PM_LEX_LIST: {
11106 if (parser->next_start != NULL) {
11107 parser->current.end = parser->next_start;
11108 parser->next_start = NULL;
11109 }
11110
11111 // First we'll set the beginning of the token.
11112 parser->current.start = parser->current.end;
11113
11114 // If there's any whitespace at the start of the list, then we're
11115 // going to trim it off the beginning and create a new token.
11116 size_t whitespace;
11117
11118 if (parser->heredoc_end) {
11119 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11120 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11121 whitespace += 1;
11122 }
11123 } else {
11124 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11125 }
11126
11127 if (whitespace > 0) {
11128 parser->current.end += whitespace;
11129 if (peek_offset(parser, -1) == '\n') {
11130 // mutates next_start
11131 parser_flush_heredoc_end(parser);
11132 }
11133 LEX(PM_TOKEN_WORDS_SEP);
11134 }
11135
11136 // We'll check if we're at the end of the file. If we are, then we
11137 // need to return the EOF token.
11138 if (parser->current.end >= parser->end) {
11139 LEX(PM_TOKEN_EOF);
11140 }
11141
11142 // Here we'll get a list of the places where strpbrk should break,
11143 // and then find the first one.
11144 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11145 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11146 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11147
11148 // If we haven't found an escape yet, then this buffer will be
11149 // unallocated since we can refer directly to the source string.
11150 pm_token_buffer_t token_buffer = { 0 };
11151
11152 while (breakpoint != NULL) {
11153 // If we hit whitespace, then we must have received content by
11154 // now, so we can return an element of the list.
11155 if (pm_char_is_whitespace(*breakpoint)) {
11156 parser->current.end = breakpoint;
11157 pm_token_buffer_flush(parser, &token_buffer);
11158 LEX(PM_TOKEN_STRING_CONTENT);
11159 }
11160
11161 // If we hit the terminator, we need to check which token to
11162 // return.
11163 if (*breakpoint == lex_mode->as.list.terminator) {
11164 // If this terminator doesn't actually close the list, then
11165 // we need to continue on past it.
11166 if (lex_mode->as.list.nesting > 0) {
11167 parser->current.end = breakpoint + 1;
11168 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11169 lex_mode->as.list.nesting--;
11170 continue;
11171 }
11172
11173 // If we've hit the terminator and we've already skipped
11174 // past content, then we can return a list node.
11175 if (breakpoint > parser->current.start) {
11176 parser->current.end = breakpoint;
11177 pm_token_buffer_flush(parser, &token_buffer);
11178 LEX(PM_TOKEN_STRING_CONTENT);
11179 }
11180
11181 // Otherwise, switch back to the default state and return
11182 // the end of the list.
11183 parser->current.end = breakpoint + 1;
11184 lex_mode_pop(parser);
11185 lex_state_set(parser, PM_LEX_STATE_END);
11186 LEX(PM_TOKEN_STRING_END);
11187 }
11188
11189 // If we hit a null byte, skip directly past it.
11190 if (*breakpoint == '\0') {
11191 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11192 continue;
11193 }
11194
11195 // If we hit escapes, then we need to treat the next token
11196 // literally. In this case we'll skip past the next character
11197 // and find the next breakpoint.
11198 if (*breakpoint == '\\') {
11199 parser->current.end = breakpoint + 1;
11200
11201 // If we've hit the end of the file, then break out of the
11202 // loop by setting the breakpoint to NULL.
11203 if (parser->current.end == parser->end) {
11204 breakpoint = NULL;
11205 continue;
11206 }
11207
11208 pm_token_buffer_escape(parser, &token_buffer);
11209 uint8_t peeked = peek(parser);
11210
11211 switch (peeked) {
11212 case ' ':
11213 case '\f':
11214 case '\t':
11215 case '\v':
11216 case '\\':
11217 pm_token_buffer_push_byte(&token_buffer, peeked);
11218 parser->current.end++;
11219 break;
11220 case '\r':
11221 parser->current.end++;
11222 if (peek(parser) != '\n') {
11223 pm_token_buffer_push_byte(&token_buffer, '\r');
11224 break;
11225 }
11227 case '\n':
11228 pm_token_buffer_push_byte(&token_buffer, '\n');
11229
11230 if (parser->heredoc_end) {
11231 // ... if we are on the same line as a heredoc,
11232 // flush the heredoc and continue parsing after
11233 // heredoc_end.
11234 parser_flush_heredoc_end(parser);
11235 pm_token_buffer_copy(parser, &token_buffer);
11236 LEX(PM_TOKEN_STRING_CONTENT);
11237 } else {
11238 // ... else track the newline.
11239 pm_newline_list_append(&parser->newline_list, parser->current.end);
11240 }
11241
11242 parser->current.end++;
11243 break;
11244 default:
11245 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11246 pm_token_buffer_push_byte(&token_buffer, peeked);
11247 parser->current.end++;
11248 } else if (lex_mode->as.list.interpolation) {
11249 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11250 } else {
11251 pm_token_buffer_push_byte(&token_buffer, '\\');
11252 pm_token_buffer_push_escaped(&token_buffer, parser);
11253 }
11254
11255 break;
11256 }
11257
11258 token_buffer.cursor = parser->current.end;
11259 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11260 continue;
11261 }
11262
11263 // If we hit a #, then we will attempt to lex interpolation.
11264 if (*breakpoint == '#') {
11265 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11266
11267 if (type == PM_TOKEN_NOT_PROVIDED) {
11268 // If we haven't returned at this point then we had something
11269 // that looked like an interpolated class or instance variable
11270 // like "#@" but wasn't actually. In this case we'll just skip
11271 // to the next breakpoint.
11272 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11273 continue;
11274 }
11275
11276 if (type == PM_TOKEN_STRING_CONTENT) {
11277 pm_token_buffer_flush(parser, &token_buffer);
11278 }
11279
11280 LEX(type);
11281 }
11282
11283 // If we've hit the incrementor, then we need to skip past it
11284 // and find the next breakpoint.
11285 assert(*breakpoint == lex_mode->as.list.incrementor);
11286 parser->current.end = breakpoint + 1;
11287 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11288 lex_mode->as.list.nesting++;
11289 continue;
11290 }
11291
11292 if (parser->current.end > parser->current.start) {
11293 pm_token_buffer_flush(parser, &token_buffer);
11294 LEX(PM_TOKEN_STRING_CONTENT);
11295 }
11296
11297 // If we were unable to find a breakpoint, then this token hits the
11298 // end of the file.
11299 parser->current.end = parser->end;
11300 pm_token_buffer_flush(parser, &token_buffer);
11301 LEX(PM_TOKEN_STRING_CONTENT);
11302 }
11303 case PM_LEX_REGEXP: {
11304 // First, we'll set the start of this token to be the current end.
11305 if (parser->next_start == NULL) {
11306 parser->current.start = parser->current.end;
11307 } else {
11308 parser->current.start = parser->next_start;
11309 parser->current.end = parser->next_start;
11310 parser->next_start = NULL;
11311 }
11312
11313 // We'll check if we're at the end of the file. If we are, then we
11314 // need to return the EOF token.
11315 if (parser->current.end >= parser->end) {
11316 LEX(PM_TOKEN_EOF);
11317 }
11318
11319 // Get a reference to the current mode.
11320 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11321
11322 // These are the places where we need to split up the content of the
11323 // regular expression. We'll use strpbrk to find the first of these
11324 // characters.
11325 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11326 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11327 pm_regexp_token_buffer_t token_buffer = { 0 };
11328
11329 while (breakpoint != NULL) {
11330 uint8_t term = lex_mode->as.regexp.terminator;
11331 bool is_terminator = (*breakpoint == term);
11332
11333 // If the terminator is newline, we need to consider \r\n _also_ a newline
11334 // For example: `%r\nfoo\r\n`
11335 // The regexp should be /foo/, not /foo\r/
11336 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11337 if (term == '\n') {
11338 is_terminator = true;
11339 }
11340
11341 // If the terminator is a CR, but we see a CRLF, we need to
11342 // treat the CRLF as a newline, meaning this is _not_ the
11343 // terminator
11344 if (term == '\r') {
11345 is_terminator = false;
11346 }
11347 }
11348
11349 // If we hit the terminator, we need to determine what kind of
11350 // token to return.
11351 if (is_terminator) {
11352 if (lex_mode->as.regexp.nesting > 0) {
11353 parser->current.end = breakpoint + 1;
11354 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11355 lex_mode->as.regexp.nesting--;
11356 continue;
11357 }
11358
11359 // Here we've hit the terminator. If we have already consumed
11360 // content then we need to return that content as string content
11361 // first.
11362 if (breakpoint > parser->current.start) {
11363 parser->current.end = breakpoint;
11364 pm_regexp_token_buffer_flush(parser, &token_buffer);
11365 LEX(PM_TOKEN_STRING_CONTENT);
11366 }
11367
11368 // Check here if we need to track the newline.
11369 size_t eol_length = match_eol_at(parser, breakpoint);
11370 if (eol_length) {
11371 parser->current.end = breakpoint + eol_length;
11372
11373 // Track the newline if we're not in a heredoc that
11374 // would already have added the newline to the
11375 // list.
11376 if (parser->heredoc_end == NULL) {
11377 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11378 }
11379 } else {
11380 parser->current.end = breakpoint + 1;
11381 }
11382
11383 // Since we've hit the terminator of the regular expression,
11384 // we now need to parse the options.
11385 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11386
11387 lex_mode_pop(parser);
11388 lex_state_set(parser, PM_LEX_STATE_END);
11389 LEX(PM_TOKEN_REGEXP_END);
11390 }
11391
11392 // If we've hit the incrementor, then we need to skip past it
11393 // and find the next breakpoint.
11394 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11395 parser->current.end = breakpoint + 1;
11396 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11397 lex_mode->as.regexp.nesting++;
11398 continue;
11399 }
11400
11401 switch (*breakpoint) {
11402 case '\0':
11403 // If we hit a null byte, skip directly past it.
11404 parser->current.end = breakpoint + 1;
11405 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11406 break;
11407 case '\r':
11408 if (peek_at(parser, breakpoint + 1) != '\n') {
11409 parser->current.end = breakpoint + 1;
11410 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11411 break;
11412 }
11413
11414 breakpoint++;
11415 parser->current.end = breakpoint;
11416 pm_regexp_token_buffer_escape(parser, &token_buffer);
11417 token_buffer.base.cursor = breakpoint;
11418
11420 case '\n':
11421 // If we've hit a newline, then we need to track that in
11422 // the list of newlines.
11423 if (parser->heredoc_end == NULL) {
11424 pm_newline_list_append(&parser->newline_list, breakpoint);
11425 parser->current.end = breakpoint + 1;
11426 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11427 break;
11428 }
11429
11430 parser->current.end = breakpoint + 1;
11431 parser_flush_heredoc_end(parser);
11432 pm_regexp_token_buffer_flush(parser, &token_buffer);
11433 LEX(PM_TOKEN_STRING_CONTENT);
11434 case '\\': {
11435 // If we hit escapes, then we need to treat the next
11436 // token literally. In this case we'll skip past the
11437 // next character and find the next breakpoint.
11438 parser->current.end = breakpoint + 1;
11439
11440 // If we've hit the end of the file, then break out of
11441 // the loop by setting the breakpoint to NULL.
11442 if (parser->current.end == parser->end) {
11443 breakpoint = NULL;
11444 break;
11445 }
11446
11447 pm_regexp_token_buffer_escape(parser, &token_buffer);
11448 uint8_t peeked = peek(parser);
11449
11450 switch (peeked) {
11451 case '\r':
11452 parser->current.end++;
11453 if (peek(parser) != '\n') {
11454 if (lex_mode->as.regexp.terminator != '\r') {
11455 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11456 }
11457 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11458 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11459 break;
11460 }
11462 case '\n':
11463 if (parser->heredoc_end) {
11464 // ... if we are on the same line as a heredoc,
11465 // flush the heredoc and continue parsing after
11466 // heredoc_end.
11467 parser_flush_heredoc_end(parser);
11468 pm_regexp_token_buffer_copy(parser, &token_buffer);
11469 LEX(PM_TOKEN_STRING_CONTENT);
11470 } else {
11471 // ... else track the newline.
11472 pm_newline_list_append(&parser->newline_list, parser->current.end);
11473 }
11474
11475 parser->current.end++;
11476 break;
11477 case 'c':
11478 case 'C':
11479 case 'M':
11480 case 'u':
11481 case 'x':
11482 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11483 break;
11484 default:
11485 if (lex_mode->as.regexp.terminator == peeked) {
11486 // Some characters when they are used as the
11487 // terminator also receive an escape. They are
11488 // enumerated here.
11489 switch (peeked) {
11490 case '$': case ')': case '*': case '+':
11491 case '.': case '>': case '?': case ']':
11492 case '^': case '|': case '}':
11493 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11494 break;
11495 default:
11496 break;
11497 }
11498
11499 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11500 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11501 parser->current.end++;
11502 break;
11503 }
11504
11505 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11506 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11507 break;
11508 }
11509
11510 token_buffer.base.cursor = parser->current.end;
11511 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11512 break;
11513 }
11514 case '#': {
11515 // If we hit a #, then we will attempt to lex
11516 // interpolation.
11517 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11518
11519 if (type == PM_TOKEN_NOT_PROVIDED) {
11520 // If we haven't returned at this point then we had
11521 // something that looked like an interpolated class or
11522 // instance variable like "#@" but wasn't actually. In
11523 // this case we'll just skip to the next breakpoint.
11524 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11525 break;
11526 }
11527
11528 if (type == PM_TOKEN_STRING_CONTENT) {
11529 pm_regexp_token_buffer_flush(parser, &token_buffer);
11530 }
11531
11532 LEX(type);
11533 }
11534 default:
11535 assert(false && "unreachable");
11536 break;
11537 }
11538 }
11539
11540 if (parser->current.end > parser->current.start) {
11541 pm_regexp_token_buffer_flush(parser, &token_buffer);
11542 LEX(PM_TOKEN_STRING_CONTENT);
11543 }
11544
11545 // If we were unable to find a breakpoint, then this token hits the
11546 // end of the file.
11547 parser->current.end = parser->end;
11548 pm_regexp_token_buffer_flush(parser, &token_buffer);
11549 LEX(PM_TOKEN_STRING_CONTENT);
11550 }
11551 case PM_LEX_STRING: {
11552 // First, we'll set the start of this token to be the current end.
11553 if (parser->next_start == NULL) {
11554 parser->current.start = parser->current.end;
11555 } else {
11556 parser->current.start = parser->next_start;
11557 parser->current.end = parser->next_start;
11558 parser->next_start = NULL;
11559 }
11560
11561 // We'll check if we're at the end of the file. If we are, then we need to
11562 // return the EOF token.
11563 if (parser->current.end >= parser->end) {
11564 LEX(PM_TOKEN_EOF);
11565 }
11566
11567 // These are the places where we need to split up the content of the
11568 // string. We'll use strpbrk to find the first of these characters.
11569 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11570 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11571 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11572
11573 // If we haven't found an escape yet, then this buffer will be
11574 // unallocated since we can refer directly to the source string.
11575 pm_token_buffer_t token_buffer = { 0 };
11576
11577 while (breakpoint != NULL) {
11578 // If we hit the incrementor, then we'll increment the nesting and
11579 // continue lexing.
11580 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11581 lex_mode->as.string.nesting++;
11582 parser->current.end = breakpoint + 1;
11583 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11584 continue;
11585 }
11586
11587 uint8_t term = lex_mode->as.string.terminator;
11588 bool is_terminator = (*breakpoint == term);
11589
11590 // If the terminator is newline, we need to consider \r\n _also_ a newline
11591 // For example: `%\nfoo\r\n`
11592 // The string should be "foo", not "foo\r"
11593 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11594 if (term == '\n') {
11595 is_terminator = true;
11596 }
11597
11598 // If the terminator is a CR, but we see a CRLF, we need to
11599 // treat the CRLF as a newline, meaning this is _not_ the
11600 // terminator
11601 if (term == '\r') {
11602 is_terminator = false;
11603 }
11604 }
11605
11606 // Note that we have to check the terminator here first because we could
11607 // potentially be parsing a % string that has a # character as the
11608 // terminator.
11609 if (is_terminator) {
11610 // If this terminator doesn't actually close the string, then we need
11611 // to continue on past it.
11612 if (lex_mode->as.string.nesting > 0) {
11613 parser->current.end = breakpoint + 1;
11614 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11615 lex_mode->as.string.nesting--;
11616 continue;
11617 }
11618
11619 // Here we've hit the terminator. If we have already consumed content
11620 // then we need to return that content as string content first.
11621 if (breakpoint > parser->current.start) {
11622 parser->current.end = breakpoint;
11623 pm_token_buffer_flush(parser, &token_buffer);
11624 LEX(PM_TOKEN_STRING_CONTENT);
11625 }
11626
11627 // Otherwise we need to switch back to the parent lex mode and
11628 // return the end of the string.
11629 size_t eol_length = match_eol_at(parser, breakpoint);
11630 if (eol_length) {
11631 parser->current.end = breakpoint + eol_length;
11632
11633 // Track the newline if we're not in a heredoc that
11634 // would have already have added the newline to the
11635 // list.
11636 if (parser->heredoc_end == NULL) {
11637 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11638 }
11639 } else {
11640 parser->current.end = breakpoint + 1;
11641 }
11642
11643 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11644 parser->current.end++;
11645 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11646 lex_mode_pop(parser);
11647 LEX(PM_TOKEN_LABEL_END);
11648 }
11649
11650 // When the delimiter itself is a newline, we won't
11651 // get a chance to flush heredocs in the usual places since
11652 // the newline is already consumed.
11653 if (term == '\n' && parser->heredoc_end) {
11654 parser_flush_heredoc_end(parser);
11655 }
11656
11657 lex_state_set(parser, PM_LEX_STATE_END);
11658 lex_mode_pop(parser);
11659 LEX(PM_TOKEN_STRING_END);
11660 }
11661
11662 switch (*breakpoint) {
11663 case '\0':
11664 // Skip directly past the null character.
11665 parser->current.end = breakpoint + 1;
11666 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11667 break;
11668 case '\r':
11669 if (peek_at(parser, breakpoint + 1) != '\n') {
11670 parser->current.end = breakpoint + 1;
11671 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11672 break;
11673 }
11674
11675 // If we hit a \r\n sequence, then we need to treat it
11676 // as a newline.
11677 breakpoint++;
11678 parser->current.end = breakpoint;
11679 pm_token_buffer_escape(parser, &token_buffer);
11680 token_buffer.cursor = breakpoint;
11681
11683 case '\n':
11684 // When we hit a newline, we need to flush any potential
11685 // heredocs. Note that this has to happen after we check
11686 // for the terminator in case the terminator is a
11687 // newline character.
11688 if (parser->heredoc_end == NULL) {
11689 pm_newline_list_append(&parser->newline_list, breakpoint);
11690 parser->current.end = breakpoint + 1;
11691 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11692 break;
11693 }
11694
11695 parser->current.end = breakpoint + 1;
11696 parser_flush_heredoc_end(parser);
11697 pm_token_buffer_flush(parser, &token_buffer);
11698 LEX(PM_TOKEN_STRING_CONTENT);
11699 case '\\': {
11700 // Here we hit escapes.
11701 parser->current.end = breakpoint + 1;
11702
11703 // If we've hit the end of the file, then break out of
11704 // the loop by setting the breakpoint to NULL.
11705 if (parser->current.end == parser->end) {
11706 breakpoint = NULL;
11707 continue;
11708 }
11709
11710 pm_token_buffer_escape(parser, &token_buffer);
11711 uint8_t peeked = peek(parser);
11712
11713 switch (peeked) {
11714 case '\\':
11715 pm_token_buffer_push_byte(&token_buffer, '\\');
11716 parser->current.end++;
11717 break;
11718 case '\r':
11719 parser->current.end++;
11720 if (peek(parser) != '\n') {
11721 if (!lex_mode->as.string.interpolation) {
11722 pm_token_buffer_push_byte(&token_buffer, '\\');
11723 }
11724 pm_token_buffer_push_byte(&token_buffer, '\r');
11725 break;
11726 }
11728 case '\n':
11729 if (!lex_mode->as.string.interpolation) {
11730 pm_token_buffer_push_byte(&token_buffer, '\\');
11731 pm_token_buffer_push_byte(&token_buffer, '\n');
11732 }
11733
11734 if (parser->heredoc_end) {
11735 // ... if we are on the same line as a heredoc,
11736 // flush the heredoc and continue parsing after
11737 // heredoc_end.
11738 parser_flush_heredoc_end(parser);
11739 pm_token_buffer_copy(parser, &token_buffer);
11740 LEX(PM_TOKEN_STRING_CONTENT);
11741 } else {
11742 // ... else track the newline.
11743 pm_newline_list_append(&parser->newline_list, parser->current.end);
11744 }
11745
11746 parser->current.end++;
11747 break;
11748 default:
11749 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11750 pm_token_buffer_push_byte(&token_buffer, peeked);
11751 parser->current.end++;
11752 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11753 pm_token_buffer_push_byte(&token_buffer, peeked);
11754 parser->current.end++;
11755 } else if (lex_mode->as.string.interpolation) {
11756 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11757 } else {
11758 pm_token_buffer_push_byte(&token_buffer, '\\');
11759 pm_token_buffer_push_escaped(&token_buffer, parser);
11760 }
11761
11762 break;
11763 }
11764
11765 token_buffer.cursor = parser->current.end;
11766 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11767 break;
11768 }
11769 case '#': {
11770 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11771
11772 if (type == PM_TOKEN_NOT_PROVIDED) {
11773 // If we haven't returned at this point then we had something that
11774 // looked like an interpolated class or instance variable like "#@"
11775 // but wasn't actually. In this case we'll just skip to the next
11776 // breakpoint.
11777 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11778 break;
11779 }
11780
11781 if (type == PM_TOKEN_STRING_CONTENT) {
11782 pm_token_buffer_flush(parser, &token_buffer);
11783 }
11784
11785 LEX(type);
11786 }
11787 default:
11788 assert(false && "unreachable");
11789 }
11790 }
11791
11792 if (parser->current.end > parser->current.start) {
11793 pm_token_buffer_flush(parser, &token_buffer);
11794 LEX(PM_TOKEN_STRING_CONTENT);
11795 }
11796
11797 // If we've hit the end of the string, then this is an unterminated
11798 // string. In that case we'll return a string content token.
11799 parser->current.end = parser->end;
11800 pm_token_buffer_flush(parser, &token_buffer);
11801 LEX(PM_TOKEN_STRING_CONTENT);
11802 }
11803 case PM_LEX_HEREDOC: {
11804 // First, we'll set to start of this token.
11805 if (parser->next_start == NULL) {
11806 parser->current.start = parser->current.end;
11807 } else {
11808 parser->current.start = parser->next_start;
11809 parser->current.end = parser->next_start;
11810 parser->heredoc_end = NULL;
11811 parser->next_start = NULL;
11812 }
11813
11814 // Now let's grab the information about the identifier off of the
11815 // current lex mode.
11816 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11817 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11818
11819 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11820 lex_mode->as.heredoc.line_continuation = false;
11821
11822 // We'll check if we're at the end of the file. If we are, then we
11823 // will add an error (because we weren't able to find the
11824 // terminator) but still continue parsing so that content after the
11825 // declaration of the heredoc can be parsed.
11826 if (parser->current.end >= parser->end) {
11827 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11828 parser->next_start = lex_mode->as.heredoc.next_start;
11829 parser->heredoc_end = parser->current.end;
11830 lex_state_set(parser, PM_LEX_STATE_END);
11831 lex_mode_pop(parser);
11832 LEX(PM_TOKEN_HEREDOC_END);
11833 }
11834
11835 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11836 size_t ident_length = heredoc_lex_mode->ident_length;
11837
11838 // If we are immediately following a newline and we have hit the
11839 // terminator, then we need to return the ending of the heredoc.
11840 if (current_token_starts_line(parser)) {
11841 const uint8_t *start = parser->current.start;
11842
11843 if (!line_continuation && (start + ident_length <= parser->end)) {
11844 const uint8_t *newline = next_newline(start, parser->end - start);
11845 const uint8_t *ident_end = newline;
11846 const uint8_t *terminator_end = newline;
11847
11848 if (newline == NULL) {
11849 terminator_end = parser->end;
11850 ident_end = parser->end;
11851 } else {
11852 terminator_end++;
11853 if (newline[-1] == '\r') {
11854 ident_end--; // Remove \r
11855 }
11856 }
11857
11858 const uint8_t *terminator_start = ident_end - ident_length;
11859 const uint8_t *cursor = start;
11860
11861 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11862 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11863 cursor++;
11864 }
11865 }
11866
11867 if (
11868 (cursor == terminator_start) &&
11869 (memcmp(terminator_start, ident_start, ident_length) == 0)
11870 ) {
11871 if (newline != NULL) {
11872 pm_newline_list_append(&parser->newline_list, newline);
11873 }
11874
11875 parser->current.end = terminator_end;
11876 if (*lex_mode->as.heredoc.next_start == '\\') {
11877 parser->next_start = NULL;
11878 } else {
11879 parser->next_start = lex_mode->as.heredoc.next_start;
11880 parser->heredoc_end = parser->current.end;
11881 }
11882
11883 lex_state_set(parser, PM_LEX_STATE_END);
11884 lex_mode_pop(parser);
11885 LEX(PM_TOKEN_HEREDOC_END);
11886 }
11887 }
11888
11889 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11890 if (
11891 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11892 lex_mode->as.heredoc.common_whitespace != NULL &&
11893 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11894 peek_at(parser, start) != '\n'
11895 ) {
11896 *lex_mode->as.heredoc.common_whitespace = whitespace;
11897 }
11898 }
11899
11900 // Otherwise we'll be parsing string content. These are the places
11901 // where we need to split up the content of the heredoc. We'll use
11902 // strpbrk to find the first of these characters.
11903 uint8_t breakpoints[] = "\r\n\\#";
11904
11905 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11906 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11907 breakpoints[3] = '\0';
11908 }
11909
11910 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11911 pm_token_buffer_t token_buffer = { 0 };
11912 bool was_line_continuation = false;
11913
11914 while (breakpoint != NULL) {
11915 switch (*breakpoint) {
11916 case '\0':
11917 // Skip directly past the null character.
11918 parser->current.end = breakpoint + 1;
11919 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11920 break;
11921 case '\r':
11922 parser->current.end = breakpoint + 1;
11923
11924 if (peek_at(parser, breakpoint + 1) != '\n') {
11925 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11926 break;
11927 }
11928
11929 // If we hit a \r\n sequence, then we want to replace it
11930 // with a single \n character in the final string.
11931 breakpoint++;
11932 pm_token_buffer_escape(parser, &token_buffer);
11933 token_buffer.cursor = breakpoint;
11934
11936 case '\n': {
11937 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11938 parser_flush_heredoc_end(parser);
11939 parser->current.end = breakpoint + 1;
11940 pm_token_buffer_flush(parser, &token_buffer);
11941 LEX(PM_TOKEN_STRING_CONTENT);
11942 }
11943
11944 pm_newline_list_append(&parser->newline_list, breakpoint);
11945
11946 // If we have a - or ~ heredoc, then we can match after
11947 // some leading whitespace.
11948 const uint8_t *start = breakpoint + 1;
11949
11950 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11951 // We want to match the terminator starting from the end of the line in case
11952 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
11953 const uint8_t *newline = next_newline(start, parser->end - start);
11954
11955 if (newline == NULL) {
11956 newline = parser->end;
11957 } else if (newline[-1] == '\r') {
11958 newline--; // Remove \r
11959 }
11960
11961 // Start of a possible terminator.
11962 const uint8_t *terminator_start = newline - ident_length;
11963
11964 // Cursor to check for the leading whitespace. We skip the
11965 // leading whitespace if we have a - or ~ heredoc.
11966 const uint8_t *cursor = start;
11967
11968 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11969 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11970 cursor++;
11971 }
11972 }
11973
11974 if (
11975 cursor == terminator_start &&
11976 (memcmp(terminator_start, ident_start, ident_length) == 0)
11977 ) {
11978 parser->current.end = breakpoint + 1;
11979 pm_token_buffer_flush(parser, &token_buffer);
11980 LEX(PM_TOKEN_STRING_CONTENT);
11981 }
11982 }
11983
11984 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
11985
11986 // If we have hit a newline that is followed by a valid
11987 // terminator, then we need to return the content of the
11988 // heredoc here as string content. Then, the next time a
11989 // token is lexed, it will match again and return the
11990 // end of the heredoc.
11991 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
11992 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
11993 *lex_mode->as.heredoc.common_whitespace = whitespace;
11994 }
11995
11996 parser->current.end = breakpoint + 1;
11997 pm_token_buffer_flush(parser, &token_buffer);
11998 LEX(PM_TOKEN_STRING_CONTENT);
11999 }
12000
12001 // Otherwise we hit a newline and it wasn't followed by
12002 // a terminator, so we can continue parsing.
12003 parser->current.end = breakpoint + 1;
12004 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12005 break;
12006 }
12007 case '\\': {
12008 // If we hit an escape, then we need to skip past
12009 // however many characters the escape takes up. However
12010 // it's important that if \n or \r\n are escaped, we
12011 // stop looping before the newline and not after the
12012 // newline so that we can still potentially find the
12013 // terminator of the heredoc.
12014 parser->current.end = breakpoint + 1;
12015
12016 // If we've hit the end of the file, then break out of
12017 // the loop by setting the breakpoint to NULL.
12018 if (parser->current.end == parser->end) {
12019 breakpoint = NULL;
12020 continue;
12021 }
12022
12023 pm_token_buffer_escape(parser, &token_buffer);
12024 uint8_t peeked = peek(parser);
12025
12026 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12027 switch (peeked) {
12028 case '\r':
12029 parser->current.end++;
12030 if (peek(parser) != '\n') {
12031 pm_token_buffer_push_byte(&token_buffer, '\\');
12032 pm_token_buffer_push_byte(&token_buffer, '\r');
12033 break;
12034 }
12036 case '\n':
12037 pm_token_buffer_push_byte(&token_buffer, '\\');
12038 pm_token_buffer_push_byte(&token_buffer, '\n');
12039 token_buffer.cursor = parser->current.end + 1;
12040 breakpoint = parser->current.end;
12041 continue;
12042 default:
12043 pm_token_buffer_push_byte(&token_buffer, '\\');
12044 pm_token_buffer_push_escaped(&token_buffer, parser);
12045 break;
12046 }
12047 } else {
12048 switch (peeked) {
12049 case '\r':
12050 parser->current.end++;
12051 if (peek(parser) != '\n') {
12052 pm_token_buffer_push_byte(&token_buffer, '\r');
12053 break;
12054 }
12056 case '\n':
12057 // If we are in a tilde here, we should
12058 // break out of the loop and return the
12059 // string content.
12060 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12061 const uint8_t *end = parser->current.end;
12062
12063 if (parser->heredoc_end == NULL) {
12064 pm_newline_list_append(&parser->newline_list, end);
12065 }
12066
12067 // Here we want the buffer to only
12068 // include up to the backslash.
12069 parser->current.end = breakpoint;
12070 pm_token_buffer_flush(parser, &token_buffer);
12071
12072 // Now we can advance the end of the
12073 // token past the newline.
12074 parser->current.end = end + 1;
12075 lex_mode->as.heredoc.line_continuation = true;
12076 LEX(PM_TOKEN_STRING_CONTENT);
12077 }
12078
12079 was_line_continuation = true;
12080 token_buffer.cursor = parser->current.end + 1;
12081 breakpoint = parser->current.end;
12082 continue;
12083 default:
12084 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12085 break;
12086 }
12087 }
12088
12089 token_buffer.cursor = parser->current.end;
12090 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12091 break;
12092 }
12093 case '#': {
12094 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12095
12096 if (type == PM_TOKEN_NOT_PROVIDED) {
12097 // If we haven't returned at this point then we had
12098 // something that looked like an interpolated class
12099 // or instance variable like "#@" but wasn't
12100 // actually. In this case we'll just skip to the
12101 // next breakpoint.
12102 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12103 break;
12104 }
12105
12106 if (type == PM_TOKEN_STRING_CONTENT) {
12107 pm_token_buffer_flush(parser, &token_buffer);
12108 }
12109
12110 LEX(type);
12111 }
12112 default:
12113 assert(false && "unreachable");
12114 }
12115
12116 was_line_continuation = false;
12117 }
12118
12119 if (parser->current.end > parser->current.start) {
12120 parser->current.end = parser->end;
12121 pm_token_buffer_flush(parser, &token_buffer);
12122 LEX(PM_TOKEN_STRING_CONTENT);
12123 }
12124
12125 // If we've hit the end of the string, then this is an unterminated
12126 // heredoc. In that case we'll return a string content token.
12127 parser->current.end = parser->end;
12128 pm_token_buffer_flush(parser, &token_buffer);
12129 LEX(PM_TOKEN_STRING_CONTENT);
12130 }
12131 }
12132
12133 assert(false && "unreachable");
12134}
12135
12136#undef LEX
12137
12138/******************************************************************************/
12139/* Parse functions */
12140/******************************************************************************/
12141
/**
 * The named precedence levels used when parsing expressions. The enumerators
 * are listed in increasing numeric order, each one annotated with the
 * operators that sit at that level.
 *
 * Consecutive levels are two apart, which leaves an unused odd value between
 * them: the LEFT_ASSOCIATIVE and NON_ASSOCIATIVE initializer macros use
 * `precedence + 1` as the right-hand power of an operator.
 *
 * PM_BINDING_POWER_UNSET is zero, so any entry of a zero-initialized table
 * reads as "no binding power".
 */
typedef enum {
    PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT = 2,
    PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
    PM_BINDING_POWER_MODIFIER = 6, // if unless until while
    PM_BINDING_POWER_COMPOSITION = 8, // and or
    PM_BINDING_POWER_NOT = 10, // not
    PM_BINDING_POWER_MATCH = 12, // => in
    PM_BINDING_POWER_DEFINED = 14, // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY = 20, // ?:
    PM_BINDING_POWER_RANGE = 22, // .. ...
    PM_BINDING_POWER_LOGICAL_OR = 24, // ||
    PM_BINDING_POWER_LOGICAL_AND = 26, // &&
    PM_BINDING_POWER_EQUALITY = 28, // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON = 30, // > >= < <=
    PM_BINDING_POWER_BITWISE_OR = 32, // | ^
    PM_BINDING_POWER_BITWISE_AND = 34, // &
    PM_BINDING_POWER_SHIFT = 36, // << >>
    PM_BINDING_POWER_TERM = 38, // + -
    PM_BINDING_POWER_FACTOR = 40, // * / %
    PM_BINDING_POWER_UMINUS = 42, // -@
    PM_BINDING_POWER_EXPONENT = 44, // **
    PM_BINDING_POWER_UNARY = 46, // ! ~ +@
    PM_BINDING_POWER_INDEX = 48, // [] []=
    PM_BINDING_POWER_CALL = 50, // :: .
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12179
12184typedef struct {
12186 pm_binding_power_t left;
12187
12189 pm_binding_power_t right;
12190
12193
12200
12201#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
12202#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
12203#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
12204#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
12205#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
12206
12207pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12208 // rescue
12209 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12210
12211 // if unless until while
12212 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12213 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12214 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12215 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12216
12217 // and or
12218 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12219 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12220
12221 // => in
12222 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12223 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12224
12225 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12226 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12227 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12228 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12229 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12230 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12231 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12232 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12233 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12234 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12235 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12236 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12237 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12238 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12239 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12240
12241 // ?:
12242 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12243
12244 // .. ...
12245 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12246 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12247 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12248 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12249
12250 // ||
12251 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12252
12253 // &&
12254 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12255
12256 // != !~ == === =~ <=>
12257 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12258 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12259 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12260 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12261 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12262 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12263
12264 // > >= < <=
12265 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12266 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12267 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12268 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12269
12270 // ^ |
12271 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12272 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12273
12274 // &
12275 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12276
12277 // >> <<
12278 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12279 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12280
12281 // - +
12282 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12283 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12284
12285 // % / *
12286 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12287 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12288 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12289 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12290
12291 // -@
12292 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12293 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12294
12295 // **
12296 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12297 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12298
12299 // ! ~ +@
12300 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12301 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12302 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12303
12304 // [
12305 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12306
12307 // :: . &.
12308 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12309 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12310 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12311};
12312
12313#undef BINDING_POWER_ASSIGNMENT
12314#undef LEFT_ASSOCIATIVE
12315#undef RIGHT_ASSOCIATIVE
12316#undef RIGHT_ASSOCIATIVE_UNARY
12317
12321static inline bool
12322match1(const pm_parser_t *parser, pm_token_type_t type) {
12323 return parser->current.type == type;
12324}
12325
12329static inline bool
12330match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12331 return match1(parser, type1) || match1(parser, type2);
12332}
12333
12337static inline bool
12338match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12339 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12340}
12341
12345static inline bool
12346match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12347 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12348}
12349
12353static inline bool
12354match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12355 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12356}
12357
12361static inline bool
12362match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12363 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12364}
12365
12372static bool
12373accept1(pm_parser_t *parser, pm_token_type_t type) {
12374 if (match1(parser, type)) {
12375 parser_lex(parser);
12376 return true;
12377 }
12378 return false;
12379}
12380
12385static inline bool
12386accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12387 if (match2(parser, type1, type2)) {
12388 parser_lex(parser);
12389 return true;
12390 }
12391 return false;
12392}
12393
12405static void
12406expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12407 if (accept1(parser, type)) return;
12408
12409 const uint8_t *location = parser->previous.end;
12410 pm_parser_err(parser, location, location, diag_id);
12411
12412 parser->previous.start = location;
12413 parser->previous.type = PM_TOKEN_MISSING;
12414}
12415
12420static void
12421expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12422 if (accept2(parser, type1, type2)) return;
12423
12424 const uint8_t *location = parser->previous.end;
12425 pm_parser_err(parser, location, location, diag_id);
12426
12427 parser->previous.start = location;
12428 parser->previous.type = PM_TOKEN_MISSING;
12429}
12430
12435static void
12436expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12437 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12438 parser_lex(parser);
12439 } else {
12440 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12441 parser->previous.start = parser->previous.end;
12442 parser->previous.type = PM_TOKEN_MISSING;
12443 }
12444}
12445
12452static void
12453expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12454 if (accept1(parser, type)) return;
12455
12456 pm_parser_err(parser, opening->start, opening->end, diag_id);
12457
12458 parser->previous.start = opening->end;
12459 parser->previous.type = PM_TOKEN_MISSING;
12460}
12461
12462static pm_node_t *
12463parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
12464
12469static pm_node_t *
12470parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
12471 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12472 pm_assert_value_expression(parser, node);
12473 return node;
12474}
12475
12494static inline bool
12495token_begins_expression_p(pm_token_type_t type) {
12496 switch (type) {
12497 case PM_TOKEN_EQUAL_GREATER:
12498 case PM_TOKEN_KEYWORD_IN:
12499 // We need to special case this because it is a binary operator that
12500 // should not be marked as beginning an expression.
12501 return false;
12502 case PM_TOKEN_BRACE_RIGHT:
12503 case PM_TOKEN_BRACKET_RIGHT:
12504 case PM_TOKEN_COLON:
12505 case PM_TOKEN_COMMA:
12506 case PM_TOKEN_EMBEXPR_END:
12507 case PM_TOKEN_EOF:
12508 case PM_TOKEN_LAMBDA_BEGIN:
12509 case PM_TOKEN_KEYWORD_DO:
12510 case PM_TOKEN_KEYWORD_DO_LOOP:
12511 case PM_TOKEN_KEYWORD_END:
12512 case PM_TOKEN_KEYWORD_ELSE:
12513 case PM_TOKEN_KEYWORD_ELSIF:
12514 case PM_TOKEN_KEYWORD_ENSURE:
12515 case PM_TOKEN_KEYWORD_THEN:
12516 case PM_TOKEN_KEYWORD_RESCUE:
12517 case PM_TOKEN_KEYWORD_WHEN:
12518 case PM_TOKEN_NEWLINE:
12519 case PM_TOKEN_PARENTHESIS_RIGHT:
12520 case PM_TOKEN_SEMICOLON:
12521 // The reason we need this short-circuit is because we're using the
12522 // binding powers table to tell us if the subsequent token could
12523 // potentially be the start of an expression. If there _is_ a binding
12524 // power for one of these tokens, then we should remove it from this list
12525 // and let it be handled by the default case below.
12526 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12527 return false;
12528 case PM_TOKEN_UAMPERSAND:
12529 // This is a special case because this unary operator cannot appear
12530 // as a general operator, it only appears in certain circumstances.
12531 return false;
12532 case PM_TOKEN_UCOLON_COLON:
12533 case PM_TOKEN_UMINUS:
12534 case PM_TOKEN_UMINUS_NUM:
12535 case PM_TOKEN_UPLUS:
12536 case PM_TOKEN_BANG:
12537 case PM_TOKEN_TILDE:
12538 case PM_TOKEN_UDOT_DOT:
12539 case PM_TOKEN_UDOT_DOT_DOT:
12540 // These unary tokens actually do have binding power associated with them
12541 // so that we can correctly place them into the precedence order. But we
12542 // want them to be marked as beginning an expression, so we need to
12543 // special case them here.
12544 return true;
12545 default:
12546 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12547 }
12548}
12549
12554static pm_node_t *
12555parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
12556 if (accept1(parser, PM_TOKEN_USTAR)) {
12557 pm_token_t operator = parser->previous;
12558 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12559 return UP(pm_splat_node_create(parser, &operator, expression));
12560 }
12561
12562 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
12563}
12564
12565static bool
12566pm_node_unreference_each(const pm_node_t *node, void *data) {
12567 switch (PM_NODE_TYPE(node)) {
12568 /* When we are about to destroy a set of nodes that could potentially
12569 * contain block exits for the current scope, we need to check if they
12570 * are contained in the list of block exits and remove them if they are.
12571 */
12572 case PM_BREAK_NODE:
12573 case PM_NEXT_NODE:
12574 case PM_REDO_NODE: {
12575 pm_parser_t *parser = (pm_parser_t *) data;
12576 size_t index = 0;
12577
12578 while (index < parser->current_block_exits->size) {
12579 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12580
12581 if (block_exit == node) {
12582 if (index + 1 < parser->current_block_exits->size) {
12583 memmove(
12584 &parser->current_block_exits->nodes[index],
12585 &parser->current_block_exits->nodes[index + 1],
12586 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12587 );
12588 }
12589 parser->current_block_exits->size--;
12590
12591 /* Note returning true here because these nodes could have
12592 * arguments that are themselves block exits. */
12593 return true;
12594 }
12595
12596 index++;
12597 }
12598
12599 return true;
12600 }
12601 /* When an implicit local variable is written to or targeted, it becomes
12602 * a regular, named local variable. This branch removes it from the list
12603 * of implicit parameters when that happens. */
12604 case PM_LOCAL_VARIABLE_READ_NODE:
12605 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12606 pm_parser_t *parser = (pm_parser_t *) data;
12607 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12608
12609 for (size_t index = 0; index < implicit_parameters->size; index++) {
12610 if (implicit_parameters->nodes[index] == node) {
12611 /* If the node is not the last one in the list, we need to
12612 * shift the remaining nodes down to fill the gap. This is
12613 * extremely unlikely to happen. */
12614 if (index != implicit_parameters->size - 1) {
12615 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12616 }
12617
12618 implicit_parameters->size--;
12619 break;
12620 }
12621 }
12622
12623 return false;
12624 }
12625 default:
12626 return true;
12627 }
12628}
12629
12635static void
12636pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12637 pm_visit_node(node, pm_node_unreference_each, parser);
12638}
12639
12644static void
12645parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12646 // The method name needs to change. If we previously had
12647 // foo, we now need foo=. In this case we'll allocate a new
12648 // owned string, copy the previous method name in, and
12649 // append an =.
12650 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12651 size_t length = constant->length;
12652 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12653 if (name == NULL) return;
12654
12655 memcpy(name, constant->start, length);
12656 name[length] = '=';
12657
12658 // Now switch the name to the new string.
12659 // This silences clang analyzer warning about leak of memory pointed by `name`.
12660 // NOLINTNEXTLINE(clang-analyzer-*)
12661 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12662}
12663
12670static pm_node_t *
12671parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12672 switch (PM_NODE_TYPE(target)) {
12673 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12674 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12675 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12676 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12677 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12678 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12679 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12680 default: break;
12681 }
12682
12683 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
12684 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12685
12686 pm_node_destroy(parser, target);
12687 return UP(result);
12688}
12689
/**
 * Convert a node that was parsed as an ordinary expression into the node that
 * represents it in target position.
 *
 * Depending on the node type the node is retyped in place, destroyed and
 * replaced by a freshly created node, or returned as-is (possibly after an
 * error has been recorded on it).
 *
 * @param parser The parser.
 * @param target The already-parsed node to convert.
 * @param multiple Forwarded when recursing into a splat's expression. When
 *     true, a call target flagged PM_CALL_NODE_FLAGS_SAFE_NAVIGATION is
 *     reported with PM_ERR_UNEXPECTED_SAFE_NAVIGATION.
 * @param splat_parent When true, a PM_MULTI_TARGET_NODE is reported with
 *     PM_ERR_WRITE_TARGET_UNEXPECTED. The PM_SPLAT_NODE branch passes true
 *     when recursing into the splat's expression.
 * @return The node to use as the target.
 */
12698static pm_node_t *
12699parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12700 switch (PM_NODE_TYPE(target)) {
        // A missing node is passed through untouched and no error is added here.
12701 case PM_MISSING_NODE:
12702 return target;
12703 case PM_SOURCE_ENCODING_NODE:
12704 case PM_FALSE_NODE:
12705 case PM_SOURCE_FILE_NODE:
12706 case PM_SOURCE_LINE_NODE:
12707 case PM_NIL_NODE:
12708 case PM_SELF_NODE:
12709 case PM_TRUE_NODE: {
12710 // In these special cases, we have specific error messages and we
12711 // will replace them with local variable writes.
12712 return parse_unwriteable_target(parser, target);
12713 }
        // The read node is retyped in place rather than reallocated; the
        // assert checks that the read and target structs have the same size.
        // The same pattern is used for the other simple variable kinds below.
12714 case PM_CLASS_VARIABLE_READ_NODE:
12715 assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
12716 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12717 return target;
        // Constant paths and constants: an error is recorded when
        // context_def_p(parser) holds, and the node is retyped either way.
12718 case PM_CONSTANT_PATH_NODE:
12719 if (context_def_p(parser)) {
12720 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12721 }
12722
12723 assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
12724 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12725
12726 return target;
12727 case PM_CONSTANT_READ_NODE:
12728 if (context_def_p(parser)) {
12729 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12730 }
12731
12732 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12733 target->type = PM_CONSTANT_TARGET_NODE;
12734
12735 return target;
        // Back references and numbered references get a read-only error; the
        // node itself is returned unchanged (it is not retyped).
12736 case PM_BACK_REFERENCE_READ_NODE:
12737 case PM_NUMBERED_REFERENCE_READ_NODE:
12738 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12739 return target;
12740 case PM_GLOBAL_VARIABLE_READ_NODE:
12741 assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
12742 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12743 return target;
12744 case PM_LOCAL_VARIABLE_READ_NODE: {
            // If pm_token_is_numbered_parameter accepts the node's source
            // range, report the name as reserved and unreference the node.
12745 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12746 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
12747 pm_node_unreference(parser, target);
12748 }
12749
            // The read that produced this node is undone on the scope found
            // at the node's depth (see pm_locals_unread), then the node is
            // retyped in place.
12750 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12751 uint32_t name = cast->name;
12752 uint32_t depth = cast->depth;
12753 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12754
12755 assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
12756 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12757
12758 return target;
12759 }
12760 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
            // An `it` read is replaced by a new local variable target named
            // "it" at depth 0. The replacement is created first because it
            // reuses the old node's location; the old node is then
            // unreferenced and destroyed.
12761 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12762 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12763
12764 pm_node_unreference(parser, target);
12765 pm_node_destroy(parser, target);
12766
12767 return node;
12768 }
12769 case PM_INSTANCE_VARIABLE_READ_NODE:
12770 assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
12771 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12772 return target;
12773 case PM_MULTI_TARGET_NODE:
12774 if (splat_parent) {
12775 // Multi target is not accepted in all positions. If this is one
12776 // of them, then we need to add an error.
12777 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12778 }
12779
12780 return target;
12781 case PM_SPLAT_NODE: {
12782 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12783
            // A splat with no expression is left alone. Otherwise its
            // expression is converted recursively with splat_parent set.
12784 if (splat->expression != NULL) {
12785 splat->expression = parse_target(parser, splat->expression, multiple, true);
12786 }
12787
12788 return UP(splat);
12789 }
12790 case PM_CALL_NODE: {
12791 pm_call_node_t *call = (pm_call_node_t *) target;
12792
12793 // If we have no arguments to the call node and we need this to be a
12794 // target then this is either a method call or a local variable
12795 // write.
            // The condition also requires a message location, a message that
            // does not end in `!` or `?`, no opening location and no block.
12796 if (
12797 (call->message_loc.start != NULL) &&
12798 (call->message_loc.end[-1] != '!') &&
12799 (call->message_loc.end[-1] != '?') &&
12800 (call->opening_loc.start == NULL) &&
12801 (call->arguments == NULL) &&
12802 (call->block == NULL)
12803 ) {
12804 if (call->receiver == NULL) {
12805 // When we get here, we have a local variable write, because it
12806 // was previously marked as a method call but now we have an =.
12807 // This looks like:
12808 //
12809 // foo = 1
12810 //
12811 // When it was parsed in the prefix position, foo was seen as a
12812 // method call with no receiver and no arguments. Now we have an
12813 // =, so we know it's a local variable write.
                    // The message location is copied out first because the
                    // call node is destroyed before the target is created.
12814 const pm_location_t message_loc = call->message_loc;
12815
12816 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
12817 pm_node_destroy(parser, target);
12818
12819 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12820 }
12821
                // Call with a receiver whose message starts with `_` or with
                // a character accepted by the encoding's alnum_char: it
                // becomes a call target with the name rewritten by
                // parse_write_name.
12822 if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
12823 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12824 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12825 }
12826
12827 parse_write_name(parser, &call->name);
12828 return UP(pm_call_target_node_create(parser, call));
12829 }
12830 }
12831
12832 // If there is no call operator and the message is "[]" then this is
12833 // an aref expression, and we can transform it into an aset
12834 // expression.
12835 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12836 return UP(pm_index_target_node_create(parser, call));
12837 }
12838 }
        // A call node that matched none of the returns above is not a valid
        // target; control continues into the default case below.
12840 default:
12841 // In this case we have a node that we don't know how to convert
12842 // into a target. We need to treat it as an error. For now, we'll
12843 // mark it as an error and just skip right past it.
12844 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12845 return target;
12846 }
12847}
12848
12853static pm_node_t *
12854parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12855 pm_node_t *result = parse_target(parser, target, multiple, false);
12856
12857 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12858 // parens after the targets.
12859 if (
12860 !match1(parser, PM_TOKEN_EQUAL) &&
12861 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12862 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12863 ) {
12864 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12865 }
12866
12867 return result;
12868}
12869
12874static pm_node_t *
12875parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12876 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12877
12878 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12879 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12880 }
12881
12882 return write;
12883}
12884
/**
 * Build the node for a single write of `value` to `target`, where `target`
 * was parsed as an ordinary expression.
 *
 * @param parser The parser.
 * @param target The node on the left-hand side. Depending on the branch it
 *     is destroyed and replaced by a write node, mutated in place (call
 *     nodes), reused by the resulting node, or returned as-is on the error
 *     path.
 * @param operator The operator token. It is passed to the write-node
 *     constructors, recorded as `equal_loc` on call nodes, and is where the
 *     fallback PM_ERR_WRITE_TARGET_UNEXPECTED error is reported.
 * @param value The node being written. It is attached to the result, except
 *     on the paths that destroy it.
 * @return The resulting write node, the mutated call node, or `target`.
 */
12888static pm_node_t *
12889parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12890 switch (PM_NODE_TYPE(target)) {
        // Nothing to attach the value to: destroy it and return the missing
        // node.
12891 case PM_MISSING_NODE:
12892 pm_node_destroy(parser, value);
12893 return target;
12894 case PM_CLASS_VARIABLE_READ_NODE: {
12895 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12896 pm_node_destroy(parser, target);
12897 return UP(node);
12898 }
12899 case PM_CONSTANT_PATH_NODE: {
            // NOTE(review): unlike the sibling branches, target is not
            // destroyed here. Presumably the write node keeps using it;
            // confirm in pm_constant_path_write_node_create.
12900 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12901
12902 if (context_def_p(parser)) {
12903 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12904 }
12905
12906 return parse_shareable_constant_write(parser, node);
12907 }
12908 case PM_CONSTANT_READ_NODE: {
12909 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12910
12911 if (context_def_p(parser)) {
12912 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12913 }
12914
12915 pm_node_destroy(parser, target);
12916 return parse_shareable_constant_write(parser, node);
12917 }
        // Back references and numbered references record a read-only error.
        // No return follows in this view, so control continues into the
        // global variable case and a global variable write node is built.
12918 case PM_BACK_REFERENCE_READ_NODE:
12919 case PM_NUMBERED_REFERENCE_READ_NODE:
12920 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12922 case PM_GLOBAL_VARIABLE_READ_NODE: {
12923 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12924 pm_node_destroy(parser, target);
12925 return UP(node);
12926 }
12927 case PM_LOCAL_VARIABLE_READ_NODE: {
12928 pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
12929
            // Name, location and depth are copied out because target is
            // destroyed before the write node is created.
12930 pm_constant_id_t name = local_read->name;
12931 pm_location_t name_loc = target->location;
12932
12933 uint32_t depth = local_read->depth;
12934 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12935
            // For a numbered-parameter name, the diagnostic depends on
            // whether the scope has PM_SCOPE_PARAMETERS_NUMBERED_FOUND set.
12936 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12937 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12938 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
12939 pm_node_unreference(parser, target);
12940 }
12941
12942 pm_locals_unread(&scope->locals, name);
12943 pm_node_destroy(parser, target);
12944
12945 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator));
12946 }
12947 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
            // The write node is created before target is destroyed because
            // it is given target's location.
12948 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12949 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12950
12951 pm_node_unreference(parser, target);
12952 pm_node_destroy(parser, target);
12953
12954 return node;
12955 }
12956 case PM_INSTANCE_VARIABLE_READ_NODE: {
12957 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
12958 pm_node_destroy(parser, target);
12959 return write_node;
12960 }
12961 case PM_MULTI_TARGET_NODE:
12962 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
12963 case PM_SPLAT_NODE: {
12964 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12965
            // A lone splat is wrapped in a new multi target so that the
            // result is a multi write.
            // NOTE(review): value is passed both to the recursive call and
            // to the multi write below; confirm this sharing is intended.
12966 if (splat->expression != NULL) {
12967 splat->expression = parse_write(parser, splat->expression, operator, value);
12968 }
12969
12970 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
12971 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12972
12973 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
12974 }
12975 case PM_CALL_NODE: {
12976 pm_call_node_t *call = (pm_call_node_t *) target;
12977
12978 // If we have no arguments to the call node and we need this to be a
12979 // target then this is either a method call or a local variable
12980 // write.
12981 if (
12982 (call->message_loc.start != NULL) &&
12983 (call->message_loc.end[-1] != '!') &&
12984 (call->message_loc.end[-1] != '?') &&
12985 (call->opening_loc.start == NULL) &&
12986 (call->arguments == NULL) &&
12987 (call->block == NULL)
12988 ) {
12989 if (call->receiver == NULL) {
12990 // When we get here, we have a local variable write, because it
12991 // was previously marked as a method call but now we have an =.
12992 // This looks like:
12993 //
12994 // foo = 1
12995 //
12996 // When it was parsed in the prefix position, foo was seen as a
12997 // method call with no receiver and no arguments. Now we have an
12998 // =, so we know it's a local variable write.
                    // The message location is copied out because the call
                    // node is destroyed before the write node is created.
12999 const pm_location_t message = call->message_loc;
13000
13001 pm_parser_local_add_location(parser, message.start, message.end, 0);
13002 pm_node_destroy(parser, target);
13003
13004 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13005 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator));
13006
13007 pm_refute_numbered_parameter(parser, message.start, message.end);
13008 return target;
13009 }
13010
13011 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13012 // When we get here, we have a method call, because it was
13013 // previously marked as a method call but now we have an =. This
13014 // looks like:
13015 //
13016 // foo.bar = 1
13017 //
13018 // When it was parsed in the prefix position, foo.bar was seen as a
13019 // method call with no arguments. Now we have an =, so we know it's
13020 // a method call with an argument. In this case we will create the
13021 // arguments node, parse the argument, and add it to the list.
                    // The call node is mutated in place: it gains the
                    // arguments node, an end location taken from it, the
                    // operator location, the `=`-suffixed name and the
                    // attribute-write flags.
13022 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13023 call->arguments = arguments;
13024
13025 pm_arguments_node_arguments_append(arguments, value);
13026 call->base.location.end = arguments->base.location.end;
13027 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13028
13029 parse_write_name(parser, &call->name);
13030 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13031
13032 return UP(call);
13033 }
13034 }
13035
13036 // If there is no call operator and the message is "[]" then this is
13037 // an aref expression, and we can transform it into an aset
13038 // expression.
13039 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
                // An index call without arguments gets an empty arguments
                // node so the value can be appended to it.
13040 if (call->arguments == NULL) {
13041 call->arguments = pm_arguments_node_create(parser);
13042 }
13043
13044 pm_arguments_node_arguments_append(call->arguments, value);
13045 target->location.end = value->location.end;
13046
13047 // Replace the name with "[]=".
13048 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13049 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13050
13051 // Ensure that the arguments for []= don't contain keywords
13052 pm_index_arguments_check(parser, call->arguments, call->block);
13053 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13054
13055 return target;
13056 }
13057
13058 // If there are arguments on the call node, then it can't be a
13059 // method call ending with = or a local variable write, so it must
13060 // be a syntax error. In this case we'll fall through to our default
13061 // handling. We need to free the value that we parsed because there
13062 // is no way for us to attach it to the tree at this point.
13063 //
13064 // Since it is possible for the value to contain an implicit
13065 // parameter somewhere in its subtree, we need to walk it and remove
13066 // any implicit parameters from the list of implicit parameters for
13067 // the current scope.
13068 pm_node_unreference(parser, value);
13069 pm_node_destroy(parser, value);
13070 }
        // Reached from the call case above as well as for every node type
        // not listed. The error is reported on the operator token, not on
        // the target.
13072 default:
13073 // In this case we have a node that we don't know how to convert into a
13074 // target. We need to treat it as an error. For now, we'll mark it as an
13075 // error and just skip right past it.
13076 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13077 return target;
13078 }
13079}
13080
13087static pm_node_t *
13088parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13089 switch (PM_NODE_TYPE(target)) {
13090 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13091 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13092 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13093 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13094 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13095 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13096 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13097 default: break;
13098 }
13099
13100 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13101 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13102
13103 pm_node_destroy(parser, target);
13104 return UP(result);
13105}
13106
13117static pm_node_t *
13118parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13119 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13120
13121 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13122 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13123
13124 while (accept1(parser, PM_TOKEN_COMMA)) {
13125 if (accept1(parser, PM_TOKEN_USTAR)) {
13126 // Here we have a splat operator. It can have a name or be
13127 // anonymous. It can be the final target or be in the middle if
13128 // there haven't been any others yet.
13129 if (has_rest) {
13130 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13131 }
13132
13133 pm_token_t star_operator = parser->previous;
13134 pm_node_t *name = NULL;
13135
13136 if (token_begins_expression_p(parser->current.type)) {
13137 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13138 name = parse_target(parser, name, true, true);
13139 }
13140
13141 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13142 pm_multi_target_node_targets_append(parser, result, splat);
13143 has_rest = true;
13144 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13145 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13146 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13147 target = parse_target(parser, target, true, false);
13148
13149 pm_multi_target_node_targets_append(parser, result, target);
13150 context_pop(parser);
13151 } else if (token_begins_expression_p(parser->current.type)) {
13152 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13153 target = parse_target(parser, target, true, false);
13154
13155 pm_multi_target_node_targets_append(parser, result, target);
13156 } else if (!match1(parser, PM_TOKEN_EOF)) {
13157 // If we get here, then we have a trailing , in a multi target node.
13158 // We'll add an implicit rest node to represent this.
13159 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13160 pm_multi_target_node_targets_append(parser, result, rest);
13161 break;
13162 }
13163 }
13164
13165 return UP(result);
13166}
13167
13172static pm_node_t *
13173parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13174 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13175 accept1(parser, PM_TOKEN_NEWLINE);
13176
13177 // Ensure that we have either an = or a ) after the targets.
13178 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13179 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13180 }
13181
13182 return result;
13183}
13184
/**
 * Parse a list of statements belonging to the given context.
 *
 * Leading semicolons and newlines are skipped. If the first remaining token
 * already terminates the context, NULL is returned and no node is created.
 * Otherwise the context is pushed, statements are parsed and appended until
 * a terminator for the context (or an error-recovery condition) ends the
 * loop, and the context is popped again.
 *
 * @param parser The parser.
 * @param context The context these statements belong to; used for the
 *     context_terminator checks and pushed for the duration of the loop.
 * @param depth Current recursion depth; each statement is parsed at
 *     depth + 1.
 * @return The statements node, or NULL when there are no statements.
 */
13188static pm_statements_node_t *
13189parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13190 // First, skip past any optional terminators that might be at the beginning
13191 // of the statements.
13192 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13193
13194 // If we have a terminator, then we can just return NULL.
13195 if (context_terminator(context, &parser->current)) return NULL;
13196
13197 pm_statements_node_t *statements = pm_statements_node_create(parser);
13198
13199 // At this point we know we have at least one statement, and that it
13200 // immediately follows the current token.
13201 context_push(parser, context);
13202
13203 while (true) {
13204 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13205 pm_statements_node_body_append(parser, statements, node, true);
13206
13207 // If we're recovering from a syntax error, then we need to stop parsing
13208 // the statements now.
13209 if (parser->recovering) {
13210 // If this is the level of context where the recovery has happened,
13211 // then we can mark the parser as done recovering.
13212 if (context_terminator(context, &parser->current)) parser->recovering = false;
13213 break;
13214 }
13215
13216 // If we have a terminator, then we will parse all consecutive
13217 // terminators and then continue parsing the statements list.
13218 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13219 // If we have a terminator, then we will continue parsing the
13220 // statements list.
13221 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13222 if (context_terminator(context, &parser->current)) break;
13223
13224 // Now we can continue parsing the list of statements.
13225 continue;
13226 }
13227
13228 // At this point we have a list of statements that are not terminated by
13229 // a newline or semicolon. At this point we need to check if we're at
13230 // the end of the statements list. If we are, then we should break out
13231 // of the loop.
13232 if (context_terminator(context, &parser->current)) break;
13233
13234 // At this point, we have a syntax error, because the statement was not
13235 // terminated by a newline or semicolon, and we're not at the end of the
13236 // statements list. Ideally we should scan forward to determine if we
13237 // should insert a missing terminator or break out of parsing the
13238 // statements list at this point.
13239 //
13240 // We don't have that yet, so instead we'll do a more naive approach. If
13241 // we were unable to parse an expression, then we will skip past this
13242 // token and continue parsing the statements list. Otherwise we'll add
13243 // an error and continue parsing the statements list.
13244 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
            // Skip the token that could not be parsed so the loop makes
            // progress.
13245 parser_lex(parser);
13246
13247 // If we are at the end of the file, then we need to stop parsing
13248 // the statements entirely at this point. Mark the parser as
13249 // recovering, as we know that EOF closes the top-level context, and
13250 // then break out of the loop.
13251 if (match1(parser, PM_TOKEN_EOF)) {
13252 parser->recovering = true;
13253 break;
13254 }
13255
13256 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13257 if (context_terminator(context, &parser->current)) break;
13258 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13259 // This is an inlined version of accept1 because the error that we
13260 // want to add has varargs. If this happens again, we should
13261 // probably extract a helper function.
            // After the error is recorded, the previous token is collapsed
            // to an empty range at its own end and marked as missing.
13262 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13263 parser->previous.start = parser->previous.end;
13264 parser->previous.type = PM_TOKEN_MISSING;
13265 }
13266 }
13267
13268 context_pop(parser);
    // last_value is forwarded to pm_void_statements_check below.
    // NOTE(review): no case labels are visible ahead of the assignment in
    // this switch, so as shown it can never run and last_value stays true.
    // Confirm against the full file which contexts set it to false.
13269 bool last_value = true;
13270 switch (context) {
13273 last_value = false;
13274 break;
13275 default:
13276 break;
13277 }
13278 pm_void_statements_check(parser, statements, last_value);
13279
13280 return statements;
13281}
13282
13287static void
13288pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13289 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13290
13291 if (duplicated != NULL) {
13292 pm_buffer_t buffer = { 0 };
13293 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13294
13295 pm_diagnostic_list_append_format(
13296 &parser->warning_list,
13297 duplicated->location.start,
13298 duplicated->location.end,
13299 PM_WARN_DUPLICATED_HASH_KEY,
13300 (int) pm_buffer_length(&buffer),
13301 pm_buffer_value(&buffer),
13302 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13303 );
13304
13305 pm_buffer_free(&buffer);
13306 }
13307}
13308
13313static void
13314pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13315 pm_node_t *previous;
13316
13317 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13318 pm_diagnostic_list_append_format(
13319 &parser->warning_list,
13320 node->location.start,
13321 node->location.end,
13322 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13323 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13324 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13325 );
13326 }
13327}
13328
13332static bool
13333parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13334 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13335 bool contains_keyword_splat = false;
13336
13337 while (true) {
13338 pm_node_t *element;
13339
13340 switch (parser->current.type) {
13341 case PM_TOKEN_USTAR_STAR: {
13342 parser_lex(parser);
13343 pm_token_t operator = parser->previous;
13344 pm_node_t *value = NULL;
13345
13346 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13347 // If we're about to parse a nested hash that is being
13348 // pushed into this hash directly with **, then we want the
13349 // inner hash to share the static literals with the outer
13350 // hash.
13351 parser->current_hash_keys = literals;
13352 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13353 } else if (token_begins_expression_p(parser->current.type)) {
13354 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13355 } else {
13356 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13357 }
13358
13359 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13360 contains_keyword_splat = true;
13361 break;
13362 }
13363 case PM_TOKEN_LABEL: {
13364 pm_token_t label = parser->current;
13365 parser_lex(parser);
13366
13367 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13368 pm_hash_key_static_literals_add(parser, literals, key);
13369
13370 pm_token_t operator = not_provided(parser);
13371 pm_node_t *value = NULL;
13372
13373 if (token_begins_expression_p(parser->current.type)) {
13374 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13375 } else {
13376 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13377 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13378 value = UP(pm_constant_read_node_create(parser, &constant));
13379 } else {
13380 int depth = -1;
13381 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13382
13383 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13384 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13385 } else {
13386 depth = pm_parser_local_depth(parser, &identifier);
13387 }
13388
13389 if (depth == -1) {
13390 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13391 } else {
13392 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13393 }
13394 }
13395
13396 value->location.end++;
13397 value = UP(pm_implicit_node_create(parser, value));
13398 }
13399
13400 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13401 break;
13402 }
13403 default: {
13404 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13405
13406 // Hash keys that are strings are automatically frozen. We will
13407 // mark that here.
13408 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13409 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13410 }
13411
13412 pm_hash_key_static_literals_add(parser, literals, key);
13413
13414 pm_token_t operator;
13415 if (pm_symbol_node_label_p(key)) {
13416 operator = not_provided(parser);
13417 } else {
13418 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13419 operator = parser->previous;
13420 }
13421
13422 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13423 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13424 break;
13425 }
13426 }
13427
13428 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13429 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
13430 } else {
13431 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
13432 }
13433
13434 // If there's no comma after the element, then we're done.
13435 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13436
13437 // If the next element starts with a label or a **, then we know we have
13438 // another element in the hash, so we'll continue parsing.
13439 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13440
13441 // Otherwise we need to check if the subsequent token begins an expression.
13442 // If it does, then we'll continue parsing.
13443 if (token_begins_expression_p(parser->current.type)) continue;
13444
13445 // Otherwise by default we will exit out of this loop.
13446 break;
13447 }
13448
13449 return contains_keyword_splat;
13450}
13451
13452static inline bool
13453argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13454 if (pm_symbol_node_label_p(argument)) {
13455 return true;
13456 }
13457
13458 switch (PM_NODE_TYPE(argument)) {
13459 case PM_CALL_NODE: {
13460 pm_call_node_t *cast = (pm_call_node_t *) argument;
13461 if (cast->opening_loc.start == NULL && cast->arguments != NULL) {
13462 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13463 return false;
13464 }
13465 if (cast->block != NULL) {
13466 return false;
13467 }
13468 }
13469 break;
13470 }
13471 default: break;
13472 }
13473 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13474}
13475
13479static inline void
13480parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13481 if (arguments->arguments == NULL) {
13482 arguments->arguments = pm_arguments_node_create(parser);
13483 }
13484
13485 pm_arguments_node_arguments_append(arguments->arguments, argument);
13486}
13487
/**
 * Parse a comma-separated list of arguments into the given arguments
 * structure.
 *
 * Positional arguments, splats, and bare keyword hashes are appended to
 * arguments->arguments (created on demand by parse_arguments_append). The
 * first block argument is stored in arguments->block, and
 * arguments->has_forwarding is set when a `...` forwarding argument is seen.
 *
 * accepts_forwarding controls whether a leading `...` may be treated as
 * argument forwarding. terminator is the token that closes the list;
 * PM_TOKEN_EOF is used when there is no specific closing token. depth is
 * passed on, incremented by one, to every nested expression parse.
 */
static void
parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
    pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;

    // First we need to check if the next token is one that could be the start
    // of an argument. If it's not, then we can just return.
    if (
        match2(parser, terminator, PM_TOKEN_EOF) ||
        (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
        context_terminator(parser->current_context->context, &parser->current)
    ) {
        return;
    }

    // State that is carried across iterations of the loop below and used to
    // report ordering errors.
    bool parsed_first_argument = false;
    bool parsed_bare_hash = false;
    bool parsed_block_argument = false;
    bool parsed_forwarding_arguments = false;

    while (!match1(parser, PM_TOKEN_EOF)) {
        // Anything that follows a `...` forwarding argument is an error, but
        // we still parse it.
        if (parsed_forwarding_arguments) {
            pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
        }

        pm_node_t *argument = NULL;

        switch (parser->current.type) {
            case PM_TOKEN_USTAR_STAR:
            case PM_TOKEN_LABEL: {
                // A `**` or a label starts a bare keyword hash. A second bare
                // hash in the same argument list is reported as an error.
                if (parsed_bare_hash) {
                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
                }

                pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
                argument = UP(hash);

                pm_static_literals_t hash_keys = { 0 };
                bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));

                parse_arguments_append(parser, arguments, argument);

                pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
                if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
                pm_node_flag_set(UP(arguments->arguments), flags);

                pm_static_literals_free(&hash_keys);
                parsed_bare_hash = true;

                break;
            }
            case PM_TOKEN_UAMPERSAND: {
                parser_lex(parser);
                pm_token_t operator = parser->previous;
                pm_node_t *expression = NULL;

                // A bare `&` with no operand leaves the expression NULL after
                // running the block forwarding check for the current scope.
                if (token_begins_expression_p(parser->current.type)) {
                    expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
                } else {
                    pm_parser_scope_forwarding_block_check(parser, &operator);
                }

                // The first block argument goes into arguments->block. Any
                // later one is appended to the regular argument list instead
                // of overwriting it.
                argument = UP(pm_block_argument_node_create(parser, &operator, expression));
                if (parsed_block_argument) {
                    parse_arguments_append(parser, arguments, argument);
                } else {
                    arguments->block = argument;
                }

                if (match1(parser, PM_TOKEN_COMMA)) {
                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
                }

                parsed_block_argument = true;
                break;
            }
            case PM_TOKEN_USTAR: {
                parser_lex(parser);
                pm_token_t operator = parser->previous;

                // A `*` immediately followed by `)`, `,`, `;`, or `]` has no
                // operand, so the splat node is created with a NULL
                // expression.
                if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
                    pm_parser_scope_forwarding_positionals_check(parser, &operator);
                    argument = UP(pm_splat_node_create(parser, &operator, NULL));
                    if (parsed_bare_hash) {
                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
                    }
                } else {
                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));

                    // The error spans from the `*` operator to the end of the
                    // splatted expression.
                    if (parsed_bare_hash) {
                        pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
                    }

                    argument = UP(pm_splat_node_create(parser, &operator, expression));
                }

                parse_arguments_append(parser, arguments, argument);
                break;
            }
            case PM_TOKEN_UDOT_DOT_DOT: {
                if (accepts_forwarding) {
                    parser_lex(parser);

                    if (token_begins_expression_p(parser->current.type)) {
                        // If the token begins an expression then this ... was
                        // not actually argument forwarding but was instead a
                        // range.
                        pm_token_t operator = parser->previous;
                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));

                        // If we parse a range, we need to validate that we
                        // didn't accidentally violate the nonassoc rules of the
                        // ... operator.
                        if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
                            pm_range_node_t *range = (pm_range_node_t *) right;
                            pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
                        }

                        argument = UP(pm_range_node_create(parser, NULL, &operator, right));
                    } else {
                        pm_parser_scope_forwarding_all_check(parser, &parser->previous);
                        if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
                            pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
                        }

                        argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
                        parse_arguments_append(parser, arguments, argument);
                        pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
                        arguments->has_forwarding = true;
                        parsed_forwarding_arguments = true;
                        break;
                    }
                }
            }
            // Intentional fallthrough. We get here either with the beginless
            // range built above already stored in `argument`, or with
            // `argument` still NULL because forwarding is not accepted, in
            // which case the default case parses the `...` as part of an
            // ordinary expression.
            default: {
                if (argument == NULL) {
                    argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
                }

                bool contains_keywords = false;
                bool contains_keyword_splat = false;

                // If the expression turns out to be the first key of a bare
                // hash, wrap it and everything that follows it in a keyword
                // hash node.
                if (argument_allowed_for_bare_hash(parser, argument)){
                    if (parsed_bare_hash) {
                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
                    }

                    // The operator is the `=>` that was just consumed, if
                    // there was one.
                    pm_token_t operator;
                    if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
                        operator = parser->previous;
                    } else {
                        operator = not_provided(parser);
                    }

                    pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
                    contains_keywords = true;

                    // Create the set of static literals for this hash.
                    pm_static_literals_t hash_keys = { 0 };
                    pm_hash_key_static_literals_add(parser, &hash_keys, argument);

                    // Finish parsing the one we are part way through.
                    pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
                    argument = UP(pm_assoc_node_create(parser, argument, &operator, value));

                    pm_keyword_hash_node_elements_append(bare_hash, argument);
                    argument = UP(bare_hash);

                    // Then parse more if we have a comma
                    if (accept1(parser, PM_TOKEN_COMMA) && (
                        token_begins_expression_p(parser->current.type) ||
                        match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
                    )) {
                        contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
                    }

                    pm_static_literals_free(&hash_keys);
                    parsed_bare_hash = true;
                }

                parse_arguments_append(parser, arguments, argument);

                pm_node_flags_t flags = 0;
                if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
                if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
                pm_node_flag_set(UP(arguments->arguments), flags);

                break;
            }
        }

        parsed_first_argument = true;

        // If parsing the argument failed, we need to stop parsing arguments.
        if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;

        // If the terminator of these arguments is not EOF, then we have a
        // specific token we're looking for. In that case we can accept a
        // newline here because it is not functioning as a statement terminator.
        bool accepted_newline = false;
        if (terminator != PM_TOKEN_EOF) {
            accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
        }

        if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
            // If we previously were on a comma and we just parsed a bare hash,
            // then we want to continue parsing arguments. This is because the
            // comma was grabbed up by the hash parser.
        } else if (accept1(parser, PM_TOKEN_COMMA)) {
            // If there was a comma, then we need to check if we also accepted a
            // newline. If we did, then this is a syntax error.
            if (accepted_newline) {
                pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
            }

            // If this is a command call and an argument takes a block,
            // there can be no further arguments. For example,
            // `foo(bar 1 do end, 2)` should be rejected.
            if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
                pm_call_node_t *call = (pm_call_node_t *) argument;
                if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
                    pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
                    break;
                }
            }
        } else {
            // If there is no comma at the end of the argument list then we're
            // done parsing arguments and can break out of this loop.
            break;
        }

        // If we hit the terminator, then that means we have a trailing comma so
        // we can accept that output as well.
        if (match1(parser, terminator)) break;
    }
}
13727
13738static pm_multi_target_node_t *
13739parse_required_destructured_parameter(pm_parser_t *parser) {
13740 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13741
13742 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13743 pm_multi_target_node_opening_set(node, &parser->previous);
13744
13745 do {
13746 pm_node_t *param;
13747
13748 // If we get here then we have a trailing comma, which isn't allowed in
13749 // the grammar. In other places, multi targets _do_ allow trailing
13750 // commas, so here we'll assume this is a mistake of the user not
13751 // knowing it's not allowed here.
13752 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13753 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13754 pm_multi_target_node_targets_append(parser, node, param);
13755 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13756 break;
13757 }
13758
13759 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13760 param = UP(parse_required_destructured_parameter(parser));
13761 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13762 pm_token_t star = parser->previous;
13763 pm_node_t *value = NULL;
13764
13765 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13766 pm_token_t name = parser->previous;
13767 value = UP(pm_required_parameter_node_create(parser, &name));
13768 if (pm_parser_parameter_name_check(parser, &name)) {
13769 pm_node_flag_set_repeated_parameter(value);
13770 }
13771 pm_parser_local_add_token(parser, &name, 1);
13772 }
13773
13774 param = UP(pm_splat_node_create(parser, &star, value));
13775 } else {
13776 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13777 pm_token_t name = parser->previous;
13778
13779 param = UP(pm_required_parameter_node_create(parser, &name));
13780 if (pm_parser_parameter_name_check(parser, &name)) {
13781 pm_node_flag_set_repeated_parameter(param);
13782 }
13783 pm_parser_local_add_token(parser, &name, 1);
13784 }
13785
13786 pm_multi_target_node_targets_append(parser, node, param);
13787 } while (accept1(parser, PM_TOKEN_COMMA));
13788
13789 accept1(parser, PM_TOKEN_NEWLINE);
13790 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13791 pm_multi_target_node_closing_set(node, &parser->previous);
13792
13793 return node;
13794}
13795
/**
 * The states used to validate the order of parameters in a parameter list.
 * The numeric values matter: states are compared with relational operators
 * when deciding whether a parameter appears out of order, so the values must
 * stay in this sequence. PM_PARAMETERS_NO_CHANGE must remain 0 because it is
 * the value of every entry of the token lookup table that is not explicitly
 * initialized.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** State for `&`, and `...`: no further parameter may follow. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** State for `**`. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** State for labels (keyword parameters). */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** Not assigned to any token by the lookup table. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** State for `*`, and for named parameters that follow an optional one. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** State for `=` (optional parameters). */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** State for identifiers and `(` (named and destructured parameters). */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The initial state, before any parameter has been seen. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13811
13815static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13816 [0] = PM_PARAMETERS_NO_CHANGE,
13817 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13818 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13819 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13820 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13821 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13822 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13823 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13824 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13825 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13826 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13827 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13828};
13829
13837static bool
13838update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13839 pm_parameters_order_t state = parameters_ordering[token->type];
13840 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13841
13842 // If we see another ordered argument after a optional argument
13843 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13844 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13845 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13846 return true;
13847 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13848 return true;
13849 }
13850
13851 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13852 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13853 return false;
13854 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13855 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13856 return false;
13857 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13858 // We know what transition we failed on, so we can provide a better error here.
13859 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13860 return false;
13861 }
13862
13863 if (state < *current) *current = state;
13864 return true;
13865}
13866
13870static pm_parameters_node_t *
13871parse_parameters(
13872 pm_parser_t *parser,
13873 pm_binding_power_t binding_power,
13874 bool uses_parentheses,
13875 bool allows_trailing_comma,
13876 bool allows_forwarding_parameters,
13877 bool accepts_blocks_in_defaults,
13878 bool in_block,
13879 pm_diagnostic_id_t diag_id_forwarding,
13880 uint16_t depth
13881) {
13882 pm_do_loop_stack_push(parser, false);
13883
13884 pm_parameters_node_t *params = pm_parameters_node_create(parser);
13885 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13886
13887 while (true) {
13888 bool parsing = true;
13889
13890 switch (parser->current.type) {
13891 case PM_TOKEN_PARENTHESIS_LEFT: {
13892 update_parameter_state(parser, &parser->current, &order);
13893 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13894
13895 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13896 pm_parameters_node_requireds_append(params, param);
13897 } else {
13898 pm_parameters_node_posts_append(params, param);
13899 }
13900 break;
13901 }
13902 case PM_TOKEN_UAMPERSAND:
13903 case PM_TOKEN_AMPERSAND: {
13904 update_parameter_state(parser, &parser->current, &order);
13905 parser_lex(parser);
13906
13907 pm_token_t operator = parser->previous;
13908 pm_token_t name;
13909
13910 bool repeated = false;
13911 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13912 name = parser->previous;
13913 repeated = pm_parser_parameter_name_check(parser, &name);
13914 pm_parser_local_add_token(parser, &name, 1);
13915 } else {
13916 name = not_provided(parser);
13917 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
13918 }
13919
13920 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
13921 if (repeated) {
13922 pm_node_flag_set_repeated_parameter(UP(param));
13923 }
13924 if (params->block == NULL) {
13925 pm_parameters_node_block_set(params, param);
13926 } else {
13927 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI);
13928 pm_parameters_node_posts_append(params, UP(param));
13929 }
13930
13931 break;
13932 }
13933 case PM_TOKEN_UDOT_DOT_DOT: {
13934 if (!allows_forwarding_parameters) {
13935 pm_parser_err_current(parser, diag_id_forwarding);
13936 }
13937
13938 bool succeeded = update_parameter_state(parser, &parser->current, &order);
13939 parser_lex(parser);
13940
13941 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
13942 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13943
13944 if (params->keyword_rest != NULL) {
13945 // If we already have a keyword rest parameter, then we replace it with the
13946 // forwarding parameter and move the keyword rest parameter to the posts list.
13947 pm_node_t *keyword_rest = params->keyword_rest;
13948 pm_parameters_node_posts_append(params, keyword_rest);
13949 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13950 params->keyword_rest = NULL;
13951 }
13952
13953 pm_parameters_node_keyword_rest_set(params, UP(param));
13954 break;
13955 }
13956 case PM_TOKEN_CLASS_VARIABLE:
13957 case PM_TOKEN_IDENTIFIER:
13958 case PM_TOKEN_CONSTANT:
13959 case PM_TOKEN_INSTANCE_VARIABLE:
13960 case PM_TOKEN_GLOBAL_VARIABLE:
13961 case PM_TOKEN_METHOD_NAME: {
13962 parser_lex(parser);
13963 switch (parser->previous.type) {
13964 case PM_TOKEN_CONSTANT:
13965 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13966 break;
13967 case PM_TOKEN_INSTANCE_VARIABLE:
13968 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13969 break;
13970 case PM_TOKEN_GLOBAL_VARIABLE:
13971 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13972 break;
13973 case PM_TOKEN_CLASS_VARIABLE:
13974 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13975 break;
13976 case PM_TOKEN_METHOD_NAME:
13977 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
13978 break;
13979 default: break;
13980 }
13981
13982 if (parser->current.type == PM_TOKEN_EQUAL) {
13983 update_parameter_state(parser, &parser->current, &order);
13984 } else {
13985 update_parameter_state(parser, &parser->previous, &order);
13986 }
13987
13988 pm_token_t name = parser->previous;
13989 bool repeated = pm_parser_parameter_name_check(parser, &name);
13990 pm_parser_local_add_token(parser, &name, 1);
13991
13992 if (match1(parser, PM_TOKEN_EQUAL)) {
13993 pm_token_t operator = parser->current;
13994 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13995 parser_lex(parser);
13996
13997 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
13998 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
13999
14000 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14001 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14002 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14003
14004 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14005
14006 if (repeated) {
14007 pm_node_flag_set_repeated_parameter(UP(param));
14008 }
14009 pm_parameters_node_optionals_append(params, param);
14010
14011 // If the value of the parameter increased the number of
14012 // reads of that parameter, then we need to warn that we
14013 // have a circular definition.
14014 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14015 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14016 }
14017
14018 context_pop(parser);
14019
14020 // If parsing the value of the parameter resulted in error recovery,
14021 // then we can put a missing node in its place and stop parsing the
14022 // parameters entirely now.
14023 if (parser->recovering) {
14024 parsing = false;
14025 break;
14026 }
14027 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14028 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14029 if (repeated) {
14030 pm_node_flag_set_repeated_parameter(UP(param));
14031 }
14032 pm_parameters_node_requireds_append(params, UP(param));
14033 } else {
14034 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14035 if (repeated) {
14036 pm_node_flag_set_repeated_parameter(UP(param));
14037 }
14038 pm_parameters_node_posts_append(params, UP(param));
14039 }
14040
14041 break;
14042 }
14043 case PM_TOKEN_LABEL: {
14044 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14045 update_parameter_state(parser, &parser->current, &order);
14046
14047 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14048 parser_lex(parser);
14049
14050 pm_token_t name = parser->previous;
14051 pm_token_t local = name;
14052 local.end -= 1;
14053
14054 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14055 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14056 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14057 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14058 }
14059
14060 bool repeated = pm_parser_parameter_name_check(parser, &local);
14061 pm_parser_local_add_token(parser, &local, 1);
14062
14063 switch (parser->current.type) {
14064 case PM_TOKEN_COMMA:
14065 case PM_TOKEN_PARENTHESIS_RIGHT:
14066 case PM_TOKEN_PIPE: {
14067 context_pop(parser);
14068
14069 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14070 if (repeated) {
14071 pm_node_flag_set_repeated_parameter(param);
14072 }
14073
14074 pm_parameters_node_keywords_append(params, param);
14075 break;
14076 }
14077 case PM_TOKEN_SEMICOLON:
14078 case PM_TOKEN_NEWLINE: {
14079 context_pop(parser);
14080
14081 if (uses_parentheses) {
14082 parsing = false;
14083 break;
14084 }
14085
14086 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14087 if (repeated) {
14088 pm_node_flag_set_repeated_parameter(param);
14089 }
14090
14091 pm_parameters_node_keywords_append(params, param);
14092 break;
14093 }
14094 default: {
14095 pm_node_t *param;
14096
14097 if (token_begins_expression_p(parser->current.type)) {
14098 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14099 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14100
14101 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14102 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14103 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14104
14105 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14106 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14107 }
14108
14109 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14110 }
14111 else {
14112 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14113 }
14114
14115 if (repeated) {
14116 pm_node_flag_set_repeated_parameter(param);
14117 }
14118
14119 context_pop(parser);
14120 pm_parameters_node_keywords_append(params, param);
14121
14122 // If parsing the value of the parameter resulted in error recovery,
14123 // then we can put a missing node in its place and stop parsing the
14124 // parameters entirely now.
14125 if (parser->recovering) {
14126 parsing = false;
14127 break;
14128 }
14129 }
14130 }
14131
14132 parser->in_keyword_arg = false;
14133 break;
14134 }
14135 case PM_TOKEN_USTAR:
14136 case PM_TOKEN_STAR: {
14137 update_parameter_state(parser, &parser->current, &order);
14138 parser_lex(parser);
14139
14140 pm_token_t operator = parser->previous;
14141 pm_token_t name;
14142 bool repeated = false;
14143
14144 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14145 name = parser->previous;
14146 repeated = pm_parser_parameter_name_check(parser, &name);
14147 pm_parser_local_add_token(parser, &name, 1);
14148 } else {
14149 name = not_provided(parser);
14150 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14151 }
14152
14153 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name));
14154 if (repeated) {
14155 pm_node_flag_set_repeated_parameter(param);
14156 }
14157
14158 if (params->rest == NULL) {
14159 pm_parameters_node_rest_set(params, param);
14160 } else {
14161 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14162 pm_parameters_node_posts_append(params, param);
14163 }
14164
14165 break;
14166 }
14167 case PM_TOKEN_STAR_STAR:
14168 case PM_TOKEN_USTAR_STAR: {
14169 pm_parameters_order_t previous_order = order;
14170 update_parameter_state(parser, &parser->current, &order);
14171 parser_lex(parser);
14172
14173 pm_token_t operator = parser->previous;
14174 pm_node_t *param;
14175
14176 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14177 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14178 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14179 }
14180
14181 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14182 } else {
14183 pm_token_t name;
14184
14185 bool repeated = false;
14186 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14187 name = parser->previous;
14188 repeated = pm_parser_parameter_name_check(parser, &name);
14189 pm_parser_local_add_token(parser, &name, 1);
14190 } else {
14191 name = not_provided(parser);
14192 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14193 }
14194
14195 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name));
14196 if (repeated) {
14197 pm_node_flag_set_repeated_parameter(param);
14198 }
14199 }
14200
14201 if (params->keyword_rest == NULL) {
14202 pm_parameters_node_keyword_rest_set(params, param);
14203 } else {
14204 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14205 pm_parameters_node_posts_append(params, param);
14206 }
14207
14208 break;
14209 }
14210 default:
14211 if (parser->previous.type == PM_TOKEN_COMMA) {
14212 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14213 // If we get here, then we have a trailing comma in a
14214 // block parameter list.
14215 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14216
14217 if (params->rest == NULL) {
14218 pm_parameters_node_rest_set(params, param);
14219 } else {
14220 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14221 pm_parameters_node_posts_append(params, UP(param));
14222 }
14223 } else {
14224 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14225 }
14226 }
14227
14228 parsing = false;
14229 break;
14230 }
14231
14232 // If we hit some kind of issue while parsing the parameter, this would
14233 // have been set to false. In that case, we need to break out of the
14234 // loop.
14235 if (!parsing) break;
14236
14237 bool accepted_newline = false;
14238 if (uses_parentheses) {
14239 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14240 }
14241
14242 if (accept1(parser, PM_TOKEN_COMMA)) {
14243 // If there was a comma, but we also accepted a newline, then this
14244 // is a syntax error.
14245 if (accepted_newline) {
14246 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14247 }
14248 } else {
14249 // If there was no comma, then we're done parsing parameters.
14250 break;
14251 }
14252 }
14253
14254 pm_do_loop_stack_pop(parser);
14255
14256 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14257 if (params->base.location.start == params->base.location.end) {
14258 pm_node_destroy(parser, UP(params));
14259 return NULL;
14260 }
14261
14262 return params;
14263}
14264
14269static size_t
14270token_newline_index(const pm_parser_t *parser) {
14271 if (parser->heredoc_end == NULL) {
14272 // This is the common case. In this case we can look at the previously
14273 // recorded newline in the newline list and subtract from the current
14274 // offset.
14275 return parser->newline_list.size - 1;
14276 } else {
14277 // This is unlikely. This is the case that we have already parsed the
14278 // start of a heredoc, so we cannot rely on looking at the previous
14279 // offset of the newline list, and instead must go through the whole
14280 // process of a binary search for the line number.
14281 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14282 }
14283}
14284
14289static int64_t
14290token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14291 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14292 const uint8_t *end = token->start;
14293
14294 // Skip over the BOM if it is present.
14295 if (
14296 newline_index == 0 &&
14297 parser->start[0] == 0xef &&
14298 parser->start[1] == 0xbb &&
14299 parser->start[2] == 0xbf
14300 ) cursor += 3;
14301
14302 int64_t column = 0;
14303 for (; cursor < end; cursor++) {
14304 switch (*cursor) {
14305 case '\t':
14306 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14307 break;
14308 case ' ':
14309 column++;
14310 break;
14311 default:
14312 column++;
14313 if (break_on_non_space) return -1;
14314 break;
14315 }
14316 }
14317
14318 return column;
14319}
14320
14325static void
14326parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14327 // If these warnings are disabled (unlikely), then we can just return.
14328 if (!parser->warn_mismatched_indentation) return;
14329
14330 // If the tokens are on the same line, we do not warn.
14331 size_t closing_newline_index = token_newline_index(parser);
14332 if (opening_newline_index == closing_newline_index) return;
14333
14334 // If the opening token has anything other than spaces or tabs before it,
14335 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14336 // and the `if` immediately follows an `else` keyword.
14337 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14338 if (!if_after_else && (opening_column == -1)) return;
14339
14340 // Get a reference to the closing token off the current parser. This assumes
14341 // that the caller has placed this in the correct position.
14342 pm_token_t *closing_token = &parser->current;
14343
14344 // If the tokens are at the same indentation, we do not warn.
14345 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14346 if ((closing_column == -1) || (opening_column == closing_column)) return;
14347
14348 // If the closing column is greater than the opening column and we are
14349 // allowing indentation, then we do not warn.
14350 if (allow_indent && (closing_column > opening_column)) return;
14351
14352 // Otherwise, add a warning.
14353 PM_PARSER_WARN_FORMAT(
14354 parser,
14355 closing_token->start,
14356 closing_token->end,
14357 PM_WARN_INDENTATION_MISMATCH,
14358 (int) (closing_token->end - closing_token->start),
14359 (const char *) closing_token->start,
14360 (int) (opening_token->end - opening_token->start),
14361 (const char *) opening_token->start,
14362 ((int32_t) opening_newline_index) + parser->start_line
14363 );
14364}
14365
/**
 * Identifies the construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues uses it to pick the context under which each clause body is
 * parsed.
 */
typedef enum {
    PM_RESCUES_BEGIN  = 1,
    PM_RESCUES_BLOCK  = 2,
    PM_RESCUES_CLASS  = 3,
    PM_RESCUES_DEF    = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14375
14380static inline void
14381parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14382 pm_rescue_node_t *current = NULL;
14383
14384 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14385 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14386 parser_lex(parser);
14387
14388 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14389
14390 switch (parser->current.type) {
14391 case PM_TOKEN_EQUAL_GREATER: {
14392 // Here we have an immediate => after the rescue keyword, in which case
14393 // we're going to have an empty list of exceptions to rescue (which
14394 // implies StandardError).
14395 parser_lex(parser);
14396 pm_rescue_node_operator_set(rescue, &parser->previous);
14397
14398 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14399 reference = parse_target(parser, reference, false, false);
14400
14401 pm_rescue_node_reference_set(rescue, reference);
14402 break;
14403 }
14404 case PM_TOKEN_NEWLINE:
14405 case PM_TOKEN_SEMICOLON:
14406 case PM_TOKEN_KEYWORD_THEN:
14407 // Here we have a terminator for the rescue keyword, in which
14408 // case we're going to just continue on.
14409 break;
14410 default: {
14411 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14412 // Here we have something that could be an exception expression, so
14413 // we'll attempt to parse it here and any others delimited by commas.
14414
14415 do {
14416 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14417 pm_rescue_node_exceptions_append(rescue, expression);
14418
14419 // If we hit a newline, then this is the end of the rescue expression. We
14420 // can continue on to parse the statements.
14421 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14422
14423 // If we hit a `=>` then we're going to parse the exception variable. Once
14424 // we've done that, we'll break out of the loop and parse the statements.
14425 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14426 pm_rescue_node_operator_set(rescue, &parser->previous);
14427
14428 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14429 reference = parse_target(parser, reference, false, false);
14430
14431 pm_rescue_node_reference_set(rescue, reference);
14432 break;
14433 }
14434 } while (accept1(parser, PM_TOKEN_COMMA));
14435 }
14436 }
14437 }
14438
14439 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14440 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14441 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14442 }
14443 } else {
14444 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14445 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14446 }
14447
14448 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14449 pm_accepts_block_stack_push(parser, true);
14450 pm_context_t context;
14451
14452 switch (type) {
14453 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14454 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14455 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14456 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14457 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14458 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14459 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14460 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14461 }
14462
14463 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14464 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14465
14466 pm_accepts_block_stack_pop(parser);
14467 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14468 }
14469
14470 if (current == NULL) {
14471 pm_begin_node_rescue_clause_set(parent_node, rescue);
14472 } else {
14473 pm_rescue_node_subsequent_set(current, rescue);
14474 }
14475
14476 current = rescue;
14477 }
14478
14479 // The end node locations on rescue nodes will not be set correctly
14480 // since we won't know the end until we've found all subsequent
14481 // clauses. This sets the end location on all rescues once we know it.
14482 if (current != NULL) {
14483 const uint8_t *end_to_set = current->base.location.end;
14484 pm_rescue_node_t *clause = parent_node->rescue_clause;
14485
14486 while (clause != NULL) {
14487 clause->base.location.end = end_to_set;
14488 clause = clause->subsequent;
14489 }
14490 }
14491
14492 pm_token_t else_keyword;
14493 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14494 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14495 opening_newline_index = token_newline_index(parser);
14496
14497 else_keyword = parser->current;
14498 opening = &else_keyword;
14499
14500 parser_lex(parser);
14501 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14502
14503 pm_statements_node_t *else_statements = NULL;
14504 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14505 pm_accepts_block_stack_push(parser, true);
14506 pm_context_t context;
14507
14508 switch (type) {
14509 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14510 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14511 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14512 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14513 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14514 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14515 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14516 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14517 }
14518
14519 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14520 pm_accepts_block_stack_pop(parser);
14521
14522 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14523 }
14524
14525 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14526 pm_begin_node_else_clause_set(parent_node, else_clause);
14527
14528 // If we don't have a `current` rescue node, then this is a dangling
14529 // else, and it's an error.
14530 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14531 }
14532
14533 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14534 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14535 pm_token_t ensure_keyword = parser->current;
14536
14537 parser_lex(parser);
14538 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14539
14540 pm_statements_node_t *ensure_statements = NULL;
14541 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14542 pm_accepts_block_stack_push(parser, true);
14543 pm_context_t context;
14544
14545 switch (type) {
14546 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14547 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14548 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14549 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14550 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14551 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14552 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14553 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14554 }
14555
14556 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14557 pm_accepts_block_stack_pop(parser);
14558
14559 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14560 }
14561
14562 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14563 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14564 }
14565
14566 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14567 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14568 pm_begin_node_end_keyword_set(parent_node, &parser->current);
14569 } else {
14570 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
14571 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
14572 }
14573}
14574
14579static pm_begin_node_t *
14580parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14581 pm_token_t begin_keyword = not_provided(parser);
14582 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
14583
14584 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14585 node->base.location.start = start;
14586
14587 return node;
14588}
14589
14593static pm_block_parameters_node_t *
14594parse_block_parameters(
14595 pm_parser_t *parser,
14596 bool allows_trailing_comma,
14597 const pm_token_t *opening,
14598 bool is_lambda_literal,
14599 bool accepts_blocks_in_defaults,
14600 uint16_t depth
14601) {
14602 pm_parameters_node_t *parameters = NULL;
14603 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14604 if (!is_lambda_literal) {
14605 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14606 }
14607 parameters = parse_parameters(
14608 parser,
14609 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14610 false,
14611 allows_trailing_comma,
14612 false,
14613 accepts_blocks_in_defaults,
14614 true,
14615 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14616 (uint16_t) (depth + 1)
14617 );
14618 if (!is_lambda_literal) {
14619 context_pop(parser);
14620 }
14621 }
14622
14623 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14624 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
14625 accept1(parser, PM_TOKEN_NEWLINE);
14626
14627 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14628 do {
14629 switch (parser->current.type) {
14630 case PM_TOKEN_CONSTANT:
14631 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14632 parser_lex(parser);
14633 break;
14634 case PM_TOKEN_INSTANCE_VARIABLE:
14635 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14636 parser_lex(parser);
14637 break;
14638 case PM_TOKEN_GLOBAL_VARIABLE:
14639 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14640 parser_lex(parser);
14641 break;
14642 case PM_TOKEN_CLASS_VARIABLE:
14643 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14644 parser_lex(parser);
14645 break;
14646 default:
14647 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14648 break;
14649 }
14650
14651 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14652 pm_parser_local_add_token(parser, &parser->previous, 1);
14653
14654 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14655 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14656
14657 pm_block_parameters_node_append_local(block_parameters, local);
14658 } while (accept1(parser, PM_TOKEN_COMMA));
14659 }
14660 }
14661
14662 return block_parameters;
14663}
14664
14669static bool
14670outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14671 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14672 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14673 }
14674
14675 return false;
14676}
14677
/**
 * The names of the numbered parameters, in order: index 0 holds "_1" and
 * index 8 holds "_9".
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1", "_2", "_3",
    "_4", "_5", "_6",
    "_7", "_8", "_9"
};
14686
14692static pm_node_t *
14693parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14694 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14695
14696 // If we have ordinary parameters, then we will return them as the set of
14697 // parameters.
14698 if (parameters != NULL) {
14699 // If we also have implicit parameters, then this is an error.
14700 if (implicit_parameters->size > 0) {
14701 pm_node_t *node = implicit_parameters->nodes[0];
14702
14703 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14704 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14705 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14706 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14707 } else {
14708 assert(false && "unreachable");
14709 }
14710 }
14711
14712 return parameters;
14713 }
14714
14715 // If we don't have any implicit parameters, then the set of parameters is
14716 // NULL.
14717 if (implicit_parameters->size == 0) {
14718 return NULL;
14719 }
14720
14721 // If we don't have ordinary parameters, then we now must validate our set
14722 // of implicit parameters. We can only have numbered parameters or it, but
14723 // they cannot be mixed.
14724 uint8_t numbered_parameter = 0;
14725 bool it_parameter = false;
14726
14727 for (size_t index = 0; index < implicit_parameters->size; index++) {
14728 pm_node_t *node = implicit_parameters->nodes[index];
14729
14730 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14731 if (it_parameter) {
14732 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14733 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14734 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14735 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14736 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14737 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14738 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14739 } else {
14740 assert(false && "unreachable");
14741 }
14742 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14743 if (numbered_parameter > 0) {
14744 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14745 } else {
14746 it_parameter = true;
14747 }
14748 }
14749 }
14750
14751 if (numbered_parameter > 0) {
14752 // Go through the parent scopes and mark them as being disallowed from
14753 // using numbered parameters because this inner scope is using them.
14754 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14755 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14756 }
14757
14758 const pm_location_t location = { .start = opening->start, .end = closing->end };
14759 return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter));
14760 }
14761
14762 if (it_parameter) {
14763 return UP(pm_it_parameters_node_create(parser, opening, closing));
14764 }
14765
14766 return NULL;
14767}
14768
14772static pm_block_node_t *
14773parse_block(pm_parser_t *parser, uint16_t depth) {
14774 pm_token_t opening = parser->previous;
14775 accept1(parser, PM_TOKEN_NEWLINE);
14776
14777 pm_accepts_block_stack_push(parser, true);
14778 pm_parser_scope_push(parser, false);
14779
14780 pm_block_parameters_node_t *block_parameters = NULL;
14781
14782 if (accept1(parser, PM_TOKEN_PIPE)) {
14783 pm_token_t block_parameters_opening = parser->previous;
14784 if (match1(parser, PM_TOKEN_PIPE)) {
14785 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14786 parser->command_start = true;
14787 parser_lex(parser);
14788 } else {
14789 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14790 accept1(parser, PM_TOKEN_NEWLINE);
14791 parser->command_start = true;
14792 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14793 }
14794
14795 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
14796 }
14797
14798 accept1(parser, PM_TOKEN_NEWLINE);
14799 pm_node_t *statements = NULL;
14800
14801 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14802 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14803 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14804 }
14805
14806 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
14807 } else {
14808 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14809 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14810 pm_accepts_block_stack_push(parser, true);
14811 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14812 pm_accepts_block_stack_pop(parser);
14813 }
14814
14815 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14816 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14817 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14818 }
14819 }
14820
14821 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
14822 }
14823
14824 pm_constant_id_list_t locals;
14825 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14826 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14827
14828 pm_parser_scope_pop(parser);
14829 pm_accepts_block_stack_pop(parser);
14830
14831 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14832}
14833
14839static bool
14840parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
14841 bool found = false;
14842
14843 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14844 found |= true;
14845 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14846
14847 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14848 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14849 } else {
14850 pm_accepts_block_stack_push(parser, true);
14851 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14852
14853 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14854 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14855 parser->previous.start = parser->previous.end;
14856 parser->previous.type = PM_TOKEN_MISSING;
14857 }
14858
14859 pm_accepts_block_stack_pop(parser);
14860 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14861 }
14862 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14863 found |= true;
14864 pm_accepts_block_stack_push(parser, false);
14865
14866 // If we get here, then the subsequent token cannot be used as an infix
14867 // operator. In this case we assume the subsequent token is part of an
14868 // argument to this method call.
14869 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
14870
14871 // If we have done with the arguments and still not consumed the comma,
14872 // then we have a trailing comma where we need to check whether it is
14873 // allowed or not.
14874 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14875 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14876 }
14877
14878 pm_accepts_block_stack_pop(parser);
14879 }
14880
14881 // If we're at the end of the arguments, we can now check if there is a block
14882 // node that starts with a {. If there is, then we can parse it and add it to
14883 // the arguments.
14884 if (accepts_block) {
14885 pm_block_node_t *block = NULL;
14886
14887 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14888 found |= true;
14889 block = parse_block(parser, (uint16_t) (depth + 1));
14890 pm_arguments_validate_block(parser, arguments, block);
14891 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14892 found |= true;
14893 block = parse_block(parser, (uint16_t) (depth + 1));
14894 }
14895
14896 if (block != NULL) {
14897 if (arguments->block == NULL && !arguments->has_forwarding) {
14898 arguments->block = UP(block);
14899 } else {
14900 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14901
14902 if (arguments->block != NULL) {
14903 if (arguments->arguments == NULL) {
14904 arguments->arguments = pm_arguments_node_create(parser);
14905 }
14906 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
14907 }
14908 arguments->block = UP(block);
14909 }
14910 }
14911 }
14912
14913 return found;
14914}
14915
/**
 * Check whether a return (the given node) is allowed where it appears, and
 * report PM_ERR_RETURN_INVALID on the node if it is not.
 *
 * The check walks the parser's context list from the innermost context
 * outwards through the `prev` links. Depending on the context it either keeps
 * walking, stops silently, or reports the error and stops. If the walk runs
 * off the end of the list after passing through an sclass context, the error
 * is reported when the parser's version is at least
 * PM_OPTIONS_VERSION_CRUBY_3_4.
 *
 * NOTE(review): the line numbers embedded in this listing skip between
 * several of the case labels below, so some labels may be missing from this
 * copy - confirm against the upstream source before editing the switch.
 */
14920static void
14921parse_return(pm_parser_t *parser, pm_node_t *node) {
 // Set once an sclass context has been passed on the way outwards.
14922 bool in_sclass = false;
14923 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14924 switch (context_node->context) {
14928 case PM_CONTEXT_BEGIN:
14929 case PM_CONTEXT_CASE_IN:
14932 case PM_CONTEXT_DEFINED:
14933 case PM_CONTEXT_ELSE:
14934 case PM_CONTEXT_ELSIF:
14935 case PM_CONTEXT_EMBEXPR:
14937 case PM_CONTEXT_FOR:
14938 case PM_CONTEXT_IF:
14940 case PM_CONTEXT_MAIN:
14942 case PM_CONTEXT_PARENS:
14943 case PM_CONTEXT_POSTEXE:
14945 case PM_CONTEXT_PREEXE:
14947 case PM_CONTEXT_TERNARY:
14948 case PM_CONTEXT_UNLESS:
14949 case PM_CONTEXT_UNTIL:
14950 case PM_CONTEXT_WHILE:
14951 // Keep iterating up the lists of contexts, because returns can
14952 // see through these.
 // `continue` applies to the enclosing for loop, moving to the next
 // outer context.
14953 continue;
14957 case PM_CONTEXT_SCLASS:
 // Remember the sclass but keep walking; the verdict is made after
 // the loop.
14958 in_sclass = true;
14959 continue;
14963 case PM_CONTEXT_CLASS:
14967 case PM_CONTEXT_MODULE:
14968 // These contexts are invalid for a return.
14969 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14970 return;
14981 case PM_CONTEXT_DEF:
14987 // These contexts are valid for a return, and we should not
14988 // continue to loop.
14989 return;
14990 case PM_CONTEXT_NONE:
14991 // This case should never happen.
14992 assert(false && "unreachable");
 // This `break` only leaves the switch, so the walk goes on with the
 // next outer context.
14993 break;
14994 }
14995 }
 // Reached only when the walk ended without returning from inside the loop.
14996 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
14997 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14998 }
14999}
15000
/**
 * Check whether a block exit (the given node) is allowed where it appears.
 *
 * The check walks the parser's context list from the innermost context
 * outwards through the `prev` links. In a context that permits a block exit
 * it returns without doing anything. In a context that forbids one it appends
 * the node to parser->current_block_exits, so that the decision is deferred,
 * and returns. In any other context it moves on to the next outer context.
 *
 * NOTE(review): the line numbers embedded in this listing skip between
 * several of the case labels below, so some labels may be missing from this
 * copy - confirm against the upstream source before editing the switch.
 */
15005static void
15006parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15007 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15008 switch (context_node->context) {
15015 case PM_CONTEXT_DEFINED:
15016 case PM_CONTEXT_FOR:
15023 case PM_CONTEXT_POSTEXE:
15024 case PM_CONTEXT_UNTIL:
15025 case PM_CONTEXT_WHILE:
15026 // These are the good cases. We're allowed to have a block exit
15027 // in these contexts.
15028 return;
15029 case PM_CONTEXT_DEF:
15034 case PM_CONTEXT_MAIN:
15035 case PM_CONTEXT_PREEXE:
15036 case PM_CONTEXT_SCLASS:
15040 // These are the bad cases. We're not allowed to have a block
15041 // exit in these contexts.
15042 //
15043 // If we get here, then we're about to mark this block exit
15044 // as invalid. However, it could later _become_ valid if we
15045 // find a trailing while/until on the expression. In this
15046 // case instead of adding the error here, we'll add the
15047 // block exit to the list of exits for the expression, and
15048 // the node parsing will handle validating it instead.
 // A list of pending block exits must already be installed on the
 // parser when this point is reached.
15049 assert(parser->current_block_exits != NULL);
15050 pm_node_list_append(parser->current_block_exits, node);
15051 return;
15055 case PM_CONTEXT_BEGIN:
15056 case PM_CONTEXT_CASE_IN:
15061 case PM_CONTEXT_CLASS:
15063 case PM_CONTEXT_ELSE:
15064 case PM_CONTEXT_ELSIF:
15065 case PM_CONTEXT_EMBEXPR:
15067 case PM_CONTEXT_IF:
15071 case PM_CONTEXT_MODULE:
15073 case PM_CONTEXT_PARENS:
15076 case PM_CONTEXT_TERNARY:
15077 case PM_CONTEXT_UNLESS:
15078 // In these contexts we should continue walking up the list of
15079 // contexts.
 // This `break` only leaves the switch; the for loop then advances to
 // the next outer context.
15080 break;
15081 case PM_CONTEXT_NONE:
15082 // This case should never happen.
15083 assert(false && "unreachable");
15084 break;
15085 }
15086 }
15087}
15088
15093static pm_node_list_t *
15094push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15095 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15096 parser->current_block_exits = current_block_exits;
15097 return previous_block_exits;
15098}
15099
15105static void
15106flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15107 pm_node_t *block_exit;
15108 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15109 const char *type;
15110
15111 switch (PM_NODE_TYPE(block_exit)) {
15112 case PM_BREAK_NODE: type = "break"; break;
15113 case PM_NEXT_NODE: type = "next"; break;
15114 case PM_REDO_NODE: type = "redo"; break;
15115 default: assert(false && "unreachable"); type = ""; break;
15116 }
15117
15118 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15119 }
15120
15121 parser->current_block_exits = previous_block_exits;
15122}
15123
15128static void
15129pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15130 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15131 // If we matched a trailing while/until, then all of the block exits in
15132 // the contained list are valid. In this case we do not need to do
15133 // anything.
15134 parser->current_block_exits = previous_block_exits;
15135 } else if (previous_block_exits != NULL) {
15136 // If we did not matching a trailing while/until, then all of the block
15137 // exits contained in the list are invalid for this specific context.
15138 // However, they could still become valid in a higher level context if
15139 // there is another list above this one. In this case we'll push all of
15140 // the block exits up to the previous list.
15141 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15142 parser->current_block_exits = previous_block_exits;
15143 } else {
15144 // If we did not match a trailing while/until and this was the last
15145 // chance to do so, then all of the block exits in the list are invalid
15146 // and we need to add an error for each of them.
15147 flush_block_exits(parser, previous_block_exits);
15148 }
15149}
15150
15151static inline pm_node_t *
15152parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15153 context_push(parser, PM_CONTEXT_PREDICATE);
15154 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15155 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15156
15157 // Predicates are closed by a term, a "then", or a term and then a "then".
15158 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15159
15160 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15161 predicate_closed = true;
15162 *then_keyword = parser->previous;
15163 }
15164
15165 if (!predicate_closed) {
15166 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15167 }
15168
15169 context_pop(parser);
15170 return predicate;
15171}
15172
15173static inline pm_node_t *
15174parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15175 pm_node_list_t current_block_exits = { 0 };
15176 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15177
15178 pm_token_t keyword = parser->previous;
15179 pm_token_t then_keyword = not_provided(parser);
15180
15181 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15182 pm_statements_node_t *statements = NULL;
15183
15184 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15185 pm_accepts_block_stack_push(parser, true);
15186 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15187 pm_accepts_block_stack_pop(parser);
15188 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15189 }
15190
15191 pm_token_t end_keyword = not_provided(parser);
15192 pm_node_t *parent = NULL;
15193
15194 switch (context) {
15195 case PM_CONTEXT_IF:
15196 parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15197 break;
15198 case PM_CONTEXT_UNLESS:
15199 parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements));
15200 break;
15201 default:
15202 assert(false && "unreachable");
15203 break;
15204 }
15205
15206 pm_node_t *current = parent;
15207
15208 // Parse any number of elsif clauses. This will form a linked list of if
15209 // nodes pointing to each other from the top.
15210 if (context == PM_CONTEXT_IF) {
15211 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15212 if (parser_end_of_line_p(parser)) {
15213 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15214 }
15215
15216 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15217 pm_token_t elsif_keyword = parser->current;
15218 parser_lex(parser);
15219
15220 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15221 pm_accepts_block_stack_push(parser, true);
15222
15223 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15224 pm_accepts_block_stack_pop(parser);
15225 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15226
15227 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15228 ((pm_if_node_t *) current)->subsequent = elsif;
15229 current = elsif;
15230 }
15231 }
15232
15233 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15234 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15235 opening_newline_index = token_newline_index(parser);
15236
15237 parser_lex(parser);
15238 pm_token_t else_keyword = parser->previous;
15239
15240 pm_accepts_block_stack_push(parser, true);
15241 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15242 pm_accepts_block_stack_pop(parser);
15243
15244 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15245 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15246 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15247
15248 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15249
15250 switch (context) {
15251 case PM_CONTEXT_IF:
15252 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15253 break;
15254 case PM_CONTEXT_UNLESS:
15255 ((pm_unless_node_t *) parent)->else_clause = else_node;
15256 break;
15257 default:
15258 assert(false && "unreachable");
15259 break;
15260 }
15261 } else {
15262 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15263 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15264 }
15265
15266 // Set the appropriate end location for all of the nodes in the subtree.
15267 switch (context) {
15268 case PM_CONTEXT_IF: {
15269 pm_node_t *current = parent;
15270 bool recursing = true;
15271
15272 while (recursing) {
15273 switch (PM_NODE_TYPE(current)) {
15274 case PM_IF_NODE:
15275 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15276 current = ((pm_if_node_t *) current)->subsequent;
15277 recursing = current != NULL;
15278 break;
15279 case PM_ELSE_NODE:
15280 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15281 recursing = false;
15282 break;
15283 default: {
15284 recursing = false;
15285 break;
15286 }
15287 }
15288 }
15289 break;
15290 }
15291 case PM_CONTEXT_UNLESS:
15292 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15293 break;
15294 default:
15295 assert(false && "unreachable");
15296 break;
15297 }
15298
15299 pop_block_exits(parser, previous_block_exits);
15300 pm_node_list_free(&current_block_exits);
15301
15302 return parent;
15303}
15304
/**
 * Expands to a run of case labels covering the keyword token types listed
 * below. The leading `case` and the trailing colon are deliberately left off
 * so that it is written as `case PM_CASE_KEYWORD:` inside a switch over a
 * token type. Modifier keyword tokens (for example
 * PM_TOKEN_KEYWORD_WHILE_MODIFIER) are not part of this list.
 */
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15320
/**
 * Expands to a run of case labels covering the operator token types listed
 * below, written as `case PM_CASE_OPERATOR:` inside a switch over a token
 * type. Within this file it selects the tokens accepted as operator method
 * names (symbols, undef/alias arguments, and method definition names).
 */
#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15333
/**
 * Expands to a run of case labels, written as `case PM_CASE_PRIMITIVE:`
 * inside a switch over a token type. The list covers numeric literal tokens,
 * the opening tokens of symbols, regular expressions, backtick and %-literals,
 * strings and heredocs, the nil/self/true/false and __FILE__/__LINE__/
 * __ENCODING__ keywords, `->`, a unary minus before a number, and character
 * literals.
 *
 * NOTE(review): presumably "tokens that can begin a primitive value" --
 * confirm against the use sites. Some labels also appear in PM_CASE_KEYWORD
 * and PM_CASE_OPERATOR, so these macros cannot share one switch.
 */
#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15348
/**
 * Expands to a run of case labels, written as `case PM_CASE_PARAMETER:`
 * inside a switch over a token type. The list covers `&`, `...`, `*` and `**`
 * (in their unary and binary token forms where both exist), identifiers,
 * labels, constants, and instance/global/class variables.
 *
 * NOTE(review): presumably "tokens that can begin a parameter" -- confirm
 * against the use sites.
 */
#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15357
/**
 * Expands to a run of case labels, written as `case PM_CASE_WRITABLE:`. Unlike
 * the macros above, the labels here are node types rather than token types,
 * so it belongs in a switch over a node type.
 *
 * NOTE(review): presumably the read/target nodes that may be turned into the
 * left-hand side of an assignment -- confirm against the use sites.
 */
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15366
// Assert at compile time that the forced-UTF-8 flag has the same numeric value
// in the string flags and in the encoding flags, so that the type of a node can
// be switched safely without having to move the flags.
PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15370
15375static inline pm_node_flags_t
15376parse_unescaped_encoding(const pm_parser_t *parser) {
15377 if (parser->explicit_encoding != NULL) {
15379 // If the there's an explicit encoding and it's using a UTF-8 escape
15380 // sequence, then mark the string as UTF-8.
15381 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15382 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15383 // If there's a non-UTF-8 escape sequence being used, then the
15384 // string uses the source encoding, unless the source is marked as
15385 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15386 // order to keep the string valid.
15387 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15388 }
15389 }
15390 return 0;
15391}
15392
15397static pm_node_t *
15398parse_string_part(pm_parser_t *parser, uint16_t depth) {
15399 switch (parser->current.type) {
15400 // Here the lexer has returned to us plain string content. In this case
15401 // we'll create a string node that has no opening or closing and return that
15402 // as the part. These kinds of parts look like:
15403 //
15404 // "aaa #{bbb} #@ccc ddd"
15405 // ^^^^ ^ ^^^^
15406 case PM_TOKEN_STRING_CONTENT: {
15407 pm_token_t opening = not_provided(parser);
15408 pm_token_t closing = not_provided(parser);
15409
15410 pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
15411 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15412
15413 parser_lex(parser);
15414 return node;
15415 }
15416 // Here the lexer has returned the beginning of an embedded expression. In
15417 // that case we'll parse the inner statements and return that as the part.
15418 // These kinds of parts look like:
15419 //
15420 // "aaa #{bbb} #@ccc ddd"
15421 // ^^^^^^
15422 case PM_TOKEN_EMBEXPR_BEGIN: {
15423 // Ruby disallows seeing encoding around interpolation in strings,
15424 // even though it is known at parse time.
15425 parser->explicit_encoding = NULL;
15426
15427 pm_lex_state_t state = parser->lex_state;
15428 int brace_nesting = parser->brace_nesting;
15429
15430 parser->brace_nesting = 0;
15431 lex_state_set(parser, PM_LEX_STATE_BEG);
15432 parser_lex(parser);
15433
15434 pm_token_t opening = parser->previous;
15435 pm_statements_node_t *statements = NULL;
15436
15437 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
15438 pm_accepts_block_stack_push(parser, true);
15439 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15440 pm_accepts_block_stack_pop(parser);
15441 }
15442
15443 parser->brace_nesting = brace_nesting;
15444 lex_state_set(parser, state);
15445
15446 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15447 pm_token_t closing = parser->previous;
15448
15449 // If this set of embedded statements only contains a single
15450 // statement, then Ruby does not consider it as a possible statement
15451 // that could emit a line event.
15452 if (statements != NULL && statements->body.size == 1) {
15453 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15454 }
15455
15456 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing));
15457 }
15458
15459 // Here the lexer has returned the beginning of an embedded variable.
15460 // In that case we'll parse the variable and create an appropriate node
15461 // for it and then return that node. These kinds of parts look like:
15462 //
15463 // "aaa #{bbb} #@ccc ddd"
15464 // ^^^^^
15465 case PM_TOKEN_EMBVAR: {
15466 // Ruby disallows seeing encoding around interpolation in strings,
15467 // even though it is known at parse time.
15468 parser->explicit_encoding = NULL;
15469
15470 lex_state_set(parser, PM_LEX_STATE_BEG);
15471 parser_lex(parser);
15472
15473 pm_token_t operator = parser->previous;
15474 pm_node_t *variable;
15475
15476 switch (parser->current.type) {
15477 // In this case a back reference is being interpolated. We'll
15478 // create a global variable read node.
15479 case PM_TOKEN_BACK_REFERENCE:
15480 parser_lex(parser);
15481 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15482 break;
15483 // In this case an nth reference is being interpolated. We'll
15484 // create a global variable read node.
15485 case PM_TOKEN_NUMBERED_REFERENCE:
15486 parser_lex(parser);
15487 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15488 break;
15489 // In this case a global variable is being interpolated. We'll
15490 // create a global variable read node.
15491 case PM_TOKEN_GLOBAL_VARIABLE:
15492 parser_lex(parser);
15493 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15494 break;
15495 // In this case an instance variable is being interpolated.
15496 // We'll create an instance variable read node.
15497 case PM_TOKEN_INSTANCE_VARIABLE:
15498 parser_lex(parser);
15499 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15500 break;
15501 // In this case a class variable is being interpolated. We'll
15502 // create a class variable read node.
15503 case PM_TOKEN_CLASS_VARIABLE:
15504 parser_lex(parser);
15505 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15506 break;
15507 // We can hit here if we got an invalid token. In that case
15508 // we'll not attempt to lex this token and instead just return a
15509 // missing node.
15510 default:
15511 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15512 variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15513 break;
15514 }
15515
15516 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15517 }
15518 default:
15519 parser_lex(parser);
15520 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15521 return NULL;
15522 }
15523}
15524
15530static const uint8_t *
15531parse_operator_symbol_name(const pm_token_t *name) {
15532 switch (name->type) {
15533 case PM_TOKEN_TILDE:
15534 case PM_TOKEN_BANG:
15535 if (name->end[-1] == '@') return name->end - 1;
15537 default:
15538 return name->end;
15539 }
15540}
15541
15542static pm_node_t *
15543parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15544 pm_token_t closing = not_provided(parser);
15545 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
15546
15547 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15548
15549 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15550 parser_lex(parser);
15551
15552 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15553 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15554
15555 return UP(symbol);
15556}
15557
15563static pm_node_t *
15564parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15565 const pm_token_t opening = parser->previous;
15566
15567 if (lex_mode->mode != PM_LEX_STRING) {
15568 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15569
15570 switch (parser->current.type) {
15571 case PM_CASE_OPERATOR:
15572 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15573 case PM_TOKEN_IDENTIFIER:
15574 case PM_TOKEN_CONSTANT:
15575 case PM_TOKEN_INSTANCE_VARIABLE:
15576 case PM_TOKEN_METHOD_NAME:
15577 case PM_TOKEN_CLASS_VARIABLE:
15578 case PM_TOKEN_GLOBAL_VARIABLE:
15579 case PM_TOKEN_NUMBERED_REFERENCE:
15580 case PM_TOKEN_BACK_REFERENCE:
15581 case PM_CASE_KEYWORD:
15582 parser_lex(parser);
15583 break;
15584 default:
15585 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15586 break;
15587 }
15588
15589 pm_token_t closing = not_provided(parser);
15590 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15591
15592 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15593 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15594
15595 return UP(symbol);
15596 }
15597
15598 if (lex_mode->as.string.interpolation) {
15599 // If we have the end of the symbol, then we can return an empty symbol.
15600 if (match1(parser, PM_TOKEN_STRING_END)) {
15601 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15602 parser_lex(parser);
15603
15604 pm_token_t content = not_provided(parser);
15605 pm_token_t closing = parser->previous;
15606 return UP(pm_symbol_node_create(parser, &opening, &content, &closing));
15607 }
15608
15609 // Now we can parse the first part of the symbol.
15610 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15611
15612 // If we got a string part, then it's possible that we could transform
15613 // what looks like an interpolated symbol into a regular symbol.
15614 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15615 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15616 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15617
15618 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15619 }
15620
15621 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15622 if (part) pm_interpolated_symbol_node_append(symbol, part);
15623
15624 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15625 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15626 pm_interpolated_symbol_node_append(symbol, part);
15627 }
15628 }
15629
15630 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15631 if (match1(parser, PM_TOKEN_EOF)) {
15632 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15633 } else {
15634 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15635 }
15636
15637 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15638 return UP(symbol);
15639 }
15640
15641 pm_token_t content;
15642 pm_string_t unescaped;
15643
15644 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15645 content = parser->current;
15646 unescaped = parser->current_string;
15647 parser_lex(parser);
15648
15649 // If we have two string contents in a row, then the content of this
15650 // symbol is split because of heredoc contents. This looks like:
15651 //
15652 // <<A; :'a
15653 // A
15654 // b'
15655 //
15656 // In this case, the best way we have to represent this is as an
15657 // interpolated string node, so that's what we'll do here.
15658 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15659 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15660 pm_token_t bounds = not_provided(parser);
15661
15662 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped));
15663 pm_interpolated_symbol_node_append(symbol, part);
15664
15665 part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string));
15666 pm_interpolated_symbol_node_append(symbol, part);
15667
15668 if (next_state != PM_LEX_STATE_NONE) {
15669 lex_state_set(parser, next_state);
15670 }
15671
15672 parser_lex(parser);
15673 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15674
15675 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15676 return UP(symbol);
15677 }
15678 } else {
15679 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15680 pm_string_shared_init(&unescaped, content.start, content.end);
15681 }
15682
15683 if (next_state != PM_LEX_STATE_NONE) {
15684 lex_state_set(parser, next_state);
15685 }
15686
15687 if (match1(parser, PM_TOKEN_EOF)) {
15688 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15689 } else {
15690 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15691 }
15692
15693 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15694}
15695
15700static inline pm_node_t *
15701parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15702 switch (parser->current.type) {
15703 case PM_CASE_OPERATOR: {
15704 const pm_token_t opening = not_provided(parser);
15705 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
15706 }
15707 case PM_CASE_KEYWORD:
15708 case PM_TOKEN_CONSTANT:
15709 case PM_TOKEN_IDENTIFIER:
15710 case PM_TOKEN_METHOD_NAME: {
15711 parser_lex(parser);
15712
15713 pm_token_t opening = not_provided(parser);
15714 pm_token_t closing = not_provided(parser);
15715 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15716
15717 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15718 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15719
15720 return UP(symbol);
15721 }
15722 case PM_TOKEN_SYMBOL_BEGIN: {
15723 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15724 parser_lex(parser);
15725
15726 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15727 }
15728 default:
15729 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15730 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15731 }
15732}
15733
15740static inline pm_node_t *
15741parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15742 switch (parser->current.type) {
15743 case PM_CASE_OPERATOR: {
15744 const pm_token_t opening = not_provided(parser);
15745 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15746 }
15747 case PM_CASE_KEYWORD:
15748 case PM_TOKEN_CONSTANT:
15749 case PM_TOKEN_IDENTIFIER:
15750 case PM_TOKEN_METHOD_NAME: {
15751 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15752 parser_lex(parser);
15753
15754 pm_token_t opening = not_provided(parser);
15755 pm_token_t closing = not_provided(parser);
15756 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15757
15758 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15759 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15760
15761 return UP(symbol);
15762 }
15763 case PM_TOKEN_SYMBOL_BEGIN: {
15764 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15765 parser_lex(parser);
15766
15767 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15768 }
15769 case PM_TOKEN_BACK_REFERENCE:
15770 parser_lex(parser);
15771 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15772 case PM_TOKEN_NUMBERED_REFERENCE:
15773 parser_lex(parser);
15774 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15775 case PM_TOKEN_GLOBAL_VARIABLE:
15776 parser_lex(parser);
15777 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15778 default:
15779 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15780 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15781 }
15782}
15783
15788static pm_node_t *
15789parse_variable(pm_parser_t *parser) {
15790 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15791 int depth;
15792 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
15793
15794 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15795 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15796 }
15797
15798 pm_scope_t *current_scope = parser->current_scope;
15799 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15800 if (is_numbered_param) {
15801 // When you use a numbered parameter, it implies the existence of
15802 // all of the locals that exist before it. For example, referencing
15803 // _2 means that _1 must exist. Therefore here we loop through all
15804 // of the possibilities and add them into the constant pool.
15805 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15806 for (uint8_t number = 1; number <= maximum; number++) {
15807 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15808 }
15809
15810 if (!match1(parser, PM_TOKEN_EQUAL)) {
15811 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15812 }
15813
15814 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15815 pm_node_list_append(&current_scope->implicit_parameters, node);
15816
15817 return node;
15818 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15819 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15820 pm_node_list_append(&current_scope->implicit_parameters, node);
15821
15822 return node;
15823 }
15824 }
15825
15826 return NULL;
15827}
15828
15832static pm_node_t *
15833parse_variable_call(pm_parser_t *parser) {
15834 pm_node_flags_t flags = 0;
15835
15836 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15837 pm_node_t *node = parse_variable(parser);
15838 if (node != NULL) return node;
15839 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15840 }
15841
15842 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15843 pm_node_flag_set(UP(node), flags);
15844
15845 return UP(node);
15846}
15847
15853static inline pm_token_t
15854parse_method_definition_name(pm_parser_t *parser) {
15855 switch (parser->current.type) {
15856 case PM_CASE_KEYWORD:
15857 case PM_TOKEN_CONSTANT:
15858 case PM_TOKEN_METHOD_NAME:
15859 parser_lex(parser);
15860 return parser->previous;
15861 case PM_TOKEN_IDENTIFIER:
15862 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
15863 parser_lex(parser);
15864 return parser->previous;
15865 case PM_CASE_OPERATOR:
15866 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15867 parser_lex(parser);
15868 return parser->previous;
15869 default:
15870 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15871 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
15872 }
15873}
15874
15875static void
15876parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
15877 // Get a reference to the string struct that is being held by the string
15878 // node. This is the value we're going to actually manipulate.
15879 pm_string_ensure_owned(string);
15880
15881 // Now get the bounds of the existing string. We'll use this as a
15882 // destination to move bytes into. We'll also use it for bounds checking
15883 // since we don't require that these strings be null terminated.
15884 size_t dest_length = pm_string_length(string);
15885 const uint8_t *source_cursor = (uint8_t *) string->source;
15886 const uint8_t *source_end = source_cursor + dest_length;
15887
15888 // We're going to move bytes backward in the string when we get leading
15889 // whitespace, so we'll maintain a pointer to the current position in the
15890 // string that we're writing to.
15891 size_t trimmed_whitespace = 0;
15892
15893 // While we haven't reached the amount of common whitespace that we need to
15894 // trim and we haven't reached the end of the string, we'll keep trimming
15895 // whitespace. Trimming in this context means skipping over these bytes such
15896 // that they aren't copied into the new string.
15897 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15898 if (*source_cursor == '\t') {
15899 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15900 if (trimmed_whitespace > common_whitespace) break;
15901 } else {
15902 trimmed_whitespace++;
15903 }
15904
15905 source_cursor++;
15906 dest_length--;
15907 }
15908
15909 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
15910 string->length = dest_length;
15911}
15912
15916static void
15917parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15918 // The next node should be dedented if it's the first node in the list or if
15919 // it follows a string node.
15920 bool dedent_next = true;
15921
15922 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15923 // keep around two indices: a read and a write. If we end up trimming all of
15924 // the whitespace from a node, then we'll drop it from the list entirely.
15925 size_t write_index = 0;
15926
15927 pm_node_t *node;
15928 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15929 // We're not manipulating child nodes that aren't strings. In this case
15930 // we'll skip past it and indicate that the subsequent node should not
15931 // be dedented.
15932 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15933 nodes->nodes[write_index++] = node;
15934 dedent_next = false;
15935 continue;
15936 }
15937
15938 pm_string_node_t *string_node = ((pm_string_node_t *) node);
15939 if (dedent_next) {
15940 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
15941 }
15942
15943 if (string_node->unescaped.length == 0) {
15944 pm_node_destroy(parser, node);
15945 } else {
15946 nodes->nodes[write_index++] = node;
15947 }
15948
15949 // We always dedent the next node if it follows a string node.
15950 dedent_next = true;
15951 }
15952
15953 nodes->size = write_index;
15954}
15955
15959static pm_token_t
15960parse_strings_empty_content(const uint8_t *location) {
15961 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15962}
15963
15967static inline pm_node_t *
15968parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
15969 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
15970 bool concating = false;
15971
15972 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15973 pm_node_t *node = NULL;
15974
15975 // Here we have found a string literal. We'll parse it and add it to
15976 // the list of strings.
15977 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
15978 assert(lex_mode->mode == PM_LEX_STRING);
15979 bool lex_interpolation = lex_mode->as.string.interpolation;
15980 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
15981
15982 pm_token_t opening = parser->current;
15983 parser_lex(parser);
15984
15985 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15986 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15987 // If we get here, then we have an end immediately after a
15988 // start. In that case we'll create an empty content token and
15989 // return an uninterpolated string.
15990 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15991 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15992
15993 pm_string_shared_init(&string->unescaped, content.start, content.end);
15994 node = UP(string);
15995 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15996 // If we get here, then we have an end of a label immediately
15997 // after a start. In that case we'll create an empty symbol
15998 // node.
15999 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16000 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16001
16002 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16003 node = UP(symbol);
16004
16005 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16006 } else if (!lex_interpolation) {
16007 // If we don't accept interpolation then we expect the string to
16008 // start with a single string content node.
16009 pm_string_t unescaped;
16010 pm_token_t content;
16011
16012 if (match1(parser, PM_TOKEN_EOF)) {
16013 unescaped = PM_STRING_EMPTY;
16014 content = not_provided(parser);
16015 } else {
16016 unescaped = parser->current_string;
16017 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16018 content = parser->previous;
16019 }
16020
16021 // It is unfortunately possible to have multiple string content
16022 // nodes in a row in the case that there's heredoc content in
16023 // the middle of the string, like this cursed example:
16024 //
16025 // <<-END+'b
16026 // a
16027 // END
16028 // c'+'d'
16029 //
16030 // In that case we need to switch to an interpolated string to
16031 // be able to contain all of the parts.
16032 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16033 pm_node_list_t parts = { 0 };
16034
16035 pm_token_t delimiters = not_provided(parser);
16036 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped));
16037 pm_node_list_append(&parts, part);
16038
16039 do {
16040 part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters));
16041 pm_node_list_append(&parts, part);
16042 parser_lex(parser);
16043 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16044
16045 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16046 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16047
16048 pm_node_list_free(&parts);
16049 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16050 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16051 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16052 } else if (match1(parser, PM_TOKEN_EOF)) {
16053 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16054 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16055 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16056 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16057 } else {
16058 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16059 parser->previous.start = parser->previous.end;
16060 parser->previous.type = PM_TOKEN_MISSING;
16061 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16062 }
16063 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16064 // In this case we've hit string content so we know the string
16065 // at least has something in it. We'll need to check if the
16066 // following token is the end (in which case we can return a
16067 // plain string) or if it's not then it has interpolation.
16068 pm_token_t content = parser->current;
16069 pm_string_t unescaped = parser->current_string;
16070 parser_lex(parser);
16071
16072 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16073 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16074 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16075
16076 // Kind of odd behavior, but basically if we have an
16077 // unterminated string and it ends in a newline, we back up one
16078 // character so that the error message is on the last line of
16079 // content in the string.
16080 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16081 const uint8_t *location = parser->previous.end;
16082 if (location > parser->start && location[-1] == '\n') location--;
16083 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16084
16085 parser->previous.start = parser->previous.end;
16086 parser->previous.type = PM_TOKEN_MISSING;
16087 }
16088 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16089 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16090 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16091 } else {
16092 // If we get here, then we have interpolation so we'll need
16093 // to create a string or symbol node with interpolation.
16094 pm_node_list_t parts = { 0 };
16095 pm_token_t string_opening = not_provided(parser);
16096 pm_token_t string_closing = not_provided(parser);
16097
16098 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped));
16099 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16100 pm_node_list_append(&parts, part);
16101
16102 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16103 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16104 pm_node_list_append(&parts, part);
16105 }
16106 }
16107
16108 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16109 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16110 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16111 } else if (match1(parser, PM_TOKEN_EOF)) {
16112 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16113 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16114 } else {
16115 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16116 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16117 }
16118
16119 pm_node_list_free(&parts);
16120 }
16121 } else {
16122 // If we get here, then the first part of the string is not plain
16123 // string content, in which case we need to parse the string as an
16124 // interpolated string.
16125 pm_node_list_t parts = { 0 };
16126 pm_node_t *part;
16127
16128 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16129 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16130 pm_node_list_append(&parts, part);
16131 }
16132 }
16133
16134 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16135 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16136 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16137 } else if (match1(parser, PM_TOKEN_EOF)) {
16138 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16139 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16140 } else {
16141 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16142 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16143 }
16144
16145 pm_node_list_free(&parts);
16146 }
16147
16148 if (current == NULL) {
16149 // If the node we just parsed is a symbol node, then we can't
16150 // concatenate it with anything else, so we can now return that
16151 // node.
16152 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16153 return node;
16154 }
16155
16156 // If we don't already have a node, then it's fine and we can just
16157 // set the result to be the node we just parsed.
16158 current = node;
16159 } else {
16160 // Otherwise we need to check the type of the node we just parsed.
16161 // If it cannot be concatenated with the previous node, then we'll
16162 // need to add a syntax error.
16163 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16164 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16165 }
16166
16167 // If we haven't already created our container for concatenation,
16168 // we'll do that now.
16169 if (!concating) {
16170 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16171 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16172 }
16173
16174 concating = true;
16175 pm_token_t bounds = not_provided(parser);
16176
16177 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16178 pm_interpolated_string_node_append(container, current);
16179 current = UP(container);
16180 }
16181
16182 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16183 }
16184 }
16185
16186 return current;
16187}
16188
// Flags accepted by the `flags` argument of parse_pattern. Callers combine
// them with bitwise OR (for example PM_PARSE_PATTERN_TOP |
// PM_PARSE_PATTERN_MULTI when parsing the contents of Foo(...) or Foo[...]),
// and pass PM_PARSE_PATTERN_SINGLE when parsing the value of a hash pattern
// key.
// NOTE(review): the exact meaning of each flag is defined by parse_pattern,
// which is not visible here; confirm against its implementation.
#define PM_PARSE_PATTERN_SINGLE 0
#define PM_PARSE_PATTERN_TOP 1
#define PM_PARSE_PATTERN_MULTI 2

// Forward declaration: the pattern helpers below call parse_pattern, which is
// defined after them.
static pm_node_t *
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16195
16201static void
16202parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16203 // Skip this capture if it starts with an underscore.
16204 if (peek_at(parser, location->start) == '_') return;
16205
16206 if (pm_constant_id_list_includes(captures, capture)) {
16207 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16208 } else {
16209 pm_constant_id_list_append(captures, capture);
16210 }
16211}
16212
16216static pm_node_t *
16217parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16218 // Now, if there are any :: operators that follow, parse them as constant
16219 // path nodes.
16220 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16221 pm_token_t delimiter = parser->previous;
16222 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16223 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16224 }
16225
16226 // If there is a [ or ( that follows, then this is part of a larger pattern
16227 // expression. We'll parse the inner pattern here, then modify the returned
16228 // inner pattern with our constant path attached.
16229 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16230 return node;
16231 }
16232
16233 pm_token_t opening;
16234 pm_token_t closing;
16235 pm_node_t *inner = NULL;
16236
16237 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16238 opening = parser->previous;
16239 accept1(parser, PM_TOKEN_NEWLINE);
16240
16241 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16242 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16243 accept1(parser, PM_TOKEN_NEWLINE);
16244 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16245 }
16246
16247 closing = parser->previous;
16248 } else {
16249 parser_lex(parser);
16250 opening = parser->previous;
16251 accept1(parser, PM_TOKEN_NEWLINE);
16252
16253 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16254 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16255 accept1(parser, PM_TOKEN_NEWLINE);
16256 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16257 }
16258
16259 closing = parser->previous;
16260 }
16261
16262 if (!inner) {
16263 // If there was no inner pattern, then we have something like Foo() or
16264 // Foo[]. In that case we'll create an array pattern with no requireds.
16265 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16266 }
16267
16268 // Now that we have the inner pattern, check to see if it's an array, find,
16269 // or hash pattern. If it is, then we'll attach our constant path to it if
16270 // it doesn't already have a constant. If it's not one of those node types
16271 // or it does have a constant, then we'll create an array pattern.
16272 switch (PM_NODE_TYPE(inner)) {
16273 case PM_ARRAY_PATTERN_NODE: {
16274 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16275
16276 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16277 pattern_node->base.location.start = node->location.start;
16278 pattern_node->base.location.end = closing.end;
16279
16280 pattern_node->constant = node;
16281 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16282 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16283
16284 return UP(pattern_node);
16285 }
16286
16287 break;
16288 }
16289 case PM_FIND_PATTERN_NODE: {
16290 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16291
16292 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16293 pattern_node->base.location.start = node->location.start;
16294 pattern_node->base.location.end = closing.end;
16295
16296 pattern_node->constant = node;
16297 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16298 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16299
16300 return UP(pattern_node);
16301 }
16302
16303 break;
16304 }
16305 case PM_HASH_PATTERN_NODE: {
16306 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16307
16308 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16309 pattern_node->base.location.start = node->location.start;
16310 pattern_node->base.location.end = closing.end;
16311
16312 pattern_node->constant = node;
16313 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16314 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16315
16316 return UP(pattern_node);
16317 }
16318
16319 break;
16320 }
16321 default:
16322 break;
16323 }
16324
16325 // If we got here, then we didn't return one of the inner patterns by
16326 // attaching its constant. In this case we'll create an array pattern and
16327 // attach our constant to it.
16328 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16329 pm_array_pattern_node_requireds_append(pattern_node, inner);
16330 return UP(pattern_node);
16331}
16332
16336static pm_splat_node_t *
16337parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16338 assert(parser->previous.type == PM_TOKEN_USTAR);
16339 pm_token_t operator = parser->previous;
16340 pm_node_t *name = NULL;
16341
16342 // Rest patterns don't necessarily have a name associated with them. So we
16343 // will check for that here. If they do, then we'll add it to the local
16344 // table since this pattern will cause it to become a local variable.
16345 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16346 pm_token_t identifier = parser->previous;
16347 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16348
16349 int depth;
16350 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16351 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16352 }
16353
16354 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16355 name = UP(pm_local_variable_target_node_create(
16356 parser,
16357 &PM_LOCATION_TOKEN_VALUE(&identifier),
16358 constant_id,
16359 (uint32_t) (depth == -1 ? 0 : depth)
16360 ));
16361 }
16362
16363 // Finally we can return the created node.
16364 return pm_splat_node_create(parser, &operator, name);
16365}
16366
16370static pm_node_t *
16371parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16372 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16373 parser_lex(parser);
16374
16375 pm_token_t operator = parser->previous;
16376 pm_node_t *value = NULL;
16377
16378 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16379 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16380 }
16381
16382 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16383 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16384
16385 int depth;
16386 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16387 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16388 }
16389
16390 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16391 value = UP(pm_local_variable_target_node_create(
16392 parser,
16393 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16394 constant_id,
16395 (uint32_t) (depth == -1 ? 0 : depth)
16396 ));
16397 }
16398
16399 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16400}
16401
16406static bool
16407pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16408 ptrdiff_t length = end - start;
16409 if (length == 0) return false;
16410
16411 // First ensure that it starts with a valid identifier starting character.
16412 size_t width = char_is_identifier_start(parser, start, end - start);
16413 if (width == 0) return false;
16414
16415 // Next, ensure that it's not an uppercase character.
16416 if (parser->encoding_changed) {
16417 if (parser->encoding->isupper_char(start, length)) return false;
16418 } else {
16419 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16420 }
16421
16422 // Next, iterate through all of the bytes of the string to ensure that they
16423 // are all valid identifier characters.
16424 const uint8_t *cursor = start + width;
16425 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16426 return cursor == end;
16427}
16428
16433static pm_node_t *
16434parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16435 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16436
16437 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16438 int depth = -1;
16439
16440 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16441 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16442 } else {
16443 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16444
16445 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16446 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16447 }
16448 }
16449
16450 if (depth == -1) {
16451 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
16452 }
16453
16454 parse_pattern_capture(parser, captures, constant_id, value_loc);
16455 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16456 parser,
16457 value_loc,
16458 constant_id,
16459 (uint32_t) (depth == -1 ? 0 : depth)
16460 );
16461
16462 return UP(pm_implicit_node_create(parser, UP(target)));
16463}
16464
16469static void
16470parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16471 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16472 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16473 }
16474}
16475
16479static pm_hash_pattern_node_t *
16480parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16481 pm_node_list_t assocs = { 0 };
16482 pm_static_literals_t keys = { 0 };
16483 pm_node_t *rest = NULL;
16484
16485 switch (PM_NODE_TYPE(first_node)) {
16486 case PM_ASSOC_SPLAT_NODE:
16487 case PM_NO_KEYWORDS_PARAMETER_NODE:
16488 rest = first_node;
16489 break;
16490 case PM_SYMBOL_NODE: {
16491 if (pm_symbol_node_label_p(first_node)) {
16492 parse_pattern_hash_key(parser, &keys, first_node);
16493 pm_node_t *value;
16494
16495 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16496 // Otherwise, we will create an implicit local variable
16497 // target for the value.
16498 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16499 } else {
16500 // Here we have a value for the first assoc in the list, so
16501 // we will parse it now.
16502 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16503 }
16504
16505 pm_token_t operator = not_provided(parser);
16506 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16507
16508 pm_node_list_append(&assocs, assoc);
16509 break;
16510 }
16511 }
16513 default: {
16514 // If we get anything else, then this is an error. For this we'll
16515 // create a missing node for the value and create an assoc node for
16516 // the first node in the list.
16517 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16518 pm_parser_err_node(parser, first_node, diag_id);
16519
16520 pm_token_t operator = not_provided(parser);
16521 pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end));
16522 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16523
16524 pm_node_list_append(&assocs, assoc);
16525 break;
16526 }
16527 }
16528
16529 // If there are any other assocs, then we'll parse them now.
16530 while (accept1(parser, PM_TOKEN_COMMA)) {
16531 // Here we need to break to support trailing commas.
16532 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16533 // Trailing commas are not allowed to follow a rest pattern.
16534 if (rest != NULL) {
16535 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16536 }
16537
16538 break;
16539 }
16540
16541 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16542 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16543
16544 if (rest == NULL) {
16545 rest = assoc;
16546 } else {
16547 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16548 pm_node_list_append(&assocs, assoc);
16549 }
16550 } else {
16551 pm_node_t *key;
16552
16553 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16554 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16555
16556 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16557 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16558 } else if (!pm_symbol_node_label_p(key)) {
16559 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16560 }
16561 } else {
16562 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16563 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16564 }
16565
16566 parse_pattern_hash_key(parser, &keys, key);
16567 pm_node_t *value = NULL;
16568
16569 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16570 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16571 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16572 } else {
16573 value = UP(pm_missing_node_create(parser, key->location.end, key->location.end));
16574 }
16575 } else {
16576 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16577 }
16578
16579 pm_token_t operator = not_provided(parser);
16580 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value));
16581
16582 if (rest != NULL) {
16583 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16584 }
16585
16586 pm_node_list_append(&assocs, assoc);
16587 }
16588 }
16589
16590 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16591 xfree(assocs.nodes);
16592
16593 pm_static_literals_free(&keys);
16594 return node;
16595}
16596
16600static pm_node_t *
16601parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16602 switch (parser->current.type) {
16603 case PM_TOKEN_IDENTIFIER:
16604 case PM_TOKEN_METHOD_NAME: {
16605 parser_lex(parser);
16606 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16607
16608 int depth;
16609 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16610 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16611 }
16612
16613 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16614 return UP(pm_local_variable_target_node_create(
16615 parser,
16616 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16617 constant_id,
16618 (uint32_t) (depth == -1 ? 0 : depth)
16619 ));
16620 }
16621 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16622 pm_token_t opening = parser->current;
16623 parser_lex(parser);
16624
16625 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16626 // If we have an empty array pattern, then we'll just return a new
16627 // array pattern node.
16628 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16629 }
16630
16631 // Otherwise, we'll parse the inner pattern, then deal with it depending
16632 // on the type it returns.
16633 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16634
16635 accept1(parser, PM_TOKEN_NEWLINE);
16636 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16637 pm_token_t closing = parser->previous;
16638
16639 switch (PM_NODE_TYPE(inner)) {
16640 case PM_ARRAY_PATTERN_NODE: {
16641 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16642 if (pattern_node->opening_loc.start == NULL) {
16643 pattern_node->base.location.start = opening.start;
16644 pattern_node->base.location.end = closing.end;
16645
16646 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16647 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16648
16649 return UP(pattern_node);
16650 }
16651
16652 break;
16653 }
16654 case PM_FIND_PATTERN_NODE: {
16655 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16656 if (pattern_node->opening_loc.start == NULL) {
16657 pattern_node->base.location.start = opening.start;
16658 pattern_node->base.location.end = closing.end;
16659
16660 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16661 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16662
16663 return UP(pattern_node);
16664 }
16665
16666 break;
16667 }
16668 default:
16669 break;
16670 }
16671
16672 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16673 pm_array_pattern_node_requireds_append(node, inner);
16674 return UP(node);
16675 }
16676 case PM_TOKEN_BRACE_LEFT: {
16677 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16678 parser->pattern_matching_newlines = false;
16679
16680 pm_hash_pattern_node_t *node;
16681 pm_token_t opening = parser->current;
16682 parser_lex(parser);
16683
16684 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16685 // If we have an empty hash pattern, then we'll just return a new hash
16686 // pattern node.
16687 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16688 } else {
16689 pm_node_t *first_node;
16690
16691 switch (parser->current.type) {
16692 case PM_TOKEN_LABEL:
16693 parser_lex(parser);
16694 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16695 break;
16696 case PM_TOKEN_USTAR_STAR:
16697 first_node = parse_pattern_keyword_rest(parser, captures);
16698 break;
16699 case PM_TOKEN_STRING_BEGIN:
16700 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16701 break;
16702 default: {
16703 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16704 parser_lex(parser);
16705
16706 first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
16707 break;
16708 }
16709 }
16710
16711 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16712
16713 accept1(parser, PM_TOKEN_NEWLINE);
16714 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
16715 pm_token_t closing = parser->previous;
16716
16717 node->base.location.start = opening.start;
16718 node->base.location.end = closing.end;
16719
16720 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16721 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16722 }
16723
16724 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16725 return UP(node);
16726 }
16727 case PM_TOKEN_UDOT_DOT:
16728 case PM_TOKEN_UDOT_DOT_DOT: {
16729 pm_token_t operator = parser->current;
16730 parser_lex(parser);
16731
16732 // Since we have a unary range operator, we need to parse the subsequent
16733 // expression as the right side of the range.
16734 switch (parser->current.type) {
16735 case PM_CASE_PRIMITIVE: {
16736 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16737 return UP(pm_range_node_create(parser, NULL, &operator, right));
16738 }
16739 default: {
16740 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16741 pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end));
16742 return UP(pm_range_node_create(parser, NULL, &operator, right));
16743 }
16744 }
16745 }
16746 case PM_CASE_PRIMITIVE: {
16747 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
16748
16749 // If we found a label, we need to immediately return to the caller.
16750 if (pm_symbol_node_label_p(node)) return node;
16751
16752 // Call nodes (arithmetic operations) are not allowed in patterns
16753 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16754 pm_parser_err_node(parser, node, diag_id);
16755 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
16756
16757 pm_node_unreference(parser, node);
16758 pm_node_destroy(parser, node);
16759 return UP(missing_node);
16760 }
16761
16762 // Now that we have a primitive, we need to check if it's part of a range.
16763 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16764 pm_token_t operator = parser->previous;
16765
16766 // Now that we have the operator, we need to check if this is followed
16767 // by another expression. If it is, then we will create a full range
16768 // node. Otherwise, we'll create an endless range.
16769 switch (parser->current.type) {
16770 case PM_CASE_PRIMITIVE: {
16771 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16772 return UP(pm_range_node_create(parser, node, &operator, right));
16773 }
16774 default:
16775 return UP(pm_range_node_create(parser, node, &operator, NULL));
16776 }
16777 }
16778
16779 return node;
16780 }
16781 case PM_TOKEN_CARET: {
16782 parser_lex(parser);
16783 pm_token_t operator = parser->previous;
16784
16785 // At this point we have a pin operator. We need to check the subsequent
16786 // expression to determine if it's a variable or an expression.
16787 switch (parser->current.type) {
16788 case PM_TOKEN_IDENTIFIER: {
16789 parser_lex(parser);
16790 pm_node_t *variable = UP(parse_variable(parser));
16791
16792 if (variable == NULL) {
16793 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16794 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16795 }
16796
16797 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16798 }
16799 case PM_TOKEN_INSTANCE_VARIABLE: {
16800 parser_lex(parser);
16801 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16802
16803 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16804 }
16805 case PM_TOKEN_CLASS_VARIABLE: {
16806 parser_lex(parser);
16807 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16808
16809 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16810 }
16811 case PM_TOKEN_GLOBAL_VARIABLE: {
16812 parser_lex(parser);
16813 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16814
16815 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16816 }
16817 case PM_TOKEN_NUMBERED_REFERENCE: {
16818 parser_lex(parser);
16819 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16820
16821 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16822 }
16823 case PM_TOKEN_BACK_REFERENCE: {
16824 parser_lex(parser);
16825 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16826
16827 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16828 }
16829 case PM_TOKEN_PARENTHESIS_LEFT: {
16830 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16831 parser->pattern_matching_newlines = false;
16832
16833 pm_token_t lparen = parser->current;
16834 parser_lex(parser);
16835
16836 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16837 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16838
16839 accept1(parser, PM_TOKEN_NEWLINE);
16840 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
16841 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16842 }
16843 default: {
16844 // If we get here, then we have a pin operator followed by something
16845 // not understood. We'll create a missing node and return that.
16846 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16847 pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end));
16848 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16849 }
16850 }
16851 }
16852 case PM_TOKEN_UCOLON_COLON: {
16853 pm_token_t delimiter = parser->current;
16854 parser_lex(parser);
16855
16856 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16857 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16858
16859 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16860 }
16861 case PM_TOKEN_CONSTANT: {
16862 pm_token_t constant = parser->current;
16863 parser_lex(parser);
16864
16865 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16866 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16867 }
16868 default:
16869 pm_parser_err_current(parser, diag_id);
16870 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16871 }
16872}
16873
16874static bool
16875parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16876 switch (PM_NODE_TYPE(node)) {
16877 case PM_LOCAL_VARIABLE_TARGET_NODE:
16878 pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16879 return false;
16880 default:
16881 return true;
16882 }
16883}
16884
16889static void
16890parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16891 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16892}
16893
16898static pm_node_t *
16899parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16900 pm_node_t *node = first_node;
16901 bool alternation = false;
16902
16903 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16904 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16905 parse_pattern_alternation_error(parser, node);
16906 }
16907
16908 switch (parser->current.type) {
16909 case PM_TOKEN_IDENTIFIER:
16910 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16911 case PM_TOKEN_BRACE_LEFT:
16912 case PM_TOKEN_CARET:
16913 case PM_TOKEN_CONSTANT:
16914 case PM_TOKEN_UCOLON_COLON:
16915 case PM_TOKEN_UDOT_DOT:
16916 case PM_TOKEN_UDOT_DOT_DOT:
16917 case PM_CASE_PRIMITIVE: {
16918 if (!alternation) {
16919 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16920 } else {
16921 pm_token_t operator = parser->previous;
16922 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16923
16924 if (captures->size) parse_pattern_alternation_error(parser, right);
16925 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16926 }
16927
16928 break;
16929 }
16930 case PM_TOKEN_PARENTHESIS_LEFT:
16931 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16932 pm_token_t operator = parser->previous;
16933 pm_token_t opening = parser->current;
16934 parser_lex(parser);
16935
16936 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16937 accept1(parser, PM_TOKEN_NEWLINE);
16938 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16939 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16940
16941 if (!alternation) {
16942 node = right;
16943 } else {
16944 if (captures->size) parse_pattern_alternation_error(parser, right);
16945 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16946 }
16947
16948 break;
16949 }
16950 default: {
16951 pm_parser_err_current(parser, diag_id);
16952 pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16953
16954 if (!alternation) {
16955 node = right;
16956 } else {
16957 if (captures->size) parse_pattern_alternation_error(parser, right);
16958 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
16959 }
16960
16961 break;
16962 }
16963 }
16964 }
16965
16966 // If we have an =>, then we are assigning this pattern to a variable.
16967 // In this case we should create an assignment node.
16968 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16969 pm_token_t operator = parser->previous;
16970 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16971
16972 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16973 int depth;
16974
16975 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16976 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16977 }
16978
16979 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16980 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16981 parser,
16982 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16983 constant_id,
16984 (uint32_t) (depth == -1 ? 0 : depth)
16985 );
16986
16987 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
16988 }
16989
16990 return node;
16991}
16992
16996static pm_node_t *
16997parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
16998 pm_node_t *node = NULL;
16999
17000 bool leading_rest = false;
17001 bool trailing_rest = false;
17002
17003 switch (parser->current.type) {
17004 case PM_TOKEN_LABEL: {
17005 parser_lex(parser);
17006 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
17007 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
17008
17009 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17010 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17011 }
17012
17013 return node;
17014 }
17015 case PM_TOKEN_USTAR_STAR: {
17016 node = parse_pattern_keyword_rest(parser, captures);
17017 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17018
17019 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17020 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17021 }
17022
17023 return node;
17024 }
17025 case PM_TOKEN_STRING_BEGIN: {
17026 // We need special handling for string beginnings because they could
17027 // be dynamic symbols leading to hash patterns.
17028 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17029
17030 if (pm_symbol_node_label_p(node)) {
17031 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17032
17033 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17034 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17035 }
17036
17037 return node;
17038 }
17039
17040 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17041 break;
17042 }
17043 case PM_TOKEN_USTAR: {
17044 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17045 parser_lex(parser);
17046 node = UP(parse_pattern_rest(parser, captures));
17047 leading_rest = true;
17048 break;
17049 }
17050 }
17052 default:
17053 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17054 break;
17055 }
17056
17057 // If we got a dynamic label symbol, then we need to treat it like the
17058 // beginning of a hash pattern.
17059 if (pm_symbol_node_label_p(node)) {
17060 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17061 }
17062
17063 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17064 // If we have a comma, then we are now parsing either an array pattern
17065 // or a find pattern. We need to parse all of the patterns, put them
17066 // into a big list, and then determine which type of node we have.
17067 pm_node_list_t nodes = { 0 };
17068 pm_node_list_append(&nodes, node);
17069
17070 // Gather up all of the patterns into the list.
17071 while (accept1(parser, PM_TOKEN_COMMA)) {
17072 // Break early here in case we have a trailing comma.
17073 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17074 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17075 pm_node_list_append(&nodes, node);
17076 trailing_rest = true;
17077 break;
17078 }
17079
17080 if (accept1(parser, PM_TOKEN_USTAR)) {
17081 node = UP(parse_pattern_rest(parser, captures));
17082
17083 // If we have already parsed a splat pattern, then this is an
17084 // error. We will continue to parse the rest of the patterns,
17085 // but we will indicate it as an error.
17086 if (trailing_rest) {
17087 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17088 }
17089
17090 trailing_rest = true;
17091 } else {
17092 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17093 }
17094
17095 pm_node_list_append(&nodes, node);
17096 }
17097
17098 // If the first pattern and the last pattern are rest patterns, then we
17099 // will call this a find pattern, regardless of how many rest patterns
17100 // are in between because we know we already added the appropriate
17101 // errors. Otherwise we will create an array pattern.
17102 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17103 node = UP(pm_find_pattern_node_create(parser, &nodes));
17104
17105 if (nodes.size == 2) {
17106 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17107 }
17108 } else {
17109 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17110
17111 if (leading_rest && trailing_rest) {
17112 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17113 }
17114 }
17115
17116 xfree(nodes.nodes);
17117 } else if (leading_rest) {
17118 // Otherwise, if we parsed a single splat pattern, then we know we have
17119 // an array pattern, so we can go ahead and create that node.
17120 node = UP(pm_array_pattern_node_rest_create(parser, node));
17121 }
17122
17123 return node;
17124}
17125
17131static inline void
17132parse_negative_numeric(pm_node_t *node) {
17133 switch (PM_NODE_TYPE(node)) {
17134 case PM_INTEGER_NODE: {
17135 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17136 cast->base.location.start--;
17137 cast->value.negative = true;
17138 break;
17139 }
17140 case PM_FLOAT_NODE: {
17141 pm_float_node_t *cast = (pm_float_node_t *) node;
17142 cast->base.location.start--;
17143 cast->value = -cast->value;
17144 break;
17145 }
17146 case PM_RATIONAL_NODE: {
17147 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17148 cast->base.location.start--;
17149 cast->numerator.negative = true;
17150 break;
17151 }
17152 case PM_IMAGINARY_NODE:
17153 node->location.start--;
17154 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17155 break;
17156 default:
17157 assert(false && "unreachable");
17158 break;
17159 }
17160}
17161
17167static void
17168pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17169 switch (diag_id) {
17170 case PM_ERR_HASH_KEY: {
17171 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17172 break;
17173 }
17174 case PM_ERR_HASH_VALUE:
17175 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17176 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17177 break;
17178 }
17179 case PM_ERR_UNARY_RECEIVER: {
17180 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17181 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17182 break;
17183 }
17184 case PM_ERR_UNARY_DISALLOWED:
17185 case PM_ERR_EXPECT_ARGUMENT: {
17186 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17187 break;
17188 }
17189 default:
17190 pm_parser_err_previous(parser, diag_id);
17191 break;
17192 }
17193}
17194
17198static void
17199parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17200#define CONTEXT_NONE 0
17201#define CONTEXT_THROUGH_ENSURE 1
17202#define CONTEXT_THROUGH_ELSE 2
17203
17204 pm_context_node_t *context_node = parser->current_context;
17205 int context = CONTEXT_NONE;
17206
17207 while (context_node != NULL) {
17208 switch (context_node->context) {
17216 case PM_CONTEXT_DEFINED:
17218 // These are the good cases. We're allowed to have a retry here.
17219 return;
17220 case PM_CONTEXT_CLASS:
17221 case PM_CONTEXT_DEF:
17223 case PM_CONTEXT_MAIN:
17224 case PM_CONTEXT_MODULE:
17225 case PM_CONTEXT_PREEXE:
17226 case PM_CONTEXT_SCLASS:
17227 // These are the bad cases. We're not allowed to have a retry in
17228 // these contexts.
17229 if (context == CONTEXT_NONE) {
17230 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17231 } else if (context == CONTEXT_THROUGH_ENSURE) {
17232 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17233 } else if (context == CONTEXT_THROUGH_ELSE) {
17234 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17235 }
17236 return;
17244 // These are also bad cases, but with a more specific error
17245 // message indicating the else.
17246 context = CONTEXT_THROUGH_ELSE;
17247 break;
17255 // These are also bad cases, but with a more specific error
17256 // message indicating the ensure.
17257 context = CONTEXT_THROUGH_ENSURE;
17258 break;
17259 case PM_CONTEXT_NONE:
17260 // This case should never happen.
17261 assert(false && "unreachable");
17262 break;
17263 case PM_CONTEXT_BEGIN:
17267 case PM_CONTEXT_CASE_IN:
17270 case PM_CONTEXT_ELSE:
17271 case PM_CONTEXT_ELSIF:
17272 case PM_CONTEXT_EMBEXPR:
17274 case PM_CONTEXT_FOR:
17275 case PM_CONTEXT_IF:
17280 case PM_CONTEXT_PARENS:
17281 case PM_CONTEXT_POSTEXE:
17283 case PM_CONTEXT_TERNARY:
17284 case PM_CONTEXT_UNLESS:
17285 case PM_CONTEXT_UNTIL:
17286 case PM_CONTEXT_WHILE:
17287 // In these contexts we should continue walking up the list of
17288 // contexts.
17289 break;
17290 }
17291
17292 context_node = context_node->prev;
17293 }
17294
17295#undef CONTEXT_NONE
17296#undef CONTEXT_ENSURE
17297#undef CONTEXT_ELSE
17298}
17299
17303static void
17304parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17305 pm_context_node_t *context_node = parser->current_context;
17306
17307 while (context_node != NULL) {
17308 switch (context_node->context) {
17309 case PM_CONTEXT_DEF:
17311 case PM_CONTEXT_DEFINED:
17315 // These are the good cases. We're allowed to have a block exit
17316 // in these contexts.
17317 return;
17318 case PM_CONTEXT_CLASS:
17322 case PM_CONTEXT_MAIN:
17323 case PM_CONTEXT_MODULE:
17327 case PM_CONTEXT_SCLASS:
17331 // These are the bad cases. We're not allowed to have a retry in
17332 // these contexts.
17333 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17334 return;
17335 case PM_CONTEXT_NONE:
17336 // This case should never happen.
17337 assert(false && "unreachable");
17338 break;
17339 case PM_CONTEXT_BEGIN:
17349 case PM_CONTEXT_CASE_IN:
17352 case PM_CONTEXT_ELSE:
17353 case PM_CONTEXT_ELSIF:
17354 case PM_CONTEXT_EMBEXPR:
17356 case PM_CONTEXT_FOR:
17357 case PM_CONTEXT_IF:
17365 case PM_CONTEXT_PARENS:
17366 case PM_CONTEXT_POSTEXE:
17368 case PM_CONTEXT_PREEXE:
17370 case PM_CONTEXT_TERNARY:
17371 case PM_CONTEXT_UNLESS:
17372 case PM_CONTEXT_UNTIL:
17373 case PM_CONTEXT_WHILE:
17374 // In these contexts we should continue walking up the list of
17375 // contexts.
17376 break;
17377 }
17378
17379 context_node = context_node->prev;
17380 }
17381}
17382
17387typedef struct {
17390
17392 const uint8_t *start;
17393
17395 const uint8_t *end;
17396
17405
17410static void
17411parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17413 pm_location_t location;
17414
17415 if (callback_data->shared) {
17416 location = (pm_location_t) { .start = start, .end = end };
17417 } else {
17418 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17419 }
17420
17421 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17422}
17423
17427static void
17428parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17429 const pm_string_t *unescaped = &node->unescaped;
17431 .parser = parser,
17432 .start = node->base.location.start,
17433 .end = node->base.location.end,
17434 .shared = unescaped->type == PM_STRING_SHARED
17435 };
17436
17437 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17438}
17439
17443static inline pm_node_t *
17444parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17445 switch (parser->current.type) {
17446 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17447 parser_lex(parser);
17448
17449 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17450 pm_accepts_block_stack_push(parser, true);
17451 bool parsed_bare_hash = false;
17452
17453 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17454 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17455
17456 // Handle the case where we don't have a comma and we have a
17457 // newline followed by a right bracket.
17458 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17459 break;
17460 }
17461
17462 // Ensure that we have a comma between elements in the array.
17463 if (array->elements.size > 0) {
17464 if (accept1(parser, PM_TOKEN_COMMA)) {
17465 // If there was a comma but we also accepts a newline,
17466 // then this is a syntax error.
17467 if (accepted_newline) {
17468 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17469 }
17470 } else {
17471 // If there was no comma, then we need to add a syntax
17472 // error.
17473 const uint8_t *location = parser->previous.end;
17474 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17475
17476 parser->previous.start = location;
17477 parser->previous.type = PM_TOKEN_MISSING;
17478 }
17479 }
17480
17481 // If we have a right bracket immediately following a comma,
17482 // this is allowed since it's a trailing comma. In this case we
17483 // can break out of the loop.
17484 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17485
17486 pm_node_t *element;
17487
17488 if (accept1(parser, PM_TOKEN_USTAR)) {
17489 pm_token_t operator = parser->previous;
17490 pm_node_t *expression = NULL;
17491
17492 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17493 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17494 } else {
17495 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17496 }
17497
17498 element = UP(pm_splat_node_create(parser, &operator, expression));
17499 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17500 if (parsed_bare_hash) {
17501 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17502 }
17503
17504 element = UP(pm_keyword_hash_node_create(parser));
17505 pm_static_literals_t hash_keys = { 0 };
17506
17507 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17508 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17509 }
17510
17511 pm_static_literals_free(&hash_keys);
17512 parsed_bare_hash = true;
17513 } else {
17514 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17515
17516 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17517 if (parsed_bare_hash) {
17518 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17519 }
17520
17521 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17522 pm_static_literals_t hash_keys = { 0 };
17523 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17524
17525 pm_token_t operator;
17526 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17527 operator = parser->previous;
17528 } else {
17529 operator = not_provided(parser);
17530 }
17531
17532 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17533 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value));
17534 pm_keyword_hash_node_elements_append(hash, assoc);
17535
17536 element = UP(hash);
17537 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17538 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17539 }
17540
17541 pm_static_literals_free(&hash_keys);
17542 parsed_bare_hash = true;
17543 }
17544 }
17545
17546 pm_array_node_elements_append(array, element);
17547 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17548 }
17549
17550 accept1(parser, PM_TOKEN_NEWLINE);
17551
17552 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17553 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17554 parser->previous.start = parser->previous.end;
17555 parser->previous.type = PM_TOKEN_MISSING;
17556 }
17557
17558 pm_array_node_close_set(array, &parser->previous);
17559 pm_accepts_block_stack_pop(parser);
17560
17561 return UP(array);
17562 }
17563 case PM_TOKEN_PARENTHESIS_LEFT:
17564 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17565 pm_token_t opening = parser->current;
17566 pm_node_flags_t flags = 0;
17567
17568 pm_node_list_t current_block_exits = { 0 };
17569 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17570
17571 parser_lex(parser);
17572 while (true) {
17573 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17574 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17575 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17576 break;
17577 }
17578 }
17579
17580 // If this is the end of the file or we match a right parenthesis, then
17581 // we have an empty parentheses node, and we can immediately return.
17582 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17583 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17584
17585 pop_block_exits(parser, previous_block_exits);
17586 pm_node_list_free(&current_block_exits);
17587
17588 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
17589 }
17590
17591 // Otherwise, we're going to parse the first statement in the list
17592 // of statements within the parentheses.
17593 pm_accepts_block_stack_push(parser, true);
17594 context_push(parser, PM_CONTEXT_PARENS);
17595 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17596 context_pop(parser);
17597
17598 // Determine if this statement is followed by a terminator. In the
17599 // case of a single statement, this is fine. But in the case of
17600 // multiple statements it's required.
17601 bool terminator_found = false;
17602
17603 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17604 terminator_found = true;
17605 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17606 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17607 terminator_found = true;
17608 }
17609
17610 if (terminator_found) {
17611 while (true) {
17612 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17613 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17614 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17615 break;
17616 }
17617 }
17618 }
17619
17620 // If we hit a right parenthesis, then we're done parsing the
17621 // parentheses node, and we can check which kind of node we should
17622 // return.
17623 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17624 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17625 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17626 }
17627
17628 parser_lex(parser);
17629 pm_accepts_block_stack_pop(parser);
17630
17631 pop_block_exits(parser, previous_block_exits);
17632 pm_node_list_free(&current_block_exits);
17633
17634 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17635 // If we have a single statement and are ending on a right
17636 // parenthesis, then we need to check if this is possibly a
17637 // multiple target node.
17638 pm_multi_target_node_t *multi_target;
17639
17640 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
17641 multi_target = (pm_multi_target_node_t *) statement;
17642 } else {
17643 multi_target = pm_multi_target_node_create(parser);
17644 pm_multi_target_node_targets_append(parser, multi_target, statement);
17645 }
17646
17647 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17648 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
17649
17650 multi_target->lparen_loc = lparen_loc;
17651 multi_target->rparen_loc = rparen_loc;
17652 multi_target->base.location.start = lparen_loc.start;
17653 multi_target->base.location.end = rparen_loc.end;
17654
17655 pm_node_t *result;
17656 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17657 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17658 accept1(parser, PM_TOKEN_NEWLINE);
17659 } else {
17660 result = UP(multi_target);
17661 }
17662
17663 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17664 // All set, this is explicitly allowed by the parent
17665 // context.
17666 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17667 // All set, we're inside a for loop and we're parsing
17668 // multiple targets.
17669 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17670 // Multi targets are not allowed when it's not a
17671 // statement level.
17672 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17673 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17674 // Multi targets must be followed by an equal sign in
17675 // order to be valid (or a right parenthesis if they are
17676 // nested).
17677 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17678 }
17679
17680 return result;
17681 }
17682
17683 // If we have a single statement and are ending on a right parenthesis
17684 // and we didn't return a multiple assignment node, then we can return a
17685 // regular parentheses node now.
17686 pm_statements_node_t *statements = pm_statements_node_create(parser);
17687 pm_statements_node_body_append(parser, statements, statement, true);
17688
17689 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17690 }
17691
17692 // If we have more than one statement in the set of parentheses,
17693 // then we are going to parse all of them as a list of statements.
17694 // We'll do that here.
17695 context_push(parser, PM_CONTEXT_PARENS);
17696 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17697
17698 pm_statements_node_t *statements = pm_statements_node_create(parser);
17699 pm_statements_node_body_append(parser, statements, statement, true);
17700
17701 // If we didn't find a terminator and we didn't find a right
17702 // parenthesis, then this is a syntax error.
17703 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17704 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17705 }
17706
17707 // Parse each statement within the parentheses.
17708 while (true) {
17709 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17710 pm_statements_node_body_append(parser, statements, node, true);
17711
17712 // If we're recovering from a syntax error, then we need to stop
17713 // parsing the statements now.
17714 if (parser->recovering) {
17715 // If this is the level of context where the recovery has
17716 // happened, then we can mark the parser as done recovering.
17717 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17718 break;
17719 }
17720
17721 // If we couldn't parse an expression at all, then we need to
17722 // bail out of the loop.
17723 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17724
17725 // If we successfully parsed a statement, then we are going to
17726 // need a terminator to delimit it from the next one.
17727 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17728 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17729 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17730 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17731 break;
17732 } else if (!match1(parser, PM_TOKEN_EOF)) {
17733 // If we're at the end of the file, then we're going to add
17734 // an error after this for the ) anyway.
17735 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17736 }
17737 }
17738
17739 context_pop(parser);
17740 pm_accepts_block_stack_pop(parser);
17741 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17742
17743 // When we're parsing multi targets, we allow them to be followed by
17744 // a right parenthesis if they are at the statement level. This is
17745 // only possible if they are the final statement inside a set of
17746 // parentheses. We need to explicitly reject that here.
17747 {
17748 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17749
17750 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17751 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17752 pm_multi_target_node_targets_append(parser, multi_target, statement);
17753
17754 statement = UP(multi_target);
17755 statements->body.nodes[statements->body.size - 1] = statement;
17756 }
17757
17758 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17759 const uint8_t *offset = statement->location.end;
17760 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17761 pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset));
17762
17763 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17764 statements->body.nodes[statements->body.size - 1] = statement;
17765
17766 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17767 }
17768 }
17769
17770 pop_block_exits(parser, previous_block_exits);
17771 pm_node_list_free(&current_block_exits);
17772
17773 pm_void_statements_check(parser, statements, true);
17774 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17775 }
17776 case PM_TOKEN_BRACE_LEFT: {
17777 // If we were passed a current_hash_keys via the parser, then that
17778 // means we're already parsing a hash and we want to share the set
17779 // of hash keys with this inner hash we're about to parse for the
17780 // sake of warnings. We'll set it to NULL after we grab it to make
17781 // sure subsequent expressions don't use it. Effectively this is a
17782 // way of getting around passing it to every call to
17783 // parse_expression.
17784 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17785 parser->current_hash_keys = NULL;
17786
17787 pm_accepts_block_stack_push(parser, true);
17788 parser_lex(parser);
17789
17790 pm_token_t opening = parser->previous;
17791 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
17792
17793 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17794 if (current_hash_keys != NULL) {
17795 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17796 } else {
17797 pm_static_literals_t hash_keys = { 0 };
17798 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17799 pm_static_literals_free(&hash_keys);
17800 }
17801
17802 accept1(parser, PM_TOKEN_NEWLINE);
17803 }
17804
17805 pm_accepts_block_stack_pop(parser);
17806 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
17807 pm_hash_node_closing_loc_set(node, &parser->previous);
17808
17809 return UP(node);
17810 }
17811 case PM_TOKEN_CHARACTER_LITERAL: {
17812 pm_token_t closing = not_provided(parser);
17813 pm_node_t *node = UP(pm_string_node_create_current_string(
17814 parser,
17815 &(pm_token_t) {
17816 .type = PM_TOKEN_STRING_BEGIN,
17817 .start = parser->current.start,
17818 .end = parser->current.start + 1
17819 },
17820 &(pm_token_t) {
17821 .type = PM_TOKEN_STRING_CONTENT,
17822 .start = parser->current.start + 1,
17823 .end = parser->current.end
17824 },
17825 &closing
17826 ));
17827
17828 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17829
17830 // Skip past the character literal here, since now we have handled
17831 // parser->explicit_encoding correctly.
17832 parser_lex(parser);
17833
17834 // Characters can be followed by strings in which case they are
17835 // automatically concatenated.
17836 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17837 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17838 }
17839
17840 return node;
17841 }
17842 case PM_TOKEN_CLASS_VARIABLE: {
17843 parser_lex(parser);
17844 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17845
17846 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17847 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17848 }
17849
17850 return node;
17851 }
17852 case PM_TOKEN_CONSTANT: {
17853 parser_lex(parser);
17854 pm_token_t constant = parser->previous;
17855
17856 // If a constant is immediately followed by parentheses, then this is in
17857 // fact a method call, not a constant read.
17858 if (
17859 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17860 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17861 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17862 match1(parser, PM_TOKEN_BRACE_LEFT)
17863 ) {
17864 pm_arguments_t arguments = { 0 };
17865 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17866 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17867 }
17868
17869 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17870
17871 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17872 // If we get here, then we have a comma immediately following a
17873 // constant, so we're going to parse this as a multiple assignment.
17874 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17875 }
17876
17877 return node;
17878 }
17879 case PM_TOKEN_UCOLON_COLON: {
17880 parser_lex(parser);
17881 pm_token_t delimiter = parser->previous;
17882
17883 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17884 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17885
17886 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17887 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17888 }
17889
17890 return node;
17891 }
17892 case PM_TOKEN_UDOT_DOT:
17893 case PM_TOKEN_UDOT_DOT_DOT: {
17894 pm_token_t operator = parser->current;
17895 parser_lex(parser);
17896
17897 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17898
17899 // Unary .. and ... are special because these are non-associative
17900 // operators that can also be unary operators. In this case we need
17901 // to explicitly reject code that has a .. or ... that follows this
17902 // expression.
17903 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17904 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17905 }
17906
17907 return UP(pm_range_node_create(parser, NULL, &operator, right));
17908 }
17909 case PM_TOKEN_FLOAT:
17910 parser_lex(parser);
17911 return UP(pm_float_node_create(parser, &parser->previous));
17912 case PM_TOKEN_FLOAT_IMAGINARY:
17913 parser_lex(parser);
17914 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17915 case PM_TOKEN_FLOAT_RATIONAL:
17916 parser_lex(parser);
17917 return UP(pm_float_node_rational_create(parser, &parser->previous));
17918 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17919 parser_lex(parser);
17920 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17921 case PM_TOKEN_NUMBERED_REFERENCE: {
17922 parser_lex(parser);
17923 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17924
17925 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17926 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17927 }
17928
17929 return node;
17930 }
17931 case PM_TOKEN_GLOBAL_VARIABLE: {
17932 parser_lex(parser);
17933 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17934
17935 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17936 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17937 }
17938
17939 return node;
17940 }
17941 case PM_TOKEN_BACK_REFERENCE: {
17942 parser_lex(parser);
17943 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17944
17945 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17946 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17947 }
17948
17949 return node;
17950 }
17951 case PM_TOKEN_IDENTIFIER:
17952 case PM_TOKEN_METHOD_NAME: {
17953 parser_lex(parser);
17954 pm_token_t identifier = parser->previous;
17955 pm_node_t *node = parse_variable_call(parser);
17956
17957 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17958 // If parse_variable_call returned with a call node, then we
17959 // know the identifier is not in the local table. In that case
17960 // we need to check if there are arguments following the
17961 // identifier.
17962 pm_call_node_t *call = (pm_call_node_t *) node;
17963 pm_arguments_t arguments = { 0 };
17964
17965 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
17966 // Since we found arguments, we need to turn off the
17967 // variable call bit in the flags.
17968 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17969
17970 call->opening_loc = arguments.opening_loc;
17971 call->arguments = arguments.arguments;
17972 call->closing_loc = arguments.closing_loc;
17973 call->block = arguments.block;
17974
17975 const uint8_t *end = pm_arguments_end(&arguments);
17976 if (!end) {
17977 end = call->message_loc.end;
17978 }
17979 call->base.location.end = end;
17980 }
17981 } else {
17982 // Otherwise, we know the identifier is in the local table. This
17983 // can still be a method call if it is followed by arguments or
17984 // a block, so we need to check for that here.
17985 if (
17986 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17987 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17988 match1(parser, PM_TOKEN_BRACE_LEFT)
17989 ) {
17990 pm_arguments_t arguments = { 0 };
17991 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17992 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17993
17994 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17995 // If we're about to convert an 'it' implicit local
17996 // variable read into a method call, we need to remove
17997 // it from the list of implicit local variables.
17998 pm_node_unreference(parser, node);
17999 } else {
18000 // Otherwise, we're about to convert a regular local
18001 // variable read into a method call, in which case we
18002 // need to indicate that this was not a read for the
18003 // purposes of warnings.
18004 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18005
18006 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18007 pm_node_unreference(parser, node);
18008 } else {
18009 pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
18010 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18011 }
18012 }
18013
18014 pm_node_destroy(parser, node);
18015 return UP(fcall);
18016 }
18017 }
18018
18019 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18020 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18021 }
18022
18023 return node;
18024 }
18025 case PM_TOKEN_HEREDOC_START: {
18026 // Here we have found a heredoc. We'll parse it and add it to the
18027 // list of strings.
18028 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18029 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18030
18031 size_t common_whitespace = (size_t) -1;
18032 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18033
18034 parser_lex(parser);
18035 pm_token_t opening = parser->previous;
18036
18037 pm_node_t *node;
18038 pm_node_t *part;
18039
18040 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18041 // If we get here, then we have an empty heredoc. We'll create
18042 // an empty content token and return an empty string node.
18043 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18044 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18045
18046 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18047 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18048 } else {
18049 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18050 }
18051
18052 node->location.end = opening.end;
18053 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18054 // If we get here, then we tried to find something in the
18055 // heredoc but couldn't actually parse anything, so we'll just
18056 // return a missing node.
18057 //
18058 // parse_string_part handles its own errors, so there is no need
18059 // for us to add one here.
18060 node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18061 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18062 // If we get here, then the part that we parsed was plain string
18063 // content and we're at the end of the heredoc, so we can return
18064 // just a string node with the heredoc opening and closing as
18065 // its opening and closing.
18066 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18067 pm_string_node_t *cast = (pm_string_node_t *) part;
18068
18069 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18070 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18071 cast->base.location = cast->opening_loc;
18072
18073 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18074 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18075 cast->base.type = PM_X_STRING_NODE;
18076 }
18077
18078 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18079 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18080 }
18081
18082 node = UP(cast);
18083 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18084 } else {
18085 // If we get here, then we have multiple parts in the heredoc,
18086 // so we'll need to create an interpolated string node to hold
18087 // them all.
18088 pm_node_list_t parts = { 0 };
18089 pm_node_list_append(&parts, part);
18090
18091 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18092 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18093 pm_node_list_append(&parts, part);
18094 }
18095 }
18096
18097 // Now that we have all of the parts, create the correct type of
18098 // interpolated node.
18099 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18100 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18101 cast->parts = parts;
18102
18103 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18104 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18105
18106 cast->base.location = cast->opening_loc;
18107 node = UP(cast);
18108 } else {
18109 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18110 pm_node_list_free(&parts);
18111
18112 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18113 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18114
18115 cast->base.location = cast->opening_loc;
18116 node = UP(cast);
18117 }
18118
18119 // If this is a heredoc that is indented with a ~, then we need
18120 // to dedent each line by the common leading whitespace.
18121 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18122 pm_node_list_t *nodes;
18123 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18124 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18125 } else {
18126 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18127 }
18128
18129 parse_heredoc_dedent(parser, nodes, common_whitespace);
18130 }
18131 }
18132
18133 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18134 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18135 }
18136
18137 return node;
18138 }
18139 case PM_TOKEN_INSTANCE_VARIABLE: {
18140 parser_lex(parser);
18141 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18142
18143 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18144 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18145 }
18146
18147 return node;
18148 }
18149 case PM_TOKEN_INTEGER: {
18150 pm_node_flags_t base = parser->integer_base;
18151 parser_lex(parser);
18152 return UP(pm_integer_node_create(parser, base, &parser->previous));
18153 }
18154 case PM_TOKEN_INTEGER_IMAGINARY: {
18155 pm_node_flags_t base = parser->integer_base;
18156 parser_lex(parser);
18157 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18158 }
18159 case PM_TOKEN_INTEGER_RATIONAL: {
18160 pm_node_flags_t base = parser->integer_base;
18161 parser_lex(parser);
18162 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18163 }
18164 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18165 pm_node_flags_t base = parser->integer_base;
18166 parser_lex(parser);
18167 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18168 }
18169 case PM_TOKEN_KEYWORD___ENCODING__:
18170 parser_lex(parser);
18171 return UP(pm_source_encoding_node_create(parser, &parser->previous));
18172 case PM_TOKEN_KEYWORD___FILE__:
18173 parser_lex(parser);
18174 return UP(pm_source_file_node_create(parser, &parser->previous));
18175 case PM_TOKEN_KEYWORD___LINE__:
18176 parser_lex(parser);
18177 return UP(pm_source_line_node_create(parser, &parser->previous));
18178 case PM_TOKEN_KEYWORD_ALIAS: {
18179 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18180 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18181 }
18182
18183 parser_lex(parser);
18184 pm_token_t keyword = parser->previous;
18185
18186 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18187 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18188
18189 switch (PM_NODE_TYPE(new_name)) {
18190 case PM_BACK_REFERENCE_READ_NODE:
18191 case PM_NUMBERED_REFERENCE_READ_NODE:
18192 case PM_GLOBAL_VARIABLE_READ_NODE: {
18193 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18194 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18195 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18196 }
18197 } else {
18198 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18199 }
18200
18201 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18202 }
18203 case PM_SYMBOL_NODE:
18204 case PM_INTERPOLATED_SYMBOL_NODE: {
18205 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18206 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18207 }
18208 }
18210 default:
18211 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18212 }
18213 }
18214 case PM_TOKEN_KEYWORD_CASE: {
18215 size_t opening_newline_index = token_newline_index(parser);
18216 parser_lex(parser);
18217
18218 pm_token_t case_keyword = parser->previous;
18219 pm_node_t *predicate = NULL;
18220
18221 pm_node_list_t current_block_exits = { 0 };
18222 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18223
18224 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18225 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18226 predicate = NULL;
18227 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18228 predicate = NULL;
18229 } else if (!token_begins_expression_p(parser->current.type)) {
18230 predicate = NULL;
18231 } else {
18232 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18233 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18234 }
18235
18236 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18237 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18238 parser_lex(parser);
18239
18240 pop_block_exits(parser, previous_block_exits);
18241 pm_node_list_free(&current_block_exits);
18242
18243 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18244 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18245 }
18246
18247 // At this point we can create a case node, though we don't yet know
18248 // if it is a case-in or case-when node.
18249 pm_token_t end_keyword = not_provided(parser);
18250 pm_node_t *node;
18251
18252 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18253 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18254 pm_static_literals_t literals = { 0 };
18255
18256 // At this point we've seen a when keyword, so we know this is a
18257 // case-when node. We will continue to parse the when nodes
18258 // until we hit the end of the list.
18259 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18260 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18261 parser_lex(parser);
18262
18263 pm_token_t when_keyword = parser->previous;
18264 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18265
18266 do {
18267 if (accept1(parser, PM_TOKEN_USTAR)) {
18268 pm_token_t operator = parser->previous;
18269 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18270
18271 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18272 pm_when_node_conditions_append(when_node, UP(splat_node));
18273
18274 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18275 } else {
18276 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18277 pm_when_node_conditions_append(when_node, condition);
18278
18279 // If we found a missing node, then this is a syntax
18280 // error and we should stop looping.
18281 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18282
18283 // If this is a string node, then we need to mark it
18284 // as frozen because when clause strings are frozen.
18285 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18286 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18287 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18288 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18289 }
18290
18291 pm_when_clause_static_literals_add(parser, &literals, condition);
18292 }
18293 } while (accept1(parser, PM_TOKEN_COMMA));
18294
18295 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18296 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18297 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18298 }
18299 } else {
18300 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18301 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18302 }
18303
18304 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18305 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18306 if (statements != NULL) {
18307 pm_when_node_statements_set(when_node, statements);
18308 }
18309 }
18310
18311 pm_case_node_condition_append(case_node, UP(when_node));
18312 }
18313
18314 // If we didn't parse any conditions (in or when) then we need
18315 // to indicate that we have an error.
18316 if (case_node->conditions.size == 0) {
18317 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18318 }
18319
18320 pm_static_literals_free(&literals);
18321 node = UP(case_node);
18322 } else {
18323 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18324
18325 // If this is a case-match node (i.e., it is a pattern matching
18326 // case statement) then we must have a predicate.
18327 if (predicate == NULL) {
18328 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18329 }
18330
18331 // At this point we expect that we're parsing a case-in node. We
18332 // will continue to parse the in nodes until we hit the end of
18333 // the list.
18334 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18335 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18336
18337 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18338 parser->pattern_matching_newlines = true;
18339
18340 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18341 parser->command_start = false;
18342 parser_lex(parser);
18343
18344 pm_token_t in_keyword = parser->previous;
18345
18346 pm_constant_id_list_t captures = { 0 };
18347 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18348
18349 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18350 pm_constant_id_list_free(&captures);
18351
18352 // Since we're in the top-level of the case-in node we need
18353 // to check for guard clauses in the form of `if` or
18354 // `unless` statements.
18355 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18356 pm_token_t keyword = parser->previous;
18357 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18358 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18359 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18360 pm_token_t keyword = parser->previous;
18361 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18362 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18363 }
18364
18365 // Now we need to check for the terminator of the in node's
18366 // pattern. It can be a newline or semicolon optionally
18367 // followed by a `then` keyword.
18368 pm_token_t then_keyword;
18369 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18370 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18371 then_keyword = parser->previous;
18372 } else {
18373 then_keyword = not_provided(parser);
18374 }
18375 } else {
18376 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18377 then_keyword = parser->previous;
18378 }
18379
18380 // Now we can actually parse the statements associated with
18381 // the in node.
18382 pm_statements_node_t *statements;
18383 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18384 statements = NULL;
18385 } else {
18386 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18387 }
18388
18389 // Now that we have the full pattern and statements, we can
18390 // create the node and attach it to the case node.
18391 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword));
18392 pm_case_match_node_condition_append(case_node, condition);
18393 }
18394
18395 // If we didn't parse any conditions (in or when) then we need
18396 // to indicate that we have an error.
18397 if (case_node->conditions.size == 0) {
18398 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18399 }
18400
18401 node = UP(case_node);
18402 }
18403
18404 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18405 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18406 pm_token_t else_keyword = parser->previous;
18407 pm_else_node_t *else_node;
18408
18409 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18410 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18411 } else {
18412 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18413 }
18414
18415 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18416 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18417 } else {
18418 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18419 }
18420 }
18421
18422 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18423 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
18424
18425 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18426 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18427 } else {
18428 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18429 }
18430
18431 pop_block_exits(parser, previous_block_exits);
18432 pm_node_list_free(&current_block_exits);
18433
18434 return node;
18435 }
18436 case PM_TOKEN_KEYWORD_BEGIN: {
18437 size_t opening_newline_index = token_newline_index(parser);
18438 parser_lex(parser);
18439
18440 pm_token_t begin_keyword = parser->previous;
18441 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18442
18443 pm_node_list_t current_block_exits = { 0 };
18444 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18445 pm_statements_node_t *begin_statements = NULL;
18446
18447 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18448 pm_accepts_block_stack_push(parser, true);
18449 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18450 pm_accepts_block_stack_pop(parser);
18451 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18452 }
18453
18454 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18455 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18456 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
18457
18458 begin_node->base.location.end = parser->previous.end;
18459 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18460
18461 pop_block_exits(parser, previous_block_exits);
18462 pm_node_list_free(&current_block_exits);
18463
18464 return UP(begin_node);
18465 }
18466 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18467 pm_node_list_t current_block_exits = { 0 };
18468 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18469
18470 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18471 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18472 }
18473
18474 parser_lex(parser);
18475 pm_token_t keyword = parser->previous;
18476
18477 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18478 pm_token_t opening = parser->previous;
18479 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18480
18481 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
18482 pm_context_t context = parser->current_context->context;
18483 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18484 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18485 }
18486
18487 flush_block_exits(parser, previous_block_exits);
18488 pm_node_list_free(&current_block_exits);
18489
18490 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18491 }
18492 case PM_TOKEN_KEYWORD_BREAK:
18493 case PM_TOKEN_KEYWORD_NEXT:
18494 case PM_TOKEN_KEYWORD_RETURN: {
18495 parser_lex(parser);
18496
18497 pm_token_t keyword = parser->previous;
18498 pm_arguments_t arguments = { 0 };
18499
18500 if (
18501 token_begins_expression_p(parser->current.type) ||
18502 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18503 ) {
18504 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18505
18506 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18507 pm_token_t next = parser->current;
18508 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18509
18510 // Reject `foo && return bar`.
18511 if (!accepts_command_call && arguments.arguments != NULL) {
18512 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18513 }
18514 }
18515 }
18516
18517 switch (keyword.type) {
18518 case PM_TOKEN_KEYWORD_BREAK: {
18519 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18520 if (!parser->partial_script) parse_block_exit(parser, node);
18521 return node;
18522 }
18523 case PM_TOKEN_KEYWORD_NEXT: {
18524 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18525 if (!parser->partial_script) parse_block_exit(parser, node);
18526 return node;
18527 }
18528 case PM_TOKEN_KEYWORD_RETURN: {
18529 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18530 parse_return(parser, node);
18531 return node;
18532 }
18533 default:
18534 assert(false && "unreachable");
18535 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18536 }
18537 }
18538 case PM_TOKEN_KEYWORD_SUPER: {
18539 parser_lex(parser);
18540
18541 pm_token_t keyword = parser->previous;
18542 pm_arguments_t arguments = { 0 };
18543 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18544
18545 if (
18546 arguments.opening_loc.start == NULL &&
18547 arguments.arguments == NULL &&
18548 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18549 ) {
18550 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18551 }
18552
18553 return UP(pm_super_node_create(parser, &keyword, &arguments));
18554 }
18555 case PM_TOKEN_KEYWORD_YIELD: {
18556 parser_lex(parser);
18557
18558 pm_token_t keyword = parser->previous;
18559 pm_arguments_t arguments = { 0 };
18560 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
18561
18562 // It's possible that we've parsed a block argument through our
18563 // call to parse_arguments_list. If we found one, we should mark it
18564 // as invalid and destroy it, as we don't have a place for it on the
18565 // yield node.
18566 if (arguments.block != NULL) {
18567 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18568 pm_node_unreference(parser, arguments.block);
18569 pm_node_destroy(parser, arguments.block);
18570 arguments.block = NULL;
18571 }
18572
18573 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
18574 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18575
18576 return node;
18577 }
18578 case PM_TOKEN_KEYWORD_CLASS: {
18579 size_t opening_newline_index = token_newline_index(parser);
18580 parser_lex(parser);
18581
18582 pm_token_t class_keyword = parser->previous;
18583 pm_do_loop_stack_push(parser, false);
18584
18585 pm_node_list_t current_block_exits = { 0 };
18586 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18587
18588 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18589 pm_token_t operator = parser->previous;
18590 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18591
18592 pm_parser_scope_push(parser, true);
18593 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18594 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18595 }
18596
18597 pm_node_t *statements = NULL;
18598 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18599 pm_accepts_block_stack_push(parser, true);
18600 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18601 pm_accepts_block_stack_pop(parser);
18602 }
18603
18604 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18605 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18606 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18607 } else {
18608 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18609 }
18610
18611 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18612
18613 pm_constant_id_list_t locals;
18614 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18615
18616 pm_parser_scope_pop(parser);
18617 pm_do_loop_stack_pop(parser);
18618
18619 flush_block_exits(parser, previous_block_exits);
18620 pm_node_list_free(&current_block_exits);
18621
18622 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18623 }
18624
18625 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18626 pm_token_t name = parser->previous;
18627 if (name.type != PM_TOKEN_CONSTANT) {
18628 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18629 }
18630
18631 pm_token_t inheritance_operator;
18632 pm_node_t *superclass;
18633
18634 if (match1(parser, PM_TOKEN_LESS)) {
18635 inheritance_operator = parser->current;
18636 lex_state_set(parser, PM_LEX_STATE_BEG);
18637
18638 parser->command_start = true;
18639 parser_lex(parser);
18640
18641 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18642 } else {
18643 inheritance_operator = not_provided(parser);
18644 superclass = NULL;
18645 }
18646
18647 pm_parser_scope_push(parser, true);
18648
18649 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
18650 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18651 } else {
18652 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18653 }
18654 pm_node_t *statements = NULL;
18655
18656 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18657 pm_accepts_block_stack_push(parser, true);
18658 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18659 pm_accepts_block_stack_pop(parser);
18660 }
18661
18662 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18663 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18664 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18665 } else {
18666 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18667 }
18668
18669 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18670
18671 if (context_def_p(parser)) {
18672 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18673 }
18674
18675 pm_constant_id_list_t locals;
18676 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18677
18678 pm_parser_scope_pop(parser);
18679 pm_do_loop_stack_pop(parser);
18680
18681 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18682 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18683 }
18684
18685 pop_block_exits(parser, previous_block_exits);
18686 pm_node_list_free(&current_block_exits);
18687
18688 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous));
18689 }
18690 case PM_TOKEN_KEYWORD_DEF: {
18691 pm_node_list_t current_block_exits = { 0 };
18692 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18693
18694 pm_token_t def_keyword = parser->current;
18695 size_t opening_newline_index = token_newline_index(parser);
18696
18697 pm_node_t *receiver = NULL;
18698 pm_token_t operator = not_provided(parser);
18699 pm_token_t name;
18700
18701 // This context is necessary for lexing `...` in bare params
18702 // correctly. It must be pushed before lexing the first param, so it
18703 // is here.
18704 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18705 parser_lex(parser);
18706
18707 // This will be false if the method name is not a valid identifier
18708 // but could be followed by an operator.
18709 bool valid_name = true;
18710
18711 switch (parser->current.type) {
18712 case PM_CASE_OPERATOR:
18713 pm_parser_scope_push(parser, true);
18714 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18715 parser_lex(parser);
18716
18717 name = parser->previous;
18718 break;
18719 case PM_TOKEN_IDENTIFIER: {
18720 parser_lex(parser);
18721
18722 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18723 receiver = parse_variable_call(parser);
18724
18725 pm_parser_scope_push(parser, true);
18726 lex_state_set(parser, PM_LEX_STATE_FNAME);
18727 parser_lex(parser);
18728
18729 operator = parser->previous;
18730 name = parse_method_definition_name(parser);
18731 } else {
18732 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
18733 pm_parser_scope_push(parser, true);
18734
18735 name = parser->previous;
18736 }
18737
18738 break;
18739 }
18740 case PM_TOKEN_INSTANCE_VARIABLE:
18741 case PM_TOKEN_CLASS_VARIABLE:
18742 case PM_TOKEN_GLOBAL_VARIABLE:
18743 valid_name = false;
18745 case PM_TOKEN_CONSTANT:
18746 case PM_TOKEN_KEYWORD_NIL:
18747 case PM_TOKEN_KEYWORD_SELF:
18748 case PM_TOKEN_KEYWORD_TRUE:
18749 case PM_TOKEN_KEYWORD_FALSE:
18750 case PM_TOKEN_KEYWORD___FILE__:
18751 case PM_TOKEN_KEYWORD___LINE__:
18752 case PM_TOKEN_KEYWORD___ENCODING__: {
18753 pm_parser_scope_push(parser, true);
18754 parser_lex(parser);
18755
18756 pm_token_t identifier = parser->previous;
18757
18758 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18759 lex_state_set(parser, PM_LEX_STATE_FNAME);
18760 parser_lex(parser);
18761 operator = parser->previous;
18762
18763 switch (identifier.type) {
18764 case PM_TOKEN_CONSTANT:
18765 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18766 break;
18767 case PM_TOKEN_INSTANCE_VARIABLE:
18768 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18769 break;
18770 case PM_TOKEN_CLASS_VARIABLE:
18771 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18772 break;
18773 case PM_TOKEN_GLOBAL_VARIABLE:
18774 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18775 break;
18776 case PM_TOKEN_KEYWORD_NIL:
18777 receiver = UP(pm_nil_node_create(parser, &identifier));
18778 break;
18779 case PM_TOKEN_KEYWORD_SELF:
18780 receiver = UP(pm_self_node_create(parser, &identifier));
18781 break;
18782 case PM_TOKEN_KEYWORD_TRUE:
18783 receiver = UP(pm_true_node_create(parser, &identifier));
18784 break;
18785 case PM_TOKEN_KEYWORD_FALSE:
18786 receiver = UP(pm_false_node_create(parser, &identifier));
18787 break;
18788 case PM_TOKEN_KEYWORD___FILE__:
18789 receiver = UP(pm_source_file_node_create(parser, &identifier));
18790 break;
18791 case PM_TOKEN_KEYWORD___LINE__:
18792 receiver = UP(pm_source_line_node_create(parser, &identifier));
18793 break;
18794 case PM_TOKEN_KEYWORD___ENCODING__:
18795 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18796 break;
18797 default:
18798 break;
18799 }
18800
18801 name = parse_method_definition_name(parser);
18802 } else {
18803 if (!valid_name) {
18804 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18805 }
18806
18807 name = identifier;
18808 }
18809 break;
18810 }
18811 case PM_TOKEN_PARENTHESIS_LEFT: {
18812 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18813 // the inner expression of this parenthesis should not be
18814 // processed under this context. Thus, the context is popped
18815 // here.
18816 context_pop(parser);
18817 parser_lex(parser);
18818
18819 pm_token_t lparen = parser->previous;
18820 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18821
18822 accept1(parser, PM_TOKEN_NEWLINE);
18823 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18824 pm_token_t rparen = parser->previous;
18825
18826 lex_state_set(parser, PM_LEX_STATE_FNAME);
18827 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18828
18829 operator = parser->previous;
18830 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18831
18832 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
18833 // reason as described above.
18834 pm_parser_scope_push(parser, true);
18835 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18836 name = parse_method_definition_name(parser);
18837 break;
18838 }
18839 default:
18840 pm_parser_scope_push(parser, true);
18841 name = parse_method_definition_name(parser);
18842 break;
18843 }
18844
18845 pm_token_t lparen;
18846 pm_token_t rparen;
18847 pm_parameters_node_t *params;
18848
18849 bool accept_endless_def = true;
18850 switch (parser->current.type) {
18851 case PM_TOKEN_PARENTHESIS_LEFT: {
18852 parser_lex(parser);
18853 lparen = parser->previous;
18854
18855 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18856 params = NULL;
18857 } else {
18858 params = parse_parameters(
18859 parser,
18860 PM_BINDING_POWER_DEFINED,
18861 true,
18862 false,
18863 true,
18864 true,
18865 false,
18866 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18867 (uint16_t) (depth + 1)
18868 );
18869 }
18870
18871 lex_state_set(parser, PM_LEX_STATE_BEG);
18872 parser->command_start = true;
18873
18874 context_pop(parser);
18875 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18876 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18877 parser->previous.start = parser->previous.end;
18878 parser->previous.type = PM_TOKEN_MISSING;
18879 }
18880
18881 rparen = parser->previous;
18882 break;
18883 }
18884 case PM_CASE_PARAMETER: {
18885 // If we're about to lex a label, we need to add the label
18886 // state to make sure the next newline is ignored.
18887 if (parser->current.type == PM_TOKEN_LABEL) {
18888 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18889 }
18890
18891 lparen = not_provided(parser);
18892 rparen = not_provided(parser);
18893 params = parse_parameters(
18894 parser,
18895 PM_BINDING_POWER_DEFINED,
18896 false,
18897 false,
18898 true,
18899 true,
18900 false,
18901 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18902 (uint16_t) (depth + 1)
18903 );
18904
18905 // Reject `def * = 1` and similar. We have to specifically check
18906 // for them because they create ambiguity with optional arguments.
18907 accept_endless_def = false;
18908
18909 context_pop(parser);
18910 break;
18911 }
18912 default: {
18913 lparen = not_provided(parser);
18914 rparen = not_provided(parser);
18915 params = NULL;
18916
18917 context_pop(parser);
18918 break;
18919 }
18920 }
18921
18922 pm_node_t *statements = NULL;
18923 pm_token_t equal;
18924 pm_token_t end_keyword;
18925
18926 if (accept1(parser, PM_TOKEN_EQUAL)) {
18927 if (token_is_setter_name(&name)) {
18928 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18929 }
18930 if (!accept_endless_def) {
18931 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18932 }
18933 if (
18936 ) {
18937 PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18938 }
18939 equal = parser->previous;
18940
18941 context_push(parser, PM_CONTEXT_DEF);
18942 pm_do_loop_stack_push(parser, false);
18943 statements = UP(pm_statements_node_create(parser));
18944
18945 bool allow_command_call;
18946 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18947 allow_command_call = accepts_command_call;
18948 } else {
18949 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18950 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18951 }
18952
18953 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18954
18955 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18956 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18957
18958 pm_token_t rescue_keyword = parser->previous;
18959 pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18960 context_pop(parser);
18961
18962 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18963 }
18964
18965 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18966 pm_do_loop_stack_pop(parser);
18967 context_pop(parser);
18968 end_keyword = not_provided(parser);
18969 } else {
18970 equal = not_provided(parser);
18971
18972 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
18973 lex_state_set(parser, PM_LEX_STATE_BEG);
18974 parser->command_start = true;
18975 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18976 } else {
18977 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18978 }
18979
18980 pm_accepts_block_stack_push(parser, true);
18981 pm_do_loop_stack_push(parser, false);
18982
18983 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18984 pm_accepts_block_stack_push(parser, true);
18985 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18986 pm_accepts_block_stack_pop(parser);
18987 }
18988
18989 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18990 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18991 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18992 } else {
18993 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18994 }
18995
18996 pm_accepts_block_stack_pop(parser);
18997 pm_do_loop_stack_pop(parser);
18998
18999 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
19000 end_keyword = parser->previous;
19001 }
19002
19003 pm_constant_id_list_t locals;
19004 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19005 pm_parser_scope_pop(parser);
19006
19012 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19013
19014 flush_block_exits(parser, previous_block_exits);
19015 pm_node_list_free(&current_block_exits);
19016
19017 return UP(pm_def_node_create(
19018 parser,
19019 name_id,
19020 &name,
19021 receiver,
19022 params,
19023 statements,
19024 &locals,
19025 &def_keyword,
19026 &operator,
19027 &lparen,
19028 &rparen,
19029 &equal,
19030 &end_keyword
19031 ));
19032 }
19033 case PM_TOKEN_KEYWORD_DEFINED: {
19034 parser_lex(parser);
19035 pm_token_t keyword = parser->previous;
19036
19037 pm_token_t lparen;
19038 pm_token_t rparen;
19039 pm_node_t *expression;
19040
19041 context_push(parser, PM_CONTEXT_DEFINED);
19042 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19043
19044 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19045 lparen = parser->previous;
19046
19047 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19048 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19049 lparen = not_provided(parser);
19050 rparen = not_provided(parser);
19051 } else {
19052 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19053
19054 if (parser->recovering) {
19055 rparen = not_provided(parser);
19056 } else {
19057 accept1(parser, PM_TOKEN_NEWLINE);
19058 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19059 rparen = parser->previous;
19060 }
19061 }
19062 } else {
19063 lparen = not_provided(parser);
19064 rparen = not_provided(parser);
19065 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19066 }
19067
19068 context_pop(parser);
19069 return UP(pm_defined_node_create(
19070 parser,
19071 &lparen,
19072 expression,
19073 &rparen,
19074 &keyword
19075 ));
19076 }
19077 case PM_TOKEN_KEYWORD_END_UPCASE: {
19078 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19079 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19080 }
19081
19082 parser_lex(parser);
19083 pm_token_t keyword = parser->previous;
19084
19085 if (context_def_p(parser)) {
19086 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19087 }
19088
19089 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19090 pm_token_t opening = parser->previous;
19091 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19092
19093 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19094 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19095 }
19096 case PM_TOKEN_KEYWORD_FALSE:
19097 parser_lex(parser);
19098 return UP(pm_false_node_create(parser, &parser->previous));
19099 case PM_TOKEN_KEYWORD_FOR: {
19100 size_t opening_newline_index = token_newline_index(parser);
19101 parser_lex(parser);
19102
19103 pm_token_t for_keyword = parser->previous;
19104 pm_node_t *index;
19105
19106 context_push(parser, PM_CONTEXT_FOR_INDEX);
19107
19108 // First, parse out the first index expression.
19109 if (accept1(parser, PM_TOKEN_USTAR)) {
19110 pm_token_t star_operator = parser->previous;
19111 pm_node_t *name = NULL;
19112
19113 if (token_begins_expression_p(parser->current.type)) {
19114 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19115 }
19116
19117 index = UP(pm_splat_node_create(parser, &star_operator, name));
19118 } else if (token_begins_expression_p(parser->current.type)) {
19119 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19120 } else {
19121 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19122 index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end));
19123 }
19124
19125 // Now, if there are multiple index expressions, parse them out.
19126 if (match1(parser, PM_TOKEN_COMMA)) {
19127 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19128 } else {
19129 index = parse_target(parser, index, false, false);
19130 }
19131
19132 context_pop(parser);
19133 pm_do_loop_stack_push(parser, true);
19134
19135 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19136 pm_token_t in_keyword = parser->previous;
19137
19138 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19139 pm_do_loop_stack_pop(parser);
19140
19141 pm_token_t do_keyword;
19142 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19143 do_keyword = parser->previous;
19144 } else {
19145 do_keyword = not_provided(parser);
19146 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19147 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19148 }
19149 }
19150
19151 pm_statements_node_t *statements = NULL;
19152 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19153 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19154 }
19155
19156 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19157 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19158
19159 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous));
19160 }
19161 case PM_TOKEN_KEYWORD_IF:
19162 if (parser_end_of_line_p(parser)) {
19163 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19164 }
19165
19166 size_t opening_newline_index = token_newline_index(parser);
19167 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19168 parser_lex(parser);
19169
19170 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19171 case PM_TOKEN_KEYWORD_UNDEF: {
19172 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19173 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19174 }
19175
19176 parser_lex(parser);
19177 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19178 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19179
19180 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19181 pm_node_destroy(parser, name);
19182 } else {
19183 pm_undef_node_append(undef, name);
19184
19185 while (match1(parser, PM_TOKEN_COMMA)) {
19186 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19187 parser_lex(parser);
19188 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19189
19190 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19191 pm_node_destroy(parser, name);
19192 break;
19193 }
19194
19195 pm_undef_node_append(undef, name);
19196 }
19197 }
19198
19199 return UP(undef);
19200 }
19201 case PM_TOKEN_KEYWORD_NOT: {
19202 parser_lex(parser);
19203
19204 pm_token_t message = parser->previous;
19205 pm_arguments_t arguments = { 0 };
19206 pm_node_t *receiver = NULL;
19207
19208 // If we do not accept a command call, then we also do not accept a
19209 // not without parentheses. In this case we need to reject this
19210 // syntax.
19211 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19212 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19213 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19214 } else {
19215 accept1(parser, PM_TOKEN_NEWLINE);
19216 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19217 }
19218
19219 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
19220 }
19221
19222 accept1(parser, PM_TOKEN_NEWLINE);
19223
19224 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19225 pm_token_t lparen = parser->previous;
19226
19227 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19228 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19229 } else {
19230 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19231 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19232
19233 if (!parser->recovering) {
19234 accept1(parser, PM_TOKEN_NEWLINE);
19235 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19236 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19237 }
19238 }
19239 } else {
19240 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19241 }
19242
19243 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19244 }
19245 case PM_TOKEN_KEYWORD_UNLESS: {
19246 size_t opening_newline_index = token_newline_index(parser);
19247 parser_lex(parser);
19248
19249 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19250 }
19251 case PM_TOKEN_KEYWORD_MODULE: {
19252 pm_node_list_t current_block_exits = { 0 };
19253 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19254
19255 size_t opening_newline_index = token_newline_index(parser);
19256 parser_lex(parser);
19257 pm_token_t module_keyword = parser->previous;
19258
19259 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19260 pm_token_t name;
19261
19262 // If we can recover from a syntax error that occurred while parsing
19263 // the name of the module, then we'll handle that here.
19264 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19265 pop_block_exits(parser, previous_block_exits);
19266 pm_node_list_free(&current_block_exits);
19267
19268 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19269 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19270 }
19271
19272 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19273 pm_token_t double_colon = parser->previous;
19274
19275 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19276 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19277 }
19278
19279 // Here we retrieve the name of the module. If it wasn't a constant,
19280 // then it's possible that `module foo` was passed, which is a
19281 // syntax error. We handle that here as well.
19282 name = parser->previous;
19283 if (name.type != PM_TOKEN_CONSTANT) {
19284 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19285 }
19286
19287 pm_parser_scope_push(parser, true);
19288 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19289 pm_node_t *statements = NULL;
19290
19291 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19292 pm_accepts_block_stack_push(parser, true);
19293 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19294 pm_accepts_block_stack_pop(parser);
19295 }
19296
19297 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19298 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19299 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19300 } else {
19301 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19302 }
19303
19304 pm_constant_id_list_t locals;
19305 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19306
19307 pm_parser_scope_pop(parser);
19308 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
19309
19310 if (context_def_p(parser)) {
19311 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19312 }
19313
19314 pop_block_exits(parser, previous_block_exits);
19315 pm_node_list_free(&current_block_exits);
19316
19317 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19318 }
19319 case PM_TOKEN_KEYWORD_NIL:
19320 parser_lex(parser);
19321 return UP(pm_nil_node_create(parser, &parser->previous));
19322 case PM_TOKEN_KEYWORD_REDO: {
19323 parser_lex(parser);
19324
19325 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19326 if (!parser->partial_script) parse_block_exit(parser, node);
19327
19328 return node;
19329 }
19330 case PM_TOKEN_KEYWORD_RETRY: {
19331 parser_lex(parser);
19332
19333 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19334 parse_retry(parser, node);
19335
19336 return node;
19337 }
19338 case PM_TOKEN_KEYWORD_SELF:
19339 parser_lex(parser);
19340 return UP(pm_self_node_create(parser, &parser->previous));
19341 case PM_TOKEN_KEYWORD_TRUE:
19342 parser_lex(parser);
19343 return UP(pm_true_node_create(parser, &parser->previous));
19344 case PM_TOKEN_KEYWORD_UNTIL: {
19345 size_t opening_newline_index = token_newline_index(parser);
19346
19347 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19348 pm_do_loop_stack_push(parser, true);
19349
19350 parser_lex(parser);
19351 pm_token_t keyword = parser->previous;
19352 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19353
19354 pm_do_loop_stack_pop(parser);
19355 context_pop(parser);
19356
19357 pm_token_t do_keyword;
19358 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19359 do_keyword = parser->previous;
19360 } else {
19361 do_keyword = not_provided(parser);
19362 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19363 }
19364
19365 pm_statements_node_t *statements = NULL;
19366 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19367 pm_accepts_block_stack_push(parser, true);
19368 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19369 pm_accepts_block_stack_pop(parser);
19370 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19371 }
19372
19373 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19374 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
19375
19376 return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19377 }
19378 case PM_TOKEN_KEYWORD_WHILE: {
19379 size_t opening_newline_index = token_newline_index(parser);
19380
19381 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19382 pm_do_loop_stack_push(parser, true);
19383
19384 parser_lex(parser);
19385 pm_token_t keyword = parser->previous;
19386 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19387
19388 pm_do_loop_stack_pop(parser);
19389 context_pop(parser);
19390
19391 pm_token_t do_keyword;
19392 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19393 do_keyword = parser->previous;
19394 } else {
19395 do_keyword = not_provided(parser);
19396 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19397 }
19398
19399 pm_statements_node_t *statements = NULL;
19400 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19401 pm_accepts_block_stack_push(parser, true);
19402 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19403 pm_accepts_block_stack_pop(parser);
19404 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19405 }
19406
19407 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19408 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
19409
19410 return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19411 }
19412 case PM_TOKEN_PERCENT_LOWER_I: {
19413 parser_lex(parser);
19414 pm_token_t opening = parser->previous;
19415 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19416 pm_node_t *current = NULL;
19417
19418 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19419 accept1(parser, PM_TOKEN_WORDS_SEP);
19420 if (match1(parser, PM_TOKEN_STRING_END)) break;
19421
19422 // Interpolation is not possible but nested heredocs can still lead to
19423 // consecutive (disjoint) string tokens when the final newline is escaped.
19424 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19425 pm_token_t opening = not_provided(parser);
19426 pm_token_t closing = not_provided(parser);
19427
19428 // Record the string node, moving to interpolation if needed.
19429 if (current == NULL) {
19430 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19431 parser_lex(parser);
19432 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19433 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19434 parser_lex(parser);
19435 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19436 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19437 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19438 pm_token_t bounds = not_provided(parser);
19439
19440 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19441 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19442 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19443 parser_lex(parser);
19444
19445 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19446 pm_interpolated_symbol_node_append(interpolated, first_string);
19447 pm_interpolated_symbol_node_append(interpolated, second_string);
19448
19449 xfree(current);
19450 current = UP(interpolated);
19451 } else {
19452 assert(false && "unreachable");
19453 }
19454 }
19455
19456 if (current) {
19457 pm_array_node_elements_append(array, current);
19458 current = NULL;
19459 } else {
19460 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19461 }
19462 }
19463
19464 pm_token_t closing = parser->current;
19465 if (match1(parser, PM_TOKEN_EOF)) {
19466 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19467 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19468 } else {
19469 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19470 }
19471 pm_array_node_close_set(array, &closing);
19472
19473 return UP(array);
19474 }
19475 case PM_TOKEN_PERCENT_UPPER_I: {
19476 parser_lex(parser);
19477 pm_token_t opening = parser->previous;
19478 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19479
19480 // This is the current node that we are parsing that will be added to the
19481 // list of elements.
19482 pm_node_t *current = NULL;
19483
19484 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19485 switch (parser->current.type) {
19486 case PM_TOKEN_WORDS_SEP: {
19487 if (current == NULL) {
19488 // If we hit a separator before we have any content, then we don't
19489 // need to do anything.
19490 } else {
19491 // If we hit a separator after we've hit content, then we need to
19492 // append that content to the list and reset the current node.
19493 pm_array_node_elements_append(array, current);
19494 current = NULL;
19495 }
19496
19497 parser_lex(parser);
19498 break;
19499 }
19500 case PM_TOKEN_STRING_CONTENT: {
19501 pm_token_t opening = not_provided(parser);
19502 pm_token_t closing = not_provided(parser);
19503
19504 if (current == NULL) {
19505 // If we hit content and the current node is NULL, then this is
19506 // the first string content we've seen. In that case we're going
19507 // to create a new symbol node and set that to the current.
19508 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19509 parser_lex(parser);
19510 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19511 // If we hit string content and the current node is an
19512 // interpolated symbol, then we need to append the string content
19513 // to the list of child nodes.
19514 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19515 parser_lex(parser);
19516
19517 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19518 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19519 // If we hit string content and the current node is a symbol node,
19520 // then we need to convert the current node into an interpolated
19521 // symbol and add the string content to the list of child nodes.
19522 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19523 pm_token_t bounds = not_provided(parser);
19524
19525 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19526 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19527 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19528 parser_lex(parser);
19529
19530 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19531 pm_interpolated_symbol_node_append(interpolated, first_string);
19532 pm_interpolated_symbol_node_append(interpolated, second_string);
19533
19534 xfree(current);
19535 current = UP(interpolated);
19536 } else {
19537 assert(false && "unreachable");
19538 }
19539
19540 break;
19541 }
19542 case PM_TOKEN_EMBVAR: {
19543 bool start_location_set = false;
19544 if (current == NULL) {
19545 // If we hit an embedded variable and the current node is NULL,
19546 // then this is the start of a new symbol. We'll set the current
19547 // node to a new interpolated symbol.
19548 pm_token_t opening = not_provided(parser);
19549 pm_token_t closing = not_provided(parser);
19550 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19551 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19552 // If we hit an embedded variable and the current node is a symbol
19553 // node, then we'll convert it into a string node and add that
19554 // string node as the first part of a new interpolated symbol.
19555 pm_token_t opening = not_provided(parser);
19556 pm_token_t closing = not_provided(parser);
19557 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19558
19559 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19560 pm_interpolated_symbol_node_append(interpolated, current);
19561 interpolated->base.location.start = current->location.start;
19562 start_location_set = true;
19563 current = UP(interpolated);
19564 } else {
19565 // If we hit an embedded variable and the current node is an
19566 // interpolated symbol, then we'll just add the embedded variable.
19567 }
19568
19569 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19570 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19571 if (!start_location_set) {
19572 current->location.start = part->location.start;
19573 }
19574 break;
19575 }
19576 case PM_TOKEN_EMBEXPR_BEGIN: {
19577 bool start_location_set = false;
19578 if (current == NULL) {
19579 // If we hit an embedded expression and the current node is NULL,
19580 // then this is the start of a new symbol. We'll set the current
19581 // node to a new interpolated symbol.
19582 pm_token_t opening = not_provided(parser);
19583 pm_token_t closing = not_provided(parser);
19584 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19585 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19586 // If we hit an embedded expression and the current node is a
19587 // symbol node, then we'll convert it into a string node and add
19588 // that string node as the first part of a new interpolated
19589 // symbol.
19590 pm_token_t opening = not_provided(parser);
19591 pm_token_t closing = not_provided(parser);
19592 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19593
19594 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19595 pm_interpolated_symbol_node_append(interpolated, current);
19596 interpolated->base.location.start = current->location.start;
19597 start_location_set = true;
19598 current = UP(interpolated);
19599 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19600 // If we hit an embedded expression and the current node is an
19601 // interpolated symbol, then we'll just continue on.
19602 } else {
19603 assert(false && "unreachable");
19604 }
19605
19606 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19607 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19608 if (!start_location_set) {
19609 current->location.start = part->location.start;
19610 }
19611 break;
19612 }
19613 default:
19614 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19615 parser_lex(parser);
19616 break;
19617 }
19618 }
19619
19620 // If we have a current node, then we need to append it to the list.
19621 if (current) {
19622 pm_array_node_elements_append(array, current);
19623 }
19624
19625 pm_token_t closing = parser->current;
19626 if (match1(parser, PM_TOKEN_EOF)) {
19627 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19628 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19629 } else {
19630 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19631 }
19632 pm_array_node_close_set(array, &closing);
19633
19634 return UP(array);
19635 }
19636 case PM_TOKEN_PERCENT_LOWER_W: {
19637 parser_lex(parser);
19638 pm_token_t opening = parser->previous;
19639 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19640 pm_node_t *current = NULL;
19641
19642 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19643 accept1(parser, PM_TOKEN_WORDS_SEP);
19644 if (match1(parser, PM_TOKEN_STRING_END)) break;
19645
19646 // Interpolation is not possible but nested heredocs can still lead to
19647 // consecutive (disjoint) string tokens when the final newline is escaped.
19648 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19649 pm_token_t opening = not_provided(parser);
19650 pm_token_t closing = not_provided(parser);
19651
19652 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19653
19654 // Record the string node, moving to interpolation if needed.
19655 if (current == NULL) {
19656 current = string;
19657 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19658 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19659 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19660 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19661 pm_interpolated_string_node_append(interpolated, current);
19662 pm_interpolated_string_node_append(interpolated, string);
19663 current = UP(interpolated);
19664 } else {
19665 assert(false && "unreachable");
19666 }
19667 parser_lex(parser);
19668 }
19669
19670 if (current) {
19671 pm_array_node_elements_append(array, current);
19672 current = NULL;
19673 } else {
19674 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19675 }
19676 }
19677
19678 pm_token_t closing = parser->current;
19679 if (match1(parser, PM_TOKEN_EOF)) {
19680 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19681 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19682 } else {
19683 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19684 }
19685
19686 pm_array_node_close_set(array, &closing);
19687 return UP(array);
19688 }
19689 case PM_TOKEN_PERCENT_UPPER_W: {
19690 parser_lex(parser);
19691 pm_token_t opening = parser->previous;
19692 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19693
19694 // This is the current node that we are parsing that will be added
19695 // to the list of elements.
19696 pm_node_t *current = NULL;
19697
19698 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19699 switch (parser->current.type) {
19700 case PM_TOKEN_WORDS_SEP: {
19701 // Reset the explicit encoding if we hit a separator
19702 // since each element can have its own encoding.
19703 parser->explicit_encoding = NULL;
19704
19705 if (current == NULL) {
19706 // If we hit a separator before we have any content,
19707 // then we don't need to do anything.
19708 } else {
19709 // If we hit a separator after we've hit content,
19710 // then we need to append that content to the list
19711 // and reset the current node.
19712 pm_array_node_elements_append(array, current);
19713 current = NULL;
19714 }
19715
19716 parser_lex(parser);
19717 break;
19718 }
19719 case PM_TOKEN_STRING_CONTENT: {
19720 pm_token_t opening = not_provided(parser);
19721 pm_token_t closing = not_provided(parser);
19722
19723 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19724 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19725 parser_lex(parser);
19726
19727 if (current == NULL) {
19728 // If we hit content and the current node is NULL,
19729 // then this is the first string content we've seen.
19730 // In that case we're going to create a new string
19731 // node and set that to the current.
19732 current = string;
19733 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19734 // If we hit string content and the current node is
19735 // an interpolated string, then we need to append
19736 // the string content to the list of child nodes.
19737 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19738 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19739 // If we hit string content and the current node is
19740 // a string node, then we need to convert the
19741 // current node into an interpolated string and add
19742 // the string content to the list of child nodes.
19743 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19744 pm_interpolated_string_node_append(interpolated, current);
19745 pm_interpolated_string_node_append(interpolated, string);
19746 current = UP(interpolated);
19747 } else {
19748 assert(false && "unreachable");
19749 }
19750
19751 break;
19752 }
19753 case PM_TOKEN_EMBVAR: {
19754 if (current == NULL) {
19755 // If we hit an embedded variable and the current
19756 // node is NULL, then this is the start of a new
19757 // string. We'll set the current node to a new
19758 // interpolated string.
19759 pm_token_t opening = not_provided(parser);
19760 pm_token_t closing = not_provided(parser);
19761 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19762 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19763 // If we hit an embedded variable and the current
19764 // node is a string node, then we'll convert the
19765 // current into an interpolated string and add the
19766 // string node to the list of parts.
19767 pm_token_t opening = not_provided(parser);
19768 pm_token_t closing = not_provided(parser);
19769 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19770 pm_interpolated_string_node_append(interpolated, current);
19771 current = UP(interpolated);
19772 } else {
19773 // If we hit an embedded variable and the current
19774 // node is an interpolated string, then we'll just
19775 // add the embedded variable.
19776 }
19777
19778 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19779 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19780 break;
19781 }
19782 case PM_TOKEN_EMBEXPR_BEGIN: {
19783 if (current == NULL) {
19784 // If we hit an embedded expression and the current
19785 // node is NULL, then this is the start of a new
19786 // string. We'll set the current node to a new
19787 // interpolated string.
19788 pm_token_t opening = not_provided(parser);
19789 pm_token_t closing = not_provided(parser);
19790 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19791 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19792 // If we hit an embedded expression and the current
19793 // node is a string node, then we'll convert the
19794 // current into an interpolated string and add the
19795 // string node to the list of parts.
19796 pm_token_t opening = not_provided(parser);
19797 pm_token_t closing = not_provided(parser);
19798 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19799 pm_interpolated_string_node_append(interpolated, current);
19800 current = UP(interpolated);
19801 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19802 // If we hit an embedded expression and the current
19803 // node is an interpolated string, then we'll just
19804 // continue on.
19805 } else {
19806 assert(false && "unreachable");
19807 }
19808
19809 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19810 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19811 break;
19812 }
19813 default:
19814 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19815 parser_lex(parser);
19816 break;
19817 }
19818 }
19819
19820 // If we have a current node, then we need to append it to the list.
19821 if (current) {
19822 pm_array_node_elements_append(array, current);
19823 }
19824
19825 pm_token_t closing = parser->current;
19826 if (match1(parser, PM_TOKEN_EOF)) {
19827 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19828 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19829 } else {
19830 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19831 }
19832
19833 pm_array_node_close_set(array, &closing);
19834 return UP(array);
19835 }
19836 case PM_TOKEN_REGEXP_BEGIN: {
19837 pm_token_t opening = parser->current;
19838 parser_lex(parser);
19839
19840 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19841 // If we get here, then we have an end immediately after a start. In
19842 // that case we'll create an empty content token and return an
19843 // uninterpolated regular expression.
19844 pm_token_t content = (pm_token_t) {
19845 .type = PM_TOKEN_STRING_CONTENT,
19846 .start = parser->previous.end,
19847 .end = parser->previous.end
19848 };
19849
19850 parser_lex(parser);
19851
19852 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
19853 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19854
19855 return node;
19856 }
19857
19858 pm_interpolated_regular_expression_node_t *interpolated;
19859
19860 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19861 // In this case we've hit string content so we know the regular
19862 // expression at least has something in it. We'll need to check if the
19863 // following token is the end (in which case we can return a plain
19864 // regular expression) or if it's not then it has interpolation.
19865 pm_string_t unescaped = parser->current_string;
19866 pm_token_t content = parser->current;
19867 bool ascii_only = parser->current_regular_expression_ascii_only;
19868 parser_lex(parser);
19869
19870 // If we hit an end, then we can create a regular expression
19871 // node without interpolation, which can be represented more
19872 // succinctly and more easily compiled.
19873 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19874 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19875
19876 // If we're not immediately followed by a =~, then we want
19877 // to parse all of the errors at this point. If it is
19878 // followed by a =~, then it will get parsed higher up while
19879 // parsing the named captures as well.
19880 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19881 parse_regular_expression_errors(parser, node);
19882 }
19883
19884 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19885 return UP(node);
19886 }
19887
19888 // If we get here, then we have interpolation so we'll need to create
19889 // a regular expression node with interpolation.
19890 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19891
19892 pm_token_t opening = not_provided(parser);
19893 pm_token_t closing = not_provided(parser);
19894 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19895
19896 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19897 // This is extremely strange, but the first string part of a
19898 // regular expression will always be tagged as binary if we
19899 // are in a US-ASCII file, no matter its contents.
19900 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19901 }
19902
19903 pm_interpolated_regular_expression_node_append(interpolated, part);
19904 } else {
19905 // If the first part of the body of the regular expression is not a
19906 // string content, then we have interpolation and we need to create an
19907 // interpolated regular expression node.
19908 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19909 }
19910
19911 // Now that we're here and we have interpolation, we'll parse all of the
19912 // parts into the list.
19913 pm_node_t *part;
19914 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19915 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19916 pm_interpolated_regular_expression_node_append(interpolated, part);
19917 }
19918 }
19919
19920 pm_token_t closing = parser->current;
19921 if (match1(parser, PM_TOKEN_EOF)) {
19922 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19923 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19924 } else {
19925 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19926 }
19927
19928 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19929 return UP(interpolated);
19930 }
19931 case PM_TOKEN_BACKTICK:
19932 case PM_TOKEN_PERCENT_LOWER_X: {
19933 parser_lex(parser);
19934 pm_token_t opening = parser->previous;
19935
19936 // When we get here, we don't know if this string is going to have
19937 // interpolation or not, even though it is allowed. Still, we want to be
19938 // able to return a string node without interpolation if we can since
19939 // it'll be faster.
19940 if (match1(parser, PM_TOKEN_STRING_END)) {
19941 // If we get here, then we have an end immediately after a start. In
19942 // that case we'll create an empty content token and return an
19943 // uninterpolated string.
19944 pm_token_t content = (pm_token_t) {
19945 .type = PM_TOKEN_STRING_CONTENT,
19946 .start = parser->previous.end,
19947 .end = parser->previous.end
19948 };
19949
19950 parser_lex(parser);
19951 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19952 }
19953
19954 pm_interpolated_x_string_node_t *node;
19955
19956 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19957 // In this case we've hit string content so we know the string
19958 // at least has something in it. We'll need to check if the
19959 // following token is the end (in which case we can return a
19960 // plain string) or if it's not then it has interpolation.
19961 pm_string_t unescaped = parser->current_string;
19962 pm_token_t content = parser->current;
19963 parser_lex(parser);
19964
19965 if (match1(parser, PM_TOKEN_STRING_END)) {
19966 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19967 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19968 parser_lex(parser);
19969 return node;
19970 }
19971
19972 // If we get here, then we have interpolation so we'll need to
19973 // create a string node with interpolation.
19974 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19975
19976 pm_token_t opening = not_provided(parser);
19977 pm_token_t closing = not_provided(parser);
19978
19979 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19980 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19981
19982 pm_interpolated_xstring_node_append(node, part);
19983 } else {
19984 // If the first part of the body of the string is not a string
19985 // content, then we have interpolation and we need to create an
19986 // interpolated string node.
19987 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19988 }
19989
19990 pm_node_t *part;
19991 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19992 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19993 pm_interpolated_xstring_node_append(node, part);
19994 }
19995 }
19996
19997 pm_token_t closing = parser->current;
19998 if (match1(parser, PM_TOKEN_EOF)) {
19999 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20000 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20001 } else {
20002 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20003 }
20004 pm_interpolated_xstring_node_closing_set(node, &closing);
20005
20006 return UP(node);
20007 }
20008 case PM_TOKEN_USTAR: {
20009 parser_lex(parser);
20010
20011 // * operators at the beginning of expressions are only valid in the
20012 // context of a multiple assignment. We enforce that here. We'll
20013 // still lex past it though and create a missing node place.
20014 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20015 pm_parser_err_prefix(parser, diag_id);
20016 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
20017 }
20018
20019 pm_token_t operator = parser->previous;
20020 pm_node_t *name = NULL;
20021
20022 if (token_begins_expression_p(parser->current.type)) {
20023 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20024 }
20025
20026 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
20027
20028 if (match1(parser, PM_TOKEN_COMMA)) {
20029 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20030 } else {
20031 return parse_target_validate(parser, splat, true);
20032 }
20033 }
20034 case PM_TOKEN_BANG: {
20035 if (binding_power > PM_BINDING_POWER_UNARY) {
20036 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20037 }
20038
20039 parser_lex(parser);
20040
20041 pm_token_t operator = parser->previous;
20042 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20043 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20044
20045 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20046 return UP(node);
20047 }
20048 case PM_TOKEN_TILDE: {
20049 if (binding_power > PM_BINDING_POWER_UNARY) {
20050 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20051 }
20052 parser_lex(parser);
20053
20054 pm_token_t operator = parser->previous;
20055 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20056 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20057
20058 return UP(node);
20059 }
20060 case PM_TOKEN_UMINUS: {
20061 if (binding_power > PM_BINDING_POWER_UNARY) {
20062 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20063 }
20064 parser_lex(parser);
20065
20066 pm_token_t operator = parser->previous;
20067 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20068 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20069
20070 return UP(node);
20071 }
20072 case PM_TOKEN_UMINUS_NUM: {
20073 parser_lex(parser);
20074
20075 pm_token_t operator = parser->previous;
20076 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20077
20078 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20079 pm_token_t exponent_operator = parser->previous;
20080 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20081 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20082 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20083 } else {
20084 switch (PM_NODE_TYPE(node)) {
20085 case PM_INTEGER_NODE:
20086 case PM_FLOAT_NODE:
20087 case PM_RATIONAL_NODE:
20088 case PM_IMAGINARY_NODE:
20089 parse_negative_numeric(node);
20090 break;
20091 default:
20092 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20093 break;
20094 }
20095 }
20096
20097 return node;
20098 }
20099 case PM_TOKEN_MINUS_GREATER: {
20100 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20102
20103 size_t opening_newline_index = token_newline_index(parser);
20104 pm_accepts_block_stack_push(parser, true);
20105 parser_lex(parser);
20106
20107 pm_token_t operator = parser->previous;
20108 pm_parser_scope_push(parser, false);
20109
20110 pm_block_parameters_node_t *block_parameters;
20111
20112 switch (parser->current.type) {
20113 case PM_TOKEN_PARENTHESIS_LEFT: {
20114 pm_token_t opening = parser->current;
20115 parser_lex(parser);
20116
20117 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20118 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20119 } else {
20120 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20121 }
20122
20123 accept1(parser, PM_TOKEN_NEWLINE);
20124 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20125
20126 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20127 break;
20128 }
20129 case PM_CASE_PARAMETER: {
20130 pm_accepts_block_stack_push(parser, false);
20131 pm_token_t opening = not_provided(parser);
20132 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20133 pm_accepts_block_stack_pop(parser);
20134 break;
20135 }
20136 default: {
20137 block_parameters = NULL;
20138 break;
20139 }
20140 }
20141
20142 pm_token_t opening;
20143 pm_node_t *body = NULL;
20144 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20145
20146 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20147 opening = parser->previous;
20148
20149 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20150 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20151 }
20152
20153 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20154 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20155 } else {
20156 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20157 opening = parser->previous;
20158
20159 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20160 pm_accepts_block_stack_push(parser, true);
20161 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20162 pm_accepts_block_stack_pop(parser);
20163 }
20164
20165 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20166 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20167 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20168 } else {
20169 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20170 }
20171
20172 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20173 }
20174
20175 pm_constant_id_list_t locals;
20176 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20177 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20178
20179 pm_parser_scope_pop(parser);
20180 pm_accepts_block_stack_pop(parser);
20181
20182 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20183 }
20184 case PM_TOKEN_UPLUS: {
20185 if (binding_power > PM_BINDING_POWER_UNARY) {
20186 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20187 }
20188 parser_lex(parser);
20189
20190 pm_token_t operator = parser->previous;
20191 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20192 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20193
20194 return UP(node);
20195 }
20196 case PM_TOKEN_STRING_BEGIN:
20197 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20198 case PM_TOKEN_SYMBOL_BEGIN: {
20199 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20200 parser_lex(parser);
20201
20202 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20203 }
20204 default: {
20205 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20206
20207 if (recoverable != PM_CONTEXT_NONE) {
20208 parser->recovering = true;
20209
20210 // If the given error is not the generic one, then we'll add it
20211 // here because it will provide more context in addition to the
20212 // recoverable error that we will also add.
20213 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20214 pm_parser_err_prefix(parser, diag_id);
20215 }
20216
20217 // If we get here, then we are assuming this token is closing a
20218 // parent context, so we'll indicate that to the user so that
20219 // they know how we behaved.
20220 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20221 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20222 // We're going to make a special case here, because "cannot
20223 // parse expression" is pretty generic, and we know here that we
20224 // have an unexpected token.
20225 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20226 } else {
20227 pm_parser_err_prefix(parser, diag_id);
20228 }
20229
20230 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
20231 }
20232 }
20233}
20234
20244static pm_node_t *
20245parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20246 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20247
20248 // Contradicting binding powers, the right-hand-side value of the assignment
20249 // allows the `rescue` modifier.
20250 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20251 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20252
20253 pm_token_t rescue = parser->current;
20254 parser_lex(parser);
20255
20256 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20257 context_pop(parser);
20258
20259 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20260 }
20261
20262 return value;
20263}
20264
20269static void
20270parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20271 switch (PM_NODE_TYPE(node)) {
20272 case PM_BEGIN_NODE: {
20273 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20274 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20275 break;
20276 }
20277 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20278 const pm_local_variable_write_node_t *cast = (const pm_local_variable_write_node_t *) node;
20279 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20280 break;
20281 }
20282 case PM_PARENTHESES_NODE: {
20283 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20284 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20285 break;
20286 }
20287 case PM_STATEMENTS_NODE: {
20288 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20289 const pm_node_t *statement;
20290
20291 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20292 parse_assignment_value_local(parser, statement);
20293 }
20294 break;
20295 }
20296 default:
20297 break;
20298 }
20299}
20300
20313static pm_node_t *
20314parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20315 bool permitted = true;
20316 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20317
20318 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20319 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20320
20321 parse_assignment_value_local(parser, value);
20322 bool single_value = true;
20323
20324 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20325 single_value = false;
20326
20327 pm_token_t opening = not_provided(parser);
20328 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20329
20330 pm_array_node_elements_append(array, value);
20331 value = UP(array);
20332
20333 while (accept1(parser, PM_TOKEN_COMMA)) {
20334 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20335
20336 pm_array_node_elements_append(array, element);
20337 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20338
20339 parse_assignment_value_local(parser, element);
20340 }
20341 }
20342
20343 // Contradicting binding powers, the right-hand-side value of the assignment
20344 // allows the `rescue` modifier.
20345 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20346 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20347
20348 pm_token_t rescue = parser->current;
20349 parser_lex(parser);
20350
20351 bool accepts_command_call_inner = false;
20352
20353 // RHS can accept command call iff the value is a call with arguments
20354 // but without parenthesis.
20355 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20356 pm_call_node_t *call_node = (pm_call_node_t *) value;
20357 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20358 accepts_command_call_inner = true;
20359 }
20360 }
20361
20362 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20363 context_pop(parser);
20364
20365 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20366 }
20367
20368 return value;
20369}
20370
20378static void
20379parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20380 if (call_node->arguments != NULL) {
20381 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20382 pm_node_unreference(parser, UP(call_node->arguments));
20383 pm_node_destroy(parser, UP(call_node->arguments));
20384 call_node->arguments = NULL;
20385 }
20386
20387 if (call_node->block != NULL) {
20388 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20389 pm_node_unreference(parser, UP(call_node->block));
20390 pm_node_destroy(parser, UP(call_node->block));
20391 call_node->block = NULL;
20392 }
20393}
20394
20399typedef struct {
20402
20404 pm_call_node_t *call;
20405
20407 pm_match_write_node_t *match;
20408
20411
20419
20420static inline const uint8_t *
20421pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20422 cursor++;
20423
20424 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20425 uint8_t value = escape_hexadecimal_digit(*cursor);
20426 cursor++;
20427
20428 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20429 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20430 cursor++;
20431 }
20432
20433 pm_buffer_append_byte(unescaped, value);
20434 } else {
20435 pm_buffer_append_string(unescaped, "\\x", 2);
20436 }
20437
20438 return cursor;
20439}
20440
20441static inline const uint8_t *
20442pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20443 uint8_t value = (uint8_t) (*cursor - '0');
20444 cursor++;
20445
20446 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20447 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20448 cursor++;
20449
20450 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20451 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20452 cursor++;
20453 }
20454 }
20455
20456 pm_buffer_append_byte(unescaped, value);
20457 return cursor;
20458}
20459
20460static inline const uint8_t *
20461pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20462 const uint8_t *start = cursor - 1;
20463 cursor++;
20464
20465 if (cursor >= end) {
20466 pm_buffer_append_string(unescaped, "\\u", 2);
20467 return cursor;
20468 }
20469
20470 if (*cursor != '{') {
20471 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20472 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20473
20474 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20475 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20476 }
20477
20478 return cursor + length;
20479 }
20480
20481 cursor++;
20482 for (;;) {
20483 while (cursor < end && *cursor == ' ') cursor++;
20484
20485 if (cursor >= end) break;
20486 if (*cursor == '}') {
20487 cursor++;
20488 break;
20489 }
20490
20491 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20492 if (length == 0) {
20493 break;
20494 }
20495 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20496
20497 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20498 cursor += length;
20499 }
20500
20501 return cursor;
20502}
20503
20504static void
20505pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20506 const uint8_t *end = source + length;
20507 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20508
20509 for (;;) {
20510 if (++cursor >= end) {
20511 pm_buffer_append_byte(unescaped, '\\');
20512 return;
20513 }
20514
20515 switch (*cursor) {
20516 case 'x':
20517 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20518 break;
20519 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20520 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20521 break;
20522 case 'u':
20523 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20524 break;
20525 default:
20526 pm_buffer_append_byte(unescaped, '\\');
20527 break;
20528 }
20529
20530 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20531 if (next_cursor == NULL) break;
20532
20533 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20534 cursor = next_cursor;
20535 }
20536
20537 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20538}
20539
20544static void
20545parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20547
20548 pm_parser_t *parser = callback_data->parser;
20549 pm_call_node_t *call = callback_data->call;
20550 pm_constant_id_list_t *names = &callback_data->names;
20551
20552 const uint8_t *source = pm_string_source(capture);
20553 size_t length = pm_string_length(capture);
20554 pm_buffer_t unescaped = { 0 };
20555
20556 // First, we need to handle escapes within the name of the capture group.
20557 // This is because regular expressions have three different representations
20558 // in prism. The first is the plain source code. The second is the
20559 // representation that will be sent to the regular expression engine, which
20560 // is the value of the "unescaped" field. This is poorly named, because it
20561 // actually still contains escapes, just a subset of them that the regular
20562 // expression engine knows how to handle. The third representation is fully
20563 // unescaped, which is what we need.
20564 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20565 if (PRISM_UNLIKELY(cursor != NULL)) {
20566 pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
20567 source = (const uint8_t *) pm_buffer_value(&unescaped);
20568 length = pm_buffer_length(&unescaped);
20569 }
20570
20571 pm_location_t location;
20572 pm_constant_id_t name;
20573
20574 // If the name of the capture group isn't a valid identifier, we do
20575 // not add it to the local table.
20576 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20577 pm_buffer_free(&unescaped);
20578 return;
20579 }
20580
20581 if (callback_data->shared) {
20582 // If the unescaped string is a slice of the source, then we can
20583 // copy the names directly. The pointers will line up.
20584 location = (pm_location_t) { .start = source, .end = source + length };
20585 name = pm_parser_constant_id_location(parser, location.start, location.end);
20586 } else {
20587 // Otherwise, the name is a slice of the malloc-ed owned string,
20588 // in which case we need to copy it out into a new string.
20589 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20590
20591 void *memory = xmalloc(length);
20592 if (memory == NULL) abort();
20593
20594 memcpy(memory, source, length);
20595 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20596 }
20597
20598 // Add this name to the list of constants if it is valid, not duplicated,
20599 // and not a keyword.
20600 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20601 pm_constant_id_list_append(names, name);
20602
20603 int depth;
20604 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20605 // If the local is not already a local but it is a keyword, then we
20606 // do not want to add a capture for this.
20607 if (pm_local_is_keyword((const char *) source, length)) {
20608 pm_buffer_free(&unescaped);
20609 return;
20610 }
20611
20612 // If the identifier is not already a local, then we will add it to
20613 // the local table.
20614 pm_parser_local_add(parser, name, location.start, location.end, 0);
20615 }
20616
20617 // Here we lazily create the MatchWriteNode since we know we're
20618 // about to add a target.
20619 if (callback_data->match == NULL) {
20620 callback_data->match = pm_match_write_node_create(parser, call);
20621 }
20622
20623 // Next, create the local variable target and add it to the list of
20624 // targets for the match.
20625 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth));
20626 pm_node_list_append(&callback_data->match->targets, target);
20627 }
20628
20629 pm_buffer_free(&unescaped);
20630}
20631
20636static pm_node_t *
20637parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20639 .parser = parser,
20640 .call = call,
20641 .names = { 0 },
20642 .shared = content->type == PM_STRING_SHARED
20643 };
20644
20646 .parser = parser,
20647 .start = call->receiver->location.start,
20648 .end = call->receiver->location.end,
20649 .shared = content->type == PM_STRING_SHARED
20650 };
20651
20652 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20653 pm_constant_id_list_free(&callback_data.names);
20654
20655 if (callback_data.match != NULL) {
20656 return UP(callback_data.match);
20657 } else {
20658 return UP(call);
20659 }
20660}
20661
20662static inline pm_node_t *
20663parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20664 pm_token_t token = parser->current;
20665
20666 switch (token.type) {
20667 case PM_TOKEN_EQUAL: {
20668 switch (PM_NODE_TYPE(node)) {
20669 case PM_CALL_NODE: {
20670 // If we have no arguments to the call node and we need this
20671 // to be a target then this is either a method call or a
20672 // local variable write. This _must_ happen before the value
20673 // is parsed because it could be referenced in the value.
20674 pm_call_node_t *call_node = (pm_call_node_t *) node;
20675 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20676 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20677 }
20678 }
20680 case PM_CASE_WRITABLE: {
20681 // When we have `it = value`, we need to add `it` as a local
20682 // variable before parsing the value, in case the value
20683 // references the variable.
20684 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20685 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
20686 }
20687
20688 parser_lex(parser);
20689 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20690
20691 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20692 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20693 }
20694
20695 return parse_write(parser, node, &token, value);
20696 }
20697 case PM_SPLAT_NODE: {
20698 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20699 pm_multi_target_node_targets_append(parser, multi_target, node);
20700
20701 parser_lex(parser);
20702 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20703 return parse_write(parser, UP(multi_target), &token, value);
20704 }
20705 case PM_SOURCE_ENCODING_NODE:
20706 case PM_FALSE_NODE:
20707 case PM_SOURCE_FILE_NODE:
20708 case PM_SOURCE_LINE_NODE:
20709 case PM_NIL_NODE:
20710 case PM_SELF_NODE:
20711 case PM_TRUE_NODE: {
20712 // In these special cases, we have specific error messages
20713 // and we will replace them with local variable writes.
20714 parser_lex(parser);
20715 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20716 return parse_unwriteable_write(parser, node, &token, value);
20717 }
20718 default:
20719 // In this case we have an = sign, but we don't know what
20720 // it's for. We need to treat it as an error. We'll mark it
20721 // as an error and skip past it.
20722 parser_lex(parser);
20723 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20724 return node;
20725 }
20726 }
20727 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20728 switch (PM_NODE_TYPE(node)) {
20729 case PM_BACK_REFERENCE_READ_NODE:
20730 case PM_NUMBERED_REFERENCE_READ_NODE:
20731 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20733 case PM_GLOBAL_VARIABLE_READ_NODE: {
20734 parser_lex(parser);
20735
20736 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20737 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20738
20739 pm_node_destroy(parser, node);
20740 return result;
20741 }
20742 case PM_CLASS_VARIABLE_READ_NODE: {
20743 parser_lex(parser);
20744
20745 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20746 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20747
20748 pm_node_destroy(parser, node);
20749 return result;
20750 }
20751 case PM_CONSTANT_PATH_NODE: {
20752 parser_lex(parser);
20753
20754 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20755 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20756
20757 return parse_shareable_constant_write(parser, write);
20758 }
20759 case PM_CONSTANT_READ_NODE: {
20760 parser_lex(parser);
20761
20762 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20763 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20764
20765 pm_node_destroy(parser, node);
20766 return parse_shareable_constant_write(parser, write);
20767 }
20768 case PM_INSTANCE_VARIABLE_READ_NODE: {
20769 parser_lex(parser);
20770
20771 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20772 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20773
20774 pm_node_destroy(parser, node);
20775 return result;
20776 }
20777 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20778 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20779 parser_lex(parser);
20780
20781 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20782 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20783
20784 pm_node_unreference(parser, node);
20785 pm_node_destroy(parser, node);
20786 return result;
20787 }
20788 case PM_LOCAL_VARIABLE_READ_NODE: {
20789 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20790 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20791 pm_node_unreference(parser, node);
20792 }
20793
20794 pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
20795 parser_lex(parser);
20796
20797 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20798 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20799
20800 pm_node_destroy(parser, node);
20801 return result;
20802 }
20803 case PM_CALL_NODE: {
20804 pm_call_node_t *cast = (pm_call_node_t *) node;
20805
20806 // If we have a vcall (a method with no arguments and no
20807 // receiver that could have been a local variable) then we
20808 // will transform it into a local variable write.
20809 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20810 pm_location_t *message_loc = &cast->message_loc;
20811 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20812
20813 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20814 parser_lex(parser);
20815
20816 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20817 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20818
20819 pm_node_destroy(parser, UP(cast));
20820 return result;
20821 }
20822
20823 // Move past the token here so that we have already added
20824 // the local variable by this point.
20825 parser_lex(parser);
20826
20827 // If there is no call operator and the message is "[]" then
20828 // this is an aref expression, and we can transform it into
20829 // an aset expression.
20830 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20831 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20832 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20833 }
20834
20835 // If this node cannot be writable, then we have an error.
20836 if (pm_call_node_writable_p(parser, cast)) {
20837 parse_write_name(parser, &cast->name);
20838 } else {
20839 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20840 }
20841
20842 parse_call_operator_write(parser, cast, &token);
20843 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20844 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20845 }
20846 case PM_MULTI_WRITE_NODE: {
20847 parser_lex(parser);
20848 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20849 return node;
20850 }
20851 default:
20852 parser_lex(parser);
20853
20854 // In this case we have an &&= sign, but we don't know what it's for.
20855 // We need to treat it as an error. For now, we'll mark it as an error
20856 // and just skip right past it.
20857 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20858 return node;
20859 }
20860 }
20861 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20862 switch (PM_NODE_TYPE(node)) {
20863 case PM_BACK_REFERENCE_READ_NODE:
20864 case PM_NUMBERED_REFERENCE_READ_NODE:
20865 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20867 case PM_GLOBAL_VARIABLE_READ_NODE: {
20868 parser_lex(parser);
20869
20870 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20871 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20872
20873 pm_node_destroy(parser, node);
20874 return result;
20875 }
20876 case PM_CLASS_VARIABLE_READ_NODE: {
20877 parser_lex(parser);
20878
20879 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20880 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20881
20882 pm_node_destroy(parser, node);
20883 return result;
20884 }
20885 case PM_CONSTANT_PATH_NODE: {
20886 parser_lex(parser);
20887
20888 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20889 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20890
20891 return parse_shareable_constant_write(parser, write);
20892 }
20893 case PM_CONSTANT_READ_NODE: {
20894 parser_lex(parser);
20895
20896 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20897 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20898
20899 pm_node_destroy(parser, node);
20900 return parse_shareable_constant_write(parser, write);
20901 }
20902 case PM_INSTANCE_VARIABLE_READ_NODE: {
20903 parser_lex(parser);
20904
20905 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20906 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20907
20908 pm_node_destroy(parser, node);
20909 return result;
20910 }
20911 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20912 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20913 parser_lex(parser);
20914
20915 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20916 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20917
20918 pm_node_unreference(parser, node);
20919 pm_node_destroy(parser, node);
20920 return result;
20921 }
20922 case PM_LOCAL_VARIABLE_READ_NODE: {
20923 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20924 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20925 pm_node_unreference(parser, node);
20926 }
20927
20928 pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
20929 parser_lex(parser);
20930
20931 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20932 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20933
20934 pm_node_destroy(parser, node);
20935 return result;
20936 }
20937 case PM_CALL_NODE: {
20938 pm_call_node_t *cast = (pm_call_node_t *) node;
20939
20940 // If we have a vcall (a method with no arguments and no
20941 // receiver that could have been a local variable) then we
20942 // will transform it into a local variable write.
20943 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20944 pm_location_t *message_loc = &cast->message_loc;
20945 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20946
20947 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20948 parser_lex(parser);
20949
20950 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20951 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20952
20953 pm_node_destroy(parser, UP(cast));
20954 return result;
20955 }
20956
20957 // Move past the token here so that we have already added
20958 // the local variable by this point.
20959 parser_lex(parser);
20960
20961 // If there is no call operator and the message is "[]" then
20962 // this is an aref expression, and we can transform it into
20963 // an aset expression.
20964 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20965 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20966 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20967 }
20968
20969 // If this node cannot be writable, then we have an error.
20970 if (pm_call_node_writable_p(parser, cast)) {
20971 parse_write_name(parser, &cast->name);
20972 } else {
20973 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20974 }
20975
20976 parse_call_operator_write(parser, cast, &token);
20977 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20978 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20979 }
20980 case PM_MULTI_WRITE_NODE: {
20981 parser_lex(parser);
20982 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20983 return node;
20984 }
20985 default:
20986 parser_lex(parser);
20987
20988 // In this case we have an ||= sign, but we don't know what it's for.
20989 // We need to treat it as an error. For now, we'll mark it as an error
20990 // and just skip right past it.
20991 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20992 return node;
20993 }
20994 }
20995 case PM_TOKEN_AMPERSAND_EQUAL:
20996 case PM_TOKEN_CARET_EQUAL:
20997 case PM_TOKEN_GREATER_GREATER_EQUAL:
20998 case PM_TOKEN_LESS_LESS_EQUAL:
20999 case PM_TOKEN_MINUS_EQUAL:
21000 case PM_TOKEN_PERCENT_EQUAL:
21001 case PM_TOKEN_PIPE_EQUAL:
21002 case PM_TOKEN_PLUS_EQUAL:
21003 case PM_TOKEN_SLASH_EQUAL:
21004 case PM_TOKEN_STAR_EQUAL:
21005 case PM_TOKEN_STAR_STAR_EQUAL: {
21006 switch (PM_NODE_TYPE(node)) {
21007 case PM_BACK_REFERENCE_READ_NODE:
21008 case PM_NUMBERED_REFERENCE_READ_NODE:
21009 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21011 case PM_GLOBAL_VARIABLE_READ_NODE: {
21012 parser_lex(parser);
21013
21014 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21015 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
21016
21017 pm_node_destroy(parser, node);
21018 return result;
21019 }
21020 case PM_CLASS_VARIABLE_READ_NODE: {
21021 parser_lex(parser);
21022
21023 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21024 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21025
21026 pm_node_destroy(parser, node);
21027 return result;
21028 }
21029 case PM_CONSTANT_PATH_NODE: {
21030 parser_lex(parser);
21031
21032 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21033 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21034
21035 return parse_shareable_constant_write(parser, write);
21036 }
21037 case PM_CONSTANT_READ_NODE: {
21038 parser_lex(parser);
21039
21040 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21041 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21042
21043 pm_node_destroy(parser, node);
21044 return parse_shareable_constant_write(parser, write);
21045 }
21046 case PM_INSTANCE_VARIABLE_READ_NODE: {
21047 parser_lex(parser);
21048
21049 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21050 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21051
21052 pm_node_destroy(parser, node);
21053 return result;
21054 }
21055 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21056 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21057 parser_lex(parser);
21058
21059 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21060 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21061
21062 pm_node_unreference(parser, node);
21063 pm_node_destroy(parser, node);
21064 return result;
21065 }
21066 case PM_LOCAL_VARIABLE_READ_NODE: {
21067 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21068 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21069 pm_node_unreference(parser, node);
21070 }
21071
21072 pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
21073 parser_lex(parser);
21074
21075 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21076 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21077
21078 pm_node_destroy(parser, node);
21079 return result;
21080 }
21081 case PM_CALL_NODE: {
21082 parser_lex(parser);
21083 pm_call_node_t *cast = (pm_call_node_t *) node;
21084
21085 // If we have a vcall (a method with no arguments and no
21086 // receiver that could have been a local variable) then we
21087 // will transform it into a local variable write.
21088 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21089 pm_location_t *message_loc = &cast->message_loc;
21090 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21091
21092 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21093 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21094 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21095
21096 pm_node_destroy(parser, UP(cast));
21097 return result;
21098 }
21099
21100 // If there is no call operator and the message is "[]" then
21101 // this is an aref expression, and we can transform it into
21102 // an aset expression.
21103 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21104 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21105 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21106 }
21107
21108 // If this node cannot be writable, then we have an error.
21109 if (pm_call_node_writable_p(parser, cast)) {
21110 parse_write_name(parser, &cast->name);
21111 } else {
21112 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21113 }
21114
21115 parse_call_operator_write(parser, cast, &token);
21116 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21117 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21118 }
21119 case PM_MULTI_WRITE_NODE: {
21120 parser_lex(parser);
21121 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21122 return node;
21123 }
21124 default:
21125 parser_lex(parser);
21126
21127 // In this case we have an operator but we don't know what it's for.
21128 // We need to treat it as an error. For now, we'll mark it as an error
21129 // and just skip right past it.
21130 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21131 return node;
21132 }
21133 }
21134 case PM_TOKEN_AMPERSAND_AMPERSAND:
21135 case PM_TOKEN_KEYWORD_AND: {
21136 parser_lex(parser);
21137
21138 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21139 return UP(pm_and_node_create(parser, node, &token, right));
21140 }
21141 case PM_TOKEN_KEYWORD_OR:
21142 case PM_TOKEN_PIPE_PIPE: {
21143 parser_lex(parser);
21144
21145 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21146 return UP(pm_or_node_create(parser, node, &token, right));
21147 }
21148 case PM_TOKEN_EQUAL_TILDE: {
21149 // Note that we _must_ parse the value before adding the local
21150 // variables in order to properly mirror the behavior of Ruby. For
21151 // example,
21152 //
21153 // /(?<foo>bar)/ =~ foo
21154 //
21155 // In this case, `foo` should be a method call and not a local yet.
21156 parser_lex(parser);
21157 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21158
21159 // By default, we're going to create a call node and then return it.
21160 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21161 pm_node_t *result = UP(call);
21162
21163 // If the receiver of this =~ is a regular expression node, then we
21164 // need to introduce local variables for it based on its named
21165 // capture groups.
21166 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21167 // It's possible to have an interpolated regular expression node
21168 // that only contains strings. This is because it can be split
21169 // up by a heredoc. In this case we need to concat the unescaped
21170 // strings together and then parse them as a regular expression.
21171 pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
21172
21173 bool interpolated = false;
21174 size_t total_length = 0;
21175
21176 pm_node_t *part;
21177 PM_NODE_LIST_FOREACH(parts, index, part) {
21178 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21179 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21180 } else {
21181 interpolated = true;
21182 break;
21183 }
21184 }
21185
21186 if (!interpolated && total_length > 0) {
21187 void *memory = xmalloc(total_length);
21188 if (!memory) abort();
21189
21190 uint8_t *cursor = memory;
21191 PM_NODE_LIST_FOREACH(parts, index, part) {
21192 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21193 size_t length = pm_string_length(unescaped);
21194
21195 memcpy(cursor, pm_string_source(unescaped), length);
21196 cursor += length;
21197 }
21198
21199 pm_string_t owned;
21200 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21201
21202 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21203 pm_string_free(&owned);
21204 }
21205 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21206 // If we have a regular expression node, then we can just parse
21207 // the named captures directly off the unescaped string.
21208 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21209 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21210 }
21211
21212 return result;
21213 }
21214 case PM_TOKEN_UAMPERSAND:
21215 case PM_TOKEN_USTAR:
21216 case PM_TOKEN_USTAR_STAR:
21217 // The only times this will occur are when we are in an error state,
21218 // but we'll put them in here so that errors can propagate.
21219 case PM_TOKEN_BANG_EQUAL:
21220 case PM_TOKEN_BANG_TILDE:
21221 case PM_TOKEN_EQUAL_EQUAL:
21222 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21223 case PM_TOKEN_LESS_EQUAL_GREATER:
21224 case PM_TOKEN_CARET:
21225 case PM_TOKEN_PIPE:
21226 case PM_TOKEN_AMPERSAND:
21227 case PM_TOKEN_GREATER_GREATER:
21228 case PM_TOKEN_LESS_LESS:
21229 case PM_TOKEN_MINUS:
21230 case PM_TOKEN_PLUS:
21231 case PM_TOKEN_PERCENT:
21232 case PM_TOKEN_SLASH:
21233 case PM_TOKEN_STAR:
21234 case PM_TOKEN_STAR_STAR: {
21235 parser_lex(parser);
21236 pm_token_t operator = parser->previous;
21237 switch (PM_NODE_TYPE(node)) {
21238 case PM_RESCUE_MODIFIER_NODE: {
21239 pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21240 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21241 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21242 }
21243 break;
21244 }
21245 case PM_AND_NODE: {
21246 pm_and_node_t *cast = (pm_and_node_t *) node;
21247 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21248 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21249 }
21250 break;
21251 }
21252 case PM_OR_NODE: {
21253 pm_or_node_t *cast = (pm_or_node_t *) node;
21254 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21255 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21256 }
21257 break;
21258 }
21259 default:
21260 break;
21261 }
21262
21263 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21264 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21265 }
21266 case PM_TOKEN_GREATER:
21267 case PM_TOKEN_GREATER_EQUAL:
21268 case PM_TOKEN_LESS:
21269 case PM_TOKEN_LESS_EQUAL: {
21270 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21271 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21272 }
21273
21274 parser_lex(parser);
21275 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21276 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21277 }
21278 case PM_TOKEN_AMPERSAND_DOT:
21279 case PM_TOKEN_DOT: {
21280 parser_lex(parser);
21281 pm_token_t operator = parser->previous;
21282 pm_arguments_t arguments = { 0 };
21283
21284 // This if statement handles the foo.() syntax.
21285 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21286 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21287 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21288 }
21289
21290 switch (PM_NODE_TYPE(node)) {
21291 case PM_RESCUE_MODIFIER_NODE: {
21292 pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21293 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21294 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21295 }
21296 break;
21297 }
21298 case PM_AND_NODE: {
21299 pm_and_node_t *cast = (pm_and_node_t *) node;
21300 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21301 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21302 }
21303 break;
21304 }
21305 case PM_OR_NODE: {
21306 pm_or_node_t *cast = (pm_or_node_t *) node;
21307 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21308 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21309 }
21310 break;
21311 }
21312 default:
21313 break;
21314 }
21315
21316 pm_token_t message;
21317
21318 switch (parser->current.type) {
21319 case PM_CASE_OPERATOR:
21320 case PM_CASE_KEYWORD:
21321 case PM_TOKEN_CONSTANT:
21322 case PM_TOKEN_IDENTIFIER:
21323 case PM_TOKEN_METHOD_NAME: {
21324 parser_lex(parser);
21325 message = parser->previous;
21326 break;
21327 }
21328 default: {
21329 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21330 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21331 }
21332 }
21333
21334 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21335 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21336
21337 if (
21338 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21339 arguments.arguments == NULL &&
21340 arguments.opening_loc.start == NULL &&
21341 match1(parser, PM_TOKEN_COMMA)
21342 ) {
21343 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21344 } else {
21345 return UP(call);
21346 }
21347 }
21348 case PM_TOKEN_DOT_DOT:
21349 case PM_TOKEN_DOT_DOT_DOT: {
21350 parser_lex(parser);
21351
21352 pm_node_t *right = NULL;
21353 if (token_begins_expression_p(parser->current.type)) {
21354 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21355 }
21356
21357 return UP(pm_range_node_create(parser, node, &token, right));
21358 }
21359 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21360 pm_token_t keyword = parser->current;
21361 parser_lex(parser);
21362
21363 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21364 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21365 }
21366 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21367 pm_token_t keyword = parser->current;
21368 parser_lex(parser);
21369
21370 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21371 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21372 }
21373 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21374 parser_lex(parser);
21375 pm_statements_node_t *statements = pm_statements_node_create(parser);
21376 pm_statements_node_body_append(parser, statements, node, true);
21377
21378 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21379 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21380 }
21381 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21382 parser_lex(parser);
21383 pm_statements_node_t *statements = pm_statements_node_create(parser);
21384 pm_statements_node_body_append(parser, statements, node, true);
21385
21386 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21387 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21388 }
21389 case PM_TOKEN_QUESTION_MARK: {
21390 context_push(parser, PM_CONTEXT_TERNARY);
21391 pm_node_list_t current_block_exits = { 0 };
21392 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21393
21394 pm_token_t qmark = parser->current;
21395 parser_lex(parser);
21396
21397 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21398
21399 if (parser->recovering) {
21400 // If parsing the true expression of this ternary resulted in a syntax
21401 // error that we can recover from, then we're going to put missing nodes
21402 // and tokens into the remaining places. We want to be sure to do this
21403 // before the `expect` function call to make sure it doesn't
21404 // accidentally move past a ':' token that occurs after the syntax
21405 // error.
21406 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21407 pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end));
21408
21409 context_pop(parser);
21410 pop_block_exits(parser, previous_block_exits);
21411 pm_node_list_free(&current_block_exits);
21412
21413 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21414 }
21415
21416 accept1(parser, PM_TOKEN_NEWLINE);
21417 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21418
21419 pm_token_t colon = parser->previous;
21420 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21421
21422 context_pop(parser);
21423 pop_block_exits(parser, previous_block_exits);
21424 pm_node_list_free(&current_block_exits);
21425
21426 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21427 }
21428 case PM_TOKEN_COLON_COLON: {
21429 parser_lex(parser);
21430 pm_token_t delimiter = parser->previous;
21431
21432 switch (parser->current.type) {
21433 case PM_TOKEN_CONSTANT: {
21434 parser_lex(parser);
21435 pm_node_t *path;
21436
21437 if (
21438 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21439 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21440 ) {
21441 // If we have a constant immediately following a '::' operator, then
21442 // this can either be a constant path or a method call, depending on
21443 // what follows the constant.
21444 //
21445 // If we have parentheses, then this is a method call. That would
21446 // look like Foo::Bar().
21447 pm_token_t message = parser->previous;
21448 pm_arguments_t arguments = { 0 };
21449
21450 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21451 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21452 } else {
21453 // Otherwise, this is a constant path. That would look like Foo::Bar.
21454 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21455 }
21456
21457 // If this is followed by a comma then it is a multiple assignment.
21458 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21459 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21460 }
21461
21462 return path;
21463 }
21464 case PM_CASE_OPERATOR:
21465 case PM_CASE_KEYWORD:
21466 case PM_TOKEN_IDENTIFIER:
21467 case PM_TOKEN_METHOD_NAME: {
21468 parser_lex(parser);
21469 pm_token_t message = parser->previous;
21470
21471 // If we have an identifier following a '::' operator, then it is for
21472 // sure a method call.
21473 pm_arguments_t arguments = { 0 };
21474 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21475 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21476
21477 // If this is followed by a comma then it is a multiple assignment.
21478 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21479 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21480 }
21481
21482 return UP(call);
21483 }
21484 case PM_TOKEN_PARENTHESIS_LEFT: {
21485 // If we have a parenthesis following a '::' operator, then it is the
21486 // method call shorthand. That would look like Foo::(bar).
21487 pm_arguments_t arguments = { 0 };
21488 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21489
21490 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21491 }
21492 default: {
21493 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21494 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21495 }
21496 }
21497 }
21498 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21499 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21500 parser_lex(parser);
21501 accept1(parser, PM_TOKEN_NEWLINE);
21502
21503 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21504 context_pop(parser);
21505
21506 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21507 }
21508 case PM_TOKEN_BRACKET_LEFT: {
21509 parser_lex(parser);
21510
21511 pm_arguments_t arguments = { 0 };
21512 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21513
21514 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21515 pm_accepts_block_stack_push(parser, true);
21516 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21517 pm_accepts_block_stack_pop(parser);
21518 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21519 }
21520
21521 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21522
21523 // If we have a comma after the closing bracket then this is a multiple
21524 // assignment and we should parse the targets.
21525 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21526 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21527 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21528 }
21529
21530 // If we're at the end of the arguments, we can now check if there is a
21531 // block node that starts with a {. If there is, then we can parse it and
21532 // add it to the arguments.
21533 pm_block_node_t *block = NULL;
21534 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21535 block = parse_block(parser, (uint16_t) (depth + 1));
21536 pm_arguments_validate_block(parser, &arguments, block);
21537 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21538 block = parse_block(parser, (uint16_t) (depth + 1));
21539 }
21540
21541 if (block != NULL) {
21542 if (arguments.block != NULL) {
21543 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21544 if (arguments.arguments == NULL) {
21545 arguments.arguments = pm_arguments_node_create(parser);
21546 }
21547 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21548 }
21549
21550 arguments.block = UP(block);
21551 }
21552
21553 return UP(pm_call_node_aref_create(parser, node, &arguments));
21554 }
21555 case PM_TOKEN_KEYWORD_IN: {
21556 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21557 parser->pattern_matching_newlines = true;
21558
21559 pm_token_t operator = parser->current;
21560 parser->command_start = false;
21561 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21562 parser_lex(parser);
21563
21564 pm_constant_id_list_t captures = { 0 };
21565 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21566
21567 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21568 pm_constant_id_list_free(&captures);
21569
21570 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21571 }
21572 case PM_TOKEN_EQUAL_GREATER: {
21573 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21574 parser->pattern_matching_newlines = true;
21575
21576 pm_token_t operator = parser->current;
21577 parser->command_start = false;
21578 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21579 parser_lex(parser);
21580
21581 pm_constant_id_list_t captures = { 0 };
21582 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21583
21584 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21585 pm_constant_id_list_free(&captures);
21586
21587 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21588 }
21589 default:
21590 assert(false && "unreachable");
21591 return NULL;
21592 }
21593}
21594
21595#undef PM_PARSE_PATTERN_SINGLE
21596#undef PM_PARSE_PATTERN_TOP
21597#undef PM_PARSE_PATTERN_MULTI
21598
21603static inline bool
21604pm_call_node_command_p(const pm_call_node_t *node) {
21605 return (
21606 (node->opening_loc.start == NULL) &&
21607 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21608 (node->arguments != NULL || node->block != NULL)
21609 );
21610}
21611
21620static pm_node_t *
21621parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
21622 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21623 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21624 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
21625 }
21626
21627 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21628
21629 switch (PM_NODE_TYPE(node)) {
21630 case PM_MISSING_NODE:
21631 // If we found a syntax error, then the type of node returned by
21632 // parse_expression_prefix is going to be a missing node.
21633 return node;
21634 case PM_PRE_EXECUTION_NODE:
21635 case PM_POST_EXECUTION_NODE:
21636 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21637 case PM_ALIAS_METHOD_NODE:
21638 case PM_MULTI_WRITE_NODE:
21639 case PM_UNDEF_NODE:
21640 // These expressions are statements, and cannot be followed by
21641 // operators (except modifiers).
21642 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21643 return node;
21644 }
21645 break;
21646 case PM_CALL_NODE:
21647 // If we have a call node, then we need to check if it looks like a
21648 // method call without parentheses that contains arguments. If it
21649 // does, then it has different rules for parsing infix operators,
21650 // namely that it only accepts composition (and/or) and modifiers
21651 // (if/unless/etc.).
21652 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21653 return node;
21654 }
21655 break;
21656 case PM_SYMBOL_NODE:
21657 // If we have a symbol node that is being parsed as a label, then we
21658 // need to immediately return, because there should never be an
21659 // infix operator following this node.
21660 if (pm_symbol_node_label_p(node)) {
21661 return node;
21662 }
21663 break;
21664 default:
21665 break;
21666 }
21667
21668 // Otherwise we'll look and see if the next token can be parsed as an infix
21669 // operator. If it can, then we'll parse it using parse_expression_infix.
21670 pm_binding_powers_t current_binding_powers;
21671 pm_token_type_t current_token_type;
21672
21673 while (
21674 current_token_type = parser->current.type,
21675 current_binding_powers = pm_binding_powers[current_token_type],
21676 binding_power <= current_binding_powers.left &&
21677 current_binding_powers.binary
21678 ) {
21679 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21680
21681 switch (PM_NODE_TYPE(node)) {
21682 case PM_MULTI_WRITE_NODE:
21683 // Multi-write nodes are statements, and cannot be followed by
21684 // operators except modifiers.
21685 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21686 return node;
21687 }
21688 break;
21689 case PM_CLASS_VARIABLE_WRITE_NODE:
21690 case PM_CONSTANT_PATH_WRITE_NODE:
21691 case PM_CONSTANT_WRITE_NODE:
21692 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21693 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21694 case PM_LOCAL_VARIABLE_WRITE_NODE:
21695 // These expressions are statements, by virtue of the right-hand
21696 // side of their write being an implicit array.
21697 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21698 return node;
21699 }
21700 break;
21701 case PM_CALL_NODE:
21702 // These expressions are also statements, by virtue of the
21703 // right-hand side of the expression (i.e., the last argument to
21704 // the call node) being an implicit array.
21705 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21706 return node;
21707 }
21708 break;
21709 default:
21710 break;
21711 }
21712
21713 // If the operator is nonassoc and we should not be able to parse the
21714 // upcoming infix operator, break.
21715 if (current_binding_powers.nonassoc) {
21716 // If this is a non-assoc operator and we are about to parse the
21717 // exact same operator, then we need to add an error.
21718 if (match1(parser, current_token_type)) {
21719 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21720 break;
21721 }
21722
21723 // If this is an endless range, then we need to reject a couple of
21724 // additional operators because it violates the normal operator
21725 // precedence rules. Those patterns are:
21726 //
21727 // 1.. & 2
21728 // 1.. * 2
21729 //
21730 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21731 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21732 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21733 break;
21734 }
21735
21736 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21737 break;
21738 }
21739 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21740 break;
21741 }
21742 }
21743
21744 if (accepts_command_call) {
21745 // A command-style method call is only accepted on method chains.
21746 // Thus, we check whether the parsed node can continue method chains.
21747 // The method chain can continue if the parsed node is one of the following five kinds:
21748 // (1) index access: foo[1]
21749 // (2) attribute access: foo.bar
21750 // (3) method call with parenthesis: foo.bar(1)
21751 // (4) method call with a block: foo.bar do end
21752 // (5) constant path: foo::Bar
21753 switch (node->type) {
21754 case PM_CALL_NODE: {
21755 pm_call_node_t *cast = (pm_call_node_t *)node;
21756 if (
21757 // (1) foo[1]
21758 !(
21759 cast->call_operator_loc.start == NULL &&
21760 cast->message_loc.start != NULL &&
21761 cast->message_loc.start[0] == '[' &&
21762 cast->message_loc.end[-1] == ']'
21763 ) &&
21764 // (2) foo.bar
21765 !(
21766 cast->call_operator_loc.start != NULL &&
21767 cast->arguments == NULL &&
21768 cast->block == NULL &&
21769 cast->opening_loc.start == NULL
21770 ) &&
21771 // (3) foo.bar(1)
21772 !(
21773 cast->call_operator_loc.start != NULL &&
21774 cast->opening_loc.start != NULL
21775 ) &&
21776 // (4) foo.bar do end
21777 !(
21778 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21779 )
21780 ) {
21781 accepts_command_call = false;
21782 }
21783 break;
21784 }
21785 // (5) foo::Bar
21786 case PM_CONSTANT_PATH_NODE:
21787 break;
21788 default:
21789 accepts_command_call = false;
21790 break;
21791 }
21792 }
21793
21794 if (context_terminator(parser->current_context->context, &parser->current)) {
21795 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
21796 if (
21797 !next_binding_powers.binary ||
21798 binding_power > next_binding_powers.left ||
21799 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
21800 ) {
21801 return node;
21802 }
21803 }
21804 }
21805
21806 return node;
21807}
21808
21813static pm_statements_node_t *
21814wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21815 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21816 if (statements == NULL) {
21817 statements = pm_statements_node_create(parser);
21818 }
21819
21820 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21821 pm_arguments_node_arguments_append(
21822 arguments,
21823 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21824 );
21825
21826 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21827 parser,
21828 arguments,
21829 pm_parser_constant_id_constant(parser, "print", 5)
21830 )), true);
21831 }
21832
21833 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21834 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21835 if (statements == NULL) {
21836 statements = pm_statements_node_create(parser);
21837 }
21838
21839 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21840 pm_arguments_node_arguments_append(
21841 arguments,
21842 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21843 );
21844
21845 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21846 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21847
21848 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21849 parser,
21850 pm_parser_constant_id_constant(parser, "$F", 2),
21851 UP(call)
21852 );
21853
21854 pm_statements_node_body_prepend(statements, UP(write));
21855 }
21856
21857 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21858 pm_arguments_node_arguments_append(
21859 arguments,
21860 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21861 );
21862
21863 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21864 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21865 pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create(
21866 parser,
21867 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21868 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
21869 UP(pm_true_node_synthesized_create(parser))
21870 )));
21871
21872 pm_arguments_node_arguments_append(arguments, UP(keywords));
21873 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21874 }
21875
21876 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21877 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21878 parser,
21879 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21880 statements
21881 )), true);
21882
21883 statements = wrapped_statements;
21884 }
21885
21886 return statements;
21887}
21888
21892static pm_node_t *
21893parse_program(pm_parser_t *parser) {
21894 // If the current scope is NULL, then we want to push a new top level scope.
21895 // The current scope could exist in the event that we are parsing an eval
21896 // and the user has passed into scopes that already exist.
21897 if (parser->current_scope == NULL) {
21898 pm_parser_scope_push(parser, true);
21899 }
21900
21901 pm_node_list_t current_block_exits = { 0 };
21902 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21903
21904 parser_lex(parser);
21905 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21906
21907 if (statements != NULL && !parser->parsing_eval) {
21908 // If we have statements, then the top-level statement should be
21909 // explicitly checked as well. We have to do this here because
21910 // everywhere else we check all but the last statement.
21911 assert(statements->body.size > 0);
21912 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21913 }
21914
21915 pm_constant_id_list_t locals;
21916 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21917 pm_parser_scope_pop(parser);
21918
21919 // At the top level, see if we need to wrap the statements in a program
21920 // node with a while loop based on the options.
21922 statements = wrap_statements(parser, statements);
21923 } else {
21924 flush_block_exits(parser, previous_block_exits);
21925 }
21926
21927 pm_node_list_free(&current_block_exits);
21928
21929 // If this is an empty file, then we're still going to parse all of the
21930 // statements in order to gather up all of the comments and such. Here we'll
21931 // correct the location information.
21932 if (statements == NULL) {
21933 statements = pm_statements_node_create(parser);
21934 pm_statements_node_location_set(statements, parser->start, parser->start);
21935 }
21936
21937 return UP(pm_program_node_create(parser, &locals, statements));
21938}
21939
21940/******************************************************************************/
21941/* External functions */
21942/******************************************************************************/
21943
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not need to be NUL-terminated.
 *
 * @param big The buffer to search in.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the first match, `big` itself if `little`
 *     is empty, or NULL if there is no match (including when `little` is
 *     longer than `big_length`).
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very start, without touching `big`.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Bailing out here also
    // keeps the pointer arithmetic below from stepping before `big`.
    if (little_length > big_length) return NULL;

    for (const char *max = big + (big_length - little_length); big <= max; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
21963
21964#ifdef _WIN32
21965#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21966#else
21972static void
21973pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21974 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21975 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
21976 }
21977}
21978#endif
21979
21984static void
21985pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
21986 const char *switches = pm_strnstr(engine, " -", length);
21987 if (switches == NULL) return;
21988
21989 pm_options_t next_options = *options;
21990 options->shebang_callback(
21991 &next_options,
21992 (const uint8_t *) (switches + 1),
21993 length - ((size_t) (switches - engine)) - 1,
21994 options->shebang_callback_data
21995 );
21996
21997 size_t encoding_length;
21998 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
21999 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22000 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22001 }
22002
22003 parser->command_line = next_options.command_line;
22004 parser->frozen_string_literal = next_options.frozen_string_literal;
22005}
22006
22011pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22012 assert(source != NULL);
22013
22014 *parser = (pm_parser_t) {
22015 .node_id = 0,
22016 .lex_state = PM_LEX_STATE_BEG,
22017 .enclosure_nesting = 0,
22018 .lambda_enclosure_nesting = -1,
22019 .brace_nesting = 0,
22020 .do_loop_stack = 0,
22021 .accepts_block_stack = 0,
22022 .lex_modes = {
22023 .index = 0,
22024 .stack = {{ .mode = PM_LEX_DEFAULT }},
22025 .current = &parser->lex_modes.stack[0],
22026 },
22027 .start = source,
22028 .end = source + size,
22029 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22030 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22031 .next_start = NULL,
22032 .heredoc_end = NULL,
22033 .data_loc = { .start = NULL, .end = NULL },
22034 .comment_list = { 0 },
22035 .magic_comment_list = { 0 },
22036 .warning_list = { 0 },
22037 .error_list = { 0 },
22038 .current_scope = NULL,
22039 .current_context = NULL,
22040 .encoding = PM_ENCODING_UTF_8_ENTRY,
22041 .encoding_changed_callback = NULL,
22042 .encoding_comment_start = source,
22043 .lex_callback = NULL,
22044 .filepath = { 0 },
22045 .constant_pool = { 0 },
22046 .newline_list = { 0 },
22047 .integer_base = 0,
22048 .current_string = PM_STRING_EMPTY,
22049 .start_line = 1,
22050 .explicit_encoding = NULL,
22051 .command_line = 0,
22052 .parsing_eval = false,
22053 .partial_script = false,
22054 .command_start = true,
22055 .recovering = false,
22056 .encoding_locked = false,
22057 .encoding_changed = false,
22058 .pattern_matching_newlines = false,
22059 .in_keyword_arg = false,
22060 .current_block_exits = NULL,
22061 .semantic_token_seen = false,
22062 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22063 .current_regular_expression_ascii_only = false,
22064 .warn_mismatched_indentation = true
22065 };
22066
22067 // Initialize the constant pool. We're going to completely guess as to the
22068 // number of constants that we'll need based on the size of the input. The
22069 // ratio we chose here is actually less arbitrary than you might think.
22070 //
22071 // We took ~50K Ruby files and measured the size of the file versus the
22072 // number of constants that were found in those files. Then we found the
22073 // average and standard deviation of the ratios of constants/bytesize. Then
22074 // we added 1.34 standard deviations to the average to get a ratio that
22075 // would fit 75% of the files (for a two-tailed distribution). This works
22076 // because there was about a 0.77 correlation and the distribution was
22077 // roughly normal.
22078 //
22079 // This ratio will need to change if we add more constants to the constant
22080 // pool for another node type.
22081 uint32_t constant_size = ((uint32_t) size) / 95;
22082 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22083
22084 // Initialize the newline list. Similar to the constant pool, we're going to
22085 // guess at the number of newlines that we'll need based on the size of the
22086 // input.
22087 size_t newline_size = size / 22;
22088 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22089
22090 // If options were provided to this parse, establish them here.
22091 if (options != NULL) {
22092 // filepath option
22093 parser->filepath = options->filepath;
22094
22095 // line option
22096 parser->start_line = options->line;
22097
22098 // encoding option
22099 size_t encoding_length = pm_string_length(&options->encoding);
22100 if (encoding_length > 0) {
22101 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22102 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22103 }
22104
22105 // encoding_locked option
22106 parser->encoding_locked = options->encoding_locked;
22107
22108 // frozen_string_literal option
22110
22111 // command_line option
22112 parser->command_line = options->command_line;
22113
22114 // version option
22115 parser->version = options->version;
22116
22117 // partial_script
22118 parser->partial_script = options->partial_script;
22119
22120 // scopes option
22121 parser->parsing_eval = options->scopes_count > 0;
22122 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22123
22124 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22125 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22126 pm_parser_scope_push(parser, scope_index == 0);
22127
22128 // Scopes given from the outside are not allowed to have numbered
22129 // parameters.
22130 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22131
22132 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22133 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22134
22135 const uint8_t *source = pm_string_source(local);
22136 size_t length = pm_string_length(local);
22137
22138 void *allocated = xmalloc(length);
22139 if (allocated == NULL) continue;
22140
22141 memcpy(allocated, source, length);
22142 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22143 }
22144 }
22145 }
22146
22147 // Now that we have established the user-provided options, check if
22148 // a version was given and parse as the latest version otherwise.
22149 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22151 }
22152
22153 pm_accepts_block_stack_push(parser, true);
22154
22155 // Skip past the UTF-8 BOM if it exists.
22156 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22157 parser->current.end += 3;
22158 parser->encoding_comment_start += 3;
22159
22160 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22162 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22163 }
22164 }
22165
22166 // If the -x command line flag is set, or the first shebang of the file does
22167 // not include "ruby", then we'll search for a shebang that does include
22168 // "ruby" and start parsing from there.
22169 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22170
22171 // If the first two bytes of the source are a shebang, then we will do a bit
22172 // of extra processing.
22173 //
22174 // First, we'll indicate that the encoding comment is at the end of the
22175 // shebang. This means that when a shebang is present the encoding comment
22176 // can begin on the second line.
22177 //
22178 // Second, we will check if the shebang includes "ruby". If it does, then we
22179 // we will start parsing from there. We will also potentially warning the
22180 // user if there is a carriage return at the end of the shebang. We will
22181 // also potentially call the shebang callback if this is the main script to
22182 // allow the caller to parse the shebang and find any command-line options.
22183 // If the shebang does not include "ruby" and this is the main script being
22184 // parsed, then we will start searching the file for a shebang that does
22185 // contain "ruby" as if -x were passed on the command line.
22186 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22187 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22188
22189 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22190 const char *engine;
22191
22192 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22193 if (newline != NULL) {
22194 parser->encoding_comment_start = newline + 1;
22195
22196 if (options == NULL || options->main_script) {
22197 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22198 }
22199 }
22200
22201 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22202 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22203 }
22204
22205 search_shebang = false;
22206 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22207 search_shebang = true;
22208 }
22209 }
22210
22211 // Here we're going to find the first shebang that includes "ruby" and start
22212 // parsing from there.
22213 if (search_shebang) {
22214 // If a shebang that includes "ruby" is not found, then we're going to a
22215 // a load error to the list of errors on the parser.
22216 bool found_shebang = false;
22217
22218 // This is going to point to the start of each line as we check it.
22219 // We'll maintain a moving window looking at each line at they come.
22220 const uint8_t *cursor = parser->start;
22221
22222 // The newline pointer points to the end of the current line that we're
22223 // considering. If it is NULL, then we're at the end of the file.
22224 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22225
22226 while (newline != NULL) {
22227 pm_newline_list_append(&parser->newline_list, newline);
22228
22229 cursor = newline + 1;
22230 newline = next_newline(cursor, parser->end - cursor);
22231
22232 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22233 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22234 const char *engine;
22235 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22236 found_shebang = true;
22237
22238 if (newline != NULL) {
22239 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22240 parser->encoding_comment_start = newline + 1;
22241 }
22242
22243 if (options != NULL && options->shebang_callback != NULL) {
22244 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22245 }
22246
22247 break;
22248 }
22249 }
22250 }
22251
22252 if (found_shebang) {
22253 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22254 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22255 } else {
22256 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22257 pm_newline_list_clear(&parser->newline_list);
22258 }
22259 }
22260
22261 // The encoding comment can start after any amount of inline whitespace, so
22262 // here we'll advance it to the first non-inline-whitespace character so
22263 // that it is ready for future comparisons.
22264 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22265}
22266
22275
22279static inline void
22280pm_comment_list_free(pm_list_t *list) {
22281 pm_list_node_t *node, *next;
22282
22283 for (node = list->head; node != NULL; node = next) {
22284 next = node->next;
22285
22286 pm_comment_t *comment = (pm_comment_t *) node;
22287 xfree(comment);
22288 }
22289}
22290
22294static inline void
22295pm_magic_comment_list_free(pm_list_t *list) {
22296 pm_list_node_t *node, *next;
22297
22298 for (node = list->head; node != NULL; node = next) {
22299 next = node->next;
22300
22303 }
22304}
22305
22311 pm_string_free(&parser->filepath);
22312 pm_diagnostic_list_free(&parser->error_list);
22313 pm_diagnostic_list_free(&parser->warning_list);
22314 pm_comment_list_free(&parser->comment_list);
22315 pm_magic_comment_list_free(&parser->magic_comment_list);
22316 pm_constant_pool_free(&parser->constant_pool);
22317 pm_newline_list_free(&parser->newline_list);
22318
22319 while (parser->current_scope != NULL) {
22320 // Normally, popping the scope doesn't free the locals since it is
22321 // assumed that ownership has transferred to the AST. However if we have
22322 // scopes while we're freeing the parser, it's likely they came from
22323 // eval scopes and we need to free them explicitly here.
22324 pm_parser_scope_pop(parser);
22325 }
22326
22327 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22328 lex_mode_pop(parser);
22329 }
22330}
22331
22335PRISM_EXPORTED_FUNCTION pm_node_t *
22337 return parse_program(parser);
22338}
22339
22345static bool
22346pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22347#define LINE_SIZE 4096
22348 char line[LINE_SIZE];
22349
22350 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22351 size_t length = LINE_SIZE;
22352 while (length > 0 && line[length - 1] == '\n') length--;
22353
22354 if (length == LINE_SIZE) {
22355 // If we read a line that is the maximum size and it doesn't end
22356 // with a newline, then we'll just append it to the buffer and
22357 // continue reading.
22358 length--;
22359 pm_buffer_append_string(buffer, line, length);
22360 continue;
22361 }
22362
22363 // Append the line to the buffer.
22364 length--;
22365 pm_buffer_append_string(buffer, line, length);
22366
22367 // Check if the line matches the __END__ marker. If it does, then stop
22368 // reading and return false. In most circumstances, this means we should
22369 // stop reading from the stream so that the DATA constant can pick it
22370 // up.
22371 switch (length) {
22372 case 7:
22373 if (strncmp(line, "__END__", 7) == 0) return false;
22374 break;
22375 case 8:
22376 if (strncmp(line, "__END__\n", 8) == 0) return false;
22377 break;
22378 case 9:
22379 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22380 break;
22381 }
22382
22383 // All data should be read via gets. If the string returned by gets
22384 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22385 if (stream_feof(stream)) {
22386 break;
22387 }
22388 }
22389
22390 return true;
22391#undef LINE_SIZE
22392}
22393
22403static bool
22404pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22405 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22406
22407 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22408 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22409 return true;
22410 }
22411 }
22412
22413 return false;
22414}
22415
22422PRISM_EXPORTED_FUNCTION pm_node_t *
22423pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22424 pm_buffer_init(buffer);
22425
22426 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22427
22428 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22429 pm_node_t *node = pm_parse(parser);
22430
22431 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22432 pm_node_destroy(parser, node);
22433 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22434
22435 pm_parser_free(parser);
22436 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22437 node = pm_parse(parser);
22438 }
22439
22440 return node;
22441}
22442
22447pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22448 pm_options_t options = { 0 };
22449 pm_options_read(&options, data);
22450
22451 pm_parser_t parser;
22452 pm_parser_init(&parser, source, size, &options);
22453
22454 pm_node_t *node = pm_parse(&parser);
22455 pm_node_destroy(&parser, node);
22456
22457 bool result = parser.error_list.size == 0;
22458 pm_parser_free(&parser);
22459 pm_options_free(&options);
22460
22461 return result;
22462}
22463
22464#undef PM_CASE_KEYWORD
22465#undef PM_CASE_OPERATOR
22466#undef PM_CASE_WRITABLE
22467#undef PM_STRING_EMPTY
22468
22469// We optionally support serializing to a binary string. For systems that don't
22470// want or need this functionality, it can be turned off with the
22471// PRISM_EXCLUDE_SERIALIZATION define.
22472#ifndef PRISM_EXCLUDE_SERIALIZATION
22473
22474static inline void
22475pm_serialize_header(pm_buffer_t *buffer) {
22476 pm_buffer_append_string(buffer, "PRISM", 5);
22477 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22478 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22479 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22480 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22481}
22482
22487pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22488 pm_serialize_header(buffer);
22489 pm_serialize_content(parser, node, buffer);
22490 pm_buffer_append_byte(buffer, '\0');
22491}
22492
22498pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22499 pm_options_t options = { 0 };
22500 pm_options_read(&options, data);
22501
22502 pm_parser_t parser;
22503 pm_parser_init(&parser, source, size, &options);
22504
22505 pm_node_t *node = pm_parse(&parser);
22506
22507 pm_serialize_header(buffer);
22508 pm_serialize_content(&parser, node, buffer);
22509 pm_buffer_append_byte(buffer, '\0');
22510
22511 pm_node_destroy(&parser, node);
22512 pm_parser_free(&parser);
22513 pm_options_free(&options);
22514}
22515
22521pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22522 pm_parser_t parser;
22523 pm_options_t options = { 0 };
22524 pm_options_read(&options, data);
22525
22526 pm_buffer_t parser_buffer;
22527 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22528 pm_serialize_header(buffer);
22529 pm_serialize_content(&parser, node, buffer);
22530 pm_buffer_append_byte(buffer, '\0');
22531
22532 pm_node_destroy(&parser, node);
22533 pm_buffer_free(&parser_buffer);
22534 pm_parser_free(&parser);
22535 pm_options_free(&options);
22536}
22537
22542pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22543 pm_options_t options = { 0 };
22544 pm_options_read(&options, data);
22545
22546 pm_parser_t parser;
22547 pm_parser_init(&parser, source, size, &options);
22548
22549 pm_node_t *node = pm_parse(&parser);
22550 pm_serialize_header(buffer);
22551 pm_serialize_encoding(parser.encoding, buffer);
22552 pm_buffer_append_varsint(buffer, parser.start_line);
22553 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22554
22555 pm_node_destroy(&parser, node);
22556 pm_parser_free(&parser);
22557 pm_options_free(&options);
22558}
22559
22560#endif
22561
22562/******************************************************************************/
22563/* Slice queries for the Ruby API */
22564/******************************************************************************/
22565
/** The classification that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** The requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice is not a valid identifier of any kind. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22583
22587pm_slice_type_t
22588pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22589 // first, get the right encoding object
22590 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22591 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22592
22593 // check that there is at least one character
22594 if (length == 0) return PM_SLICE_TYPE_NONE;
22595
22596 size_t width;
22597 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22598 // valid because alphabetical
22599 } else if (*source == '_') {
22600 // valid because underscore
22601 width = 1;
22602 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22603 // valid because multibyte
22604 } else {
22605 // invalid because no match
22606 return PM_SLICE_TYPE_NONE;
22607 }
22608
22609 // determine the type of the slice based on the first character
22610 const uint8_t *end = source + length;
22611 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22612
22613 // next, iterate through all of the bytes of the string to ensure that they
22614 // are all valid identifier characters
22615 source += width;
22616
22617 while (source < end) {
22618 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22619 // valid because alphanumeric
22620 source += width;
22621 } else if (*source == '_') {
22622 // valid because underscore
22623 source++;
22624 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22625 // valid because multibyte
22626 source += width;
22627 } else {
22628 // invalid because no match
22629 break;
22630 }
22631 }
22632
22633 // accept a ! or ? at the end of the slice as a method name
22634 if (*source == '!' || *source == '?' || *source == '=') {
22635 source++;
22636 result = PM_SLICE_TYPE_METHOD_NAME;
22637 }
22638
22639 // valid if we are at the end of the slice
22640 return source == end ? result : PM_SLICE_TYPE_NONE;
22641}
22642
22647pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22648 switch (pm_slice_type(source, length, encoding_name)) {
22649 case PM_SLICE_TYPE_ERROR:
22650 return PM_STRING_QUERY_ERROR;
22651 case PM_SLICE_TYPE_NONE:
22652 case PM_SLICE_TYPE_CONSTANT:
22653 case PM_SLICE_TYPE_METHOD_NAME:
22654 return PM_STRING_QUERY_FALSE;
22655 case PM_SLICE_TYPE_LOCAL:
22656 return PM_STRING_QUERY_TRUE;
22657 }
22658
22659 assert(false && "unreachable");
22660 return PM_STRING_QUERY_FALSE;
22661}
22662
22667pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22668 switch (pm_slice_type(source, length, encoding_name)) {
22669 case PM_SLICE_TYPE_ERROR:
22670 return PM_STRING_QUERY_ERROR;
22671 case PM_SLICE_TYPE_NONE:
22672 case PM_SLICE_TYPE_LOCAL:
22673 case PM_SLICE_TYPE_METHOD_NAME:
22674 return PM_STRING_QUERY_FALSE;
22675 case PM_SLICE_TYPE_CONSTANT:
22676 return PM_STRING_QUERY_TRUE;
22677 }
22678
22679 assert(false && "unreachable");
22680 return PM_STRING_QUERY_FALSE;
22681}
22682
22687pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22688#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22689#define C1(c) (*source == c)
22690#define C2(s) (memcmp(source, s, 2) == 0)
22691#define C3(s) (memcmp(source, s, 3) == 0)
22692
22693 switch (pm_slice_type(source, length, encoding_name)) {
22694 case PM_SLICE_TYPE_ERROR:
22695 return PM_STRING_QUERY_ERROR;
22696 case PM_SLICE_TYPE_NONE:
22697 break;
22698 case PM_SLICE_TYPE_LOCAL:
22699 // numbered parameters are not valid method names
22700 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22701 case PM_SLICE_TYPE_CONSTANT:
22702 // all constants are valid method names
22703 case PM_SLICE_TYPE_METHOD_NAME:
22704 // all method names are valid method names
22705 return PM_STRING_QUERY_TRUE;
22706 }
22707
22708 switch (length) {
22709 case 1:
22710 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22711 case 2:
22712 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22713 case 3:
22714 return B(C3("===") || C3("<=>") || C3("[]="));
22715 default:
22716 return PM_STRING_QUERY_FALSE;
22717 }
22718
22719#undef B
22720#undef C1
22721#undef C2
22722#undef C3
22723}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
struct pm_options_scope pm_options_scope_t
A scope of locals surrounding the code that is being parsed.
struct pm_options pm_options_t
The options that can be passed to the parser.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:225
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:231
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:104
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:98
struct pm_locals pm_locals_t
This is a set of local variables in a certain lexical context (method, class, module,...
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_lex_mode pm_lex_mode_t
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:499
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:324
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:351
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:336
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:348
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:360
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition parser.h:309
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:306
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:387
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:339
@ PM_CONTEXT_CASE_IN
a case in statements
Definition parser.h:312
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:381
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:417
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:432
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:405
@ PM_CONTEXT_IF
an if statement
Definition parser.h:363
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:399
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:378
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:366
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:402
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:303
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:426
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:411
@ PM_CONTEXT_DEFINED
a defined?
Definition parser.h:342
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:435
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:357
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:408
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:345
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:315
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:384
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:369
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:414
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:354
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:429
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:396
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:438
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:569
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:525
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:451
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
struct pm_list_node pm_list_node_t
This struct represents an abstract linked list that provides common functionality.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
int pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
char * pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
PRISM_EXPORTED_FUNCTION const char * pm_version(void)
The prism version and the serialization format.
Definition prism.c:7
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17387
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17389
const uint8_t * start
The start of the regular expression.
Definition prism.c:17392
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17403
const uint8_t * end
The end of the regular expression.
Definition prism.c:17395
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20399
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20410
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20401
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20407
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20404
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20417
struct pm_node * left
AndNode::left.
Definition ast.h:1288
struct pm_node * right
AndNode::right.
Definition ast.h:1301
pm_node_t base
The embedded base node.
Definition ast.h:1335
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1346
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1586
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1597
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1600
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1588
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1591
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1594
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1374
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1444
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1484
pm_node_t base
The embedded base node.
Definition ast.h:1427
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1494
struct pm_node * value
AssocNode::value.
Definition ast.h:1541
struct pm_node * key
AssocNode::key.
Definition ast.h:1528
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1688
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1668
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1658
pm_node_t base
The embedded base node.
Definition ast.h:1637
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1678
This struct represents a set of binding powers used for a given token.
Definition prism.c:12184
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12192
pm_binding_power_t left
The left binding power.
Definition prism.c:12186
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12198
pm_binding_power_t right
The right binding power.
Definition prism.c:12189
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2200
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2220
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2158
pm_constant_id_t name
CallNode::name.
Definition ast.h:2181
pm_node_t base
The embedded base node.
Definition ast.h:2141
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2233
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2171
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2191
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2210
struct pm_node * block
CallNode::block.
Definition ast.h:2243
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2601
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2671
pm_location_t location
The location of the comment in the source.
Definition parser.h:466
A list of constant IDs.
size_t size
The number of constant ids in the list.
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
pm_context_t context
The context that this node represents.
Definition parser.h:444
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:447
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:368
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3686
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3784
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3844
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3896
pm_node_t base
The embedded base node.
Definition ast.h:3833
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3909
double value
FloatNode::value.
Definition ast.h:3970
pm_node_t base
The embedded base node.
Definition ast.h:3962
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4403
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4492
pm_node_t base
The embedded base node.
Definition ast.h:4439
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4505
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4453
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4586
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4605
pm_integer_t value
IntegerNode::value.
Definition ast.h:5252
pm_node_t base
The embedded base node.
Definition ast.h:5244
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
pm_node_t base
The embedded base node.
Definition ast.h:5365
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5371
pm_node_t base
The embedded base node.
Definition ast.h:5398
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5437
pm_node_t base
The embedded base node.
Definition ast.h:5431
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5442
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:521
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:515
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
union pm_lex_mode::@303336126360075302344075121136356113360170030306 as
The data associated with this type of lex mode.
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
enum pm_lex_mode::@204051102252353332352362146052355003264223055126 mode
The type of this lex mode.
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:535
pm_constant_id_t name
The name of the local variable.
Definition parser.h:537
pm_location_t location
The location of the local variable in the source.
Definition parser.h:540
uint32_t hash
The hash of the local variable.
Definition parser.h:549
uint32_t index
The index of the local variable in the local table.
Definition parser.h:543
uint32_t reads
The number of times the local variable is read.
Definition parser.h:546
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5774
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5761
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5847
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5834
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:565
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:562
uint32_t size
The number of local variables in the set.
Definition parser.h:559
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:478
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6083
pm_node_t base
The embedded base node.
Definition ast.h:6168
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6224
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6184
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6234
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1057
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1075
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:159
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:121
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:175
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:182
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:136
int32_t line
The line within the file that the parse starts on.
Definition options.h:130
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:115
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:168
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:192
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:141
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:124
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:156
struct pm_node * left
OrNode::left.
Definition ast.h:6576
struct pm_node * right
OrNode::right.
Definition ast.h:6589
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6633
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6653
pm_node_t base
The embedded base node.
Definition ast.h:6617
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6648
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6679
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:843
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:652
uint8_t command_line
The command line flags given from the options.
Definition parser.h:862
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Initiate parsing with the given parser.
Definition prism.c:22336
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:758
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:885
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22272
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:912
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:697
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:891
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:800
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:933
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:789
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:915
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22310
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:710
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:752
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:724
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22423
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:661
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:777
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:859
pm_token_t previous
The previous token we were considering.
Definition parser.h:700
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:806
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:878
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:927
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:906
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:731
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:743
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:694
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:655
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:737
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:872
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:856
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:771
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:687
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:734
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:718
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given source pointer and size.
Definition prism.c:22011
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:667
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:765
int32_t start_line
The line number at the start of the parse.
Definition parser.h:812
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:899
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:684
struct pm_parser::@236040131255244317313236162207277265316171136011 lex_modes
A stack of lex modes.
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:721
size_t index
The current index into the lexer mode stack.
Definition parser.h:690
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:783
pm_scope_t * current_scope
The current local scope.
Definition parser.h:740
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:888
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:792
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:921
struct pm_node * right
RangeNode::right.
Definition ast.h:6937
struct pm_node * left
RangeNode::left.
Definition ast.h:6923
pm_node_t base
The embedded base node.
Definition ast.h:6967
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6977
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9491
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9496
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9493
pm_node_t base
The embedded base node.
Definition ast.h:7034
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7055
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7147
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7205
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7195
pm_node_t base
The embedded base node.
Definition ast.h:7169
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:585
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:596
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:623
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:588
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:617
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:629
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7480
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7503
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
pm_node_t base
The embedded base node.
Definition ast.h:7532
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7553
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7548
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7538
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@346265266332060241255337121126133217326336224105 type
The type of the string.
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7639
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7649
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9465
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9470
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9476
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7763
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7773