libpulsar
A modular compiler for the pulsar programming language
Loading...
Searching...
No Matches
parser.c
Go to the documentation of this file.
1// Copyright (C) 2023 Ethan Uppal. All rights reserved.
2
3// TODO: this whole file
4
5#include <stddef.h> // NULL
6#include <math.h> // pow
7#include "parser.h"
8#include "tstream_short.h"
9#include "error/error.h"
10#include "util/arena.h"
11#include "util/abort.h"
14
15#define try_get_float(from, bind_to, do_) \
16 if (sscanf(from, "%lf", &bind_to) == 1) { \
17 do_ \
18 }
19
20#define PS_GROUP_MISMATCH_REFER(tkn) \
21 ps_error(PS_SCOPE_NOTE, PS_ECODE_MISMATCH, ts->file_ctx->buffer, \
22 (tkn)->loc, (tkn)->length, "to match opening parentheses", \
23 "Group opened here", NULL)
24
/*
 * Terminates a statement inside a parsing function that returns a pointer.
 *
 * If the next token is a closing brace the statement is considered finished
 * and nothing is consumed. Otherwise a newline token is required; when it is
 * missing, `match` is given `ctx` as the context of the failure and the
 * enclosing function returns NULL.
 *
 * `ctx` is a string describing where the newline was expected (for example
 * "at end of import statement"). It is forwarded to `match` so each statement
 * kind passes its own context instead of always the let-statement one.
 *
 * Requires a `struct ps_tstream* ts` in scope at the expansion site.
 */
#define PS_END_STM(ctx)                                                        \
    do {                                                                       \
        if (!ps_tstream_peek(ts, 0, PS_TOKEN_RBRACE)) {                        \
            match(PS_TOKEN_NL, ctx) else {                                     \
                return NULL;                                                   \
            }                                                                  \
        }                                                                      \
    } while (0)
33
34static enum ps_token_type literals_token_types[] = {
35 PS_TOKEN_LIT_UNIT,
36 PS_TOKEN_LIT_BIN,
37 PS_TOKEN_LIT_OCT,
38 PS_TOKEN_LIT_DEC,
39 PS_TOKEN_LIT_HEX,
40 PS_TOKEN_LIT_FLT,
41 PS_TOKEN_LIT_CHR,
42 PS_TOKEN_LIT_STR,
43 PS_TOKEN_LIT_STR_INTERP,
44 PS_TOKEN_LIT_FALSE,
45 PS_TOKEN_LIT_TRUE,
46};
47
48static enum ps_token_type funcs_token_types[] = {
49 PS_TOKEN_FUNC,
50 PS_TOKEN_FUNC_INLINE,
51 PS_TOKEN_FUNC_PRIVATE,
52 PS_TOKEN_FUNC_PUBLIC,
53 PS_TOKEN_FUNC_STATIC,
54};
55
59static void ps_parser_skip(struct ps_tstream* ts) {
60 while (ps_tstream_peek(ts, 0, PS_TOKEN_NL)) {
61 adv();
62 }
63}
64
65struct ps_type_tuple* ps_parse_type_tuple(struct ps_tstream* ts) {
66 // (
67 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_LPAR) != NULL, "precondition");
68 struct ps_token* open = tcur();
69 adv();
70
71 // <type>, <type2>, ...
72 struct ps_type_tuple* tuple = ps_type_tuple_new();
73 while (!ps_tstream_peek(ts, 0, PS_TOKEN_RPAR)) {
74 // get next type element
75 struct ps_type* next = ps_parse_type_primary(ts);
76 if (!next) {
77 return NULL;
78 }
79 ps_push(&tuple, next);
80
81 // if after a type we don't have a ), we should have a comma
82 if (!ps_tstream_peek(ts, 0, PS_TOKEN_RPAR)) {
83 match(PS_TOKEN_COMMA, "separating tuple type elements") else {
84 return NULL;
85 }
86 }
87 }
88
89 // )
90 match(PS_TOKEN_RPAR, "at end of tuple") else {
92 return NULL;
93 }
94
95 return tuple;
96}
97
99 struct ps_token* token = tcur();
100 if (!token) {
101 ps_tstream_eof_error(ts, PS_ECODE_INVALID_TYPE,
102 "where type was expected", NULL);
103 return NULL;
104 }
105 switch (token->type) {
106 case PS_TOKEN_ID:
107 adv();
108 return ps_type_from_name(token);
109 case PS_TOKEN_LIT_UNIT:
110 adv();
111 return ps_type_prim_new(PS_TYPE_UNIT);
112 case PS_TOKEN_LPAR:
114 default:
115 ps_error(PS_SCOPE_ERROR, PS_ECODE_INVALID_TYPE,
116 ts->file_ctx->buffer, token->loc, token->length,
117 "Invalid start of type", NULL, NULL);
118 return NULL;
119 }
120}
121
/*
 * Parses a type from `ts`.
 *
 * Currently this is exactly a primary type: the result of
 * ps_parse_type_primary is returned unchanged, including NULL on failure.
 */
struct ps_type* ps_parse_type(struct ps_tstream* ts) {
    // TODO: a loop over PS_TOKEN_ARROW after the primary type is not
    // implemented yet.
    return ps_parse_type_primary(ts);
}
132
133struct ps_name* ps_parse_name(struct ps_tstream* ts, struct ps_token* first) {
134 struct ps_name* name = ps_name_new(); // leak
135
136 ps_push(&name, first);
137
138 while (ps_tstream_peek(ts, 0, PS_TOKEN_SCOPE)) {
139 adv();
140 bindmatch(next, PS_TOKEN_ID, "after '::' in name") else {
141 return NULL;
142 }
143 ps_push(&name, next);
144 }
145
146 return name;
147}
148
149struct ps_node_block* ps_parse_block(struct ps_tstream* ts) {
150 // remember to use ps_parser_skip so that extraneous newlines get ignored
151
152 // {
153 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_LBRACE) != NULL, "precondition");
154 struct ps_token* open = tcur();
155 adv();
156
157 ps_parser_skip(ts);
158
159 struct ps_node_block* block = ps_node_block_new();
160 // <statements>...
161 while (!ps_tstream_peek(ts, 0, PS_TOKEN_RBRACE)) {
162 struct ps_node* next = ps_parse_node(ts);
163 if (!next) {
164 return NULL;
165 }
166 ps_push(&block, next);
167 ps_parser_skip(ts);
168 }
169
170 // }
171 match(PS_TOKEN_RBRACE, "at end of block") else {
173 return NULL;
174 }
175
176 return block;
177}
178
179struct ps_node* ps_parse_let(struct ps_tstream* ts) {
180 // let
181 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_LET) != NULL, "precondition");
182 adv();
183
184 // mut
185 bool is_mutable = false;
186 if (ps_tstream_peek(ts, 0, PS_TOKEN_MUT)) {
187 is_mutable = true;
188 adv();
189 }
190
191 // <name>
192 bindmatch(name, PS_TOKEN_ID, "for variable name") else {
193 return NULL;
194 }
195
196 // : <type>
197 struct ps_type* type = NULL;
198 if (ps_tstream_peek(ts, 0, PS_TOKEN_COLON)) {
199 adv();
200 type = ps_parse_type(ts);
201 if (!type) {
202 return NULL;
203 }
204 }
205
206 // =
207 match(PS_TOKEN_ASSIGN, "in let statement") else {
208 return NULL;
209 }
210
211 // <value>
212 struct ps_expr* value = ps_parse_expr(ts);
213 if (!value) {
214 return NULL;
215 }
216
217 // \n
218 PS_END_STM("at end of let statement");
219
220 return ps_node_let_new(name, is_mutable, type, value);
221}
222
223#define _PS_IS_FN_QUAL() \
224 (ps_tstream_peek(ts, 0, PS_TOKEN_FUNC_INLINE) \
225 || ps_tstream_peek(ts, 0, PS_TOKEN_FUNC_PRIVATE) \
226 || ps_tstream_peek(ts, 0, PS_TOKEN_FUNC_PUBLIC) \
227 || ps_tstream_peek(ts, 0, PS_TOKEN_FUNC_STATIC))
228
229struct ps_node* ps_parse_fn(struct ps_tstream* ts) {
230 /*
231 static private inline fn name(arg1: Type1, arg2: Type2) -> ReturnType {
232 <statement>*
233 }
234 */
235
236 // fn
237 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_FUNC) || _PS_IS_FN_QUAL(),
238 "precondition");
239
240 enum ps_fn_qualifiers qualifiers = 0;
241 while (_PS_IS_FN_QUAL()) {
242 bindmatch_any(qualifier, funcs_token_types,
243 "as function qualifier") else {
244 return NULL;
245 }
246 switch (qualifier->type) {
247 case PS_TOKEN_FUNC_INLINE:
248 qualifiers |= PS_FN_INLINE;
249 break;
250 case PS_TOKEN_FUNC_PRIVATE: // TODO: probably make these mutually
251 // exclusive
252 qualifiers |= PS_FN_PRIVATE;
253 break;
254 case PS_TOKEN_FUNC_PUBLIC:
255 qualifiers |= PS_FN_PUBLIC;
256 break;
257 case PS_TOKEN_FUNC_STATIC:
258 qualifiers |= PS_FN_STATIC;
259 break;
260 default:
261 PS_NO_IMPL();
262 }
263 }
264
265 match(PS_TOKEN_FUNC, "after function qualifiers") else {
266 return NULL;
267 }
268
269 // <name>
270 bindmatch(name, PS_TOKEN_ID, "for function name") else {
271 return NULL;
272 }
273
274 struct ps_type_field_arr* params = ps_type_field_arr_new();
275
276 if (ps_tstream_peek(ts, 0, PS_TOKEN_LIT_UNIT)) {
277 // ()
278 adv();
279 } else {
280 // (
281 bindmatch(open, PS_TOKEN_LPAR, "after function name") else {
282 return NULL;
283 }
284
285 // (<param>: <type>, <param2>: <type2>, ...)
286 while (ps_tstream_peek(ts, 0, PS_TOKEN_ID)) {
287 // <param>
288 struct ps_token* param_name = tcur();
289 adv();
290
291 // :
292 match(PS_TOKEN_COLON, "after argument name") else {
293 return NULL;
294 }
295
296 // <type>
297 struct ps_type* param_type = ps_parse_type(ts);
298 if (!param_type) {
299 return NULL;
300 }
301
302 /* clang-format off */
303 struct ps_type_field field = {
304 .name = param_name->start, .type = param_type};
305 /* clang-format on */
306 ps_push(&params, field);
307
308 // ,
309 // after <param>: <type> we either want ) to end it or , for
310 // another
311 if (ps_tstream_peek(ts, 0, PS_TOKEN_COMMA)) {
312 // if it is, skip past it
313 adv();
314 } else {
315 // otherwise we need it to be a )
316 match(PS_TOKEN_RPAR, "after arguments") else {
318 return NULL;
319 }
320 break; // stop looking for new params
321 }
322 }
323 }
324
325 // -> <ret-type>
326 struct ps_type* ret_type = NULL;
327 if (ps_tstream_peek(ts, 0, PS_TOKEN_ARROW)) {
328 adv();
329 ret_type = ps_parse_type(ts);
330 if (!ret_type) {
331 return NULL;
332 }
333 }
334
335 // { <body> }
336 struct ps_node_block* body = ps_parse_block(ts);
337 if (!body) {
338 return NULL;
339 }
340
341 return ps_node_fn_new(name, ret_type, params, qualifiers, body);
342}
343
344struct ps_node* ps_parse_import(struct ps_tstream* ts) {
345 // import
346 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_IMPORT) != NULL, "precondition");
347 adv();
348
349 bindmatch(first, PS_TOKEN_ID, "to start name") else {
350 return NULL;
351 }
352 struct ps_name* name = ps_parse_name(ts, first);
353 if (!name) {
354 return NULL;
355 }
356
357 PS_END_STM("at end of import statement");
358
359 return ps_node_import_new(name);
360}
361
362struct ps_node* ps_parse_extern(struct ps_tstream* ts) {
363 // extern
364 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_EXTERN) != NULL, "precondition");
365 adv();
366
367 // <name>
368 bindmatch(name, PS_TOKEN_ID, "for external function name") else {
369 return NULL;
370 }
371
372 // (<sig>)
373 struct ps_type_tuple* sig = ps_parse_type_tuple(ts);
374 if (!sig) {
375 return NULL;
376 }
377
378 PS_END_STM("at end of external declaration");
379
380 return ps_node_extern_new(name, sig);
381}
382
383struct ps_node* ps_parse_struct(struct ps_tstream* ts) {
384 // struct
385 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_STRUCT) != NULL, "precondition");
386 adv();
387 PS_NO_IMPL();
388 return NULL;
389}
390
391struct ps_node* ps_parse_enum(struct ps_tstream* ts) {
392 // enum
393 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_ENUM) != NULL, "precondition");
394 adv();
395 PS_NO_IMPL();
396 return NULL;
397}
398
399struct ps_node* ps_parse_for(struct ps_tstream* ts) {
400 // for
401 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_FOR) != NULL, "precondition");
402 adv();
403 PS_NO_IMPL();
404 return NULL;
405}
406
407struct ps_node* ps_parse_while(struct ps_tstream* ts) {
408 // while
409 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_WHILE) != NULL, "precondition");
410 adv();
411
412 // <guard>
413 struct ps_expr* guard = ps_parse_expr(ts);
414 if (!guard) {
415 return NULL;
416 }
417
418 // { ... }
419 struct ps_node_block* block = ps_parse_block(ts);
420 if (!block) {
421 return NULL;
422 }
423
424 return ps_node_while_new(guard, block);
425}
426
427struct ps_node* ps_parse_if(struct ps_tstream* ts) {
428 // if
429 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_IF) != NULL, "precondition");
430 adv();
431
432 // <cond>
433 struct ps_expr* cond = ps_parse_expr(ts);
434 if (!cond) {
435 return NULL;
436 }
437
438 // { ... }
439 struct ps_node_block* then_block = ps_parse_block(ts);
440 if (!then_block) {
441 return NULL;
442 }
443
444 struct ps_node_block* else_block = NULL;
445 if (ps_tstream_peek(ts, 0, PS_TOKEN_ELSE)) {
446 // TODO: else if
447 adv();
448 else_block = ps_parse_block(ts);
449 if (!else_block) {
450 return NULL;
451 }
452 }
453
454 return ps_node_if_new(cond, then_block, else_block);
455}
456
457struct ps_node* ps_parse_return(struct ps_tstream* ts) {
458 // return
459 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_RET) != NULL, "precondition");
460 adv();
461
462 struct ps_expr* value = NULL;
463 if (!(ps_tstream_peek(ts, 0, PS_TOKEN_NL)
464 || ps_tstream_peek(ts, 0, PS_TOKEN_RBRACE))) {
465 // <value>
466 value = ps_parse_expr(ts);
467 if (!value) {
468 return NULL;
469 }
470 // \n
471 PS_END_STM("at end of return statement");
472 } else {
473 // \n
474 PS_END_STM("at end of return statement");
475 }
476
478}
479
481 struct ps_expr* expr = ps_parse_expr(ts);
482 PS_END_STM("at end of expression statement");
483 return ps_node_expr_stm_new(expr);
484}
485
487 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_LIT_STR_INTERP) != NULL,
488 "precondition");
489
490 struct ps_interp_str* string = ps_interp_str_new();
491
492 // the format that the lexer produces is as follows:
493 // (<str-interp-lit> <expr>)+ <str-lit>
494
495 // continue to take interpolation literals followed by their expressions
496 // e.g. <str-interp-lit> <expr>
497 while (ps_tstream_peek(ts, 0, PS_TOKEN_LIT_STR_INTERP)) {
498 ps_push(&string, ps_expr_lit_new_str(tcur()->start, tcur()));
499 adv();
500 struct ps_expr* following_expr = ps_parse_expr(ts);
501 if (!following_expr) {
502 return NULL;
503 }
504 ps_push(&string, following_expr);
505 }
506
507 // all interpolated strings end with a string literal token
508 // e.g. <str-lit>
509 bindmatch(end_str_literal, PS_TOKEN_LIT_STR,
510 "at end of interpolated string") else {
511 return NULL;
512 }
513 ps_push(&string,
514 ps_expr_lit_new_str(end_str_literal->start, end_str_literal));
515
516 return ps_expr_lit_new_interp_str(string);
517}
518
520 bindmatch_any(token, literals_token_types, "in expression literal") else {
521 return NULL;
522 }
523
524 // TODO: check for integer out of bounds
525 switch (token->type) {
526 case PS_TOKEN_LIT_UNIT:
527 return ps_expr_lit_new_unit(token);
528 case PS_TOKEN_LIT_BIN:
529 return ps_expr_lit_new_u64(strtoull(token->start + 2, NULL, 2),
530 token);
531 case PS_TOKEN_LIT_OCT:
532 return ps_expr_lit_new_u64(strtoull(token->start + 2, NULL, 8),
533 token);
534 case PS_TOKEN_LIT_DEC:
535 return ps_expr_lit_new_i64(strtoll(token->start, NULL, 10), token);
536 case PS_TOKEN_LIT_HEX:
537 return ps_expr_lit_new_u64(strtoull(token->start + 2, NULL, 16),
538 token);
539 case PS_TOKEN_LIT_FLT: {
540 // parse float
541 double float_val;
542 try_get_float(token->start, float_val, {
543 // handle scientific notation
544 if (!eof() && tcur()->type == PS_TOKEN_E) {
545 adv();
546 bindmatch(exp_token, PS_TOKEN_LIT_DEC,
547 "for scientific notation exponent") else {
548 return NULL;
549 }
550 i64 exp_value = strtoll(exp_token->start, NULL, 10);
551 float_val *= pow(10.0, exp_value);
552 }
553
554 return ps_expr_lit_new_f64(float_val, token);
555 }) else {
556 ps_abort("lexer tokenized invalid floating point literal");
557 return NULL;
558 }
559 }
560 case PS_TOKEN_LIT_CHR:
561 return ps_expr_lit_new_i64(token->start[0], token);
562 case PS_TOKEN_LIT_STR:
563 return ps_expr_lit_new_str(token->start, token);
564 case PS_TOKEN_LIT_STR_INTERP: {
565 unwind(); // see precondition for ps_parse_interp_str
566 return ps_parse_interp_str(ts);
567 }
568 case PS_TOKEN_LIT_FALSE:
569 return ps_expr_lit_new_bool(false, token);
570 case PS_TOKEN_LIT_TRUE:
571 return ps_expr_lit_new_bool(true, token);
572 default:
573 ps_abort("should not reach here");
574 return NULL;
575 }
576}
577
579 struct ps_token* callee) {
580 // (
581 ps_assert(ps_tstream_peek(ts, 0, PS_TOKEN_LPAR) != NULL, "precondition");
582 adv();
583
584 struct ps_expr_arr* args = ps_expr_arr_new(); // leak
585 while (!ps_tstream_peek(ts, 0, PS_TOKEN_RPAR)) {
586 // <arg>
587 struct ps_expr* next = ps_parse_expr(ts);
588 if (!next) {
589 return NULL;
590 }
591 ps_push(&args, next);
592
593 // ,
594 if (!ps_tstream_peek(ts, 0, PS_TOKEN_RPAR)) {
595 match(PS_TOKEN_COMMA, "between arguments") else {
596 return NULL;
597 }
598 }
599 }
600
601 // )
602 match(PS_TOKEN_RPAR, "at end of call") else {
603 return NULL;
604 }
605
606 return ps_expr_call_new(callee, args);
607}
608
610 // handle end of file
611 if (eof()) {
612 ps_tstream_eof_error(ts, PS_ECODE_EXPECTED_EXPR,
613 "where expression was expected", NULL);
614 return NULL;
615 }
616
617 struct ps_token* cur = tcur();
618
619 // unary operators
620 if (ps_token_type_is_op(cur->type)) {
621 const struct ps_operator* op = ps_token_type_get_op(cur->type);
622 if (!op->is_unary) {
623 ps_error(PS_SCOPE_ERROR, PS_ECODE_INVALID_OP, ts->file_ctx->buffer,
624 cur->loc, cur->length,
625 ast_sprintf("'%s' cannot be used here", cur->start),
626 "Binary operator used as unary", NULL);
627 return NULL;
628 }
629
630 // delegate to the helper but with `NULL` lhs
631 return ps_parse_expr_helper(ts, NULL, op->unary_prec);
632 }
633
634 // parentheses: (<expr>)
635 if (cur->type == PS_TOKEN_LPAR) {
636 // (
637 adv();
638
639 // <expr>
640 struct ps_expr* expr = ps_parse_expr(ts);
641 if (!expr) {
642 return NULL;
643 }
644
645 // )
646 bindmatch(rpar, PS_TOKEN_RPAR, "in expression") else {
648 return NULL;
649 }
650
651 return expr;
652 }
653
654 // variables
655 if (cur->type == PS_TOKEN_ID) {
656 adv();
657 if (ps_tstream_peek(ts, 0, PS_TOKEN_LPAR)) {
658 // calls
659 return ps_parse_expr_call(ts, cur);
660 } else if (ps_tstream_peek(ts, 0, PS_TOKEN_LIT_UNIT)) {
661 // calls
662 adv();
663 return ps_expr_call_new(cur, ps_expr_arr_new()); // leak
664 } else {
665 return ps_expr_id_new(cur);
666 }
667 }
668
669 // otherwise must be a literal
670 return ps_parse_expr_lit(ts);
671}
672
673// https://en.wikipedia.org/wiki/Operator-precedence_parser#Pseudocode
674struct ps_expr* ps_parse_expr_helper(struct ps_tstream* ts, struct ps_expr* lhs,
675 ps_operator_precedence_t min_prec) {
676 struct ps_token* lookahead = tcur();
677 const struct ps_operator* op;
678
679 while (lookahead && ps_token_type_is_op(lookahead->type)) {
680 struct ps_token* old_lookahead = lookahead;
681 op = ps_token_type_get_op(lookahead->type);
682 if (!op->is_binary || !(op->binary_prec >= min_prec)) {
683 break;
684 }
685 adv(); // past operator
686
687 struct ps_expr* rhs = ps_parse_expr_primary(ts);
688
689 lookahead = tcur();
690 const struct ps_operator* op2;
691
692 while (lookahead && ps_token_type_is_op(lookahead->type)) {
693 op2 = ps_token_type_get_op(lookahead->type);
694 if (!op2->is_binary) {
695 break;
696 }
697 if (!(op2->is_left_associative
698 && op2->binary_prec > op->binary_prec)
699 && !(!op2->is_left_associative
700 && op2->binary_prec >= op->binary_prec)) {
701 break;
702 }
703 rhs = ps_parse_expr_helper(ts, rhs,
704 op->binary_prec + (op2->binary_prec > op->binary_prec));
705 lookahead = tcur();
706 }
707 if (lhs) {
708 lhs = ps_expr_binary_new(lhs, old_lookahead, rhs);
709 } else {
710 lhs = ps_expr_unary_new(old_lookahead, rhs);
711 }
712 }
713
714 return lhs;
715}
716
717struct ps_expr* ps_parse_expr(struct ps_tstream* ts) {
719}
720
721struct ps_node* ps_parse_node(struct ps_tstream* ts) {
722 // TODO: impl
723 switch (tcur()->type) {
724 case PS_TOKEN_LET:
725 return ps_parse_let(ts);
726 case PS_TOKEN_FUNC_STATIC:
727 case PS_TOKEN_FUNC_INLINE:
728 case PS_TOKEN_FUNC_PRIVATE:
729 case PS_TOKEN_FUNC_PUBLIC:
730 case PS_TOKEN_FUNC:
731 return ps_parse_fn(ts);
732 case PS_TOKEN_IMPORT:
733 return ps_parse_import(ts);
734 case PS_TOKEN_EXTERN:
735 return ps_parse_extern(ts);
736 case PS_TOKEN_LBRACE: {
737 struct ps_node_block* block = ps_parse_block(ts);
738 return block ? ps_node_block_stm_new(block) : NULL;
739 }
740 case PS_TOKEN_STRUCT:
741 return ps_parse_struct(ts);
742 case PS_TOKEN_TRAIT:
743 PS_NO_IMPL();
744 return NULL;
745 // return ps_parse_trait(ts);
746 case PS_TOKEN_ENUM:
747 return ps_parse_enum(ts);
748 case PS_TOKEN_FOR:
749 return ps_parse_for(ts);
750 case PS_TOKEN_WHILE:
751 return ps_parse_while(ts);
752 case PS_TOKEN_IF:
753 return ps_parse_if(ts);
754 case PS_TOKEN_RET:
755 return ps_parse_return(ts);
756 default:
757 return ps_parse_expr_stm(ts);
758 }
759}
760
761struct ps_node_arr* ps_parse(struct ps_token_arr* tokens,
762 const struct ps_file_ctx* file_ctx) {
763 struct ps_tstream ts;
765 ps_parser_skip(&ts);
766
767 struct ps_node_arr* nodes = ps_node_arr_new();
768
769 // TODO: very similar to lexer lex code, see if can remove code duplication
770 // Process tokens until 5 errors, then exit if any (up to 5) errors.
771 int status = 0;
772 while (!ps_tstream_is_eof(&ts)) {
773 struct ps_node* next = ps_parse_node(&ts);
774 ps_parser_skip(&ts);
775 if (next) {
776 ps_push(&nodes, next);
777 } else {
778 status = 1;
780 ps_parser_skip(&ts);
781 } else {
782 ps_error(PS_SCOPE_INFO, PS_ECODE_STOP, ts.file_ctx->buffer,
783 PS_LOC_NONE(), 1, "Stopping after max errors received.",
784 NULL, NULL);
785 break;
786 }
787 }
788 }
789
790 if (status != 0) {
791 ps_node_arr_free(nodes);
792 return NULL;
793 }
794 return nodes;
795}
Defines assertion and abortion functionality.
#define ps_abort(msg)
Aborts the program with the given message.
Definition abort.h:47
#define ps_assert(cond, msg)
Asserts the given condition cond, aborting via ps_abort with the given msg otherwise.
Definition abort.h:53
#define PS_NO_IMPL()
Aborts the program due to a function taking a path not yet implemented.
Definition abort.h:70
Defines an arena allocator for the compiler.
#define ast_sprintf(fmt,...)
Definition arena.h:77
struct ps_expr * ps_expr_lit_new_f64(f64 value, struct ps_token *token)
Creates a new literal expression for a floating point number (of at most 64 bits).
Definition ast.c:361
struct ps_expr * ps_expr_unary_new(struct ps_token *op, struct ps_expr *rhs)
Creates a new unary expression.
Definition ast.c:439
struct ps_expr * ps_expr_lit_new_unit(struct ps_token *token)
Creates a new literal expression for a unit literal.
Definition ast.c:413
struct ps_expr * ps_expr_lit_new_u64(u64 value, struct ps_token *token)
Creates a new literal expression for an unsigned integer (of at most 64 bits).
Definition ast.c:335
struct ps_node * ps_node_return_new(struct ps_expr *value)
Creates a new return statement that returns the expression value.
Definition ast.c:151
struct ps_expr * ps_expr_lit_new_bool(bool value, struct ps_token *token)
Creates a new literal expression for a boolean.
Definition ast.c:374
struct ps_node * ps_node_extern_new(struct ps_token *name, struct ps_type_tuple *sig)
Definition ast.c:57
struct ps_node * ps_node_let_new(struct ps_token *name, bool is_mutable, struct ps_type *type, struct ps_expr *value)
Creates a new let statement initializing the variable name to the expression value.
Definition ast.c:8
struct ps_node * ps_node_expr_stm_new(struct ps_expr *expr)
Creates a new expression statement for expr.
Definition ast.c:69
struct ps_expr * ps_expr_lit_new_i64(i64 value, struct ps_token *token)
Creates a new literal expression for a signed integer (of at most 64 bits).
Definition ast.c:348
struct ps_expr * ps_expr_call_new(struct ps_token *callee, struct ps_expr_arr *args)
Creates a new call expression.
Definition ast.c:451
struct ps_expr * ps_expr_lit_new_str(STR value, struct ps_token *token)
Creates a new literal expression for a string.
Definition ast.c:387
struct ps_node * ps_node_fn_new(struct ps_token *name, struct ps_type *ret_type, struct ps_type_field_arr *params, enum ps_fn_qualifiers qualifiers, struct ps_node_block *body)
Creates a new function with the given name name, parameters params, and function body body.
Definition ast.c:22
struct ps_node * ps_node_block_stm_new(struct ps_node_block *block)
Creates a new block statement.
Definition ast.c:79
struct ps_node * ps_node_if_new(struct ps_expr *cond, struct ps_node_block *body, struct ps_node_block *else_body)
Creates a new if statement on condition cond.
Definition ast.c:138
struct ps_expr * ps_expr_binary_new(struct ps_expr *lhs, struct ps_token *op, struct ps_expr *rhs)
Creates a new binary expression.
Definition ast.c:425
struct ps_node * ps_node_while_new(struct ps_expr *cond, struct ps_node_block *body)
Creates a new while loop.
Definition ast.c:126
struct ps_expr * ps_expr_lit_new_interp_str(struct ps_interp_str *value)
Creates a new literal expression for an interpolated string.
Definition ast.c:400
#define ps_name_new()
Definition ast.h:31
#define ps_expr_arr_new()
Definition ast.h:258
struct ps_expr * ps_expr_id_new(struct ps_token *name)
Creates a new id expression.
struct ps_node * ps_node_import_new(struct ps_name *name)
Creates a new import statement.
#define ps_node_arr_free(arr)
Definition ast.h:234
#define ps_interp_str_new()
Definition ast.h:248
ps_fn_qualifiers
Function qualifier flags.
Definition ast.h:52
@ PS_FN_INLINE
Inlined everywhere.
Definition ast.h:56
@ PS_FN_PUBLIC
Visible outside the module.
Definition ast.h:53
@ PS_FN_PRIVATE
Hidden outside the module.
Definition ast.h:54
@ PS_FN_STATIC
TODO:
Definition ast.h:55
#define ps_node_arr_new()
Definition ast.h:233
#define ps_node_block_new()
Definition ast.h:240
#define ps_push(__arrptr, __new_elem)
Definition dynarr.h:46
usize ps_error_count(void)
The number of errors that have been reported in the error reporting system.
Definition error.c:240
Error reporting and displaying utilities.
struct ps_loc PS_LOC_NONE(void)
Represents the absence token location.
bool ps_token_type_is_op(enum ps_token_type type)
Whether type represents an operator token.
Definition operator.c:209
const struct ps_operator * ps_token_type_get_op(enum ps_token_type type)
Returns: the operator information associated with the given operator token.
Definition operator.c:213
Operator precedence and associativity info.
i32 ps_operator_precedence_t
Definition operator.h:13
struct ps_expr * ps_parse_expr_lit(struct ps_tstream *ts)
Parses an expression literal.
Definition parser.c:519
struct ps_node * ps_parse_extern(struct ps_tstream *ts)
Parses an external declaration.
Definition parser.c:362
#define _PS_IS_FN_QUAL()
Definition parser.c:223
struct ps_node * ps_parse_if(struct ps_tstream *ts)
Parses an if-else statement.
Definition parser.c:427
struct ps_type_tuple * ps_parse_type_tuple(struct ps_tstream *ts)
Parses a tuple type.
Definition parser.c:65
#define PS_GROUP_MISMATCH_REFER(tkn)
Definition parser.c:20
#define PS_END_STM(ctx)
Definition parser.c:25
struct ps_node * ps_parse_enum(struct ps_tstream *ts)
Parses an enumeration declaration.
Definition parser.c:391
struct ps_node * ps_parse_for(struct ps_tstream *ts)
Parses a for loop.
Definition parser.c:399
struct ps_expr * ps_parse_expr_call(struct ps_tstream *ts, struct ps_token *callee)
Parses a call expression, which involves a token (TODO: an expression) followed by a set of parenthes...
Definition parser.c:578
struct ps_expr * ps_parse_expr_primary(struct ps_tstream *ts)
A primary expression is a literal or call expression.
Definition parser.c:609
struct ps_expr * ps_parse_expr(struct ps_tstream *ts)
Parses a general expression.
Definition parser.c:717
struct ps_type * ps_parse_type(struct ps_tstream *ts)
Parses a type such as Rectangle or (Point, Point) from the token stream.
Definition parser.c:122
struct ps_expr * ps_parse_interp_str(struct ps_tstream *ts)
Definition parser.c:486
struct ps_node * ps_parse_while(struct ps_tstream *ts)
Parses a while loop.
Definition parser.c:407
struct ps_node * ps_parse_return(struct ps_tstream *ts)
Parses a return statement.
Definition parser.c:457
struct ps_node_block * ps_parse_block(struct ps_tstream *ts)
Parses a block like:
Definition parser.c:149
struct ps_node * ps_parse_let(struct ps_tstream *ts)
Definition parser.c:179
struct ps_node * ps_parse_import(struct ps_tstream *ts)
Parses an import statement.
Definition parser.c:344
struct ps_type * ps_parse_type_primary(struct ps_tstream *ts)
Parses a primary type such as a tuple or unresolved (struct, enum).
Definition parser.c:98
struct ps_node * ps_parse_fn(struct ps_tstream *ts)
Parses a function definition.
Definition parser.c:229
struct ps_node * ps_parse_node(struct ps_tstream *ts)
Parses and returns the next node in ts.
Definition parser.c:721
struct ps_node * ps_parse_struct(struct ps_tstream *ts)
Parses a structure declaration.
Definition parser.c:383
struct ps_name * ps_parse_name(struct ps_tstream *ts, struct ps_token *first)
Parses a name such as foo::bar::baz.
Definition parser.c:133
#define try_get_float(from, bind_to, do_)
Definition parser.c:15
struct ps_node_arr * ps_parse(struct ps_token_arr *tokens, const struct ps_file_ctx *file_ctx)
Constructs a series of nodes from the given tokens.
Definition parser.c:761
struct ps_expr * ps_parse_expr_helper(struct ps_tstream *ts, struct ps_expr *lhs, ps_operator_precedence_t min_prec)
Handles precedence and associativity in parsing expressions.
Definition parser.c:674
struct ps_node * ps_parse_expr_stm(struct ps_tstream *ts)
An expression statement consists of an expression followed by a newline.
Definition parser.c:480
Parsing routines for nodes and expressions.
#define PS_PARSER_MAX_ERRORS
The parser will stop in its tracks after it encounters this many errors.
Definition parser.h:21
Represents an error or source-referencing display message.
Definition error.h:43
Expression nodes are represented with tagged unions.
Definition ast.h:187
union ps_expr::@1 value
struct ps_type * type
Definition ast.h:200
Information captured in a file necessary for effective info/error reporting.
Definition io.h:15
char * buffer
Definition io.h:17
AST nodes are represented with tagged unions.
Definition ast.h:206
struct ps_node_block * block
Definition ast.h:218
enum ps_node::ps_node_type type
Operator information.
Definition operator.h:18
bool is_binary
Enables processing this operator as a binary operator.
Definition operator.h:25
ps_operator_precedence_t unary_prec
The operator's unary precedence.
Definition operator.h:42
bool is_left_associative
This defines operator associativity as follows:
Definition operator.h:39
ps_operator_precedence_t binary_prec
The operator's binary precedence.
Definition operator.h:45
bool is_unary
Enables processing this operator as an unary operator.
Definition operator.h:32
Represents a token.
Definition token.h:34
usize length
Definition token.h:38
enum ps_token_type type
Definition token.h:36
struct ps_loc loc
Definition token.h:35
STR start
Definition token.h:37
Processes tokens in a stream.
Definition tstream.h:16
const struct ps_token_arr * tokens
The tokens in the stream.
Definition tstream.h:21
const struct ps_file_ctx * file_ctx
The file context for the stream.
Definition tstream.h:24
A pair (string, type).
Definition type.h:15
STR name
Definition type.h:16
Definition type.h:66
struct ps_type_tuple * tuple
Definition type.h:69
enum ps_type_type type
Definition type.h:67
ps_token_type
The type of a token.
Definition token.h:18
void ps_tstream_eof_error(struct ps_tstream *ts, int code, STR ctx, STR fix)
Reports an end-of-file error with the given context ctx, using the last token in ts for location.
Definition tstream.c:99
void ps_tstream_init(struct ps_tstream *ts, const struct ps_token_arr *tokens, const struct ps_file_ctx *file_ctx)
Points tstream to the first token in tokens and reads from it there on.
Definition tstream.c:8
bool ps_tstream_is_eof(struct ps_tstream *ts)
Whether tstream has no more tokens.
Definition tstream.c:15
struct ps_token * ps_tstream_peek(struct ps_tstream *ts, usize n, enum ps_token_type type)
If the nth next token in ts is of the given type, returns it.
Definition tstream.c:27
#define match(type, ctx)
Tries to match the next token with the given token type, opening a block for if no match succeeded.
#define tcur()
#define bindmatch_any(val, types, ctx)
#define bindmatch(val, type, ctx)
Tries to match the next token with the given token type and assign the token to the given variable,...
#define eof()
#define unwind()
#define adv()
struct ps_type * ps_type_from_name(struct ps_token *name)
Returns the type associated with the given name (e.g.
Definition type.c:76
struct ps_type * ps_type_from_tuple(struct ps_type_tuple *tuple)
Constructs and returns a type for the given tuple type tuple.
Definition type.c:86
struct ps_type * ps_type_prim_new(enum ps_type_type prim_type)
Constructs a new primitive type specified by prim_type.
Definition type.c:23
#define ps_type_field_arr_new()
Definition type.h:30
#define ps_type_tuple_new()
Definition type.h:45