VapourSynth-llvmexpr
Loading...
Searching...
No Matches
Parser.cpp
Go to the documentation of this file.
1
19
20#include "Parser.hpp"
21#include "Builtins.hpp"
22
23#include <algorithm>
24#include <cctype>
25#include <format>
26#include <utility>
27
28namespace infix2postfix {
29
30Parser::Parser(const std::vector<Token>& tokens) : tokens(tokens) {}
31
33 auto program = std::make_unique<Program>();
34 while (!isAtEnd()) {
35 // Skip any leading empty statements
36 while (match({TokenType::Newline, TokenType::Semicolon})) {
37 }
38 if (isAtEnd()) {
39 break;
40 }
41
42 int pos_before = current;
43 auto stmt = parseDeclaration();
44 if (stmt) {
45 program->statements.push_back(std::move(stmt));
46 }
47 panic_mode = false;
48
49 if (current == pos_before && !isAtEnd() && !errors.empty()) {
50 advance();
51 }
52 }
53
54 return ParseResult{.ast = std::move(program), .errors = std::move(errors)};
55}
56
57std::unique_ptr<Stmt> Parser::parseDeclaration() {
58 std::unique_ptr<Stmt> stmt;
59 if (peek().type == TokenType::Global) {
60 auto global_decl = parseGlobalDecl();
61
62 while (match({TokenType::Newline})) {
63 }
64
65 // Global declaration must be followed by a function definition
66 if (peek().type != TokenType::Function) {
67 error(peek(), "Global declaration must be followed by a function "
68 "definition.");
69 }
70 auto func_def = parseFunctionDef();
71 // Attach the global declaration to the function
72 func_def->global_decl = std::move(global_decl);
73 stmt = make_node<FunctionDef>(std::move(*func_def));
74 } else if (peek().type == TokenType::Function) {
75 auto func_def = parseFunctionDef();
76 stmt = std::make_unique<Stmt>(FunctionDef(std::move(*func_def)));
77 } else {
78 stmt = parseStatement();
79 }
80
81 // Block statements are not followed by a terminator here.
82 if ((get_if<FunctionDef>(stmt.get()) != nullptr) ||
83 (get_if<IfStmt>(stmt.get()) != nullptr) ||
84 (get_if<WhileStmt>(stmt.get()) != nullptr) ||
85 (get_if<BlockStmt>(stmt.get()) != nullptr)) {
86 return stmt;
87 }
88
89 // Last statement in a block or file.
90 if (isAtEnd() || peek().type == TokenType::RBrace) {
91 return stmt;
92 }
93
94 // A simple statement must be followed by a terminator.
95 if (peek().type == TokenType::Newline ||
96 peek().type == TokenType::Semicolon) {
97 // The terminator is consumed by the main loop.
98 return stmt;
99 }
100
101 error(peek(), "Expected newline or semicolon after statement.");
102 synchronize();
103 return stmt;
104}
105
106std::unique_ptr<Stmt> Parser::parseStatement() {
107 if (peek().type == TokenType::If) {
108 return parseIfStatement();
109 }
110 if (peek().type == TokenType::While) {
111 return parseWhileStatement();
112 }
113 if (peek().type == TokenType::LBrace) {
114 error(peek(), "Standalone blocks are not allowed. Braces can only be "
115 "used for function, if, else, or while bodies.");
116 synchronize();
117 return nullptr;
118 }
119 if (peek().type == TokenType::Goto) {
120 return parseGotoStatement();
121 }
122 if (peek().type == TokenType::Return) {
123 return parseReturnStatement();
124 }
125 if (peek().type == TokenType::Identifier &&
126 peek(1).type == TokenType::Colon) {
127 return parseLabelStatement();
128 }
129 return parseExprStatement();
130}
131
132std::unique_ptr<Stmt> Parser::parseIfStatement() {
133 consume(TokenType::If, "Expect 'if'.");
134 consume(TokenType::LParen, "Expect '(' after 'if'.");
135 auto condition = parseTernary();
136 consume(TokenType::RParen, "Expect ')' after if condition.");
137
138 if (peek().type != TokenType::LBrace) {
139 error(peek(), "The body of an if statement must be a block statement "
140 "enclosed in {}.");
141 }
142 auto then_branch = std::make_unique<Stmt>(std::move(*parseBlock()));
143
144 std::unique_ptr<Stmt> else_branch = nullptr;
145 if (match({TokenType::Else})) {
146 if (peek().type == TokenType::If) {
147 else_branch = parseIfStatement();
148 } else {
149 if (peek().type != TokenType::LBrace) {
150 error(peek(), "The body of an else statement must be a block "
151 "statement enclosed in {}.");
152 }
153 else_branch = std::make_unique<Stmt>(std::move(*parseBlock()));
154 }
155 }
156 return make_node<IfStmt>(std::move(condition), std::move(then_branch),
157 std::move(else_branch));
158}
159
160std::unique_ptr<Stmt> Parser::parseWhileStatement() {
161 consume(TokenType::While, "Expect 'while'.");
162 consume(TokenType::LParen, "Expect '(' after 'while'.");
163 auto condition = parseTernary();
164 consume(TokenType::RParen, "Expect ')' after while condition.");
165
166 if (peek().type != TokenType::LBrace) {
167 error(peek(), "The body of a while statement must be a block "
168 "statement enclosed in {}.");
169 }
170 auto body = std::make_unique<Stmt>(std::move(*parseBlock()));
171
172 return make_node<WhileStmt>(std::move(condition), std::move(body));
173}
174
175std::unique_ptr<Stmt> Parser::parseGotoStatement() {
176 Token keyword = consume(TokenType::Goto, "Expect 'goto'.");
177 Token label =
178 consume(TokenType::Identifier, "Expect label name after 'goto'.");
179 if (label.value.starts_with("__internal_")) {
180 error(label, "goto target cannot start with '__internal_'.");
181 }
182 return make_node<GotoStmt>(keyword, label, nullptr);
183}
184
185std::unique_ptr<Stmt> Parser::parseLabelStatement() {
186 Token name = consume(TokenType::Identifier, "Expect label name.");
187 if (name.value.starts_with("__internal_")) {
188 error(name, "Label name cannot start with '__internal_'.");
189 }
190 consume(TokenType::Colon, "Expect ':' after label name.");
191 return make_node<LabelStmt>(name);
192}
193
194std::unique_ptr<Stmt> Parser::parseReturnStatement() {
195 Token keyword = consume(TokenType::Return, "Expect 'return'.");
196 std::unique_ptr<Expr> value = nullptr;
197 // Check if there is a value to return.
198 if (peek().type != TokenType::Newline &&
199 peek().type != TokenType::Semicolon &&
200 peek().type != TokenType::EndOfFile &&
201 peek().type != TokenType::RBrace) {
202 value = parseTernary();
203 }
204 return make_node<ReturnStmt>(keyword, std::move(value));
205}
206
207std::unique_ptr<BlockStmt> Parser::parseBlock() {
208 consume(TokenType::LBrace, "Expect '{' to start a block.");
209 std::vector<std::unique_ptr<Stmt>> statements;
210 while (peek().type != TokenType::RBrace && !isAtEnd()) {
211 // Skip any empty statements
212 while (match({TokenType::Newline, TokenType::Semicolon})) {
213 }
214 if (peek().type == TokenType::RBrace || isAtEnd()) {
215 break;
216 }
217
218 int pos_before = current;
219 auto stmt = parseDeclaration();
220 if (stmt) {
221 statements.push_back(std::move(stmt));
222 }
223 panic_mode = false;
224
225 if (peek().type == TokenType::RBrace || isAtEnd()) {
226 break;
227 }
228
229 if (current == pos_before && !isAtEnd() &&
230 peek().type != TokenType::RBrace && !errors.empty()) {
231 advance();
232 }
233 }
234 consume(TokenType::RBrace, "Expect '}' to end a block.");
235 return std::make_unique<BlockStmt>(BlockStmt(std::move(statements)));
236}
237
238std::unique_ptr<Stmt> Parser::parseExprStatement() {
239 if (peek().type == TokenType::Identifier &&
240 peek(1).type == TokenType::Assign) {
241 Token name = advance(); // identifier
242 if (name.value.starts_with("__internal_")) {
243 error(name, "Variable name cannot start with '__internal_'.");
244 }
245 advance(); // '='
246 auto value = parseTernary();
247 return make_node<AssignStmt>(name, std::move(value));
248 }
249
250 if (peek().type == TokenType::Identifier &&
251 peek(1).type == TokenType::LBracket) {
252 auto left_expr = parsePostfix();
253
254 if (match({TokenType::Assign})) {
255 auto right_expr = parseTernary();
256
257 if (get_if<ArrayAccessExpr>(left_expr.get()) != nullptr) {
258 return make_node<ArrayAssignStmt>(std::move(left_expr),
259 std::move(right_expr));
260 }
261 error(peek(), "Invalid assignment target.");
262 }
263
264 return make_node<ExprStmt>(std::move(left_expr));
265 }
266
267 auto expr = parseTernary();
268 return make_node<ExprStmt>(std::move(expr));
269}
270
// Parses `function name(params) { body }`.
//
// Each parameter is either `Type name` (Type being Value, Clip, Literal or
// Array) or a bare `name`, which defaults to Value. The function name is
// recorded in `defined_functions`. Reserved or builtin-clashing names are
// reported through error() but parsing continues. The returned FunctionDef
// has a null global declaration.
std::unique_ptr<FunctionDef> Parser::parseFunctionDef() {
    consume(TokenType::Function, "Expect 'function'.");
    Token name = consume(TokenType::Identifier, "Expect function name.");
    if (name.value.starts_with("__internal_")) {
        error(name, "Function name cannot start with '__internal_'.");
    }

    // TODO: resize is not a built-in function in Expr mode.
    const bool clashes_with_builtin =
        get_builtin_functions().contains(name.value) ||
        name.value.starts_with("nth_") || name.value == "new" ||
        name.value == "resize";
    if (clashes_with_builtin) {
        error(name,
              std::format(
                  "Function name '{}' conflicts with a built-in function.",
                  name.value));
    }

    defined_functions.insert(name.value);

    consume(TokenType::LParen, "Expect '(' after function name.");
    std::vector<Parameter> params;
    if (peek().type != TokenType::RParen) {
        do {
            // Every parameter form starts with an identifier.
            if (peek().type != TokenType::Identifier) {
                error(peek(), "Expect a parameter declaration.");
                break; // Avoid infinite loop on error
            }

            Token type_token;
            Token name_token;
            Type param_type = Type::Value;

            if (peek(1).type == TokenType::Identifier) {
                // Two identifiers in a row: `Type name`.
                type_token = advance();
                const std::string& spelled = type_token.value;
                if (spelled == "Clip") {
                    param_type = Type::Clip;
                } else if (spelled == "Literal") {
                    param_type = Type::Literal;
                } else if (spelled == "Array") {
                    param_type = Type::Array;
                } else if (spelled != "Value") {
                    // "Value" keeps the default set above.
                    error(type_token,
                          std::format("Unknown type '{}' for parameter.",
                                      type_token.value));
                }
                name_token =
                    consume(TokenType::Identifier, "Expect parameter name.");
            } else {
                // Untyped parameter: synthesize a "Value" type token that
                // shares the parameter name's source range.
                name_token =
                    consume(TokenType::Identifier, "Expect parameter name.");
                type_token = {.type = TokenType::Identifier,
                              .value = "Value",
                              .range = name_token.range};
            }

            if (name_token.type == TokenType::Identifier &&
                name_token.value.starts_with("__internal_")) {
                error(name_token,
                      "Parameter name cannot start with '__internal_'.");
            }
            params.push_back({type_token, name_token, param_type});
        } while (match({TokenType::Comma}));
    }
    consume(TokenType::RParen, "Expect ')' after parameters.");

    auto body = parseBlock();
    return std::make_unique<FunctionDef>(
        FunctionDef(name, std::move(params), std::move(body), nullptr));
}
341
// Parses a `<global...>` annotation token into a GlobalDecl.
//
// Token text handled:
//   <global.all>        -> GlobalMode::All
//   <global.none>       -> GlobalMode::None
//   <global<a><b>...>   -> GlobalMode::Specific with the listed names
//
// Malformed input is reported through error() against the annotation token;
// a GlobalDecl is returned regardless. Every listed name becomes an
// Identifier token that carries the annotation's source range.
std::unique_ptr<GlobalDecl> Parser::parseGlobalDecl() {
    Token keyword = consume(TokenType::Global, "Expect '<global...>'.");

    // Strip the outer '<' and '>'. substr(1, ...) throws std::out_of_range on
    // an empty string, so a text too short to hold both delimiters yields an
    // empty payload, which is then reported as an invalid format below.
    const std::string content =
        keyword.value.length() >= 2
            ? keyword.value.substr(1, keyword.value.length() - 2)
            : std::string{};

    if (content == "global.all") {
        return std::make_unique<GlobalDecl>(
            GlobalDecl(keyword, GlobalMode::All));
    }
    if (content == "global.none") {
        return std::make_unique<GlobalDecl>(
            GlobalDecl(keyword, GlobalMode::None));
    }

    // The <cctype> classifiers have undefined behaviour for negative char
    // values, so convert through unsigned char before calling them.
    const auto is_ident_start = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0 || c == '_';
    };
    const auto is_ident_char = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    // <global<var1><var2>...>
    std::vector<Token> globals;

    const std::string prefix = "global";
    size_t pos = 0;
    if (content.starts_with(prefix)) {
        pos = prefix.length();
    } else {
        error(keyword, "Invalid global declaration format.");
    }

    // Parse each <varname>
    while (pos < content.length()) {
        if (content[pos] != '<') {
            error(keyword, "Expected '<' in global variable list.");
        }
        pos++; // '<'

        const size_t start = pos;
        while (pos < content.length() && content[pos] != '>') {
            pos++;
        }

        if (pos >= content.length()) {
            error(keyword, "Unclosed '<' in global variable list.");
        }

        std::string var_name = content.substr(start, pos - start);

        if (var_name.empty()) {
            error(keyword, "Empty variable name in global declaration.");
        }

        if (var_name.starts_with("__internal_")) {
            error(keyword, std::format("Invalid identifier '{}': cannot "
                                       "start with '__internal_'.",
                                       var_name));
        }

        // For an empty name, var_name[0] reads the terminating '\0', which
        // is not a valid identifier start.
        if (!is_ident_start(var_name[0])) {
            error(keyword, std::format("Invalid identifier '{}': must "
                                       "start with letter or underscore.",
                                       var_name));
        }

        for (size_t i = 1; i < var_name.length(); i++) {
            if (!is_ident_char(var_name[i])) {
                error(keyword, std::format("Invalid identifier '{}': "
                                           "contains invalid character.",
                                           var_name));
            }
        }

        globals.push_back({TokenType::Identifier, var_name, keyword.range});
        pos++; // '>'
    }

    if (globals.empty()) {
        error(keyword,
              "Global declaration must specify at least one variable.");
    }

    return std::make_unique<GlobalDecl>(
        GlobalDecl(keyword, GlobalMode::Specific, globals));
}
418
419std::unique_ptr<Expr> Parser::parseTernary() {
420 auto expr = parseLogicalOr();
421 if (match({TokenType::Question})) {
422 auto then_branch = parseTernary();
423 consume(TokenType::Colon, "Expect ':' for ternary operator.");
424 auto else_branch = parseTernary();
425 expr = make_node<TernaryExpr>(std::move(expr), std::move(then_branch),
426 std::move(else_branch));
427 }
428 return expr;
429}
430
431template <typename NextLevel, typename... TokenTypes>
432std::unique_ptr<Expr> Parser::parseBinary(NextLevel next_level,
433 TokenTypes... token_types) {
434 auto expr = (this->*next_level)();
435 while (match({token_types...})) {
436 Token op = previous();
437 auto right = (this->*next_level)();
438 expr = make_node<BinaryExpr>(std::move(expr), op, std::move(right));
439 }
440 return expr;
441}
442
443std::unique_ptr<Expr> Parser::parseLogicalOr() {
444 return parseBinary(&Parser::parseLogicalAnd, TokenType::LogicalOr);
445}
446
447std::unique_ptr<Expr> Parser::parseLogicalAnd() {
448 return parseBinary(&Parser::parseBitwiseOr, TokenType::LogicalAnd);
449}
450
451std::unique_ptr<Expr> Parser::parseBitwiseOr() {
452 return parseBinary(&Parser::parseBitwiseXor, TokenType::BitOr);
453}
454
455std::unique_ptr<Expr> Parser::parseBitwiseXor() {
456 return parseBinary(&Parser::parseBitwiseAnd, TokenType::BitXor);
457}
458
459std::unique_ptr<Expr> Parser::parseBitwiseAnd() {
460 return parseBinary(&Parser::parseEquality, TokenType::BitAnd);
461}
462
463std::unique_ptr<Expr> Parser::parseEquality() {
464 return parseBinary(&Parser::parseComparison, TokenType::Eq, TokenType::Ne);
465}
466
// Comparison level: operands come from parseTerm(); the accepted operator
// tokens start with TokenType::Gt and TokenType::Ge.
// NOTE(review): listing line 469 (the rest of the argument list and the
// closing `);`) is missing from this view -- presumably the remaining
// relational operator tokens; confirm against the real file.
467std::unique_ptr<Expr> Parser::parseComparison() {
468 return parseBinary(&Parser::parseTerm, TokenType::Gt, TokenType::Ge,
470}
471
472std::unique_ptr<Expr> Parser::parseTerm() {
473 return parseBinary(&Parser::parseFactor, TokenType::Plus, TokenType::Minus);
474}
475
476std::unique_ptr<Expr> Parser::parseFactor() {
477 return parseBinary(&Parser::parseExponent, TokenType::Star,
478 TokenType::Slash, TokenType::Percent);
479}
480
481std::unique_ptr<Expr> Parser::parseExponent() {
482 // '**' is right-associative: a ** b ** c = a ** (b ** c)
483 auto expr = parseUnary();
484 if (match({TokenType::StarStar})) {
485 Token op = previous();
486 auto right = parseExponent();
487 expr = make_node<BinaryExpr>(std::move(expr), op, std::move(right));
488 }
489 return expr;
490}
491
// Parses prefix operators (TokenType::Not, Minus, BitNot) or falls through
// to the postfix level.
//
// A minus sign directly in front of a number token is folded into the
// literal: the result is one NumberExpr whose text is prefixed with '-' and
// whose range starts at the minus sign, not a UnaryExpr.
std::unique_ptr<Expr> Parser::parseUnary() {
    if (!match({TokenType::Not, TokenType::Minus, TokenType::BitNot})) {
        return parsePostfix();
    }

    Token op = previous();
    const bool negative_literal =
        op.type == TokenType::Minus && peek().type == TokenType::Number;
    if (!negative_literal) {
        auto operand = parseUnary();
        return make_node<UnaryExpr>(op, std::move(operand));
    }

    Token number = advance();
    number.value = std::format("-{}", number.value);
    number.range.start = op.range.start;
    return make_node<NumberExpr>(number);
}
506
// Parses a primary expression followed by any number of postfix forms:
// a call `f(...)`, a bracket access `a[i]` or `a[x, y]` (optionally followed
// by `:suffix`), and a property access `a.b`.
//
// NOTE(review): listing lines 549 and 571 -- the heads of two `return`
// statements -- are missing from this view, so the node types built for
// constant pixel access and for `frame.width[N]` / `frame.height[N]` cannot
// be named here.
507std::unique_ptr<Expr> Parser::parsePostfix() {
508 auto expr = parsePrimary();
509 while (true) {
510 if (match({TokenType::LParen})) {
511 expr = finishCall(std::move(expr));
512 } else if (match({TokenType::LBracket})) {
513 auto index1 = parseTernary();
514
515 if (match({TokenType::Comma})) {
516 // Pixel access: array[offsetX, offsetY]
517 auto index2 = parseTernary();
518 consume(TokenType::RBracket, "Expect ']' after indices");
// Optional `:name` after the closing bracket is kept as ":name".
519 std::string suffix;
520 if (match({TokenType::Colon})) {
521 Token s = consume(TokenType::Identifier,
522 "Expect boundary suffix");
523 suffix = std::format(":{}", s.value);
524 }
525 if (auto* var = get_if<VariableExpr>(expr.get())) {
// Yields the token of a numeric literal, or of a negated numeric literal
// with '-' prepended to its text; nullptr for any other expression.
526 auto get_constant_token = [](Expr* e) -> Token* {
527 if (auto* num = get_if<NumberExpr>(e)) {
528 return &num->value;
529 }
530 if (auto* unary = get_if<UnaryExpr>(e)) {
531 if (unary->op.type == TokenType::Minus) {
532 if (auto* num = get_if<NumberExpr>(
533 unary->right.get())) {
// NOTE(review): function-local static. If BOTH indices take this path
// (UnaryExpr minus over a literal, e.g. a parenthesized literal, since
// parseUnary folds a plain `-<number>`), x_tok and y_tok point at this same
// object and *x_tok reads the y value. The static is also shared by every
// Parser instance. Verify and consider returning the token by value.
534 static Token neg_token;
535 neg_token = num->value;
536 neg_token.value =
537 std::format("-{}", neg_token.value);
538 return &neg_token;
539 }
540 }
541 }
542 return nullptr;
543 };
544
545 Token* x_tok = get_constant_token(index1.get());
546 Token* y_tok = get_constant_token(index2.get());
547
// Both offsets constant: a node is returned immediately (its construction
// starts on the missing listing line 549).
548 if ((x_tok != nullptr) && (y_tok != nullptr)) {
550 var->name, *x_tok, *y_tok, suffix);
551 }
552 }
// Reached when the target is not a plain variable or an offset is not
// constant. `expr` is left unchanged and the loop continues.
553 error(peek(), "Dynamic pixel access should use dyn().");
554 } else {
555 // Array access: array[index]
556 consume(TokenType::RBracket, "Expect ']' after array index.");
557 expr = make_node<ArrayAccessExpr>(std::move(expr),
558 std::move(index1));
559 }
560 } else if (match({TokenType::Dot})) {
561 Token prop = consume(TokenType::Identifier,
562 "Expect property name after '.'");
563 if (auto* var = get_if<VariableExpr>(expr.get())) {
564 // Check if this is frame.width[N] or frame.height[N]
565 if (var->name.value == "frame" &&
566 (prop.value == "width" || prop.value == "height")) {
567 if (match({TokenType::LBracket})) {
568 auto plane_index_expr = parseTernary();
569 consume(TokenType::RBracket,
570 "Expect ']' after plane index");
572 prop, std::move(plane_index_expr));
573 }
574 }
// A property access returns right away, so no further postfix forms are
// parsed after it.
575 return make_node<PropAccessExpr>(var->name, prop);
576 }
577 error(prop, "Invalid property access target.");
578 } else {
579 break;
580 }
581 }
582 return expr;
583}
584
585std::unique_ptr<Expr> Parser::finishCall(std::unique_ptr<Expr> callee) {
586 if (auto* var = get_if<VariableExpr>(callee.get())) {
587 std::vector<std::unique_ptr<Expr>> args;
588 if (peek().type != TokenType::RParen) {
589 do {
590 args.push_back(parseTernary());
591 } while (match({TokenType::Comma}));
592 }
593 consume(TokenType::RParen, "Expect ')' after arguments.");
594 return make_node<CallExpr>(var->name, std::move(args));
595 }
596 error(peek(), "Invalid call target.");
597 Token placeholder{
598 .type = TokenType::Identifier, .value = "error", .range = peek().range};
599 return make_node<CallExpr>(placeholder,
600 std::vector<std::unique_ptr<Expr>>{});
601}
602
603std::unique_ptr<Expr> Parser::parsePrimary() {
604 if (match({TokenType::Number})) {
605 return make_node<NumberExpr>(previous());
606 }
607 if (match({TokenType::Identifier})) {
608 return make_node<VariableExpr>(previous());
609 }
610 if (match({TokenType::LParen})) {
611 auto expr = parseTernary();
612 consume(TokenType::RParen, "Expect ')' after expression.");
613 return expr;
614 }
615 error(peek(), "Expect expression.");
616 Token placeholder{
617 .type = TokenType::Number, .value = "0", .range = peek().range};
618 return make_node<NumberExpr>(placeholder);
619}
620
621bool Parser::match(const std::vector<TokenType>& types) {
622 if (std::ranges::any_of(
623 types, [this](TokenType type) { return peek().type == type; })) {
624 advance();
625 return true;
626 }
627 return false;
628}
629
630Token Parser::consume(TokenType type, const std::string& message) {
631 if (peek().type == type) {
632 return advance();
633 }
634 error(peek(), message);
635 return peek();
636}
637
638Token Parser::advance() {
639 if (!isAtEnd()) {
640 current++;
641 }
642 return previous();
643}
644
645Token Parser::peek() const { return tokens[current]; }
646Token Parser::peek(int offset) const {
647 if (current + offset >= static_cast<int>(tokens.size())) {
648 return tokens.back(); // Return EOF token
649 }
650 return tokens[current + offset];
651}
652Token Parser::previous() const { return tokens[current - 1]; }
653bool Parser::isAtEnd() const { return peek().type == TokenType::EndOfFile; }
654
// Appends a formatted error located at `token` to the error list, unless
// panic mode is active (in which case the error is dropped).
//
// The text is prefixed with "at end: " for the EOF token and with
// "at '<token text>': " for every other token.
void Parser::reportError(const Token& token, const std::string& message) {
    if (panic_mode) {
        return;
    }

    const bool at_eof = token.type == TokenType::EndOfFile;
    std::string text = at_eof
                           ? std::format("at end: {}", message)
                           : std::format("at '{}': {}", token.value, message);

    errors.push_back({text, token.range});
}
669
670void Parser::error(const Token& token, const std::string& message) {
671 reportError(token, message);
672 if (!panic_mode) {
673 panic_mode = true;
674 }
675}
676
677void Parser::synchronize() {
678 panic_mode = false;
679
680 while (!isAtEnd()) {
681 if (current > 0) {
682 TokenType prev = tokens[current - 1].type;
683 if (prev == TokenType::Semicolon || prev == TokenType::Newline) {
684 while (peek().type == TokenType::Semicolon ||
685 peek().type == TokenType::Newline) {
686 advance();
687 }
688 return;
689 }
690 }
691
692 switch (peek().type) {
693 case TokenType::Function:
694 case TokenType::Global:
695 case TokenType::If:
696 case TokenType::While:
697 case TokenType::Return:
698 case TokenType::Goto:
699 case TokenType::RBrace:
700 case TokenType::Semicolon:
701 case TokenType::Newline:
702 return;
703 default:
704 break;
705 }
706
707 advance();
708 }
709}
710
711} // namespace infix2postfix
Parser(const std::vector< Token > &tokens)
Definition Parser.cpp:30
ParseResult parse()
Definition Parser.cpp:32
auto get_if(Wrapper *wrapper) -> decltype(std::get_if< T >(&wrapper->value))
Definition AST.hpp:442
const std::map< std::string, std::vector< BuiltinFunction > > & get_builtin_functions()
Definition Builtins.cpp:400
preprocessor_detail::Token Token
auto make_node(Args &&... args)
Definition AST.hpp:437
TokenType type