VapourSynth-llvmexpr
Loading...
Searching...
No Matches
Preprocessor.cpp
Go to the documentation of this file.
1
19
20#include "Preprocessor.hpp"
21#include "StandardLibrary.hpp"
22
23#include <algorithm>
24#include <cctype>
25#include <cmath>
26#include <cstdint>
27#include <deque>
28#include <format>
29#include <ranges>
30#include <set>
31#include <sstream>
32#include <stdexcept>
33#include <string_view>
34#include <unordered_map>
35#include <utility>
36#include <variant>
37
38namespace infix2postfix {
39
41
94
// A single lexical token: its text, the source position it was created with,
// and an optional pre-parsed numeric value.
// NOTE(review): both constructors initialise a `type` member whose declaration
// is not visible in this listing (the line after `struct Token {` is missing).
95struct Token {
// Token text as stored by whoever created the token.
97 std::string text;
// Line / column the token was created with.
98 int line;
99 int column;
// Parsed value; only meaningful when has_numeric_value is true.
100 std::variant<int64_t, double> numeric_value;
// Set to true only by the five-argument constructor below.
101 bool has_numeric_value = false;
// Defaults to size_t(-1); presumably "not part of a macro expansion" — TODO confirm.
102 size_t expansion_idx = static_cast<size_t>(-1);
103
105
// Builds a token that carries no numeric value (has_numeric_value stays false).
106 Token(TokenType t, std::string txt, int ln, int col)
107 : type(t), text(std::move(txt)), line(ln), column(col) {}
108
// Builds a token that carries `val` and marks has_numeric_value as true.
109 Token(TokenType t, std::string txt, int ln, int col,
110 const std::variant<int64_t, double>& val)
111 : type(t), text(std::move(txt)), line(ln), column(col),
112 numeric_value(val), has_numeric_value(true) {}
113};
114
116 public:
117 explicit PreprocessorTokenizer(std::string_view source) : source(source) {}
118
119 std::vector<Token> tokenize() {
120 std::vector<Token> tokens;
121 while (!eof()) {
122 Token tok = nextToken();
123 tokens.push_back(tok);
124 if (tok.type == TokenType::EndOfFile) {
125 break;
126 }
127 }
128 return tokens;
129 }
130
131 private:
132 std::string_view source;
133 size_t pos = 0;
134 int line = 1;
135 int column = 1;
136
137 [[nodiscard]] bool eof() const { return pos >= source.length(); }
138
139 [[nodiscard]] char peek(size_t offset = 0) const {
140 size_t p = pos + offset;
141 return p < source.length() ? source[p] : '\0';
142 }
143
144 [[nodiscard]] char peekSigned(int offset) const {
145 if (offset < 0) {
146 auto abs_offset = static_cast<size_t>(-offset);
147 if (abs_offset > pos) {
148 return '\0';
149 }
150 return source[pos - abs_offset];
151 }
152 return peek(static_cast<size_t>(offset));
153 }
154
155 char consume() {
156 if (eof()) {
157 return '\0';
158 }
159 char c = source[pos++];
160 if (c == '\n') {
161 line++;
162 column = 1;
163 } else {
164 column++;
165 }
166 return c;
167 }
168
169 Token nextToken() {
170 if (eof()) {
171 return {TokenType::EndOfFile, "", line, column};
172 }
173
174 int start_line = line;
175 int start_column = column;
176 char c = peek();
177
178 if (c == ' ' || c == '\t' || c == '\r') {
179 return consumeWhitespace(start_line, start_column);
180 }
181 if (c == '\n') {
182 consume();
183 return {TokenType::Newline, "\n", start_line, start_column};
184 }
185 if (c == '#') {
186 return consumeComment(start_line, start_column);
187 }
188 if (std::isdigit(c) != 0 || (c == '.' && std::isdigit(peek(1)) != 0)) {
189 return consumeNumber(start_line, start_column);
190 }
191 if (c == '@') {
192 if (peek(1) == '@') {
193 consume();
194 consume();
195 return {TokenType::Concat, "@@", start_line, start_column};
196 }
197 return consumeDirective(start_line, start_column);
198 }
199 if (std::isalpha(c) != 0 || c == '_' || c == '$') {
200 return consumeIdentifier(start_line, start_column);
201 }
202 return consumeOperator(start_line, start_column);
203 }
204
205 Token consumeWhitespace(int start_line, int start_column) {
206 size_t start = pos;
207 while (!eof() && (peek() == ' ' || peek() == '\t' || peek() == '\r')) {
208 consume();
209 }
210 std::string text(source.substr(start, pos - start));
211 return {TokenType::Whitespace, text, start_line, start_column};
212 }
213
214 Token consumeComment(int start_line, int start_column) {
215 size_t start = pos;
216 consume();
217 while (!eof() && peek() != '\n') {
218 consume();
219 }
220 std::string text(source.substr(start, pos - start));
221 return {TokenType::Comment, text, start_line, start_column};
222 }
223
224 Token consumeNumber(int start_line, int start_column) {
225 size_t start = pos;
226
227 if (peek() == '0' && (peek(1) == 'x' || peek(1) == 'X')) {
228 consume();
229 consume();
230 while (!eof() && (std::isxdigit(peek()) != 0 || peek() == '.' ||
231 peek() == 'p' || peek() == 'P' ||
232 ((peek() == '+' || peek() == '-') &&
233 (std::tolower(peekSigned(-1)) == 'p')))) {
234 consume();
235 }
236 } else if (peek() == '0' && std::isdigit(peek(1)) != 0) {
237 while (!eof() && peek() >= '0' && peek() <= '7') {
238 consume();
239 }
240 } else {
241 while (!eof() && (std::isdigit(peek()) != 0 || peek() == '.' ||
242 peek() == 'e' || peek() == 'E' ||
243 ((peek() == '+' || peek() == '-') &&
244 (std::tolower(peekSigned(-1)) == 'e')))) {
245 consume();
246 }
247 }
248
249 std::string text(source.substr(start, pos - start));
250
251 try {
252 if (text.find('.') != std::string::npos ||
253 text.find('e') != std::string::npos ||
254 text.find('E') != std::string::npos) {
255 double val = std::stod(text);
256 return {TokenType::Number, text, start_line, start_column, val};
257 }
258 int64_t val = std::stoll(text, nullptr, 0);
259 return {TokenType::Number, text, start_line, start_column, val};
260 } catch (...) {
261 return {TokenType::Number, text, start_line, start_column};
262 }
263 }
264
265 Token consumeDirective(int start_line, int start_column) {
266 consume();
267
268 if (eof() || (std::isalpha(peek()) == 0 && peek() != '_')) {
269 return {TokenType::At, "@", start_line, start_column};
270 }
271
272 size_t start = pos;
273 while (!eof() && (std::isalnum(peek()) != 0 || peek() == '_')) {
274 consume();
275 }
276
277 std::string directive(source.substr(start, pos - start));
278 std::string full_text = "@" + directive;
279
280 TokenType type = TokenType::At;
281 if (directive == "define") {
282 type = TokenType::AtDefine;
283 } else if (directive == "undef") {
284 type = TokenType::AtUndef;
285 } else if (directive == "ifdef") {
286 type = TokenType::AtIfdef;
287 } else if (directive == "ifndef") {
288 type = TokenType::AtIfndef;
289 } else if (directive == "if") {
290 type = TokenType::AtIf;
291 } else if (directive == "else") {
292 type = TokenType::AtElse;
293 } else if (directive == "endif") {
294 type = TokenType::AtEndif;
295 } else if (directive == "error") {
296 type = TokenType::AtError;
297 } else if (directive == "requires") {
298 type = TokenType::AtRequires;
299 }
300
301 return {type, full_text, start_line, start_column};
302 }
303
304 Token consumeIdentifier(int start_line, int start_column) {
305 size_t start = pos;
306 while (!eof() &&
307 (std::isalnum(peek()) != 0 || peek() == '_' || peek() == '$')) {
308 consume();
309 }
310 std::string text(source.substr(start, pos - start));
311 return {TokenType::Identifier, text, start_line, start_column};
312 }
313
/// Consumes an operator or punctuation token.
///
/// Two-character operators are matched first; otherwise exactly one
/// character is consumed and mapped to its token type.
/// Throws PreprocessorError for a character that is not a known operator
/// (the character has already been consumed at that point).
Token consumeOperator(int start_line, int start_column) {
    struct TwoCharOp {
        char first;
        char second;
        TokenType type;
        const char* text;
    };
    static constexpr TwoCharOp kTwoCharOps[] = {
        {'*', '*', TokenType::Power, "**"},
        {'=', '=', TokenType::Equal, "=="},
        {'!', '=', TokenType::NotEqual, "!="},
        {'>', '=', TokenType::GreaterEqual, ">="},
        {'<', '=', TokenType::LessEqual, "<="},
        {'&', '&', TokenType::LogicalAnd, "&&"},
        {'|', '|', TokenType::LogicalOr, "||"},
    };

    struct OneCharOp {
        char ch;
        TokenType type;
    };
    static constexpr OneCharOp kOneCharOps[] = {
        {'+', TokenType::Plus},      {'-', TokenType::Minus},
        {'*', TokenType::Multiply},  {'/', TokenType::Divide},
        {'%', TokenType::Modulo},    {'>', TokenType::Greater},
        {'<', TokenType::Less},      {'!', TokenType::LogicalNot},
        {'&', TokenType::BitAnd},    {'|', TokenType::BitOr},
        {'^', TokenType::BitXor},    {'~', TokenType::BitNot},
        {'(', TokenType::Lparen},    {')', TokenType::Rparen},
        {'[', TokenType::Lbracket},  {']', TokenType::Rbracket},
        {'{', TokenType::Lbrace},    {'}', TokenType::Rbrace},
        {',', TokenType::Comma},     {'.', TokenType::Dot},
        {'?', TokenType::Question},  {':', TokenType::Colon},
        {';', TokenType::Semicolon}, {'=', TokenType::Assign},
    };

    const char c = peek();
    const char next = peek(1);

    for (const auto& op : kTwoCharOps) {
        if (c == op.first && next == op.second) {
            consume();
            consume();
            return {op.type, op.text, start_line, start_column};
        }
    }

    consume();
    for (const auto& op : kOneCharOps) {
        if (c == op.ch) {
            return {op.type, std::string(1, c), start_line, start_column};
        }
    }

    throw PreprocessorError(
        std::format("Unexpected character '{}' at line {}, column {}", c,
                    start_line, start_column));
}
439};
440
441std::string tokensToString(const std::vector<Token>& tokens,
442 bool preserve_whitespace = false) {
443 std::string result;
444 for (const auto& tok : tokens) {
445 if (!preserve_whitespace && (tok.type == TokenType::Whitespace ||
446 tok.type == TokenType::Comment)) {
447 continue;
448 }
449 result += tok.text;
450 }
451 return result;
452}
453
454std::vector<Token> trimTokens(const std::vector<Token>& tokens) {
455 if (tokens.empty()) {
456 return tokens;
457 }
458
459 size_t start = 0;
460 while (start < tokens.size() &&
461 (tokens[start].type == TokenType::Whitespace ||
462 tokens[start].type == TokenType::Comment)) {
463 start++;
464 }
465
466 size_t end = tokens.size();
467 while (end > start && (tokens[end - 1].type == TokenType::Whitespace ||
468 tokens[end - 1].type == TokenType::Comment)) {
469 end--;
470 }
471
472 return {tokens.begin() + static_cast<std::ptrdiff_t>(start),
473 tokens.begin() + static_cast<std::ptrdiff_t>(end)};
474}
475
476bool isSkippable(const Token& t) {
477 return t.type == TokenType::Whitespace || t.type == TokenType::Comment ||
478 t.type == TokenType::BeginMacroExpansion ||
479 t.type == TokenType::EndMacroExpansion;
480}
481
482} // namespace preprocessor_detail
483
487
488namespace preprocessor {
489
490struct Macro {
491 std::string name;
492 bool is_function_like = false;
493 std::vector<std::string> params;
494 std::vector<Token> body;
495};
496
498 public:
499 void define(Macro macro) { macros[macro.name] = std::move(macro); }
500
501 void undef(const std::string& name) { macros.erase(name); }
502
503 [[nodiscard]] const Macro* find(const std::string& name) const {
504 auto it = macros.find(name);
505 return it != macros.end() ? &it->second : nullptr;
506 }
507
508 [[nodiscard]] bool contains(const std::string& name) const {
509 return macros.contains(name);
510 }
511
512 [[nodiscard]] auto begin() const { return macros.begin(); }
513 [[nodiscard]] auto end() const { return macros.end(); }
514
515 private:
516 std::unordered_map<std::string, Macro> macros;
517};
518
520 public:
521 explicit TokenStream(std::vector<Token> p_tokens) {
522 for (auto&& tok : p_tokens) {
523 tokens.push_back(std::move(tok));
524 }
525 }
526
527 [[nodiscard]] bool is_eof() const {
528 return tokens.empty() || tokens.front().type == TokenType::EndOfFile;
529 }
530
531 [[nodiscard]] Token peek(size_t offset = 0) const {
532 if (offset >= tokens.size()) {
533 static Token eof_token{TokenType::EndOfFile, "", 0, 0};
534 return eof_token;
535 }
536 return tokens[offset];
537 }
538
540 if (is_eof()) {
541 static Token eof_token{TokenType::EndOfFile, "", 0, 0};
542 return eof_token;
543 }
544 Token tok = std::move(tokens.front());
545 tokens.pop_front();
546 return tok;
547 }
548
549 void prepend(const std::vector<Token>& tokens) {
550 for (const auto& tok : std::views::reverse(tokens)) {
551 this->tokens.push_front(tok);
552 }
553 }
554
557 consume();
558 }
559 }
560
561 private:
562 std::deque<Token> tokens;
563};
564
566 public:
567 explicit Evaluator(const std::vector<Token>& tokens)
568 : tokens(tokens), stream(tokens) {}
569
570 std::variant<int64_t, double> evaluate() {
571 pos = 0;
572 skipWhitespace();
573
574 if (is_eof()) {
575 throw std::runtime_error("Cannot evaluate an empty expression");
576 }
577
578 Value result = parseConditional();
579 skipWhitespace();
580
581 if (!is_eof()) {
582 throw std::runtime_error("Unexpected tokens at end of expression");
583 }
584
585 return result.val;
586 }
587
588 std::optional<std::variant<int64_t, double>> tryEvaluate() {
589 try {
590 return evaluate();
591 } catch (const PreprocessorError&) {
592 throw;
593 } catch (const std::runtime_error&) {
594 return std::nullopt;
595 }
596 }
597
598 static bool is_truthy(const std::variant<int64_t, double>& val) {
599 return Value(val).is_truthy();
600 }
601
602 static std::string toString(const std::variant<int64_t, double>& val) {
603 return Value(val).to_string();
604 }
605
606 private:
/// Result of evaluating a (sub)expression: either an int64_t or a double.
struct Value {
    std::variant<int64_t, double> val;

    /// Wraps an existing variant; defaults to the integer 0.
    explicit Value(std::variant<int64_t, double> v = int64_t(0)) : val(v) {}
    /// Implicit on purpose so `return {some_int};` works in the parser.
    Value(int64_t v) : val(v) {}
    /// Implicit on purpose so `return {some_double};` works in the parser.
    Value(double v) : val(v) {}

    /// True when the stored alternative is a double.
    [[nodiscard]] bool is_double() const {
        return std::get_if<double>(&val) != nullptr;
    }

    /// The stored value as a double (integers are converted).
    [[nodiscard]] double to_double() const {
        if (const double* as_double = std::get_if<double>(&val)) {
            return *as_double;
        }
        return static_cast<double>(std::get<int64_t>(val));
    }

    /// Non-zero values (compared as double) are truthy.
    [[nodiscard]] bool is_truthy() const { return !(to_double() == 0.0); }

    /// Formats the value as text. Doubles whose formatted form contains
    /// neither '.' nor an exponent get ".0" appended so they still read as
    /// floating point.
    /// NOTE(review): non-finite doubles would also receive the ".0" suffix
    /// (e.g. "inf.0") — confirm whether that is intended.
    [[nodiscard]] std::string to_string() const {
        const double* as_double = std::get_if<double>(&val);
        if (as_double == nullptr) {
            return std::to_string(std::get<int64_t>(val));
        }
        std::string text = std::format("{}", *as_double);
        const bool looks_floating =
            text.find('.') != std::string::npos ||
            text.find('e') != std::string::npos ||
            text.find('E') != std::string::npos;
        if (!looks_floating) {
            text += ".0";
        }
        return text;
    }
};
641
642 const std::vector<Token>& tokens;
643 TokenStream stream;
644 size_t pos = 0;
645
646 [[nodiscard]] bool is_eof() const {
647 return pos >= tokens.size() || tokens[pos].type == TokenType::EndOfFile;
648 }
649
650 [[nodiscard]] const Token& peek(size_t offset = 0) const {
651 size_t p = pos + offset;
652 if (p >= tokens.size()) {
653 static Token eof_token{TokenType::EndOfFile, "", 0, 0};
654 return eof_token;
655 }
656 return tokens[p];
657 }
658
659 Token consume() {
660 if (is_eof()) {
661 static Token eof_token{TokenType::EndOfFile, "", 0, 0};
662 return eof_token;
663 }
664 return tokens[pos++];
665 }
666
667 void skipWhitespace() {
668 while (!is_eof() && preprocessor_detail::isSkippable(peek())) {
669 consume();
670 }
671 }
672
673 Value parsePrimary() {
674 skipWhitespace();
675
676 const Token& tok = peek();
677
678 if (tok.type == TokenType::Number) {
679 consume();
680 if (tok.has_numeric_value) {
681 return Value(tok.numeric_value);
682 }
683 try {
684 if (tok.text.find('.') != std::string::npos ||
685 tok.text.find('e') != std::string::npos ||
686 tok.text.find('E') != std::string::npos) {
687 return {std::stod(tok.text)};
688 }
689 return {static_cast<int64_t>(std::stoll(tok.text, nullptr, 0))};
690 } catch (...) {
691 throw std::runtime_error("Invalid number: " + tok.text);
692 }
693 }
694
695 if (tok.type == TokenType::Identifier) {
696 std::string name = tok.text;
697 throw std::runtime_error(
698 "Unexpanded identifier in constant expression: " + name);
699 }
700
701 if (tok.type == TokenType::Lparen) {
702 consume();
703 skipWhitespace();
704 Value val = parseConditional();
705 skipWhitespace();
706 if (peek().type != TokenType::Rparen) {
707 throw std::runtime_error("Expected ')'");
708 }
709 consume();
710 return val;
711 }
712
713 throw std::runtime_error("Unexpected token in expression: " + tok.text);
714 }
715
716 Value parseUnary() {
717 skipWhitespace();
718
719 if (peek().type == TokenType::Minus) {
720 consume();
721 skipWhitespace();
722 Value val = parseUnary();
723 if (val.is_double()) {
724 return {-val.to_double()};
725 }
726 return {-std::get<int64_t>(val.val)};
727 }
728
729 if (peek().type == TokenType::LogicalNot) {
730 consume();
731 skipWhitespace();
732 return {static_cast<int64_t>(!parseUnary().is_truthy())};
733 }
734
735 if (peek().type == TokenType::Plus) {
736 consume();
737 skipWhitespace();
738 return parseUnary();
739 }
740
741 if (peek().type == TokenType::BitNot) {
742 consume();
743 skipWhitespace();
744 Value val = parseUnary();
745 if (val.is_double()) {
746 return {(~static_cast<int64_t>(std::round(val.to_double())))};
747 }
748 return {~std::get<int64_t>(val.val)};
749 }
750
751 return parsePrimary();
752 }
753
754 Value parsePower() {
755 Value left = parseUnary();
756 skipWhitespace();
757
758 if (peek().type == TokenType::Power) {
759 consume();
760 skipWhitespace();
761 Value right = parsePower();
762 return {std::pow(left.to_double(), right.to_double())};
763 }
764
765 return left;
766 }
767
768 Value parseFactor() {
769 Value left = parsePower();
770
771 while (true) {
772 skipWhitespace();
773 const Token& op = peek();
774
775 if (op.type != TokenType::Multiply &&
776 op.type != TokenType::Divide && op.type != TokenType::Modulo) {
777 break;
778 }
779
780 consume();
781 skipWhitespace();
782 Value right = parsePower();
783
784 if (left.is_double() || right.is_double()) {
785 double l = left.to_double();
786 double r = right.to_double();
787
788 switch (op.type) {
789 case TokenType::Multiply:
790 left = Value(l * r);
791 break;
792 case TokenType::Divide:
793 left = Value(l / r);
794 break;
795 case TokenType::Modulo:
796 throw std::runtime_error(
797 "Modulo requires integer operands");
798 default:
799 std::unreachable();
800 }
801 } else {
802 int64_t l = std::get<int64_t>(left.val);
803 int64_t r = std::get<int64_t>(right.val);
804
805 switch (op.type) {
806 case TokenType::Multiply:
807 left = Value(l * r);
808 break;
809 case TokenType::Divide:
810 left = Value(l / r);
811 break;
812 case TokenType::Modulo:
813 left = Value(l % r);
814 break;
815 default:
816 std::unreachable();
817 }
818 }
819 }
820
821 return left;
822 }
823
824 Value parseTerm() {
825 Value left = parseFactor();
826
827 while (true) {
828 skipWhitespace();
829 const Token& op = peek();
830
831 if (op.type != TokenType::Plus && op.type != TokenType::Minus) {
832 break;
833 }
834
835 consume();
836 skipWhitespace();
837 Value right = parseFactor();
838
839 if (left.is_double() || right.is_double()) {
840 double l = left.to_double();
841 double r = right.to_double();
842 left = Value(op.type == TokenType::Plus ? (l + r) : (l - r));
843 } else {
844 int64_t l = std::get<int64_t>(left.val);
845 int64_t r = std::get<int64_t>(right.val);
846 left = Value(op.type == TokenType::Plus ? (l + r) : (l - r));
847 }
848 }
849
850 return left;
851 }
852
853 Value parseBitwiseOr() {
854 Value left = parseBitwiseXor();
855
856 while (true) {
857 skipWhitespace();
858 const Token& op = peek();
859
860 if (op.type != TokenType::BitOr) {
861 break;
862 }
863
864 consume();
865 skipWhitespace();
866 Value right = parseBitwiseXor();
867
868 int64_t l = left.is_double()
869 ? static_cast<int64_t>(std::round(left.to_double()))
870 : std::get<int64_t>(left.val);
871 int64_t r =
872 right.is_double()
873 ? static_cast<int64_t>(std::round(right.to_double()))
874 : std::get<int64_t>(right.val);
875
876 left = Value(l | r);
877 }
878
879 return left;
880 }
881
882 Value parseBitwiseXor() {
883 Value left = parseBitwiseAnd();
884
885 while (true) {
886 skipWhitespace();
887 const Token& op = peek();
888
889 if (op.type != TokenType::BitXor) {
890 break;
891 }
892
893 consume();
894 skipWhitespace();
895 Value right = parseBitwiseAnd();
896
897 int64_t l = left.is_double()
898 ? static_cast<int64_t>(std::round(left.to_double()))
899 : std::get<int64_t>(left.val);
900 int64_t r =
901 right.is_double()
902 ? static_cast<int64_t>(std::round(right.to_double()))
903 : std::get<int64_t>(right.val);
904
905 left = Value(l ^ r);
906 }
907
908 return left;
909 }
910
911 Value parseBitwiseAnd() {
912 Value left = parseEquality();
913
914 while (true) {
915 skipWhitespace();
916 const Token& op = peek();
917
918 if (op.type != TokenType::BitAnd) {
919 break;
920 }
921
922 consume();
923 skipWhitespace();
924 Value right = parseEquality();
925
926 int64_t l = left.is_double()
927 ? static_cast<int64_t>(std::round(left.to_double()))
928 : std::get<int64_t>(left.val);
929 int64_t r =
930 right.is_double()
931 ? static_cast<int64_t>(std::round(right.to_double()))
932 : std::get<int64_t>(right.val);
933
934 left = Value(l & r);
935 }
936
937 return left;
938 }
939
940 Value parseEquality() {
941 Value left = parseComparison();
942
943 while (true) {
944 skipWhitespace();
945 const Token& op = peek();
946
947 if (op.type != TokenType::Equal && op.type != TokenType::NotEqual) {
948 break;
949 }
950
951 TokenType op_type = op.type;
952 consume();
953 skipWhitespace();
954 Value right = parseComparison();
955
956 double l = left.to_double();
957 double r = right.to_double();
958 bool result = (op_type == TokenType::Equal) ? (l == r) : (l != r);
959
960 left = Value(static_cast<int64_t>(result));
961 }
962
963 return left;
964 }
965
966 Value parseComparison() {
967 Value left = parseTerm();
968
969 while (true) {
970 skipWhitespace();
971 const Token& op = peek();
972
973 if (op.type != TokenType::Greater &&
974 op.type != TokenType::GreaterEqual &&
975 op.type != TokenType::Less && op.type != TokenType::LessEqual) {
976 break;
977 }
978
979 TokenType op_type = op.type;
980 consume();
981 skipWhitespace();
982 Value right = parseTerm();
983
984 double l = left.to_double();
985 double r = right.to_double();
986 bool result = false;
987
988 switch (op_type) {
989 case TokenType::Greater:
990 result = l > r;
991 break;
992 case TokenType::GreaterEqual:
993 result = l >= r;
994 break;
995 case TokenType::Less:
996 result = l < r;
997 break;
998 case TokenType::LessEqual:
999 result = l <= r;
1000 break;
1001 default:
1002 std::unreachable();
1003 }
1004
1005 left = Value(static_cast<int64_t>(result));
1006 }
1007
1008 return left;
1009 }
1010
1011 Value parseLogicalAnd() {
1012 Value left = parseBitwiseOr();
1013
1014 while (true) {
1015 skipWhitespace();
1016 if (peek().type != TokenType::LogicalAnd) {
1017 break;
1018 }
1019 consume();
1020 skipWhitespace();
1021 Value right = parseBitwiseOr();
1022 left = Value(
1023 static_cast<int64_t>(left.is_truthy() && right.is_truthy()));
1024 }
1025
1026 return left;
1027 }
1028
1029 Value parseLogicalOr() {
1030 Value left = parseLogicalAnd();
1031
1032 while (true) {
1033 skipWhitespace();
1034 if (peek().type != TokenType::LogicalOr) {
1035 break;
1036 }
1037 consume();
1038 skipWhitespace();
1039 Value right = parseLogicalAnd();
1040 left = Value(
1041 static_cast<int64_t>(left.is_truthy() || right.is_truthy()));
1042 }
1043
1044 return left;
1045 }
1046
1047 Value parseConditional() {
1048 Value condition = parseLogicalOr();
1049 skipWhitespace();
1050
1051 if (peek().type != TokenType::Question) {
1052 return condition;
1053 }
1054
1055 consume();
1056 bool cond_truthy = condition.is_truthy();
1057 skipWhitespace();
1058
1059 if (cond_truthy) {
1060 Value then_val = parseConditional();
1061 skipWhitespace();
1062 if (peek().type != TokenType::Colon) {
1063 throw std::runtime_error(
1064 "Expected ':' in conditional expression");
1065 }
1066 consume();
1067 skipElseBranch();
1068 return then_val;
1069 }
1070
1071 skipThenBranchToColon();
1072 skipWhitespace();
1073 if (peek().type != TokenType::Colon) {
1074 throw std::runtime_error("Expected ':' in conditional expression");
1075 }
1076 consume();
1077 skipWhitespace();
1078 Value else_val = parseConditional();
1079 return else_val;
1080 }
1081
1082 void skipThenBranchToColon() {
1083 int nested = 0;
1084 while (!is_eof()) {
1085 const Token& tok = peek();
1086
1087 if (tok.type == TokenType::Question) {
1088 nested++;
1089 consume();
1090 } else if (tok.type == TokenType::Colon) {
1091 if (nested == 0) {
1092 break;
1093 }
1094 nested--;
1095 consume();
1096 } else {
1097 consume();
1098 }
1099 }
1100 }
1101
1102 void skipElseBranch() {
1103 int nested = 0;
1104 int paren_depth = 0;
1105
1106 while (!is_eof()) {
1107 const Token& tok = peek();
1108
1109 if (tok.type == TokenType::Lparen) {
1110 paren_depth++;
1111 consume();
1112 } else if (tok.type == TokenType::Rparen) {
1113 if (paren_depth == 0) {
1114 break;
1115 }
1116 paren_depth--;
1117 consume();
1118 } else if (tok.type == TokenType::Comma && paren_depth == 0) {
1119 break;
1120 } else if (tok.type == TokenType::Question) {
1121 nested++;
1122 consume();
1123 } else if (tok.type == TokenType::Colon) {
1124 if (nested == 0) {
1125 break;
1126 }
1127 nested--;
1128 consume();
1129 } else {
1130 consume();
1131 }
1132 }
1133 }
1134};
1135
1137 public:
1138 Expander(const MacroTable& macros, int recursion_depth = 0)
1139 : macros(macros), recursion_depth(recursion_depth) {}
1140
1141 std::vector<Token> expand(const std::vector<Token>& input) {
1142 TokenStream stream(input);
1143 std::vector<Token> result;
1144
1145 while (!stream.is_eof()) {
1146 const auto tok = stream.peek();
1147
1148 if (tok.type == TokenType::Identifier) {
1149 if (tok.text == "defined") {
1150 handleDefinedOperator(stream, result);
1151 continue;
1152 }
1153
1154 if (tok.text == "consteval" || tok.text == "is_consteval") {
1155 handleConstevalIntrinsics(stream, result, tok);
1156 continue;
1157 }
1158
1159 if (tok.text == "static_assert") {
1160 handleStaticAssertIntrinsic(stream, result, tok);
1161 continue;
1162 }
1163
1164 const Macro* macro = macros.find(tok.text);
1165 if (macro == nullptr) {
1166 result.push_back(stream.consume());
1167 continue;
1168 }
1169
1170 Token macro_token = stream.consume();
1171
1172 if (macro->is_function_like) {
1173 expandFunctionLikeMacro(stream, result, macro_token,
1174 *macro);
1175 } else {
1176 expandObjectLikeMacro(stream, result, macro_token, *macro);
1177 }
1178 } else {
1179 result.push_back(stream.consume());
1180 }
1181 }
1182
1183 return result;
1184 }
1185
1186 [[nodiscard]] std::vector<MacroExpansion> getExpansions() const {
1187 return expansions;
1188 }
1189
1190 private:
1191 static constexpr int MAX_RECURSION = 1000;
1192
1193 const MacroTable& macros;
1194 int recursion_depth;
1195 std::vector<MacroExpansion> expansions;
1196
1197 void handleDefinedOperator(TokenStream& stream,
1198 std::vector<Token>& result) {
1199 const auto tok = stream.peek();
1200 int def_line = tok.line;
1201 int def_col = tok.column;
1202 stream.consume();
1203 stream.skipWhitespace();
1204
1205 bool has_paren = false;
1206 if (!stream.is_eof() && stream.peek().type == TokenType::Lparen) {
1207 has_paren = true;
1208 stream.consume();
1209 stream.skipWhitespace();
1210 }
1211
1212 if (!stream.is_eof() && stream.peek().type == TokenType::Identifier) {
1213 Token macro_token = stream.consume();
1214 std::string macro_name = macro_token.text;
1215
1216 if (has_paren) {
1217 stream.skipWhitespace();
1218 if (!stream.is_eof() &&
1219 stream.peek().type == TokenType::Rparen) {
1220 stream.consume();
1221 }
1222 }
1223
1224 std::string value = macros.contains(macro_name) ? "1" : "0";
1225 result.emplace_back(
1226 TokenType::Number, value, def_line, def_col,
1227 static_cast<int64_t>(macros.contains(macro_name) ? 1 : 0));
1228 }
1229 }
1230
1231 void handleConstevalIntrinsics(TokenStream& stream,
1232 std::vector<Token>& result,
1233 const Token& tok) {
1234 bool is_probe = (tok.text == "is_consteval");
1235 int start_line = tok.line;
1236 int start_col = tok.column;
1237
1238 stream.consume();
1239 stream.skipWhitespace();
1240
1241 if (stream.is_eof() || stream.peek().type != TokenType::Lparen) {
1242 result.emplace_back(TokenType::Identifier, tok.text, start_line,
1243 start_col);
1244 return;
1245 }
1246
1247 stream.consume();
1248
1249 std::vector<Token> arg_tokens;
1250 int depth = 0;
1251 bool found = false;
1252
1253 while (!stream.is_eof()) {
1254 const auto t = stream.peek();
1255
1256 if (t.type == TokenType::Lparen) {
1257 depth++;
1258 arg_tokens.push_back(stream.consume());
1259 } else if (t.type == TokenType::Rparen) {
1260 if (depth == 0) {
1261 found = true;
1262 stream.consume();
1263 break;
1264 }
1265 depth--;
1266 arg_tokens.push_back(stream.consume());
1267 } else {
1268 arg_tokens.push_back(stream.consume());
1269 }
1270 }
1271
1272 if (!found) {
1273 result.emplace_back(TokenType::Identifier, tok.text, start_line,
1274 start_col);
1275 return;
1276 }
1277
1278 arg_tokens = expand(arg_tokens);
1279
1280 std::optional<std::variant<int64_t, double>> maybe;
1281 try {
1282 Evaluator evaluator(arg_tokens);
1283 maybe = evaluator.tryEvaluate();
1284 } catch (const PreprocessorError&) {
1285 if (is_probe) {
1286 maybe = std::nullopt;
1287 } else {
1288 throw;
1289 }
1290 } catch (...) {
1291 maybe = std::nullopt;
1292 }
1293
1294 if (is_probe) {
1295 std::string value = maybe ? "1" : "0";
1296 result.emplace_back(TokenType::Number, value, start_line, start_col,
1297 static_cast<int64_t>(maybe ? 1 : 0));
1298 } else {
1299 if (!maybe) {
1300 throw PreprocessorError(
1301 "consteval() requires a constant expression");
1302 }
1303 std::string value = Evaluator::toString(*maybe);
1304 result.emplace_back(TokenType::Number, value, start_line, start_col,
1305 *maybe);
1306 }
1307 }
1308
/// Handles the `static_assert(condition, message)` intrinsic.
///
/// Both arguments are macro-expanded by a nested expander. The condition
/// must evaluate as a constant expression. On success a Number token "1" is
/// appended to `result`.
///
/// Throws PreprocessorError when the argument count is not 2, when the
/// condition is not a constant expression, or when it evaluates to false
/// (the error text then includes the message argument).
/// Without a following '(' the identifier is re-emitted unchanged.
void handleStaticAssertIntrinsic(TokenStream& stream,
                                 std::vector<Token>& result,
                                 const Token& tok) {
    const int start_line = tok.line;
    const int start_col = tok.column;

    stream.consume();
    stream.skipWhitespace();

    const bool has_call =
        !stream.is_eof() && stream.peek().type == TokenType::Lparen;
    if (!has_call) {
        result.emplace_back(TokenType::Identifier, tok.text, start_line,
                            start_col);
        return;
    }
    stream.consume();  // '('

    const std::vector<std::vector<Token>> args = parseMacroArguments(stream);
    if (args.size() != 2) {
        throw PreprocessorError(
            "static_assert() expects 2 arguments: condition, message");
    }

    Expander nested_expander(macros, recursion_depth + 1);
    const std::vector<Token> cond_tokens = nested_expander.expand(args[0]);
    const std::vector<Token> msg_tokens = nested_expander.expand(args[1]);

    std::optional<std::variant<int64_t, double>> maybe;
    try {
        Evaluator evaluator(cond_tokens);
        maybe = evaluator.tryEvaluate();
    } catch (const PreprocessorError&) {
        throw;
    } catch (...) {
        maybe = std::nullopt;
    }

    if (!maybe.has_value()) {
        throw PreprocessorError(
            "static_assert() requires a constant expression condition");
    }

    if (!Evaluator::is_truthy(*maybe)) {
        std::string message =
            preprocessor_detail::tokensToString(msg_tokens, true);
        if (message.empty()) {
            message = "static_assert condition is false";
        }
        throw PreprocessorError(
            std::format("static_assert failed: {}", message));
    }

    result.emplace_back(TokenType::Number, "1", start_line, start_col,
                        static_cast<int64_t>(1));
}
1364
1365 std::vector<std::vector<Token>> parseMacroArguments(TokenStream& stream) {
1366 std::vector<std::vector<Token>> arguments;
1367 std::vector<Token> current_arg;
1368 int paren_depth = 0;
1369
1370 while (!stream.is_eof()) {
1371 const auto arg_tok = stream.peek();
1372
1373 if (arg_tok.type == TokenType::Lparen) {
1374 paren_depth++;
1375 current_arg.push_back(stream.consume());
1376 } else if (arg_tok.type == TokenType::Rparen) {
1377 if (paren_depth == 0) {
1378 arguments.push_back(
1379 preprocessor_detail::trimTokens(current_arg));
1380 stream.consume();
1381 break;
1382 }
1383 paren_depth--;
1384 current_arg.push_back(stream.consume());
1385 } else if (arg_tok.type == TokenType::Comma && paren_depth == 0) {
1386 arguments.push_back(
1387 preprocessor_detail::trimTokens(current_arg));
1388 current_arg.clear();
1389 stream.consume();
1390 } else {
1391 current_arg.push_back(stream.consume());
1392 }
1393 }
1394
1395 return arguments;
1396 }
1397
/// Expands one use of a function-like macro.
///
/// @p tok is the macro name, which is not in @p result when this is
/// called. If the name is not followed by '(' (ignoring whitespace) it is
/// not an invocation and is emitted as a plain identifier. Otherwise the
/// arguments are parsed, substituted into the macro body, the result is
/// expanded again, constant-folded where possible, and pushed back onto
/// the front of @p stream wrapped in Begin/EndMacroExpansion markers.
///
/// @param stream Source of tokens; receives the replacement tokens.
/// @param result Output accumulated so far by the enclosing expand().
/// @param tok    The identifier token naming the macro.
/// @param macro  The macro definition to apply.
/// @throws PreprocessorError on an argument-count mismatch or when the
///         recursion limit is exceeded.
void expandFunctionLikeMacro(TokenStream& stream,
                             std::vector<Token>& result, const Token& tok,
                             const Macro& macro) {
    // Whitespace between the name and a possible '(' is copied to the
    // output tentatively; it is removed again if this turns out to be a
    // real invocation.
    const size_t pre_ws_start = result.size();
    while (!stream.is_eof() &&
           stream.peek().type == TokenType::Whitespace) {
        result.push_back(stream.consume());
    }

    if (stream.is_eof() || stream.peek().type != TokenType::Lparen) {
        // Not an invocation. The name must be placed *before* the
        // whitespace that followed it in the input; appending it after
        // the whitespace would reorder the tokens and could glue the name
        // to whatever token comes next.
        result.emplace(
            result.begin() + static_cast<std::ptrdiff_t>(pre_ws_start),
            TokenType::Identifier, tok.text, tok.line, tok.column);
        return;
    }

    result.erase(result.begin() + static_cast<std::ptrdiff_t>(pre_ws_start),
                 result.end());

    stream.consume(); // the opening '('

    std::vector<std::vector<Token>> arguments = parseMacroArguments(stream);

    if (arguments.size() != macro.params.size()) {
        // `F()` parses as one empty argument; that is acceptable only for
        // a macro that declares no parameters.
        const bool empty_call_of_nullary = macro.params.empty() &&
                                           arguments.size() == 1 &&
                                           arguments[0].empty();
        if (!empty_call_of_nullary) {
            throw PreprocessorError(std::format(
                "Macro '{}' expects {} arguments, but {} were "
                "provided",
                macro.name, macro.params.size(), arguments.size()));
        }
        arguments.clear();
    }

    if (recursion_depth > MAX_RECURSION) {
        throw PreprocessorError("Macro expansion recursion limit reached");
    }

    // Parameters that sit directly next to a Concat token in the body are
    // substituted with their unexpanded argument tokens.
    std::set<std::string> params_next_to_concat;
    for (size_t i = 0; i < macro.body.size(); ++i) {
        if (macro.body[i].type != TokenType::Identifier) {
            continue;
        }

        const bool is_param =
            std::ranges::contains(macro.params, macro.body[i].text);
        if (!is_param) {
            continue;
        }

        if (i > 0 && macro.body[i - 1].type == TokenType::Concat) {
            params_next_to_concat.insert(macro.body[i].text);
        }
        if (i + 1 < macro.body.size() &&
            macro.body[i + 1].type == TokenType::Concat) {
            params_next_to_concat.insert(macro.body[i].text);
        }
    }

    std::vector<std::vector<Token>> processed_args;
    processed_args.reserve(arguments.size());
    Expander arg_expander(macros, recursion_depth + 1);
    for (size_t i = 0; i < arguments.size(); ++i) {
        const auto& param_name = macro.params[i];
        const auto& arg = arguments[i];

        if (params_next_to_concat.contains(param_name)) {
            processed_args.push_back(arg);
        } else {
            processed_args.push_back(arg_expander.expand(arg));
        }
    }

    std::vector<Token> substituted =
        substituteParams(macro.body, macro.params, processed_args);

    // The text recorded for diagnostics is the body right after parameter
    // substitution, before any folding or nested expansion.
    std::string initial_replacement =
        preprocessor_detail::tokensToString(substituted, false);

    substituted = foldTopLevelTernary(substituted);

    Expander nested_expander(macros, recursion_depth + 1);
    substituted = nested_expander.expand(substituted);

    auto nested_expansions = nested_expander.getExpansions();

    // Best-effort constant folding: if the whole replacement evaluates to
    // a number, collapse it to a single Number token. Any failure simply
    // leaves the replacement tokens as they are.
    try {
        Evaluator evaluator(substituted);
        auto evaluated = evaluator.tryEvaluate();
        if (evaluated) {
            std::string value = Evaluator::toString(*evaluated);
            substituted.clear();
            substituted.emplace_back(TokenType::Number, value, tok.line,
                                     tok.column, *evaluated);
        }
    } catch (...) {
    }

    MacroExpansion expansion;
    expansion.macro_name = tok.text;
    expansion.original_line = tok.line;
    expansion.original_column = tok.column;
    expansion.replacement_text = initial_replacement;

    const size_t expansion_idx = expansions.size();
    expansions.push_back(expansion);

    // NOTE(review): marker tokens produced by nested_expander carry
    // indices into its own expansion list; confirm whether they need
    // rebasing once that list is appended here.
    expansions.insert(expansions.end(), nested_expansions.begin(),
                      nested_expansions.end());

    if (!substituted.empty()) {
        Token begin_tok(TokenType::BeginMacroExpansion, "", tok.line,
                        tok.column);
        begin_tok.expansion_idx = expansion_idx;
        substituted.insert(substituted.begin(), begin_tok);

        Token end_tok(TokenType::EndMacroExpansion, "", tok.line,
                      tok.column);
        end_tok.expansion_idx = expansion_idx;
        substituted.push_back(end_tok);
    }

    stream.prepend(substituted);
}
1521
1522 void expandObjectLikeMacro(TokenStream& stream,
1523 [[maybe_unused]] std::vector<Token>& result,
1524 const Token& tok, const Macro& macro) {
1525 if (recursion_depth > MAX_RECURSION) {
1526 throw PreprocessorError("Macro expansion recursion limit reached");
1527 }
1528
1529 std::string replacement_text =
1530 preprocessor_detail::tokensToString(macro.body, false);
1531
1532 Expander nested_expander(macros, recursion_depth + 1);
1533 std::vector<Token> expanded = nested_expander.expand(macro.body);
1534 auto nested_expansions = nested_expander.getExpansions();
1535
1536 MacroExpansion expansion;
1537 expansion.macro_name = tok.text;
1538 expansion.original_line = tok.line;
1539 expansion.original_column = tok.column;
1540 expansion.replacement_text = replacement_text;
1541
1542 size_t expansion_idx = expansions.size();
1543 expansions.push_back(expansion);
1544
1545 expansions.insert(expansions.end(), nested_expansions.begin(),
1546 nested_expansions.end());
1547
1548 if (!expanded.empty()) {
1549 Token begin_tok(TokenType::BeginMacroExpansion, "", tok.line,
1550 tok.column);
1551 begin_tok.expansion_idx = expansion_idx;
1552 expanded.insert(expanded.begin(), begin_tok);
1553
1554 Token end_tok(TokenType::EndMacroExpansion, "", tok.line,
1555 tok.column);
1556 end_tok.expansion_idx = expansion_idx;
1557 expanded.push_back(end_tok);
1558 }
1559
1560 stream.prepend(expanded);
1561 }
1562
1563 std::vector<Token> foldTopLevelTernary(const std::vector<Token>& tokens) {
1564 if (tokens.empty()) {
1565 return tokens;
1566 }
1567
1568 auto first = std::ranges::find_if(tokens, [](const Token& t) {
1569 return t.type != TokenType::Whitespace;
1570 });
1571
1572 if (first == tokens.end() || first->type != TokenType::Lparen) {
1573 return tokens;
1574 }
1575
1576 auto last =
1577 std::ranges::find_if(
1578 std::ranges::reverse_view(tokens),
1579 [](const Token& t) { return t.type != TokenType::Whitespace; })
1580 .base() -
1581 1;
1582
1583 if (last == tokens.begin() || last->type != TokenType::Rparen) {
1584 return tokens;
1585 }
1586
1587 std::vector<Token> core_tokens(first + 1, last);
1588
1589 int paren_balance = 0;
1590 size_t question_pos = std::string::npos;
1591 size_t colon_pos = std::string::npos;
1592
1593 for (size_t i = 0; i < core_tokens.size(); ++i) {
1594 const auto& token = core_tokens[i];
1595 if (token.type == TokenType::Lparen) {
1596 paren_balance++;
1597 } else if (token.type == TokenType::Rparen) {
1598 paren_balance--;
1599 } else if (paren_balance == 0 &&
1600 token.type == TokenType::Question) {
1601 if (question_pos == std::string::npos) {
1602 question_pos = i;
1603 }
1604 }
1605 }
1606
1607 if (question_pos == std::string::npos) {
1608 return tokens;
1609 }
1610
1611 int ternary_balance = 0;
1612 for (size_t i = question_pos + 1; i < core_tokens.size(); ++i) {
1613 const auto& token = core_tokens[i];
1614 if (token.type == TokenType::Lparen) {
1615 paren_balance++;
1616 } else if (token.type == TokenType::Rparen) {
1617 paren_balance--;
1618 } else if (paren_balance == 0 &&
1619 token.type == TokenType::Question) {
1620 ternary_balance++;
1621 } else if (paren_balance == 0 && token.type == TokenType::Colon) {
1622 if (ternary_balance == 0) {
1623 colon_pos = i;
1624 break;
1625 }
1626 ternary_balance--;
1627 }
1628 }
1629
1630 if (colon_pos == std::string::npos) {
1631 return tokens;
1632 }
1633
1634 std::vector<Token> cond_tokens(
1635 core_tokens.begin(),
1636 core_tokens.begin() + static_cast<std::ptrdiff_t>(question_pos));
1637
1638 try {
1639 Expander cond_expander(macros, recursion_depth + 1);
1640 auto expanded_cond = cond_expander.expand(cond_tokens);
1641 Evaluator evaluator(expanded_cond);
1642 auto result = evaluator.tryEvaluate();
1643 if (!result) {
1644 return tokens;
1645 }
1646
1647 if (Evaluator::is_truthy(*result)) {
1648 return {core_tokens.begin() +
1649 static_cast<std::ptrdiff_t>(question_pos + 1),
1650 core_tokens.begin() +
1651 static_cast<std::ptrdiff_t>(colon_pos)};
1652 }
1653 return {core_tokens.begin() +
1654 static_cast<std::ptrdiff_t>(colon_pos + 1),
1655 core_tokens.end()};
1656 } catch (...) {
1657 return tokens;
1658 }
1659 }
1660
1661 std::vector<Token>
1662 substituteParams(const std::vector<Token>& body,
1663 const std::vector<std::string>& params,
1664 const std::vector<std::vector<Token>>& args) {
1665
1666 std::vector<Token> result;
1667
1668 for (const auto& tok : body) {
1669 if (tok.type == TokenType::Identifier) {
1670 auto it = std::ranges::find(params, tok.text);
1671 if (it != params.end()) {
1672 size_t idx = std::ranges::distance(params.begin(), it);
1673 if (!args[idx].empty()) {
1674 result.insert(result.end(), args[idx].begin(),
1675 args[idx].end());
1676 }
1677 continue;
1678 }
1679 }
1680 result.push_back(tok);
1681 }
1682
1683 while (true) {
1684 auto it = std::ranges::find_if(result, [](const Token& t) {
1685 return t.type == TokenType::Concat;
1686 });
1687
1688 if (it == result.end()) {
1689 break;
1690 }
1691
1692 auto lhs_it = it;
1693 bool left_has_value = false;
1694
1695 if (lhs_it != result.begin()) {
1696 auto search_it = lhs_it - 1;
1697 while (true) {
1698 if (!preprocessor_detail::isSkippable(*search_it)) {
1699 lhs_it = search_it;
1700 left_has_value = true;
1701 break;
1702 }
1703 if (search_it == result.begin()) {
1704 lhs_it = search_it;
1705 left_has_value = false;
1706 break;
1707 }
1708 search_it--;
1709 }
1710 } else {
1711 left_has_value = false;
1712 }
1713
1714 auto rhs_it = it + 1;
1715 while (rhs_it != result.end() &&
1717 rhs_it++;
1718 }
1719
1720 std::string lhs_text = left_has_value ? lhs_it->text : "";
1721 std::string rhs_text = (rhs_it != result.end()) ? rhs_it->text : "";
1722
1723 std::string new_text = lhs_text + rhs_text;
1724
1725 PreprocessorTokenizer tokenizer(new_text);
1726 auto new_tokens = tokenizer.tokenize();
1727 new_tokens.erase(std::ranges::remove_if(
1728 new_tokens,
1729 [](const Token& t) {
1730 return t.type == TokenType::EndOfFile;
1731 })
1732 .begin(),
1733 new_tokens.end());
1734
1735 auto erase_end =
1736 (rhs_it == result.end()) ? result.end() : (rhs_it + 1);
1737
1738 auto insert_pos = result.erase(lhs_it, erase_end);
1739 result.insert(insert_pos, new_tokens.begin(), new_tokens.end());
1740 }
1741
1742 return result;
1743 }
1744};
1745
1746} // namespace preprocessor
1747
1748} // namespace infix2postfix
1749
1750namespace infix2postfix {
1751
1753 public:
1754 explicit Impl(std::string source) : source(std::move(source)) {}
1755
1756 void addPredefinedMacro(std::string name, const std::string& value) {
1757 PreprocessorTokenizer tokenizer(value);
1758 std::vector<Token> body_tokens = tokenizer.tokenize();
1759
1760 std::erase_if(body_tokens, [](const Token& t) {
1761 return t.type == TokenType::EndOfFile;
1762 });
1763
1764 preprocessor::Macro macro;
1765 macro.name = std::move(name);
1766 macro.is_function_like = false;
1767 macro.body = std::move(body_tokens);
1768
1769 macros.define(std::move(macro));
1770 }
1771
/// Runs the preprocessor over the stored source text.
///
/// All per-run state is reset first, so the function may be called more
/// than once. The source is tokenized and handled line by line; a
/// conditional block that is still open at the end is reported as an
/// error.
///
/// @return The preprocessed text (lines joined with '\n', no trailing
///         newline), the collected errors, the line map and the number of
///         library lines placed in front of the user code. `success` is
///         true exactly when no error was recorded.
PreprocessResult process() {
    output_lines.clear();
    line_mappings.clear();
    errors.clear();
    conditional_stack.clear();
    current_output_line = 1;
    included_libraries.clear();
    library_line_count = 0;

    PreprocessorTokenizer tokenizer(source);
    std::vector<Token> tokens = tokenizer.tokenize();

    processTokens(tokens);

    if (!conditional_stack.empty()) {
        addError(std::format(
                     "Unclosed @ifdef/@ifndef directive started at line {}",
                     conditional_stack.back().start_line),
                 tokens.empty() ? 0 : tokens.back().line);
    }

    PreprocessResult result;
    result.success = errors.empty();
    result.errors = errors;
    result.line_map = line_mappings;
    result.library_line_count = library_line_count;

    // Join the output lines with '\n' between them (none after the last).
    std::string joined;
    for (size_t i = 0; i < output_lines.size(); ++i) {
        if (i > 0) {
            joined += '\n';
        }
        joined += output_lines[i];
    }
    result.source = std::move(joined);

    return result;
}
1810
1811 private:
// One entry of the conditional-directive stack (@ifdef / @ifndef / @if).
// Entries are created with brace initialization in member order, so the
// order of the fields below must not change.
1812 struct ConditionalBlock {
// Source line of the directive that opened the block.
1813 int start_line;
// Whether lines in the branch currently being read are emitted.
1814 bool is_active;
// Consulted by handleElse(): the @else branch is only activated when this
// is false.
1815 bool had_true_branch;
1816 };
1817
1818 std::string source;
1820 std::vector<std::string> output_lines;
1821 std::vector<LineMapping> line_mappings;
1822 std::vector<std::string> errors;
1823 std::vector<ConditionalBlock> conditional_stack;
1824 int current_output_line = 1;
1825 std::set<std::string_view, std::less<>> included_libraries;
1826 int library_line_count = 0;
1827
1828 void processTokens(std::vector<Token>& tokens) {
1829 std::vector<Token> current_line_tokens;
1830 int current_line_number = 1;
1831
1832 for (const Token& tok : tokens) {
1833 if (tok.type == TokenType::Newline) {
1834 processLineTokens(current_line_tokens, current_line_number);
1835 current_line_tokens.clear();
1836 current_line_number = tok.line + 1;
1837 continue;
1838 }
1839
1840 if (tok.type != TokenType::EndOfFile) {
1841 current_line_tokens.push_back(tok);
1842 } else {
1843 break;
1844 }
1845 }
1846
1847 if (!current_line_tokens.empty()) {
1848 processLineTokens(current_line_tokens, current_line_number);
1849 }
1850 }
1851
1852 void processLineTokens(std::vector<Token>& line_tokens, int line_number) {
1853 if (line_tokens.empty()) {
1854 addOutputLine("", line_number);
1855 return;
1856 }
1857
1858 size_t first_non_ws = 0;
1859 while (first_non_ws < line_tokens.size() &&
1860 (line_tokens[first_non_ws].type == TokenType::Whitespace ||
1861 line_tokens[first_non_ws].type == TokenType::Comment)) {
1862 first_non_ws++;
1863 }
1864
1865 if (first_non_ws >= line_tokens.size()) {
1866 addOutputLine("", line_number);
1867 return;
1868 }
1869
1870 const Token& first_tok = line_tokens[first_non_ws];
1871 if (first_tok.type >= TokenType::AtDefine &&
1872 first_tok.type <= TokenType::AtRequires) {
1873 handleDirective(line_tokens, line_number);
1874 addOutputLine("", line_number);
1875 } else if (!isCurrentBlockActive()) {
1876 addOutputLine("", line_number);
1877 } else {
1878 try {
1879 preprocessor::Expander expander(macros);
1880 std::vector<Token> expanded = expander.expand(line_tokens);
1881 auto expansions = expander.getExpansions();
1882
1883 std::string line_text;
1884 int current_column = 1;
1885 for (const auto& tok : expanded) {
1886 if (tok.type == TokenType::BeginMacroExpansion) {
1887 if (tok.expansion_idx < expansions.size()) {
1888 expansions[tok.expansion_idx]
1889 .preprocessed_start_column = current_column;
1890 }
1891 } else if (tok.type == TokenType::EndMacroExpansion) {
1892 if (tok.expansion_idx < expansions.size()) {
1893 expansions[tok.expansion_idx]
1894 .preprocessed_end_column = current_column;
1895 }
1896 } else {
1897 line_text += tok.text;
1898 current_column += static_cast<int>(tok.text.length());
1899 }
1900 }
1901
1902 addOutputLine(line_text, line_number);
1903
1904 if (!line_mappings.empty()) {
1905 line_mappings.back().expansions.insert(
1906 line_mappings.back().expansions.end(),
1907 expansions.begin(), expansions.end());
1908 }
1909 } catch (const PreprocessorError& e) {
1910 addError(e.what(), line_number);
1911 addOutputLine("", line_number);
1912 }
1913 }
1914 }
1915
/// Dispatches a directive line to its handler.
///
/// Directives that change the conditional structure (@ifdef, @ifndef, @if,
/// @else, @endif) are always handled so that nesting stays balanced. All
/// other directives, including the report for an unknown one, take effect
/// only while the current conditional block is active.
void handleDirective(std::vector<Token>& line_tokens, int line_number) {
    preprocessor::TokenStream stream(line_tokens);

    stream.skipWhitespace();
    if (stream.is_eof()) {
        return;
    }

    const Token& directive_tok = stream.consume();
    const TokenType kind = directive_tok.type;

    // Structural directives first: they run regardless of block activity.
    if (kind == TokenType::AtIfdef) {
        handleIfdef(stream, line_number, true);
        return;
    }
    if (kind == TokenType::AtIfndef) {
        handleIfdef(stream, line_number, false);
        return;
    }
    if (kind == TokenType::AtIf) {
        handleIf(stream, line_number);
        return;
    }
    if (kind == TokenType::AtElse) {
        handleElse(line_number);
        return;
    }
    if (kind == TokenType::AtEndif) {
        handleEndif(line_number);
        return;
    }

    // Everything below is ignored inside an inactive block.
    if (!isCurrentBlockActive()) {
        return;
    }

    if (kind == TokenType::AtDefine) {
        handleDefine(stream, line_number);
    } else if (kind == TokenType::AtUndef) {
        handleUndef(stream, line_number);
    } else if (kind == TokenType::AtError) {
        handleError(stream, line_number);
    } else if (kind == TokenType::AtRequires) {
        handleRequires(stream, line_number);
    } else {
        addError(std::format("Unknown directive '{}'", directive_tok.text),
                 line_number);
    }
}
1971
/// Handles `@define NAME body` and `@define NAME(p1, p2, ...) body`.
///
/// A '(' directly after the name (no whitespace in between) makes the
/// macro function-like and starts its parameter list. The rest of the line
/// is the body. For an object-like macro the body is expanded immediately
/// and, if it evaluates to a constant, replaced by a single Number token;
/// if that fails the body is stored as it is at that point.
///
/// Malformed definitions are reported through addError() and nothing is
/// defined.
///
/// @param stream      Token stream positioned after the `@define` token.
/// @param line_number Source line used for diagnostics.
void handleDefine(preprocessor::TokenStream& stream, int line_number) {
    stream.skipWhitespace();

    if (stream.is_eof() || stream.peek().type != TokenType::Identifier) {
        addError("@define requires a macro name", line_number);
        return;
    }

    preprocessor::Macro macro;
    macro.name = stream.consume().text;
    macro.is_function_like = false;

    if (!stream.is_eof() && stream.peek().type == TokenType::Lparen) {
        macro.is_function_like = true;
        stream.consume();

        stream.skipWhitespace();
        while (!stream.is_eof() &&
               stream.peek().type != TokenType::Rparen) {
            stream.skipWhitespace();

            // Skipping whitespace may have exhausted the stream (for
            // example after a trailing comma), so check before peeking.
            if (stream.is_eof() ||
                stream.peek().type != TokenType::Identifier) {
                addError("Expected parameter name in macro definition",
                         line_number);
                return;
            }

            std::string param = stream.consume().text;

            if (std::ranges::contains(macro.params, param)) {
                addError(
                    std::format("Duplicate parameter name '{}' in macro "
                                "definition",
                                param),
                    line_number);
                return;
            }

            macro.params.push_back(std::move(param));
            stream.skipWhitespace();

            if (stream.is_eof()) {
                addError("Unterminated parameter list in macro definition",
                         line_number);
                return;
            }

            if (stream.peek().type == TokenType::Comma) {
                stream.consume();
            } else if (stream.peek().type != TokenType::Rparen) {
                addError("Unterminated parameter list in macro definition",
                         line_number);
                return;
            }
        }

        if (stream.is_eof() || stream.peek().type != TokenType::Rparen) {
            addError("Unterminated parameter list in macro definition",
                     line_number);
            return;
        }

        stream.consume();
    }

    stream.skipWhitespace();
    std::vector<Token> body_tokens;
    while (!stream.is_eof()) {
        body_tokens.push_back(stream.consume());
    }

    body_tokens = preprocessor_detail::trimTokens(body_tokens);

    if (!macro.is_function_like && !body_tokens.empty()) {
        // Best-effort eager evaluation; any failure is deliberately
        // ignored and the body is kept as it is at that point.
        try {
            preprocessor::Expander expander(macros);
            body_tokens = expander.expand(body_tokens);

            preprocessor::Evaluator evaluator(body_tokens);
            auto evaluated = evaluator.tryEvaluate();
            if (evaluated) {
                std::string value =
                    preprocessor::Evaluator::toString(*evaluated);
                body_tokens.clear();
                body_tokens.emplace_back(TokenType::Number, value,
                                         line_number, 0, *evaluated);
            }
        } catch (...) {
        }
    }

    macro.body = std::move(body_tokens);
    macros.define(std::move(macro));
}
2068
2069 void handleUndef(preprocessor::TokenStream& stream, int line_number) {
2070 stream.skipWhitespace();
2071
2072 if (stream.is_eof() || stream.peek().type != TokenType::Identifier) {
2073 addError("@undef requires a macro name", line_number);
2074 return;
2075 }
2076
2077 std::string name = stream.consume().text;
2078 macros.undef(name);
2079 }
2080
/// Handles `@ifdef NAME` (check_defined == true) and `@ifndef NAME`
/// (check_defined == false) by opening a new conditional block.
///
/// The block is active when the enclosing blocks are active and the
/// definedness test succeeds. If the macro name is missing, an error is
/// reported and an inactive block whose @else can never activate is
/// pushed, so that the matching @endif still balances.
void handleIfdef(preprocessor::TokenStream& stream, int line_number,
                 bool check_defined) {
    stream.skipWhitespace();

    const bool has_name = !stream.is_eof() &&
                          stream.peek().type == TokenType::Identifier;
    if (!has_name) {
        const char* directive = "@ifndef";
        if (check_defined) {
            directive = "@ifdef";
        }
        addError(std::format("{} requires a macro name", directive),
                 line_number);
        conditional_stack.push_back({line_number, false, true});
        return;
    }

    const std::string macro_name = stream.consume().text;

    // Definedness must match what the directive asks for.
    const bool condition_met = macros.contains(macro_name) == check_defined;
    const bool is_active = isCurrentBlockActive() && condition_met;

    conditional_stack.push_back({line_number, is_active, condition_met});
}
2102
/// Handles `@if EXPR` by opening a new conditional block.
///
/// The rest of the line is the condition. It is macro-expanded and
/// evaluated only when the enclosing blocks are active; inside an inactive
/// region the condition counts as false without being evaluated. An
/// evaluation failure is reported and also counts as false. A missing
/// expression is reported and pushes an inactive block.
void handleIf(preprocessor::TokenStream& stream, int line_number) {
    stream.skipWhitespace();

    std::vector<Token> condition;
    for (; !stream.is_eof();) {
        condition.push_back(stream.consume());
    }

    if (condition.empty()) {
        addError("@if requires an expression", line_number);
        conditional_stack.push_back({line_number, false, false});
        return;
    }

    const bool parent_active = isCurrentBlockActive();
    bool condition_met = false;

    if (parent_active) {
        try {
            preprocessor::Expander expander(macros);
            condition = expander.expand(condition);

            preprocessor::Evaluator evaluator(condition);
            const auto value = evaluator.evaluate();
            condition_met = preprocessor::Evaluator::is_truthy(value);
        } catch (const PreprocessorError& e) {
            addError(e.what(), line_number);
        } catch (const std::runtime_error& e) {
            addError(std::format("Failed to evaluate @if expression: {}",
                                 e.what()),
                     line_number);
        }
    }

    conditional_stack.push_back(
        {line_number, parent_active && condition_met, condition_met});
}
2140
2141 void handleElse(int line_number) {
2142 if (conditional_stack.empty()) {
2143 addError("@else without matching @ifdef/@ifndef", line_number);
2144 return;
2145 }
2146
2147 auto& block = conditional_stack.back();
2148
2149 bool parent_active = true;
2150 if (conditional_stack.size() > 1) {
2151 parent_active =
2152 conditional_stack[conditional_stack.size() - 2].is_active;
2153 }
2154
2155 block.is_active = parent_active && !block.had_true_branch;
2156 }
2157
2158 void handleEndif(int line_number) {
2159 if (conditional_stack.empty()) {
2160 addError("@endif without matching @ifdef/@ifndef", line_number);
2161 return;
2162 }
2163
2164 conditional_stack.pop_back();
2165 }
2166
/// Handles `@error message`: records a user-defined error.
///
/// The rest of the line (trimmed, whitespace inside preserved) is the
/// message; a generic message is used when the line has none.
void handleError(preprocessor::TokenStream& stream, int line_number) {
    stream.skipWhitespace();

    std::vector<Token> rest_of_line;
    for (; !stream.is_eof();) {
        rest_of_line.push_back(stream.consume());
    }

    const std::string message = preprocessor_detail::tokensToString(
        preprocessor_detail::trimTokens(rest_of_line), true);

    if (message.empty()) {
        addError("@error directive encountered", line_number);
    } else {
        addError(std::format("@error: {}", message), line_number);
    }
}
2183
/// Handles `@requires NAME`: pulls a standard library and its dependencies
/// into the output.
///
/// For every library returned by
/// StandardLibraryManager::resolveDependencies() that has not been
/// included yet:
///  1. its code is preprocessed by a separate Preprocessor seeded with the
///     current macros, and the macros it defines are imported back;
///  2. for the library named in the directive only, alias macros are
///     defined that map each exported name to its internal name (exports
///     restricted to Expr / SingleExpr mode are skipped unless `__EXPR__`
///     / `__SINGLEEXPR__` is defined);
///  3. its preprocessed lines are inserted at the very top of the output,
///     so a library processed later ends up above one processed earlier,
///     and the line map is renumbered.
///
/// Failures are reported through addError(); a library that fails is
/// skipped and the remaining ones are still processed.
///
/// @param stream      Token stream positioned after the `@requires` token.
/// @param line_number Source line used for diagnostics.
void handleRequires(preprocessor::TokenStream& stream, int line_number) {
    stream.skipWhitespace();

    if (stream.is_eof() || stream.peek().type != TokenType::Identifier) {
        addError("@requires requires a library name", line_number);
        return;
    }

    const std::string lib_name = stream.consume().text;

    std::vector<std::string_view> libraries_to_include;
    try {
        libraries_to_include =
            StandardLibraryManager::resolveDependencies(lib_name);
    } catch (const std::exception& e) {
        addError(std::format("Failed to resolve library '{}': {}", lib_name,
                             e.what()),
                 line_number);
        return;
    }

    const std::string_view explicitly_requested_lib = lib_name;

    // Tokenizes a generated macro body and drops the EndOfFile token.
    const auto tokenize_body = [](const std::string& text) {
        PreprocessorTokenizer tokenizer(text);
        std::vector<Token> body = tokenizer.tokenize();
        std::erase_if(body, [](const Token& t) {
            return t.type == TokenType::EndOfFile;
        });
        return body;
    };

    for (const auto& lib : libraries_to_include) {
        if (included_libraries.contains(lib)) {
            continue;
        }

        auto lib_code_opt = StandardLibraryManager::getLibraryCode(lib);
        if (!lib_code_opt) {
            addError(
                std::format("Library '{}' not found", std::string(lib)),
                line_number);
            continue;
        }

        std::string lib_code = std::string(lib_code_opt.value());

        // Preprocess the library in isolation, but with every macro that
        // is currently defined visible to it.
        Preprocessor lib_preprocessor(lib_code);
        for (const auto& [name, macro] : macros) {
            lib_preprocessor.impl->macros.define(macro);
        }
        auto lib_result = lib_preprocessor.process();

        if (!lib_result.success) {
            addError(std::format("Failed to preprocess library '{}': {}",
                                 std::string(lib),
                                 lib_result.errors.empty()
                                     ? "unknown error"
                                     : lib_result.errors[0]),
                     line_number);
            continue;
        }

        // Import whatever the library defined.
        for (const auto& [name, macro] : lib_preprocessor.impl->macros) {
            macros.define(macro);
        }

        std::vector<std::string> lib_lines;
        std::istringstream lib_stream(lib_result.source);
        std::string lib_line;
        while (std::getline(lib_stream, lib_line)) {
            lib_lines.push_back(lib_line);
        }

        if (lib == explicitly_requested_lib) {
            auto exports_opt = StandardLibraryManager::getExports(lib);
            if (exports_opt) {
                std::string lib_name_upper;
                for (char c : lib) {
                    // std::toupper requires a value representable as
                    // unsigned char; a negative plain char is undefined
                    // behaviour.
                    lib_name_upper += static_cast<char>(
                        std::toupper(static_cast<unsigned char>(c)));
                }

                const bool is_expr = macros.contains("__EXPR__");
                const bool is_single_expr =
                    macros.contains("__SINGLEEXPR__");

                for (const auto& exported : *exports_opt) {
                    if (exported.mode == stdlib::ExportMode::Expr &&
                        !is_expr) {
                        continue;
                    }
                    if (exported.mode == stdlib::ExportMode::SingleExpr &&
                        !is_single_expr) {
                        continue;
                    }

                    preprocessor::Macro alias_macro;
                    alias_macro.name = std::string(exported.name);

                    if (exported.param_count == 0) {
                        // Object-like alias. The default internal name
                        // uses the upper-cased library name.
                        std::string body_str;
                        if (!exported.internal_name_override.empty()) {
                            body_str = std::string(
                                exported.internal_name_override);
                        } else {
                            body_str = std::format(
                                "___STDLIB_{}_{}", lib_name_upper,
                                std::string(exported.name));
                        }
                        alias_macro.body = tokenize_body(body_str);
                        alias_macro.is_function_like = false;
                    } else {
                        // Function-like alias that forwards its arguments
                        // to the internal function.
                        std::string internal_name;
                        if (!exported.internal_name_override.empty()) {
                            internal_name = std::string(
                                exported.internal_name_override);
                        } else {
                            internal_name = std::format(
                                "___stdlib_{}_{}", std::string(lib),
                                std::string(exported.name));
                        }

                        alias_macro.is_function_like = true;
                        for (int i = 0; i < exported.param_count; ++i) {
                            alias_macro.params.push_back(
                                std::format("__arg{}", i));
                        }

                        std::string body_str = internal_name + "(";
                        for (int i = 0; i < exported.param_count; ++i) {
                            if (i > 0) {
                                body_str += ", ";
                            }
                            body_str += std::format("__arg{}", i);
                        }
                        body_str += ")";

                        alias_macro.body = tokenize_body(body_str);
                    }

                    macros.define(std::move(alias_macro));
                }
            }
        }

        // Put the library text at the very top of the output, keeping its
        // internal line order, with one mapping per inserted line.
        const size_t lib_line_count = lib_lines.size();
        output_lines.insert(output_lines.begin(), lib_lines.begin(),
                            lib_lines.end());

        LineMapping library_mapping;
        library_mapping.preprocessed_line = 1; // renumbered below
        library_mapping.original_line = -1;    // marks a library line
        line_mappings.insert(line_mappings.begin(), lib_line_count,
                             library_mapping);

        library_line_count += static_cast<int>(lib_line_count);

        // The line map runs parallel to output_lines, so after the
        // insertion every entry's output line is its index + 1. Numbering
        // all entries also moves the lines of libraries inserted earlier,
        // which previously kept their old numbers.
        for (size_t i = 0; i < line_mappings.size(); ++i) {
            line_mappings[i].preprocessed_line = static_cast<int>(i + 1);
        }

        current_output_line += static_cast<int>(lib_line_count);

        included_libraries.insert(lib);
    }
}
2355
2356 [[nodiscard]] bool isCurrentBlockActive() const {
2357 return std::ranges::all_of(conditional_stack, [](const auto& block) {
2358 return block.is_active;
2359 });
2360 }
2361
/// Records a diagnostic, prefixed with the source line it refers to.
void addError(const std::string& message, int line) {
    std::string entry = std::format("Line {}: {}", line, message);
    errors.push_back(std::move(entry));
}
2365
2366 void addOutputLine(const std::string& line, int original_line) {
2367 output_lines.push_back(line);
2368
2369 LineMapping mapping;
2370 mapping.preprocessed_line = current_output_line;
2371 mapping.original_line = original_line;
2372
2373 line_mappings.push_back(mapping);
2374 current_output_line++;
2375 }
2376};
2377
2379 : impl(std::make_unique<Impl>(std::move(source))) {}
2380
// Defaulted here in the implementation file, after Impl has been defined;
// presumably so that Impl is a complete type where the impl pointer is
// destroyed.
2381Preprocessor::~Preprocessor() = default;
2382
2384 const std::string& value) {
2385 impl->addPredefinedMacro(std::move(name), value);
2386}
2387
2388PreprocessResult Preprocessor::process() { return impl->process(); }
2389
/// Appends a macro expansion trace to a diagnostic message.
///
/// The first entry of @p line_map whose preprocessed line equals @p line
/// is looked up. If there is none, or it records no expansions, the
/// message is returned unchanged. Otherwise one trace line per recorded
/// expansion is appended.
///
/// @param message  The diagnostic text.
/// @param line     Line number in the preprocessed output.
/// @param line_map Line map produced by process().
/// @return The message, possibly followed by the expansion trace.
std::string Preprocessor::formatDiagnosticWithExpansion(
    const std::string& message, int line,
    const std::vector<LineMapping>& line_map) {

    const auto mapping =
        std::ranges::find_if(line_map, [line](const LineMapping& m) {
            return m.preprocessed_line == line;
        });

    if (mapping == line_map.end() || mapping->expansions.empty()) {
        return message;
    }

    std::string result = message;
    result += "\n  Macro expansion trace:";

    for (const auto& expansion : mapping->expansions) {
        result += std::format("\n    {}:{}: in expansion of macro '{}'",
                              expansion.original_line,
                              expansion.original_column, expansion.macro_name);
        if (!expansion.replacement_text.empty()) {
            result += std::format(" -> '{}'", expansion.replacement_text);
        }
    }

    return result;
}
2420
/// Produces a listing of every macro expansion recorded in a line map.
///
/// One text line is emitted per expansion, naming the preprocessed line,
/// the original position, the macro and its replacement text ("(empty)"
/// when the replacement is empty). Each text line ends with '\n'.
std::string
Preprocessor::formatMacroExpansions(const std::vector<LineMapping>& line_map) {
    std::string report;

    for (const auto& entry : line_map) {
        // Entries without expansions contribute nothing.
        for (const auto& exp : entry.expansions) {
            std::string_view shown = "(empty)";
            if (!exp.replacement_text.empty()) {
                shown = exp.replacement_text;
            }

            report += std::format(
                "Line {} (original line {}:{}): macro '{}' expanded to "
                "'{}'\n",
                entry.preprocessed_line, exp.original_line,
                exp.original_column, exp.macro_name, shown);
        }
    }

    return report;
}
2442
2443} // namespace infix2postfix
void addPredefinedMacro(std::string name, const std::string &value)
static std::string formatMacroExpansions(const std::vector< LineMapping > &line_map)
Preprocessor(std::string source)
void addPredefinedMacro(std::string name, const std::string &value="")
static std::string formatDiagnosticWithExpansion(const std::string &message, int line, const std::vector< LineMapping > &line_map)
static std::optional< std::string_view > getLibraryCode(std::string_view library_name)
static std::optional< std::vector< ExportedFunction > > getExports(std::string_view library_name)
static std::vector< std::string_view > resolveDependencies(std::string_view library_name)
static std::string toString(const std::variant< int64_t, double > &val)
std::variant< int64_t, double > evaluate()
std::optional< std::variant< int64_t, double > > tryEvaluate()
Evaluator(const std::vector< Token > &tokens)
static bool is_truthy(const std::variant< int64_t, double > &val)
std::vector< Token > expand(const std::vector< Token > &input)
Expander(const MacroTable &macros, int recursion_depth=0)
std::vector< MacroExpansion > getExpansions() const
void undef(const std::string &name)
bool contains(const std::string &name) const
const Macro * find(const std::string &name) const
Token peek(size_t offset=0) const
void prepend(const std::vector< Token > &tokens)
TokenStream(std::vector< Token > p_tokens)
std::vector< Token > trimTokens(const std::vector< Token > &tokens)
std::string tokensToString(const std::vector< Token > &tokens, bool preserve_whitespace=false)
preprocessor_detail::PreprocessorTokenizer PreprocessorTokenizer
preprocessor_detail::Token Token
TokenType type
std::string text
std::vector< MacroExpansion > expansions
std::vector< LineMapping > line_map
std::vector< std::string > errors
std::vector< std::string > params
std::variant< int64_t, double > numeric_value
Token(TokenType t, std::string txt, int ln, int col, const std::variant< int64_t, double > &val)
Token(TokenType t, std::string txt, int ln, int col)