VapourSynth-llvmexpr
Loading...
Searching...
No Matches
Tokenizer.cpp
Go to the documentation of this file.
1
19
20#include "Tokenizer.hpp"
22
23#include <algorithm>
24#include <array>
25#include <charconv>
26#include <cmath>
27#include <cstdlib>
28#include <format>
29#include <limits>
30#include <locale>
31#include <optional>
32#include <ranges>
33#include <sstream>
34#include <stdexcept>
35#include <string>
36#include <string_view>
37#include <vector>
38
39#include <ctre.hpp>
40
41namespace {
42
// TODO: Use std::from_chars when libc++ supports it.
//
// Converts the whole of `s` to a double through a string stream imbued with
// the classic ("C") locale, so the process-wide locale does not influence
// the result.
//
// Throws std::runtime_error when the extraction fails or when the stream has
// not reached its end afterwards (i.e. unread characters remain).
inline double locale_independent_stod(const std::string& s) {
    std::istringstream stream(s);
    stream.imbue(std::locale::classic());

    double parsed = std::numeric_limits<double>::quiet_NaN();
    stream >> parsed;

    // Both conditions are required: a successful extraction that stopped
    // early still leaves the input only partially consumed.
    const bool consumed_everything = !stream.fail() && stream.eof();
    if (!consumed_everything) {
        throw std::runtime_error(std::format("Failed to parse number: {}", s));
    }
    return parsed;
}
53
// Parses the complete string view as a base-10 `int` with std::from_chars.
//
// Throws:
//   std::invalid_argument - no integer could be read at all, or characters
//                           remain after the integer.
//   std::out_of_range     - the value does not fit into an `int`.
inline int svtoi(std::string_view sv) {
    const char* const first = sv.data();
    const char* const last = first + sv.size();

    int parsed = 0;
    const std::from_chars_result result = std::from_chars(first, last, parsed);

    switch (result.ec) {
    case std::errc::invalid_argument:
        throw std::invalid_argument(
            std::format("Failed to parse integer from '''{}'''", sv));
    case std::errc::result_out_of_range:
        throw std::out_of_range(
            std::format("Integer out of range for '''{}'''", sv));
    default:
        break;
    }

    // from_chars stops at the first character it cannot use; anything left
    // over means the text was not purely an integer.
    if (result.ptr != last) {
        throw std::invalid_argument(
            std::format("Invalid integer format '''{}'''", sv));
    }
    return parsed;
}
74
75using Availability = TokenDefinition::Availability;
76
77constexpr Availability operator|(Availability lhs, Availability rhs) {
78 return static_cast<Availability>(static_cast<std::uint8_t>(lhs) |
79 static_cast<std::uint8_t>(rhs));
80}
81
82constexpr Availability operator&(Availability lhs, Availability rhs) {
83 return static_cast<Availability>(static_cast<std::uint8_t>(lhs) &
84 static_cast<std::uint8_t>(rhs));
85}
86
87constexpr Availability AVAILABILITY_ALL =
88 Availability::Expr | Availability::SingleExpr | Availability::VkExpr;
89
90constexpr bool supports_mode(Availability availability, ExprMode mode) {
91 if (mode == ExprMode::Expr) {
92 return static_cast<std::uint8_t>(availability & Availability::Expr) !=
93 0;
94 }
95 if (mode == ExprMode::SingleExpr) {
96 return static_cast<std::uint8_t>(availability &
97 Availability::SingleExpr) != 0;
98 }
99 if (mode == ExprMode::VkExpr) {
100 return static_cast<std::uint8_t>(availability & Availability::VkExpr) !=
101 0;
102 }
103 return false;
104}
105
106template <FixedString Str, TokenType Type>
107std::optional<Token> parse_literal(std::string_view input) {
108 if (input == Str.view()) {
109 return Token{.type = Type,
110 .text = std::string(input),
111 .payload = std::monostate{}};
112 }
113 return std::nullopt;
114}
115
116template <FixedString Str, TokenType Type>
117consteval TokenDefinition
118make_literal_definition(TokenBehavior behavior,
119 Availability availability = AVAILABILITY_ALL) {
120 return {.type = Type,
121 .name = Str.view(),
122 .behavior = behavior,
123 .parser = parse_literal<Str, Type>,
124 .availability = availability};
125}
126
127constexpr TokenBehavior BEHAVIOR_BINARY{.arity = 2, .stack_effect = -1};
128constexpr TokenBehavior BEHAVIOR_UNARY{.arity = 1, .stack_effect = 0};
129constexpr TokenBehavior BEHAVIOR_ZERO_PUSH{.arity = 0, .stack_effect = 1};
130constexpr TokenBehavior BEHAVIOR_TERNARY{.arity = 3, .stack_effect = -2};
131constexpr TokenBehavior BEHAVIOR_NO_EFFECT{.arity = 0, .stack_effect = 0};
132
133// CTRE-based regex parsers
134inline std::optional<Token> parse_plane_width(std::string_view input) {
135 if (auto m = ctre::match<R"(^width\^(\d+)$)">(input)) {
136 int plane_idx = svtoi(m.template get<1>().to_view());
138 .text = std::string(input),
139 .payload = TokenPayloadPlaneDim{.plane_idx = plane_idx}};
140 }
141 return std::nullopt;
142}
143
144inline std::optional<Token> parse_plane_height(std::string_view input) {
145 if (auto m = ctre::match<R"(^height\^(\d+)$)">(input)) {
146 int plane_idx = svtoi(m.template get<1>().to_view());
148 .text = std::string(input),
149 .payload = TokenPayloadPlaneDim{.plane_idx = plane_idx}};
150 }
151 return std::nullopt;
152}
153
154inline std::optional<Token> parse_clip_width(std::string_view input) {
155 if (auto m = ctre::match<R"(^(?:src(\d+)|([x-za-w])):width$)">(input)) {
157 if (m.template get<1>()) {
158 data.clip_idx = svtoi(m.template get<1>().to_view());
159 } else if (m.template get<2>()) {
160 data.clip_idx =
161 parse_std_clip_idx(m.template get<2>().to_view()[0]);
162 }
163 return Token{.type = TokenType::ConstantClipWidth,
164 .text = std::string(input),
165 .payload = data};
166 }
167 return std::nullopt;
168}
169
170inline std::optional<Token> parse_clip_height(std::string_view input) {
171 if (auto m = ctre::match<R"(^(?:src(\d+)|([x-za-w])):height$)">(input)) {
173 if (m.template get<1>()) {
174 data.clip_idx = svtoi(m.template get<1>().to_view());
175 } else if (m.template get<2>()) {
176 data.clip_idx =
177 parse_std_clip_idx(m.template get<2>().to_view()[0]);
178 }
180 .text = std::string(input),
181 .payload = data};
182 }
183 return std::nullopt;
184}
185
186inline std::optional<Token> parse_clip_plane_width(std::string_view input) {
187 if (auto m =
188 ctre::match<R"(^(?:src(\d+)|([x-za-w])):width\^(\d+)$)">(input)) {
190 if (m.template get<1>()) {
191 data.clip_idx = svtoi(m.template get<1>().to_view());
192 } else if (m.template get<2>()) {
193 data.clip_idx =
194 parse_std_clip_idx(m.template get<2>().to_view()[0]);
195 }
196 data.plane_idx = svtoi(m.template get<3>().to_view());
198 .text = std::string(input),
199 .payload = data};
200 }
201 return std::nullopt;
202}
203
204inline std::optional<Token> parse_clip_plane_height(std::string_view input) {
205 if (auto m =
206 ctre::match<R"(^(?:src(\d+)|([x-za-w])):height\^(\d+)$)">(input)) {
208 if (m.template get<1>()) {
209 data.clip_idx = svtoi(m.template get<1>().to_view());
210 } else if (m.template get<2>()) {
211 data.clip_idx =
212 parse_std_clip_idx(m.template get<2>().to_view()[0]);
213 }
214 data.plane_idx = svtoi(m.template get<3>().to_view());
216 .text = std::string(input),
217 .payload = data};
218 }
219 return std::nullopt;
220}
221
222inline std::optional<Token> parse_dup(std::string_view input) {
223 if (auto m = ctre::match<R"(^dup(\d*)$)">(input)) {
224 int n = 0;
225 if (m.template get<1>()) {
226 auto digit_sv = m.template get<1>().to_view();
227 if (!digit_sv.empty()) {
228 n = svtoi(digit_sv);
229 }
230 }
231 if (n < 0) {
232 throw std::runtime_error("Invalid dupN value");
233 }
234 return Token{.type = TokenType::Dup,
235 .text = std::string(input),
236 .payload = TokenPayloadStackOp{n}};
237 }
238 return std::nullopt;
239}
240
241inline std::optional<Token> parse_drop(std::string_view input) {
242 if (auto m = ctre::match<R"(^drop(\d*)$)">(input)) {
243 int n = 1;
244 if (m.template get<1>()) {
245 auto digit_sv = m.template get<1>().to_view();
246 if (!digit_sv.empty()) {
247 n = svtoi(digit_sv);
248 }
249 }
250 if (n < 0) {
251 throw std::runtime_error("Invalid dropN value");
252 }
253 return Token{.type = TokenType::Drop,
254 .text = std::string(input),
255 .payload = TokenPayloadStackOp{n}};
256 }
257 return std::nullopt;
258}
259
260inline std::optional<Token> parse_swap(std::string_view input) {
261 if (auto m = ctre::match<R"(^swap(\d*)$)">(input)) {
262 int n = 1;
263 if (m.template get<1>()) {
264 auto digit_sv = m.template get<1>().to_view();
265 if (!digit_sv.empty()) {
266 n = svtoi(digit_sv);
267 }
268 }
269 if (n < 0) {
270 throw std::runtime_error("Invalid swapN value");
271 }
272 return Token{.type = TokenType::Swap,
273 .text = std::string(input),
274 .payload = TokenPayloadStackOp{n}};
275 }
276 return std::nullopt;
277}
278
// Parser for tokens spelled `<Prefix><digits>` where at least one digit is
// mandatory (used for the sortN / argminN / argmaxN / argsortN families).
// The parsed number becomes the TokenPayloadStackOp count.
template <FixedString Prefix, TokenType Type>
inline std::optional<Token> parse_stack_n(std::string_view input) {
    if (!input.starts_with(Prefix.view())) {
        return std::nullopt;
    }

    auto digits = input.substr(Prefix.view().size());
    auto m = ctre::match<R"(^(\d+)$)">(digits);
    if (!m) {
        return std::nullopt;
    }

    const int n = svtoi(m.template get<1>().to_view());
    if (n < 0) {
        throw std::runtime_error(
            std::format("Invalid {}{} value", Prefix.view(), n));
    }
    return Token{.type = Type,
                 .text = std::string(input),
                 .payload = TokenPayloadStackOp{n}};
}
296
297inline std::optional<Token> parse_label_def(std::string_view input) {
298 if (auto m = ctre::match<R"(^#(.+)$)">(input)) {
299 return Token{.type = TokenType::LabelDef,
300 .text = std::string(input),
301 .payload = TokenPayloadLabel{
302 .name = std::string(m.template get<1>().to_view())}};
303 }
304 return std::nullopt;
305}
306
307inline std::optional<Token> parse_jump(std::string_view input) {
308 if (auto m = ctre::match<R"(^(.+)#$)">(input)) {
309 return Token{.type = TokenType::Jump,
310 .text = std::string(input),
311 .payload = TokenPayloadLabel{
312 .name = std::string(m.template get<1>().to_view())}};
313 }
314 return std::nullopt;
315}
316
317inline std::optional<Token> parse_var_store(std::string_view input) {
318 if (auto m = ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)!$)">(input)) {
319 return Token{.type = TokenType::VarStore,
320 .text = std::string(input),
321 .payload = TokenPayloadVar{
322 .name = std::string(m.template get<1>().to_view())}};
323 }
324 return std::nullopt;
325}
326
327inline std::optional<Token> parse_var_load(std::string_view input) {
328 if (auto m = ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)@$)">(input)) {
329 return Token{.type = TokenType::VarLoad,
330 .text = std::string(input),
331 .payload = TokenPayloadVar{
332 .name = std::string(m.template get<1>().to_view())}};
333 }
334 return std::nullopt;
335}
336
337inline std::optional<Token> parse_array_alloc_static(std::string_view input) {
338 if (auto m =
339 ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)\{\}\^(\d+)$)">(input)) {
340 int static_size = svtoi(m.template get<2>().to_view());
341 return Token{.type = TokenType::ArrayAllocStatic,
342 .text = std::string(input),
343 .payload = TokenPayloadArrayOp{
344 .name = std::string(m.template get<1>().to_view()),
345 .static_size = static_size}};
346 }
347 return std::nullopt;
348}
349
350inline std::optional<Token> parse_array_alloc_dyn(std::string_view input) {
351 if (auto m = ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)\{\}\^$)">(input)) {
352 return Token{.type = TokenType::ArrayAllocDyn,
353 .text = std::string(input),
354 .payload = TokenPayloadArrayOp{
355 .name = std::string(m.template get<1>().to_view())}};
356 }
357 return std::nullopt;
358}
359
360inline std::optional<Token> parse_array_store(std::string_view input) {
361 if (auto m = ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)\{\}!$)">(input)) {
362 return Token{.type = TokenType::ArrayStore,
363 .text = std::string(input),
364 .payload = TokenPayloadArrayOp{
365 .name = std::string(m.template get<1>().to_view())}};
366 }
367 return std::nullopt;
368}
369
370inline std::optional<Token> parse_array_load(std::string_view input) {
371 if (auto m = ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)\{\}@$)">(input)) {
372 return Token{.type = TokenType::ArrayLoad,
373 .text = std::string(input),
374 .payload = TokenPayloadArrayOp{
375 .name = std::string(m.template get<1>().to_view())}};
376 }
377 return std::nullopt;
378}
379
380inline std::optional<Token> parse_clip_rel(std::string_view input) {
381 if (auto m = ctre::match<
382 R"(^(?:src(\d+)|([x-za-w]))\‍[\s*(-?\d+)\s*,\s*(-?\d+)\s*\‍](?::([cm]))?$)">(
383 input)) {
385 if (m.template get<1>()) {
386 data.clip_idx = svtoi(m.template get<1>().to_view());
387 } else if (m.template get<2>()) {
388 data.clip_idx =
389 parse_std_clip_idx(m.template get<2>().to_view()[0]);
390 }
391 data.rel_x = svtoi(m.template get<3>().to_view());
392 data.rel_y = svtoi(m.template get<4>().to_view());
393
394 // NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers)
395 if (m.template get<5>()) {
396 data.has_mode = true;
397 data.use_mirror = (m.template get<5>().to_view() == "m");
398 }
399 // NOLINTEND(cppcoreguidelines-avoid-magic-numbers)
400 return Token{.type = TokenType::ClipRel,
401 .text = std::string(input),
402 .payload = data};
403 }
404 return std::nullopt;
405}
406
407inline std::optional<Token> parse_clip_abs(std::string_view input) {
408 if (auto m = ctre::match<R"(^(?:src(\d+)|([x-za-w]))\‍[\‍](?::([mcb]))?$)">(
409 input)) {
411 if (m.template get<1>()) {
412 data.clip_idx = svtoi(m.template get<1>().to_view());
413 } else if (m.template get<2>()) {
414 data.clip_idx =
415 parse_std_clip_idx(m.template get<2>().to_view()[0]);
416 }
417 if (m.template get<3>()) {
418 char mode_char = m.template get<3>().to_view()[0];
419 if (mode_char == 'm') {
420 data.has_mode = true;
421 data.use_mirror = true;
422 } else if (mode_char == 'c') {
423 data.has_mode = true;
424 data.use_mirror = false;
425 } else if (mode_char == 'b') {
426 data.has_mode = false;
427 }
428 } else {
429 data.has_mode = true;
430 data.use_mirror = false;
431 }
432 return Token{.type = TokenType::ClipAbs,
433 .text = std::string(input),
434 .payload = data};
435 }
436 return std::nullopt;
437}
438
439inline std::optional<Token> parse_clip_cur(std::string_view input) {
440 if (auto m = ctre::match<R"(^(?:src(\d+)|([x-za-w]))$)">(input)) {
442 if (m.template get<1>()) {
443 data.clip_idx = svtoi(m.template get<1>().to_view());
444 } else if (m.template get<2>()) {
445 data.clip_idx =
446 parse_std_clip_idx(m.template get<2>().to_view()[0]);
447 }
448 return Token{.type = TokenType::ClipCur,
449 .text = std::string(input),
450 .payload = data};
451 }
452 return std::nullopt;
453}
454
455inline std::optional<Token> parse_prop_access(std::string_view input) {
456 if (auto m = ctre::match<
457 R"(^(?:src(\d+)|([x-za-w]))\.([a-zA-Z_][a-zA-Z0-9_]*)$)">(input)) {
459 if (m.template get<1>()) {
460 data.clip_idx = svtoi(m.template get<1>().to_view());
461 } else if (m.template get<2>()) {
462 data.clip_idx =
463 parse_std_clip_idx(m.template get<2>().to_view()[0]);
464 }
465 data.prop_name = std::string(m.template get<3>().to_view());
466 return Token{.type = TokenType::PropAccess,
467 .text = std::string(input),
468 .payload = data};
469 }
470 return std::nullopt;
471}
472
473inline std::optional<Token> parse_prop_exists(std::string_view input) {
474 if (auto m = ctre::match<
475 R"(^(?:src(\d+)|([x-za-w]))\.([a-zA-Z_][a-zA-Z0-9_]*)\?$)">(
476 input)) {
478 if (m.template get<1>()) {
479 data.clip_idx = svtoi(m.template get<1>().to_view());
480 } else if (m.template get<2>()) {
481 data.clip_idx =
482 parse_std_clip_idx(m.template get<2>().to_view()[0]);
483 }
484 data.prop_name = std::string(m.template get<3>().to_view());
485 return Token{.type = TokenType::PropExists,
486 .text = std::string(input),
487 .payload = data};
488 }
489 return std::nullopt;
490}
491
492inline std::optional<Token> parse_clip_abs_plane(std::string_view input) {
493 if (auto m =
494 ctre::match<R"(^(?:src(\d+)|([x-za-w]))\^(\d+)\‍[\‍]$)">(input)) {
496 if (m.template get<1>()) {
497 data.clip_idx = svtoi(m.template get<1>().to_view());
498 } else if (m.template get<2>()) {
499 data.clip_idx =
500 parse_std_clip_idx(m.template get<2>().to_view()[0]);
501 }
502 data.plane_idx = svtoi(m.template get<3>().to_view());
503 return Token{.type = TokenType::ClipAbsPlane,
504 .text = std::string(input),
505 .payload = data};
506 }
507 return std::nullopt;
508}
509
510inline std::optional<Token> parse_store_abs_plane(std::string_view input) {
511 if (auto m = ctre::match<R"(^@\‍[\‍]\^(\d+)$)">(input)) {
512 int plane_idx = svtoi(m.template get<1>().to_view());
513 return Token{.type = TokenType::StoreAbsPlane,
514 .text = std::string(input),
515 .payload =
516 TokenPayloadStoreAbsPlane{.plane_idx = plane_idx}};
517 }
518 return std::nullopt;
519}
520
521inline std::optional<Token> parse_prop_store(std::string_view input) {
522 if (auto m = ctre::match<R"(^([a-zA-Z_][a-zA-Z0-9_]*)\$(af|ai|f|i|d)?$)">(
523 input)) {
524 PropWriteType type = PropWriteType::Float; // Default for bare '$'
525 if (m.template get<2>()) {
526 auto suffix = m.template get<2>().to_view();
527 if (suffix == "i") {
528 type = PropWriteType::Int;
529 } else if (suffix == "f") {
531 } else if (suffix == "ai") {
533 } else if (suffix == "af") {
535 } else if (suffix == "d") {
537 }
538 }
539
540 return Token{
541 .type = TokenType::PropStore,
542 .text = std::string(input),
543 .payload = TokenPayloadPropStore{
544 .prop_name = std::string(m.template get<1>().to_view()),
545 .type = type}};
546 }
547 return std::nullopt;
548}
549
550inline std::optional<Token> parse_buffer_access(std::string_view input) {
551 if (auto m = ctre::match<
552 R"(^buf(\d+)(?:(?:(\‍[\‍]))|(?:\‍[\s*(-?\d+)\s*,\s*(-?\d+)\s*\‍]))?(?::([cmb]))?$)">(
553 input)) {
555 data.buffer_idx = svtoi(m.template get<1>().to_view());
556
558
559 if (m.template get<2>()) {
561 } else if (m.template get<3>()) {
563 data.rel_x = svtoi(m.template get<3>().to_view());
564 data.rel_y = svtoi(m.template get<4>().to_view());
565 }
566
567 // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
568 if (m.template get<5>()) {
569 // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
570 char mode_char = m.template get<5>().to_view()[0];
571 if (mode_char == 'm') {
572 data.has_mode = true;
573 data.use_mirror = true;
574 } else if (mode_char == 'c') {
575 data.has_mode = true;
576 data.use_mirror = false;
577 } else if (mode_char == 'b') {
578 data.has_mode = false;
579 }
580 }
581
582 return Token{.type = type, .text = std::string(input), .payload = data};
583 }
584 return std::nullopt;
585}
586
// Numeric literal. The pattern has three alternatives, tried in this order:
//   1. hexadecimal: `0x` + hex digits, optionally `.` + hex digits, which may
//      in turn be followed by a `p` exponent;
//   2. octal integer: `0` followed by one or more digits 0-7;
//   3. decimal: optional sign, digits, optional fraction, optional exponent.
// Octal text is converted with std::from_chars in base 8; everything else is
// handed to locale_independent_stod.
//
// NOTE(review): the hexadecimal alternative therefore relies on the stream
// extractor inside locale_independent_stod accepting hexadecimal text, which
// may not hold for every standard library - confirm on the target toolchains.
inline std::optional<Token> parse_number(std::string_view input) {
    auto m = ctre::match<
        R"(^(?:(0x[0-9a-fA-F]+(?:\.[0-9a-fA-F]+(?:p[+\-]?\d+)?)?)|(0[0-7]+)|([+\-]?\d+(?:\.\d+)?(?:[eE][+\-]?\d+)?))$)">(
        input);
    if (!m) {
        return std::nullopt;
    }

    double val = NAN;
    if (auto octal = m.get<2>()) {
        const auto sv = octal.to_view();
        const auto digits = sv.substr(1); // Skip the leading '0'
        const char* const first = digits.data();
        const char* const last = first + digits.size();

        long long parsed = 0;
        const auto [ptr, ec] = std::from_chars(first, last, parsed, 8);
        if (ec != std::errc{} || ptr != last) {
            throw std::runtime_error(
                std::format("Failed to parse octal number: {}", sv));
        }
        val = static_cast<double>(parsed);
    } else {
        // Hex or decimal float/integer
        val = locale_independent_stod(std::string(input));
    }

    return Token{.type = TokenType::Number,
                 .text = std::string(input),
                 .payload = TokenPayloadNumber{val}};
}
617
618// Dynamic behavior resolvers for stack operations
619inline TokenBehavior dup_behavior(const Token& t) {
620 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
621 return {.arity = payload.n + 1, .stack_effect = 1};
622}
623
624inline TokenBehavior drop_behavior(const Token& t) {
625 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
626 return {.arity = payload.n, .stack_effect = -payload.n};
627}
628
629inline TokenBehavior swap_behavior(const Token& t) {
630 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
631 return {.arity = payload.n + 1, .stack_effect = 0};
632}
633
634inline TokenBehavior sortn_behavior(const Token& t) {
635 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
636 return {.arity = payload.n, .stack_effect = 0};
637}
638
639inline TokenBehavior argminn_behavior(const Token& t) {
640 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
641 return {.arity = payload.n, .stack_effect = 1 - payload.n};
642}
643
644inline TokenBehavior argmaxn_behavior(const Token& t) {
645 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
646 return {.arity = payload.n, .stack_effect = 1 - payload.n};
647}
648
649inline TokenBehavior argsortn_behavior(const Token& t) {
650 const auto& payload = std::get<TokenPayloadStackOp>(t.payload);
651 return {.arity = payload.n, .stack_effect = 0};
652}
653
654inline TokenBehavior prop_store_behavior(const Token& t) {
655 const auto& payload = std::get<TokenPayloadPropStore>(t.payload);
656 if (payload.type == PropWriteType::Delete) {
657 return {.arity = 0, .stack_effect = 0};
658 }
659 return {.arity = 1, .stack_effect = -1};
660}
661
662// Compile-time token definitions table
663constexpr auto get_token_definitions() {
664 return std::array{
665 make_literal_definition<FixedString{"+"}, TokenType::Add>(
666 BEHAVIOR_BINARY),
667 make_literal_definition<FixedString{"-"}, TokenType::Sub>(
668 BEHAVIOR_BINARY),
669 make_literal_definition<FixedString{"*"}, TokenType::Mul>(
670 BEHAVIOR_BINARY),
671 make_literal_definition<FixedString{"/"}, TokenType::Div>(
672 BEHAVIOR_BINARY),
673 make_literal_definition<FixedString{"%"}, TokenType::Mod>(
674 BEHAVIOR_BINARY),
675 make_literal_definition<FixedString{">"}, TokenType::Gt>(
676 BEHAVIOR_BINARY),
677 make_literal_definition<FixedString{"<"}, TokenType::Lt>(
678 BEHAVIOR_BINARY),
679 make_literal_definition<FixedString{"="}, TokenType::Eq>(
680 BEHAVIOR_BINARY),
681 make_literal_definition<FixedString{"?"}, TokenType::Ternary>(
682 BEHAVIOR_TERNARY),
683 make_literal_definition<FixedString{"X"}, TokenType::ConstantX>(
684 BEHAVIOR_ZERO_PUSH, Availability::Expr | Availability::VkExpr),
685 make_literal_definition<FixedString{"Y"}, TokenType::ConstantY>(
686 BEHAVIOR_ZERO_PUSH, Availability::Expr | Availability::VkExpr),
687 make_literal_definition<FixedString{"N"}, TokenType::ConstantN>(
688 BEHAVIOR_ZERO_PUSH),
689 make_literal_definition<FixedString{">="}, TokenType::Ge>(
690 BEHAVIOR_BINARY),
691 make_literal_definition<FixedString{"<="}, TokenType::Le>(
692 BEHAVIOR_BINARY),
693 make_literal_definition<FixedString{"**"}, TokenType::Pow>(
694 BEHAVIOR_BINARY),
695 make_literal_definition<FixedString{"or"}, TokenType::Or>(
696 BEHAVIOR_BINARY),
697 make_literal_definition<FixedString{"pi"}, TokenType::ConstantPi>(
698 BEHAVIOR_ZERO_PUSH),
699 make_literal_definition<FixedString{"and"}, TokenType::And>(
700 BEHAVIOR_BINARY),
701 make_literal_definition<FixedString{"xor"}, TokenType::Xor>(
702 BEHAVIOR_BINARY),
703 make_literal_definition<FixedString{"not"}, TokenType::Not>(
704 BEHAVIOR_UNARY),
705 make_literal_definition<FixedString{"pow"}, TokenType::Pow>(
706 BEHAVIOR_BINARY),
707 make_literal_definition<FixedString{"min"}, TokenType::Min>(
708 BEHAVIOR_BINARY),
709 make_literal_definition<FixedString{"max"}, TokenType::Max>(
710 BEHAVIOR_BINARY),
711 make_literal_definition<FixedString{"fma"}, TokenType::Fma>(
712 BEHAVIOR_TERNARY),
713 make_literal_definition<FixedString{"exp"}, TokenType::Exp>(
714 BEHAVIOR_UNARY),
715 make_literal_definition<FixedString{"log"}, TokenType::Log>(
716 BEHAVIOR_UNARY),
717 make_literal_definition<FixedString{"abs"}, TokenType::Abs>(
718 BEHAVIOR_UNARY),
719 make_literal_definition<FixedString{"sin"}, TokenType::Sin>(
720 BEHAVIOR_UNARY),
721 make_literal_definition<FixedString{"cos"}, TokenType::Cos>(
722 BEHAVIOR_UNARY),
723 make_literal_definition<FixedString{"tan"}, TokenType::Tan>(
724 BEHAVIOR_UNARY),
725 make_literal_definition<FixedString{"sgn"}, TokenType::Sgn>(
726 BEHAVIOR_UNARY),
727 make_literal_definition<FixedString{"neg"}, TokenType::Neg>(
728 BEHAVIOR_UNARY),
729 make_literal_definition<FixedString{"@[]"}, TokenType::StoreAbs>(
730 TokenBehavior{.arity = 3, .stack_effect = -3},
731 Availability::Expr | Availability::VkExpr),
732 make_literal_definition<FixedString{"clip"}, TokenType::Clip>(
733 TokenBehavior{.arity = 3, .stack_effect = -2}),
734 make_literal_definition<FixedString{"sqrt"}, TokenType::Sqrt>(
735 BEHAVIOR_UNARY),
736 make_literal_definition<FixedString{"ceil"}, TokenType::Ceil>(
737 BEHAVIOR_UNARY),
738 make_literal_definition<FixedString{"asin"}, TokenType::Asin>(
739 BEHAVIOR_UNARY),
740 make_literal_definition<FixedString{"acos"}, TokenType::Acos>(
741 BEHAVIOR_UNARY),
742 make_literal_definition<FixedString{"atan"}, TokenType::Atan>(
743 BEHAVIOR_UNARY),
744 make_literal_definition<FixedString{"exp2"}, TokenType::Exp2>(
745 BEHAVIOR_UNARY),
746 make_literal_definition<FixedString{"log2"}, TokenType::Log2>(
747 BEHAVIOR_UNARY),
748 make_literal_definition<FixedString{"sinh"}, TokenType::Sinh>(
749 BEHAVIOR_UNARY),
750 make_literal_definition<FixedString{"cosh"}, TokenType::Cosh>(
751 BEHAVIOR_UNARY),
752 make_literal_definition<FixedString{"tanh"}, TokenType::Tanh>(
753 BEHAVIOR_UNARY),
754 make_literal_definition<FixedString{"bitor"}, TokenType::Bitor>(
755 BEHAVIOR_BINARY),
756 make_literal_definition<FixedString{"atan2"}, TokenType::Atan2>(
757 BEHAVIOR_BINARY),
758 make_literal_definition<FixedString{"clamp"}, TokenType::Clamp>(
759 BEHAVIOR_TERNARY),
760 make_literal_definition<FixedString{"floor"}, TokenType::Floor>(
761 BEHAVIOR_UNARY),
762 make_literal_definition<FixedString{"trunc"}, TokenType::Trunc>(
763 BEHAVIOR_UNARY),
764 make_literal_definition<FixedString{"round"}, TokenType::Round>(
765 BEHAVIOR_UNARY),
766 make_literal_definition<FixedString{"log10"}, TokenType::Log10>(
767 BEHAVIOR_UNARY),
768 make_literal_definition<FixedString{"width"}, TokenType::ConstantWidth>(
769 BEHAVIOR_ZERO_PUSH),
770 make_literal_definition<FixedString{"bitand"}, TokenType::Bitand>(
771 BEHAVIOR_BINARY),
772 make_literal_definition<FixedString{"bitxor"}, TokenType::Bitxor>(
773 BEHAVIOR_BINARY),
774 make_literal_definition<FixedString{"bitnot"}, TokenType::Bitnot>(
775 BEHAVIOR_UNARY),
776 make_literal_definition<FixedString{"height"},
777 TokenType::ConstantHeight>(BEHAVIOR_ZERO_PUSH),
779 .name = "bufN",
780 .behavior = BEHAVIOR_ZERO_PUSH,
781 .parser = parse_buffer_access,
782 .availability = Availability::VkExpr},
784 .name = "bufN",
785 .behavior = BEHAVIOR_ZERO_PUSH,
786 .parser = parse_buffer_access,
787 .availability = Availability::VkExpr},
789 .name = "bufN",
790 .behavior =
791 TokenBehavior{.arity = 2, .stack_effect = -1},
792 .parser = parse_buffer_access,
793 .availability = Availability::VkExpr},
794 make_literal_definition<FixedString{"^exit^"}, TokenType::ExitNoWrite>(
795 BEHAVIOR_ZERO_PUSH, Availability::Expr | Availability::VkExpr),
796 make_literal_definition<FixedString{"copysign"}, TokenType::Copysign>(
797 BEHAVIOR_BINARY),
799 .name = "plane_width",
800 .behavior = BEHAVIOR_ZERO_PUSH,
801 .parser = parse_plane_width,
802 .availability = Availability::SingleExpr},
804 .name = "plane_height",
805 .behavior = BEHAVIOR_ZERO_PUSH,
806 .parser = parse_plane_height,
807 .availability = Availability::SingleExpr},
809 .name = "clip_width",
810 .behavior = BEHAVIOR_ZERO_PUSH,
811 .parser = parse_clip_width,
812 .availability = Availability::SingleExpr},
814 .name = "clip_height",
815 .behavior = BEHAVIOR_ZERO_PUSH,
816 .parser = parse_clip_height,
817 .availability = Availability::SingleExpr},
819 .name = "clip_plane_width",
820 .behavior = BEHAVIOR_ZERO_PUSH,
821 .parser = parse_clip_plane_width,
822 .availability = Availability::SingleExpr},
824 .name = "clip_plane_height",
825 .behavior = BEHAVIOR_ZERO_PUSH,
826 .parser = parse_clip_plane_height,
827 .availability = Availability::SingleExpr},
829 .name = "dupN",
830 .behavior = DynamicBehaviorFn(dup_behavior),
831 .parser = parse_dup,
832 .availability = AVAILABILITY_ALL},
834 .name = "dropN",
835 .behavior = DynamicBehaviorFn(drop_behavior),
836 .parser = parse_drop,
837 .availability = AVAILABILITY_ALL},
839 .name = "swapN",
840 .behavior = DynamicBehaviorFn(swap_behavior),
841 .parser = parse_swap,
842 .availability = AVAILABILITY_ALL},
844 .type = TokenType::SortN,
845 .name = "sortN",
846 .behavior = DynamicBehaviorFn(sortn_behavior),
847 .parser = parse_stack_n<FixedString{"sort"}, TokenType::SortN>,
848 .availability = AVAILABILITY_ALL},
850 .type = TokenType::ArgminN,
851 .name = "argminN",
852 .behavior = DynamicBehaviorFn(argminn_behavior),
853 .parser = parse_stack_n<FixedString{"argmin"}, TokenType::ArgminN>,
854 .availability = AVAILABILITY_ALL},
856 .type = TokenType::ArgmaxN,
857 .name = "argmaxN",
858 .behavior = DynamicBehaviorFn(argmaxn_behavior),
859 .parser = parse_stack_n<FixedString{"argmax"}, TokenType::ArgmaxN>,
860 .availability = AVAILABILITY_ALL},
862 .type = TokenType::ArgsortN,
863 .name = "argsortN",
864 .behavior = DynamicBehaviorFn(argsortn_behavior),
865 .parser =
866 parse_stack_n<FixedString{"argsort"}, TokenType::ArgsortN>,
867 .availability = AVAILABILITY_ALL},
869 .name = "label_def",
870 .behavior = BEHAVIOR_NO_EFFECT,
871 .parser = parse_label_def,
872 .availability = AVAILABILITY_ALL},
874 .name = "jump",
875 .behavior =
876 TokenBehavior{.arity = 1, .stack_effect = -1},
877 .parser = parse_jump,
878 .availability = AVAILABILITY_ALL},
880 .name = "VarStore",
881 .behavior =
882 TokenBehavior{.arity = 1, .stack_effect = -1},
883 .parser = parse_var_store,
884 .availability = AVAILABILITY_ALL},
886 .name = "var_load",
887 .behavior = BEHAVIOR_ZERO_PUSH,
888 .parser = parse_var_load,
889 .availability = AVAILABILITY_ALL},
891 .name = "array_alloc_static",
892 .behavior =
893 TokenBehavior{.arity = 0, .stack_effect = 0},
894 .parser = parse_array_alloc_static,
895 .availability = AVAILABILITY_ALL},
897 .name = "array_alloc_dyn",
898 .behavior =
899 TokenBehavior{.arity = 1, .stack_effect = -1},
900 .parser = parse_array_alloc_dyn,
901 .availability = Availability::SingleExpr},
903 .name = "array_store",
904 .behavior =
905 TokenBehavior{.arity = 2, .stack_effect = -2},
906 .parser = parse_array_store,
907 .availability = AVAILABILITY_ALL},
909 .name = "array_load",
910 .behavior =
911 TokenBehavior{.arity = 1, .stack_effect = 0},
912 .parser = parse_array_load,
913 .availability = AVAILABILITY_ALL},
915 .name = "clip_rel",
916 .behavior = BEHAVIOR_ZERO_PUSH,
917 .parser = parse_clip_rel,
918 .availability =
919 Availability::Expr | Availability::VkExpr},
921 .name = "clip_abs",
922 .behavior =
923 TokenBehavior{.arity = 2, .stack_effect = -1},
924 .parser = parse_clip_abs,
925 .availability = AVAILABILITY_ALL},
927 .name = "clip_cur",
928 .behavior = BEHAVIOR_ZERO_PUSH,
929 .parser = parse_clip_cur,
930 .availability =
931 Availability::Expr | Availability::VkExpr},
933 .name = "prop_access",
934 .behavior = BEHAVIOR_ZERO_PUSH,
935 .parser = parse_prop_access,
936 .availability = AVAILABILITY_ALL},
938 .name = "prop_exists",
939 .behavior = BEHAVIOR_ZERO_PUSH,
940 .parser = parse_prop_exists,
941 .availability = AVAILABILITY_ALL},
943 .name = "clip_abs_plane",
944 .behavior =
945 TokenBehavior{.arity = 2, .stack_effect = -1},
946 .parser = parse_clip_abs_plane,
947 .availability = Availability::SingleExpr},
949 .name = "store_abs_plane",
950 .behavior =
951 TokenBehavior{.arity = 3, .stack_effect = -3},
952 .parser = parse_store_abs_plane,
953 .availability = Availability::SingleExpr},
955 .name = "prop_store",
956 .behavior = DynamicBehaviorFn(prop_store_behavior),
957 .parser = parse_prop_store,
958 .availability = Availability::SingleExpr},
960 .name = "number",
961 .behavior = BEHAVIOR_ZERO_PUSH,
962 .parser = parse_number,
963 .availability = AVAILABILITY_ALL},
964 };
965}
966
967} // anonymous namespace
968
// Splits `expr` on whitespace and turns every word into a Token.
//
// For each word the token definitions are tried in table order, skipping
// definitions that are not available in `mode`; the first parser that accepts
// the word wins.
//
// Parameters:
//   expr                    - expression text, words separated by whitespace.
//   num_inputs              - number of input clips; clip indices must lie in
//                             [0, num_inputs).
//   mode                    - expression mode used to filter definitions.
//   num_intermediate_inputs - number of buffers; buffer indices must lie in
//                             [0, num_intermediate_inputs).
//
// Returns the tokens in source order.
//
// Throws std::runtime_error for a word no parser accepts and for a clip or
// buffer index outside its valid range. Exceptions raised by the individual
// parsers (e.g. from svtoi) propagate unchanged.
std::vector<Token> tokenize(const std::string& expr, int num_inputs,
                            ExprMode mode, int num_intermediate_inputs) {
    std::vector<Token> tokens;
    int idx = 0;

    // std::isspace requires a value representable as unsigned char (or EOF);
    // passing a negative plain char - any non-ASCII byte where char is
    // signed - is undefined behaviour. The result is also normalised to bool
    // so that two whitespace characters always compare equal below.
    auto is_space = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };
    auto to_string_view = [](auto r) {
        return std::string_view(r.begin(), r.end());
    };
    auto out_of_range = [](int index, int count) {
        return index < 0 || index >= count;
    };

    constexpr auto TOKEN_DEFS = get_token_definitions();

    // chunk_by groups consecutive characters of the same class (whitespace /
    // non-whitespace); the filter then drops the whitespace groups.
    for (const auto str_token_view :
         expr | std::views::chunk_by([=](char a, char b) {
             return is_space(a) == is_space(b);
         }) | std::views::filter([=](auto r) { return !is_space(r.front()); }) |
             std::views::transform(to_string_view)) {
        std::optional<Token> parsed_token;

        for (const auto& definition : TOKEN_DEFS) {
            // Check mode restrictions
            if (!supports_mode(definition.availability, mode)) {
                continue;
            }

            if ((parsed_token = definition.parser(str_token_view))) {
                break;
            }
        }

        if (!parsed_token) {
            throw std::runtime_error(std::format("Invalid token: {} (idx {})",
                                                 std::string(str_token_view),
                                                 idx));
        }

        // Post-parse validation for clip and buffer indices.
        // NOTE(review): only the token types listed here are range-checked.
        // Other tokens that carry a clip index (for example the property
        // existence and clip dimension tokens) are not - confirm they are
        // validated elsewhere.
        const TokenType type = parsed_token->type;
        bool bad_clip_index = false;
        bool bad_buffer_index = false;

        if (type == TokenType::ClipRel || type == TokenType::ClipAbs ||
            type == TokenType::ClipCur) {
            bad_clip_index = out_of_range(
                std::get<TokenPayloadClipAccess>(parsed_token->payload)
                    .clip_idx,
                num_inputs);
        } else if (type == TokenType::PropAccess) {
            bad_clip_index = out_of_range(
                std::get<TokenPayloadPropAccess>(parsed_token->payload)
                    .clip_idx,
                num_inputs);
        } else if (type == TokenType::ClipAbsPlane) {
            bad_clip_index = out_of_range(
                std::get<TokenPayloadClipAccessPlane>(parsed_token->payload)
                    .clip_idx,
                num_inputs);
        } else if (type == TokenType::BufferRel ||
                   type == TokenType::BufferAbs ||
                   type == TokenType::BufferCur) {
            bad_buffer_index = out_of_range(
                std::get<TokenPayloadBufferAccess>(parsed_token->payload)
                    .buffer_idx,
                num_intermediate_inputs);
        }

        if (bad_clip_index) {
            throw std::runtime_error(
                std::format("Invalid clip index in token: {} (idx {})",
                            std::string(str_token_view), idx));
        }
        if (bad_buffer_index) {
            throw std::runtime_error(
                std::format("Invalid buffer index in token: {} (idx {})",
                            std::string(str_token_view), idx));
        }

        tokens.push_back(*parsed_token);
        idx++;
    }
    return tokens;
}
1053
// Looks up the stack behavior of `token`.
//
// The first table entry whose type equals `token.type` is used. A static
// TokenBehavior is returned as is; a DynamicBehaviorFn is called with the
// token so the behavior can depend on its payload.
//
// Throws std::runtime_error if no table entry has the token's type (the
// lookup result must not be dereferenced in that case).
TokenBehavior get_token_behavior(const Token& token) {
    constexpr auto TOKEN_DEFS = get_token_definitions();

    // `auto` rather than `auto*`: the array iterator is not guaranteed to be
    // a raw pointer on every standard library configuration.
    const auto it = std::ranges::find_if(
        TOKEN_DEFS, [&](const auto& def) { return def.type == token.type; });
    if (it == TOKEN_DEFS.end()) {
        throw std::runtime_error(
            std::format("No token definition for token: {}", token.text));
    }

    return std::visit(
        [&token](auto&& arg) -> TokenBehavior {
            using T = std::decay_t<decltype(arg)>;
            if constexpr (std::is_same_v<T, TokenBehavior>) {
                return arg;
            } else if constexpr (std::is_same_v<T, DynamicBehaviorFn>) {
                return arg(token);
            }
        },
        it->behavior);
}
std::vector< Token > tokenize(const std::string &expr, int num_inputs, ExprMode mode, int num_intermediate_inputs)
TokenBehavior get_token_behavior(const Token &token)
TokenType
Definition Tokenizer.hpp:30
@ ConstantClipPlaneHeight
Definition Tokenizer.hpp:42
@ ConstantPlaneWidth
Definition Tokenizer.hpp:37
@ ArrayAllocStatic
Definition Tokenizer.hpp:51
@ ConstantPlaneHeight
Definition Tokenizer.hpp:38
@ ConstantClipHeight
Definition Tokenizer.hpp:40
@ ConstantClipPlaneWidth
Definition Tokenizer.hpp:41
@ ConstantClipWidth
Definition Tokenizer.hpp:39
ExprMode
PropWriteType
constexpr int parse_std_clip_idx(char c)
TokenBehavior(*)(const Token &) DynamicBehaviorFn
TokenType type
PayloadVariant payload