32 const std::vector<Token>& tokens,
int num_inputs,
33 int num_intermediate_inputs, [[maybe_unused]]
int width,
34 [[maybe_unused]]
int height,
bool mirror_boundary,
35 const std::map<std::pair<int, std::string>,
int>& prop_map,
37 : tokens(tokens), num_inputs(num_inputs),
38 num_intermediate_inputs(num_intermediate_inputs),
39 mirror_boundary(mirror_boundary), prop_map(prop_map),
42 const auto& var_result =
analysis.getVariableUsageResult();
43 for (
const auto& var_name : var_result.all_vars) {
44 user_variables.insert(var_name);
48 if (
const char* env = std::getenv(
"LLVMEXPR_GLSL_STRUCTURIZECFG_DEBUG")) {
50 debug_structurize_cfg = (env[0] !=
'\0' && env[0] !=
'0');
// Appends `code` to the output stream `out` exactly as given; this function
// itself adds no indentation and no newline.
55void GLSLGenerator::emit(
const std::string& code) { out << code; }
// Emits one line of generated code.
// The visible loop iterates `indent_level` times, i.e. once per indentation
// level.
// NOTE(review): the loop body and the write of `code` are not visible in this
// listing — presumably indentation, then `code`, then a newline; confirm.
57void GLSLGenerator::emitLine(
const std::string& code) {
58 for (
int i = 0; i < indent_level; ++i) {
// Writes a single "\n" to the output stream `out`.
64void GLSLGenerator::emitNewline() { out <<
"\n"; }
// Increases the current indentation level by one.
66void GLSLGenerator::indent() { ++indent_level; }
// Counterpart of indent(). Only acts while `indent_level` is positive.
// NOTE(review): the guarded statement is not visible in this listing —
// presumably it decrements `indent_level`; confirm.
68void GLSLGenerator::dedent() {
69 if (indent_level > 0) {
// Debug aid: writes a description of the codegen CFG into the generated
// shader as `//` comment lines. The whole routine is gated on
// `debug_structurize_cfg`.
74void GLSLGenerator::debugEmitCfgComment() {
78 if (!debug_structurize_cfg) {
81 const auto& cfg = getCodegenCfgBlocks();
82 const auto& structurize = analysis.getStructurizeCFGResult();
84 emitLine(
"// --- llvmexpr GLSL StructurizeCFG debug ---");
// Summary: number of blocks and the structurize success flag (printed as 1/0).
85 emitLine(std::format(
"// blocks = {}, structurize.success = {}", cfg.size(),
86 structurize.success ? 1 : 0));
// One comment line per block.
87 for (
size_t i = 0; i < cfg.size(); ++i) {
88 const auto& b = cfg[i];
// Build a comma-separated successor list; no comma after the last entry.
90 for (
size_t j = 0; j < b.successors.size(); ++j) {
91 succs += std::format(
"{}{}", b.successors[j],
92 (j + 1 == b.successors.size()) ?
"" :
",");
// -999 is printed when `structurize.ipdom` has no entry for block i.
95 int ip = (i < structurize.ipdom.size()) ? structurize.ipdom[i] : -999;
96 emitLine(std::format(
"// B{}: [{}..{}) succ=[{}] ipdom={}", i,
97 b.start_token_idx, b.end_token_idx, succs, ip));
// One comment line per (loop header, follow block) pair.
99 for (
const auto& [hdr, follow] : structurize.loop_follow) {
100 emitLine(std::format(
"// loop header {} follow {}", hdr, follow));
102 emitLine(
"// --- end structurize debug ---");
// Selects the CFG block list used for code generation: the structurizer's
// `structured_cfg_blocks` when that list is non-empty, otherwise the blocks
// from `analysis.getCFGBlocks()`.
106const std::vector<analysis::CFGBlock>&
107GLSLGenerator::getCodegenCfgBlocks()
const {
108 const auto& structurize = analysis.getStructurizeCFGResult();
109 if (!structurize.structured_cfg_blocks.empty()) {
110 return structurize.structured_cfg_blocks;
112 return analysis.getCFGBlocks();
// Selects the per-block entry stack depths used for code generation: the
// structurizer's `structured_stack_depth_in` when non-empty, otherwise
// `analysis.getStackDepthIn()`. Mirrors the selection in getCodegenCfgBlocks().
115const std::vector<int>& GLSLGenerator::getCodegenStackDepthIn()
const {
116 const auto& structurize = analysis.getStructurizeCFGResult();
117 if (!structurize.structured_stack_depth_in.empty()) {
118 return structurize.structured_stack_depth_in;
120 return analysis.getStackDepthIn();
// Computes the join block for a two-way branch with targets `t` and `f` by
// calling lcaPostdom() on the structurizer's `ipdom` table.
// NOTE(review): the handling of `join == -1` with a valid `stop_block`, and
// the return statement, are not visible in this listing — presumably
// `stop_block` is used as the fallback join; confirm.
123int GLSLGenerator::computeBranchJoin(
int t,
int f,
int stop_block)
const {
124 const auto& structurize = analysis.getStructurizeCFGResult();
125 int join = lcaPostdom(t, f, structurize.ipdom);
126 if (join == -1 && stop_block != -1) {
// Looks up the break-flag variable name associated with loop header `header`
// in `loop_break_flags`. When none is registered, a fresh name of the form
// "_brk_<n>" is generated (post-incrementing `break_flag_counter`) and stored.
// NOTE(review): both return statements are missing from this listing —
// presumably the cached or newly created name is returned; confirm.
132std::string GLSLGenerator::getLoopBreakFlag(
int header) {
133 auto it = loop_break_flags.find(header);
134 if (it != loop_break_flags.end()) {
137 std::string name = std::format(
"_brk_{}", break_flag_counter++);
138 loop_break_flags.emplace(header, name);
// Searches the active loop headers for one whose follow block (per
// `structurize.loop_follow`) is `target_block`. The header stack is walked in
// reverse, i.e. starting from its back element.
// NOTE(review): the return type and return statements are not visible here;
// callers use `.has_value()` and `*result`, so it looks optional-like and
// presumably yields the matching header — confirm.
143GLSLGenerator::findEnclosingLoopForFollow(
int target_block,
144 const LoopContext& loop_ctx)
const {
145 const auto& structurize = analysis.getStructurizeCFGResult();
146 for (
int header : loop_ctx.header_stack | std::views::reverse) {
147 auto fit = structurize.loop_follow.find(header);
148 if (fit != structurize.loop_follow.end() &&
149 fit->second == target_block) {
// Emits a GLSL `if (<cond>) {` guard whose condition is assembled in `cond`.
// When `structured_exit_enabled` is set, `cond` is assigned the exit flag name
// "_llvmexpr_exit". The loop then visits every header on the loop stack and
// looks up its break flag in `loop_break_flags`.
// NOTE(review): how flags are combined into `cond`, the early-outs, and the
// guarded body are not visible in this listing — presumably a `break;` is
// emitted so that an exit/break request propagates out of the enclosing
// loop; confirm.
156void GLSLGenerator::emitUnwindBreakIfNeeded(
const LoopContext& loop_ctx) {
158 if (structured_exit_enabled) {
159 cond =
"_llvmexpr_exit";
161 for (
int header : loop_ctx.header_stack) {
162 auto it = loop_break_flags.find(header);
163 if (it == loop_break_flags.end()) {
175 emitLine(std::format(
"if ({}) {{", cond));
// Returns a fresh temporary name "t_<n>"; `temp_counter` is post-incremented
// so each call yields a different name.
182std::string GLSLGenerator::newTemp() {
183 return std::format(
"t_{}", temp_counter++);
// Returns a fresh slot name "s_<n>"; `slot_counter` is post-incremented so
// each call yields a different name.
186std::string GLSLGenerator::newSlot() {
187 return std::format(
"s_{}", slot_counter++);
// Takes the top expression off the generator's expression stack.
// Throws std::runtime_error("GLSLGenerator: stack underflow") on failure.
// NOTE(review): the throw condition, the removal of the element and the
// return are not visible here — presumably it throws when `stack` is empty
// and returns the former top element; confirm.
190std::string GLSLGenerator::pop() {
192 throw std::runtime_error(
"GLSLGenerator: stack underflow");
194 std::string val = stack.back();
// Appends the expression string `val` to the back of `stack`, making it the
// new top of the expression stack.
199void GLSLGenerator::push(
const std::string& val) { stack.push_back(val); }
// Returns (by copy) the stack entry `offset` positions below the top without
// removing it; offset 0 is the top element.
// Throws std::runtime_error when `offset` is not a valid position. Because the
// comparison converts `offset` to size_t, a negative offset becomes a huge
// value and is rejected by the same check.
201std::string GLSLGenerator::peek(
int offset)
const {
202 if (
static_cast<size_t>(offset) >= stack.size()) {
203 throw std::runtime_error(
"GLSLGenerator: invalid stack peek");
205 return stack[stack.size() - 1 - offset];
// Converts a double into GLSL source text for a float expression.
//  - NaN is spelled as the expression "(0.0 / 0.0)".
//  - +inf / -inf are spelled as "(1.0 / 0.0)" / "(-1.0 / 0.0)".
//  - Everything else is formatted with "{:.10g}" (up to 10 significant digits).
208std::string GLSLGenerator::floatLiteral(
double val) {
209 if (std::isnan(val)) {
210 return "(0.0 / 0.0)";
212 if (std::isinf(val)) {
213 return val > 0 ?
"(1.0 / 0.0)" :
"(-1.0 / 0.0)";
215 std::string s = std::format(
"{:.10g}", val);
// Detects text with neither a decimal point nor an exponent (e.g. "3").
// NOTE(review): the action taken is not visible in this listing — presumably
// a ".0" suffix is appended so GLSL parses a float, not an int; confirm.
216 if (s.find(
'.') == std::string::npos && s.find(
'e') == std::string::npos) {
// Emits `float <temp> = <a> <op> <b>;` for an infix operator `op`.
// The right operand `b` is popped first, then the left operand `a`.
// NOTE(review): the return statement is not visible; callers push the result,
// so presumably the new temp name is returned — confirm.
222std::string GLSLGenerator::binaryOp(
const std::string& op) {
223 std::string b = pop();
224 std::string a = pop();
225 std::string temp = newTemp();
226 emitLine(std::format(
"float {} = {} {} {};", temp, a, op, b));
// Emits a comparison whose boolean result is materialised as a float:
// `float <temp> = (<a> <op> <b>) ? 1.0 : 0.0;`.
// The right operand `b` is popped first, then the left operand `a`.
// NOTE(review): the return statement is not visible; callers push the result,
// so presumably the new temp name is returned — confirm.
230std::string GLSLGenerator::binaryCmp(
const std::string& op) {
231 std::string b = pop();
232 std::string a = pop();
233 std::string temp = newTemp();
234 emitLine(std::format(
"float {} = ({} {} {}) ? 1.0 : 0.0;", temp, a, op, b));
// Emits `float <temp> = <fn>(<a>);` where `a` is popped from the stack and
// `fn` is the name of a one-argument GLSL function.
// NOTE(review): the return statement is not visible; callers push the result,
// so presumably the new temp name is returned — confirm.
238std::string GLSLGenerator::unaryFn(
const std::string& fn) {
239 std::string a = pop();
240 std::string temp = newTemp();
241 emitLine(std::format(
"float {} = {}({});", temp, fn, a));
// Emits `float <temp> = <fn>(<a>, <b>);` for a two-argument GLSL function.
// The second argument `b` is popped first, then the first argument `a`.
// NOTE(review): the return statement is not visible; callers push the result,
// so presumably the new temp name is returned — confirm.
245std::string GLSLGenerator::binaryFn(
const std::string& fn) {
246 std::string b = pop();
247 std::string a = pop();
248 std::string temp = newTemp();
249 emitLine(std::format(
"float {} = {}({}, {});", temp, fn, a, b));
// Clamp boundary handling for one coordinate: formats
// `int <temp> = clamp(<coord>, 0, <max_dim> - 1);`, restricting the
// coordinate to [0, max_dim - 1]. `coord` and `max_dim` are GLSL expressions.
// NOTE(review): the enclosing emitLine call and the return are not visible —
// presumably the statement is emitted and `temp` returned; confirm.
253std::string GLSLGenerator::emitClampCoord(
const std::string& coord,
254 const std::string& max_dim) {
255 std::string temp = newTemp();
257 std::format(
"int {} = clamp({}, 0, {} - 1);", temp, coord, max_dim));
// Mirror boundary handling for one coordinate. Emitted GLSL:
//   int <temp>;
//   int _period = 2 * (<max_dim>);
//   int _mod = int(mod(float(<coord>), float(_period)));
//   if (_mod >= (...)) { <temp> = _period - 1 - _mod; } else { <temp> = _mod; }
// NOTE(review): `_period` and `_mod` are fixed names, so the emitted
// statements need their own GLSL scope to be repeatable; the scope-opening
// lines, the arguments of the `if` format and the return are not visible in
// this listing — confirm.
261std::string GLSLGenerator::emitMirrorCoord(
const std::string& coord,
262 const std::string& max_dim) {
263 std::string temp = newTemp();
// Declared up front so the result outlives whatever scope holds the helpers.
264 emitLine(std::format(
"int {};", temp));
267 emitLine(std::format(
"int _period = 2 * ({});", max_dim));
269 std::format(
"int _mod = int(mod(float({}), float(_period)));", coord));
270 emitLine(std::format(
"if (_mod >= ({})) {{ {} = _period - 1 - _mod; }}",
272 emitLine(std::format(
"else {{ {} = _mod; }}", temp));
// Applies boundary handling to a coordinate by delegating to either
// emitMirrorCoord() or emitClampCoord() and returning that helper's result.
// NOTE(review): the third parameter and the selecting condition are not
// visible in this listing; callers pass a `use_mirror` flag as the third
// argument, so presumably that flag selects the mirror path — confirm.
278std::string GLSLGenerator::emitFinalCoord(
const std::string& coord,
279 const std::string& max_dim,
282 return emitMirrorCoord(coord, max_dim);
284 return emitClampCoord(coord, max_dim);
// Formats the linear buffer index for pixel (x, y):
// `uint <temp> = uint(<x>) + uint(<y>) * pc.width;` (rows of `pc.width`
// elements). `x` and `y` are GLSL expressions; no range check is done here.
// NOTE(review): the enclosing emitLine call and the return are not visible —
// presumably the statement is emitted and `temp` returned; confirm.
287std::string GLSLGenerator::emitPixelIndex(
const std::string& x,
288 const std::string& y) {
289 std::string temp = newTemp();
291 std::format(
"uint {} = uint({}) + uint({}) * pc.width;", temp, x, y));
// Emits a bounds-handled load from input clip `clip_idx` at (x, y):
//  1. both coordinates go through emitFinalCoord() against `int(pc.width)` /
//     `int(pc.height)` with the `use_mirror` setting,
//  2. the linear index is computed with emitPixelIndex(),
//  3. `float <temp> = src<clip_idx>.data[<idx>];` is emitted.
// NOTE(review): the last parameter declaration and the return are not visible
// in this listing — presumably `bool use_mirror` and `return temp;`; confirm.
295std::string GLSLGenerator::emitPixelLoad(
int clip_idx,
const std::string& x,
296 const std::string& y,
298 std::string final_x = emitFinalCoord(x,
"int(pc.width)", use_mirror);
299 std::string final_y = emitFinalCoord(y,
"int(pc.height)", use_mirror);
300 std::string idx = emitPixelIndex(final_x, final_y);
301 std::string temp = newTemp();
302 emitLine(std::format(
"float {} = src{}.data[{}];", temp, clip_idx, idx));
312 break_flag_counter = 0;
313 loop_break_flags.clear();
317 emitBufferDeclarations();
318 emitHelperFunctions();
// Emits the fixed shader preamble: the `#version 450` directive, the
// GL_EXT_scalar_block_layout extension, a 256x1x1 compute work-group layout,
// and the push-constant block with the fields `width`, `height`, `numInputs`
// (uint) and `frameNumber` (int).
// NOTE(review): the closing line of the push-constant block is not visible;
// other code refers to the block through the instance name `pc` — confirm.
324void GLSLGenerator::emitHeader() {
325 emitLine(
"#version 450");
326 emitLine(
"#extension GL_EXT_scalar_block_layout : enable");
329 "layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;");
331 emitLine(
"layout(push_constant) uniform PushConstants {");
333 emitLine(
"uint width;");
334 emitLine(
"uint height;");
335 emitLine(
"uint numInputs;");
336 emitLine(
"int frameNumber;");
// Emits the std430 storage-buffer declarations (all in descriptor set 0):
//  - one readonly `InputBuffer<i>` per input clip, instance name `src<i>`;
//  - one readonly `IntermediateBuffer<i>` per intermediate input, instance
//    name `buf<i>`;
//  - the writeonly `OutputBuffer` at binding
//    num_inputs + num_intermediate_inputs;
//  - the readonly `PropsBuffer` at the next binding, instance `propsData`.
// NOTE(review): the binding arguments for the per-input and per-intermediate
// buffers, the output buffer's closing line, and any condition around the
// props buffer are not visible in this listing — confirm.
342void GLSLGenerator::emitBufferDeclarations() {
344 for (
int i = 0; i < num_inputs; ++i) {
345 emitLine(std::format(
"layout(std430, set = 0, binding = {}) readonly "
346 "buffer InputBuffer{} {{",
349 emitLine(
"float data[];");
351 emitLine(std::format(
"}} src{};", i));
356 for (
int i = 0; i < num_intermediate_inputs; ++i) {
357 emitLine(std::format(
"layout(std430, set = 0, binding = {}) readonly "
358 "buffer IntermediateBuffer{} {{",
361 emitLine(
"float data[];");
363 emitLine(std::format(
"}} buf{};", i));
// Output buffer: bound right after all input and intermediate buffers.
368 emitLine(std::format(
"layout(std430, set = 0, binding = {}) writeonly "
369 "buffer OutputBuffer {{",
370 num_inputs + num_intermediate_inputs));
372 emitLine(
"float data[];");
// Props buffer: bound one past the output buffer.
378 emitLine(std::format(
379 "layout(std430, set = 0, binding = {}) readonly buffer PropsBuffer {{",
380 num_inputs + num_intermediate_inputs + 1));
382 emitLine(
"float props[];");
384 emitLine(
"} propsData;");
// Emits GLSL helper functions used by the generated code. The visible part
// defines `llvmexpr_round(float x)`, which rounds half away from zero:
// floor(x + 0.5) for x >= 0 and ceil(x - 0.5) otherwise.
// NOTE(review): lines before the helper are missing from this listing, so
// other helpers may be emitted here as well.
388void GLSLGenerator::emitHelperFunctions() {
399 emitLine(
"float llvmexpr_round(float x) {");
401 emitLine(
"return (x >= 0.0) ? floor(x + 0.5) : ceil(x - 0.5);");
// Emits the local GLSL declarations the generated body relies on:
//  1. `float u_<name>;` for every entry of `user_variables`;
//  2. `float a_<name>[<static_size>];` once per array name seen in the tokens
//     (tracked in `arrays`);
//  3. `float s_<n>;` entry slots for blocks whose incoming stack must be
//     passed through variables; the slot names are recorded per block in
//     `block_entry_stack`.
407void GLSLGenerator::emitVariableDeclarations() {
409 for (
const auto& var_name : user_variables) {
410 emitLine(std::format(
"float u_{};", var_name));
// NOTE(review): the token-type filter that guards the std::get below is not
// visible in this listing — presumably only array tokens reach it; confirm.
414 for (
const auto& token : tokens) {
416 const auto& payload = std::get<TokenPayloadArrayOp>(token.payload);
// First sighting of an array name: remember its size and declare it.
417 if (!arrays.contains(payload.name)) {
418 arrays[payload.name] = payload.static_size;
419 emitLine(std::format(
"float a_{}[{}];", payload.name,
420 payload.static_size));
426 const auto& cfg_blocks = getCodegenCfgBlocks();
427 const auto& stack_depth_in = getCodegenStackDepthIn();
428 const auto& structurize = analysis.getStructurizeCFGResult();
// Loop follow blocks always get slots, regardless of predecessor count.
430 std::set<int> force_slots;
431 for (
const auto& [header, follow] : structurize.loop_follow) {
433 force_slots.insert(follow);
// A block gets slots when it has more than one predecessor or is forced.
437 for (
size_t i = 0; i < cfg_blocks.size(); ++i) {
438 if (cfg_blocks[i].predecessors.size() > 1 ||
439 force_slots.contains((
int)i)) {
// One slot per value on the stack at block entry.
440 int depth = stack_depth_in[i];
441 std::vector<std::string> slots;
442 for (
int j = 0; j < depth; ++j) {
443 std::string slot = newSlot();
444 emitLine(std::format(
"float {};", slot));
445 slots.push_back(slot);
447 block_entry_stack[
static_cast<int>(i)] = slots;
// Emits the shader's `void main()`:
//  - `gid` is the x component of the global invocation id and `totalPixels`
//    is pc.width * pc.height;
//  - invocations with gid >= totalPixels enter an `if` guard (its body is not
//    visible in this listing — presumably an early `return;`; confirm);
//  - the pixel coordinates are X = gid % pc.width and Y = gid / pc.width;
//  - then variable declarations, the optional CFG debug comment, and the
//    structured body are emitted.
452void GLSLGenerator::emitMainFunction() {
453 emitLine(
"void main() {");
456 emitLine(
"uint gid = gl_GlobalInvocationID.x;");
457 emitLine(
"uint totalPixels = pc.width * pc.height;");
459 emitLine(
"if (gid >= totalPixels) {");
466 emitLine(
"int X = int(gid % pc.width);");
467 emitLine(
"int Y = int(gid / pc.width);");
470 emitVariableDeclarations();
473 debugEmitCfgComment();
474 emitMainFunctionStructured();
// Emits the body of main() as a state machine: `_state` selects a `case` of a
// `switch` inside `while (_state != -1)`, one case per CFG block.
// Per block:
//  - no successors: the top of the stack (if any) becomes `_result` and
//    `_state` is set to -1, ending the loop;
//  - one successor: the stack is copied into the successor's entry slots and
//    `_state` is set to that successor;
//  - otherwise (two successors): the condition is popped, the stack is copied
//    into both targets' entry slots, and `_state` is chosen with `cond > 0.0`.
// After the loop, `_result` is stored to dst.data[gid] unless its bit pattern
// equals 0x7FC0E71F.
480void GLSLGenerator::emitMainFunctionStateMachine() {
481 const auto& cfg_blocks = getCodegenCfgBlocks();
483 emitLine(
"int _state = 0;");
484 emitLine(
"float _result = 0.0;");
486 emitLine(
"while (_state != -1) {");
488 emitLine(
"switch (_state) {");
490 for (
size_t i = 0; i < cfg_blocks.size(); ++i) {
491 emitLine(std::format(
"case {}:", i));
// Blocks with entry slots start from those slot names instead of whatever
// the generator's stack held after the previously emitted block.
494 if (block_entry_stack.contains(
static_cast<int>(i))) {
495 stack = block_entry_stack[
static_cast<int>(i)];
498 emitBlockCode(
static_cast<int>(i));
500 const auto& block = cfg_blocks[i];
501 if (block.successors.empty()) {
502 if (!stack.empty()) {
503 std::string result = pop();
504 emitLine(std::format(
"_result = {};", result));
506 emitLine(
"_state = -1;");
507 }
else if (block.successors.size() == 1) {
508 int next = block.successors[0];
509 emitStackToEntrySlots(next);
510 emitLine(std::format(
"_state = {};", next));
// Two-way branch: successors[0] is taken when the condition is > 0.0.
512 std::string cond = pop();
513 int true_target = block.successors[0];
514 int false_target = block.successors[1];
515 emitStackToEntrySlots(true_target);
516 emitStackToEntrySlots(false_target);
517 emitLine(std::format(
"_state = ({} > 0.0) ? {} : {};", cond,
518 true_target, false_target));
// Same sentinel test as emitResultEpilogueStore(): the store is skipped when
// `_result` carries the bit pattern 0x7FC0E71F.
530 emitLine(
"if (floatBitsToUint(_result) != 0x7FC0E71Fu) {");
532 emitLine(
"dst.data[gid] = _result;");
// Emits the structured-mode exit sequence: assigns `result_expr` to `_result`
// and sets the `_llvmexpr_exit` flag to true.
// NOTE(review): lines after the flag assignment are missing from this
// listing, so further statements may follow.
537void GLSLGenerator::emitSetResultAndExit(
const std::string& result_expr) {
538 emitLine(std::format(
"_result = {};", result_expr));
539 emitLine(
"_llvmexpr_exit = true;");
// Emits the final store of `_result` to dst.data[gid]. The store is wrapped
// in a guard that skips it when `_result` has the bit pattern 0x7FC0E71F,
// which is the value processToken() pushes as
// "uintBitsToFloat(0x7FC0E71Fu)" for one token type.
543void GLSLGenerator::emitResultEpilogueStore() {
544 emitLine(
"if (floatBitsToUint(_result) != 0x7FC0E71Fu) {");
546 emitLine(
"dst.data[gid] = _result;");
// Returns true when `header` appears anywhere in `loop_ctx.header_stack`
// (linear search over the stack).
552bool GLSLGenerator::isLoopHeaderActive(
int header,
553 const LoopContext& loop_ctx)
const {
554 return std::ranges::find(loop_ctx.header_stack, header) !=
555 loop_ctx.header_stack.end();
// Preflight (no code emitted): decides whether an edge to `target_block` can
// be expressed structurally given the region end `stop_block` and the active
// loop stack. The visible tests follow the same case order as
// emitEdgeToBlock().
//
// Results are memoised in `can_edge_cache`, keyed by (target block, stop
// block, copy of the header stack). `can_edge_in_progress` marks keys that
// are currently being evaluated, so a recursive re-entry on the same key is
// detected.
// NOTE(review): most return statements are missing from this listing; the
// outcome of each test below is therefore not documented here.
558bool GLSLGenerator::canEdgeToBlock(
int target_block,
int stop_block,
559 LoopContext& loop_ctx)
const {
560 CanKey key{.start_block = target_block,
561 .stop_block = stop_block,
562 .header_stack = loop_ctx.header_stack};
563 if (
auto it = can_edge_cache.find(key); it != can_edge_cache.end()) {
566 if (can_edge_in_progress.contains(key)) {
569 can_edge_in_progress.insert(key);
// `finish` clears the in-progress mark and caches the verdict for this key.
571 auto finish = [&](
bool ok) ->
bool {
572 can_edge_in_progress.erase(key);
573 can_edge_cache.emplace(std::move(key), ok);
578 const auto& structurize = analysis.getStructurizeCFGResult();
// Innermost active loop header, or -1 when not inside any loop.
580 loop_ctx.header_stack.empty() ? -1 : loop_ctx.header_stack.back();
// Case: the edge goes straight to the end of the current region.
582 if (target_block == stop_block) {
583 if (current_header == -1) {
587 auto it = structurize.loop_follow.find(current_header);
588 if (it != structurize.loop_follow.end()) {
591 if (stop_block == follow) {
594 if (structurize.inLoop(current_header, stop_block)) {
599 findEnclosingLoopForFollow(stop_block, loop_ctx).has_value());
// Case: inside a loop — back edge to the header, exit to the follow block,
// or a jump that leaves the loop towards an enclosing loop's follow block.
602 if (current_header != -1) {
604 auto it = structurize.loop_follow.find(current_header);
605 if (it != structurize.loop_follow.end()) {
608 if (target_block == current_header) {
611 if (target_block == follow) {
614 if (!structurize.inLoop(current_header, target_block)) {
616 return finish(findEnclosingLoopForFollow(target_block, loop_ctx)
// Otherwise the target would be emitted inline: defer to canStructureFrom().
620 return finish(canStructureFrom(target_block, stop_block, loop_ctx));
622 can_edge_in_progress.erase(key);
// Preflight (no code emitted): checks whether the region starting at
// `start_block` and ending at `stop_block` can be emitted as structured
// control flow. It runs traverseStructure() with a visitor that only answers
// feasibility questions.
//
// Results are memoised in `can_structure_cache` with the same key shape as
// canEdgeToBlock(); `can_structure_in_progress` marks keys under evaluation.
// NOTE(review): several handler bodies and the return statements are missing
// from this listing; only the visible behaviour is described.
627bool GLSLGenerator::canStructureFrom(
int start_block,
int stop_block,
628 LoopContext& loop_ctx)
const {
629 CanKey key{.start_block = start_block,
630 .stop_block = stop_block,
631 .header_stack = loop_ctx.header_stack};
632 if (
auto it = can_structure_cache.find(key);
633 it != can_structure_cache.end()) {
636 if (can_structure_in_progress.contains(key)) {
639 can_structure_in_progress.insert(key);
642 const GLSLGenerator* gen;
// Loop: the body is checked with the header pushed on the loop stack; the
// stack is restored afterwards.
644 bool handleLoop(
int block,
int follow, LoopContext& ctx)
const {
645 ctx.header_stack.push_back(block);
646 bool ok = gen->canStructureFrom(block, follow, ctx);
647 ctx.header_stack.pop_back();
// Visiting a block is always feasible in the preflight.
650 [[nodiscard]]
bool visitBlock(
int )
const {
return true; }
651 [[nodiscard]]
bool handleNoSuccessors(
int )
const {
654 [[nodiscard]]
bool handleLoopExitOrContinue(
int ,
int ,
659 [[nodiscard]]
bool handleSimpleEdge(
int ,
int )
const {
// Non-local edge: feasibility is delegated to canEdgeToBlock(); `saved` is
// declared on a line not visible here (presumably a copy of `ctx`).
662 [[nodiscard]]
bool handleNonlocalEdge(
int ,
int next,
664 const LoopContext& ctx)
const {
666 return gen->canEdgeToBlock(next, stop_block, saved);
// Branch: both arms must be able to reach `join`.
668 [[nodiscard]]
bool handleBranch(
int ,
int t,
int f,
int join,
670 const LoopContext& ctx)
const {
673 return gen->canEdgeToBlock(t, join, saved_t) &&
674 gen->canEdgeToBlock(f, join, saved_f);
676 [[nodiscard]]
bool handleLoopBreak(
int )
const {
return true; }
// Run the traversal, then clear the in-progress mark and cache the verdict.
680 bool ok = traverseStructure(start_block, stop_block, loop_ctx, visitor);
681 can_structure_in_progress.erase(key);
682 can_structure_cache.emplace(std::move(key), ok);
685 can_structure_in_progress.erase(key);
// Finds a common ancestor of blocks `a` and `b` in the tree described by
// `ipdom`; computeBranchJoin() uses the result as the join block of a branch.
// Visible structure: a special case when either input is -1, a set of
// `ancestors`, a membership test against that set, and range checks that stop
// a walk once the current block index falls outside `ipdom`.
// NOTE(review): the loops and return statements are missing from this
// listing — presumably a's chain is collected first and b's chain is walked
// until it hits a collected ancestor, with -1 returned when none exists
// (computeBranchJoin() tests for -1); confirm.
690int GLSLGenerator::lcaPostdom(
int a,
int b,
691 const std::vector<int>& ipdom)
const {
692 if (a == -1 || b == -1) {
695 std::set<int> ancestors;
699 if (x < 0 ||
static_cast<size_t>(x) >= ipdom.size()) {
706 if (ancestors.contains(x)) {
709 if (x < 0 ||
static_cast<size_t>(x) >= ipdom.size()) {
// Copies the current expression stack into the entry slots of `target_block`
// by emitting `<slot[j]> = <stack[j]>;` assignments. Pairing is by index from
// the bottom of the stack (index 0), and stops at the shorter of the two
// sequences.
// NOTE(review): the body of the first `if` is not visible — presumably an
// early return when the target has no entry slots; confirm.
717void GLSLGenerator::emitStackToEntrySlots(
int target_block) {
718 if (!block_entry_stack.contains(target_block)) {
721 const auto& slots = block_entry_stack[target_block];
722 for (
size_t j = 0; j < slots.size() && j < stack.size(); ++j) {
723 emitLine(std::format(
"{} = {};", slots[j], stack[j]));
// Emits the code for a control-flow edge to `target_block` inside the region
// that ends at `stop_block`. In every visible case the stack is first copied
// into the target's entry slots. Visible cases, in order:
//  - inside a loop, target == stop == loop follow: slots only;
//  - target is the current loop header: slots, then `continue;`;
//  - target is the current loop's follow block: slots (what follows is not
//    visible — presumably `break;`);
//  - target lies outside the current loop: the enclosing loop whose follow is
//    the target is located, and its break flag is set to true;
//  - target == stop_block: slots only;
//  - otherwise: the target region is emitted inline via emitStructuredFrom().
// NOTE(review): returns, `break;` emissions and the failure path writing to
// `ok` are missing from this listing — confirm.
727void GLSLGenerator::emitEdgeToBlock(
int target_block,
int stop_block,
728 LoopContext& loop_ctx,
bool& ok) {
733 const auto& structurize = analysis.getStructurizeCFGResult();
// Innermost active loop header, or -1 when not inside any loop.
735 loop_ctx.header_stack.empty() ? -1 : loop_ctx.header_stack.back();
737 if (current_header != -1) {
739 auto it = structurize.loop_follow.find(current_header);
740 if (it != structurize.loop_follow.end()) {
745 if (target_block == stop_block && stop_block == follow) {
746 emitStackToEntrySlots(stop_block);
751 if (target_block == current_header) {
752 emitStackToEntrySlots(target_block);
753 emitLine(
"continue;");
756 if (target_block == follow) {
757 emitStackToEntrySlots(follow);
761 if (!structurize.inLoop(current_header, target_block)) {
763 auto outer = findEnclosingLoopForFollow(target_block, loop_ctx);
764 if (!outer.has_value()) {
768 emitStackToEntrySlots(target_block);
769 emitLine(std::format(
"{} = true;", getLoopBreakFlag(*outer)));
775 if (target_block == stop_block) {
776 emitStackToEntrySlots(target_block);
780 emitStackToEntrySlots(target_block);
781 emitStructuredFrom(target_block, stop_block, loop_ctx, ok);
// Emits structured GLSL for the region from `start_block` up to `stop_block`.
// The walk itself is done by traverseStructure(); the local visitor below
// turns each traversal event into emitted code. Failures are reported through
// the `ok` out-parameter.
// NOTE(review): many interior lines (indent/dedent calls, closing braces of
// the emitted GLSL, return values of the handlers) are missing from this
// listing; comments describe only what is visible.
784void GLSLGenerator::emitStructuredFrom(
int start_block,
int stop_block,
785 LoopContext& loop_ctx,
bool& ok) {
// Loop: declare the loop's break flag, open `while (true) {`, emit the body
// with the header pushed on the loop stack, then emit the unwind check.
794 bool handleLoop(
int block,
int follow, LoopContext& ctx)
const {
795 std::string flag = gen->getLoopBreakFlag(block);
796 gen->emitLine(std::format(
"bool {} = false;", flag));
797 gen->emitLine(
"while (true) {");
799 ctx.header_stack.push_back(block);
800 gen->emitStructuredFrom(block, follow, ctx, ok);
801 ctx.header_stack.pop_back();
804 gen->emitUnwindBreakIfNeeded(ctx);
// Block: start from the block's entry slots when it has any, then emit the
// block's tokens.
807 [[nodiscard]]
bool visitBlock(
int block)
const {
808 if (gen->block_entry_stack.contains(block)) {
809 gen->stack = gen->block_entry_stack[block];
811 gen->emitBlockCode(block);
// Terminal block: the top of the stack (or "0.0" when the stack is empty)
// becomes the result and the exit flag is raised.
814 [[nodiscard]]
bool handleNoSuccessors(
int )
const {
815 std::string result_expr = gen->stack.empty() ?
"0.0" : gen->pop();
816 gen->emitSetResultAndExit(result_expr);
// Edge to the current loop header -> `continue;`; edge to the loop's follow
// block -> `break;`. Entry slots are written first in both cases.
819 [[nodiscard]]
bool handleLoopExitOrContinue(
int ,
int next,
822 if (next == current_header) {
823 gen->emitStackToEntrySlots(next);
824 gen->emitLine(
"continue;");
827 if (next == follow) {
828 gen->emitStackToEntrySlots(next);
829 gen->emitLine(
"break;");
// Plain fallthrough edge: only the entry slots are written.
835 [[nodiscard]]
bool handleSimpleEdge(
int ,
int next)
const {
836 gen->emitStackToEntrySlots(next);
839 [[nodiscard]]
bool handleNonlocalEdge(
int ,
int next,
841 LoopContext& ctx)
const {
842 gen->emitEdgeToBlock(next, stop_block, ctx, ok);
// Branch: pops the condition and snapshots the stack so each arm starts from
// the same state. Three emission shapes are visible: true arm only
// (`if (c > 0.0)`), false arm only (`if (!(c > 0.0))`), and full if/else.
// The conditions selecting between them are not visible in this listing.
845 bool handleBranch(
int ,
int t,
int f,
int join,
846 int , LoopContext& ctx)
const {
847 std::string cond = gen->pop();
848 auto base_stack = gen->stack;
851 gen->emitLine(std::format(
"if ({} > 0.0) {{", cond));
853 gen->emitEdgeToBlock(t, join, ctx, ok);
856 gen->stack = base_stack;
857 gen->emitUnwindBreakIfNeeded(ctx);
862 gen->emitLine(std::format(
"if (!({} > 0.0)) {{", cond));
864 gen->emitEdgeToBlock(f, join, ctx, ok);
867 gen->stack = base_stack;
868 gen->emitUnwindBreakIfNeeded(ctx);
872 gen->emitLine(std::format(
"if ({} > 0.0) {{", cond));
874 gen->emitEdgeToBlock(t, join, ctx, ok);
876 gen->emitLine(
"} else {");
878 gen->stack = base_stack;
879 gen->emitEdgeToBlock(f, join, ctx, ok);
882 gen->stack = base_stack;
883 gen->emitUnwindBreakIfNeeded(ctx);
// Loop break at a join: write the join's entry slots, then `break;`.
886 [[nodiscard]]
bool handleLoopBreak(
int join)
const {
887 gen->emitStackToEntrySlots(join);
888 gen->emitLine(
"break;");
891 } visitor{.gen =
this, .ok = ok};
893 if (!traverseStructure(start_block, stop_block, loop_ctx, visitor)) {
// Emits the body of main() as structured control flow when possible.
//  1. Preflight: requires the structurizer's success flag and
//     canStructureFrom(0, -1, ...) for the whole program. If it fails, an
//     optional debug comment is emitted and the state-machine emitter is used.
//  2. Otherwise `_result` and `_llvmexpr_exit` are declared and the program is
//     emitted via emitStructuredFrom() with `structured_exit_enabled` set for
//     the duration of the emission. The visible `} while (false);` closes a
//     wrapper whose opening line is not visible in this listing.
//  3. The result store epilogue is emitted.
//  4. A second fallback to the state machine exists for an emit-time failure.
// NOTE(review): how already-emitted text is discarded before that second
// fallback is not visible here — confirm.
898void GLSLGenerator::emitMainFunctionStructured() {
899 LoopContext loop_ctx;
900 bool ok = analysis.getStructurizeCFGResult().success &&
901 canStructureFrom(0, -1, loop_ctx);
905 if (debug_structurize_cfg) {
906 emitLine(
"// structurize: preflight can_structure_from() failed. "
907 "falling back to state machine");
910 emitMainFunctionStateMachine();
919 emitLine(
"float _result = 0.0;");
920 emitLine(
"bool _llvmexpr_exit = false;");
// The flag makes emitUnwindBreakIfNeeded() include `_llvmexpr_exit`.
924 structured_exit_enabled =
true;
925 emitStructuredFrom(0, -1, loop_ctx, emit_ok);
926 structured_exit_enabled =
false;
929 emitLine(
"} while (false);");
931 emitResultEpilogueStore();
942 if (debug_structurize_cfg) {
943 emitLine(
"// structurize: unexpected emit-time failure. falling "
948 emitMainFunctionStateMachine();
// Emits the code of one CFG block by running processToken() on every token in
// the half-open range [start_token_idx, end_token_idx) of block `block_idx`.
// `block_idx` indexes the list returned by getCodegenCfgBlocks(); it is not
// range-checked here.
952void GLSLGenerator::emitBlockCode(
int block_idx) {
953 const auto& cfg_blocks = getCodegenCfgBlocks();
954 const auto& block = cfg_blocks[block_idx];
956 for (
int i = block.start_token_idx; i < block.end_token_idx; ++i) {
957 processToken(tokens[i]);
// Translates a single token into GLSL. Operands are taken from the
// generator's expression stack (strings naming GLSL expressions) and results
// are pushed back; most operations emit one `float t_<n> = ...;` statement.
// Dispatch is a switch on the token type.
// NOTE(review): the `case` labels, `break`s, most `push(temp)` calls and some
// format arguments are missing from this listing. The comments below describe
// each group by the GLSL it emits, not by token name.
961void GLSLGenerator::processToken(
const Token& token) {
962 switch (token.
type) {
// Numeric constant: pushed as a GLSL float literal.
966 const auto& payload = std::get<TokenPayloadNumber>(token.
payload);
967 push(floatLiteral(payload.value));
// Built-in values read from the push-constant block, converted to float.
977 push(
"float(pc.width)");
980 push(
"float(pc.height)");
983 push(
"float(pc.frameNumber)");
// The constant pi as a float literal.
986 push(floatLiteral(std::numbers::pi));
// Division via the generic infix helper.
1000 push(binaryOp(
"/"));
// Modulo: GLSL mod(a, b), with b popped first.
1003 std::string b = pop();
1004 std::string a = pop();
1005 std::string temp = newTemp();
1006 emitLine(std::format(
"float {} = mod({}, {});", temp, a, b));
1011 push(binaryFn(
"pow"));
1014 push(binaryFn(
"min"));
1017 push(binaryFn(
"max"));
// Two-argument arctangent: x is popped first, then y; emitted as atan(y, x).
1020 std::string x_val = pop();
1021 std::string y_val = pop();
1022 std::string temp = newTemp();
1023 emitLine(std::format(
"float {} = atan({}, {});", temp, y_val, x_val));
// sign(<sign_val>) * abs(...): the abs() argument is on a line not visible
// here (presumably `mag_val`).
1028 std::string sign_val = pop();
1029 std::string mag_val = pop();
1030 std::string temp = newTemp();
1031 emitLine(std::format(
"float {} = sign({}) * abs({});", temp, sign_val,
// Comparisons: each yields 1.0 or 0.0 (see binaryCmp).
1039 push(binaryCmp(
">"));
1042 push(binaryCmp(
"<"));
1045 push(binaryCmp(
">="));
1048 push(binaryCmp(
"<="));
1051 push(binaryCmp(
"=="));
// Logical AND: an operand counts as true when it is > 0.0.
1056 std::string b = pop();
1057 std::string a = pop();
1058 std::string temp = newTemp();
1059 emitLine(std::format(
1060 "float {} = (({} > 0.0) && ({} > 0.0)) ? 1.0 : 0.0;", temp, a, b));
// Logical OR with the same truth convention.
1065 std::string b = pop();
1066 std::string a = pop();
1067 std::string temp = newTemp();
1068 emitLine(std::format(
1069 "float {} = (({} > 0.0) || ({} > 0.0)) ? 1.0 : 0.0;", temp, a, b));
// Logical XOR: true when exactly one operand is > 0.0.
1074 std::string b = pop();
1075 std::string a = pop();
1076 std::string temp = newTemp();
1077 emitLine(std::format(
1078 "float {} = (({} > 0.0) != ({} > 0.0)) ? 1.0 : 0.0;", temp, a, b));
// Logical NOT: 1.0 when the operand is <= 0.0.
1083 std::string a = pop();
1084 std::string temp = newTemp();
1085 emitLine(std::format(
"float {} = ({} <= 0.0) ? 1.0 : 0.0;", temp, a));
// Bitwise AND / OR / XOR / NOT: operands are rounded with llvmexpr_round,
// converted to int, combined, and converted back to float.
1092 std::string b = pop();
1093 std::string a = pop();
1094 std::string temp = newTemp();
1095 emitLine(std::format(
"float {} = float(int(llvmexpr_round({})) & "
1096 "int(llvmexpr_round({})));",
1102 std::string b = pop();
1103 std::string a = pop();
1104 std::string temp = newTemp();
1105 emitLine(std::format(
"float {} = float(int(llvmexpr_round({})) | "
1106 "int(llvmexpr_round({})));",
1112 std::string b = pop();
1113 std::string a = pop();
1114 std::string temp = newTemp();
1115 emitLine(std::format(
"float {} = float(int(llvmexpr_round({})) ^ "
1116 "int(llvmexpr_round({})));",
1122 std::string a = pop();
1123 std::string temp = newTemp();
1124 emitLine(std::format(
"float {} = float(~int(llvmexpr_round({})));",
// Square root: the argument is clamped to >= 0.0 first, so negative inputs
// give sqrt(0.0).
1132 std::string a = pop();
1133 std::string temp = newTemp();
1134 emitLine(std::format(
"float {} = sqrt(max({}, 0.0));", temp, a));
// One-argument functions forwarded to the function named in the string.
1139 push(unaryFn(
"exp"));
1142 push(unaryFn(
"log"));
1145 push(unaryFn(
"abs"));
1148 push(unaryFn(
"floor"));
1151 push(unaryFn(
"ceil"));
1154 push(unaryFn(
"trunc"));
1157 push(unaryFn(
"llvmexpr_round"));
1160 push(unaryFn(
"sin"));
1163 push(unaryFn(
"cos"));
1166 push(unaryFn(
"tan"));
1169 push(unaryFn(
"asin"));
1172 push(unaryFn(
"acos"));
1175 push(unaryFn(
"atan"));
1178 push(unaryFn(
"exp2"));
// Base-10 logarithm, emitted as log(x) / log(10.0).
1182 std::string a = pop();
1183 std::string temp = newTemp();
1184 emitLine(std::format(
"float {} = log({}) / log(10.0);", temp, a));
1189 push(unaryFn(
"log2"));
1192 push(unaryFn(
"sinh"));
1195 push(unaryFn(
"cosh"));
1198 push(unaryFn(
"tanh"));
// Sign function with an explicit 0.0 result for a zero operand.
1201 std::string a = pop();
1202 std::string temp = newTemp();
1203 emitLine(std::format(
"float {} = ({} != 0.0) ? sign({}) : 0.0;", temp,
// Negation.
1209 std::string a = pop();
1210 std::string temp = newTemp();
1211 emitLine(std::format(
"float {} = -{};", temp, a));
// Ternary select: pops c, b, a and yields (a > 0.0) ? b : c.
1218 std::string c = pop();
1219 std::string b = pop();
1220 std::string a = pop();
1221 std::string temp = newTemp();
1223 std::format(
"float {} = ({} > 0.0) ? {} : {};", temp, a, b, c));
// Clamp: pops max, min, value (in that order) and emits clamp(...); the last
// two format arguments are on a line not visible here.
1229 std::string max_val = pop();
1230 std::string min_val = pop();
1231 std::string val = pop();
1232 std::string temp = newTemp();
1233 emitLine(std::format(
"float {} = clamp({}, {}, {});", temp, val,
// Fused multiply-add: fma(a, b, c) with c popped first.
1239 std::string c = pop();
1240 std::string b = pop();
1241 std::string a = pop();
1242 std::string temp = newTemp();
1243 emitLine(std::format(
"float {} = fma({}, {}, {});", temp, a, b, c));
// Stack op: duplicate the entry `n` positions below the top.
1250 const auto& payload = std::get<TokenPayloadStackOp>(token.
payload);
1251 push(peek(payload.n));
// Stack op iterating `n` times; the loop body is not visible (presumably it
// discards `n` entries — confirm).
1255 const auto& payload = std::get<TokenPayloadStackOp>(token.
payload);
1256 for (
int i = 0; i < payload.n; ++i) {
// Stack op: swap the top entry with the entry `n` positions below it.
// NOTE(review): no bounds check is visible; with fewer than n + 1 entries the
// size_t subtraction wraps and indexes out of range — confirm the stack depth
// is validated upstream.
1262 const auto& payload = std::get<TokenPayloadStackOp>(token.
payload);
1263 size_t top_idx = stack.size() - 1;
1264 size_t other_idx = stack.size() - 1 - payload.n;
1265 std::swap(stack[top_idx], stack[other_idx]);
// Sort of `n` values with a compare-exchange network: for each index pair in
// `network`, the min goes to the first index and the max to the second.
// NOTE(review): how `n`, `network` and `values` are filled, and the final
// push order, are on lines not visible here.
1269 const auto& payload = std::get<TokenPayloadStackOp>(token.
payload);
1276 std::vector<std::string> values(n);
1277 for (
int i = 0; i < n; ++i) {
1282 for (
const auto& pair : network) {
1283 std::string temp_min = newTemp();
1284 std::string temp_max = newTemp();
1285 int idx1 = pair.first;
1286 int idx2 = pair.second;
1287 emitLine(std::format(
"float {} = min({}, {});", temp_min,
1288 values[idx1], values[idx2]));
1289 emitLine(std::format(
"float {} = max({}, {});", temp_max,
1290 values[idx1], values[idx2]));
1291 values[idx1] = temp_min;
1292 values[idx2] = temp_max;
1296 for (
int i = n - 1; i >= 0; --i) {
// Index of the extreme value among `n` entries (max when `is_max`, else min),
// computed as a pairwise tournament. Each candidate carries (value, index),
// with the index literal for position i being n - 1 - i. On equal values the
// candidate with the smaller index wins. The winning index is pushed.
// A literal 0.0 is pushed on a special-case path whose condition is not
// visible in this listing.
1303 const auto& payload = std::get<TokenPayloadStackOp>(token.
payload);
1306 push(floatLiteral(0.0));
1310 std::vector<std::string> values(n);
1311 for (
int i = 0; i < n; ++i) {
1319 std::vector<Node> current_level;
1320 current_level.reserve(n);
1321 for (
int i = 0; i < n; ++i) {
1322 current_level.push_back(
1323 {values[i], floatLiteral(
static_cast<double>(n - 1 - i))});
1327 std::string cmp = is_max ?
">" :
"<";
// Halve the candidate list each round; an unpaired last entry advances as is.
1329 while (current_level.size() > 1) {
1330 std::vector<Node> next_level;
1331 for (
size_t i = 0; i < current_level.size(); i += 2) {
1332 if (i + 1 < current_level.size()) {
1333 std::string winner_val = newTemp();
1334 std::string winner_idx = newTemp();
1335 const auto& left = current_level[i];
1336 const auto& right = current_level[i + 1];
1339 std::string cond = std::format(
1340 "({} {} {}) || ({} == {} && {} < {})", left.val, cmp,
1341 right.val, left.val, right.val, left.idx, right.idx);
1343 emitLine(std::format(
"float {} = ({}) ? {} : {};",
1344 winner_val, cond, left.val,
1346 emitLine(std::format(
"float {} = ({}) ? {} : {};",
1347 winner_idx, cond, left.idx,
1349 next_level.push_back({winner_val, winner_idx});
1351 next_level.push_back(current_level[i]);
1354 current_level = std::move(next_level);
1356 push(current_level[0].idx);
// Network sort that carries indices along with values. For each pair
// (i1, i2): when values[i1] > values[i2], or they are equal and
// indices[i1] > indices[i2], the two (value, index) entries are exchanged.
// NOTE(review): what is finally pushed (values or indices) is on lines not
// visible here.
1360 const auto& payload = std::get<TokenPayloadStackOp>(token.
payload);
1367 push(floatLiteral(0.0));
1371 std::vector<std::string> values(n);
1372 std::vector<std::string> indices(n);
1373 for (
int i = 0; i < n; ++i) {
1375 indices[i] = floatLiteral(
static_cast<double>(n - 1 - i));
1379 for (
const auto& pair : network) {
1380 int i1 = pair.first;
1381 int i2 = pair.second;
1383 std::string cond = std::format(
1384 "({} > {}) || ({} == {} && {} > {})", values[i1], values[i2],
1385 values[i1], values[i2], indices[i1], indices[i2]);
1387 std::string next_v1 = newTemp();
1388 std::string next_v2 = newTemp();
1389 std::string next_i1 = newTemp();
1390 std::string next_i2 = newTemp();
1392 emitLine(std::format(
"float {} = ({}) ? {} : {};", next_v1, cond,
1393 values[i2], values[i1]));
1394 emitLine(std::format(
"float {} = ({}) ? {} : {};", next_v2, cond,
1395 values[i1], values[i2]));
1396 emitLine(std::format(
"float {} = ({}) ? {} : {};", next_i1, cond,
1397 indices[i2], indices[i1]));
1398 emitLine(std::format(
"float {} = ({}) ? {} : {};", next_i2, cond,
1399 indices[i1], indices[i2]));
1401 values[i1] = next_v1;
1402 values[i2] = next_v2;
1403 indices[i1] = next_i1;
1404 indices[i2] = next_i2;
1407 for (
int i = n - 1; i >= 0; --i) {
// User variable store: pops a value and assigns it to `u_<name>`.
1421 const auto& payload = std::get<TokenPayloadVar>(token.
payload);
1422 std::string val = pop();
1423 emitLine(std::format(
"u_{} = {};", payload.name, val));
// User variable load: pushes the variable name itself (no temp is created).
1427 const auto& payload = std::get<TokenPayloadVar>(token.
payload);
1428 push(std::format(
"u_{}", payload.name));
// Input clip, current pixel: loads src<clip>.data[gid].
1434 const auto& payload = std::get<TokenPayloadClipAccess>(token.
payload);
1435 std::string idx_temp = newTemp();
1436 emitLine(std::format(
"uint {} = gid;", idx_temp));
1437 std::string val_temp = newTemp();
1438 emitLine(std::format(
"float {} = src{}.data[{}];", val_temp,
1439 payload.clip_idx, idx_temp));
// Input clip, relative pixel (X + rel_x, Y + rel_y). The token's own boundary
// mode is used when `has_mode` is set, otherwise the generator-wide
// `mirror_boundary` default.
1444 const auto& payload = std::get<TokenPayloadClipAccess>(token.
payload);
1446 payload.has_mode ? payload.use_mirror : mirror_boundary;
1448 std::string x_expr = std::format(
"X + {}", payload.rel_x);
1449 std::string y_expr = std::format(
"Y + {}", payload.rel_y);
1451 std::string final_x =
1452 emitFinalCoord(x_expr,
"int(pc.width)", use_mirror);
1453 std::string final_y =
1454 emitFinalCoord(y_expr,
"int(pc.height)", use_mirror);
1455 std::string idx = emitPixelIndex(final_x, final_y);
1457 std::string val_temp = newTemp();
1458 emitLine(std::format(
"float {} = src{}.data[{}];", val_temp,
1459 payload.clip_idx, idx));
// Input clip, absolute pixel: y is popped first, then x; both are rounded
// with roundEven, converted to int, and loaded via emitPixelLoad().
1464 const auto& payload = std::get<TokenPayloadClipAccess>(token.
payload);
1465 std::string coord_y = pop();
1466 std::string coord_x = pop();
1468 payload.has_mode ? payload.use_mirror : mirror_boundary;
1470 std::string x_int = newTemp();
1471 std::string y_int = newTemp();
1472 emitLine(std::format(
"int {} = int(roundEven({}));", x_int, coord_x));
1473 emitLine(std::format(
"int {} = int(roundEven({}));", y_int, coord_y));
1475 push(emitPixelLoad(payload.clip_idx, x_int, y_int, use_mirror));
// Intermediate buffer, current pixel: loads buf<idx>.data[gid].
1480 const auto& payload = std::get<TokenPayloadBufferAccess>(token.
payload);
1481 std::string temp = newTemp();
1482 emitLine(std::format(
"float {} = buf{}.data[gid];", temp,
1483 payload.buffer_idx));
// Intermediate buffer, relative pixel: same coordinate handling as the
// relative clip access above, reading from buf<idx>.
1488 const auto& payload = std::get<TokenPayloadBufferAccess>(token.
payload);
1490 payload.has_mode ? payload.use_mirror : mirror_boundary;
1492 std::string x_expr = std::format(
"X + {}", payload.rel_x);
1493 std::string y_expr = std::format(
"Y + {}", payload.rel_y);
1495 std::string final_x =
1496 emitFinalCoord(x_expr,
"int(pc.width)", use_mirror);
1497 std::string final_y =
1498 emitFinalCoord(y_expr,
"int(pc.height)", use_mirror);
1499 std::string idx = emitPixelIndex(final_x, final_y);
1501 std::string temp = newTemp();
1502 emitLine(std::format(
"float {} = buf{}.data[{}];", temp,
1503 payload.buffer_idx, idx));
// Intermediate buffer, absolute pixel: same steps as emitPixelLoad(), written
// out inline because the source is buf<idx> rather than src<idx>.
1508 const auto& payload = std::get<TokenPayloadBufferAccess>(token.
payload);
1509 std::string coord_y = pop();
1510 std::string coord_x = pop();
1512 payload.has_mode ? payload.use_mirror : mirror_boundary;
1514 std::string x_int = newTemp();
1515 std::string y_int = newTemp();
1517 emitLine(std::format(
"int {} = int(roundEven({}));", x_int, coord_x));
1518 emitLine(std::format(
"int {} = int(roundEven({}));", y_int, coord_y));
1520 std::string final_x =
1521 emitFinalCoord(x_int,
"int(pc.width)", use_mirror);
1522 std::string final_y =
1523 emitFinalCoord(y_int,
"int(pc.height)", use_mirror);
1524 std::string idx = emitPixelIndex(final_x, final_y);
1526 std::string temp = newTemp();
1527 emitLine(std::format(
"float {} = buf{}.data[{}];", temp,
1528 payload.buffer_idx, idx));
// Property read: the (clip index, property name) pair is mapped to a slot of
// propsData.props via `prop_map`. std::map::at throws std::out_of_range when
// the pair is not in the map.
1535 const auto& payload = std::get<TokenPayloadPropAccess>(token.
payload);
1536 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
1537 int prop_idx = prop_map.at(key);
1538 std::string temp = newTemp();
1540 std::format(
"float {} = propsData.props[{}];", temp, prop_idx));
// Property test: for a mapped property, yields 0.0 when the stored value has
// the bit pattern 0x7FC0BEEF and 1.0 otherwise. The branch for an unmapped
// property is not visible in this listing.
1545 const auto& payload = std::get<TokenPayloadPropAccess>(token.
payload);
1546 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
1547 if (prop_map.contains(key)) {
1548 int prop_idx = prop_map.at(key);
1549 std::string temp = newTemp();
1550 emitLine(std::format(
1551 "float {} = (floatBitsToUint(propsData.props[{}]) == "
1552 "0x7FC0BEEFu) ? 0.0 : 1.0;",
// Array element read: a_<name>[int(idx)]; the emitted code has no range check.
1566 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
1567 std::string idx = pop();
1568 std::string temp = newTemp();
1570 std::format(
"float {} = a_{}[int({})];", temp, payload.name, idx));
// Array element write: the index is popped first, then the value.
1575 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
1576 std::string idx = pop();
1577 std::string val = pop();
1578 emitLine(std::format(
"a_{}[int({})] = {};", payload.name, idx, val));
// Direct store to the output at (x, y): pops y, x, then the value.
// Coordinates are converted with int(...) rather than the roundEven used by
// the absolute loads above.
// NOTE(review): no clamp/mirror or range check on the coordinates is visible,
// so an out-of-range (x, y) would index outside dst.data — confirm whether
// this is validated elsewhere.
1583 std::string coord_y = pop();
1584 std::string coord_x = pop();
1585 std::string val = pop();
1586 std::string x_int = newTemp();
1587 std::string y_int = newTemp();
1588 emitLine(std::format(
"int {} = int({});", x_int, coord_x));
1589 emitLine(std::format(
"int {} = int({});", y_int, coord_y));
1590 std::string idx = emitPixelIndex(x_int, y_int);
1591 emitLine(std::format(
"dst.data[{}] = {};", idx, val));
// Pushes the sentinel bit pattern 0x7FC0E71F; the result epilogue skips the
// final store when `_result` carries exactly this pattern.
1596 push(
"uintBitsToFloat(0x7FC0E71Fu)");
// Any token type without a handler is reported as an error (presumably the
// switch's default label — not visible in this listing).
1601 throw std::runtime_error(
1602 std::format(
"GLSLGenerator: unhandled token type {}",
1603 static_cast<int>(token.
type)));
constexpr std::vector< std::pair< int, int > > get_sorting_network(int n)
GLSLGenerator(const std::vector< Token > &tokens, int num_inputs, int num_intermediate_inputs, int width, int height, bool mirror_boundary, const std::map< std::pair< int, std::string >, int > &prop_map, const analysis::ExpressionAnalysisResults &analysis_results)