77 llvm::BasicBlock* entry_bb =
79 builder.SetInsertPoint(entry_bb);
81 llvm::Function* parent_func =
builder.GetInsertBlock()->getParent();
87 llvm::Value* y_tile_var =
89 llvm::Value* x_tile_var =
95 llvm::Value* x_fp_var =
nullptr;
96 if (coord_usage.uses_x) {
99 llvm::Value* y_fp_var =
nullptr;
100 if (coord_usage.uses_y) {
102 builder.CreateStore(llvm::ConstantFP::get(
builder.getFloatTy(), 0.0),
110 llvm::Value* base_ptr_i =
builder.CreateLoad(
111 llvm::PointerType::get(
context, 0),
114 llvm::Value* stride_i =
builder.CreateLoad(
128 llvm::SmallVector<llvm::Metadata*, 2> elems;
129 elems.push_back(
nullptr);
130 llvm::Metadata* name_node = llvm::MDNode::get(
132 context, std::format(
"rwptrs_{}", i).c_str())});
133 elems.push_back(name_node);
140 std::vector<llvm::Metadata*> self_list = {
alias_scopes[i]};
142 std::vector<llvm::Metadata*> others;
152 const auto& clip_access_result =
157 llvm::Value* start_main_x =
builder.getInt32(-clip_access_result.min_rel_x);
158 llvm::Value* end_main_x =
162 clip_access_result.min_rel_x < 0;
163 bool has_right_peel =
164 clip_access_result.max_rel_x > 0;
166 const int effective_tile_x = (tile_x <= 0) ?
width : tile_x;
167 const int effective_tile_y = (tile_y <= 0) ?
height : tile_y;
169 auto min_i32 = [&](llvm::Value* lhs, llvm::Value* rhs,
170 const char* name) -> llvm::Value* {
171 llvm::Value* cond =
builder.CreateICmpSLT(lhs, rhs);
172 return builder.CreateSelect(cond, lhs, rhs, name);
175 auto emit_x_range_loop = [&](llvm::Value* end_x,
bool no_x_bounds_check,
176 const char* block_name_prefix) {
177 llvm::BasicBlock* header_bb = llvm::BasicBlock::Create(
178 context, std::format(
"{}_header", block_name_prefix), parent_func);
179 llvm::BasicBlock* body_bb = llvm::BasicBlock::Create(
180 context, std::format(
"{}_body", block_name_prefix), parent_func);
181 llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create(
182 context, std::format(
"{}_exit", block_name_prefix), parent_func);
186 builder.SetInsertPoint(header_bb);
189 llvm::Value* cond =
builder.CreateICmpSLT(x_val, end_x);
190 llvm::BranchInst* range_br =
191 builder.CreateCondBr(cond, body_bb, exit_bb);
194 builder.SetInsertPoint(body_bb);
195 generate_x_loop_body(x_var, x_fp_var, y_var, y_fp_var,
199 builder.SetInsertPoint(exit_bb);
202 llvm::BasicBlock* y_tile_header =
203 llvm::BasicBlock::Create(
context,
"y_tile_header", parent_func);
204 llvm::BasicBlock* y_tile_body =
205 llvm::BasicBlock::Create(
context,
"y_tile_body", parent_func);
206 llvm::BasicBlock* y_tile_exit =
207 llvm::BasicBlock::Create(
context,
"y_tile_exit", parent_func);
209 builder.CreateBr(y_tile_header);
211 builder.SetInsertPoint(y_tile_header);
212 llvm::Value* y_tile_val =
214 llvm::Value* y_tile_cond =
builder.CreateICmpSLT(y_tile_val, height_val);
215 builder.CreateCondBr(y_tile_cond, y_tile_body, y_tile_exit);
217 builder.SetInsertPoint(y_tile_body);
218 llvm::Value* y_tile_next_unclamped =
219 builder.CreateAdd(y_tile_val,
builder.getInt32(effective_tile_y));
220 llvm::Value* y_tile_end =
221 min_i32(y_tile_next_unclamped, height_val,
"y_tile_end");
222 builder.CreateStore(y_tile_val, y_var);
223 if (coord_usage.uses_y) {
225 builder.CreateSIToFP(y_tile_val,
builder.getFloatTy()), y_fp_var);
228 llvm::BasicBlock* row_header =
229 llvm::BasicBlock::Create(
context,
"row_header", parent_func);
230 llvm::BasicBlock* row_body =
231 llvm::BasicBlock::Create(
context,
"row_body", parent_func);
232 llvm::BasicBlock* row_exit =
233 llvm::BasicBlock::Create(
context,
"row_exit", parent_func);
237 builder.SetInsertPoint(row_header);
238 llvm::Value* y_val =
builder.CreateLoad(
builder.getInt32Ty(), y_var,
"y");
239 llvm::Value* y_cond =
builder.CreateICmpSLT(y_val, y_tile_end,
"y.cond");
240 builder.CreateCondBr(y_cond, row_body, row_exit);
242 builder.SetInsertPoint(row_body);
246 for (
const auto& access : clip_access_result.unique_rel_y_accesses) {
247 int clip_idx = access.clip_idx;
248 int vs_clip_idx = clip_idx + 1;
249 int rel_y = access.rel_y;
251 llvm::Value* coord_y =
253 llvm::Value* final_y =
259 llvm::Value* y_offset =
builder.CreateMul(final_y, stride);
260 llvm::Value* row_ptr =
builder.CreateGEP(
builder.getInt8Ty(), base_ptr,
261 y_offset,
"row_ptr");
265 llvm::BasicBlock* x_tile_header =
266 llvm::BasicBlock::Create(
context,
"x_tile_header", parent_func);
267 llvm::BasicBlock* x_tile_body =
268 llvm::BasicBlock::Create(
context,
"x_tile_body", parent_func);
269 llvm::BasicBlock* x_tile_exit =
270 llvm::BasicBlock::Create(
context,
"x_tile_exit", parent_func);
273 builder.CreateBr(x_tile_header);
275 builder.SetInsertPoint(x_tile_header);
276 llvm::Value* x_tile_val =
278 llvm::Value* x_tile_cond =
279 builder.CreateICmpSLT(x_tile_val, width_val,
"x_tile.cond");
280 builder.CreateCondBr(x_tile_cond, x_tile_body, x_tile_exit);
282 builder.SetInsertPoint(x_tile_body);
283 llvm::Value* x_tile_next_unclamped =
284 builder.CreateAdd(x_tile_val,
builder.getInt32(effective_tile_x));
285 llvm::Value* x_tile_end =
286 min_i32(x_tile_next_unclamped, width_val,
"x_tile_end");
288 builder.CreateStore(x_tile_val, x_var);
289 if (coord_usage.uses_x) {
291 builder.CreateSIToFP(x_tile_val,
builder.getFloatTy()), x_fp_var);
295 llvm::Value* left_end =
296 min_i32(x_tile_end, start_main_x,
"left_peel_end");
297 emit_x_range_loop(left_end,
false,
"left_peel");
300 llvm::Value* main_end = min_i32(x_tile_end, end_main_x,
"main_end");
301 emit_x_range_loop(main_end,
true,
"main_loop");
303 if (has_right_peel) {
304 emit_x_range_loop(x_tile_end,
false,
"right_peel");
307 llvm::Value* x_tile_next =
308 builder.CreateAdd(x_tile_val,
builder.getInt32(effective_tile_x));
309 builder.CreateStore(x_tile_next, x_tile_var);
310 builder.CreateBr(x_tile_header);
312 builder.SetInsertPoint(x_tile_exit);
313 llvm::Value* y_next =
builder.CreateAdd(y_val,
builder.getInt32(1));
314 builder.CreateStore(y_next, y_var);
315 if (coord_usage.uses_y) {
316 llvm::Value* y_fp_val =
318 llvm::Value* y_fp_next =
builder.CreateFAdd(
319 y_fp_val, llvm::ConstantFP::get(
builder.getFloatTy(), 1.0));
320 builder.CreateStore(y_fp_next, y_fp_var);
324 builder.SetInsertPoint(row_exit);
325 llvm::Value* y_tile_next =
326 builder.CreateAdd(y_tile_val,
builder.getInt32(effective_tile_y));
327 builder.CreateStore(y_tile_next, y_tile_var);
328 builder.CreateBr(y_tile_header);
330 builder.SetInsertPoint(y_tile_exit);
365 const Token& token, std::vector<llvm::Value*>& rpn_stack, llvm::Value* x,
366 [[maybe_unused]] llvm::Value* y, llvm::Value* x_fp, llvm::Value* y_fp,
367 bool no_x_bounds_check) {
368 llvm::Type* float_ty =
builder.getFloatTy();
369 llvm::Type* i32_ty =
builder.getInt32Ty();
371 switch (token.
type) {
373 rpn_stack.push_back(x_fp);
376 rpn_stack.push_back(y_fp);
380 const auto& payload = std::get<TokenPayloadClipAccess>(token.
payload);
384 .rel_y = payload.rel_y,
385 .use_mirror = use_mirror};
388 payload.rel_x, use_mirror,
393 const auto& payload = std::get<TokenPayloadClipAccess>(token.
payload);
394 llvm::Value* coord_y_f = rpn_stack.back();
395 rpn_stack.pop_back();
396 llvm::Value* coord_x_f = rpn_stack.back();
397 rpn_stack.pop_back();
399 llvm::Value* coord_y =
400 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
401 &
module, llvm::Intrinsic::rint, {float_ty}),
403 coord_y =
builder.CreateFPToSI(coord_y, i32_ty);
405 llvm::Value* coord_x =
406 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
407 &
module, llvm::Intrinsic::rint, {float_ty}),
409 coord_x =
builder.CreateFPToSI(coord_x, i32_ty);
411 bool use_mirror_final =
false;
412 if (payload.has_mode) {
413 use_mirror_final = payload.use_mirror;
419 coord_y, use_mirror_final));
423 const auto& payload = std::get<TokenPayloadClipAccess>(token.
payload);
435 rpn_stack.push_back(llvm::ConstantFP::get(
441 const auto& payload = std::get<TokenPayloadPropAccess>(token.
payload);
442 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
445 llvm::Value* prop_val =
builder.CreateLoad(
448 rpn_stack.push_back(prop_val);
453 const auto& payload = std::get<TokenPayloadPropAccess>(token.
payload);
454 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
455 llvm::Value* exists_val =
nullptr;
458 llvm::Value* prop_val =
builder.CreateLoad(
462 llvm::Value* prop_val_int =
builder.CreateBitCast(prop_val, i32_ty);
465 llvm::Value* nan_payload_int =
builder.getInt32(0x7FC0BEEF);
466 llvm::Value* is_prop_read_nan =
467 builder.CreateICmpEQ(prop_val_int, nan_payload_int);
469 exists_val =
builder.CreateSelect(
470 is_prop_read_nan, llvm::ConstantFP::get(float_ty, 0.0),
471 llvm::ConstantFP::get(float_ty, 1.0));
473 exists_val = llvm::ConstantFP::get(float_ty, 0.0);
475 rpn_stack.push_back(exists_val);
480 llvm::Value* coord_y_f = rpn_stack.back();
481 rpn_stack.pop_back();
482 llvm::Value* coord_x_f = rpn_stack.back();
483 rpn_stack.pop_back();
484 llvm::Value* val_to_store = rpn_stack.back();
485 rpn_stack.pop_back();
486 llvm::Value* coord_y =
builder.CreateFPToSI(coord_y_f, i32_ty);
487 llvm::Value* coord_x =
builder.CreateFPToSI(coord_x_f, i32_ty);
494 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
495 if (!named_arrays.contains(payload.name)) {
496 llvm::ArrayType* array_ty =
497 llvm::ArrayType::get(float_ty, payload.static_size);
498 llvm::Value* array_ptr =
500 named_arrays[payload.name] = array_ptr;
506 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
507 llvm::Value* idx_f = rpn_stack.back();
508 rpn_stack.pop_back();
510 llvm::Value* idx =
builder.CreateFPToSI(idx_f, i32_ty);
512 llvm::Value* array_ptr = named_arrays.at(payload.name);
514 llvm::Value* elem_ptr =
builder.CreateInBoundsGEP(
515 llvm::cast<llvm::AllocaInst>(array_ptr)->getAllocatedType(),
516 array_ptr, {
builder.getInt32(0), idx});
518 llvm::Value* value =
builder.CreateLoad(float_ty, elem_ptr);
519 rpn_stack.push_back(value);
524 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
525 llvm::Value* idx_f = rpn_stack.back();
526 rpn_stack.pop_back();
527 llvm::Value* value = rpn_stack.back();
528 rpn_stack.pop_back();
530 llvm::Value* idx =
builder.CreateFPToSI(idx_f, i32_ty);
532 llvm::Value* array_ptr = named_arrays.at(payload.name);
534 llvm::Value* elem_ptr =
builder.CreateInBoundsGEP(
535 llvm::cast<llvm::AllocaInst>(array_ptr)->getAllocatedType(),
536 array_ptr, {
builder.getInt32(0), idx});
538 builder.CreateStore(value, elem_ptr);