VapourSynth-llvmexpr
Loading...
Searching...
No Matches
ExprIRGenerator.cpp
Go to the documentation of this file.
1
19
20#include "ExprIRGenerator.hpp"
21
22#include <bit>
23#include <format>
24
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/GlobalValue.h"
30#include "llvm/IR/Instructions.h"
31
// Sentinel bit pattern meaning "leave this pixel unwritten" in this file:
// processModeSpecificToken pushes it (bit-cast to float) onto the RPN stack,
// and finalizeAndStoreResult compares the final result against it bit-for-bit
// and skips the default pixel store when they match.
32constexpr uint32_t EXIT_NAN_PAYLOAD = 0x7FC0E71F; // qNaN with payload 0xE71F
33
// ExprIRGenerator constructor (the `ExprIRGenerator::ExprIRGenerator(` header
// line is not present in this extract).
// Every argument except the two tile sizes is forwarded unchanged to
// IRGeneratorBase (func_name_in is moved); tile_x_in / tile_y_in are stored in
// the tile_x / tile_y members. generateLoops treats a tile size <= 0 as
// "use the full width / height".
35 const std::vector<Token>& tokens_in, const VSVideoInfo* out_vi,
36 const std::vector<const VSVideoInfo*>& in_vi, int width_in, int height_in,
37 bool mirror, const std::map<std::pair<int, std::string>, int>& p_map,
38 const analysis::ExpressionAnalysisResults& analysis_results_in,
39 llvm::LLVMContext& context_ref, llvm::Module& module_ref,
40 llvm::IRBuilder<>& builder_ref, MathLibraryManager& math_mgr,
41 std::string func_name_in, int approx_math_in, int tile_x_in, int tile_y_in)
42 : IRGeneratorBase(tokens_in, out_vi, in_vi, width_in, height_in, mirror,
43 p_map, analysis_results_in, context_ref, module_ref,
44 builder_ref, math_mgr, std::move(func_name_in),
45 approx_math_in),
46 tile_x(tile_x_in), tile_y(tile_y_in) {}
47
// Body of the routine that declares the generated LLVM function (the header
// line is not present in this extract; the page's symbol index lists
// `defineFunctionSignature()`).
// The generated function has the shape void(ptr, ptr, ptr, ptr), non-variadic.
// All four parameters share the same opaque pointer type; the separate aliases
// below only document what each one is meant to point at.
49 llvm::Type* void_ty = llvm::Type::getVoidTy(context);
50 llvm::Type* ptr_ty = llvm::PointerType::get(context, 0);
51 llvm::Type* context_ptr_ty = ptr_ty; // opaque pointer (void*)
52 llvm::Type* i8_ptr_ptr_ty = ptr_ty; // opaque pointer (represents uint8_t**)
53 llvm::Type* i32_ptr_ty = ptr_ty; // opaque pointer (represents int32_t*)
54 llvm::Type* float_ptr_ty = ptr_ty; // opaque pointer (represents float*)
55
56 llvm::FunctionType* func_ty = llvm::FunctionType::get(
57 void_ty, {context_ptr_ty, i8_ptr_ptr_ty, i32_ptr_ty, float_ptr_ty},
58 false);
59
// NOTE(review): listing line 61 (the remaining arguments of Function::Create)
// is missing from this extract; presumably it passes the function name and
// the module -- confirm against the real file.
60 func = llvm::Function::Create(func_ty, llvm::Function::ExternalLinkage,
62 func->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
63
64 // Context argument (index 0) not used in Expr mode
// Arguments 1..3 are cached in members and given readable IR names.
65 rwptrs_arg = func->getArg(1);
66 rwptrs_arg->setName("rwptrs");
67 strides_arg = func->getArg(2);
68 strides_arg->setName("strides");
69 props_arg = func->getArg(3);
70 props_arg->setName("props");
71
// Only the strides and props parameters are marked read-only; rwptrs is not.
72 func->addParamAttr(2, llvm::Attribute::ReadOnly); // strides (int32_t*)
73 func->addParamAttr(3, llvm::Attribute::ReadOnly); // props (float*)
74}
75
// Body of the routine that emits the whole loop nest of the generated function
// (the header line is not present in this extract; the page's symbol index
// lists `generateLoops()`).
//
// Blocks emitted, outermost first:
//   y_tile_header/body/exit : y_tile advances by effective_tile_y while < height
//   row_header/body/exit    : y advances by 1 over [y_tile, min(y_tile + tile, height))
//   x_tile_header/body/exit : x_tile advances by effective_tile_x while < width
//   left_peel / main_loop / right_peel : per-pixel x ranges inside one x tile
// All loop counters live in allocas and are loaded / stored explicitly.
77 llvm::BasicBlock* entry_bb =
78 llvm::BasicBlock::Create(context, "entry", func);
79 builder.SetInsertPoint(entry_bb);
80
81 llvm::Function* parent_func = builder.GetInsertBlock()->getParent();
82
// Integer loop counters: current pixel (x, y) and current tile origin.
83 llvm::Value* y_var =
84 builder.CreateAlloca(builder.getInt32Ty(), nullptr, "y.var");
85 llvm::Value* x_var =
86 builder.CreateAlloca(builder.getInt32Ty(), nullptr, "x.var");
87 llvm::Value* y_tile_var =
88 builder.CreateAlloca(builder.getInt32Ty(), nullptr, "y_tile.var");
89 llvm::Value* x_tile_var =
90 builder.CreateAlloca(builder.getInt32Ty(), nullptr, "x_tile.var");
// Only y_tile is initialised here; x_tile is reset to 0 at the start of each row.
91 builder.CreateStore(builder.getInt32(0), y_tile_var);
92
93 const auto& coord_usage = analysis_results.getCoordinateUsageResult();
94
// Float copies of x / y are allocated only when the analysis reports that the
// expression uses them; otherwise the pointers stay null.
95 llvm::Value* x_fp_var = nullptr;
96 if (coord_usage.uses_x) {
97 x_fp_var = createAllocaInEntry(builder.getFloatTy(), "x_fp.var");
98 }
99 llvm::Value* y_fp_var = nullptr;
100 if (coord_usage.uses_y) {
101 y_fp_var = createAllocaInEntry(builder.getFloatTy(), "y_fp.var");
102 builder.CreateStore(llvm::ConstantFP::get(builder.getFloatTy(), 0.0),
103 y_fp_var);
104 }
105
106 // Index 0 = dst, 1..num_inputs = sources
// NOTE(review): listing line 107 is missing from this extract; presumably it
// resizes preloaded_base_ptrs, which is indexed up to num_inputs in the loop
// below -- confirm against the real file.
108 preloaded_strides.resize(num_inputs + 1);
// Load every plane's base pointer and stride once, in the entry block, and
// tell LLVM that each base pointer is 32-byte aligned.
109 for (int i = 0; i <= num_inputs; ++i) {
110 llvm::Value* base_ptr_i = builder.CreateLoad(
111 llvm::PointerType::get(context, 0),
112 builder.CreateGEP(llvm::PointerType::get(context, 0), rwptrs_arg,
113 builder.getInt32(i)));
114 llvm::Value* stride_i = builder.CreateLoad(
115 builder.getInt32Ty(),
116 builder.CreateGEP(builder.getInt32Ty(), strides_arg,
117 builder.getInt32(i)));
118 preloaded_base_ptrs[i] = base_ptr_i;
119 preloaded_strides[i] = stride_i;
120
121 assumeAligned(base_ptr_i,
122 32); // NOLINT(cppcoreguidelines-avoid-magic-numbers)
123 }
124
// Build one distinct alias-scope node per pointer. Operand 0 starts as null
// and is then replaced by the node itself (self-reference); operand 1 is a
// node wrapping the string "rwptrs_<i>".
// NOTE(review): alias_scope_domain is created here but is not an operand of
// the scope nodes built in this block; LLVM's scope layout normally puts the
// domain in operand 1 -- confirm that using the name node there is intended.
125 alias_scope_domain = llvm::MDNode::getDistinct(context, {});
126 alias_scopes.resize(num_inputs + 1);
127 for (int i = 0; i <= num_inputs; ++i) {
128 llvm::SmallVector<llvm::Metadata*, 2> elems;
129 elems.push_back(nullptr);
130 llvm::Metadata* name_node = llvm::MDNode::get(
131 context, {llvm::MDString::get(
132 context, std::format("rwptrs_{}", i).c_str())});
133 elems.push_back(name_node);
134 alias_scopes[i] = llvm::MDNode::getDistinct(context, elems);
135 alias_scopes[i]->replaceOperandWith(0, alias_scopes[i]);
136 }
137 alias_scope_lists.resize(num_inputs + 1);
// NOTE(review): listing line 138 is missing from this extract; presumably it
// resizes noalias_scope_lists, which is indexed in the loop below -- confirm.
// For pointer i: alias_scope_lists[i] = { scope i },
//                noalias_scope_lists[i] = every scope except i.
139 for (int i = 0; i <= num_inputs; ++i) {
140 std::vector<llvm::Metadata*> self_list = {alias_scopes[i]};
141 alias_scope_lists[i] = llvm::MDNode::get(context, self_list);
142 std::vector<llvm::Metadata*> others;
143 for (int j = 0; j <= num_inputs; ++j) {
144 if (j == i) {
145 continue;
146 }
147 others.push_back(alias_scopes[j]);
148 }
149 noalias_scope_lists[i] = llvm::MDNode::get(context, others);
150 }
151
152 const auto& clip_access_result =
153 analysis_results.getRelAccessAnalysisResult();
154
// x range handled by the "main" loop, which is emitted with
// no_x_bounds_check = true: [-min_rel_x, width - max_rel_x).
155 llvm::Value* width_val = builder.getInt32(width);
156 llvm::Value* height_val = builder.getInt32(height);
157 llvm::Value* start_main_x = builder.getInt32(-clip_access_result.min_rel_x);
158 llvm::Value* end_main_x =
159 builder.getInt32(width - clip_access_result.max_rel_x);
160
// A peel loop (emitted with bounds checks) is only needed on a side where some
// relative access reaches past the current pixel in that direction.
161 bool has_left_peel = // NOLINT(cppcoreguidelines-init-variables)
162 clip_access_result.min_rel_x < 0;
163 bool has_right_peel = // NOLINT(cppcoreguidelines-init-variables)
164 clip_access_result.max_rel_x > 0;
165
// A non-positive tile size means "one tile spanning the whole dimension".
166 const int effective_tile_x = (tile_x <= 0) ? width : tile_x;
167 const int effective_tile_y = (tile_y <= 0) ? height : tile_y;
168
// Emits a signed 32-bit minimum as icmp slt + select.
169 auto min_i32 = [&](llvm::Value* lhs, llvm::Value* rhs,
170 const char* name) -> llvm::Value* {
171 llvm::Value* cond = builder.CreateICmpSLT(lhs, rhs);
172 return builder.CreateSelect(cond, lhs, rhs, name);
173 };
174
// Emits one per-pixel loop `while (x_var < end_x) { body }` at the current
// insert point and leaves the builder positioned in the loop's exit block.
// x_var is not reset here, so consecutive ranges continue where the previous
// one stopped; the body (generate_x_loop_body) is what advances x_var.
175 auto emit_x_range_loop = [&](llvm::Value* end_x, bool no_x_bounds_check,
176 const char* block_name_prefix) {
177 llvm::BasicBlock* header_bb = llvm::BasicBlock::Create(
178 context, std::format("{}_header", block_name_prefix), parent_func);
179 llvm::BasicBlock* body_bb = llvm::BasicBlock::Create(
180 context, std::format("{}_body", block_name_prefix), parent_func);
181 llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create(
182 context, std::format("{}_exit", block_name_prefix), parent_func);
183
184 builder.CreateBr(header_bb);
185
186 builder.SetInsertPoint(header_bb);
187 llvm::Value* x_val =
188 builder.CreateLoad(builder.getInt32Ty(), x_var, "x_range");
189 llvm::Value* cond = builder.CreateICmpSLT(x_val, end_x);
190 llvm::BranchInst* range_br =
191 builder.CreateCondBr(cond, body_bb, exit_bb);
192 addLoopMetadata(range_br);
193
194 builder.SetInsertPoint(body_bb);
195 generate_x_loop_body(x_var, x_fp_var, y_var, y_fp_var,
196 no_x_bounds_check);
197 builder.CreateBr(header_bb);
198
199 builder.SetInsertPoint(exit_bb);
200 };
201
// ---- Outer loop over vertical tiles ----
202 llvm::BasicBlock* y_tile_header =
203 llvm::BasicBlock::Create(context, "y_tile_header", parent_func);
204 llvm::BasicBlock* y_tile_body =
205 llvm::BasicBlock::Create(context, "y_tile_body", parent_func);
206 llvm::BasicBlock* y_tile_exit =
207 llvm::BasicBlock::Create(context, "y_tile_exit", parent_func);
208
209 builder.CreateBr(y_tile_header);
210
211 builder.SetInsertPoint(y_tile_header);
212 llvm::Value* y_tile_val =
213 builder.CreateLoad(builder.getInt32Ty(), y_tile_var, "y_tile");
214 llvm::Value* y_tile_cond = builder.CreateICmpSLT(y_tile_val, height_val);
215 builder.CreateCondBr(y_tile_cond, y_tile_body, y_tile_exit);
216
// Tile end is clamped to height; y (and its float copy) restart at the tile origin.
217 builder.SetInsertPoint(y_tile_body);
218 llvm::Value* y_tile_next_unclamped =
219 builder.CreateAdd(y_tile_val, builder.getInt32(effective_tile_y));
220 llvm::Value* y_tile_end =
221 min_i32(y_tile_next_unclamped, height_val, "y_tile_end");
222 builder.CreateStore(y_tile_val, y_var);
223 if (coord_usage.uses_y) {
224 builder.CreateStore(
225 builder.CreateSIToFP(y_tile_val, builder.getFloatTy()), y_fp_var);
226 }
227
// ---- Loop over the rows of the current vertical tile ----
228 llvm::BasicBlock* row_header =
229 llvm::BasicBlock::Create(context, "row_header", parent_func);
230 llvm::BasicBlock* row_body =
231 llvm::BasicBlock::Create(context, "row_body", parent_func);
232 llvm::BasicBlock* row_exit =
233 llvm::BasicBlock::Create(context, "row_exit", parent_func);
234
235 builder.CreateBr(row_header);
236
237 builder.SetInsertPoint(row_header);
238 llvm::Value* y_val = builder.CreateLoad(builder.getInt32Ty(), y_var, "y");
239 llvm::Value* y_cond = builder.CreateICmpSLT(y_val, y_tile_end, "y.cond");
240 builder.CreateCondBr(y_cond, row_body, row_exit);
241
242 builder.SetInsertPoint(row_body);
243
244 // Pre-calculate and cache row pointers for this row.
// One entry per unique access key in unique_rel_y_accesses. The cached values
// are instructions emitted in row_body, so they are recomputed for every row.
245 row_ptr_cache.clear();
246 for (const auto& access : clip_access_result.unique_rel_y_accesses) {
247 int clip_idx = access.clip_idx;
// +1 because index 0 of the preloaded tables is the destination.
248 int vs_clip_idx = clip_idx + 1;
249 int rel_y = access.rel_y;
250
251 llvm::Value* coord_y =
252 builder.CreateAdd(y_val, builder.getInt32(rel_y));
253 llvm::Value* final_y =
254 getFinalCoord(coord_y, builder.getInt32(height), access.use_mirror);
255
256 llvm::Value* base_ptr = preloaded_base_ptrs[vs_clip_idx];
257 llvm::Value* stride = preloaded_strides[vs_clip_idx];
258
// Byte offset of the row: final_y * stride, applied through an i8 GEP.
// NOTE(review): stride is loaded as i32 above, so this multiply is presumably
// 32-bit -- confirm that frame sizes cannot overflow it.
259 llvm::Value* y_offset = builder.CreateMul(final_y, stride);
260 llvm::Value* row_ptr = builder.CreateGEP(builder.getInt8Ty(), base_ptr,
261 y_offset, "row_ptr");
262 row_ptr_cache[access] = row_ptr;
263 }
264
// ---- Loop over horizontal tiles within the current row ----
265 llvm::BasicBlock* x_tile_header =
266 llvm::BasicBlock::Create(context, "x_tile_header", parent_func);
267 llvm::BasicBlock* x_tile_body =
268 llvm::BasicBlock::Create(context, "x_tile_body", parent_func);
269 llvm::BasicBlock* x_tile_exit =
270 llvm::BasicBlock::Create(context, "x_tile_exit", parent_func);
271
272 builder.CreateStore(builder.getInt32(0), x_tile_var);
273 builder.CreateBr(x_tile_header);
274
275 builder.SetInsertPoint(x_tile_header);
276 llvm::Value* x_tile_val =
277 builder.CreateLoad(builder.getInt32Ty(), x_tile_var, "x_tile");
278 llvm::Value* x_tile_cond =
279 builder.CreateICmpSLT(x_tile_val, width_val, "x_tile.cond");
280 builder.CreateCondBr(x_tile_cond, x_tile_body, x_tile_exit);
281
282 builder.SetInsertPoint(x_tile_body);
283 llvm::Value* x_tile_next_unclamped =
284 builder.CreateAdd(x_tile_val, builder.getInt32(effective_tile_x));
285 llvm::Value* x_tile_end =
286 min_i32(x_tile_next_unclamped, width_val, "x_tile_end");
287
// x (and its float copy) restart at the tile origin.
288 builder.CreateStore(x_tile_val, x_var);
289 if (coord_usage.uses_x) {
290 builder.CreateStore(
291 builder.CreateSIToFP(x_tile_val, builder.getFloatTy()), x_fp_var);
292 }
293
// Left peel: bounds-checked pixels up to min(tile end, start of main range).
294 if (has_left_peel) {
295 llvm::Value* left_end =
296 min_i32(x_tile_end, start_main_x, "left_peel_end");
297 emit_x_range_loop(left_end, false, "left_peel");
298 }
299
// Main range: emitted with no_x_bounds_check = true, up to
// min(tile end, end of main range).
300 llvm::Value* main_end = min_i32(x_tile_end, end_main_x, "main_end");
301 emit_x_range_loop(main_end, true, "main_loop");
302
// Right peel: bounds-checked pixels from wherever x stopped up to the tile end.
303 if (has_right_peel) {
304 emit_x_range_loop(x_tile_end, false, "right_peel");
305 }
306
// Advance to the next horizontal tile (same value as x_tile_next_unclamped,
// emitted again as a separate add).
307 llvm::Value* x_tile_next =
308 builder.CreateAdd(x_tile_val, builder.getInt32(effective_tile_x));
309 builder.CreateStore(x_tile_next, x_tile_var);
310 builder.CreateBr(x_tile_header);
311
// End of row: y += 1 and, when tracked, y_fp += 1.0.
312 builder.SetInsertPoint(x_tile_exit);
313 llvm::Value* y_next = builder.CreateAdd(y_val, builder.getInt32(1));
314 builder.CreateStore(y_next, y_var);
315 if (coord_usage.uses_y) {
316 llvm::Value* y_fp_val =
317 builder.CreateLoad(builder.getFloatTy(), y_fp_var);
318 llvm::Value* y_fp_next = builder.CreateFAdd(
319 y_fp_val, llvm::ConstantFP::get(builder.getFloatTy(), 1.0));
320 builder.CreateStore(y_fp_next, y_fp_var);
321 }
322 builder.CreateBr(row_header);
323
// End of vertical tile: y_tile += effective_tile_y.
324 builder.SetInsertPoint(row_exit);
325 llvm::Value* y_tile_next =
326 builder.CreateAdd(y_tile_val, builder.getInt32(effective_tile_y));
327 builder.CreateStore(y_tile_next, y_tile_var);
328 builder.CreateBr(y_tile_header);
329
// All tiles done: the generated function returns void.
330 builder.SetInsertPoint(y_tile_exit);
331 builder.CreateRetVoid();
332}
333
334void ExprIRGenerator::generate_x_loop_body(llvm::Value* x_var,
335 llvm::Value* x_fp_var,
336 llvm::Value* y_var,
337 llvm::Value* y_fp_var,
338 bool no_x_bounds_check) {
339 const auto& coord_usage = analysis_results.getCoordinateUsageResult();
340 llvm::Value* x_val = builder.CreateLoad(builder.getInt32Ty(), x_var, "x");
341 llvm::Value* y_val =
342 builder.CreateLoad(builder.getInt32Ty(), y_var, "y_in_x_loop");
343
344 llvm::Value* x_fp = nullptr;
345 if (coord_usage.uses_x) {
346 x_fp = builder.CreateLoad(builder.getFloatTy(), x_fp_var, "x_fp");
347 }
348 llvm::Value* y_fp = nullptr;
349 if (coord_usage.uses_y) {
350 y_fp = builder.CreateLoad(builder.getFloatTy(), y_fp_var, "y_fp");
351 }
352
353 generateIRFromTokens(x_val, y_val, x_fp, y_fp, no_x_bounds_check);
354
355 llvm::Value* x_next = builder.CreateAdd(x_val, builder.getInt32(1));
356 builder.CreateStore(x_next, x_var);
357 if (coord_usage.uses_x) {
358 llvm::Value* x_fp_next = builder.CreateFAdd(
359 x_fp, llvm::ConstantFP::get(builder.getFloatTy(), 1.0));
360 builder.CreateStore(x_fp_next, x_fp_var);
361 }
362}
363
// Parameter list and body of the per-token handler (the first line of the
// signature is not present in this extract; the page's symbol index lists
// `processModeSpecificToken`).
// Each handled token pops its operands from rpn_stack and/or pushes its result
// there, then returns true; tokens not handled here fall to `default` and
// return false.
// NOTE(review): several `case` label lines are missing from this extract
// (listing lines 372, 375, 434, 440, 452, 493, 505, 523). The comments below
// describe what each visible body does; the label names are not visible.
365 const Token& token, std::vector<llvm::Value*>& rpn_stack, llvm::Value* x,
366 [[maybe_unused]] llvm::Value* y, llvm::Value* x_fp, llvm::Value* y_fp,
367 bool no_x_bounds_check) {
368 llvm::Type* float_ty = builder.getFloatTy();
369 llvm::Type* i32_ty = builder.getInt32Ty();
370
371 switch (token.type) {
// (label missing) Push the float x coordinate.
373 rpn_stack.push_back(x_fp);
374 return true;
// (label missing) Push the float y coordinate.
376 rpn_stack.push_back(y_fp);
377 return true;
378
// Relative pixel access: the row pointer comes from row_ptr_cache, keyed by
// (clip, rel_y, mirror mode). The mirror mode is the payload's own when
// has_mode is set, otherwise mirror_boundary. `at` throws if the key was not
// cached.
379 case TokenType::ClipRel: {
380 const auto& payload = std::get<TokenPayloadClipAccess>(token.payload);
381 bool use_mirror = // NOLINT(cppcoreguidelines-init-variables)
382 payload.has_mode ? payload.use_mirror : mirror_boundary;
383 analysis::RelYAccess access{.clip_idx = payload.clip_idx,
384 .rel_y = payload.rel_y,
385 .use_mirror = use_mirror};
386 llvm::Value* row_ptr = row_ptr_cache.at(access);
387 rpn_stack.push_back(generateLoadFromRowPtr(row_ptr, payload.clip_idx, x,
388 payload.rel_x, use_mirror,
389 no_x_bounds_check));
390 return true;
391 }
// Absolute pixel access: pops y first, then x. Both are rounded with llvm.rint
// before the float -> signed int conversion.
392 case TokenType::ClipAbs: {
393 const auto& payload = std::get<TokenPayloadClipAccess>(token.payload);
394 llvm::Value* coord_y_f = rpn_stack.back();
395 rpn_stack.pop_back();
396 llvm::Value* coord_x_f = rpn_stack.back();
397 rpn_stack.pop_back();
398
399 llvm::Value* coord_y =
400 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
401 &module, llvm::Intrinsic::rint, {float_ty}),
402 {coord_y_f});
403 coord_y = builder.CreateFPToSI(coord_y, i32_ty);
404
405 llvm::Value* coord_x =
406 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
407 &module, llvm::Intrinsic::rint, {float_ty}),
408 {coord_x_f});
409 coord_x = builder.CreateFPToSI(coord_x, i32_ty);
410
// Same mirror-mode selection as ClipRel, written as an if/else.
411 bool use_mirror_final = false;
412 if (payload.has_mode) {
413 use_mirror_final = payload.use_mirror;
414 } else {
415 use_mirror_final = mirror_boundary;
416 }
417
418 rpn_stack.push_back(generatePixelLoad(payload.clip_idx, coord_x,
419 coord_y, use_mirror_final));
420 return true;
421 }
// Current-pixel access: uses the cached row for rel_y = 0 with mirror_boundary.
422 case TokenType::ClipCur: {
423 const auto& payload = std::get<TokenPayloadClipAccess>(token.payload);
424 analysis::RelYAccess access{.clip_idx = payload.clip_idx,
425 .rel_y = 0,
426 .use_mirror = mirror_boundary};
427 llvm::Value* row_ptr = row_ptr_cache.at(access);
// NOTE(review): listing line 429 (the middle arguments of this call) is
// missing from this extract; presumably rel_x = 0 and the mirror flag --
// confirm against the real file.
428 rpn_stack.push_back(generateLoadFromRowPtr(row_ptr, payload.clip_idx, x,
430 no_x_bounds_check));
431 return true;
432 }
433
// (label missing) Push the EXIT_NAN_PAYLOAD sentinel as a float constant;
// finalizeAndStoreResult recognises it and skips the default store.
435 rpn_stack.push_back(llvm::ConstantFP::get(
436 float_ty, std::bit_cast<float>(EXIT_NAN_PAYLOAD)));
437 return true;
438 }
439
// (label missing) Property read: looks up (clip_idx, prop_name) in prop_map
// (`at` throws if the pair is absent) and loads props_arg[prop_idx] as float.
441 const auto& payload = std::get<TokenPayloadPropAccess>(token.payload);
442 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
443 int prop_idx = // NOLINT(cppcoreguidelines-init-variables)
444 prop_map.at(key);
445 llvm::Value* prop_val = builder.CreateLoad(
446 float_ty,
447 builder.CreateGEP(float_ty, props_arg, builder.getInt32(prop_idx)));
448 rpn_stack.push_back(prop_val);
449 return true;
450 }
451
// (label missing) Property existence test: pushes 0.0 when the pair is not in
// prop_map, or when the loaded value's bits equal 0x7FC0BEEF; otherwise 1.0.
453 const auto& payload = std::get<TokenPayloadPropAccess>(token.payload);
454 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
455 llvm::Value* exists_val = nullptr;
456 if (prop_map.contains(key)) {
457 int prop_idx = prop_map.at(key);
458 llvm::Value* prop_val = builder.CreateLoad(
459 float_ty, builder.CreateGEP(float_ty, props_arg,
460 builder.getInt32(prop_idx)));
461
// Compare raw bits, not float values: a NaN never compares equal as a float.
462 llvm::Value* prop_val_int = builder.CreateBitCast(prop_val, i32_ty);
463
// NOTE(review): 0x7FC0BEEF is presumably the marker the caller writes into
// props for a property that could not be read -- confirm where it is produced.
464 // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
465 llvm::Value* nan_payload_int = builder.getInt32(0x7FC0BEEF);
466 llvm::Value* is_prop_read_nan =
467 builder.CreateICmpEQ(prop_val_int, nan_payload_int);
468
469 exists_val = builder.CreateSelect(
470 is_prop_read_nan, llvm::ConstantFP::get(float_ty, 0.0),
471 llvm::ConstantFP::get(float_ty, 1.0));
472 } else {
473 exists_val = llvm::ConstantFP::get(float_ty, 0.0);
474 }
475 rpn_stack.push_back(exists_val);
476 return true;
477 }
478
// Absolute store: pops y, then x, then the value to write.
// NOTE(review): coordinates are converted with FPToSI directly here, whereas
// ClipAbs applies llvm.rint first -- confirm the difference is intended.
479 case TokenType::StoreAbs: {
480 llvm::Value* coord_y_f = rpn_stack.back();
481 rpn_stack.pop_back();
482 llvm::Value* coord_x_f = rpn_stack.back();
483 rpn_stack.pop_back();
484 llvm::Value* val_to_store = rpn_stack.back();
485 rpn_stack.pop_back();
486 llvm::Value* coord_y = builder.CreateFPToSI(coord_y_f, i32_ty);
487 llvm::Value* coord_x = builder.CreateFPToSI(coord_x_f, i32_ty);
488 generatePixelStore(val_to_store, coord_x, coord_y);
489 return true;
490 }
491
492 // Array
// (label missing) Array allocation: the first time a name is seen, allocates
// [static_size x float] via createAllocaInEntry and records it in
// named_arrays; later tokens with the same name allocate nothing.
494 const auto& payload = std::get<TokenPayloadArrayOp>(token.payload);
495 if (!named_arrays.contains(payload.name)) {
496 llvm::ArrayType* array_ty =
497 llvm::ArrayType::get(float_ty, payload.static_size);
498 llvm::Value* array_ptr =
499 createAllocaInEntry(array_ty, payload.name + "_array");
500 named_arrays[payload.name] = array_ptr;
501 }
502 return true;
503 }
504
// (label missing) Array element load: pops the index, converts it with
// FPToSI, and loads element [0, idx] of the named array. `at` throws if the
// array was never allocated.
// NOTE(review): no range check on idx is visible here, and the GEP is
// in-bounds -- confirm that indices are validated elsewhere.
506 const auto& payload = std::get<TokenPayloadArrayOp>(token.payload);
507 llvm::Value* idx_f = rpn_stack.back();
508 rpn_stack.pop_back();
509
510 llvm::Value* idx = builder.CreateFPToSI(idx_f, i32_ty);
511
512 llvm::Value* array_ptr = named_arrays.at(payload.name);
513
514 llvm::Value* elem_ptr = builder.CreateInBoundsGEP(
515 llvm::cast<llvm::AllocaInst>(array_ptr)->getAllocatedType(),
516 array_ptr, {builder.getInt32(0), idx});
517
518 llvm::Value* value = builder.CreateLoad(float_ty, elem_ptr);
519 rpn_stack.push_back(value);
520 return true;
521 }
522
// (label missing) Array element store: pops the index first, then the value,
// and writes the value to element [0, idx]. Same unchecked-index caveat as
// the load above.
524 const auto& payload = std::get<TokenPayloadArrayOp>(token.payload);
525 llvm::Value* idx_f = rpn_stack.back();
526 rpn_stack.pop_back();
527 llvm::Value* value = rpn_stack.back();
528 rpn_stack.pop_back();
529
530 llvm::Value* idx = builder.CreateFPToSI(idx_f, i32_ty);
531
532 llvm::Value* array_ptr = named_arrays.at(payload.name);
533
534 llvm::Value* elem_ptr = builder.CreateInBoundsGEP(
535 llvm::cast<llvm::AllocaInst>(array_ptr)->getAllocatedType(),
536 array_ptr, {builder.getInt32(0), idx});
537
538 builder.CreateStore(value, elem_ptr);
539 return true;
540 }
541
542 default:
543 // Token not handled by this mode
544 return false;
545 }
546}
547
548void ExprIRGenerator::finalizeAndStoreResult(llvm::Value* result_val,
549 llvm::Value* x, llvm::Value* y) {
550 bool has_exit = false;
551 has_exit = std::ranges::any_of(tokens, [](const auto& token) {
552 return token.type == TokenType::ExitNoWrite;
553 });
554
555 if (has_exit) {
556 llvm::Function* parent_func = builder.GetInsertBlock()->getParent();
557 llvm::Value* result_int =
558 builder.CreateBitCast(result_val, builder.getInt32Ty());
559 llvm::Value* exit_nan_int = builder.getInt32(EXIT_NAN_PAYLOAD);
560 llvm::Value* is_exit_val =
561 builder.CreateICmpEQ(result_int, exit_nan_int);
562
563 llvm::BasicBlock* store_block =
564 llvm::BasicBlock::Create(context, "do_default_store", parent_func);
565 llvm::BasicBlock* after_store_block = llvm::BasicBlock::Create(
566 context, "after_default_store", parent_func);
567
568 builder.CreateCondBr(is_exit_val, after_store_block, store_block);
569
570 builder.SetInsertPoint(store_block);
571 generatePixelStore(result_val, x, y);
572 builder.CreateBr(after_store_block);
573
574 builder.SetInsertPoint(after_store_block);
575 } else {
576 generatePixelStore(result_val, x, y);
577 }
578}
constexpr uint32_t EXIT_NAN_PAYLOAD
@ ArrayAllocStatic
Definition Tokenizer.hpp:51
void defineFunctionSignature() override
void generateLoops() override
void finalizeAndStoreResult(llvm::Value *result_val, llvm::Value *x, llvm::Value *y) override
bool processModeSpecificToken(const Token &token, std::vector< llvm::Value * > &rpn_stack, llvm::Value *x, llvm::Value *y, llvm::Value *x_fp, llvm::Value *y_fp, bool no_x_bounds_check) override
ExprIRGenerator(const std::vector< Token > &tokens_in, const VSVideoInfo *out_vi, const std::vector< const VSVideoInfo * > &in_vi, int width_in, int height_in, bool mirror, const std::map< std::pair< int, std::string >, int > &p_map, const analysis::ExpressionAnalysisResults &analysis_results_in, llvm::LLVMContext &context_ref, llvm::Module &module_ref, llvm::IRBuilder<> &builder_ref, MathLibraryManager &math_mgr, std::string func_name_in, int approx_math_in, int tile_x_in, int tile_y_in)
const std::map< std::pair< int, std::string >, int > & prop_map
llvm::Value * getFinalCoord(llvm::Value *coord, llvm::Value *max_dim, bool use_mirror)
llvm::IRBuilder & builder
std::string func_name
llvm::LLVMContext & context
llvm::Value * generatePixelLoad(int clip_idx, llvm::Value *x, llvm::Value *y, bool mirror)
void generateIRFromTokens(llvm::Value *x, llvm::Value *y, llvm::Value *x_fp, llvm::Value *y_fp, bool no_x_bounds_check)
std::vector< llvm::MDNode * > alias_scopes
llvm::Value * rwptrs_arg
const std::vector< Token > & tokens
IRGeneratorBase(const std::vector< Token > &tokens_in, const VSVideoInfo *out_vi, const std::vector< const VSVideoInfo * > &in_vi, int width_in, int height_in, bool mirror, const std::map< std::pair< int, std::string >, int > &p_map, const analysis::ExpressionAnalysisResults &analysis_results_in, llvm::LLVMContext &context_ref, llvm::Module &module_ref, llvm::IRBuilder<> &builder_ref, MathLibraryManager &math_mgr, std::string func_name_in, int approx_math_in)
llvm::Value * generateLoadFromRowPtr(llvm::Value *row_ptr, int clip_idx, llvm::Value *x, int rel_x, bool use_mirror, bool no_x_bounds_check)
std::vector< llvm::MDNode * > alias_scope_lists
llvm::AllocaInst * createAllocaInEntry(llvm::Type *type, const std::string &name)
void assumeAligned(llvm::Value *ptr_value, unsigned alignment)
llvm::Function * func
std::vector< llvm::Value * > preloaded_base_ptrs
std::vector< llvm::Value * > preloaded_strides
llvm::Module & module
std::map< analysis::RelYAccess, llvm::Value * > row_ptr_cache
llvm::MDNode * alias_scope_domain
llvm::Value * props_arg
std::vector< llvm::MDNode * > noalias_scope_lists
void addLoopMetadata(llvm::BranchInst *loop_br)
const analysis::ExpressionAnalysisResults & analysis_results
void generatePixelStore(llvm::Value *value_to_store, llvm::Value *x, llvm::Value *y)
llvm::Value * strides_arg
const CoordinateUsageResult & getCoordinateUsageResult() const
TokenType type
PayloadVariant payload