51 llvm::Type* void_ty = llvm::Type::getVoidTy(
context);
52 llvm::Type* ptr_ty = llvm::PointerType::get(
context, 0);
53 llvm::Type* context_ptr_ty = ptr_ty;
54 llvm::Type* i8_ptr_ptr_ty = ptr_ty;
55 llvm::Type* i32_ptr_ty = ptr_ty;
56 llvm::Type* float_ptr_ty = ptr_ty;
58 llvm::FunctionType* func_ty = llvm::FunctionType::get(
59 void_ty, {context_ptr_ty, i8_ptr_ptr_ty, i32_ptr_ty, float_ptr_ty},
62 func = llvm::Function::Create(func_ty, llvm::Function::ExternalLinkage,
64 func->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
66 context_arg =
func->getArg(0);
67 context_arg->setName(
"context");
75 func->addParamAttr(2, llvm::Attribute::ReadOnly);
78 llvm::Type* i64_ty =
builder.getInt64Ty();
79 llvm::Type* i8_ptr_ty = ptr_ty;
82 llvm::FunctionType* ensure_buffer_ty =
83 llvm::FunctionType::get(float_ptr_ty, {i8_ptr_ty, i64_ty},
false);
84 llvmexpr_ensure_buffer_func = llvm::Function::Create(
85 ensure_buffer_ty, llvm::Function::ExternalLinkage,
86 "llvmexpr_ensure_buffer", &
module);
89 llvm::FunctionType* get_buffer_size_ty =
90 llvm::FunctionType::get(i64_ty, {i8_ptr_ty},
false);
91 llvmexpr_get_buffer_size_func = llvm::Function::Create(
92 get_buffer_size_ty, llvm::Function::ExternalLinkage,
93 "llvmexpr_get_buffer_size", &
module);
98 llvm::BasicBlock* entry_bb =
100 builder.SetInsertPoint(entry_bb);
105 const int num_planes =
106 vo->format.numPlanes;
108 plane_base_ptrs[i].resize(num_planes);
109 plane_strides[i].resize(num_planes);
110 for (
int p = 0; p < num_planes; ++p) {
111 int flat_idx = (i * num_planes) + p;
112 llvm::Value* base_ptr_i =
builder.CreateLoad(
113 llvm::PointerType::get(
context, 0),
116 llvm::Value* stride_i =
builder.CreateLoad(
120 plane_base_ptrs[i][p] = base_ptr_i;
121 plane_strides[i][p] = stride_i;
126 for (
const auto& [key, idx] :
prop_map) {
127 llvm::Value* prop_val =
builder.CreateLoad(
131 const std::string unique_prop_name =
132 std::format(
"prop_{}_{}", key.first, key.second);
133 llvm::Value* alloca =
135 builder.CreateStore(prop_val, alloca);
136 prop_allocas[unique_prop_name] = alloca;
140 llvm::APInt payload_bits(32, 0x7FC0DEAD);
141 llvm::APFloat nan_payload_apf(llvm::APFloat::IEEEsingle(), payload_bits);
142 llvm::Value* nan_with_payload =
143 llvm::ConstantFP::get(
context, nan_payload_apf);
145 for (
const auto& prop_name : output_props_list) {
146 const std::string mangled_input_name =
147 std::format(
"prop_0_{}", prop_name);
148 if (prop_allocas.contains(mangled_input_name)) {
149 prop_allocas[prop_name] = prop_allocas.at(mangled_input_name);
151 if (!prop_allocas.contains(prop_name)) {
152 llvm::Value* alloca =
154 builder.CreateStore(nan_with_payload, alloca);
155 prop_allocas[prop_name] = alloca;
166 for (
const auto& [name, idx] : output_prop_map) {
290 const Token& token, std::vector<llvm::Value*>& rpn_stack,
291 [[maybe_unused]] llvm::Value* x, [[maybe_unused]] llvm::Value* y,
292 [[maybe_unused]] llvm::Value* x_fp, [[maybe_unused]] llvm::Value* y_fp,
293 [[maybe_unused]]
bool no_x_bounds_check) {
294 llvm::Type* float_ty =
builder.getFloatTy();
295 llvm::Type* i32_ty =
builder.getInt32Ty();
297 switch (token.
type) {
299 const auto& payload = std::get<TokenPayloadClipDim>(token.
payload);
300 const VSVideoInfo* vinfo =
vi[payload.clip_idx];
306 const auto& payload = std::get<TokenPayloadClipDim>(token.
payload);
307 const VSVideoInfo* vinfo =
vi[payload.clip_idx];
313 const auto& payload = std::get<TokenPayloadClipPlaneDim>(token.
payload);
314 const VSVideoInfo* vinfo =
vi[payload.clip_idx];
315 int plane_w = vinfo->width;
316 if (vinfo->format.colorFamily == cfYUV && payload.plane_idx > 0) {
317 plane_w >>= vinfo->format.subSamplingW;
324 const auto& payload = std::get<TokenPayloadClipPlaneDim>(token.
payload);
325 const VSVideoInfo* vinfo =
vi[payload.clip_idx];
326 int plane_h = vinfo->height;
327 if (vinfo->format.colorFamily == cfYUV && payload.plane_idx > 0) {
328 plane_h >>= vinfo->format.subSamplingH;
335 const auto& payload = std::get<TokenPayloadPlaneDim>(token.
payload);
336 int plane_w =
vo->width;
337 if (
vo->format.colorFamily == cfYUV && payload.plane_idx > 0) {
338 plane_w >>=
vo->format.subSamplingW;
345 const auto& payload = std::get<TokenPayloadPlaneDim>(token.
payload);
346 int plane_h =
vo->height;
347 if (
vo->format.colorFamily == cfYUV && payload.plane_idx > 0) {
348 plane_h >>=
vo->format.subSamplingH;
355 const auto& payload =
356 std::get<TokenPayloadClipAccessPlane>(token.
payload);
357 llvm::Value* coord_y_f = rpn_stack.back();
358 rpn_stack.pop_back();
359 llvm::Value* coord_x_f = rpn_stack.back();
360 rpn_stack.pop_back();
362 llvm::Value* coord_y =
363 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
364 &
module, llvm::Intrinsic::rint, {float_ty}),
366 coord_y =
builder.CreateFPToSI(coord_y, i32_ty);
368 llvm::Value* coord_x =
369 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
370 &
module, llvm::Intrinsic::rint, {float_ty}),
372 coord_x =
builder.CreateFPToSI(coord_x, i32_ty);
374 rpn_stack.push_back(generatePixelLoadPlane(
375 payload.clip_idx, payload.plane_idx, coord_x, coord_y));
379 const auto& payload =
380 std::get<TokenPayloadStoreAbsPlane>(token.
payload);
381 llvm::Value* coord_y_f = rpn_stack.back();
382 rpn_stack.pop_back();
383 llvm::Value* coord_x_f = rpn_stack.back();
384 rpn_stack.pop_back();
385 llvm::Value* val_to_store = rpn_stack.back();
386 rpn_stack.pop_back();
388 llvm::Value* coord_y =
389 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
390 &
module, llvm::Intrinsic::rint, {float_ty}),
392 coord_y =
builder.CreateFPToSI(coord_y, i32_ty);
394 llvm::Value* coord_x =
395 builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
396 &
module, llvm::Intrinsic::rint, {float_ty}),
398 coord_x =
builder.CreateFPToSI(coord_x, i32_ty);
400 generatePixelStorePlane(val_to_store, payload.plane_idx, coord_x,
405 const auto& payload = std::get<TokenPayloadPropStore>(token.
payload);
408 llvm::APInt payload_bits(32, 0x7FC0DE1E);
409 llvm::APFloat nan_payload_apf(llvm::APFloat::IEEEsingle(),
411 llvm::Value* nan_with_payload =
412 llvm::ConstantFP::get(
context, nan_payload_apf);
413 builder.CreateStore(nan_with_payload,
414 prop_allocas.at(payload.prop_name));
416 llvm::Value* val_to_store = rpn_stack.back();
417 rpn_stack.pop_back();
418 builder.CreateStore(val_to_store,
419 prop_allocas.at(payload.prop_name));
424 const auto& payload = std::get<TokenPayloadPropAccess>(token.
payload);
425 if (payload.clip_idx == 0 &&
426 output_prop_map.contains(payload.prop_name)) {
427 rpn_stack.push_back(
builder.CreateLoad(
428 float_ty, prop_allocas.at(payload.prop_name)));
430 const std::string unique_prop_name =
431 std::format(
"prop_{}_{}", payload.clip_idx, payload.prop_name);
432 rpn_stack.push_back(
builder.CreateLoad(
433 float_ty, prop_allocas.at(unique_prop_name)));
439 const auto& payload = std::get<TokenPayloadPropAccess>(token.
payload);
440 llvm::Value* prop_val =
nullptr;
441 if (payload.clip_idx == 0 &&
442 output_prop_map.contains(payload.prop_name)) {
443 prop_val =
builder.CreateLoad(float_ty,
444 prop_allocas.at(payload.prop_name));
446 const std::string unique_prop_name =
447 std::format(
"prop_{}_{}", payload.clip_idx, payload.prop_name);
448 if (!prop_allocas.contains(unique_prop_name)) {
449 rpn_stack.push_back(llvm::ConstantFP::get(float_ty, 0.0));
453 builder.CreateLoad(float_ty, prop_allocas.at(unique_prop_name));
456 llvm::Value* prop_val_int =
builder.CreateBitCast(prop_val, i32_ty);
459 llvm::Value* read_nan_payload =
builder.getInt32(0x7FC0BEEF);
460 llvm::Value* is_read_nan =
461 builder.CreateICmpEQ(prop_val_int, read_nan_payload);
464 llvm::Value* delete_nan_payload =
builder.getInt32(0x7FC0DE1E);
465 llvm::Value* is_delete_nan =
466 builder.CreateICmpEQ(prop_val_int, delete_nan_payload);
468 llvm::Value* does_not_exist =
469 builder.CreateOr(is_read_nan, is_delete_nan);
471 llvm::Value* exists_val =
builder.CreateSelect(
472 does_not_exist, llvm::ConstantFP::get(float_ty, 0.0),
473 llvm::ConstantFP::get(float_ty, 1.0));
474 rpn_stack.push_back(exists_val);
480 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
481 llvm::Value* size_val =
builder.getInt64(payload.static_size);
482 llvm::Value* name_str =
483 builder.CreateGlobalString(payload.name, payload.name +
"_name");
484 llvm::Value* buffer_ptr =
builder.CreateCall(
485 llvmexpr_ensure_buffer_func, {name_str, size_val});
486 array_ptr_cache[payload.name] = buffer_ptr;
491 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
492 llvm::Value* size_f = rpn_stack.back();
493 rpn_stack.pop_back();
495 llvm::Value* size_val =
497 llvm::Value* name_str =
498 builder.CreateGlobalString(payload.name, payload.name +
"_name");
499 llvm::Value* buffer_ptr =
builder.CreateCall(
500 llvmexpr_ensure_buffer_func, {name_str, size_val});
501 array_ptr_cache[payload.name] = buffer_ptr;
506 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
507 llvm::Value* idx_f = rpn_stack.back();
508 rpn_stack.pop_back();
509 llvm::Value* idx =
builder.CreateFPToSI(idx_f, i32_ty);
511 llvm::Value* array_ptr = array_ptr_cache.at(payload.name);
512 llvm::Value* elem_ptr =
builder.CreateGEP(float_ty, array_ptr, idx);
513 llvm::Value* value =
builder.CreateLoad(float_ty, elem_ptr);
514 rpn_stack.push_back(value);
519 const auto& payload = std::get<TokenPayloadArrayOp>(token.
payload);
520 llvm::Value* idx_f = rpn_stack.back();
521 rpn_stack.pop_back();
522 llvm::Value* value = rpn_stack.back();
523 rpn_stack.pop_back();
525 llvm::Value* idx =
builder.CreateFPToSI(idx_f, i32_ty);
527 llvm::Value* array_ptr = array_ptr_cache.at(payload.name);
528 llvm::Value* elem_ptr =
builder.CreateGEP(float_ty, array_ptr, idx);
529 builder.CreateStore(value, elem_ptr);