VapourSynth-llvmexpr
Loading...
Searching...
No Matches
IRGeneratorBase Class Reference [abstract]

#include <llvmexpr/codegen/llvm/IRGeneratorBase.hpp>

Inheritance diagram for IRGeneratorBase:
Collaboration diagram for IRGeneratorBase:

Public Member Functions

 IRGeneratorBase (const std::vector< Token > &tokens_in, const VSVideoInfo *out_vi, const std::vector< const VSVideoInfo * > &in_vi, int width_in, int height_in, bool mirror, const std::map< std::pair< int, std::string >, int > &p_map, const analysis::ExpressionAnalysisResults &analysis_results_in, llvm::LLVMContext &context_ref, llvm::Module &module_ref, llvm::IRBuilder<> &builder_ref, MathLibraryManager &math_mgr, std::string func_name_in, int approx_math_in)
virtual ~IRGeneratorBase ()=default
 IRGeneratorBase (const IRGeneratorBase &)=delete
IRGeneratorBase & operator= (const IRGeneratorBase &)=delete
 IRGeneratorBase (IRGeneratorBase &&)=delete
IRGeneratorBase & operator= (IRGeneratorBase &&)=delete
void generate ()

Protected Member Functions

virtual void defineFunctionSignature ()=0
virtual void generateLoops ()=0
llvm::AllocaInst * createAllocaInEntry (llvm::Type *type, const std::string &name)
template<typename... Args>
llvm::Value * createIntrinsicCall (llvm::Intrinsic::ID intrinsic_id, Args... args)
void assumeAligned (llvm::Value *ptr_value, unsigned alignment)
template<typename MemInstT>
void setMemoryInstAttrs (MemInstT *inst, unsigned alignment, int rwptr_index)
llvm::Value * getFinalCoord (llvm::Value *coord, llvm::Value *max_dim, bool use_mirror)
llvm::Value * generateLoadFromRowPtr (llvm::Value *row_ptr, int clip_idx, llvm::Value *x, int rel_x, bool use_mirror, bool no_x_bounds_check)
void addLoopMetadata (llvm::BranchInst *loop_br)
llvm::Value * generatePixelLoad (int clip_idx, llvm::Value *x, llvm::Value *y, bool mirror)
void generatePixelStore (llvm::Value *value_to_store, llvm::Value *x, llvm::Value *y)
void generateIRFromTokens (llvm::Value *x, llvm::Value *y, llvm::Value *x_fp, llvm::Value *y_fp, bool no_x_bounds_check)
bool processCommonToken (const Token &token, std::vector< llvm::Value * > &rpn_stack, llvm::Type *float_ty, llvm::Type *i32_ty, bool use_approx_math)
virtual bool processModeSpecificToken (const Token &token, std::vector< llvm::Value * > &rpn_stack, llvm::Value *x, llvm::Value *y, llvm::Value *x_fp, llvm::Value *y_fp, bool no_x_bounds_check)=0
virtual void finalizeAndStoreResult (llvm::Value *result_val, llvm::Value *x, llvm::Value *y)=0

Protected Attributes

const std::vector< Token > & tokens
const VSVideoInfo * vo
const std::vector< const VSVideoInfo * > & vi
int num_inputs
int width
int height
bool mirror_boundary
const std::map< std::pair< int, std::string >, int > & prop_map
const analysis::ExpressionAnalysisResults & analysis_results
std::string func_name
int approx_math
llvm::LLVMContext & context
llvm::Module & module
llvm::IRBuilder<> & builder
MathLibraryManager & math_manager
llvm::Function * func
llvm::Value * rwptrs_arg
llvm::Value * strides_arg
llvm::Value * props_arg
std::vector< llvm::Value * > preloaded_base_ptrs
std::vector< llvm::Value * > preloaded_strides
llvm::MDNode * alias_scope_domain
std::vector< llvm::MDNode * > alias_scopes
std::vector< llvm::MDNode * > alias_scope_lists
std::vector< llvm::MDNode * > noalias_scope_lists
std::map< analysis::RelYAccess, llvm::Value * > row_ptr_cache

Detailed Description

Copyright (C) 2025 yuygfgg

This file is part of Vapoursynth-llvmexpr.

Vapoursynth-llvmexpr is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

Vapoursynth-llvmexpr is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with Vapoursynth-llvmexpr. If not, see https://www.gnu.org/licenses/.

Definition at line 38 of file IRGeneratorBase.hpp.

Constructor & Destructor Documentation

◆ IRGeneratorBase() [1/3]

IRGeneratorBase::IRGeneratorBase ( const std::vector< Token > & tokens_in,
const VSVideoInfo * out_vi,
const std::vector< const VSVideoInfo * > & in_vi,
int width_in,
int height_in,
bool mirror,
const std::map< std::pair< int, std::string >, int > & p_map,
const analysis::ExpressionAnalysisResults & analysis_results_in,
llvm::LLVMContext & context_ref,
llvm::Module & module_ref,
llvm::IRBuilder<> & builder_ref,
MathLibraryManager & math_mgr,
std::string func_name_in,
int approx_math_in )

Definition at line 37 of file IRGeneratorBase.cpp.

45 : tokens(tokens_in), vo(out_vi), vi(in_vi),
46 num_inputs(static_cast<int>(in_vi.size())), width(width_in),
47 height(height_in), mirror_boundary(mirror), prop_map(p_map),
48 analysis_results(analysis_results_in), func_name(std::move(func_name_in)),
49 approx_math(approx_math_in), context(context_ref), module(module_ref),
50 builder(builder_ref), math_manager(math_mgr), func(nullptr),
51 rwptrs_arg(nullptr), strides_arg(nullptr), props_arg(nullptr),
52 alias_scope_domain(nullptr) {}
const std::map< std::pair< int, std::string >, int > & prop_map
llvm::IRBuilder<> & builder
std::string func_name
llvm::LLVMContext & context
const std::vector< const VSVideoInfo * > & vi
llvm::Value * rwptrs_arg
const std::vector< Token > & tokens
llvm::Function * func
const VSVideoInfo * vo
llvm::Module & module
llvm::MDNode * alias_scope_domain
llvm::Value * props_arg
MathLibraryManager & math_manager
const analysis::ExpressionAnalysisResults & analysis_results
llvm::Value * strides_arg

References alias_scope_domain, analysis_results, approx_math, builder, context, func, func_name, height, math_manager, mirror_boundary, module, num_inputs, prop_map, props_arg, rwptrs_arg, strides_arg, tokens, vi, vo, and width.

Referenced by ExprIRGenerator::ExprIRGenerator(), IRGeneratorBase(), IRGeneratorBase(), operator=(), operator=(), and SingleExprIRGenerator::SingleExprIRGenerator().

◆ ~IRGeneratorBase()

virtual IRGeneratorBase::~IRGeneratorBase ( )
virtual default

◆ IRGeneratorBase() [2/3]

IRGeneratorBase::IRGeneratorBase ( const IRGeneratorBase & )
delete

References IRGeneratorBase().

◆ IRGeneratorBase() [3/3]

IRGeneratorBase::IRGeneratorBase ( IRGeneratorBase && )
delete

References IRGeneratorBase().

Member Function Documentation

◆ addLoopMetadata()

void IRGeneratorBase::addLoopMetadata ( llvm::BranchInst * loop_br)
protected

Definition at line 174 of file IRGeneratorBase.cpp.

175 { // NOLINT(readability-non-const-parameter)
176 llvm::StringMap<bool> host_features = llvm::sys::getHostCPUFeatures();
177 unsigned simd_width = 4;
178 if (!host_features.empty()) {
179 if (host_features["avx512f"]) {
180 simd_width = 16; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
181 } else if (host_features["avx2"]) {
182 simd_width = 8; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
183 }
184 }
185
186 auto create_md_node = [this](const char* name, llvm::Type* type,
187 uint64_t value) -> llvm::MDNode* {
188 std::array<llvm::Metadata*, 2> md = {
189 llvm::MDString::get(context, name),
190 llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(type, value))};
191 return llvm::MDNode::get(context, md);
192 };
193
194 llvm::MDNode* vec_width_node =
195 create_md_node("llvm.loop.vectorize.width",
196 llvm::Type::getInt32Ty(context), simd_width);
197
198 llvm::MDNode* enable_vec_node = create_md_node(
199 "llvm.loop.vectorize.enable", llvm::Type::getInt1Ty(context), 1);
200
201 llvm::MDNode* interleave_node = create_md_node(
202 "llvm.loop.interleave.count", llvm::Type::getInt32Ty(context), 4);
203
204 llvm::SmallVector<llvm::Metadata*,
205 5> // NOLINT(cppcoreguidelines-avoid-magic-numbers)
206 loop_md_elems;
207 loop_md_elems.push_back(nullptr); // to be replaced with self reference
208 loop_md_elems.push_back(enable_vec_node);
209 loop_md_elems.push_back(vec_width_node);
210 loop_md_elems.push_back(interleave_node);
211 llvm::MDNode* loop_id = llvm::MDNode::getDistinct(context, loop_md_elems);
212 loop_id->replaceOperandWith(0, loop_id);
213
214 loop_br->setMetadata(llvm::LLVMContext::MD_loop, loop_id);
215}

References context.

Referenced by ExprIRGenerator::generateLoops().

◆ assumeAligned()

void IRGeneratorBase::assumeAligned ( llvm::Value * ptr_value,
unsigned alignment )
protected

Definition at line 67 of file IRGeneratorBase.cpp.

68 {
69 llvm::Function* assume_fn = llvm::Intrinsic::getOrInsertDeclaration(
70 &module, llvm::Intrinsic::assume);
71 llvm::Value* cond = builder.getInt1(true);
72 llvm::SmallVector<llvm::Value*, 2> args;
73 args.push_back(ptr_value);
74 args.push_back(builder.getInt64(static_cast<uint64_t>(alignment)));
75 llvm::OperandBundleDefT<llvm::Value*> align_bundle("align", args);
76 builder.CreateCall(assume_fn, {cond}, {align_bundle});
77}

References builder, and module.

Referenced by generateLoadFromRowPtr(), ExprIRGenerator::generateLoops(), and generatePixelStore().

◆ createAllocaInEntry()

llvm::AllocaInst * IRGeneratorBase::createAllocaInEntry ( llvm::Type * type,
const std::string & name )
protected

Definition at line 60 of file IRGeneratorBase.cpp.

61 {
62 llvm::IRBuilder<> entry_builder(&func->getEntryBlock(),
63 func->getEntryBlock().begin());
64 return entry_builder.CreateAlloca(type, nullptr, name);
65}

References func.

Referenced by generateIRFromTokens(), ExprIRGenerator::generateLoops(), SingleExprIRGenerator::generateLoops(), and ExprIRGenerator::processModeSpecificToken().

◆ createIntrinsicCall()

template<typename... Args>
llvm::Value * IRGeneratorBase::createIntrinsicCall ( llvm::Intrinsic::ID intrinsic_id,
Args... args )
protected

Definition at line 149 of file IRGeneratorBase.hpp.

150 {
151 static_assert(sizeof...(Args) >= 1, "At least one argument required");
152 llvm::SmallVector<llvm::Value*, 4> arg_vec{args...};
153 auto* callee = llvm::Intrinsic::getOrInsertDeclaration(
154 &module, intrinsic_id, {arg_vec[0]->getType()});
155 auto* call = builder.CreateCall(callee, arg_vec);
156 call->setFastMathFlags(builder.getFastMathFlags());
157 return call;
158}

References builder, and module.

Referenced by generatePixelStore(), and processCommonToken().

◆ defineFunctionSignature()

virtual void IRGeneratorBase::defineFunctionSignature ( )
protected pure virtual

Implemented in ExprIRGenerator, and SingleExprIRGenerator.

Referenced by generate().

◆ finalizeAndStoreResult()

virtual void IRGeneratorBase::finalizeAndStoreResult ( llvm::Value * result_val,
llvm::Value * x,
llvm::Value * y )
protected pure virtual

Implemented in ExprIRGenerator, and SingleExprIRGenerator.

Referenced by generateIRFromTokens().

◆ generate()

void IRGeneratorBase::generate ( )

Definition at line 54 of file IRGeneratorBase.cpp.

54 {
55 defineFunctionSignature();
56 generateLoops();
57}
virtual void defineFunctionSignature()=0
virtual void generateLoops()=0

References defineFunctionSignature(), and generateLoops().

◆ generateIRFromTokens()

void IRGeneratorBase::generateIRFromTokens ( llvm::Value * x,
llvm::Value * y,
llvm::Value * x_fp,
llvm::Value * y_fp,
bool no_x_bounds_check )
protected

Definition at line 808 of file IRGeneratorBase.cpp.

810 {
811 llvm::Type* float_ty = builder.getFloatTy();
812 llvm::Type* i32_ty = builder.getInt32Ty();
813 llvm::Function* parent_func = builder.GetInsertBlock()->getParent();
814
815 bool use_approx_math = false;
816 if (approx_math == 1) {
817 use_approx_math = true;
818 } else if (approx_math == 2) {
819 // In auto mode, always try approx math first
820 use_approx_math = true;
821 }
822
823 if (tokens.empty()) {
824 generatePixelStore(llvm::ConstantFP::get(float_ty, 0.0), x, y);
825 return;
826 }
827
828 std::unordered_map<std::string, llvm::Value*> named_vars;
829 const auto& all_vars = analysis_results.getVariableUsageResult().all_vars;
830
831 for (const std::string& var_name : all_vars) {
832 named_vars[var_name] = createAllocaInEntry(float_ty, var_name);
833 }
834
835 std::map<int, llvm::BasicBlock*> llvm_blocks;
836 const auto& cfg_blocks = analysis_results.getCFGBlocks();
837 const auto& label_to_block_idx = analysis_results.getLabelToBlockIdx();
838 const auto& stack_depth_in = analysis_results.getStackDepthIn();
839
840 for (int i = 0; i < static_cast<int>(cfg_blocks.size()); ++i) {
841 std::string name = std::format("b{}", i);
842 for (const auto& [label_name, block_idx] : label_to_block_idx) {
843 if (block_idx == i) {
844 name = label_name;
845 break;
846 }
847 }
848 llvm_blocks[i] = llvm::BasicBlock::Create(context, name, parent_func);
849 }
850 llvm::BasicBlock* exit_bb =
851 llvm::BasicBlock::Create(context, "exit", parent_func);
852
853 // Branch from current block to the first CFG block
854 builder.CreateBr(llvm_blocks[0]);
855
856 // Initial PHI generation for merge blocks
857 std::map<int, std::vector<llvm::Value*>> block_initial_stacks;
858 for (int i = 0; i < static_cast<int>(cfg_blocks.size()); ++i) {
859 if (cfg_blocks[i].predecessors.size() > 1) {
860 builder.SetInsertPoint(llvm_blocks[i]);
861 std::vector<llvm::Value*> initial_stack;
862 int depth = stack_depth_in[i];
863 initial_stack.reserve(depth);
864 for (int j = 0; j < depth; ++j) {
865 initial_stack.push_back(builder.CreatePHI(
866 float_ty, cfg_blocks[i].predecessors.size()));
867 }
868 block_initial_stacks[i] = initial_stack;
869 }
870 }
871
872 // Process blocks
873 std::map<int, std::vector<llvm::Value*>> block_final_stacks;
874
875 for (int i = 0; i < static_cast<int>(cfg_blocks.size()); ++i) {
876 const auto& block_info = cfg_blocks[i];
877 builder.SetInsertPoint(llvm_blocks[i]);
878
879 std::vector<llvm::Value*> rpn_stack;
880 if (block_info.predecessors.empty()) {
881 // Entry block, empty stack
882 } else if (block_info.predecessors.size() == 1) {
883 int pred_idx = block_info.predecessors[0];
884 if (block_final_stacks.contains(pred_idx)) {
885 rpn_stack = block_final_stacks.at(pred_idx);
886 }
887 } else {
888 rpn_stack = block_initial_stacks.at(i);
889 }
890
891 for (int j = block_info.start_token_idx; j < block_info.end_token_idx;
892 ++j) {
893 const auto& token = tokens[j];
894
895 // Try common tokens first
896 if (processCommonToken(token, rpn_stack, float_ty, i32_ty,
897 use_approx_math)) {
898 continue;
899 }
900
901 // Variables
902 if (token.type == TokenType::VarStore) {
903 const auto& payload = std::get<TokenPayloadVar>(token.payload);
904 llvm::Value* val_to_store = rpn_stack.back();
905 rpn_stack.pop_back();
906 llvm::Value* var_ptr = named_vars[payload.name];
907 builder.CreateStore(val_to_store, var_ptr);
908 continue;
909 }
910 if (token.type == TokenType::VarLoad) {
911 const auto& payload = std::get<TokenPayloadVar>(token.payload);
912 llvm::Value* var_ptr = named_vars[payload.name];
913 rpn_stack.push_back(builder.CreateLoad(float_ty, var_ptr));
914 continue;
915 }
916
917 // Special tokens - delegate to derived class
918 if (!processModeSpecificToken(token, rpn_stack, x, y, x_fp, y_fp,
919 no_x_bounds_check)) {
920 throw std::runtime_error(std::format(
921 "Unhandled token type: {}", static_cast<int>(token.type)));
922 }
923 }
924
925 // Create Terminator
926 if (block_info.successors.empty()) {
927 builder.CreateBr(exit_bb);
928 } else if (block_info.successors.size() == 1) {
929 builder.CreateBr(llvm_blocks[block_info.successors[0]]);
930 } else { // size is 2, from a JUMP
931 llvm::Value* cond_val = rpn_stack.back();
932 llvm::Value* cond = builder.CreateFCmpOGT(
933 cond_val, llvm::ConstantFP::get(float_ty, 0.0));
934 builder.CreateCondBr(cond, llvm_blocks[block_info.successors[0]],
935 llvm_blocks[block_info.successors[1]]);
936 rpn_stack.pop_back();
937 }
938
939 block_final_stacks[i] = rpn_stack;
940 }
941
942 // Populate PHI nodes
943 for (int i = 0; i < static_cast<int>(cfg_blocks.size()); ++i) {
944 if (cfg_blocks[i].predecessors.size() > 1) {
945 auto& phis = block_initial_stacks.at(i);
946 for (int pred_idx : cfg_blocks[i].predecessors) {
947 auto& incoming_stack = block_final_stacks.at(pred_idx);
948 auto* incoming_block = llvm_blocks.at(pred_idx);
949 for (size_t j = 0; j < phis.size(); ++j) {
950 if (j < incoming_stack.size()) {
951 llvm::cast<llvm::PHINode>(phis[j])->addIncoming(
952 incoming_stack[j], incoming_block);
953 }
954 }
955 }
956 }
957 }
958
959 // Final Result PHI
960 builder.SetInsertPoint(exit_bb);
961 std::vector<std::pair<llvm::Value*, llvm::BasicBlock*>> final_values;
962 for (int i = 0; i < static_cast<int>(cfg_blocks.size()); ++i) {
963 if (cfg_blocks[i].successors.empty()) {
964 auto& stack = block_final_stacks.at(i);
965 if (!stack.empty()) {
966 final_values.emplace_back(stack.back(), llvm_blocks.at(i));
967 }
968 }
969 }
970
971 llvm::Value* result_val = nullptr;
972 if (final_values.empty()) {
973 result_val = llvm::UndefValue::get(float_ty);
974 } else if (final_values.size() == 1) {
975 result_val = final_values[0].first;
976 } else {
977 llvm::PHINode* phi =
978 builder.CreatePHI(float_ty, final_values.size(), "result_phi");
979 for (const auto& pair : final_values) {
980 phi->addIncoming(pair.first, pair.second);
981 }
982 result_val = phi;
983 }
984
985 // Let derived class handle exit logic (if any) and final store
986 finalizeAndStoreResult(result_val, x, y);
987}
virtual bool processModeSpecificToken(const Token &token, std::vector< llvm::Value * > &rpn_stack, llvm::Value *x, llvm::Value *y, llvm::Value *x_fp, llvm::Value *y_fp, bool no_x_bounds_check)=0
llvm::AllocaInst * createAllocaInEntry(llvm::Type *type, const std::string &name)
virtual void finalizeAndStoreResult(llvm::Value *result_val, llvm::Value *x, llvm::Value *y)=0
void generatePixelStore(llvm::Value *value_to_store, llvm::Value *x, llvm::Value *y)
bool processCommonToken(const Token &token, std::vector< llvm::Value * > &rpn_stack, llvm::Type *float_ty, llvm::Type *i32_ty, bool use_approx_math)

References analysis_results, approx_math, builder, context, createAllocaInEntry(), finalizeAndStoreResult(), generatePixelStore(), processCommonToken(), processModeSpecificToken(), tokens, VarLoad, and VarStore.

Referenced by SingleExprIRGenerator::generateLoops().

◆ generateLoadFromRowPtr()

llvm::Value * IRGeneratorBase::generateLoadFromRowPtr ( llvm::Value * row_ptr,
int clip_idx,
llvm::Value * x,
int rel_x,
bool use_mirror,
bool no_x_bounds_check )
protected

Definition at line 118 of file IRGeneratorBase.cpp.

122 {
123 const VSVideoInfo* vinfo = vi[clip_idx];
124 llvm::Value* coord_x = builder.CreateAdd(x, builder.getInt32(rel_x));
125 llvm::Value* final_x = nullptr;
126 if (no_x_bounds_check) {
127 final_x = coord_x;
128 } else {
129 final_x = getFinalCoord(coord_x, builder.getInt32(width), use_mirror);
130 }
131
132 const VSVideoFormat& format = vinfo->format;
133 int bpp = format.bytesPerSample;
134 int vs_clip_idx = clip_idx + 1;
135
136 llvm::Value* x_offset = builder.CreateMul(final_x, builder.getInt32(bpp));
137 llvm::Value* pixel_addr =
138 builder.CreateGEP(builder.getInt8Ty(), row_ptr, x_offset);
139
140 unsigned pixel_align = std::gcd(ALIGNMENT, bpp);
141 assumeAligned(pixel_addr, pixel_align);
142
143 llvm::Value* loaded_val = nullptr;
144 if (format.sampleType == stInteger) {
145 llvm::Type* load_type = nullptr;
146 if (bpp == 1) {
147 load_type = builder.getInt8Ty();
148 } else if (bpp == 2) {
149 load_type = builder.getInt16Ty();
150 } else {
151 load_type = builder.getInt32Ty();
152 }
153 llvm::LoadInst* li = builder.CreateLoad(load_type, pixel_addr);
154 setMemoryInstAttrs(li, pixel_align, vs_clip_idx);
155 loaded_val = builder.CreateZExtOrBitCast(li, builder.getInt32Ty());
156 return builder.CreateUIToFP(loaded_val, builder.getFloatTy());
157 }
158 // stFloat
159 if (bpp == 4) {
160 llvm::LoadInst* li =
161 builder.CreateLoad(builder.getFloatTy(), pixel_addr);
162 setMemoryInstAttrs(li, pixel_align, vs_clip_idx);
163 return li;
164 }
165 if (bpp == 2) {
166 llvm::LoadInst* li =
167 builder.CreateLoad(builder.getHalfTy(), pixel_addr);
168 setMemoryInstAttrs(li, pixel_align, vs_clip_idx);
169 return builder.CreateFPExt(li, builder.getFloatTy());
170 }
171 throw std::runtime_error("Unsupported float sample size.");
172}
constexpr unsigned ALIGNMENT
llvm::Value * getFinalCoord(llvm::Value *coord, llvm::Value *max_dim, bool use_mirror)
void setMemoryInstAttrs(MemInstT *inst, unsigned alignment, int rwptr_index)
void assumeAligned(llvm::Value *ptr_value, unsigned alignment)

References ALIGNMENT, assumeAligned(), builder, getFinalCoord(), setMemoryInstAttrs(), vi, and width.

Referenced by generatePixelLoad(), and ExprIRGenerator::processModeSpecificToken().

◆ generateLoops()

virtual void IRGeneratorBase::generateLoops ( )
protected pure virtual

Implemented in ExprIRGenerator, and SingleExprIRGenerator.

Referenced by generate().

◆ generatePixelLoad()

llvm::Value * IRGeneratorBase::generatePixelLoad ( int clip_idx,
llvm::Value * x,
llvm::Value * y,
bool mirror )
protected

Definition at line 217 of file IRGeneratorBase.cpp.

218 {
219 llvm::Value* final_x = getFinalCoord(x, builder.getInt32(width), mirror);
220 llvm::Value* final_y = getFinalCoord(y, builder.getInt32(height), mirror);
221
222 int vs_clip_idx = clip_idx + 1;
223 llvm::Value* base_ptr = preloaded_base_ptrs[vs_clip_idx];
224 llvm::Value* stride = preloaded_strides[vs_clip_idx];
225
226 llvm::Value* y_offset = builder.CreateMul(final_y, stride);
227 llvm::Value* row_ptr =
228 builder.CreateGEP(builder.getInt8Ty(), base_ptr, y_offset);
229
230 return generateLoadFromRowPtr(row_ptr, clip_idx, final_x, 0, mirror, true);
231}
llvm::Value * generateLoadFromRowPtr(llvm::Value *row_ptr, int clip_idx, llvm::Value *x, int rel_x, bool use_mirror, bool no_x_bounds_check)
std::vector< llvm::Value * > preloaded_base_ptrs
std::vector< llvm::Value * > preloaded_strides

References builder, generateLoadFromRowPtr(), getFinalCoord(), height, preloaded_base_ptrs, preloaded_strides, and width.

Referenced by ExprIRGenerator::processModeSpecificToken().

◆ generatePixelStore()

void IRGeneratorBase::generatePixelStore ( llvm::Value * value_to_store,
llvm::Value * x,
llvm::Value * y )
protected

Definition at line 233 of file IRGeneratorBase.cpp.

234 {
235 const VSVideoFormat& format = vo->format;
236 int bpp = format.bytesPerSample;
237 constexpr int DST_IDX = 0;
238
239 llvm::Value* base_ptr = preloaded_base_ptrs[DST_IDX];
240 llvm::Value* stride = preloaded_strides[DST_IDX];
241
242 llvm::Value* y_offset = builder.CreateMul(y, stride);
243 llvm::Value* x_offset = builder.CreateMul(x, builder.getInt32(bpp));
244 llvm::Value* total_offset = builder.CreateAdd(y_offset, x_offset);
245 llvm::Value* pixel_addr =
246 builder.CreateGEP(builder.getInt8Ty(), base_ptr, total_offset);
247
248 unsigned pixel_align = std::gcd(ALIGNMENT, bpp);
249 assumeAligned(pixel_addr, pixel_align);
250
251 llvm::Value* final_val = nullptr;
252 if (format.sampleType == stInteger) {
253 int max_val = (1 << format.bitsPerSample) - 1;
254 llvm::Value* zero_f = llvm::ConstantFP::get(builder.getFloatTy(), 0.0);
255 llvm::Value* max_f = llvm::ConstantFP::get(
256 builder.getFloatTy(), static_cast<double>(max_val));
257
258 llvm::Value* temp = createIntrinsicCall(llvm::Intrinsic::maxnum,
259 value_to_store, zero_f);
260 llvm::Value* clamped_f =
261 createIntrinsicCall(llvm::Intrinsic::minnum, temp, max_f);
262
263 llvm::Value* rounded_f =
264 createIntrinsicCall(llvm::Intrinsic::roundeven, clamped_f);
265
266 llvm::Type* store_type = nullptr;
267 if (bpp == 1) {
268 store_type = builder.getInt8Ty();
269 } else if (bpp == 2) {
270 store_type = builder.getInt16Ty();
271 } else {
272 store_type = builder.getInt32Ty();
273 }
274 final_val = builder.CreateFPToUI(rounded_f, store_type);
275 llvm::StoreInst* si = builder.CreateStore(final_val, pixel_addr);
276 setMemoryInstAttrs(si, pixel_align, DST_IDX);
277 } else {
278 if (bpp == 4) {
279 llvm::StoreInst* si =
280 builder.CreateStore(value_to_store, pixel_addr);
281 setMemoryInstAttrs(si, pixel_align, DST_IDX);
282 } else if (bpp == 2) {
283 llvm::Value* truncated_val =
284 builder.CreateFPTrunc(value_to_store, builder.getHalfTy());
285 llvm::StoreInst* si =
286 builder.CreateStore(truncated_val, pixel_addr);
287 setMemoryInstAttrs(si, pixel_align, DST_IDX);
288 } else {
289 throw std::runtime_error("Unsupported float sample size.");
290 }
291 }
292}
llvm::Value * createIntrinsicCall(llvm::Intrinsic::ID intrinsic_id, Args... args)

References ALIGNMENT, assumeAligned(), builder, createIntrinsicCall(), preloaded_base_ptrs, preloaded_strides, setMemoryInstAttrs(), and vo.

Referenced by ExprIRGenerator::finalizeAndStoreResult(), generateIRFromTokens(), and ExprIRGenerator::processModeSpecificToken().

◆ getFinalCoord()

llvm::Value * IRGeneratorBase::getFinalCoord ( llvm::Value * coord,
llvm::Value * max_dim,
bool use_mirror )
protected

Definition at line 79 of file IRGeneratorBase.cpp.

81 {
82 llvm::Value* zero = builder.getInt32(0);
83 llvm::Value* one = builder.getInt32(1);
84
85 llvm::Value* result = nullptr;
86 if (use_mirror) {
87 auto* period = builder.CreateMul(max_dim, builder.getInt32(2));
88
89 auto* modulo_coord = builder.CreateSRem(coord, period);
90
91 auto* is_negative = builder.CreateICmpSLT(modulo_coord, zero);
92 auto* adjusted_modulo = builder.CreateAdd(modulo_coord, period);
93 modulo_coord =
94 builder.CreateSelect(is_negative, adjusted_modulo, modulo_coord);
95
96 auto* in_first_half = builder.CreateICmpSLT(modulo_coord, max_dim);
97 auto* period_minus_1 = builder.CreateSub(period, one);
98 auto* mirrored_coord = builder.CreateSub(period_minus_1, modulo_coord);
99
100 result =
101 builder.CreateSelect(in_first_half, modulo_coord, mirrored_coord);
102 } else { // Clamping
103 // clamp(coord, 0, max_dim - 1)
104 auto* dim_minus_1 = builder.CreateSub(max_dim, one);
105
106 llvm::Function* smax_func = llvm::Intrinsic::getOrInsertDeclaration(
107 &module, llvm::Intrinsic::smax, {builder.getInt32Ty()});
108 llvm::Function* smin_func = llvm::Intrinsic::getOrInsertDeclaration(
109 &module, llvm::Intrinsic::smin, {builder.getInt32Ty()});
110
111 auto* clamped_at_zero = builder.CreateCall(smax_func, {coord, zero});
112 result = builder.CreateCall(smin_func, {clamped_at_zero, dim_minus_1});
113 }
114
115 return result;
116}

References builder, and module.

Referenced by generateLoadFromRowPtr(), ExprIRGenerator::generateLoops(), and generatePixelLoad().

◆ operator=() [1/2]

IRGeneratorBase & IRGeneratorBase::operator= ( const IRGeneratorBase & )
delete

References IRGeneratorBase().

◆ operator=() [2/2]

IRGeneratorBase & IRGeneratorBase::operator= ( IRGeneratorBase && )
delete

References IRGeneratorBase().

◆ processCommonToken()

bool IRGeneratorBase::processCommonToken ( const Token & token,
std::vector< llvm::Value * > & rpn_stack,
llvm::Type * float_ty,
llvm::Type * i32_ty,
bool use_approx_math )
protected

Definition at line 294 of file IRGeneratorBase.cpp.

298 {
299 auto apply_stack_op = [&]<size_t ARITY>(auto&& op) {
300 std::array<llvm::Value*, ARITY> args{};
301 for (size_t i = ARITY; i > 0; --i) {
302 args.at(i - 1) = rpn_stack.back();
303 rpn_stack.pop_back();
304 }
305 rpn_stack.push_back(std::apply(op, args));
306 };
307
308 auto apply_intrinsic = [&]<size_t ARITY>(llvm::Intrinsic::ID id) {
309 apply_stack_op.operator()<ARITY>(
310 [&](auto... args) { return createIntrinsicCall(id, args...); });
311 };
312
313 auto apply_binary_op = [&](auto op_callable) {
314 apply_stack_op.operator()<2>(
315 [&](auto a, auto b) { return op_callable(a, b); });
316 };
317
318 auto apply_binary_cmp = [&](llvm::CmpInst::Predicate pred) {
319 apply_stack_op.operator()<2>([&](auto a, auto b) {
320 auto cmp = builder.CreateFCmp(pred, a, b);
321 return builder.CreateSelect(cmp,
322 llvm::ConstantFP::get(float_ty, 1.0),
323 llvm::ConstantFP::get(float_ty, 0.0));
324 });
325 };
326
327 auto apply_logical_op = [&](auto op) {
328 apply_stack_op.operator()<2>([&](auto a_val, auto b_val) {
329 auto a_bool = builder.CreateFCmpOGT(
330 a_val, llvm::ConstantFP::get(float_ty, 0.0));
331 auto b_bool = builder.CreateFCmpOGT(
332 b_val, llvm::ConstantFP::get(float_ty, 0.0));
333 auto logic_res = op(a_bool, b_bool);
334 return builder.CreateSelect(logic_res,
335 llvm::ConstantFP::get(float_ty, 1.0),
336 llvm::ConstantFP::get(float_ty, 0.0));
337 });
338 };
339
340 auto apply_bitwise_op = [&](auto op) {
341 apply_stack_op.operator()<2>([&](auto a, auto b) {
342 auto a_rounded = createIntrinsicCall(llvm::Intrinsic::nearbyint, a);
343 auto b_rounded = createIntrinsicCall(llvm::Intrinsic::nearbyint, b);
344 auto ai = builder.CreateFPToSI(a_rounded, i32_ty);
345 auto bi = builder.CreateFPToSI(b_rounded, i32_ty);
346 auto resi = op(ai, bi);
347 return builder.CreateSIToFP(resi, float_ty);
348 });
349 };
350
351 auto apply_approx_math_op =
352 [&]<size_t ARITY>(MathOp math_op, llvm::Intrinsic::ID intrinsic_id) {
353 static_assert(ARITY == 1 || ARITY == 2,
354 "Only unary or binary operations supported");
355
356 std::array<llvm::Value*, ARITY> args{};
357 for (size_t i = 0; i < ARITY; ++i) {
358 args.at(ARITY - 1 - i) = rpn_stack.back();
359 rpn_stack.pop_back();
360 }
361
362 if (use_approx_math) {
363 auto* callee = math_manager.getFunction(math_op);
364 llvm::SmallVector<llvm::Value*, 2> call_args(args.begin(),
365 args.end());
366 auto* call = builder.CreateCall(callee, call_args);
367 call->setFastMathFlags(builder.getFastMathFlags());
368 rpn_stack.push_back(call);
369 } else {
370 rpn_stack.push_back(std::apply(
371 [&](auto... args) {
372 return createIntrinsicCall(intrinsic_id, args...);
373 },
374 args));
375 }
376 };
377
378 switch (token.type) {
379 case TokenType::Number: {
380 const auto& payload = std::get<TokenPayloadNumber>(token.payload);
381 rpn_stack.push_back(llvm::ConstantFP::get(float_ty, payload.value));
382 return true;
383 }
384 case TokenType::ConstantWidth:
385 rpn_stack.push_back(
386 builder.CreateSIToFP(builder.getInt32(width), float_ty));
387 return true;
388 case TokenType::ConstantHeight:
389 rpn_stack.push_back(
390 builder.CreateSIToFP(builder.getInt32(height), float_ty));
391 return true;
392 case TokenType::ConstantN:
393 rpn_stack.push_back(builder.CreateLoad(
394 float_ty,
395 builder.CreateGEP(float_ty, props_arg, builder.getInt32(0))));
396 return true;
397 case TokenType::ConstantPi:
398 rpn_stack.push_back(llvm::ConstantFP::get(float_ty, std::numbers::pi));
399 return true;
400
401 // Binary Operators
402 case TokenType::Add:
403 apply_binary_op([&](llvm::Value* a, llvm::Value* b) {
404 return builder.CreateFAdd(a, b);
405 });
406 return true;
407 case TokenType::Sub:
408 apply_binary_op([&](llvm::Value* a, llvm::Value* b) {
409 return builder.CreateFSub(a, b);
410 });
411 return true;
412 case TokenType::Mul:
413 apply_binary_op([&](llvm::Value* a, llvm::Value* b) {
414 return builder.CreateFMul(a, b);
415 });
416 return true;
417 case TokenType::Div:
418 apply_binary_op([&](llvm::Value* a, llvm::Value* b) {
419 return builder.CreateFDiv(a, b);
420 });
421 return true;
422 case TokenType::Mod:
423 apply_binary_op([&](llvm::Value* a, llvm::Value* b) {
424 return builder.CreateFRem(a, b);
425 });
426 return true;
427 case TokenType::Pow:
428 apply_intrinsic.operator()<2>(llvm::Intrinsic::pow);
429 return true;
430 case TokenType::Atan2:
431 apply_approx_math_op.operator()<2>(MathOp::Atan2,
432 llvm::Intrinsic::atan2);
433 return true;
434 case TokenType::Copysign:
435 apply_intrinsic.operator()<2>(llvm::Intrinsic::copysign);
436 return true;
437 case TokenType::Min:
438 apply_intrinsic.operator()<2>(llvm::Intrinsic::minnum);
439 return true;
440 case TokenType::Max:
441 apply_intrinsic.operator()<2>(llvm::Intrinsic::maxnum);
442 return true;
443
444 // Binary comparisons
445 case TokenType::Gt:
446 apply_binary_cmp(llvm::CmpInst::FCMP_OGT);
447 return true;
448 case TokenType::Lt:
449 apply_binary_cmp(llvm::CmpInst::FCMP_OLT);
450 return true;
451 case TokenType::Ge:
452 apply_binary_cmp(llvm::CmpInst::FCMP_OGE);
453 return true;
454 case TokenType::Le:
455 apply_binary_cmp(llvm::CmpInst::FCMP_OLE);
456 return true;
457 case TokenType::Eq:
458 apply_binary_cmp(llvm::CmpInst::FCMP_OEQ);
459 return true;
460
461 // Logical ops
462 case TokenType::And:
463 apply_logical_op(
464 [&](auto a, auto b) { return builder.CreateAnd(a, b); });
465 return true;
466 case TokenType::Or:
467 apply_logical_op(
468 [&](auto a, auto b) { return builder.CreateOr(a, b); });
469 return true;
470 case TokenType::Xor:
471 apply_logical_op(
472 [&](auto a, auto b) { return builder.CreateXor(a, b); });
473 return true;
474
475 // Bitwise ops
476 case TokenType::Bitand:
477 apply_bitwise_op(
478 [&](auto a, auto b) { return builder.CreateAnd(a, b); });
479 return true;
480 case TokenType::Bitor:
481 apply_bitwise_op(
482 [&](auto a, auto b) { return builder.CreateOr(a, b); });
483 return true;
484 case TokenType::Bitxor:
485 apply_bitwise_op(
486 [&](auto a, auto b) { return builder.CreateXor(a, b); });
487 return true;
488
489 // Unary Operators
490 case TokenType::Sqrt: {
491 auto* a = rpn_stack.back();
492 rpn_stack.pop_back();
493 auto* zero = llvm::ConstantFP::get(float_ty, 0.0);
494 auto* max_val = createIntrinsicCall(llvm::Intrinsic::maxnum, a, zero);
495 rpn_stack.push_back(
496 createIntrinsicCall(llvm::Intrinsic::sqrt, max_val));
497 return true;
498 }
499 case TokenType::Exp:
500 apply_approx_math_op.operator()<1>(MathOp::Exp, llvm::Intrinsic::exp);
501 return true;
502 case TokenType::Log:
503 apply_approx_math_op.operator()<1>(MathOp::Log, llvm::Intrinsic::log);
504 return true;
505 case TokenType::Abs:
506 apply_intrinsic.operator()<1>(llvm::Intrinsic::fabs);
507 return true;
508 case TokenType::Floor:
509 apply_intrinsic.operator()<1>(llvm::Intrinsic::floor);
510 return true;
511 case TokenType::Ceil:
512 apply_intrinsic.operator()<1>(llvm::Intrinsic::ceil);
513 return true;
514 case TokenType::Trunc:
515 apply_intrinsic.operator()<1>(llvm::Intrinsic::trunc);
516 return true;
517 case TokenType::Round:
518 apply_intrinsic.operator()<1>(llvm::Intrinsic::round);
519 return true;
520 case TokenType::Sin:
521 apply_approx_math_op.operator()<1>(MathOp::Sin, llvm::Intrinsic::sin);
522 return true;
523 case TokenType::Cos:
524 apply_approx_math_op.operator()<1>(MathOp::Cos, llvm::Intrinsic::cos);
525 return true;
526 case TokenType::Tan:
527 apply_approx_math_op.operator()<1>(MathOp::Tan, llvm::Intrinsic::tan);
528 return true;
529 case TokenType::Asin:
530 apply_approx_math_op.operator()<1>(MathOp::Asin, llvm::Intrinsic::asin);
531 return true;
532 case TokenType::Acos:
533 apply_approx_math_op.operator()<1>(MathOp::Acos, llvm::Intrinsic::acos);
534 return true;
535 case TokenType::Atan:
536 apply_approx_math_op.operator()<1>(MathOp::Atan, llvm::Intrinsic::atan);
537 return true;
538 case TokenType::Exp2:
539 apply_intrinsic.operator()<1>(llvm::Intrinsic::exp2);
540 return true;
541 case TokenType::Log10:
542 apply_intrinsic.operator()<1>(llvm::Intrinsic::log10);
543 return true;
544 case TokenType::Log2:
545 apply_intrinsic.operator()<1>(llvm::Intrinsic::log2);
546 return true;
547 case TokenType::Sinh:
548 apply_intrinsic.operator()<1>(llvm::Intrinsic::sinh);
549 return true;
550 case TokenType::Cosh:
551 apply_intrinsic.operator()<1>(llvm::Intrinsic::cosh);
552 return true;
553 case TokenType::Tanh:
554 apply_intrinsic.operator()<1>(llvm::Intrinsic::tanh);
555 return true;
556 case TokenType::Sgn: {
557 auto* x = rpn_stack.back();
558 rpn_stack.pop_back();
559 auto* zero = llvm::ConstantFP::get(float_ty, 0.0);
560 auto* one = llvm::ConstantFP::get(float_ty, 1.0);
561 auto* nonzero = builder.CreateFCmpONE(x, zero);
562 auto* sign = builder.CreateCall(
563 llvm::Intrinsic::getOrInsertDeclaration(
564 &module, llvm::Intrinsic::copysign, {float_ty}),
565 {one, x});
566 rpn_stack.push_back(builder.CreateSelect(nonzero, sign, zero));
567 return true;
568 }
569 case TokenType::Neg: {
570 auto* a = rpn_stack.back();
571 rpn_stack.pop_back();
572 rpn_stack.push_back(builder.CreateFNeg(a));
573 return true;
574 }
575 case TokenType::Not: {
576 auto* a = rpn_stack.back();
577 rpn_stack.pop_back();
578 rpn_stack.push_back(builder.CreateSelect(
579 builder.CreateFCmpOLE(a, llvm::ConstantFP::get(float_ty, 0.0)),
580 llvm::ConstantFP::get(float_ty, 1.0),
581 llvm::ConstantFP::get(float_ty, 0.0)));
582 return true;
583 }
584 case TokenType::Bitnot: {
585 auto* a = rpn_stack.back();
586 rpn_stack.pop_back();
587 auto* a_rounded = createIntrinsicCall(llvm::Intrinsic::nearbyint, a);
588 rpn_stack.push_back(builder.CreateSIToFP(
589 builder.CreateNot(builder.CreateFPToSI(a_rounded, i32_ty)),
590 float_ty));
591 return true;
592 }
593
594 // Ternary and other multi-arg
595 case TokenType::Ternary: {
596 auto* c = rpn_stack.back();
597 rpn_stack.pop_back();
598 auto* b = rpn_stack.back();
599 rpn_stack.pop_back();
600 auto* a = rpn_stack.back();
601 rpn_stack.pop_back();
602 rpn_stack.push_back(builder.CreateSelect(
603 builder.CreateFCmpOGT(a, llvm::ConstantFP::get(float_ty, 0.0)), b,
604 c));
605 return true;
606 }
607 case TokenType::Clip:
608 case TokenType::Clamp: {
609 auto* max_val = rpn_stack.back();
610 rpn_stack.pop_back();
611 auto* min_val = rpn_stack.back();
612 rpn_stack.pop_back();
613 auto* val = rpn_stack.back();
614 rpn_stack.pop_back();
615 auto* temp = createIntrinsicCall(llvm::Intrinsic::maxnum, val, min_val);
616 auto* clamped =
617 createIntrinsicCall(llvm::Intrinsic::minnum, temp, max_val);
618 rpn_stack.push_back(clamped);
619 return true;
620 }
621 case TokenType::Fma: {
622 auto* c = rpn_stack.back();
623 rpn_stack.pop_back();
624 auto* b = rpn_stack.back();
625 rpn_stack.pop_back();
626 auto* a = rpn_stack.back();
627 rpn_stack.pop_back();
628 rpn_stack.push_back(builder.CreateCall(
629 llvm::Intrinsic::getOrInsertDeclaration(
630 &module, llvm::Intrinsic::fma, {builder.getFloatTy()}),
631 {a, b, c}));
632 return true;
633 }
634
635 // Stack manipulation
636 case TokenType::Dup: {
637 const auto& payload = std::get<TokenPayloadStackOp>(token.payload);
638 rpn_stack.push_back(rpn_stack[rpn_stack.size() - 1 - payload.n]);
639 return true;
640 }
641 case TokenType::Drop: {
642 const auto& payload = std::get<TokenPayloadStackOp>(token.payload);
643 if (payload.n > 0) {
644 rpn_stack.resize(rpn_stack.size() - payload.n);
645 }
646 return true;
647 }
648 case TokenType::Swap: {
649 const auto& payload = std::get<TokenPayloadStackOp>(token.payload);
650 std::swap(rpn_stack.back(),
651 rpn_stack[rpn_stack.size() - 1 - payload.n]);
652 return true;
653 }
654 case TokenType::SortN: {
655 const auto& payload = std::get<TokenPayloadStackOp>(token.payload);
656 int n = payload.n;
657 if (n < 2) {
658 return true;
659 }
660
661 std::vector<llvm::Value*> values;
662 values.reserve(n);
663 for (int k = 0; k < n; ++k) {
664 values.push_back(rpn_stack.back());
665 rpn_stack.pop_back();
666 }
667
668 auto compare_swap = [&](int i_idx, int j_idx) {
669 llvm::Value* val_i = values[i_idx];
670 llvm::Value* val_j = values[j_idx];
671 llvm::Value* cond = builder.CreateFCmpOGT(val_i, val_j);
672 values[i_idx] = builder.CreateSelect(cond, val_j, val_i); // min
673 values[j_idx] = builder.CreateSelect(cond, val_i, val_j); // max
674 };
675
676 auto network = get_sorting_network(n);
677 for (const auto& pair : network) {
678 compare_swap(pair.first, pair.second);
679 }
680
681 for (int k = n - 1; k >= 0; --k) {
682 rpn_stack.push_back(values[k]);
683 }
684 return true;
685 }
686 case TokenType::ArgminN:
687 case TokenType::ArgmaxN: {
688 const auto& payload = std::get<TokenPayloadStackOp>(token.payload);
689 int n = payload.n;
690 if (n < 1) {
691 rpn_stack.push_back(
692 llvm::ConstantFP::get(builder.getFloatTy(), 0.0));
693 return true;
694 }
695
696 std::vector<llvm::Value*> values(n);
697 for (int i = 0; i < n; ++i) {
698 values[i] = rpn_stack.back();
699 rpn_stack.pop_back();
700 }
701
702 struct Node {
703 llvm::Value* val;
704 llvm::Value* idx;
705 };
706 std::vector<Node> current_level;
707 current_level.reserve(n);
708 for (int i = 0; i < n; ++i) {
709 current_level.push_back(
710 {values[i],
711 llvm::ConstantFP::get(builder.getFloatTy(),
712 static_cast<double>(n - 1 - i))});
713 }
714
715 bool is_max = (token.type == TokenType::ArgmaxN);
716
717 while (current_level.size() > 1) {
718 std::vector<Node> next_level;
719 for (size_t i = 0; i < current_level.size(); i += 2) {
720 if (i + 1 < current_level.size()) {
721 const auto& left = current_level[i];
722 const auto& right = current_level[i + 1];
723
724 llvm::Value* cmp_val =
725 is_max ? builder.CreateFCmpOGT(left.val, right.val)
726 : builder.CreateFCmpOLT(left.val, right.val);
727
728 llvm::Value* eq_val =
729 builder.CreateFCmpOEQ(left.val, right.val);
730 llvm::Value* cmp_idx =
731 builder.CreateFCmpOLT(left.idx, right.idx);
732 llvm::Value* tie_break = builder.CreateAnd(eq_val, cmp_idx);
733 llvm::Value* cond = builder.CreateOr(cmp_val, tie_break);
734
735 next_level.push_back(
736 {builder.CreateSelect(cond, left.val, right.val),
737 builder.CreateSelect(cond, left.idx, right.idx)});
738 } else {
739 next_level.push_back(current_level[i]);
740 }
741 }
742 current_level = std::move(next_level);
743 }
744 rpn_stack.push_back(current_level[0].idx);
745 return true;
746 }
747 case TokenType::ArgsortN: {
748 const auto& payload = std::get<TokenPayloadStackOp>(token.payload);
749 int n = payload.n;
750 if (n < 1) {
751 return true;
752 }
753 if (n == 1) {
754 rpn_stack.pop_back();
755 rpn_stack.push_back(
756 llvm::ConstantFP::get(builder.getFloatTy(), 0.0));
757 return true;
758 }
759
760 std::vector<llvm::Value*> values(n);
761 std::vector<llvm::Value*> indices(n);
762 for (int i = 0; i < n; ++i) {
763 values[i] = rpn_stack.back();
764 rpn_stack.pop_back();
765 indices[i] = llvm::ConstantFP::get(builder.getFloatTy(),
766 static_cast<double>(n - 1 - i));
767 }
768
769 auto network = get_sorting_network(n);
770 for (const auto& pair : network) {
771 int i1 = pair.first;
772 int i2 = pair.second;
773
774 llvm::Value* v1 = values[i1];
775 llvm::Value* v2 = values[i2];
776 llvm::Value* idx1 = indices[i1];
777 llvm::Value* idx2 = indices[i2];
778
779 llvm::Value* cmp_val = builder.CreateFCmpOGT(v1, v2);
780 llvm::Value* eq_val = builder.CreateFCmpOEQ(v1, v2);
781 llvm::Value* cmp_idx = builder.CreateFCmpOGT(idx1, idx2);
782 llvm::Value* tie_break = builder.CreateAnd(eq_val, cmp_idx);
783 llvm::Value* cond = builder.CreateOr(cmp_val, tie_break);
784
785 values[i1] = builder.CreateSelect(cond, v2, v1);
786 values[i2] = builder.CreateSelect(cond, v1, v2);
787 indices[i1] = builder.CreateSelect(cond, idx2, idx1);
788 indices[i2] = builder.CreateSelect(cond, idx1, idx2);
789 }
790
791 for (int i = n - 1; i >= 0; --i) {
792 rpn_stack.push_back(indices[i]);
793 }
794 return true;
795 }
796
797 // Control Flow (no-op during this pass)
798 case TokenType::LabelDef:
799 case TokenType::Jump:
800 return true;
801
802 default:
803 // Not a common token - let derived class handle it
804 return false;
805 }
806}
MathOp
Definition Math.hpp:69
@ Sin
Definition Math.hpp:72
@ Tan
Definition Math.hpp:74
@ Atan2
Definition Math.hpp:76
@ Asin
Definition Math.hpp:78
@ Atan
Definition Math.hpp:75
@ Exp
Definition Math.hpp:70
@ Log
Definition Math.hpp:71
@ Acos
Definition Math.hpp:77
@ Cos
Definition Math.hpp:73
constexpr std::vector< std::pair< int, int > > get_sorting_network(int n)
Definition Sorting.hpp:2149
TokenType type
PayloadVariant payload

References Abs, Acos, Add, And, ArgmaxN, ArgminN, ArgsortN, Asin, Atan, Atan2, Bitand, Bitnot, Bitor, Bitxor, builder, Ceil, Clamp, Clip, ConstantHeight, ConstantN, ConstantPi, ConstantWidth, Copysign, Cos, Cosh, createIntrinsicCall(), Div, Drop, Dup, Eq, Exp, Exp2, Floor, Fma, Ge, get_sorting_network(), Gt, height, Jump, LabelDef, Le, Log, Log10, Log2, Lt, math_manager, Max, Min, Mod, module, Mul, Neg, Not, Number, Or, Token::payload, Pow, props_arg, Round, Sgn, Sin, Sinh, SortN, Sqrt, Sub, Swap, Tan, Tanh, Ternary, Trunc, Token::type, width, and Xor.

Referenced by generateIRFromTokens().

◆ processModeSpecificToken()

virtual bool IRGeneratorBase::processModeSpecificToken ( const Token & token,
std::vector< llvm::Value * > & rpn_stack,
llvm::Value * x,
llvm::Value * y,
llvm::Value * x_fp,
llvm::Value * y_fp,
bool no_x_bounds_check )
protected pure virtual

Implemented in ExprIRGenerator, and SingleExprIRGenerator.

Referenced by generateIRFromTokens().

◆ setMemoryInstAttrs()

template<typename MemInstT>
void IRGeneratorBase::setMemoryInstAttrs ( MemInstT * inst,
unsigned alignment,
int rwptr_index )
protected

Definition at line 161 of file IRGeneratorBase.hpp.

162 {
163 inst->setAlignment(llvm::Align(alignment));
164 inst->setMetadata(llvm::LLVMContext::MD_alias_scope,
165 alias_scope_lists[rwptr_index]);
166 inst->setMetadata(llvm::LLVMContext::MD_noalias,
167 noalias_scope_lists[rwptr_index]);
168}
std::vector< llvm::MDNode * > alias_scope_lists
std::vector< llvm::MDNode * > noalias_scope_lists

References alias_scope_lists, and noalias_scope_lists.

Referenced by generateLoadFromRowPtr(), and generatePixelStore().

Member Data Documentation

◆ alias_scope_domain

llvm::MDNode* IRGeneratorBase::alias_scope_domain
protected

Definition at line 87 of file IRGeneratorBase.hpp.

Referenced by ExprIRGenerator::generateLoops(), and IRGeneratorBase().

◆ alias_scope_lists

std::vector<llvm::MDNode*> IRGeneratorBase::alias_scope_lists
protected

Definition at line 89 of file IRGeneratorBase.hpp.

Referenced by ExprIRGenerator::generateLoops(), and setMemoryInstAttrs().

◆ alias_scopes

std::vector<llvm::MDNode*> IRGeneratorBase::alias_scopes
protected

Definition at line 88 of file IRGeneratorBase.hpp.

Referenced by ExprIRGenerator::generateLoops().

◆ analysis_results

const analysis::ExpressionAnalysisResults& IRGeneratorBase::analysis_results
protected

◆ approx_math

int IRGeneratorBase::approx_math
protected

Definition at line 72 of file IRGeneratorBase.hpp.

Referenced by generateIRFromTokens(), and IRGeneratorBase().

◆ builder

◆ context

◆ func

◆ func_name

std::string IRGeneratorBase::func_name
protected

◆ height

◆ math_manager

MathLibraryManager& IRGeneratorBase::math_manager
protected

Definition at line 77 of file IRGeneratorBase.hpp.

Referenced by IRGeneratorBase(), and processCommonToken().

◆ mirror_boundary

bool IRGeneratorBase::mirror_boundary
protected

◆ module

◆ noalias_scope_lists

std::vector<llvm::MDNode*> IRGeneratorBase::noalias_scope_lists
protected

Definition at line 90 of file IRGeneratorBase.hpp.

Referenced by ExprIRGenerator::generateLoops(), and setMemoryInstAttrs().

◆ num_inputs

int IRGeneratorBase::num_inputs
protected

◆ preloaded_base_ptrs

std::vector<llvm::Value*> IRGeneratorBase::preloaded_base_ptrs
protected

◆ preloaded_strides

std::vector<llvm::Value*> IRGeneratorBase::preloaded_strides
protected

◆ prop_map

const std::map<std::pair<int, std::string>, int>& IRGeneratorBase::prop_map
protected

◆ props_arg

◆ row_ptr_cache

std::map<analysis::RelYAccess, llvm::Value*> IRGeneratorBase::row_ptr_cache
protected

◆ rwptrs_arg

◆ strides_arg

◆ tokens

const std::vector<Token>& IRGeneratorBase::tokens
protected

◆ vi

const std::vector<const VSVideoInfo*>& IRGeneratorBase::vi
protected

◆ vo

const VSVideoInfo* IRGeneratorBase::vo
protected

◆ width


The documentation for this class was generated from the following files: