VapourSynth-llvmexpr
Loading...
Searching...
No Matches
llvmexpr.cpp
Go to the documentation of this file.
1
19
20#include <array>
21#include <bit>
22#include <chrono>
23#include <cmath>
24#include <cstdint>
25#include <cstring>
26#include <format>
27#include <fstream>
28#include <limits>
29#include <map>
30#include <memory>
31#include <mutex>
32#include <stdexcept>
33#include <string>
34#include <unordered_map>
35#include <utility>
36#include <vector>
37
38#include "VSHelper4.h"
39#include "VapourSynth4.h"
40
49#include "runtime/llvm/Jit.hpp"
51
// Sentinel bit patterns exchanged through the float property array. Each is a
// quiet NaN whose low 16 bits carry a recognisable tag.
//
// READ:   stored in place of an input property that could not be read.
// WRITE:  initial content of an output property slot; a slot still holding
//         it after evaluation is not written to the frame.
// DELETE: found in an output slot, it requests deletion of that property.
constexpr uint32_t PROP_READ_NAN_PAYLOAD{0x7FC0'BEEF};   // qNaN with payload 0xBEEF
constexpr uint32_t PROP_WRITE_NAN_PAYLOAD{0x7FC0'DEAD};  // qNaN with payload 0xDEAD
constexpr uint32_t PROP_DELETE_NAN_PAYLOAD{0x7FC0'DE1E}; // qNaN with payload 0xDE1E
58
59namespace {
60
// Per-plane action of the filter.
enum class PlaneOp : std::uint8_t {
    PoProcess, // run the compiled expression for this plane
    PoCopy,    // take the plane from the first input clip
};
62
63struct BaseExprData {
64 std::vector<VSNode*> nodes;
65 VSVideoInfo vi = {};
66 int num_inputs = 0;
67 bool mirror_boundary = false;
68 std::string dump_ir_path;
69 int opt_level = 5; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
70 int approx_math = 2;
71 std::vector<std::pair<int, std::string>> required_props;
72 std::map<std::pair<int, std::string>, int> prop_map;
73};
74
75struct ExprData : BaseExprData {
76 std::array<PlaneOp, 3> plane_op = {};
77 std::array<CompiledFunction, 3> compiled;
78 std::array<std::vector<Token>, 3> tokens;
79 std::array<std::unique_ptr<analysis::AnalysisManager>, 3> analysis_managers;
80 int tile_x = 0;
81 int tile_y = 0;
82};
83
84struct SingleExprData : BaseExprData {
85 CompiledFunction compiled;
86 std::vector<std::pair<std::string, PropWriteType>> output_props;
87 std::map<std::string, int> output_prop_map;
88 std::vector<Token> tokens;
89 std::unique_ptr<analysis::AnalysisManager> analysis_manager;
90};
91
// Scratch storage used while one SingleExpr frame is evaluated.
struct SingleExprFrameData {
    // A float buffer addressed by name.
    struct DynamicArray {
        std::vector<float> buffer{};
    };

    // Arrays keyed by name; cleared at the start of every frame.
    std::map<std::string, DynamicArray> dynamic_arrays{};
};
98
99thread_local SingleExprFrameData
100 g_frame_data; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
101
102std::unordered_map<std::string, std::pair<int, int>> expr_autotune_cache;
103std::mutex expr_autotune_cache_mutex;
104
105template <bool check_dimensions>
106void validate_and_init_clips(BaseExprData* d, const VSMap* in,
107 const VSAPI* vsapi) {
108 int err = 0;
109 d->num_inputs = vsapi->mapNumElements(in, "clips");
110 if (d->num_inputs == 0) {
111 throw std::runtime_error("At least one clip must be provided.");
112 }
113
114 d->nodes.resize(d->num_inputs);
115 for (int i = 0; i < d->num_inputs; ++i) {
116 d->nodes[i] = vsapi->mapGetNode(in, "clips", i, &err);
117 }
118
119 std::vector<const VSVideoInfo*> vi(d->num_inputs);
120 for (int i = 0; i < d->num_inputs; ++i) {
121 vi[i] = vsapi->getVideoInfo(d->nodes[i]);
122 if (!vsh::isConstantVideoFormat(vi[i])) {
123 throw std::runtime_error(
124 "Only constant format clips are supported.");
125 }
126 }
127
128 if constexpr (check_dimensions) {
129 for (int i = 1; i < d->num_inputs; ++i) {
130 if (vi[i]->width != vi[0]->width ||
131 vi[i]->height != vi[0]->height) {
132 throw std::runtime_error(
133 "All clips must have the same dimensions.");
134 }
135 }
136 }
137
138 d->vi = *vi[0];
139}
140
141void parse_format_param(BaseExprData* d, const VSMap* in, const VSAPI* vsapi,
142 VSCore* core) {
143 int err = 0;
144 const int format_id =
145 static_cast<int>(vsapi->mapGetInt(in, "format", 0, &err));
146 if (err == 0) {
147 VSVideoFormat temp_format;
148 if (vsapi->getVideoFormatByID(&temp_format, format_id, core) != 0) {
149 if (d->vi.format.numPlanes != temp_format.numPlanes) {
150 throw std::runtime_error("The number of planes in the "
151 "inputs and output must match.");
152 }
153 VSVideoFormat new_format;
154 if (vsapi->queryVideoFormat(&new_format, d->vi.format.colorFamily,
155 temp_format.sampleType,
156 temp_format.bitsPerSample,
157 d->vi.format.subSamplingW,
158 d->vi.format.subSamplingH, core) != 0) {
159 d->vi.format = new_format;
160 } else {
161 throw std::runtime_error("Failed to query new format.");
162 }
163 }
164 }
165}
166
167void parse_common_params(BaseExprData* d, const VSMap* in, const VSAPI* vsapi) {
168 int err = 0;
169
170 const char* dump_path = vsapi->mapGetData(in, "dump_ir", 0, &err);
171 if ((err == 0) && (dump_path != nullptr)) {
172 d->dump_ir_path = dump_path;
173 }
174
175 d->opt_level = static_cast<int>(vsapi->mapGetInt(in, "opt_level", 0, &err));
176 if (err != 0) {
177 d->opt_level = 5; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
178 }
179 if (d->opt_level <= 0) {
180 throw std::runtime_error("opt_level must be greater than 0.");
181 }
182
183 d->approx_math =
184 static_cast<int>(vsapi->mapGetInt(in, "approx_math", 0, &err));
185 if (err != 0) {
186 d->approx_math = 2; // Default to auto mode
187 }
188 if (d->approx_math < 0 || d->approx_math > 2) {
189 throw std::runtime_error(
190 "approx_math must be 0 (disabled), 1 (enabled), or 2 (auto).");
191 }
192}
193
194void parse_expr_tiling_params(ExprData* d, const VSMap* in,
195 const VSAPI* vsapi) {
196 int err = 0;
197 d->tile_x = static_cast<int>(vsapi->mapGetInt(in, "tile_x", 0, &err));
198 if (err != 0) {
199 d->tile_x = -1;
200 }
201
202 d->tile_y = static_cast<int>(vsapi->mapGetInt(in, "tile_y", 0, &err));
203 if (err != 0) {
204 d->tile_y = -1;
205 }
206
207 if (d->tile_x < -1) {
208 throw std::runtime_error("tile_x must be -1 or >= 0.");
209 }
210 if (d->tile_y < -1) {
211 throw std::runtime_error("tile_y must be -1 or >= 0.");
212 }
213}
214
215void read_frame_properties(
216 std::vector<float>& props, const std::vector<const VSFrame*>& src_frames,
217 const std::vector<std::pair<int, std::string>>& required_props, int n,
218 const VSAPI* vsapi) {
219
220 props[0] = static_cast<float>(n);
221
222 for (size_t i = 0; i < required_props.size(); ++i) {
223 const auto& prop_info = required_props[i];
224 int clip_idx = prop_info.first;
225 const std::string& prop_name = prop_info.second;
226 int prop_array_idx = static_cast<int>(i) + 1;
227
228 const VSMap* props_map =
229 vsapi->getFramePropertiesRO(src_frames[clip_idx]);
230 int err = 0;
231 int type = vsapi->mapGetType(props_map, prop_name.c_str());
232
233 if (type == ptInt) {
234 props[prop_array_idx] = static_cast<float>(
235 vsapi->mapGetInt(props_map, prop_name.c_str(), 0, &err));
236 } else if (type == ptFloat) {
237 props[prop_array_idx] = static_cast<float>(
238 vsapi->mapGetFloat(props_map, prop_name.c_str(), 0, &err));
239 } else if (type == ptData) {
240 if (vsapi->mapGetDataSize(props_map, prop_name.c_str(), 0, &err) >
241 0 &&
242 (err == 0)) {
243 props[prop_array_idx] = static_cast<float>(
244 *vsapi->mapGetData(props_map, prop_name.c_str(), 0, &err));
245 } else {
246 err = 1;
247 }
248 } else {
249 err = 1;
250 }
251
252 if (err != 0) {
253 props[prop_array_idx] = std::bit_cast<float>(PROP_READ_NAN_PAYLOAD);
254 }
255 }
256}
257
258// NOLINTBEGIN(readability-identifier-naming)
259template <typename T>
260void genericFree(void* instanceData, [[maybe_unused]] VSCore* core,
261 const VSAPI* vsapi) {
262 // NOLINTEND(readability-identifier-naming)
263 std::unique_ptr<T> d(static_cast<T*>(instanceData));
264 for (auto* node : d->nodes) {
265 vsapi->freeNode(node);
266 }
267}
268
// Builds the textual key that identifies one expression/configuration
// combination in the compilation and auto-tune caches.
//
// The key is a sequence of '|'-separated "name=value" fields, in this order:
// expression text, mirror flag, output format name, plane width and height,
// a format/width/height triple per input clip, one field per property-map
// entry, one field per output property, the tile sizes, and finally
// opt_level and approx_math.
//
// Throws std::runtime_error when a format name cannot be obtained.
std::string generate_cache_key(
    const std::string& expr, const VSVideoInfo* vo, const VSAPI* vsapi,
    const std::vector<const VSVideoInfo*>& vi, bool mirror,
    const std::map<std::pair<int, std::string>, int>& prop_map, int plane_width,
    int plane_height, const std::vector<std::string>& output_props = {},
    int tile_x = 0, int tile_y = 0, int opt_level = 0, int approx_math = 0) {
    // Returns the name of `fmt` as a std::string.
    const auto format_name = [vsapi](const VSVideoFormat* fmt) -> std::string {
        std::array<char, 32> // NOLINT(cppcoreguidelines-avoid-magic-numbers)
            name{};
        if (vsapi->getVideoFormatName(fmt, name.data()) == 0) {
            throw std::runtime_error("Failed to get video format name");
        }
        return name.data();
    };

    std::string key =
        std::format("expr={}|mirror={}|out={}|w={}|h={}", expr, mirror,
                    format_name(&vo->format), plane_width, plane_height);

    for (size_t idx = 0; idx < vi.size(); ++idx) {
        const VSVideoInfo* input = vi[idx];
        key.append(std::format("|in{}={}|in{}_w={}|in{}_h={}", idx,
                               format_name(&input->format), idx, input->width,
                               idx, input->height));
    }

    for (const auto& [source, slot] : prop_map) {
        key.append(
            std::format("|prop{}={}.{}", slot, source.first, source.second));
    }

    for (const auto& name : output_props) {
        key.append(std::format("|out_prop={}", name));
    }

    key.append(std::format("|tile_x={}|tile_y={}", tile_x, tile_y));
    key.append(
        std::format("|opt_level={}|approx_math={}", opt_level, approx_math));

    return key;
}
307
// Frame callback of the "Expr" filter.
//
// arInitial: requests frame n from every input node.
// arAllFramesReady: fetches the input frames, creates the output frame,
// reads the required frame properties and, for each plane marked
// PoProcess, runs the plane's compiled function. A plane's function is
// obtained on first use, either directly or after benchmarking tile-size
// candidates when tile_x and/or tile_y is -1. Returns the output frame.
// Any other activation reason returns nullptr.
308// NOLINTBEGIN(readability-identifier-naming)
309const VSFrame*
310 VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
311 exprGetFrame(int n, int activationReason, void* instanceData,
312 [[maybe_unused]] void** frameData, VSFrameContext* frameCtx,
313 VSCore* core, const VSAPI* vsapi) {
314 // NOLINTEND(readability-identifier-naming)
315 auto* d = static_cast<ExprData*>(instanceData);
316
317 if (activationReason == arInitial) {
318 for (int i = 0; i < d->num_inputs; ++i) {
319 vsapi->requestFrameFilter(n, d->nodes[i], frameCtx);
320 }
321 } else if (activationReason == arAllFramesReady) {
322 std::vector<const VSFrame*> src_frames(d->num_inputs);
323 for (int i = 0; i < d->num_inputs; ++i) {
324 src_frames[i] = vsapi->getFrameFilter(n, d->nodes[i], frameCtx);
325 }
326
// Planes marked PoCopy name the first input frame as their source; processed
// planes pass nullptr.
327 std::array<const VSFrame*, 3> plane_src = {
328 d->plane_op.at(0) == PlaneOp::PoCopy ? src_frames[0] : nullptr,
329 d->plane_op.at(1) == PlaneOp::PoCopy ? src_frames[0] : nullptr,
330 d->plane_op.at(2) == PlaneOp::PoCopy ? src_frames[0] : nullptr};
331 std::array<int, 3> planes = {0, 1, 2};
332 VSFrame* dst_frame = vsapi->newVideoFrame2(
333 &d->vi.format, d->vi.width, d->vi.height, plane_src.data(),
334 planes.data(), src_frames[0], core);
335
// Index 0 of rwptrs/strides is the destination plane; index i + 1 is input i.
336 std::vector<uint8_t*> rwptrs(d->num_inputs + 1);
337 std::vector<int> strides(d->num_inputs + 1);
// props[0] is the frame number, followed by one slot per required property.
338 std::vector<float> props(1 + d->required_props.size());
339
340 read_frame_properties(props, src_frames, d->required_props, n, vsapi);
341
342 for (int plane = 0; plane < d->vi.format.numPlanes; ++plane) {
343 if (d->plane_op.at(plane) == PlaneOp::PoProcess) {
344 rwptrs[0] = vsapi->getWritePtr(dst_frame, plane);
345 strides[0] =
346 static_cast<int>(vsapi->getStride(dst_frame, plane));
347 for (int i = 0; i < d->num_inputs; ++i) {
348 rwptrs[i + 1] =
349 const_cast< // NOLINT(cppcoreguidelines-pro-type-const-cast)
350 uint8_t*>(vsapi->getReadPtr(src_frames[i], plane));
351 strides[i + 1] = static_cast<int>(
352 vsapi->getStride(src_frames[i], plane));
353 }
354
// No function stored for this plane yet: obtain one now.
355 if (d->compiled.at(plane).func_ptr == nullptr) {
356 int width = vsapi->getFrameWidth(dst_frame, plane);
357 int height = vsapi->getFrameHeight(dst_frame, plane);
358
359 std::vector<const VSVideoInfo*> vi(d->num_inputs);
360 for (int i = 0; i < d->num_inputs; ++i) {
361 vi[i] = vsapi->getVideoInfo(d->nodes[i]);
362 }
363
// Rebuild the expression text from the tokens, separated by single spaces;
// it is part of the cache key.
364 std::string expr_str;
365 for (const auto& token : d->tokens.at(plane)) {
366 if (!expr_str.empty()) {
367 expr_str += " ";
368 }
369 expr_str += token.text;
370 }
371
// Returns the compiled function for the given tile sizes, compiling and
// caching it first when the cache has no entry for the key.
372 auto get_or_compile = [&](int resolved_tile_x,
373 int resolved_tile_y) {
374 const std::string key = generate_cache_key(
375 expr_str, &d->vi, vsapi, vi, d->mirror_boundary,
376 d->prop_map, width, height, {}, resolved_tile_x,
377 resolved_tile_y, d->opt_level, d->approx_math);
378
// cache_mutex and jit_cache are not declared in the visible part of this
// file; presumably a shared cache of compiled functions — confirm.
379 std::lock_guard<std::mutex> lock(cache_mutex);
380 if (!jit_cache.contains(key)) {
381 size_t key_hash = std::hash<std::string>{}(key);
382 std::string func_name = std::format(
383 "process_plane_{}_{}", plane, key_hash);
384
385 try {
// NOTE(review): the beginning of the statement that ends on the next line
// (it has to declare `results`, which is used below) is absent from this
// listing.
387 *d->analysis_managers.at(plane));
388 Compiler compiler(
389 std::vector<Token>(d->tokens.at(plane)),
390 &d->vi, vi, width, height,
391 d->mirror_boundary, d->dump_ir_path,
392 d->prop_map, func_name, d->opt_level,
393 d->approx_math, results, resolved_tile_x,
394 resolved_tile_y);
395 jit_cache[key] = compiler.compile();
396 } catch (...) {
// Release all frames held by this call, then rethrow.
// NOTE(review): the exception then propagates out of this VS_CC callback —
// confirm the host can cope with that.
397 for (const auto& frame : src_frames) {
398 vsapi->freeFrame(frame);
399 }
400 vsapi->freeFrame(dst_frame);
401 throw;
402 }
403 }
404 return jit_cache.at(key);
405 };
406
407 const bool auto_tile_x = d->tile_x == -1;
408 const bool auto_tile_y = d->tile_y == -1;
409
410 if (!auto_tile_x && !auto_tile_y) {
// Both tile sizes are fixed: no benchmarking needed.
// NOTE(review): the filter is registered with fmParallel and d->compiled is
// written here without a lock — confirm concurrent calls cannot race.
411 d->compiled.at(plane) =
412 get_or_compile(d->tile_x, d->tile_y);
413 } else {
// The auto-tune key is built from the unresolved tile values (-1 for each
// automatic axis), so it identifies the request, not a candidate.
414 const std::string autotune_key = generate_cache_key(
415 expr_str, &d->vi, vsapi, vi, d->mirror_boundary,
416 d->prop_map, width, height, {}, d->tile_x,
417 d->tile_y, d->opt_level, d->approx_math);
418
419 int best_tile_x = 0;
420 int best_tile_y = 0;
421 bool has_autotuned = false;
422 {
423 std::lock_guard<std::mutex> lock(
424 expr_autotune_cache_mutex);
425 auto it = expr_autotune_cache.find(autotune_key);
426 if (it != expr_autotune_cache.end()) {
427 best_tile_x = it->second.first;
428 best_tile_y = it->second.second;
429 has_autotuned = true;
430 }
431 }
432
433 if (!has_autotuned) {
434 constexpr std::array<int, 8> AUTO_TILE_CANDIDATES = {
435 1, 4, 8, 16, 32,
436 64, 128, 256}; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
437
// Candidate list: the full cross product when both axes are automatic,
// otherwise only the automatic axis varies.
438 std::vector<std::pair<int, int>> candidates;
439 if (auto_tile_x && auto_tile_y) {
440 candidates.reserve(AUTO_TILE_CANDIDATES.size() *
441 AUTO_TILE_CANDIDATES.size());
442 for (int tx : AUTO_TILE_CANDIDATES) {
443 for (int ty : AUTO_TILE_CANDIDATES) {
444 candidates.emplace_back(tx, ty);
445 }
446 }
447 } else if (auto_tile_x) {
448 candidates.reserve(AUTO_TILE_CANDIDATES.size());
449 for (int tx : AUTO_TILE_CANDIDATES) {
450 candidates.emplace_back(tx, d->tile_y);
451 }
452 } else {
453 candidates.reserve(AUTO_TILE_CANDIDATES.size());
454 for (int ty : AUTO_TILE_CANDIDATES) {
455 candidates.emplace_back(d->tile_x, ty);
456 }
457 }
458
459 double best_time_ns =
460 std::numeric_limits<double>::max();
461 CompiledFunction best_compiled;
462
// Every candidate is compiled (or fetched from the cache) and run on this
// frame's real buffers; the lowest average run time wins.
463 for (const auto& [candidate_tile_x,
464 candidate_tile_y] : candidates) {
465 CompiledFunction candidate = get_or_compile(
466 candidate_tile_x, candidate_tile_y);
467
468 // Warm-up once before measuring.
469 candidate.func_ptr(nullptr, rwptrs.data(),
470 strides.data(),
471 props.data());
472
473 constexpr int MEASURED_RUNS =
474 2; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
475 const auto start =
476 std::chrono::steady_clock::now();
477 for (int run = 0; run < MEASURED_RUNS; ++run) {
478 candidate.func_ptr(nullptr, rwptrs.data(),
479 strides.data(),
480 props.data());
481 }
482 const auto end =
483 std::chrono::steady_clock::now();
484
485 const double avg_time_ns =
486 static_cast<double>(
487 std::chrono::duration_cast<
488 std::chrono::nanoseconds>(end -
489 start)
490 .count()) /
491 static_cast<double>(MEASURED_RUNS);
492 if (avg_time_ns < best_time_ns) {
493 best_time_ns = avg_time_ns;
494 best_tile_x = candidate_tile_x;
495 best_tile_y = candidate_tile_y;
496 best_compiled = candidate;
497 }
498 }
499
// NOTE(review): unlike the compile-failure path above, src_frames and
// dst_frame are not freed before this throw.
500 if (best_compiled.func_ptr == nullptr) {
501 throw std::runtime_error(
502 "Auto tile benchmark failed to select a "
503 "candidate.");
504 }
505
// Remember the winning tile sizes for later lookups with the same key.
506 {
507 std::lock_guard<std::mutex> lock(
508 expr_autotune_cache_mutex);
509 expr_autotune_cache[autotune_key] = {
510 best_tile_x, best_tile_y};
511 }
512 d->compiled.at(plane) = best_compiled;
513 } else {
// A previous benchmark already chose tile sizes for this key.
514 d->compiled.at(plane) =
515 get_or_compile(best_tile_x, best_tile_y);
516 }
517 }
518 }
519
// Produce the output plane.
520 d->compiled.at(plane).func_ptr(nullptr, rwptrs.data(),
521 strides.data(), props.data());
522 }
523 }
524
525 for (const auto& frame : src_frames) {
526 vsapi->freeFrame(frame);
527 }
528 return dst_frame;
529 }
530
531 return nullptr;
532}
533
534// NOLINTBEGIN(readability-identifier-naming)
535void VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
536exprFree(void* instanceData, [[maybe_unused]] VSCore* core,
537 const VSAPI* vsapi) {
538 // NOLINTEND(readability-identifier-naming)
539 genericFree<ExprData>(instanceData, core, vsapi);
540}
541
// NOLINTBEGIN(readability-identifier-naming)
// Creation callback of the "Expr" filter.
//
// Reads the arguments from `in`, prepares one expression per output plane
// and registers the filter. The steps are:
//   1. validate the input clips (equal dimensions required) and apply the
//      optional "format" argument;
//   2. read up to three "expr" strings, converting each non-empty one from
//      infix to postfix when "infix" is set; planes without an expression
//      of their own reuse the last one given;
//   3. tokenize and analyse each non-empty expression and record the frame
//      properties it reads; an empty expression marks the plane PoCopy;
//   4. read the common and tiling arguments.
//
// On any std::exception the acquired nodes are freed and the error is
// reported through `out` with an "Expr: " prefix; no filter is created.
void VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
exprCreate(const VSMap* in, VSMap* out, [[maybe_unused]] void* userData,
           VSCore* core, const VSAPI* vsapi) {
    // NOLINTEND(readability-identifier-naming)
    auto d = std::make_unique<ExprData>();
    int err = 0;

    try {
        validate_and_init_clips<true>(d.get(), in, vsapi);
        parse_format_param(d.get(), in, vsapi, core);

        d->mirror_boundary = vsapi->mapGetInt(in, "boundary", 0, &err) != 0;

        std::array<std::string, 3> expr_strs;

        // mapNumElements reports a missing key as -1, so anything that is
        // not positive means "no expression".
        const int nexpr = vsapi->mapNumElements(in, "expr");
        if (nexpr <= 0) {
            throw std::runtime_error(
                "At least one expression must be provided.");
        }
        // Reject surplus expressions up front with a readable message
        // instead of relying on array::at() throwing further down.
        if (nexpr > static_cast<int>(expr_strs.size())) {
            throw std::runtime_error(
                std::format("At most {} expressions may be provided.",
                            expr_strs.size()));
        }

        const bool use_infix = vsapi->mapGetInt(in, "infix", 0, &err) != 0;

        // Macros that are identical for every plane are built once; only
        // __PLANE_NO__ is added per expression below.
        std::map<std::string, std::string> base_macros;
        if (use_infix) {
            base_macros["__EXPR__"] = "";
            base_macros["__NUM_PLANES__"] =
                std::to_string(d->vi.format.numPlanes);
            base_macros["__WIDTH__"] = std::to_string(d->vi.width);
            base_macros["__HEIGHT__"] = std::to_string(d->vi.height);
            base_macros["__INPUT_NUM__"] = std::to_string(d->num_inputs);
            base_macros["__OUTPUT_BITDEPTH__"] =
                std::to_string(d->vi.format.bitsPerSample);
            base_macros["__OUTPUT_COLORFAMILY__"] =
                std::to_string(d->vi.format.colorFamily);
            base_macros["__SUBSAMPLE_W__"] =
                std::to_string(d->vi.format.subSamplingW);
            base_macros["__SUBSAMPLE_H__"] =
                std::to_string(d->vi.format.subSamplingH);
            base_macros["__OUTPUT_SAMPLETYPE__"] = std::to_string(
                (d->vi.format.sampleType == stFloat) ? 1 : 0);

            for (int j = 0; j < d->num_inputs; ++j) {
                const VSVideoInfo* input_vi =
                    vsapi->getVideoInfo(d->nodes[j]);
                base_macros[std::format("__INPUT_BITDEPTH_{}__", j)] =
                    std::to_string(input_vi->format.bitsPerSample);
                base_macros[std::format("__INPUT_COLORFAMILY_{}__", j)] =
                    std::to_string(input_vi->format.colorFamily);
                base_macros[std::format("__INPUT_NUM_PLANES_{}__", j)] =
                    std::to_string(input_vi->format.numPlanes);
                base_macros[std::format("__INPUT_SAMPLETYPE_{}__", j)] =
                    std::to_string(
                        (input_vi->format.sampleType == stFloat) ? 1 : 0);
            }
        }

        for (int i = 0; i < nexpr; ++i) {
            // Check the pointer before building a std::string from it.
            const char* raw_expr = vsapi->mapGetData(in, "expr", i, &err);
            if (err != 0 || raw_expr == nullptr) {
                throw std::runtime_error(
                    std::format("Failed to read expression {}.", i));
            }
            const std::string input_expr = raw_expr;

            if (use_infix && !input_expr.empty()) {
                // Each conversion gets its own copy of the macro table, as
                // the converter receives it through a non-const pointer.
                std::map<std::string, std::string> macros = base_macros;
                macros["__PLANE_NO__"] = std::to_string(i);

                expr_strs.at(i) = convert_infix_to_postfix(
                    input_expr, d->num_inputs, infix2postfix::Mode::Expr,
                    &macros);
            } else {
                expr_strs.at(i) = input_expr;
            }
        }
        // Planes without an expression of their own reuse the last one.
        for (int i = nexpr; i < d->vi.format.numPlanes; ++i) {
            expr_strs.at(i) = expr_strs.at(nexpr - 1);
        }

        for (int i = 0; i < d->vi.format.numPlanes; ++i) {
            if (expr_strs.at(i).empty()) {
                d->plane_op.at(i) = PlaneOp::PoCopy;
                continue;
            }
            d->plane_op.at(i) = PlaneOp::PoProcess;
            d->tokens.at(i) =
                tokenize(expr_strs.at(i), d->num_inputs, ExprMode::Expr);

            // Assign a property-array slot to every distinct
            // (clip, property) pair the expression refers to.
            for (const auto& token : d->tokens.at(i)) {
                if (token.type == TokenType::PropAccess ||
                    token.type == TokenType::PropExists) {
                    const auto& payload =
                        std::get<TokenPayloadPropAccess>(token.payload);
                    auto key =
                        std::make_pair(payload.clip_idx, payload.prop_name);
                    if (!d->prop_map.contains(key)) {
                        d->prop_map[key] = static_cast<int>(
                            1 + d->required_props
                                    .size()); // 0 is for frame number N
                        d->required_props.push_back(key);
                    }
                }
            }

            auto analyser = std::make_unique<analysis::AnalysisManager>(
                d->tokens.at(i), d->mirror_boundary);
            analysis::ExpressionAnalyzer expr_analyzer(*analyser);
            expr_analyzer.analyze();
            d->analysis_managers.at(i) = std::move(analyser);
        }

        parse_common_params(d.get(), in, vsapi);
        parse_expr_tiling_params(d.get(), in, vsapi);

    } catch (const std::exception& e) {
        // The instance data does not own the nodes, so release them here.
        for (auto* node : d->nodes) {
            if (node != nullptr) {
                vsapi->freeNode(node);
            }
        }
        vsapi->mapSetError(out, std::format("Expr: {}", e.what()).c_str());
        return;
    }

    std::vector<VSFilterDependency> deps;
    deps.reserve(d->nodes.size());
    for (auto* node : d->nodes) {
        deps.push_back({node, rpStrictSpatial});
    }

    // Take the address before release() gives up ownership of *d.
    VSVideoInfo* vi_ptr = &d->vi;

    vsapi->createVideoFilter(out, "Expr", vi_ptr, exprGetFrame, exprFree,
                             fmParallel, deps.data(),
                             static_cast<int>(deps.size()), d.release(), core);
}
669
670// NOLINTBEGIN(readability-identifier-naming)
671void VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
672singleExprFree(void* instanceData, [[maybe_unused]] VSCore* core,
673 const VSAPI* vsapi) {
674 // NOLINTEND(readability-identifier-naming)
675 genericFree<SingleExprData>(instanceData, core, vsapi);
676}
677
// Frame callback of the "SingleExpr" filter.
//
// arInitial: requests frame n from every input node.
// arAllFramesReady: fetches the input frames, creates the output frame with
// all planes sourced from the first input, obtains the compiled function on
// first use, runs it once for the whole frame, and then writes or deletes
// the output frame properties according to the values the function left in
// the property array. Returns the output frame.
// Any other activation reason returns nullptr.
678// NOLINTBEGIN(readability-identifier-naming)
679const VSFrame*
680 VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
681 singleExprGetFrame(int n, int activationReason, void* instanceData,
682 [[maybe_unused]] void** frameData,
683 VSFrameContext* frameCtx, VSCore* core,
684 const VSAPI* vsapi) {
685 // NOLINTEND(readability-identifier-naming)
686 auto* d = static_cast<SingleExprData*>(instanceData);
687
688 if (activationReason == arInitial) {
689 for (int i = 0; i < d->num_inputs; ++i) {
690 vsapi->requestFrameFilter(n, d->nodes[i], frameCtx);
691 }
692 } else if (activationReason == arAllFramesReady) {
// Start each frame with empty per-thread dynamic array storage.
693 g_frame_data.dynamic_arrays.clear();
694
695 std::vector<const VSFrame*> src_frames(d->num_inputs);
696 for (int i = 0; i < d->num_inputs; ++i) {
697 src_frames[i] = vsapi->getFrameFilter(n, d->nodes[i], frameCtx);
698 }
699
// Every plane of the new frame names the first input frame as its source.
700 std::array<const VSFrame*, 3> plane_src = {src_frames[0], src_frames[0],
701 src_frames[0]};
702 std::array<int, 3> planes = {0, 1, 2};
703 VSFrame* dst_frame = vsapi->newVideoFrame2(
704 &d->vi.format, d->vi.width, d->vi.height, plane_src.data(),
705 planes.data(), src_frames[0], core);
706
// rwptrs/strides are flattened as [frame][plane]: frame 0 is the
// destination, frame i (i >= 1) is input i - 1.
707 int num_planes = d->vi.format.numPlanes;
708 std::vector<uint8_t*> rwptrs((d->num_inputs + 1) * num_planes);
709 std::vector<int> strides((d->num_inputs + 1) * num_planes);
// props layout: [0] frame number, then one slot per required input property,
// then one slot per output property.
710 std::vector<float> props(1 + d->required_props.size() +
711 d->output_props.size());
712
713 read_frame_properties(props, src_frames, d->required_props, n, vsapi);
714
// Output slots start as the "not written" sentinel.
715 for (size_t i = 0; i < d->output_props.size(); ++i) {
716 props[1 + d->required_props.size() + i] =
717 std::bit_cast<float>(PROP_WRITE_NAN_PAYLOAD);
718 }
719
720 for (int i = 0; i <= d->num_inputs; ++i) {
721 for (int p = 0; p < num_planes; ++p) {
722 rwptrs[(i * num_planes) + p] =
723 (i == 0)
724 ? vsapi->getWritePtr(dst_frame, p)
725 : const_cast< // NOLINT(cppcoreguidelines-pro-type-const-cast)
726 uint8_t*>(
727 vsapi->getReadPtr(src_frames[i - 1], p));
728 strides[(i * num_planes) + p] = static_cast<int>(
729 (i == 0) ? vsapi->getStride(dst_frame, p)
730 : vsapi->getStride(src_frames[i - 1], p));
731 }
732 }
733
// No function stored yet: look it up in the cache, compiling it if needed.
// NOTE(review): the filter is registered with fmParallel and this check
// happens before the lock is taken — confirm concurrent calls cannot race
// on d->compiled.
734 if (d->compiled.func_ptr == nullptr) {
735 std::vector<const VSVideoInfo*> vi(d->num_inputs);
736 for (int i = 0; i < d->num_inputs; ++i) {
737 vi[i] = vsapi->getVideoInfo(d->nodes[i]);
738 }
739
// Rebuild the expression text from the tokens, separated by single spaces;
// it is part of the cache key.
740 std::string expr_str;
741 for (const auto& token : d->tokens) {
742 if (!expr_str.empty()) {
743 expr_str += " ";
744 }
745 expr_str += token.text;
746 }
747
748 std::vector<std::string> output_prop_names;
749 output_prop_names.reserve(d->output_props.size());
750 for (const auto& p : d->output_props) {
751 output_prop_names.push_back(p.first);
752 }
753
754 const std::string key = generate_cache_key(
755 expr_str, &d->vi, vsapi, vi, d->mirror_boundary, d->prop_map,
756 d->vi.width, d->vi.height, output_prop_names, 0, 0,
757 d->opt_level, d->approx_math);
758
// cache_mutex and jit_cache are not declared in the visible part of this
// file; presumably a shared cache of compiled functions — confirm.
759 std::lock_guard<std::mutex> lock(cache_mutex);
760 if (!jit_cache.contains(key)) {
761 size_t key_hash = std::hash<std::string>{}(key);
762 std::string func_name =
763 std::format("process_single_expr_{}", key_hash);
764
765 try {
// NOTE(review): the beginning of the statement that ends on the next line
// (it has to declare `results`, which is used below) is absent from this
// listing.
767 *d->analysis_manager);
768 Compiler compiler(
769 std::vector<Token>(d->tokens), &d->vi, vi, d->vi.width,
770 d->vi.height, d->mirror_boundary, d->dump_ir_path,
771 d->prop_map, func_name, d->opt_level, d->approx_math,
772 results, 0, 0, ExprMode::SingleExpr, output_prop_names);
773 jit_cache[key] = compiler.compile();
774 } catch (const std::exception& e) {
// Release all frames held by this call, then rethrow.
// NOTE(review): the exception then propagates out of this VS_CC callback —
// confirm the host can cope with that. `e` itself is not used.
775 for (const auto& frame : src_frames) {
776 vsapi->freeFrame(frame);
777 }
778 vsapi->freeFrame(dst_frame);
779 throw;
780 }
781 }
782 d->compiled = jit_cache.at(key);
783 }
784
// Unlike the Expr callback, the instance data is passed as first argument.
785 d->compiled.func_ptr(d, rwptrs.data(), strides.data(), props.data());
786
787 // Resolve prop types and write to output frame
788 enum class ResolvedPropWriteType : std::uint8_t { Int, Float };
789 std::vector<ResolvedPropWriteType> resolved_types;
790 resolved_types.reserve(d->output_props.size());
791 const VSMap* src_props = vsapi->getFramePropertiesRO(src_frames[0]);
792
793 for (const auto& prop_info : d->output_props) {
794 const auto& prop_name = prop_info.first;
795 const auto prop_write_type = prop_info.second;
796
// NOTE(review): the `case` labels of this switch are absent from this
// listing. The last branch keeps the type of an existing int/float property
// on the first source frame and otherwise falls back to Int for
// PropWriteType::AutoInt and to Float for anything else.
797 switch (prop_write_type) {
799 resolved_types.push_back(ResolvedPropWriteType::Int);
800 break;
802 resolved_types.push_back(ResolvedPropWriteType::Float);
803 break;
805 // The prop will be deleted so anything is fine.
806 resolved_types.push_back(ResolvedPropWriteType::Float);
807 break;
810 int existing_type =
811 vsapi->mapGetType(src_props, prop_name.c_str());
812 if (existing_type == ptInt) {
813 resolved_types.push_back(ResolvedPropWriteType::Int);
814 } else if (existing_type == ptFloat) {
815 resolved_types.push_back(ResolvedPropWriteType::Float);
816 } else {
817 if (prop_write_type == PropWriteType::AutoInt) {
818 resolved_types.push_back(ResolvedPropWriteType::Int);
819 } else {
820 resolved_types.push_back(ResolvedPropWriteType::Float);
821 }
822 }
823 break;
824 }
825 }
826
827 VSMap* dst_props = vsapi->getFramePropertiesRW(dst_frame);
828 for (size_t i = 0; i < d->output_props.size(); ++i) {
829 const auto& prop_name = d->output_props[i].first;
830 float value = props[1 + d->required_props.size() + i];
831
// Slot still holds the initial sentinel: leave the property untouched.
832 if (std::bit_cast<uint32_t>(value) == PROP_WRITE_NAN_PAYLOAD) {
833 continue;
834 }
835
// Delete sentinel: remove the property from the output frame.
836 if (std::bit_cast<uint32_t>(value) == PROP_DELETE_NAN_PAYLOAD) {
837 vsapi->mapDeleteKey(dst_props, prop_name.c_str());
838 continue;
839 }
840
// Integer properties are rounded with lroundf before being stored.
841 if (resolved_types[i] == ResolvedPropWriteType::Int) {
842 auto int_value = static_cast<int64_t>(lroundf(value));
843 vsapi->mapSetInt(dst_props, prop_name.c_str(), int_value,
844 maReplace);
845 } else { // FLOAT
846 vsapi->mapSetFloat(dst_props, prop_name.c_str(), value,
847 maReplace);
848 }
849 }
850
851 for (const auto& frame : src_frames) {
852 vsapi->freeFrame(frame);
853 }
854 return dst_frame;
855 }
856
857 return nullptr;
858}
859
// Creation callback of the "SingleExpr" filter.
//
// Validates the input clips (dimensions are not required to match), applies
// the optional "format" argument, reads the single "expr" string (converting
// it from infix when "infix" is set), tokenizes it, runs the array
// optimisation passes, records the properties the expression reads and
// stores, analyses the expression, reads the common arguments and finally
// registers the filter.
//
// On any std::exception the acquired nodes are freed and the error is
// reported through `out` with a "SingleExpr: " prefix; no filter is created.
860// NOLINTBEGIN(readability-identifier-naming)
861void VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
862singleExprCreate(const VSMap* in, VSMap* out, [[maybe_unused]] void* userData,
863 VSCore* core, const VSAPI* vsapi) {
864 // NOLINTEND(readability-identifier-naming)
865 auto d = std::make_unique<SingleExprData>();
866 int err = 0;
867
868 try {
869 validate_and_init_clips<false>(d.get(), in, vsapi);
870 parse_format_param(d.get(), in, vsapi, core);
871
872 d->mirror_boundary = vsapi->mapGetInt(in, "boundary", 0, &err) != 0;
873
874 const char* expr_str = vsapi->mapGetData(in, "expr", 0, &err);
875 if (err != 0) {
876 throw std::runtime_error("An expression must be provided.");
877 }
878
879 bool use_infix = vsapi->mapGetInt(in, "infix", 0, &err) != 0;
880
881 std::string processed_expr;
882 if (use_infix) {
// Predefined macros handed to the infix converter: output properties first,
// then one group per input clip.
883 std::map<std::string, std::string> macros;
884 macros["__SINGLEEXPR__"] = "";
885 macros["__NUM_PLANES__"] = std::to_string(d->vi.format.numPlanes);
886 macros["__WIDTH__"] = std::to_string(d->vi.width);
887 macros["__HEIGHT__"] = std::to_string(d->vi.height);
888 macros["__INPUT_NUM__"] = std::to_string(d->num_inputs);
889 macros["__OUTPUT_BITDEPTH__"] =
890 std::to_string(d->vi.format.bitsPerSample);
891 macros["__OUTPUT_COLORFAMILY__"] =
892 std::to_string(d->vi.format.colorFamily);
893 macros["__SUBSAMPLE_W__"] =
894 std::to_string(d->vi.format.subSamplingW);
895 macros["__SUBSAMPLE_H__"] =
896 std::to_string(d->vi.format.subSamplingH);
897 macros["__OUTPUT_SAMPLETYPE__"] =
898 std::to_string((d->vi.format.sampleType == stFloat) ? 1 : 0);
899
900 for (int i = 0; i < d->num_inputs; ++i) {
901 const VSVideoInfo* input_vi = vsapi->getVideoInfo(d->nodes[i]);
902 macros[std::format("__INPUT_BITDEPTH_{}__", i)] =
903 std::to_string(input_vi->format.bitsPerSample);
904 macros[std::format("__INPUT_COLORFAMILY_{}__", i)] =
905 std::to_string(input_vi->format.colorFamily);
906 macros[std::format("__INPUT_NUM_PLANES_{}__", i)] =
907 std::to_string(input_vi->format.numPlanes);
908 macros[std::format("__INPUT_SAMPLETYPE_{}__", i)] =
909 std::to_string(
910 (input_vi->format.sampleType == stFloat) ? 1 : 0);
911 macros[std::format("__INPUT_WIDTH_{}__", i)] =
912 std::to_string(input_vi->width);
913 macros[std::format("__INPUT_HEIGHT_{}__", i)] =
914 std::to_string(input_vi->height);
915 macros[std::format("__INPUT_SUBSAMPLE_W_{}__", i)] =
916 std::to_string(input_vi->format.subSamplingW);
917 macros[std::format("__INPUT_SUBSAMPLE_H_{}__", i)] =
918 std::to_string(input_vi->format.subSamplingH);
919 }
920
921 processed_expr = convert_infix_to_postfix(
922 expr_str, d->num_inputs, infix2postfix::Mode::Single, &macros);
923 } else {
924 processed_expr = expr_str;
925 }
926
927 d->tokens =
928 tokenize(processed_expr, d->num_inputs, ExprMode::SingleExpr);
929
930 // Array optimization passes
// The passes receive d->tokens directly together with a temporary analysis
// manager that is discarded at the end of this scope.
931 {
932 analysis::AnalysisManager temp_am(d->tokens, d->mirror_boundary, 0);
933 analysis::StaticArrayOptPass static_opt_pass;
934 static_opt_pass.run(d->tokens, temp_am);
// NOTE(review): the declaration of dyn_opt_pass is absent from this listing.
936 dyn_opt_pass.run(d->tokens, temp_am);
937 }
938
// Scan the tokens once: validate plane indices, and assign slots to the
// properties the expression reads and the properties it stores.
939 for (const auto& token : d->tokens) {
940 if (token.type == TokenType::ConstantPlaneWidth ||
941 token.type == TokenType::ConstantPlaneHeight) {
942 const auto& payload =
943 std::get<TokenPayloadPlaneDim>(token.payload);
944 if (payload.plane_idx < 0 ||
945 payload.plane_idx >= d->vi.format.numPlanes) {
946 throw std::runtime_error(
947 std::format("Invalid plane index {} in token '{}'",
948 payload.plane_idx, token.text));
949 }
950 } else if (token.type == TokenType::PropAccess ||
951 token.type == TokenType::PropExists) {
952 const auto& payload =
953 std::get<TokenPayloadPropAccess>(token.payload);
954 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
955 if (!d->prop_map.contains(key)) {
956 d->prop_map[key] = static_cast<int>(
957 1 +
958 d->required_props.size()); // 0 is for frame number N
959 d->required_props.push_back(key);
960 }
961 } else if (token.type == TokenType::PropStore) {
// The first store to a property name fixes its slot and its write type.
962 const auto& payload =
963 std::get<TokenPayloadPropStore>(token.payload);
964 if (!d->output_prop_map.contains(payload.prop_name)) {
965 d->output_prop_map[payload.prop_name] =
966 static_cast<int>(d->output_props.size());
967 d->output_props.emplace_back(payload.prop_name,
968 payload.type);
969 }
970 }
971 }
972
// Analyse the token list as left by the passes above; the manager is kept
// for the compilation done later in the frame callback.
973 auto analyser = std::make_unique<analysis::AnalysisManager>(
974 d->tokens, d->mirror_boundary, 0);
975 analysis::ExpressionAnalyzer expr_analyzer(*analyser);
976 expr_analyzer.analyze();
977 d->analysis_manager = std::move(analyser);
978
979 parse_common_params(d.get(), in, vsapi);
980
981 } catch (const std::exception& e) {
// Free the nodes explicitly before reporting the error.
982 for (auto* node : d->nodes) {
983 if (node != nullptr) {
984 vsapi->freeNode(node);
985 }
986 }
987 vsapi->mapSetError(out,
988 std::format("SingleExpr: {}", e.what()).c_str());
989 return;
990 }
991
992 std::vector<VSFilterDependency> deps;
993 deps.reserve(d->nodes.size());
994 for (auto* node : d->nodes) {
995 deps.push_back({node, rpStrictSpatial});
996 }
997
// Take the address before release() gives up ownership of *d.
998 VSVideoInfo* vi_ptr = &d->vi;
999
1000 vsapi->createVideoFilter(out, "SingleExpr", vi_ptr, singleExprGetFrame,
1001 singleExprFree, fmParallel, deps.data(),
1002 static_cast<int>(deps.size()), d.release(), core);
1003}
1004
1005struct VkExprData : BaseExprData {
1006 std::array<PlaneOp, 3> plane_op = {};
1007 std::array<std::vector<std::vector<Token>>, 3> tokens_stages;
1008 std::array<std::vector<std::unique_ptr<analysis::AnalysisManager>>, 3>
1009 analysis_managers;
1010
1011 int device_id = -1;
1012 int num_streams = 8; // NOLINT(cppcoreguidelines-avoid-magic-numbers)
1013 std::unique_ptr<vkexpr::VkExprExecutor> executor;
1014
1015 std::string dump_glsl_path;
1016};
1017
// NOLINTBEGIN(readability-identifier-naming)
// Frame callback of the VkExpr filter.
//
// On arInitial it requests frame `n` from every input node. On
// arAllFramesReady it fetches those frames, allocates the output frame
// (planes marked PoCopy are taken from the first input, which also supplies
// the frame properties), gathers the numeric property values and lets the
// executor process every PoProcess plane. Any exception from the executor is
// turned into a filter error and nullptr is returned. For every other
// activation reason the function returns nullptr.
const VSFrame*
    VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
    vkExprGetFrame(int n, int activationReason, void* instanceData,
                   [[maybe_unused]] void** frameData, VSFrameContext* frameCtx,
                   VSCore* core, const VSAPI* vsapi) {
    // NOLINTEND(readability-identifier-naming)
    auto* d = static_cast<VkExprData*>(instanceData);

    if (activationReason == arInitial) {
        for (int input = 0; input < d->num_inputs; ++input) {
            vsapi->requestFrameFilter(n, d->nodes[input], frameCtx);
        }
        return nullptr;
    }

    if (activationReason != arAllFramesReady) {
        return nullptr;
    }

    std::vector<const VSFrame*> src_frames(d->num_inputs);
    for (int input = 0; input < d->num_inputs; ++input) {
        src_frames[input] = vsapi->getFrameFilter(n, d->nodes[input], frameCtx);
    }

    // Releases every fetched input frame; shared by both exit paths.
    const auto release_sources = [&src_frames, vsapi]() {
        for (const auto& frame : src_frames) {
            vsapi->freeFrame(frame);
        }
    };

    // Copied planes reference the first input; processed planes get fresh
    // storage (nullptr source).
    std::array<const VSFrame*, 3> plane_src = {nullptr, nullptr, nullptr};
    for (size_t p = 0; p < plane_src.size(); ++p) {
        if (d->plane_op.at(p) == PlaneOp::PoCopy) {
            plane_src.at(p) = src_frames[0];
        }
    }
    std::array<int, 3> planes = {0, 1, 2};
    VSFrame* dst_frame = vsapi->newVideoFrame2(
        &d->vi.format, d->vi.width, d->vi.height, plane_src.data(),
        planes.data(), src_frames[0], core);

    // Slot 0 carries the frame number; the rest are the required properties.
    std::vector<float> props(1 + d->required_props.size());
    read_frame_properties(props, src_frames, d->required_props, n, vsapi);

    const int plane_count = d->vi.format.numPlanes;
    for (int plane = 0; plane < plane_count; ++plane) {
        if (d->plane_op.at(plane) == PlaneOp::PoProcess) {
            try {
                d->executor->processPlane(plane, n, src_frames, dst_frame,
                                          props, vsapi);
            } catch (const std::exception& e) {
                release_sources();
                vsapi->freeFrame(dst_frame);
                vsapi->setFilterError(
                    std::format("VkExpr: GPU error: {}", e.what()).c_str(),
                    frameCtx);
                return nullptr;
            }
        }
    }

    release_sources();
    return dst_frame;
}
1078
1079// NOLINTBEGIN(readability-identifier-naming)
1080void VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
1081vkExprFree(void* instanceData, [[maybe_unused]] VSCore* core,
1082 const VSAPI* vsapi) {
1083 // NOLINTEND(readability-identifier-naming)
1084 auto* raw = static_cast<VkExprData*>(instanceData);
1085 raw->executor.reset();
1086 std::unique_ptr<VkExprData> d(raw);
1087 for (auto* node : d->nodes) {
1088 vsapi->freeNode(node);
1089 }
1090}
1091
// NOLINTBEGIN(readability-identifier-naming)
// Filter constructor for "VkExpr".
//
// Reads the arguments from `in`, splits every per-plane expression into
// stages ("##" separates postfix stages, "---" separates infix stages),
// optionally converts infix stages to postfix, tokenizes and analyses each
// stage, generates one GLSL program per plane and stage, builds the executor
// and finally registers the filter on `out`.
//
// Arguments read from `in`: clips, expr, format, boundary, dump_glsl, infix,
// num_streams, device_id. If fewer expressions than planes are supplied the
// last one is reused for the remaining planes. A plane whose expression is a
// single empty stage is copied from the first clip instead of processed.
//
// Any std::exception raised during setup releases the acquired nodes and is
// reported through mapSetError with a "VkExpr: " prefix.
void VS_CC // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
vkExprCreate(const VSMap* in, VSMap* out, [[maybe_unused]] void* userData,
             VSCore* core, const VSAPI* vsapi) {
    // NOLINTEND(readability-identifier-naming)
    auto d = std::make_unique<VkExprData>();
    int err = 0;

    try {
        // Validate and initialize clips
        validate_and_init_clips<true>(d.get(), in, vsapi);

        parse_format_param(d.get(), in, vsapi, core);

        // mapNumElements reports a missing key with a negative count, so
        // reject everything that is not a positive number of expressions.
        const int nexpr = vsapi->mapNumElements(in, "expr");
        if (nexpr <= 0) {
            throw std::runtime_error(
                "At least one expression must be provided.");
        }

        std::array<std::string, 3> expr_strs;
        for (int i = 0; i < nexpr && i < 3; ++i) {
            expr_strs.at(i) = vsapi->mapGetData(in, "expr", i, &err);
        }
        // Planes without their own expression reuse the last one given.
        for (int i = nexpr; i < d->vi.format.numPlanes; ++i) {
            expr_strs.at(i) = expr_strs.at(nexpr - 1);
        }

        d->mirror_boundary = vsapi->mapGetInt(in, "boundary", 0, &err) != 0;

        const char* dump_glsl_path =
            vsapi->mapGetData(in, "dump_glsl", 0, &err);
        if ((err == 0) && (dump_glsl_path != nullptr)) {
            d->dump_glsl_path = dump_glsl_path;
        }

        const bool use_infix = vsapi->mapGetInt(in, "infix", 0, &err) != 0;

        constexpr std::string_view POSTFIX_STAGE_SEPARATOR = "##";
        constexpr std::string_view INFIX_STAGE_SEPARATOR = "---";
        const std::string_view stage_separator =
            use_infix ? INFIX_STAGE_SEPARATOR : POSTFIX_STAGE_SEPARATOR;

        std::array<std::vector<std::string>, 3> processed_stages;
        for (int i = 0; i < nexpr && i < 3; ++i) {
            // Split the plane expression into its stages.
            std::string raw_expr = expr_strs.at(i);
            std::vector<std::string> stages;
            size_t pos = 0;
            while ((pos = raw_expr.find(stage_separator)) !=
                   std::string::npos) {
                stages.push_back(raw_expr.substr(0, pos));
                raw_expr.erase(0, pos + stage_separator.size());
            }
            stages.push_back(raw_expr);

            if (use_infix) {
                // Predefined macros visible to the infix front end.
                std::map<std::string, std::string> macros;
                macros["__GPU__"] = "";
                macros["__EXPR__"] = "";
                macros["__NUM_PLANES__"] =
                    std::to_string(d->vi.format.numPlanes);
                macros["__WIDTH__"] = std::to_string(d->vi.width);
                macros["__HEIGHT__"] = std::to_string(d->vi.height);
                macros["__INPUT_NUM__"] = std::to_string(d->num_inputs);
                macros["__OUTPUT_BITDEPTH__"] =
                    std::to_string(d->vi.format.bitsPerSample);
                macros["__OUTPUT_COLORFAMILY__"] =
                    std::to_string(d->vi.format.colorFamily);
                macros["__SUBSAMPLE_W__"] =
                    std::to_string(d->vi.format.subSamplingW);
                macros["__SUBSAMPLE_H__"] =
                    std::to_string(d->vi.format.subSamplingH);
                macros["__PLANE_NO__"] = std::to_string(i);
                macros["__OUTPUT_SAMPLETYPE__"] = std::to_string(
                    (d->vi.format.sampleType == stFloat) ? 1 : 0);

                for (int j = 0; j < d->num_inputs; ++j) {
                    const VSVideoInfo* input_vi =
                        vsapi->getVideoInfo(d->nodes[j]);
                    macros[std::format("__INPUT_BITDEPTH_{}__", j)] =
                        std::to_string(input_vi->format.bitsPerSample);
                    macros[std::format("__INPUT_COLORFAMILY_{}__", j)] =
                        std::to_string(input_vi->format.colorFamily);
                    macros[std::format("__INPUT_NUM_PLANES_{}__", j)] =
                        std::to_string(input_vi->format.numPlanes);
                    macros[std::format("__INPUT_SAMPLETYPE_{}__", j)] =
                        std::to_string(
                            (input_vi->format.sampleType == stFloat) ? 1 : 0);
                }

                // Empty stages stay empty so the copy detection below still
                // works; every other stage is converted to postfix in place.
                for (size_t stage_idx = 0; stage_idx < stages.size();
                     ++stage_idx) {
                    auto& stage = stages[stage_idx];
                    if (!stage.empty()) {
                        stage = convert_infix_to_postfix(
                            stage, d->num_inputs, infix2postfix::Mode::VkExpr,
                            &macros, static_cast<int>(stage_idx));
                    }
                }
            }
            processed_stages.at(i) = stages;
        }
        for (int i = nexpr; i < d->vi.format.numPlanes; ++i) {
            processed_stages.at(i) = processed_stages.at(nexpr - 1);
        }

        for (int i = 0; i < d->vi.format.numPlanes; ++i) {
            if (processed_stages.at(i).empty() ||
                (processed_stages.at(i).size() == 1 &&
                 processed_stages.at(i)[0].empty())) {
                d->plane_op.at(i) = PlaneOp::PoCopy;
            } else {
                d->plane_op.at(i) = PlaneOp::PoProcess;

                auto& plane_stages = processed_stages.at(i);
                d->tokens_stages.at(i).resize(plane_stages.size());
                d->analysis_managers.at(i).resize(plane_stages.size());

                for (size_t s = 0; s < plane_stages.size(); ++s) {
                    d->tokens_stages.at(i).at(s) =
                        tokenize(plane_stages.at(s), d->num_inputs,
                                 ExprMode::VkExpr, static_cast<int>(s));

                    // Assign a property slot to every distinct
                    // (clip, property) pair the expression reads.
                    for (const auto& token : d->tokens_stages.at(i).at(s)) {
                        if (token.type == TokenType::PropAccess ||
                            token.type == TokenType::PropExists) {
                            const auto& payload =
                                std::get<TokenPayloadPropAccess>(token.payload);
                            auto key = std::make_pair(payload.clip_idx,
                                                      payload.prop_name);
                            if (!d->prop_map.contains(key)) {
                                d->prop_map[key] = static_cast<int>(
                                    1 + d->required_props.size()); // 0 is for N
                                d->required_props.push_back(key);
                            }
                        }
                    }

                    auto analyser = std::make_unique<analysis::AnalysisManager>(
                        d->tokens_stages.at(i).at(s), d->mirror_boundary);
                    analysis::ExpressionAnalyzer expr_analyzer(*analyser);
                    expr_analyzer.analyze();
                    d->analysis_managers.at(i).at(s) = std::move(analyser);
                }
            }
        }

        d->num_streams =
            static_cast<int>(vsapi->mapGetInt(in, "num_streams", 0, &err));
        if (err != 0 || d->num_streams < 1) {
            // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
            d->num_streams = 8;
        }

        d->device_id =
            static_cast<int>(vsapi->mapGetInt(in, "device_id", 0, &err));
        if (err != 0) {
            d->device_id = -1;
        }
        if (d->device_id < -1) {
            throw std::runtime_error("device_id must be >= -1");
        }

        auto num_props_floats =
            static_cast<uint32_t>(1 + d->required_props.size()); // N + props

        std::array<std::vector<std::string>, 3> glsl_stages;
        for (int i = 0; i < d->vi.format.numPlanes; ++i) {
            if (d->plane_op.at(i) != PlaneOp::PoProcess) {
                continue;
            }

            // subSamplingW/H are log2 factors that apply to the second and
            // third plane only, so plane sizes are obtained by shifting.
            // Dividing by them is wrong and divides by zero whenever the
            // format is not subsampled.
            const int shift_w = (i == 0) ? 0 : d->vi.format.subSamplingW;
            const int shift_h = (i == 0) ? 0 : d->vi.format.subSamplingH;
            const int plane_width = d->vi.width >> shift_w;
            const int plane_height = d->vi.height >> shift_h;

            auto num_stages = d->tokens_stages.at(i).size();
            glsl_stages.at(i).resize(num_stages);

            for (size_t s = 0; s < num_stages; ++s) {
                analysis::ExpressionAnalysisResults analysis_results(
                    *d->analysis_managers.at(i).at(s));
                GLSLGenerator generator(
                    d->tokens_stages.at(i).at(s), d->num_inputs,
                    static_cast<int>(s), plane_width, plane_height,
                    d->mirror_boundary, d->prop_map, analysis_results);

                glsl_stages.at(i).at(s) = generator.generate();

                if (!d->dump_glsl_path.empty()) {
                    // Insert ".plane<i>.stage<s>" before the extension, or
                    // append it when the path has no dot.
                    std::string plane_specific_path = d->dump_glsl_path;
                    size_t dot_pos = plane_specific_path.rfind('.');
                    std::string suffix = std::format(".plane{}.stage{}", i, s);
                    if (dot_pos != std::string::npos) {
                        plane_specific_path.insert(dot_pos, suffix);
                    } else {
                        plane_specific_path += suffix;
                    }

                    // Dumping is best effort: an unwritable path is ignored.
                    std::ofstream glsl_file(plane_specific_path);
                    if (glsl_file.is_open()) {
                        glsl_file << glsl_stages.at(i).at(s);
                        glsl_file.close();
                    }
                }
            }
        }

        d->executor = std::make_unique<vkexpr::VkExprExecutor>(
            d->device_id, d->num_streams, d->num_inputs, std::move(glsl_stages),
            num_props_floats);

    } catch (const std::exception& e) {
        for (auto* node : d->nodes) {
            if (node != nullptr) {
                vsapi->freeNode(node);
            }
        }
        vsapi->mapSetError(out, std::format("VkExpr: {}", e.what()).c_str());
        return;
    }

    std::vector<VSFilterDependency> deps;
    deps.reserve(d->nodes.size());
    for (auto* node : d->nodes) {
        deps.push_back({node, rpStrictSpatial});
    }

    VSVideoInfo* vi_ptr = &d->vi;

    // Ownership of the instance data passes to the core; vkExprFree
    // reclaims it.
    vsapi->createVideoFilter(out, "VkExpr", vi_ptr, vkExprGetFrame, vkExprFree,
                             fmParallel, deps.data(),
                             static_cast<int>(deps.size()), d.release(), core);
}
1323
1324} // anonymous namespace
1325
1326// Host API for JIT code to manage dynamic arrays
1327// TODO: Move this to a separate file.
1328// TODO: Optimize this.
1329extern "C" {
1330
1331float* llvmexpr_ensure_buffer(const char* name, int64_t requested_size) {
1332 auto& array = g_frame_data.dynamic_arrays[std::string(name)];
1333 if (static_cast<size_t>(requested_size) > array.buffer.size()) {
1334 array.buffer.resize(requested_size);
1335 }
1336 return array.buffer.data();
1337}
1338
1339int64_t llvmexpr_get_buffer_size(const char* name) {
1340 auto it = g_frame_data.dynamic_arrays.find(std::string(name));
1341 return (it != g_frame_data.dynamic_arrays.end())
1342 ? static_cast<int64_t>(it->second.buffer.size())
1343 : 0;
1344}
1345
1346} // extern "C"
1347
1348// NOLINTBEGIN(readability-identifier-naming)
1349VS_EXTERNAL_API(void)
1350VapourSynthPluginInit2(VSPlugin* plugin, const VSPLUGINAPI* vspapi) {
1351 // NOLINTEND(readability-identifier-naming)
1352 vspapi->configPlugin(
1353 "com.yuygfgg.llvmexpr", "llvmexpr", "LLVM JIT RPN Expression Filter",
1354 VS_MAKE_VERSION(4, 4), VAPOURSYNTH_API_VERSION, 0, plugin);
1355 vspapi->registerFunction(
1356 "Expr",
1357 "clips:vnode[];expr:data[];format:int:opt;boundary:int:opt;"
1358 "dump_ir:data:opt;opt_level:int:opt;approx_math:int:opt;infix:int:opt;"
1359 "tile_x:int:opt;tile_y:int:opt;",
1360 "clip:vnode;", exprCreate, nullptr, plugin);
1361 vspapi->registerFunction("SingleExpr",
1362 "clips:vnode[];expr:data;format:int:opt;boundary:"
1363 "int:opt;dump_ir:data:opt;opt_"
1364 "level:int:opt;approx_math:int:opt;infix:int:opt;",
1365 "clip:vnode;", singleExprCreate, nullptr, plugin);
1366
1367 vspapi->registerFunction("VkExpr",
1368 "clips:vnode[];expr:data[];format:int:opt;"
1369 "boundary:int:opt;num_streams:int:opt;device_id:"
1370 "int:opt;dump_glsl:data:opt;infix:int:opt;",
1371 "clip:vnode;", vkExprCreate, nullptr, plugin);
1372}
std::string convert_infix_to_postfix(const std::string &infix_expr, int num_inputs, infix2postfix::Mode mode, const std::map< std::string, std::string > *predefined_macros, int num_intermediate_inputs)
std::unordered_map< std::string, CompiledFunction > jit_cache
Definition Jit.cpp:215
std::mutex cache_mutex
Definition Jit.cpp:216
std::vector< Token > tokenize(const std::string &expr, int num_inputs, ExprMode mode, int num_intermediate_inputs)
@ ConstantPlaneWidth
Definition Tokenizer.hpp:37
@ ConstantPlaneHeight
Definition Tokenizer.hpp:38
CompiledFunction compile()
Definition Compiler.cpp:95
std::string generate()
PreservedAnalyses run(std::vector< Token > &tokens, AnalysisManager &am) override
PreservedAnalyses run(std::vector< Token > &tokens, AnalysisManager &am) override
float * llvmexpr_ensure_buffer(const char *name, int64_t requested_size)
constexpr uint32_t PROP_WRITE_NAN_PAYLOAD
Definition llvmexpr.cpp:54
constexpr uint32_t PROP_READ_NAN_PAYLOAD
Definition llvmexpr.cpp:52
constexpr uint32_t PROP_DELETE_NAN_PAYLOAD
Definition llvmexpr.cpp:56
int64_t llvmexpr_get_buffer_size(const char *name)
VapourSynthPluginInit2(VSPlugin *plugin, const VSPLUGINAPI *vspapi)
ProcessProc func_ptr
Definition Jit.hpp:39